ggml.c: bugfix: CBLAS profile #1 was not executed; misc minor refactors

mqy · mqy · commit 2ea239aa1e10 · 2023-05-29T09:00:15.000+08:00
diff --git a/examples/mulmat-tune/bench-out/7b.q4_0.accelerate.txt b/examples/mulmat-tune/bench-out/7b.q4_0.accelerate.txt
@@ -3,38 +3,38 @@
 -1 0 0   3 0 1  -1 0 0
  0 1 0   3 0 1  -1 0 0
 4096 4096
- 16       23    14046 0 0        0 0    11366     6297 0
- 32       36    26793 0 0        0 0    11244     6201 0
- 48       55    40187 0 0        0 0    11316     7811 0
- 64       78    54450 0 0        0 0    11149     7859 0
- 80       96    68095 0 0        0 0    11258     8748 0
- 96      114    81588 0 0        0 0    11017    10248 0
-112      134    96596 0 0        0 0    11186    10506 0
-128      157   112871 0 0        0 0    11179    11887 0
+ 16       17    14400 0 0    20380 0    13643     6406 0
+ 32       48    26184 0 0    17892 0    12759     6875 0
+ 48       62    40950 0 0    20940 0    11344     6470 0
+ 64       75    54959 0 0    19897 0    12056     8272 0
+ 80       95    69812 0 0    23261 0    13296    10944 0
+ 96      135    82530 0 0    20238 0    11363     9733 0
+112      135    97063 0 0    21620 0    11008    10231 0
+128      160   110596 0 0    22374 0    11130    12202 0
 4096 11008
- 16       55    36520 0 0        0 0    29851     9467 0
- 32      103    73460 0 0        0 0    29815    11175 0
- 48      173   109619 0 0        0 0    29870    13368 0
- 64      206   147174 0 0        0 0    29571    16828 0
- 80      289   178721 0 0        0 0    29895    18013 0
- 96      343   219130 0 0        0 0    29633    21457 0
-112      550   257754 0 0        0 0    30342    23557 0
-128      594   298395 0 0        0 0    29683    24796 0
+ 16       63    34214 0 0    43145 0    30377     9875 0
+ 32       98    71625 0 0    43591 0    29675    11653 0
+ 48      155   109818 0 0    44130 0    30964    14123 0
+ 64      253   144841 0 0    46174 0    29843    17059 0
+ 80      279   175670 0 0    47225 0    29574    16913 0
+ 96      331   217921 0 0    48978 0    29582    19354 0
+112      408   254362 0 0    53326 0    29963    22962 0
+128      611   281834 0 0    57593 0    30629    25448 0
 11008 4096
- 16       19    35077 0 0        0 0    30130    21051 0
- 32       43    71844 0 0        0 0    29937    21740 0
- 48       56   108664 0 0        0 0    30534    23017 0
- 64       94   148288 0 0        0 0    29848    26486 0
- 80      108   187098 0 0        0 0    29896    29687 0
- 96      116   224466 0 0        0 0    29931    31416 0
-112      137   264372 0 0        0 0    29797    34035 0
-128      178   300958 0 0        0 0    29713    37036 0
+ 16       18    35422 0 0    53263 0    30608    20630 0
+ 32       37    69747 0 0    54542 0    30501    23162 0
+ 48       53   107693 0 0    56207 0    29500    23522 0
+ 64       99   144891 0 0    60231 0    29461    23695 0
+ 80       98   178384 0 0    60697 0    29281    26783 0
+ 96      112   217583 0 0    63507 0    29741    31710 0
+112      170   253402 0 0    65329 0    28823    34861 0
+128      189   290395 0 0    70656 0    31023    35913 0
 32000 4096
- 16       18   105077 0 0        0 0    87731    67479 0
- 32       36   205088 0 0        0 0    86620    72865 0
- 48       54   314438 0 0        0 0    87458    77700 0
- 64       75   420397 0 0        0 0    86515    83575 0
- 80      109   541305 0 0        0 0    86580    88873 0
- 96      121   646842 0 0        0 0    86500    96982 0
-112      134   761083 0 0        0 0    87326   102948 0
-128      155   872466 0 0        0 0    87668   112924 0
+ 16       18   104453 0 0   146992 0    86361    67977 0
+ 32       36   203698 0 0   150361 0    87629    71108 0
+ 48       89   312316 0 0   155162 0    86803    76783 0
+ 64      104   428321 0 0   161366 0    89776    82720 0
+ 80       93   532930 0 0   171931 0    87039    88321 0
+ 96      113   642233 0 0   176509 0    86327    95598 0
+112      169   745426 0 0   186020 0    87538   102664 0
+128      202   860052 0 0   196480 0    88918   109959 0
diff --git a/examples/mulmat-tune/mulmat-tune.c b/examples/mulmat-tune/mulmat-tune.c
@@ -79,25 +79,31 @@ int ggml_mulmat_tune_read_data(struct ggml_mulmat_tune *tune, FILE *fp) {
         return rc;
     }
 
-    tune->items = malloc(sizeof(struct ggml_mulmat_tune_m) *
-                         (tune->n_shapes * tune->n_profiles * tune->m_num));
-    if (tune->items == NULL) {
-        fprintf(stderr, "failed to allocate memory\n");
-        return -2;
+    {
+        size_t item_size = sizeof(struct ggml_mulmat_tune_m) *
+                           (tune->n_shapes * tune->n_profiles * tune->m_num);
+        tune->items = malloc(item_size);
+        if (tune->items == NULL) {
+            fprintf(stderr, "failed to allocate memory\n");
+            return -2;
+        }
+        memset(tune->items, 0, item_size);
     }
 
-    size_t sz = sizeof(struct ggml_task_profile) * tune->n_profiles;
-    tune->profiles = malloc(sz);
-    GGML_ASSERT(tune->profiles);
-    memset(tune->profiles, 0, sz);
+    {
+        size_t sz = sizeof(struct ggml_task_profile) * tune->n_profiles;
+        tune->profiles = malloc(sz);
+        GGML_ASSERT(tune->profiles);
+        memset(tune->profiles, 0, sz);
+    }
 
     for (int ip = 0; ip < tune->n_profiles; ip++) {
         struct ggml_task_profile *profile = &tune->profiles[ip];
         for (int j = 0; j < 3; j++) {
             struct ggml_task_stage *ts = &profile->stages[j];
             int backend, parallel, wait;
-            rc = fscanf(fp, "%d %d %d", &backend, &parallel, &wait);
-            if (rc <= 0) {
+            if (rc = fscanf(fp, "%d %d %d", &backend, &parallel, &wait),
+                rc <= 0) {
                 return rc;
             }
             ts->backend = backend;
@@ -107,28 +113,27 @@ int ggml_mulmat_tune_read_data(struct ggml_mulmat_tune *tune, FILE *fp) {
     }
 
     for (int i_shape = 0; i_shape < tune->n_shapes; i_shape++) {
-        rc = fscanf(fp, "%d %d", &tune->shapes[i_shape].N,
-                    &tune->shapes[i_shape].K);
-        if (rc <= 0) {
+        if (rc = fscanf(fp, "%d %d", &tune->shapes[i_shape].N,
+                        &tune->shapes[i_shape].K),
+            rc <= 0) {
             return rc;
         }
 
         for (int i_m = 0; i_m < tune->m_num; i_m++) {
             int M;
             for (int ip = 0; ip < tune->n_profiles; ip++) {
                 if (ip == 0) {
-                    rc = fscanf(fp, "%d", &M);
-                    if (rc <= 0) {
+                    if (rc = fscanf(fp, "%d", &M), rc <= 0) {
                         return rc;
                     }
                 }
                 int index =
                     ggml_mulmat_tune_get_item_index(tune, i_shape, ip, i_m);
                 struct ggml_mulmat_tune_m *item = &tune->items[index];
                 item->M = M;
-                rc = fscanf(fp, "%d %d %d", &item->stages_time[0],
-                            &item->stages_time[1], &item->stages_time[2]);
-                if (rc <= 0) {
+                if (rc = fscanf(fp, "%d %d %d", &item->stages_time[0],
+                                &item->stages_time[1], &item->stages_time[2]),
+                    rc <= 0) {
                     return rc;
                 }
             }
@@ -139,40 +144,38 @@ int ggml_mulmat_tune_read_data(struct ggml_mulmat_tune *tune, FILE *fp) {
 }
 
 int ggml_mulmat_tune_write_data(const struct ggml_mulmat_tune *tune, FILE *fp) {
-    int rc = fprintf(fp, "%d %s %d %s %d %s %d %d %d %d\n", tune->version,
+    int rc;
+    if (rc = fprintf(fp, "%d %s %d %s %d %s %d %d %d %d\n", tune->version,
                      tune->model, tune->type, tune->type_name, tune->backend,
                      tune->blas_vendor, tune->n_shapes, tune->m_step,
-                     tune->m_num, tune->n_profiles);
-    if (rc <= 0) {
+                     tune->m_num, tune->n_profiles),
+        rc <= 0) {
         return rc;
     }
 
     for (int i = 0; i < tune->n_profiles; i++) {
         struct ggml_task_profile *profile = &tune->profiles[i];
         for (int j = 0; j < 3; j++) {
             struct ggml_task_stage *ts = &profile->stages[j];
-            rc = fprintf(fp, "%2d %d %d", ts->backend,
-                         ts->parallel ? 1 : 0, ts->wait ? 1 : 0);
-            if (rc <= 0) {
+            if (rc = fprintf(fp, "%2d %d %d", ts->backend, ts->parallel ? 1 : 0,
+                             ts->wait ? 1 : 0),
+                rc <= 0) {
                 return rc;
             }
             if (j < 2) {
-                rc = fprintf(fp, "  ");
-                if (rc <= 0) {
+                if (rc = fprintf(fp, "  "), rc <= 0) {
                     return rc;
                 }
             }
         }
-        rc = fprintf(fp, "\n");
-        if (rc <= 0) {
+        if (rc = fprintf(fp, "\n"), rc <= 0) {
             return rc;
         }
     }
 
     for (int i_shape = 0; i_shape < tune->n_shapes; i_shape++) {
         const struct ggml_mulmat_tune_nk *shape = &tune->shapes[i_shape];
-        rc = fprintf(fp, "%d %d\n", shape->N, shape->K);
-        if (rc <= 0) {
+        if (rc = fprintf(fp, "%d %d\n", shape->N, shape->K), rc <= 0) {
             return rc;
         }
 
@@ -182,29 +185,26 @@ int ggml_mulmat_tune_write_data(const struct ggml_mulmat_tune *tune, FILE *fp) {
                     ggml_mulmat_tune_get_item_index(tune, i_shape, ip, i_m);
                 struct ggml_mulmat_tune_m *item = &tune->items[index];
                 if (ip == 0) {
-                    rc = fprintf(fp, "%3d", item->M);
-                    if (rc <= 0) {
+                    if (rc = fprintf(fp, "%3d", item->M), rc <= 0) {
                         return rc;
                     }
                 }
 
                 struct ggml_task_profile *profile = &tune->profiles[ip];
                 for (int k = 0; k < 3; k++) {
                     if (profile->stages[k].backend != GGML_BACKEND_UNKNOWN) {
-                        rc = fprintf(fp, "%9d", item->stages_time[k]);
-                        if (rc <= 0) {
+                        if (rc = fprintf(fp, "%9d", item->stages_time[k]),
+                            rc <= 0) {
                             return rc;
                         }
                     } else {
-                        rc = fprintf(fp, " 0");
-                        if (rc <= 0) {
+                        if (rc = fprintf(fp, " 0"), rc <= 0) {
                             return rc;
                         }
                     }
                 }
             }
-            rc = fprintf(fp, "\n");
-            if (rc <= 0) {
+            if (rc = fprintf(fp, "\n"), rc <= 0) {
                 return rc;
             }
         }
@@ -298,8 +298,8 @@ void ggml_mulmat_tune_estimate_time(
             if (ts->parallel) {
                 t /= nth;
             }
-            time_stats->profile_time[ip].stage_time[stage] = t;
-            time_stats->profile_time[ip].total_time += t;
+            time_stats->profile_time[ip].stage_time[stage] = (int)t;
+            time_stats->profile_time[ip].total_time += (int)t;
         }
     }
 }
@@ -313,7 +313,7 @@ static const char *ggml_backend_names[] = {
 
 const char *ggml_get_backend_name(enum ggml_backend backend) {
     if (backend == GGML_BACKEND_UNKNOWN) {
-        return "";
+        return "UNKNOWN";
     }
     return ggml_backend_names[backend];
 }
diff --git a/examples/mulmat-tune/mulmat-tune.h b/examples/mulmat-tune/mulmat-tune.h
@@ -13,7 +13,7 @@ extern "C" {
 #define GGML_MULMAT_MAX_PROFILES 4
 
 struct ggml_task_stage {
-    int backend; // enum ggml_backend
+    /*enum ggml_backend*/ int backend;
     bool parallel;
     bool wait;
 };
@@ -99,7 +99,6 @@ void ggml_mulmat_init_task_profiles(/*enum ggml_backend*/ int backend);
 int ggml_mulmat_get_task_profiles(struct ggml_task_profile **profiles,
                                   int src0_type, int src1_type);
 
-// returns enum ggml_backend
 /*enum ggml_backend*/ int ggml_auto_detect_backend(void);
 
 const char *ggml_get_backend_name(/*enum ggml_backend*/ int backend);
diff --git a/ggml.c b/ggml.c

Original file line number	Diff line number	Diff line change
`@@ -79,25 +79,31 @@ int ggml_mulmat_tune_read_data(struct ggml_mulmat_tune *tune, FILE *fp) {`
`79`	`79`	`return rc;`
`80`	`80`	`}`
`81`	`81`
`82`		`- tune->items = malloc(sizeof(struct ggml_mulmat_tune_m) *`
`83`		`- (tune->n_shapes * tune->n_profiles * tune->m_num));`
`84`		`- if (tune->items == NULL) {`
`85`		`- fprintf(stderr, "failed to allocate memory\n");`
`86`		`- return -2;`
	`82`	`+ {`
	`83`	`+ size_t item_size = sizeof(struct ggml_mulmat_tune_m) *`
	`84`	`+ (tune->n_shapes * tune->n_profiles * tune->m_num);`
	`85`	`+ tune->items = malloc(item_size);`
	`86`	`+ if (tune->items == NULL) {`
	`87`	`+ fprintf(stderr, "failed to allocate memory\n");`
	`88`	`+ return -2;`
	`89`	`+ }`
	`90`	`+ memset(tune->items, 0, item_size);`
`87`	`91`	`}`
`88`	`92`
`89`		`- size_t sz = sizeof(struct ggml_task_profile) * tune->n_profiles;`
`90`		`- tune->profiles = malloc(sz);`
`91`		`- GGML_ASSERT(tune->profiles);`
`92`		`- memset(tune->profiles, 0, sz);`
	`93`	`+ {`
	`94`	`+ size_t sz = sizeof(struct ggml_task_profile) * tune->n_profiles;`
	`95`	`+ tune->profiles = malloc(sz);`
	`96`	`+ GGML_ASSERT(tune->profiles);`
	`97`	`+ memset(tune->profiles, 0, sz);`
	`98`	`+ }`
`93`	`99`
`94`	`100`	`for (int ip = 0; ip < tune->n_profiles; ip++) {`
`95`	`101`	`struct ggml_task_profile *profile = &tune->profiles[ip];`
`96`	`102`	`for (int j = 0; j < 3; j++) {`
`97`	`103`	`struct ggml_task_stage *ts = &profile->stages[j];`
`98`	`104`	`int backend, parallel, wait;`
`99`		`- rc = fscanf(fp, "%d %d %d", &backend, &parallel, &wait);`
`100`		`- if (rc <= 0) {`
	`105`	`+ if (rc = fscanf(fp, "%d %d %d", &backend, &parallel, &wait),`
	`106`	`+ rc <= 0) {`
`101`	`107`	`return rc;`
`102`	`108`	`}`
`103`	`109`	`ts->backend = backend;`
`@@ -107,28 +113,27 @@ int ggml_mulmat_tune_read_data(struct ggml_mulmat_tune *tune, FILE *fp) {`
`107`	`113`	`}`
`108`	`114`
`109`	`115`	`for (int i_shape = 0; i_shape < tune->n_shapes; i_shape++) {`
`110`		`- rc = fscanf(fp, "%d %d", &tune->shapes[i_shape].N,`
`111`		`- &tune->shapes[i_shape].K);`
`112`		`- if (rc <= 0) {`
	`116`	`+ if (rc = fscanf(fp, "%d %d", &tune->shapes[i_shape].N,`
	`117`	`+ &tune->shapes[i_shape].K),`
	`118`	`+ rc <= 0) {`
`113`	`119`	`return rc;`
`114`	`120`	`}`
`115`	`121`
`116`	`122`	`for (int i_m = 0; i_m < tune->m_num; i_m++) {`
`117`	`123`	`int M;`
`118`	`124`	`for (int ip = 0; ip < tune->n_profiles; ip++) {`
`119`	`125`	`if (ip == 0) {`
`120`		`- rc = fscanf(fp, "%d", &M);`
`121`		`- if (rc <= 0) {`
	`126`	`+ if (rc = fscanf(fp, "%d", &M), rc <= 0) {`
`122`	`127`	`return rc;`
`123`	`128`	`}`
`124`	`129`	`}`
`125`	`130`	`int index =`
`126`	`131`	`ggml_mulmat_tune_get_item_index(tune, i_shape, ip, i_m);`
`127`	`132`	`struct ggml_mulmat_tune_m *item = &tune->items[index];`
`128`	`133`	`item->M = M;`
`129`		`- rc = fscanf(fp, "%d %d %d", &item->stages_time[0],`
`130`		`- &item->stages_time[1], &item->stages_time[2]);`
`131`		`- if (rc <= 0) {`
	`134`	`+ if (rc = fscanf(fp, "%d %d %d", &item->stages_time[0],`
	`135`	`+ &item->stages_time[1], &item->stages_time[2]),`
	`136`	`+ rc <= 0) {`
`132`	`137`	`return rc;`
`133`	`138`	`}`
`134`	`139`	`}`
`@@ -139,40 +144,38 @@ int ggml_mulmat_tune_read_data(struct ggml_mulmat_tune *tune, FILE *fp) {`
`139`	`144`	`}`
`140`	`145`
`141`	`146`	`int ggml_mulmat_tune_write_data(const struct ggml_mulmat_tune *tune, FILE *fp) {`
`142`		`- int rc = fprintf(fp, "%d %s %d %s %d %s %d %d %d %d\n", tune->version,`
	`147`	`+ int rc;`
	`148`	`+ if (rc = fprintf(fp, "%d %s %d %s %d %s %d %d %d %d\n", tune->version,`
`143`	`149`	`tune->model, tune->type, tune->type_name, tune->backend,`
`144`	`150`	`tune->blas_vendor, tune->n_shapes, tune->m_step,`
`145`		`- tune->m_num, tune->n_profiles);`
`146`		`- if (rc <= 0) {`
	`151`	`+ tune->m_num, tune->n_profiles),`
	`152`	`+ rc <= 0) {`
`147`	`153`	`return rc;`
`148`	`154`	`}`
`149`	`155`
`150`	`156`	`for (int i = 0; i < tune->n_profiles; i++) {`
`151`	`157`	`struct ggml_task_profile *profile = &tune->profiles[i];`
`152`	`158`	`for (int j = 0; j < 3; j++) {`
`153`	`159`	`struct ggml_task_stage *ts = &profile->stages[j];`
`154`		`- rc = fprintf(fp, "%2d %d %d", ts->backend,`
`155`		`- ts->parallel ? 1 : 0, ts->wait ? 1 : 0);`
`156`		`- if (rc <= 0) {`
	`160`	`+ if (rc = fprintf(fp, "%2d %d %d", ts->backend, ts->parallel ? 1 : 0,`
	`161`	`+ ts->wait ? 1 : 0),`
	`162`	`+ rc <= 0) {`
`157`	`163`	`return rc;`
`158`	`164`	`}`
`159`	`165`	`if (j < 2) {`
`160`		`- rc = fprintf(fp, "  ");`
`161`		`- if (rc <= 0) {`
	`166`	`+ if (rc = fprintf(fp, "  "), rc <= 0) {`
`162`	`167`	`return rc;`
`163`	`168`	`}`
`164`	`169`	`}`
`165`	`170`	`}`
`166`		`- rc = fprintf(fp, "\n");`
`167`		`- if (rc <= 0) {`
	`171`	`+ if (rc = fprintf(fp, "\n"), rc <= 0) {`
`168`	`172`	`return rc;`
`169`	`173`	`}`
`170`	`174`	`}`
`171`	`175`
`172`	`176`	`for (int i_shape = 0; i_shape < tune->n_shapes; i_shape++) {`
`173`	`177`	`const struct ggml_mulmat_tune_nk *shape = &tune->shapes[i_shape];`
`174`		`- rc = fprintf(fp, "%d %d\n", shape->N, shape->K);`
`175`		`- if (rc <= 0) {`
	`178`	`+ if (rc = fprintf(fp, "%d %d\n", shape->N, shape->K), rc <= 0) {`
`176`	`179`	`return rc;`
`177`	`180`	`}`
`178`	`181`
`@@ -182,29 +185,26 @@ int ggml_mulmat_tune_write_data(const struct ggml_mulmat_tune *tune, FILE *fp) {`
`182`	`185`	`ggml_mulmat_tune_get_item_index(tune, i_shape, ip, i_m);`
`183`	`186`	`struct ggml_mulmat_tune_m *item = &tune->items[index];`
`184`	`187`	`if (ip == 0) {`
`185`		`- rc = fprintf(fp, "%3d", item->M);`
`186`		`- if (rc <= 0) {`
	`188`	`+ if (rc = fprintf(fp, "%3d", item->M), rc <= 0) {`
`187`	`189`	`return rc;`
`188`	`190`	`}`
`189`	`191`	`}`
`190`	`192`
`191`	`193`	`struct ggml_task_profile *profile = &tune->profiles[ip];`
`192`	`194`	`for (int k = 0; k < 3; k++) {`
`193`	`195`	`if (profile->stages[k].backend != GGML_BACKEND_UNKNOWN) {`
`194`		`- rc = fprintf(fp, "%9d", item->stages_time[k]);`
`195`		`- if (rc <= 0) {`
	`196`	`+ if (rc = fprintf(fp, "%9d", item->stages_time[k]),`
	`197`	`+ rc <= 0) {`
`196`	`198`	`return rc;`
`197`	`199`	`}`
`198`	`200`	`} else {`
`199`		`- rc = fprintf(fp, " 0");`
`200`		`- if (rc <= 0) {`
	`201`	`+ if (rc = fprintf(fp, " 0"), rc <= 0) {`
`201`	`202`	`return rc;`
`202`	`203`	`}`
`203`	`204`	`}`
`204`	`205`	`}`
`205`	`206`	`}`
`206`		`- rc = fprintf(fp, "\n");`
`207`		`- if (rc <= 0) {`
	`207`	`+ if (rc = fprintf(fp, "\n"), rc <= 0) {`
`208`	`208`	`return rc;`
`209`	`209`	`}`
`210`	`210`	`}`
`@@ -298,8 +298,8 @@ void ggml_mulmat_tune_estimate_time(`
`298`	`298`	`if (ts->parallel) {`
`299`	`299`	`t /= nth;`
`300`	`300`	`}`
`301`		`- time_stats->profile_time[ip].stage_time[stage] = t;`
`302`		`- time_stats->profile_time[ip].total_time += t;`
	`301`	`+ time_stats->profile_time[ip].stage_time[stage] = (int)t;`
	`302`	`+ time_stats->profile_time[ip].total_time += (int)t;`
`303`	`303`	`}`
`304`	`304`	`}`
`305`	`305`	`}`
`@@ -313,7 +313,7 @@ static const char *ggml_backend_names[] = {`
`313`	`313`
`314`	`314`	`const char *ggml_get_backend_name(enum ggml_backend backend) {`
`315`	`315`	`if (backend == GGML_BACKEND_UNKNOWN) {`
`316`		`- return "";`
	`316`	`+ return "UNKNOWN";`
`317`	`317`	`}`
`318`	`318`	`return ggml_backend_names[backend];`
`319`	`319`	`}`