Skip to content

Commit 2ea239a

Browse files
committed
ggml.c: bugfix CBLAS profile ggml-org#1 was not executed; misc minor refactors
1 parent 51beb6d commit 2ea239a

File tree

4 files changed

+117
-155
lines changed

4 files changed

+117
-155
lines changed

examples/mulmat-tune/bench-out/7b.q4_0.accelerate.txt

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,38 +3,38 @@
33
-1 0 0 3 0 1 -1 0 0
44
0 1 0 3 0 1 -1 0 0
55
4096 4096
6-
16 23 14046 0 0 0 0 11366 6297 0
7-
32 36 26793 0 0 0 0 11244 6201 0
8-
48 55 40187 0 0 0 0 11316 7811 0
9-
64 78 54450 0 0 0 0 11149 7859 0
10-
80 96 68095 0 0 0 0 11258 8748 0
11-
96 114 81588 0 0 0 0 11017 10248 0
12-
112 134 96596 0 0 0 0 11186 10506 0
13-
128 157 112871 0 0 0 0 11179 11887 0
6+
16 17 14400 0 0 20380 0 13643 6406 0
7+
32 48 26184 0 0 17892 0 12759 6875 0
8+
48 62 40950 0 0 20940 0 11344 6470 0
9+
64 75 54959 0 0 19897 0 12056 8272 0
10+
80 95 69812 0 0 23261 0 13296 10944 0
11+
96 135 82530 0 0 20238 0 11363 9733 0
12+
112 135 97063 0 0 21620 0 11008 10231 0
13+
128 160 110596 0 0 22374 0 11130 12202 0
1414
4096 11008
15-
16 55 36520 0 0 0 0 29851 9467 0
16-
32 103 73460 0 0 0 0 29815 11175 0
17-
48 173 109619 0 0 0 0 29870 13368 0
18-
64 206 147174 0 0 0 0 29571 16828 0
19-
80 289 178721 0 0 0 0 29895 18013 0
20-
96 343 219130 0 0 0 0 29633 21457 0
21-
112 550 257754 0 0 0 0 30342 23557 0
22-
128 594 298395 0 0 0 0 29683 24796 0
15+
16 63 34214 0 0 43145 0 30377 9875 0
16+
32 98 71625 0 0 43591 0 29675 11653 0
17+
48 155 109818 0 0 44130 0 30964 14123 0
18+
64 253 144841 0 0 46174 0 29843 17059 0
19+
80 279 175670 0 0 47225 0 29574 16913 0
20+
96 331 217921 0 0 48978 0 29582 19354 0
21+
112 408 254362 0 0 53326 0 29963 22962 0
22+
128 611 281834 0 0 57593 0 30629 25448 0
2323
11008 4096
24-
16 19 35077 0 0 0 0 30130 21051 0
25-
32 43 71844 0 0 0 0 29937 21740 0
26-
48 56 108664 0 0 0 0 30534 23017 0
27-
64 94 148288 0 0 0 0 29848 26486 0
28-
80 108 187098 0 0 0 0 29896 29687 0
29-
96 116 224466 0 0 0 0 29931 31416 0
30-
112 137 264372 0 0 0 0 29797 34035 0
31-
128 178 300958 0 0 0 0 29713 37036 0
24+
16 18 35422 0 0 53263 0 30608 20630 0
25+
32 37 69747 0 0 54542 0 30501 23162 0
26+
48 53 107693 0 0 56207 0 29500 23522 0
27+
64 99 144891 0 0 60231 0 29461 23695 0
28+
80 98 178384 0 0 60697 0 29281 26783 0
29+
96 112 217583 0 0 63507 0 29741 31710 0
30+
112 170 253402 0 0 65329 0 28823 34861 0
31+
128 189 290395 0 0 70656 0 31023 35913 0
3232
32000 4096
33-
16 18 105077 0 0 0 0 87731 67479 0
34-
32 36 205088 0 0 0 0 86620 72865 0
35-
48 54 314438 0 0 0 0 87458 77700 0
36-
64 75 420397 0 0 0 0 86515 83575 0
37-
80 109 541305 0 0 0 0 86580 88873 0
38-
96 121 646842 0 0 0 0 86500 96982 0
39-
112 134 761083 0 0 0 0 87326 102948 0
40-
128 155 872466 0 0 0 0 87668 112924 0
33+
16 18 104453 0 0 146992 0 86361 67977 0
34+
32 36 203698 0 0 150361 0 87629 71108 0
35+
48 89 312316 0 0 155162 0 86803 76783 0
36+
64 104 428321 0 0 161366 0 89776 82720 0
37+
80 93 532930 0 0 171931 0 87039 88321 0
38+
96 113 642233 0 0 176509 0 86327 95598 0
39+
112 169 745426 0 0 186020 0 87538 102664 0
40+
128 202 860052 0 0 196480 0 88918 109959 0

examples/mulmat-tune/mulmat-tune.c

Lines changed: 42 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -79,25 +79,31 @@ int ggml_mulmat_tune_read_data(struct ggml_mulmat_tune *tune, FILE *fp) {
7979
return rc;
8080
}
8181

82-
tune->items = malloc(sizeof(struct ggml_mulmat_tune_m) *
83-
(tune->n_shapes * tune->n_profiles * tune->m_num));
84-
if (tune->items == NULL) {
85-
fprintf(stderr, "failed to allocate memory\n");
86-
return -2;
82+
{
83+
size_t item_size = sizeof(struct ggml_mulmat_tune_m) *
84+
(tune->n_shapes * tune->n_profiles * tune->m_num);
85+
tune->items = malloc(item_size);
86+
if (tune->items == NULL) {
87+
fprintf(stderr, "failed to allocate memory\n");
88+
return -2;
89+
}
90+
memset(tune->items, 0, item_size);
8791
}
8892

89-
size_t sz = sizeof(struct ggml_task_profile) * tune->n_profiles;
90-
tune->profiles = malloc(sz);
91-
GGML_ASSERT(tune->profiles);
92-
memset(tune->profiles, 0, sz);
93+
{
94+
size_t sz = sizeof(struct ggml_task_profile) * tune->n_profiles;
95+
tune->profiles = malloc(sz);
96+
GGML_ASSERT(tune->profiles);
97+
memset(tune->profiles, 0, sz);
98+
}
9399

94100
for (int ip = 0; ip < tune->n_profiles; ip++) {
95101
struct ggml_task_profile *profile = &tune->profiles[ip];
96102
for (int j = 0; j < 3; j++) {
97103
struct ggml_task_stage *ts = &profile->stages[j];
98104
int backend, parallel, wait;
99-
rc = fscanf(fp, "%d %d %d", &backend, &parallel, &wait);
100-
if (rc <= 0) {
105+
if (rc = fscanf(fp, "%d %d %d", &backend, &parallel, &wait),
106+
rc <= 0) {
101107
return rc;
102108
}
103109
ts->backend = backend;
@@ -107,28 +113,27 @@ int ggml_mulmat_tune_read_data(struct ggml_mulmat_tune *tune, FILE *fp) {
107113
}
108114

109115
for (int i_shape = 0; i_shape < tune->n_shapes; i_shape++) {
110-
rc = fscanf(fp, "%d %d", &tune->shapes[i_shape].N,
111-
&tune->shapes[i_shape].K);
112-
if (rc <= 0) {
116+
if (rc = fscanf(fp, "%d %d", &tune->shapes[i_shape].N,
117+
&tune->shapes[i_shape].K),
118+
rc <= 0) {
113119
return rc;
114120
}
115121

116122
for (int i_m = 0; i_m < tune->m_num; i_m++) {
117123
int M;
118124
for (int ip = 0; ip < tune->n_profiles; ip++) {
119125
if (ip == 0) {
120-
rc = fscanf(fp, "%d", &M);
121-
if (rc <= 0) {
126+
if (rc = fscanf(fp, "%d", &M), rc <= 0) {
122127
return rc;
123128
}
124129
}
125130
int index =
126131
ggml_mulmat_tune_get_item_index(tune, i_shape, ip, i_m);
127132
struct ggml_mulmat_tune_m *item = &tune->items[index];
128133
item->M = M;
129-
rc = fscanf(fp, "%d %d %d", &item->stages_time[0],
130-
&item->stages_time[1], &item->stages_time[2]);
131-
if (rc <= 0) {
134+
if (rc = fscanf(fp, "%d %d %d", &item->stages_time[0],
135+
&item->stages_time[1], &item->stages_time[2]),
136+
rc <= 0) {
132137
return rc;
133138
}
134139
}
@@ -139,40 +144,38 @@ int ggml_mulmat_tune_read_data(struct ggml_mulmat_tune *tune, FILE *fp) {
139144
}
140145

141146
int ggml_mulmat_tune_write_data(const struct ggml_mulmat_tune *tune, FILE *fp) {
142-
int rc = fprintf(fp, "%d %s %d %s %d %s %d %d %d %d\n", tune->version,
147+
int rc;
148+
if (rc = fprintf(fp, "%d %s %d %s %d %s %d %d %d %d\n", tune->version,
143149
tune->model, tune->type, tune->type_name, tune->backend,
144150
tune->blas_vendor, tune->n_shapes, tune->m_step,
145-
tune->m_num, tune->n_profiles);
146-
if (rc <= 0) {
151+
tune->m_num, tune->n_profiles),
152+
rc <= 0) {
147153
return rc;
148154
}
149155

150156
for (int i = 0; i < tune->n_profiles; i++) {
151157
struct ggml_task_profile *profile = &tune->profiles[i];
152158
for (int j = 0; j < 3; j++) {
153159
struct ggml_task_stage *ts = &profile->stages[j];
154-
rc = fprintf(fp, "%2d %d %d", ts->backend,
155-
ts->parallel ? 1 : 0, ts->wait ? 1 : 0);
156-
if (rc <= 0) {
160+
if (rc = fprintf(fp, "%2d %d %d", ts->backend, ts->parallel ? 1 : 0,
161+
ts->wait ? 1 : 0),
162+
rc <= 0) {
157163
return rc;
158164
}
159165
if (j < 2) {
160-
rc = fprintf(fp, " ");
161-
if (rc <= 0) {
166+
if (rc = fprintf(fp, " "), rc <= 0) {
162167
return rc;
163168
}
164169
}
165170
}
166-
rc = fprintf(fp, "\n");
167-
if (rc <= 0) {
171+
if (rc = fprintf(fp, "\n"), rc <= 0) {
168172
return rc;
169173
}
170174
}
171175

172176
for (int i_shape = 0; i_shape < tune->n_shapes; i_shape++) {
173177
const struct ggml_mulmat_tune_nk *shape = &tune->shapes[i_shape];
174-
rc = fprintf(fp, "%d %d\n", shape->N, shape->K);
175-
if (rc <= 0) {
178+
if (rc = fprintf(fp, "%d %d\n", shape->N, shape->K), rc <= 0) {
176179
return rc;
177180
}
178181

@@ -182,29 +185,26 @@ int ggml_mulmat_tune_write_data(const struct ggml_mulmat_tune *tune, FILE *fp) {
182185
ggml_mulmat_tune_get_item_index(tune, i_shape, ip, i_m);
183186
struct ggml_mulmat_tune_m *item = &tune->items[index];
184187
if (ip == 0) {
185-
rc = fprintf(fp, "%3d", item->M);
186-
if (rc <= 0) {
188+
if (rc = fprintf(fp, "%3d", item->M), rc <= 0) {
187189
return rc;
188190
}
189191
}
190192

191193
struct ggml_task_profile *profile = &tune->profiles[ip];
192194
for (int k = 0; k < 3; k++) {
193195
if (profile->stages[k].backend != GGML_BACKEND_UNKNOWN) {
194-
rc = fprintf(fp, "%9d", item->stages_time[k]);
195-
if (rc <= 0) {
196+
if (rc = fprintf(fp, "%9d", item->stages_time[k]),
197+
rc <= 0) {
196198
return rc;
197199
}
198200
} else {
199-
rc = fprintf(fp, " 0");
200-
if (rc <= 0) {
201+
if (rc = fprintf(fp, " 0"), rc <= 0) {
201202
return rc;
202203
}
203204
}
204205
}
205206
}
206-
rc = fprintf(fp, "\n");
207-
if (rc <= 0) {
207+
if (rc = fprintf(fp, "\n"), rc <= 0) {
208208
return rc;
209209
}
210210
}
@@ -298,8 +298,8 @@ void ggml_mulmat_tune_estimate_time(
298298
if (ts->parallel) {
299299
t /= nth;
300300
}
301-
time_stats->profile_time[ip].stage_time[stage] = t;
302-
time_stats->profile_time[ip].total_time += t;
301+
time_stats->profile_time[ip].stage_time[stage] = (int)t;
302+
time_stats->profile_time[ip].total_time += (int)t;
303303
}
304304
}
305305
}
@@ -313,7 +313,7 @@ static const char *ggml_backend_names[] = {
313313

314314
const char *ggml_get_backend_name(enum ggml_backend backend) {
315315
if (backend == GGML_BACKEND_UNKNOWN) {
316-
return "";
316+
return "UNKNOWN";
317317
}
318318
return ggml_backend_names[backend];
319319
}

examples/mulmat-tune/mulmat-tune.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ extern "C" {
1313
#define GGML_MULMAT_MAX_PROFILES 4
1414

1515
struct ggml_task_stage {
16-
int backend; // enum ggml_backend
16+
/*enum ggml_backend*/ int backend;
1717
bool parallel;
1818
bool wait;
1919
};
@@ -99,7 +99,6 @@ void ggml_mulmat_init_task_profiles(/*enum ggml_backend*/ int backend);
9999
int ggml_mulmat_get_task_profiles(struct ggml_task_profile **profiles,
100100
int src0_type, int src1_type);
101101

102-
// returns enum ggml_backend
103102
/*enum ggml_backend*/ int ggml_auto_detect_backend(void);
104103

105104
const char *ggml_get_backend_name(/*enum ggml_backend*/ int backend);

0 commit comments

Comments
 (0)