
Commit c0d321f

mulmat-tune: fix wrong result file name; decrease hist buf size;
breaking change: delete original profile ggml-org#1 from q_f32 profiles
1 parent bad2202 commit c0d321f

File tree: 4 files changed, +96 -81 lines changed

examples/mulmat-tune/mulmat-tune.c

+37 -40
@@ -19,23 +19,28 @@ static void cmd_analyze(struct ggml_mulmat_tune *tune);
 static void usage(char *prog) {
     const char *usage_lines[] = {
         "usage: %s [bench ...] | [analyze FILE] [-h | --help]\n",
-        "\n",
-        "bench [-m MODEL] [-t TYPE] [-f FILE] [-y]\n",
-        "--model MODEL 7B | 13B | 30B | 65B\n",
-        " default 7B\n",
-        "--type TYPE Q4_0 | Q4_1 | Q5_0 | Q5_1 | Q8_0 | F32 | F16\n",
-        " default Q4_0\n",
-        "--m_num M_NUM number of M, the max M = 2^(M_NUM-1)\n",
-        " requires: in range [8, 12]\n",
-        " default 10\n",
-        "--backend BACKEND backend: CUDA | CL | BLAS\n",
-        " default: auto detect\n",
-        "--n_pass number of passes to run\n",
-        " default 3\n",
-        " requires: in range [1, 5]\n",
-        "--file FILE data file to write\n",
-        " default stdout\n",
-        "-y always answer \"yes\" to all prompts\n",
+        "",
+        "bench [-m MODEL] [-t TYPE] [-f FILE] [-y]",
+        "--model MODEL 3B | 7B | 13B | 30B | 65B",
+        " default 7B",
+        "--type TYPE Q4_0 | Q4_1 | Q5_0 | Q5_1 | Q8_0 | F32 | F16",
+        " default Q4_0",
+        "--m_num M_NUM number of M, the max M = 2^(M_NUM-1)",
+        " requires within [8, 12]",
+        " default 10",
+        "--backend BACKEND backend: CUDA | CL | BLAS",
+        " default: auto detect",
+        "--n_pass number of passes to run",
+        " default 3",
+        " requires: within [1, 5]",
+        "--file FILE data file to write",
+        " default stdout",
+        "--hint enable hint",
+        " run less bench for constant or linear stages.",
+        " CAUTION: hint is experimental and the resulting",
+        " data may be unreliable, enable it only",
+        " if you know what you are doing",
+        "-y always answer \"yes\" to all prompts",
     };

     int len = (int)(sizeof(usage_lines) / sizeof(char *));
@@ -44,7 +49,7 @@ static void usage(char *prog) {
         if (i == 0) {
             fprintf(stderr, line, prog);
         } else {
-            fprintf(stderr, "%s", line);
+            fprintf(stderr, "%s\n", line);
         }
     }

@@ -74,7 +79,7 @@ int main(int argc, char **argv) {

     if (strcmp(cmd, "bench") == 0) {
         struct ggml_mulmat_tune tune = {
-            .version = 1,
+            .version = GGML_MULMAT_TUNE_VERSION,
             .n_shapes = 0,
         };

@@ -124,6 +129,9 @@ int main(int argc, char **argv) {
                arg_file = argv[i + 1];
                ++i;
            }
+        } else if (strcmp(argv[i], "--hint") == 0) {
+            fprintf(stderr, "The `hint` feature is not implemented\n");
+            exit(1);
         } else if (strcmp(argv[i], "-y") == 0) {
             always_yes = true;
         } else {
@@ -196,7 +204,7 @@ int main(int argc, char **argv) {
            m_num = v;
        }
        if (m_num < 8 || m_num > 12) {
-            fprintf(stderr, "invalid m_num: %d, expect in range [8, 12]\n",
+            fprintf(stderr, "invalid m_num: %d, expect within [8, 12]\n",
                    m_num);
            usage(argv[0]);
            exit(1);
@@ -209,8 +217,8 @@ int main(int argc, char **argv) {
            int v = atoi(arg_n_pass);
            n_pass = v;
        }
-        if (n_pass < 1 || n_pass > MAX_NUM_PASS) {
-            fprintf(stderr, "invalid n_pass: %d, expect in range [1, 5]\n",
+        if (n_pass < 1 || n_pass > GGML_MULMAT_MAX_PASS) {
+            fprintf(stderr, "invalid n_pass: %d, expect within [1, 5]\n",
                    n_pass);
            usage(argv[0]);
            exit(1);
@@ -350,7 +358,7 @@ int main(int argc, char **argv) {

 void cmd_bench(struct ggml_mulmat_tune *tune, int n_pass, bool verbose) {
     size_t wsize = 0;
-    void *q_buf = NULL;
+    char hist[64]; // TODO: make sure this size is safe.
     void *wdata = NULL;

     // alloc q_buf and wdata with max size.
@@ -364,16 +372,6 @@ void cmd_bench(struct ggml_mulmat_tune *tune, int n_pass, bool verbose) {
     }
     GGML_ASSERT(max_NxK > 0);

-    // NOTE: proximate.
-    size_t q_buf_size = max_NxK * sizeof(int64_t);
-
-    q_buf = malloc(q_buf_size);
-    if (!q_buf) {
-        fprintf(stderr,
-                "failed to allocate memory for q_buf, size: %zu MiB\n",
-                q_buf_size / 1024 / 1024);
-        exit(1);
-    }
     wsize = max_NxK * sizeof(float);
     wdata = malloc(wsize);
     if (!wdata) {
@@ -449,23 +447,23 @@ void cmd_bench(struct ggml_mulmat_tune *tune, int n_pass, bool verbose) {
            switch (shape->src0_type) {
            case GGML_TYPE_Q4_0:
                ggml_quantize_q4_0((const float *)src0_f32->data,
-                                   src0->data, N * K, K, (int64_t *)q_buf);
+                                   src0->data, N * K, K, (void *)hist);
                break;
            case GGML_TYPE_Q4_1:
                ggml_quantize_q4_1((const float *)src0_f32->data,
-                                   src0->data, N * K, K, (int64_t *)q_buf);
+                                   src0->data, N * K, K, (void *)hist);
                break;
            case GGML_TYPE_Q5_0:
                ggml_quantize_q5_0((const float *)src0_f32->data,
-                                   src0->data, N * K, K, (int64_t *)q_buf);
+                                   src0->data, N * K, K, (void *)hist);
                break;
            case GGML_TYPE_Q5_1:
                ggml_quantize_q5_1((const float *)src0_f32->data,
-                                   src0->data, N * K, K, (int64_t *)q_buf);
+                                   src0->data, N * K, K, (void *)hist);
                break;
            case GGML_TYPE_Q8_0:
                ggml_quantize_q8_0((const float *)src0_f32->data,
-                                   src0->data, N * K, K, (int64_t *)q_buf);
+                                   src0->data, N * K, K, (void *)hist);
                break;
            default:
                GGML_ASSERT(false);
@@ -492,7 +490,7 @@ void cmd_bench(struct ggml_mulmat_tune *tune, int n_pass, bool verbose) {
     // without memset, the first run may be significant slow.
     memset(wdata, 0, wsize);

-    int stage_time[MAX_NUM_PASS];
+    int stage_time[GGML_MULMAT_MAX_PASS];
     for (int i_bench = 0; i_bench < n_pass; i_bench++) {
         int t0 = (int)ggml_time_us();

@@ -529,7 +527,6 @@ void cmd_bench(struct ggml_mulmat_tune *tune, int n_pass, bool verbose) {
     }

     free(wdata);
-    free(q_buf);
 }

 static void print_build_tips(void) {
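
Note on the hist buffer above: the last argument of the ggml_quantize_* functions is a histogram of quantized values, not scratch space proportional to N*K, which is why the large q_buf allocation could be dropped. As a sketch only (assuming the upstream ggml/llama.cpp convention of the time, where callers pass one int64_t counter per 4-bit bucket), the size that the "make sure this size is safe" TODO has to cover looks like this:

    #include <stdint.h>

    // Sketch, not part of this commit: llama.cpp's own quantize path
    // declares the histogram as 16 int64_t counters (1 << 4 buckets,
    // 128 bytes), one per possible 4-bit quantized value.
    static int64_t hist[1 << 4];

    // call shape as in cmd_bench (src0_f32, src0, N, K are bench locals):
    //   ggml_quantize_q4_0((const float *)src0_f32->data,
    //                      src0->data, N * K, K, hist);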

ggml-tune.c

+42 -25
@@ -116,26 +116,39 @@ int ggml_mulmat_tune_validate(struct ggml_mulmat_tune *tune, const char *model,
     const char *backend_vendor = ggml_get_backend_vendor();

     int rc = 0;
+    char err_buf[1024];

-    if (strcmp(model, tune->model) != 0) {
+    if (tune->version != GGML_MULMAT_TUNE_VERSION) {
+        snprintf(
+            err_buf, sizeof(err_buf),
+            "version mismatch, please re-run bench. current: %d, incoming: %d",
+            GGML_MULMAT_TUNE_VERSION, tune->version);
         rc = -1;
-    } else if (type != tune->type) {
+    } else if (strcmp(model, tune->model) != 0) {
+        snprintf(err_buf, sizeof(err_buf),
+                 "model mismatch. current: %s, incoming: %s", model,
+                 tune->model);
         rc = -2;
-    } else if ((int)backend != tune->backend) {
+    } else if (type != tune->type) {
+        snprintf(err_buf, sizeof(err_buf),
+                 "type mismatch. current: %d, incoming: %d\n", type,
+                 tune->type);
         rc = -3;
+    } else if ((int)backend != tune->backend) {
+        snprintf(err_buf, sizeof(err_buf),
+                 "backend mismatch. current: %d, incoming: %d\n", backend,
+                 tune->backend);
+        rc = -4;
     } else if (backend_vendor == NULL ||
                strcmp(backend_vendor, tune->backend_vendor) != 0) {
-        rc = -4;
-    } else {
-        // TODO
+        rc = -5;
+        snprintf(err_buf, sizeof(err_buf),
+                 "backend vendor mismatch. current: %s, incoming: %s",
+                 backend_vendor, tune->backend_vendor);
     }

     if (rc != 0) {
-        printf("model: %s, tune model: %s\n", model, tune->model);
-        printf("type: %d, tune type: %d\n", type, tune->type);
-        printf("backend: %d, tune backend: %d\n", backend, tune->backend);
-        printf("backend vendor: %s, tune backend vendor: %s\n", backend_vendor,
-               tune->backend_vendor);
+        fprintf(stderr, "mulmat-tune: error: %s\n", err_buf);
     }

     return rc;
@@ -572,21 +585,25 @@ void ggml_mulmat_init_task_profiles(enum ggml_backend backend) {

         p[1].stages[1].backend = backend;
         p[1].stages[1].wait = true;
-
     } else if (backend == GGML_BACKEND_BLAS) {
-        ggml_mulmat_task_profiles_qxx_n = 3;
-
-        p[0].stages[0].backend = GGML_BACKEND_CPU;
-        p[0].stages[1].backend = GGML_BACKEND_CPU;
-        p[0].stages[1].parallel = true;
-
-        p[1].stages[1].backend = backend;
-        p[1].stages[1].wait = true;
-
-        p[2].stages[0].backend = GGML_BACKEND_CPU;
-        p[2].stages[0].parallel = true;
-        p[2].stages[1].backend = backend;
-        p[2].stages[1].wait = true;
+        int i = 0;
+        p[i].stages[0].backend = GGML_BACKEND_CPU;
+        p[i].stages[1].backend = GGML_BACKEND_CPU;
+        p[i].stages[1].parallel = true;
+        ++i;
+
+        // p[i].stages[1].backend = backend;
+        // p[i].stages[1].wait = true;
+        // ++i;
+
+        p[i].stages[0].backend = GGML_BACKEND_CPU;
+        p[i].stages[0].parallel = true;
+        // p[i].stages[1].tune_hint = GGML_TUNE_HINT_CONSTANT;
+        p[i].stages[1].backend = backend;
+        p[i].stages[1].wait = true;
+        ++i;
+
+        ggml_mulmat_task_profiles_qxx_n = i;
     } else {
         fprintf(stderr, "invalid backend: %d\n", backend);
         GGML_ASSERT(false);
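
With this change ggml_mulmat_tune_validate prints the specific mismatch to stderr itself and returns a distinct code for each check: -1 version, -2 model, -3 type, -4 backend, -5 backend vendor, 0 on success. A caller therefore only needs to test for non-zero; a minimal sketch mirroring the llama.cpp hunk further down (ctx, model_name and type are that function's locals):

    int rc = ggml_mulmat_tune_validate(ctx->mm_tune, model_name, type);
    if (rc != 0) {
        // the detailed "mulmat-tune: error: ..." message has already been
        // written to stderr by the validator; rc only says which check failed
        free(ctx->mm_tune);
        exit(1);
    }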

ggml-tune.h

+11 -2
@@ -10,15 +10,24 @@
 extern "C" {
 #endif

-#define MAX_NUM_PASS 5
-
+#define GGML_MULMAT_TUNE_VERSION 2
 #define GGML_MULMAT_N_SHAPES 6
+
+#define GGML_MULMAT_MAX_PASS 5
 #define GGML_MULMAT_MAX_PROFILES 8

+enum ggml_mulmat_tune_hint {
+    GGML_TUNE_HINT_UNKNOWN = 0,
+    GGML_TUNE_HINT_CONSTANT = 1,
+};
+
 struct ggml_task_stage {
     enum ggml_backend backend;
     bool parallel;
     bool wait;
+
+    // TODO: experimental, may be moved to other place.
+    // enum ggml_mulmat_tune_hint tune_hint;
 };

 struct ggml_task_profile {

llama.cpp

+6 -14
@@ -2330,7 +2330,7 @@ struct llama_context * llama_init_from_file(

         ctx->mm_tune = (struct ggml_mulmat_tune *)malloc(sizeof(struct ggml_mulmat_tune));
         if (ctx->mm_tune == nullptr) {
-            fprintf(stderr, "\nERROR: failed to allocate memory for struct ggml_mulmat_tune\n");
+            fprintf(stderr, "ERROR: failed to allocate memory for struct ggml_mulmat_tune\n");
             return nullptr;
         }

@@ -2343,13 +2343,13 @@
            char buf[128];
            GGML_ASSERT(strlen(env_dir) < sizeof(buf) - 10);
            // TODO: take care the path separator for Windows.
-            snprintf(buf, sizeof(buf), "%s/%s.%s", env_dir, model_name, type_name);
+            snprintf(buf, sizeof(buf), "%s/%s.%s.txt", env_dir, model_name, type_name);
            file = buf;
        }

        FILE *fp = fopen(file, "r");
        if (!fp) {
-            fprintf(stderr, "\nWARN: mulmat-tune: failed to open file %s, ignore.\n", file);
+            fprintf(stderr, "mulmat-tune: failed to open file %s, ignore.\n", file);
            free(ctx->mm_tune);
            ctx->mm_tune = NULL;
        } else {
@@ -2358,24 +2358,16 @@

            if (rc != 0) {
                free(ctx->mm_tune);
-                fprintf(stderr, "\nERROR: mulmat-tune: failed to load file %s, error code: %d\n", file, rc);
+                fprintf(stderr, "mulmat-tune: failed to load file %s, error code: %d\n", file, rc);
                return nullptr;
            }

-            fprintf(stderr, "\nINFO: mulmat-tune: loaded file %s\n", file);
+            fprintf(stderr, "mulmat-tune: loaded file %s\n", file);

            rc = ggml_mulmat_tune_validate(ctx->mm_tune, model_name, type);
            if (rc != 0) {
                free(ctx->mm_tune);
-                const char *err = "unknown";
-                switch (rc) {
-                    case -1: err = "model mismatch"; break;
-                    case -2: err = "type mismatch"; break;
-                    case -3: err = "backend mismatch"; break;
-                    case -4: err = "backend vendor mismatch"; break;
-                }
-                fprintf(stderr, "\nERROR: mulmat-tune: failed to validate file %s: %s\n", file, err);
-                return nullptr;
+                exit(1);
            }
        }
    }
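
The snprintf change above is the "wrong result file name" fix from the commit message: llama.cpp now looks the tune data up with a .txt suffix, presumably the name the bench tool actually writes. A self-contained illustration (the directory, model and type strings are hypothetical, not taken from this diff):

    #include <stdio.h>

    int main(void) {
        const char *env_dir    = "/path/to/tune";  // hypothetical
        const char *model_name = "7B";             // hypothetical
        const char *type_name  = "q4_0";           // hypothetical

        char buf[128];
        // old format "%s/%s.%s"     -> /path/to/tune/7B.q4_0
        // new format "%s/%s.%s.txt" -> /path/to/tune/7B.q4_0.txt
        snprintf(buf, sizeof(buf), "%s/%s.%s.txt", env_dir, model_name, type_name);
        printf("%s\n", buf);
        return 0;
    }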
