Expose type name from ggml (#970)

prusnak · unbounded · web-flow · commit c56b7152690c · 2023-04-14T20:05:37.000+02:00
Avoid duplication of type names in utils

Co-authored-by: Håkon H. Hitland <haakon@likedan.net>
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
@@ -16,9 +16,6 @@
 #include <unordered_map>
 #include <vector>
 
-static const char * type_strs[] = { "q4_0", "q4_1", "i8", "i16", "i32", "f16", "f32"  };
-static_assert(sizeof(type_strs) == GGML_TYPE_COUNT * sizeof(char *), "Incomplete type list");
-
 struct quantize_stats_params {
     std::string model = "models/7B/ggml-model-f16.bin";
     bool verbose = false;
@@ -224,7 +221,7 @@ int main(int argc, char ** argv) {
                 break;
             }
             int j;
-            for (j = 0; j < GGML_TYPE_COUNT && strcmp(argv[i], type_strs[j]) != 0; j++) {
+            for (j = 0; j < GGML_TYPE_COUNT && strcmp(argv[i], ggml_type_name((ggml_type) j)) != 0; j++) { // compare argv[i] with the name of candidate type j
                 // find match
             }
             if (j < GGML_TYPE_COUNT) {
@@ -279,7 +276,7 @@ int main(int argc, char ** argv) {
             continue;
         }
         if (params.verbose) {
-            printf("%s: type %s, size %" PRId64 "\n", kv_tensor.first.c_str(), type_strs[kv_tensor.second->type], ggml_nelements(kv_tensor.second));
+            printf("%s: type %s, size %" PRId64 "\n", kv_tensor.first.c_str(), ggml_type_name(kv_tensor.second->type), ggml_nelements(kv_tensor.second));
         }
         if (kv_tensor.second->type == GGML_TYPE_F16) {
             is_f16 = true;
@@ -304,13 +301,14 @@ int main(int argc, char ** argv) {
 
     // loop throught quantization types
     for (int i = 0; i < GGML_TYPE_COUNT; i++) {
+        const ggml_type type = (ggml_type) i;
         if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) {
             continue;
         }
         quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);
         if (qfns.quantize_row_q && qfns.dequantize_row_q) {
             if (params.verbose) {
-                printf("testing %s ...\n",  type_strs[i]);
+                printf("testing %s ...\n",  ggml_type_name(type));
             }
 
             error_stats global_stats {};
@@ -322,7 +320,7 @@ int main(int argc, char ** argv) {
                 if (params.verbose) {
                     printf("  %s ...\n",  kv_tensor.first.c_str());
                 }
-                std::string layer_name { type_strs[i] };
+                std::string layer_name { ggml_type_name(type) };
                 layer_name += "::" + kv_tensor.first;
                 test_roundtrip_on_layer(
                         layer_name,
@@ -337,7 +335,7 @@ int main(int argc, char ** argv) {
                 );
             }
 
-            print_error_stats(type_strs[i], global_stats, params.print_histogram);
+            print_error_stats(ggml_type_name(type), global_stats, params.print_histogram);
         }
     }
 
diff --git a/ggml.c b/ggml.c
@@ -2671,6 +2671,18 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
 };
 static_assert(GGML_TYPE_COUNT == 7, "GGML_TYPE_SIZE is outdated");
 
+// Short human-readable name for each tensor type; designated initializers key every entry by its enum ggml_type value.
+static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
+    [GGML_TYPE_F32]  = "f32",
+    [GGML_TYPE_F16]  = "f16",
+    [GGML_TYPE_Q4_0] = "q4_0",
+    [GGML_TYPE_Q4_1] = "q4_1",
+    [GGML_TYPE_I8]   = "i8",
+    [GGML_TYPE_I16]  = "i16",
+    [GGML_TYPE_I32]  = "i32",
+};
+static_assert(GGML_TYPE_COUNT == 7, "GGML_TYPE_NAME is outdated"); // breaks the build when GGML_TYPE_COUNT changes, as a prompt to revisit this table
+
 static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
     "NONE",
 
@@ -2895,6 +2907,11 @@ float ggml_type_sizef(enum ggml_type type) {
     return ((float)(GGML_TYPE_SIZE[type]))/GGML_BLCK_SIZE[type];
 }
 
+const char * ggml_type_name(enum ggml_type type) { // returns the short name of `type`, e.g. "q4_0"
+    return GGML_TYPE_NAME[type]; // direct table lookup; `type` is not range-checked here
+}
+
+
 size_t ggml_element_size(const struct ggml_tensor * tensor) {
     return GGML_TYPE_SIZE[tensor->type];
 }
diff --git a/ggml.h b/ggml.h
@@ -354,6 +354,8 @@ int    ggml_blck_size (enum ggml_type type);
 size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
 float  ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
 
+const char * ggml_type_name(enum ggml_type type); // short type name string, e.g. "f32" or "q4_0"
+
 size_t ggml_element_size(const struct ggml_tensor * tensor);
 
 struct ggml_context * ggml_init(struct ggml_init_params params);
diff --git a/llama.cpp b/llama.cpp
@@ -269,16 +269,6 @@ static std::string llama_format_tensor_shape(const std::vector<uint32_t> & ne) {
     return ret;
 }
 
-static const char * llama_format_type(enum ggml_type type) {
-    switch (type) {
-        case GGML_TYPE_F32: return "f32";
-        case GGML_TYPE_F16: return "f16";
-        case GGML_TYPE_Q4_0: return "q4_0";
-        case GGML_TYPE_Q4_1: return "q4_1";
-        default: LLAMA_ASSERT(false);
-    }
-}
-
 static size_t llama_calc_tensor_size(const std::vector<uint32_t> & ne, enum ggml_type type) {
     size_t size = ggml_type_size(type);
     for (uint32_t dim : ne) {
@@ -1582,7 +1572,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         printf("[%zu/%zu] %36s - %s, type = %6s, ",
                ++idx, model_loader->tensors_map.tensors.size(),
                tensor.name.c_str(), llama_format_tensor_shape(tensor.ne).c_str(),
-               llama_format_type(tensor.type));
+               ggml_type_name(tensor.type));
 
         // This used to be a regex, but <regex> has an extreme cost to compile times.
         bool quantize = tensor.name.rfind("weight") == tensor.name.size() - 6; // ends with 'weight'?
@@ -1615,7 +1605,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                     f32_data[i] = ggml_fp16_to_fp32(f16_data[i]);
                 }
             } else {
-                throw format("type %s unsupported for integer quantization", llama_format_type(tensor.type));
+                throw format("type %s unsupported for integer quantization", ggml_type_name(tensor.type));
             }
 
             printf("quantizing .. ");

Original file line number	Diff line number	Diff line change
`@@ -16,9 +16,6 @@`
`16`	`16`	`#include <unordered_map>`
`17`	`17`	`#include <vector>`
`18`	`18`
`19`		`-static const char * type_strs[] = { "q4_0", "q4_1", "i8", "i16", "i32", "f16", "f32" };`
`20`		`-static_assert(sizeof(type_strs) == GGML_TYPE_COUNT * sizeof(char *), "Incomplete type list");`
`21`		`-`
`22`	`19`	`struct quantize_stats_params {`
`23`	`20`	`std::string model = "models/7B/ggml-model-f16.bin";`
`24`	`21`	`bool verbose = false;`
`@@ -224,7 +221,7 @@ int main(int argc, char ** argv) {`
`224`	`221`	`break;`
`225`	`222`	`}`
`226`	`223`	`int j;`
`227`		`- for (j = 0; j < GGML_TYPE_COUNT && strcmp(argv[i], type_strs[j]) != 0; j++) {`
	`224`	`+ for (j = 0; j < GGML_TYPE_COUNT && strcmp(argv[i], ggml_type_name((ggml_type) j)) != 0; j++) {`
`228`	`225`	`// find match`
`229`	`226`	`}`
`230`	`227`	`if (j < GGML_TYPE_COUNT) {`
`@@ -279,7 +276,7 @@ int main(int argc, char ** argv) {`
`279`	`276`	`continue;`
`280`	`277`	`}`
`281`	`278`	`if (params.verbose) {`
`282`		`- printf("%s: type %s, size %" PRId64 "\n", kv_tensor.first.c_str(), type_strs[kv_tensor.second->type], ggml_nelements(kv_tensor.second));`
	`279`	`+ printf("%s: type %s, size %" PRId64 "\n", kv_tensor.first.c_str(), ggml_type_name(kv_tensor.second->type), ggml_nelements(kv_tensor.second));`
`283`	`280`	`}`
`284`	`281`	`if (kv_tensor.second->type == GGML_TYPE_F16) {`
`285`	`282`	`is_f16 = true;`
`@@ -304,13 +301,14 @@ int main(int argc, char ** argv) {`
`304`	`301`
`305`	`302`	`// loop throught quantization types`
`306`	`303`	`for (int i = 0; i < GGML_TYPE_COUNT; i++) {`
	`304`	`+ const ggml_type type = (ggml_type) i;`
`307`	`305`	`if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) {`
`308`	`306`	`continue;`
`309`	`307`	`}`
`310`	`308`	`quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);`
`311`	`309`	`if (qfns.quantize_row_q && qfns.dequantize_row_q) {`
`312`	`310`	`if (params.verbose) {`
`313`		`- printf("testing %s ...\n", type_strs[i]);`
	`311`	`+ printf("testing %s ...\n", ggml_type_name(type));`
`314`	`312`	`}`
`315`	`313`
`316`	`314`	`error_stats global_stats {};`
`@@ -322,7 +320,7 @@ int main(int argc, char ** argv) {`
`322`	`320`	`if (params.verbose) {`
`323`	`321`	`printf(" %s ...\n", kv_tensor.first.c_str());`
`324`	`322`	`}`
`325`		`- std::string layer_name { type_strs[i] };`
	`323`	`+ std::string layer_name { ggml_type_name(type) };`
`326`	`324`	`layer_name += "::" + kv_tensor.first;`
`327`	`325`	`test_roundtrip_on_layer(`
`328`	`326`	`layer_name,`
`@@ -337,7 +335,7 @@ int main(int argc, char ** argv) {`
`337`	`335`	`);`
`338`	`336`	`}`
`339`	`337`
`340`		`- print_error_stats(type_strs[i], global_stats, params.print_histogram);`
	`338`	`+ print_error_stats(ggml_type_name(type), global_stats, params.print_histogram);`
`341`	`339`	`}`
`342`	`340`	`}`
`343`	`341`