Skip to content

Commit be58fd8

Browse files
slaren authored and arthw committed
ggml : fix q4xx mat mul, increase ggml_aligned_malloc alignment (ggml-org#10167)
1 parent 2cbc3b3 commit be58fd8

File tree

2 files changed

+8
-6
lines changed

2 files changed

+8
-6
lines changed

ggml/src/ggml-cpu.c

+2 -3
Original file line number | Diff line number | Diff line change
@@ -304,6 +304,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
304304
.nrows = 1,
305305
},
306306
[GGML_TYPE_Q8_0] = {
307+
.from_float_to_mat = quantize_mat_q8_0,
307308
.vec_dot = ggml_vec_dot_q8_0_q8_0,
308309
.vec_dot_type = GGML_TYPE_Q8_0,
309310
#if defined (__ARM_FEATURE_MATMUL_INT8)
@@ -13692,9 +13693,7 @@ void ggml_cpu_init(void) {
1369213693
uint16_t u16;
1369313694
ggml_fp16_t fp16;
1369413695
} u = {i};
13695-
// FIXME: this table is used in conversion functions outside of compute
13696-
// current code depends on ggml_init initializing this table
13697-
float f = ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
13696+
float f = GGML_FP16_TO_FP32(u.fp16);
1369813697
ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
1369913698
ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
1370013699
}

ggml/src/ggml.c

+6 -3
Original file line number | Diff line number | Diff line change
@@ -220,17 +220,20 @@ void ggml_log_callback_default(enum ggml_log_level level, const char * text, voi
220220

221221

222222
void * ggml_aligned_malloc(size_t size) {
223+
const int alignment = 64;
224+
223225
#if defined(_MSC_VER) || defined(__MINGW32__)
224-
return _aligned_malloc(size, TENSOR_ALIGNMENT);
226+
return _aligned_malloc(size, alignment);
225227
#else
226228
if (size == 0) {
227229
GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
228230
return NULL;
229231
}
230232
void * aligned_memory = NULL;
231233
#ifdef GGML_USE_CPU_HBM
232-
int result = hbw_posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size);
234+
int result = hbw_posix_memalign(&aligned_memory, alignment, size);
233235
#elif TARGET_OS_OSX
236+
GGML_UNUSED(alignment);
234237
kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
235238
int result = EFAULT;
236239
switch (alloc_status) {
@@ -248,7 +251,7 @@ void * ggml_aligned_malloc(size_t size) {
248251
break;
249252
}
250253
#else
251-
int result = posix_memalign(&aligned_memory, TENSOR_ALIGNMENT, size);
254+
int result = posix_memalign(&aligned_memory, alignment, size);
252255
#endif
253256
if (result != 0) {
254257
// Handle allocation failure

0 commit comments

Comments
 (0)