Commit 22ab495

YixinSong-esyx authored and committed
fix warning in ggml.c (ggml-org#5)
Co-authored-by: syx <yixinsong@sjtu.edu.com>
1 parent 1557b81 commit 22ab495

File tree

1 file changed (+47, -58 lines)

ggml.c (+47, -58)
@@ -13952,7 +13952,7 @@ static void ggml_compute_forward_mul_mat_sparse_head(
 
     int64_t ir010 = dr0*ith0;
     // const int64_t ir011 = MIN(ir010 + dr0, nr0);
-    const int64_t ir011 = ir010 + dr0;
+    // const int64_t ir011 = ir010 + dr0;
 
     const int64_t ir110 = dr1*ith1;
     const int64_t ir111 = MIN(ir110 + dr1, nr1);
@@ -13969,13 +13969,13 @@ static void ggml_compute_forward_mul_mat_sparse_head(
     assert(ne13 % ne03 == 0);
 
     // block-tiling attempt
-    const int64_t blck_0 = 16;
+    // const int64_t blck_0 = 16;
     const int64_t blck_1 = 16;
 
     // attempt to reduce false-sharing (does not seem to make a difference)
-    float tmp[16];
+    // float tmp[16];
     float *ffdata = (float *)dst->src[2]->data;
-    int *gid = (int *)dst->src[3]->data;
+    // int *gid = (int *)dst->src[3]->data;
     while(true) {
         ir010 = atomic_fetch_add(params->aic, dr0);
         for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) {
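
Note: most hunks in this commit follow the pattern above: locals that are declared but never read trip -Wunused-variable under -Wall, and the fix is to comment the declarations out rather than delete them, so the experimental tiling code stays visible. A minimal, self-contained sketch of the warning and the fix (illustrative only, not code from ggml.c):

    #include <stdio.h>

    static int sum(const int *v, int n) {
        // const int blck_0 = 16;   /* unused local: -Wall would warn if left uncommented */
        int s = 0;
        for (int i = 0; i < n; i++) {
            s += v[i];
        }
        return s;
    }

    int main(void) {
        int v[3] = {1, 2, 3};
        printf("%d\n", sum(v, 3));   /* prints 9 */
        return 0;
    }
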
@@ -14210,12 +14210,12 @@ static void ggml_compute_forward_mul_mat_sparse(
     assert(ne13 % ne03 == 0);
 
     // block-tiling attempt
-    const int64_t blck_0 = 16;
+    // const int64_t blck_0 = 16;
     const int64_t blck_1 = 16;
-    int total = 0;
+    // int total = 0;
 
     // attempt to reduce false-sharing (does not seem to make a difference)
-    float tmp[16];
+    // float tmp[16];
     float *ffdata = (float *)dst->src[2]->data;
     int *gid = (int *)dst->src[3]->data;
     float *predictor_data = (float *)dst->src[2]->data;
@@ -14291,13 +14291,14 @@ static void ggml_compute_forward_mul_mat_sparse(
 }
 
 // vz = alpha * vx + vy
-static void ggml_axpy_normal_f16(const int n, const ggml_fp16_t * vx, const ggml_fp16_t * restrict vy, const void* restrict vz, ggml_fp16_t alpha) {
+static void ggml_axpy_normal_f16(const int n, const ggml_fp16_t * vx, const ggml_fp16_t * restrict vy, void* restrict vz, ggml_fp16_t alpha) {
     float *res = (float *)vz;
     for (int i = 0; i < n; i++) {
         res[i] = res[i] + (GGML_FP16_TO_FP32(vx[i])*GGML_FP16_TO_FP32(alpha));
     }
+    (void) vy;
 }
-static void ggml_axpy_avx_f16(const int n, const ggml_fp16_t * restrict vx, const ggml_fp16_t * restrict vy, void* restrict vz, ggml_fp16_t alpha) {
+static void ggml_axpy_avx_f16(const int n, const ggml_fp16_t * restrict vx, const ggml_fp16_t * vy, void* vz, ggml_fp16_t alpha) {
 #if defined(__AVX2__)
     float *result = (float *)vz;
     float alpha_f32 = GGML_FP16_TO_FP32(alpha);
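
Note: the added (void) vy; statements use the standard C idiom for a parameter that stays in the signature for API symmetry but is never read: evaluating it in a cast to void marks it as intentionally unused and silences -Wunused-parameter. A minimal sketch of the idiom with hypothetical names (not the ggml functions):

    #include <stdio.h>

    /* scales dst in place; src is kept for signature symmetry but never read */
    static void scale_rows(int n, float *dst, const float *src, float alpha) {
        for (int i = 0; i < n; i++) {
            dst[i] *= alpha;
        }
        (void) src;   /* marks src as intentionally unused: silences -Wunused-parameter */
    }

    int main(void) {
        float d[3] = {1.0f, 2.0f, 3.0f};
        scale_rows(3, d, NULL, 2.0f);
        printf("%.1f %.1f %.1f\n", d[0], d[1], d[2]);   /* 2.0 4.0 6.0 */
        return 0;
    }
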
@@ -14316,7 +14317,7 @@ static void ggml_axpy_avx_f16(const int n, const ggml_fp16_t * restrict vx, cons
         res[i] = res[i] + (GGML_FP16_TO_FP32(vx[i])*alpha_convert);
     }
 #endif
-
+    (void)vy;
 }
 atomic_flag g_axpy_dense_lock = ATOMIC_FLAG_INIT;
 static void ggml_compute_forward_mul_mat_axpy_dense(
@@ -14329,14 +14330,14 @@ static void ggml_compute_forward_mul_mat_axpy_dense(
 
     GGML_TENSOR_BINARY_OP_LOCALS;
 
-    const int ith = params->ith;
+    // const int ith = params->ith;
     const int nth = params->nth;
 
     const enum ggml_type type = src0->type;
 
-    const bool src1_cont = ggml_is_contiguous(src1);
+    // const bool src1_cont = ggml_is_contiguous(src1);
 
-    ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
+    // ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
     enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type;
     ggml_from_float_t const from_float_to_vec_dot = type_traits[vec_dot_type].from_float;
 
@@ -14356,8 +14357,8 @@ static void ggml_compute_forward_mul_mat_axpy_dense(
     GGML_ASSERT(nb2 <= nb3);
 
     // broadcast factors
-    const int64_t r2 = ne12/ne02;
-    const int64_t r3 = ne13/ne03;
+    // const int64_t r2 = ne12/ne02;
+    // const int64_t r3 = ne13/ne03;
 
     // nb01 >= nb00 - src0 is not transposed
     // compute by src0 rows
@@ -14387,7 +14388,7 @@ static void ggml_compute_forward_mul_mat_axpy_dense(
     }
 
     ggml_fp16_t* wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
-    const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
+    // const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
 
     struct ggml_tensor *src2 = dst->src[2];
 
@@ -14399,15 +14400,15 @@ static void ggml_compute_forward_mul_mat_axpy_dense(
     // const int64_t ir11 = MIN(ir10 + dr, src2->ne[0]);
 
     // src1 rows
-    const int64_t nr1 = ne11*ne12*ne13;
+    // const int64_t nr1 = ne11*ne12*ne13;
     // float *idx = src2->data;
     // int *gid = (int *)(dst->src[3]->data);
     // printf("down %d up %d ne00 %d\n", ir10, ir11, ne00);
 
     float vec[ne00*4];
     void *vy = vec;
     memset(vy, 0, ne00*4);
-    char* src0_row = (const char *) src0->data;
+    char* src0_row = (char *) src0->data;
     while(true) {
         const int ir0 = atomic_fetch_add(params->aic, dr);
         for (int64_t ir1 = ir0; ir1 < ir0+dr; ir1++) {
@@ -14417,7 +14418,7 @@ static void ggml_compute_forward_mul_mat_axpy_dense(
             // if (idx[ir1] < 0.0f)
             //     continue;
             // ggml_axpy_normal_f16(ne00, src0_row+nb01*ir1, vy, vy, wdata[ir1]);
-            ggml_axpy_avx_f16(ne00, src0_row+nb01*ir1, vy, vy, wdata[ir1]);
+            ggml_axpy_avx_f16(ne00, (ggml_fp16_t *)(src0_row+nb01*ir1), (ggml_fp16_t *)vy, vy, wdata[ir1]);
         }
         if (ir0 + dr >= nr)
             break;
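
Note: the two changes above address pointer-type warnings rather than unused symbols: initializing the non-const char* src0_row from a (const char *) cast discards a qualifier, and passing char*/void* arguments where ggml_axpy_avx_f16 expects ggml_fp16_t pointers raises -Wincompatible-pointer-types; the fix is to cast to the exact declared types. A minimal sketch of both warnings and their fixes, using made-up names:

    #include <stdint.h>
    #include <stdio.h>

    static void axpy_u16(int n, const uint16_t *x, uint16_t *y) {
        for (int i = 0; i < n; i++) {
            y[i] = (uint16_t)(y[i] + x[i]);
        }
    }

    int main(void) {
        uint16_t buf[4] = {1, 2, 3, 4};
        void *data = buf;

        // char *row = (const char *) data;   /* -Wdiscarded-qualifiers: drops const */
        char *row = (char *) data;            /* cast matches the declared type */

        // axpy_u16(2, row, row + 4);         /* -Wincompatible-pointer-types */
        axpy_u16(2, (uint16_t *) row, (uint16_t *)(row + 4));   /* explicit casts, as in the patch */

        printf("%d %d\n", buf[2], buf[3]);    /* prints 4 6 */
        return 0;
    }
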
@@ -14475,9 +14476,9 @@ static void ggml_compute_forward_mul_mat_axpy(
 
     const enum ggml_type type = src0->type;
 
-    const bool src1_cont = ggml_is_contiguous(src1);
+    // const bool src1_cont = ggml_is_contiguous(src1);
 
-    ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
+    // ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
     enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type;
     ggml_from_float_t const from_float_to_vec_dot = type_traits[vec_dot_type].from_float;
 
@@ -14497,8 +14498,8 @@ static void ggml_compute_forward_mul_mat_axpy(
     GGML_ASSERT(nb2 <= nb3);
 
     // broadcast factors
-    const int64_t r2 = ne12/ne02;
-    const int64_t r3 = ne13/ne03;
+    // const int64_t r2 = ne12/ne02;
+    // const int64_t r3 = ne13/ne03;
 
     // nb01 >= nb00 - src0 is not transposed
     // compute by src0 rows
@@ -14550,7 +14551,7 @@ static void ggml_compute_forward_mul_mat_axpy(
 
     float vec[ne00*4];
     void *vy = vec;
-    char* src0_row = (const char *) src0->data;
+    char* src0_row = (char *) src0->data;
     ggml_fp16_t * src1_ptr = NULL;
     for (int col_idx = 0; col_idx < nr1; col_idx++) {
         src1_ptr = (ggml_fp16_t *)((char *)wdata + col_idx * row_size);
@@ -14571,7 +14572,7 @@ static void ggml_compute_forward_mul_mat_axpy(
             if (idx[ir1] < -0.0f)
                 continue;
             // ggml_axpy_normal_f16(ne00, src0_row+nb01*ir1, vy, vy, wdata[ir1]);
-            ggml_axpy_avx_f16(ne00, src0_row+nb01*ir1, vy, vy, src1_ptr[ir1]);
+            ggml_axpy_avx_f16(ne00, (ggml_fp16_t *)(src0_row+nb01*ir1), (ggml_fp16_t *)vy, vy, src1_ptr[ir1]);
         }
 
         // acquire the lock
@@ -14625,9 +14626,9 @@ static void ggml_compute_forward_mul_mat_axpy_q4_0(
 
     const enum ggml_type type = src0->type;
 
-    const bool src1_cont = ggml_is_contiguous(src1);
+    // const bool src1_cont = ggml_is_contiguous(src1);
 
-    ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
+    // ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
     enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type;
     ggml_from_float_t const from_float_to_vec_dot = type_traits[vec_dot_type].from_float;
 
@@ -14647,8 +14648,8 @@ static void ggml_compute_forward_mul_mat_axpy_q4_0(
     GGML_ASSERT(nb2 <= nb3);
 
     // broadcast factors
-    const int64_t r2 = ne12/ne02;
-    const int64_t r3 = ne13/ne03;
+    // const int64_t r2 = ne12/ne02;
+    // const int64_t r3 = ne13/ne03;
 
     // nb01 >= nb00 - src0 is not transposed
     // compute by src0 rows
@@ -14698,10 +14699,10 @@ static void ggml_compute_forward_mul_mat_axpy_q4_0(
 
     float vec[ne00*4];
     void *vy = vec;
-    char* src0_row = (const char *) src0->data;
+    char* src0_row = (char *) src0->data;
     for (int col_idx = 0; col_idx < nr1; col_idx++) {
         // const block_q8_0 * restrict nerual = wdata;
-        const block_q8_0 *restrict nerual = ((char *)wdata + col_idx * row_size);
+        const block_q8_0 *restrict nerual = (block_q8_0 *)((char *)wdata + col_idx * row_size);
         idx = (float *)((char *)src2->data + col_idx * idx_row_size);
         memset(vy, 0, ne00 * 4);
         // while(true) {
@@ -14774,14 +14775,14 @@ static void ggml_compute_forward_mul_mat_axpy_head(
 
     GGML_TENSOR_BINARY_OP_LOCALS;
 
-    const int ith = params->ith;
-    const int nth = params->nth;
+    // const int ith = params->ith;
+    // const int nth = params->nth;
 
     const enum ggml_type type = src0->type;
 
-    const bool src1_cont = ggml_is_contiguous(src1);
+    // const bool src1_cont = ggml_is_contiguous(src1);
 
-    ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
+    // ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
     enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type;
     ggml_from_float_t const from_float_to_vec_dot = type_traits[vec_dot_type].from_float;
 
@@ -14801,8 +14802,8 @@ static void ggml_compute_forward_mul_mat_axpy_head(
     GGML_ASSERT(nb2 <= nb3);
 
     // broadcast factors
-    const int64_t r2 = ne12/ne02;
-    const int64_t r3 = ne13/ne03;
+    // const int64_t r2 = ne12/ne02;
+    // const int64_t r3 = ne13/ne03;
 
     // nb01 >= nb00 - src0 is not transposed
     // compute by src0 rows
@@ -14832,7 +14833,7 @@ static void ggml_compute_forward_mul_mat_axpy_head(
     }
 
     const ggml_fp16_t* wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
-    const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
+    // const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
 
     struct ggml_tensor *src2 = dst->src[2];
     int chunk = ne00 / 32;
@@ -14845,15 +14846,15 @@ static void ggml_compute_forward_mul_mat_axpy_head(
     // const int64_t ir11 = MIN(ir10 + dr, src2->ne[0]);
 
     // src1 rows
-    const int64_t nr1 = ne11*ne12*ne13;
-    float *idx = src2->data;
-    int *gid = (int *)(dst->src[3]->data);
+    // const int64_t nr1 = ne11*ne12*ne13;
+    // float *idx = src2->data;
+    // int *gid = (int *)(dst->src[3]->data);
     // printf("down %d up %d ne00 %d\n", ir10, ir11, ne00);
 
     float vec[ne00*4];
     void *vy = vec;
     memset(vy, 0, ne00*4);
-    char* src0_row = (const char *) src0->data;
+    char* src0_row = (char *) src0->data;
     while (true) {
         const int ir0 = atomic_fetch_add(params->aic, dr);
         // int id = ir0 >> 7;
@@ -14862,7 +14863,7 @@ static void ggml_compute_forward_mul_mat_axpy_head(
         for (int64_t ir1 = ir0; ir1 < ir0+dr; ir1++) {
             if (ir1 >= nr) break;
             // ggml_axpy_normal_f16(ne00, src0_row+nb01*ir1, vy, vy, wdata[ir1]);
-            ggml_axpy_avx_f16(ne00, src0_row+nb01*ir1, vy, vy, wdata[ir1]);
+            ggml_axpy_avx_f16(ne00, (ggml_fp16_t *)(src0_row+nb01*ir1), (ggml_fp16_t *)vy, vy, wdata[ir1]);
         }
         if (ir0 + dr >= nr)
             break;
@@ -15746,6 +15747,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 GGML_ASSERT(false); // TODO: not implemented
             } break;
         case GGML_OP_MUL_MAT:
+        case GGML_OP_AXPY:
             {
                 // https://cs231n.github.io/optimization-2/#staged
                 // # forward pass
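
Note: placing case GGML_OP_AXPY: directly under case GGML_OP_MUL_MAT: makes the new op fall through to the existing mul_mat backward code, and it likely also clears a -Wswitch warning about an enumeration value not handled in the switch. A minimal sketch of the fall-through pattern, with hypothetical names:

    #include <stdio.h>

    enum op { OP_MUL_MAT, OP_AXPY, OP_RELU };

    static const char *backward_kind(enum op o) {
        switch (o) {
            case OP_MUL_MAT:
            case OP_AXPY:      /* falls through: shares the mul_mat gradient path */
                return "matmul backward";
            case OP_RELU:
                return "relu backward";
            default:
                return "not implemented";
        }
    }

    int main(void) {
        printf("%s\n", backward_kind(OP_AXPY));   /* prints "matmul backward" */
        return 0;
    }
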
@@ -16737,20 +16739,7 @@ static void ggml_graph_compute_perf_stats_node_gpu(struct ggml_tensor * node, co
     node->perf_cycles += cycles_cur;
     node->perf_time_us += time_us_cur;
 }
-void busy_wait_cycles(int cycles) {
-    struct timespec ts_start, ts_end;
-
-    clock_gettime(CLOCK_MONOTONIC, &ts_start);
 
-    while (1) {
-        clock_gettime(CLOCK_MONOTONIC, &ts_end);
-        long diff_ns = (ts_end.tv_sec - ts_start.tv_sec) * 1000000000 +
-                       (ts_end.tv_nsec - ts_start.tv_nsec);
-        if (diff_ns >= cycles) {
-            break;
-        }
-    }
-}
 
 static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
     int n_tasks = 0;
@@ -17164,8 +17153,8 @@ static thread_ret_t ggml_graph_compute_thread_hybrid(void * data) {
             /*.type =*/GGML_TASK_COMPUTE,
             /*.ith =*/0,
             /*.nth =*/1,
-            /*.wsize =*/NULL,
-            /*.wdata =*/NULL,
+            /*.wsize =*/0,
+            /*.wdata =*/0,
             /*.aic =*/0,
         };
 
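
Note: the final hunk swaps NULL for 0 in the wsize and wdata initializers. wsize is a size field (size_t in ggml's compute params), so initializing it with the pointer constant NULL draws an int-conversion warning, while 0 is valid for both the integer and the pointer member. A minimal sketch with an assumed struct layout:

    #include <stddef.h>
    #include <stdio.h>

    struct params {
        int    type;
        int    ith;
        int    nth;
        size_t wsize;   /* integer field: initializing it with NULL would warn */
        void  *wdata;   /* pointer field: 0 and NULL are both fine here */
    };

    int main(void) {
        struct params p = {
            /*.type  =*/ 0,
            /*.ith   =*/ 0,
            /*.nth   =*/ 1,
            /*.wsize =*/ 0,
            /*.wdata =*/ 0,
        };
        printf("%zu %p\n", p.wsize, p.wdata);
        return 0;
    }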