Skip to content

Commit 46d4278

Browse files
Consistently use int64_t
1 parent b0570b1 commit 46d4278

File tree

1 file changed

+24
-24
lines changed

1 file changed

+24
-24
lines changed

ggml-cuda.cu

+24 -24
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ typedef void (*to_fp32_cuda_t)(const void * x, float * y, int k, cudaStream_t st
4949
typedef void (*ggml_cuda_func_t)(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
5050
typedef void (*ggml_cuda_op_t)(
5151
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
52-
float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, uint64_t i0_low, uint64_t i0_high, int i1, cudaStream_t & cudaStream_main);
52+
float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i0_low, int64_t i0_high, int i1, cudaStream_t & cudaStream_main);
5353

5454
// QK = number of values after dequantization
5555
// QR = QK / number of values before dequantization
@@ -537,26 +537,26 @@ void ggml_cuda_host_free(void * ptr) {
537537
}
538538

539539
static cudaError_t ggml_cuda_h2d_tensor_2d(
540-
void * dst, const struct ggml_tensor * src, uint64_t i3, uint64_t i2, uint64_t i1_low, uint64_t i1_high, cudaStream_t stream) {
540+
void * dst, const struct ggml_tensor * src, int64_t i3, int64_t i2, int64_t i1_low, int64_t i1_high, cudaStream_t stream) {
541541

542542
char * dst_char = (char *) dst;
543-
const uint64_t ne0 = src->ne[0];
544-
const uint64_t nb0 = src->nb[0];
545-
const uint64_t nb1 = src->nb[1];
546-
const uint64_t nb2 = src->nb[2];
547-
const uint64_t nb3 = src->nb[3];
543+
const int64_t ne0 = src->ne[0];
544+
const int64_t nb0 = src->nb[0];
545+
const int64_t nb1 = src->nb[1];
546+
const int64_t nb2 = src->nb[2];
547+
const int64_t nb3 = src->nb[3];
548548
const enum ggml_type type = src->type;
549-
const size_t ts = ggml_type_size(type);
550-
const size_t bs = ggml_blck_size(type);
551-
uint64_t i1_diff = i1_high - i1_low;
549+
const int64_t ts = ggml_type_size(type);
550+
const int64_t bs = ggml_blck_size(type);
551+
int64_t i1_diff = i1_high - i1_low;
552552

553553
const void * x = (const void *) ((const char *) src->data + i1_low*nb1 + i2*nb2 + i3*nb3);
554554
if (nb0 == ts && nb1 == ts*ne0/bs) {
555555
return cudaMemcpyAsync(dst_char, x, i1_diff*nb1, cudaMemcpyHostToDevice, stream);
556556
} else if (nb0 == ts) {
557557
return cudaMemcpy2DAsync(dst_char, ts*ne0/bs, x, nb1, ts*ne0/bs, i1_diff, cudaMemcpyHostToDevice, stream);
558558
} else {
559-
for (uint64_t i1 = 0; i1 < i1_diff; i1++) {
559+
for (int64_t i1 = 0; i1 < i1_diff; i1++) {
560560
const void * rx = (const void *) ((const char *) x + i1*nb1);
561561
void * rd = (void *) (dst_char + i1*ts*ne0/bs);
562562
// pretend the row is a matrix with cols=1
@@ -569,20 +569,20 @@ static cudaError_t ggml_cuda_h2d_tensor_2d(
569569

570570
inline void ggml_cuda_op_mul(
571571
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
572-
float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, uint64_t i0_low, uint64_t i0_high, int i1,
572+
float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i0_low, int64_t i0_high, int i1,
573573
cudaStream_t & cudaStream_main){
574574

575575
GGML_ASSERT(src0_ddf_i != nullptr);
576576
GGML_ASSERT(src1_ddf_i != nullptr);
577577
GGML_ASSERT(dst_ddf_i != nullptr);
578578

579-
const uint64_t ne00 = src0->ne[0];
579+
const int64_t ne00 = src0->ne[0];
580580

581-
const uint64_t ne10 = src1->ne[0];
582-
const uint64_t ne11 = src1->ne[1];
581+
const int64_t ne10 = src1->ne[0];
582+
const int64_t ne11 = src1->ne[1];
583583

584-
for (uint64_t i01 = i0_low; i01 < i0_high; i01++) {
585-
const uint64_t i11 = i1*ne11 + i01%ne11; // broadcast src1 across src0
584+
for (int64_t i01 = i0_low; i01 < i0_high; i01++) {
585+
const int64_t i11 = i1*ne11 + i01%ne11; // broadcast src1 across src0
586586

587587
float * src0_ddf_i01 = src0_ddf_i + i01*ne00;
588588
float * src1_ddf_i01 = src1_ddf_i + i11*ne10;
@@ -599,7 +599,7 @@ inline void ggml_cuda_op_mul(
599599

600600
inline void ggml_cuda_op_dequantize_mul_mat_vec(
601601
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
602-
float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, uint64_t i0_low, uint64_t i0_high, int i1,
602+
float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i0_low, int64_t i0_high, int i1,
603603
cudaStream_t & cudaStream_main){
604604

605605
GGML_ASSERT(src0_ddq_i != nullptr);
@@ -642,7 +642,7 @@ inline void ggml_cuda_op_dequantize_mul_mat_vec(
642642

643643
inline void ggml_cuda_op_mul_mat_cublas(
644644
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
645-
float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, uint64_t i0_low, uint64_t i0_high, int i1,
645+
float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i0_low, int64_t i0_high, int i1,
646646
cudaStream_t & cudaStream_main){
647647

648648
GGML_ASSERT(src0_ddf_i != nullptr);
@@ -652,12 +652,12 @@ inline void ggml_cuda_op_mul_mat_cublas(
652652
const float alpha = 1.0f;
653653
const float beta = 0.0f;
654654

655-
const uint64_t ne00 = src0->ne[0];
655+
const int64_t ne00 = src0->ne[0];
656656

657-
const uint64_t ne10 = src1->ne[0];
658-
const uint64_t ne11 = src1->ne[1];
657+
const int64_t ne10 = src1->ne[0];
658+
const int64_t ne11 = src1->ne[1];
659659

660-
const uint64_t i0_diff = i0_high - i0_low;
660+
const int64_t i0_diff = i0_high - i0_low;
661661

662662
int id;
663663
CUDA_CHECK(cudaGetDevice(&id));
@@ -988,7 +988,7 @@ void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensor, const
988988
continue;
989989
}
990990

991-
uint64_t nrows_split = row_high - row_low;
991+
int64_t nrows_split = row_high - row_low;
992992

993993
const size_t offset_split = offset + row_low*nb1;
994994
const size_t size = ggml_nbytes_split(tensor, nrows_split);

0 commit comments

Comments
 (0)