@@ -49,7 +49,7 @@ typedef void (*to_fp32_cuda_t)(const void * x, float * y, int k, cudaStream_t st
 typedef void (*ggml_cuda_func_t)(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
 typedef void (*ggml_cuda_op_t)(
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
-    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, uint64_t i0_low, uint64_t i0_high, int i1, cudaStream_t & cudaStream_main);
+    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i0_low, int64_t i0_high, int i1, cudaStream_t & cudaStream_main);
 
 // QK = number of values after dequantization
 // QR = QK / number of values before dequantization
@@ -537,26 +537,26 @@ void ggml_cuda_host_free(void * ptr) {
 }
 
 static cudaError_t ggml_cuda_h2d_tensor_2d(
-    void * dst, const struct ggml_tensor * src, uint64_t i3, uint64_t i2, uint64_t i1_low, uint64_t i1_high, cudaStream_t stream) {
+    void * dst, const struct ggml_tensor * src, int64_t i3, int64_t i2, int64_t i1_low, int64_t i1_high, cudaStream_t stream) {
 
     char * dst_char = (char *) dst;
-    const uint64_t ne0 = src->ne[0];
-    const uint64_t nb0 = src->nb[0];
-    const uint64_t nb1 = src->nb[1];
-    const uint64_t nb2 = src->nb[2];
-    const uint64_t nb3 = src->nb[3];
+    const int64_t ne0 = src->ne[0];
+    const int64_t nb0 = src->nb[0];
+    const int64_t nb1 = src->nb[1];
+    const int64_t nb2 = src->nb[2];
+    const int64_t nb3 = src->nb[3];
     const enum ggml_type type = src->type;
-    const size_t ts = ggml_type_size(type);
-    const size_t bs = ggml_blck_size(type);
-    uint64_t i1_diff = i1_high - i1_low;
+    const int64_t ts = ggml_type_size(type);
+    const int64_t bs = ggml_blck_size(type);
+    int64_t i1_diff = i1_high - i1_low;
 
     const void * x = (const void *) ((const char *) src->data + i1_low*nb1 + i2*nb2 + i3*nb3);
     if (nb0 == ts && nb1 == ts*ne0/bs) {
         return cudaMemcpyAsync(dst_char, x, i1_diff*nb1, cudaMemcpyHostToDevice, stream);
     } else if (nb0 == ts) {
         return cudaMemcpy2DAsync(dst_char, ts*ne0/bs, x, nb1, ts*ne0/bs, i1_diff, cudaMemcpyHostToDevice, stream);
     } else {
-        for (uint64_t i1 = 0; i1 < i1_diff; i1++) {
+        for (int64_t i1 = 0; i1 < i1_diff; i1++) {
             const void * rx = (const void *) ((const char *) x + i1*nb1);
             void * rd = (void *) (dst_char + i1*ts*ne0/bs);
             // pretend the row is a matrix with cols=1
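ggml_cuda_h2d_tensor_2d above picks one of three host-to-device copy strategies: a single contiguous cudaMemcpyAsync when rows are dense and adjacent (nb0 == ts && nb1 == ts*ne0/bs), a pitched cudaMemcpy2DAsync when elements are contiguous within a row but rows are strided, and a row-by-row fallback otherwise. A minimal standalone sketch of the pitched case, with hypothetical names and plain float data rather than ggml's quantized blocks:

#include <cuda_runtime.h>

// Copy a host matrix whose rows are dense but padded to src_stride
// elements into a tightly packed device buffer (hypothetical helper,
// not part of the patch).
cudaError_t copy_strided_rows(float * d_dst, const float * h_src,
                              int64_t rows, int64_t cols, int64_t src_stride,
                              cudaStream_t stream) {
    return cudaMemcpy2DAsync(
        d_dst, cols*sizeof(float),       // dpitch: device rows are packed
        h_src, src_stride*sizeof(float), // spitch: host rows are padded
        cols*sizeof(float), rows,        // width in bytes, height in rows
        cudaMemcpyHostToDevice, stream);
}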
@@ -569,20 +569,20 @@ static cudaError_t ggml_cuda_h2d_tensor_2d(
 
 inline void ggml_cuda_op_mul(
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
-    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, uint64_t i0_low, uint64_t i0_high, int i1,
+    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i0_low, int64_t i0_high, int i1,
     cudaStream_t & cudaStream_main){
 
     GGML_ASSERT(src0_ddf_i != nullptr);
     GGML_ASSERT(src1_ddf_i != nullptr);
     GGML_ASSERT(dst_ddf_i != nullptr);
 
-    const uint64_t ne00 = src0->ne[0];
+    const int64_t ne00 = src0->ne[0];
 
-    const uint64_t ne10 = src1->ne[0];
-    const uint64_t ne11 = src1->ne[1];
+    const int64_t ne10 = src1->ne[0];
+    const int64_t ne11 = src1->ne[1];
 
-    for (uint64_t i01 = i0_low; i01 < i0_high; i01++) {
-        const uint64_t i11 = i1*ne11 + i01%ne11; // broadcast src1 across src0
+    for (int64_t i01 = i0_low; i01 < i0_high; i01++) {
+        const int64_t i11 = i1*ne11 + i01%ne11; // broadcast src1 across src0
 
         float * src0_ddf_i01 = src0_ddf_i + i01*ne00;
         float * src1_ddf_i01 = src1_ddf_i + i11*ne10;
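The modulo in i11 = i1*ne11 + i01%ne11 is what implements the broadcast: for example, with ne11 = 2 and i1 = 3, slice rows i01 = 0, 1, 2, 3 read src1 rows i11 = 6, 7, 6, 7, and with ne11 = 1 every row of src0 is multiplied by the same single row of src1.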
@@ -599,7 +599,7 @@ inline void ggml_cuda_op_mul(
 
 inline void ggml_cuda_op_dequantize_mul_mat_vec(
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
-    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, uint64_t i0_low, uint64_t i0_high, int i1,
+    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i0_low, int64_t i0_high, int i1,
     cudaStream_t & cudaStream_main){
 
     GGML_ASSERT(src0_ddq_i != nullptr);
@@ -642,7 +642,7 @@ inline void ggml_cuda_op_dequantize_mul_mat_vec(
 
 inline void ggml_cuda_op_mul_mat_cublas(
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
-    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, uint64_t i0_low, uint64_t i0_high, int i1,
+    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i0_low, int64_t i0_high, int i1,
     cudaStream_t & cudaStream_main){
 
     GGML_ASSERT(src0_ddf_i != nullptr);
@@ -652,12 +652,12 @@ inline void ggml_cuda_op_mul_mat_cublas(
     const float alpha = 1.0f;
     const float beta = 0.0f;
 
-    const uint64_t ne00 = src0->ne[0];
+    const int64_t ne00 = src0->ne[0];
 
-    const uint64_t ne10 = src1->ne[0];
-    const uint64_t ne11 = src1->ne[1];
+    const int64_t ne10 = src1->ne[0];
+    const int64_t ne11 = src1->ne[1];
 
-    const uint64_t i0_diff = i0_high - i0_low;
+    const int64_t i0_diff = i0_high - i0_low;
 
     int id;
     CUDA_CHECK(cudaGetDevice(&id));
@@ -988,7 +988,7 @@ void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensor, const
             continue;
         }
 
-        uint64_t nrows_split = row_high - row_low;
+        int64_t nrows_split = row_high - row_low;
 
         const size_t offset_split = offset + row_low*nb1;
         const size_t size = ggml_nbytes_split(tensor, nrows_split);
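Every change in this diff is the same substitution: uint64_t becomes int64_t for tensor extents, strides, and row ranges, in line with the signed int64_t that ggml itself uses for the ne dimensions. The practical benefit is that differences such as i1_high - i1_low or row_high - row_low stay well-behaved if a range is ever empty or inverted, instead of wrapping around to a huge positive value. A minimal standalone illustration of the pitfall (not from the patch):

#include <cstdint>
#include <cstdio>

int main() {
    uint64_t lo = 8, hi = 4;                        // an inverted range
    uint64_t diff_u = hi - lo;                      // wraps: 18446744073709551612
    int64_t  diff_s = (int64_t) hi - (int64_t) lo;  // -4: easy to detect
    printf("unsigned: %llu, signed: %lld\n",
           (unsigned long long) diff_u, (long long) diff_s);
    return 0;
}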