Skip to content

Commit e37718b

Browse files
More GPU threads for dequantization
1 parent 173d0e6 commit e37718b

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

ggml-cuda.cu

+6 -4
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ static_assert(sizeof(block_q8_0) == sizeof(float) + QK8_0, "wrong q8_0 block siz
8383
static __global__ void dequantize_block_q4_0(const void * vx, float * y) {
8484
const block_q4_0 * x = (const block_q4_0 *) vx;
8585

86-
const int i = blockIdx.x;
86+
const int i = blockIdx.x*blockDim.x + threadIdx.x;
8787

8888
const float d = x[i].d;
8989

@@ -182,7 +182,7 @@ static __global__ void dequantize_block_q5_0(const void * vx, float * y) {
182182
static __global__ void dequantize_block_q5_1(const void * vx, float * y) {
183183
const block_q5_1 * x = (const block_q5_1 *) vx;
184184

185-
const int i = blockIdx.x;
185+
const int i = blockIdx.x*blockDim.x + threadIdx.x;
186186

187187
const float d = x[i].d;
188188
const float m = x[i].m;
@@ -227,7 +227,8 @@ static __global__ void dequantize_block_q8_0(const void * vx, float * y) {
227227

228228
static void dequantize_row_q4_0_cuda(const void * vx, float * y, int k, cudaStream_t stream) {
229229
const int nb = k / QK4_0;
230-
dequantize_block_q4_0<<<nb, 1, 0, stream>>>(vx, y);
230+
GGML_ASSERT(nb % 256 == 0);
231+
dequantize_block_q4_0<<<nb/256, 256, 0, stream>>>(vx, y);
231232
}
232233

233234
static void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStream_t stream) {
@@ -247,7 +248,8 @@ static void dequantize_row_q5_0_cuda(const void * vx, float * y, int k, cudaStre
247248

248249
static void dequantize_row_q5_1_cuda(const void * vx, float * y, int k, cudaStream_t stream) {
249250
const int nb = k / QK5_1;
250-
dequantize_block_q5_1<<<nb, 1, 0, stream>>>(vx, y);
251+
GGML_ASSERT(nb % 256 == 0);
252+
dequantize_block_q5_1<<<nb/256, 256, 0, stream>>>(vx, y);
251253
}
252254

253255
static void dequantize_row_q8_0_cuda(const void * vx, float * y, int k, cudaStream_t stream) {

0 commit comments

Comments
 (0)