Skip to content

Commit 40846bd

Browse files
committed
Cleanup cublas comments
1 parent 5fc6799 commit 40846bd

File tree

2 files changed

+15
-16
lines changed

2 files changed

+15
-16
lines changed

Diff for: CMakeLists.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
cmake_minimum_required(VERSION 3.17) # Don't bump this version for no reason
1+
cmake_minimum_required(VERSION 3.12) # Don't bump this version for no reason
22
project("llama.cpp" C CXX)
33

44
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -144,6 +144,8 @@ if (LLAMA_OPENBLAS)
144144
endif()
145145

146146
if (LLAMA_CUBLAS)
147+
cmake_minimum_required(VERSION 3.17)
148+
147149
find_package(CUDAToolkit)
148150
if (CUDAToolkit_FOUND)
149151
message(STATUS "cuBLAS found")

Diff for: ggml.c

+12-15
Original file line numberDiff line numberDiff line change
@@ -172,14 +172,14 @@ static cublasHandle_t cublasH = NULL;
172172
static cudaStream_t cudaStream = NULL;
173173
static void init_cublas(void) {
174174
if (cublasH == NULL) {
175-
/* step 1: create cublas handle, bind a stream */
175+
// create cublas handle, bind a stream
176176
CUBLAS_CHECK(cublasCreate(&cublasH));
177177

178178
CUDA_CHECK(cudaStreamCreateWithFlags(&cudaStream, cudaStreamNonBlocking));
179179
CUBLAS_CHECK(cublasSetStream(cublasH, cudaStream));
180180

181181
// configure logging to stdout
182-
//CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, NULL));
182+
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, NULL));
183183
}
184184
}
185185
#endif
@@ -7336,19 +7336,19 @@ static void ggml_compute_forward_mul_mat_f32(
73367336
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
73377337

73387338
#if defined(GGML_USE_CUBLAS)
7339-
/* step 2: copy data to device */
7339+
// copy data to device
73407340
CUDA_CHECK(cudaMemcpyAsync(d_X, x, sizeof(float) * x_ne, cudaMemcpyHostToDevice, cudaStream));
73417341
CUDA_CHECK(cudaMemcpyAsync(d_Y, y, sizeof(float) * y_ne, cudaMemcpyHostToDevice, cudaStream));
73427342

7343-
/* step 3: compute */
7343+
// compute
73447344
CUBLAS_CHECK(
73457345
cublasSgemm(cublasH, CUBLAS_OP_T, CUBLAS_OP_N,
73467346
ne01, ne11, ne10,
73477347
&alpha, d_X, ne00,
73487348
d_Y, ne10,
73497349
&beta, d_D, ne01));
73507350

7351-
/* step 4: copy data to host */
7351+
// copy data to host
73527352
CUDA_CHECK(cudaMemcpyAsync(d, d_D, sizeof(float) * d_ne, cudaMemcpyDeviceToHost, cudaStream));
73537353
CUDA_CHECK(cudaStreamSynchronize(cudaStream));
73547354
#else
@@ -7362,7 +7362,6 @@ static void ggml_compute_forward_mul_mat_f32(
73627362
}
73637363
}
73647364
#if defined(GGML_USE_CUBLAS)
7365-
/* free resources */
73667365
CUDA_CHECK(cudaFree(d_X));
73677366
CUDA_CHECK(cudaFree(d_Y));
73687367
CUDA_CHECK(cudaFree(d_D));
@@ -7533,7 +7532,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
75337532
for (int64_t i03 = 0; i03 < ne03; i03++) {
75347533
for (int64_t i02 = 0; i02 < ne02; i02++) {
75357534
#if defined(GGML_USE_CUBLAS)
7536-
// with cuBlAS, instead of converting src0 to fp32, we convert src1 to fp16
7535+
// with cuBLAS, instead of converting src0 to fp32, we convert src1 to fp16
75377536
{
75387537
size_t id = 0;
75397538
for (int64_t i01 = 0; i01 < ne11; ++i01) {
@@ -7559,11 +7558,11 @@ static void ggml_compute_forward_mul_mat_f16_f32(
75597558

75607559
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
75617560

7562-
/* step 2: copy data to device */
7561+
// copy data to device
75637562
CUDA_CHECK(cudaMemcpyAsync(d_X, x, sizeof(ggml_fp16_t) * x_ne, cudaMemcpyHostToDevice, cudaStream));
75647563
CUDA_CHECK(cudaMemcpyAsync(d_Y, y, sizeof(ggml_fp16_t) * y_ne, cudaMemcpyHostToDevice, cudaStream));
75657564

7566-
/* step 3: compute */
7565+
// compute
75677566
CUBLAS_CHECK(
75687567
cublasGemmEx(cublasH, CUBLAS_OP_T, CUBLAS_OP_N,
75697568
ne01, ne11, ne10,
@@ -7573,7 +7572,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
75737572
CUBLAS_COMPUTE_32F,
75747573
CUBLAS_GEMM_DEFAULT));
75757574

7576-
/* step 4: copy data to host */
7575+
// copy data to host
75777576
CUDA_CHECK(cudaMemcpyAsync(d, d_D, sizeof(float) * d_ne, cudaMemcpyDeviceToHost, cudaStream));
75787577
CUDA_CHECK(cudaStreamSynchronize(cudaStream));
75797578
#else
@@ -7593,7 +7592,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
75937592
}
75947593

75957594
#if defined(GGML_USE_CUBLAS)
7596-
/* free resources */
75977595
CUDA_CHECK(cudaFree(d_X));
75987596
CUDA_CHECK(cudaFree(d_Y));
75997597
CUDA_CHECK(cudaFree(d_D));
@@ -7797,19 +7795,19 @@ static void ggml_compute_forward_mul_mat_q_f32(
77977795
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
77987796

77997797
#if defined(GGML_USE_CUBLAS)
7800-
/* step 2: copy data to device */
7798+
// copy data to device
78017799
CUDA_CHECK(cudaMemcpyAsync(d_X, x, sizeof(float) * x_ne, cudaMemcpyHostToDevice, cudaStream));
78027800
CUDA_CHECK(cudaMemcpyAsync(d_Y, y, sizeof(float) * y_ne, cudaMemcpyHostToDevice, cudaStream));
78037801

7804-
/* step 3: compute */
7802+
// compute
78057803
CUBLAS_CHECK(
78067804
cublasSgemm(cublasH, CUBLAS_OP_T, CUBLAS_OP_N,
78077805
ne01, ne11, ne10,
78087806
&alpha, d_X, ne00,
78097807
d_Y, ne10,
78107808
&beta, d_D, ne01));
78117809

7812-
/* step 4: copy data to host */
7810+
// copy data to host
78137811
CUDA_CHECK(cudaMemcpyAsync(d, d_D, sizeof(float) * d_ne, cudaMemcpyDeviceToHost, cudaStream));
78147812
CUDA_CHECK(cudaStreamSynchronize(cudaStream));
78157813
#else
@@ -7824,7 +7822,6 @@ static void ggml_compute_forward_mul_mat_q_f32(
78247822
}
78257823

78267824
#if defined(GGML_USE_CUBLAS)
7827-
/* free resources */
78287825
CUDA_CHECK(cudaFree(d_X));
78297826
CUDA_CHECK(cudaFree(d_Y));
78307827
CUDA_CHECK(cudaFree(d_D));

0 commit comments

Comments
 (0)