
Commit 096f0b0

revert unnecessary hipblas conditionals
1 parent: d81e81a

9 files changed, +36 -36 lines

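Context for the revert (an assumption, not stated in the commit itself): hipBLAS builds of this codebase are generally compiled with GGML_USE_CUBLAS defined in addition to GGML_USE_HIPBLAS, since the HIP port reuses the CUDA code path. Under that assumption the "|| defined GGML_USE_HIPBLAS" half of each guard is redundant, which is why every hunk below collapses the compound conditional back to the plain cuBLAS check. A minimal C sketch of the idea (guard_demo.c is a hypothetical example, not a file in this commit):

/* guard_demo.c -- hypothetical sketch, not part of this commit.
 * Assumption: a hipBLAS build passes -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS,
 * so the single cuBLAS guard below already covers CUDA and HIP builds and
 * the "|| defined GGML_USE_HIPBLAS" half of the reverted guards adds nothing. */
#include <stdio.h>

int main(void) {
#ifdef GGML_USE_CUBLAS
    /* GPU path: taken on CUDA builds and, under the assumption above, on HIP builds too. */
    printf("GPU (cuBLAS/hipBLAS) path enabled\n");
#else
    /* CPU-only path: neither backend macro was defined at compile time. */
    printf("CPU-only build\n");
#endif
    return 0;
}

Built plainly (cc guard_demo.c) it takes the CPU branch; built with -DGGML_USE_CUBLAS alone, or with both defines as a HIP build would pass them, it takes the GPU branch.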

examples/common.cpp (+4 -4)

@@ -304,7 +304,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
 invalid_param = true;
 break;
 }
-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 params.main_gpu = std::stoi(argv[i]);
 #else
 fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a main GPU.\n");

@@ -314,7 +314,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
 invalid_param = true;
 break;
 }
-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 std::string arg_next = argv[i];

 // split string by , and /

@@ -334,7 +334,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
 fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n");
 #endif // GGML_USE_CUBLAS
 } else if (arg == "--low-vram" || arg == "-lv") {
-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 params.low_vram = true;
 #else
 fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set lower vram usage.\n");

@@ -414,7 +414,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
 exit(1);
 }

-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 if (!params.lora_adapter.empty() && params.n_gpu_layers > 0) {
 fprintf(stderr, "%s: error: the simultaneous use of LoRAs and GPU acceleration is not supported", __func__);
 exit(1);

examples/server/server.cpp (+3 -3)

@@ -565,7 +565,7 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
 invalid_param = true;
 break;
 }
-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 std::string arg_next = argv[i];

 // split string by , and /

@@ -588,7 +588,7 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
 }
 else if (arg == "--low-vram" || arg == "-lv")
 {
-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 params.low_vram = true;
 #else
 fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set lower vram usage.\n");

@@ -599,7 +599,7 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
 invalid_param = true;
 break;
 }
-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 params.main_gpu = std::stoi(argv[i]);
 #else
 LOG_WARNING("llama.cpp was compiled without cuBLAS. It is not possible to set a main GPU.", {});

ggml.c (+5 -5)

@@ -163,7 +163,7 @@ inline static void* ggml_aligned_malloc(size_t size) {
 #elif defined(GGML_USE_OPENBLAS)
 #include <cblas.h>
 #endif
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
+#if defined(GGML_USE_CUBLAS)
 #include "ggml-cuda.h"
 #endif
 #if defined(GGML_USE_CLBLAST)

@@ -4119,7 +4119,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
 GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
 }

-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
+#if defined(GGML_USE_CUBLAS)
 ggml_init_cublas();
 #elif defined(GGML_USE_CLBLAST)
 ggml_cl_init();

@@ -14908,7 +14908,7 @@ static void ggml_compute_forward_cross_entropy_loss_back(
 static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
 GGML_ASSERT(params);

-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 bool skip_cpu = ggml_cuda_compute_forward(params, tensor);
 if (skip_cpu) {
 return;

@@ -16395,7 +16395,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)

 size_t cur = 0;

-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
+#if defined(GGML_USE_CUBLAS)
 if (ggml_cuda_can_mul_mat(node->src0, node->src1, node)) {
 node->n_tasks = 1; // TODO: this actually is doing nothing
 // the threads are still spinning

@@ -18696,7 +18696,7 @@ int ggml_cpu_has_wasm_simd(void) {
 }

 int ggml_cpu_has_blas(void) {
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS) || defined(GGML_USE_CLBLAST)
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
 return 1;
 #else
 return 0;

llama-util.h (+1 -1)

@@ -441,7 +441,7 @@ struct llama_buffer {
 llama_buffer& operator=(llama_buffer&&) = delete;
 };

-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
 struct llama_ctx_buffer {
 uint8_t * addr = NULL;

llama.cpp (+11 -11)

@@ -10,7 +10,7 @@
 #include "llama.h"

 #include "ggml.h"
-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
 #elif defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"

@@ -175,7 +175,7 @@ struct llama_kv_cache {
 ggml_free(ctx);
 }

-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 ggml_cuda_free_data(k);
 ggml_cuda_free_data(v);
 #endif // GGML_USE_CUBLAS

@@ -234,7 +234,7 @@ struct llama_model {
 ggml_free(ctx);
 }

-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 for (size_t i = 0; i < tensors_by_name.size(); ++i) {
 ggml_cuda_free_data(tensors_by_name[i].second);
 }

@@ -800,7 +800,7 @@ struct llama_model_loader {
 lmlock->grow_to(lock_size);
 }
 break;
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
+#if defined(GGML_USE_CUBLAS)
 case GGML_BACKEND_GPU:
 case GGML_BACKEND_GPU_SPLIT:
 ggml_cuda_transform_tensor(lt.data, lt.ggml_tensor);

@@ -920,7 +920,7 @@ static bool kv_cache_init(
 ggml_set_name(cache.v, "cache_v");

 (void) n_gpu_layers;
-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 if (n_gpu_layers > n_layer + 1) {
 ggml_cuda_assign_buffers_no_scratch(cache.v);
 }

@@ -1150,7 +1150,7 @@ static void llama_model_load_internal(
 }

 (void) main_gpu;
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
+#if defined(GGML_USE_CUBLAS)
 fprintf(stderr, "%s: using CUDA for GPU acceleration\n", __func__);
 ggml_cuda_set_main_device(main_gpu);
 #define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU

@@ -1261,7 +1261,7 @@ static void llama_model_load_internal(

 (void) vram_scratch;
 (void) n_batch;
-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 if (low_vram) {
 fprintf(stderr, "%s: not allocating a VRAM scratch buffer due to low VRAM option\n", __func__);
 ggml_cuda_set_scratch_size(0); // disable scratch

@@ -1274,7 +1274,7 @@ static void llama_model_load_internal(
 }
 }
 #endif // GGML_USE_CUBLAS
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS) || defined(GGML_USE_CLBLAST)
+#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
 const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));

 fprintf(stderr, "%s: offloading %d repeating layers to GPU\n", __func__, n_gpu);

@@ -1314,7 +1314,7 @@ static void llama_model_load_internal(
 }

 (void) tensor_split;
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
+#if defined(GGML_USE_CUBLAS)
 {
 ggml_cuda_set_tensor_split(tensor_split);
 }

@@ -1435,7 +1435,7 @@ static bool llama_eval_internal(
 offload_func_t offload_func_kq = llama_nop;
 offload_func_t offload_func_v = llama_nop;

-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 if (n_gpu_layers > n_layer) {
 offload_func_nr = ggml_cuda_assign_buffers;
 }

@@ -1450,7 +1450,7 @@ static bool llama_eval_internal(
 for (int il = 0; il < n_layer; ++il) {
 offload_func_t offload_func = llama_nop;

-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 if (il >= i_gpu_start) {
 offload_func = ggml_cuda_assign_buffers;
 }

llama.h (+2 -2)

@@ -2,7 +2,7 @@
 #define LLAMA_H

 #include "ggml.h"
-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 #include "ggml-cuda.h"
 #define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES
 #else

@@ -46,7 +46,7 @@
 #define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
 #define LLAMA_SESSION_VERSION 1

-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
+#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
 // Defined when llama.cpp is compiled with support for offloading model layers to GPU.
 #define LLAMA_SUPPORTS_GPU_OFFLOAD
 #endif

otherarch/ggml_v2.c (+7 -7)

@@ -140,7 +140,7 @@ inline static void* ggml_v2_aligned_malloc(size_t size) {
 #elif defined(GGML_USE_OPENBLAS)
 #include <cblas.h>
 #endif
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
+#if defined(GGML_USE_CUBLAS)
 #include "ggml_v2-cuda.h"
 #endif
 #if defined(GGML_USE_CLBLAST)

@@ -3897,7 +3897,7 @@ struct ggml_v2_context * ggml_v2_init(struct ggml_v2_init_params params) {
 GGML_V2_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
 }

-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
+#if defined(GGML_USE_CUBLAS)
 ggml_v2_init_cublas();
 #elif defined(GGML_USE_CLBLAST)
 if(quants_unshuffled)

@@ -9451,7 +9451,7 @@ static void ggml_v2_compute_forward_mul_mat_f32(
 // nb01 >= nb00 - src0 is not transposed
 // compute by src0 rows

-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
+#if defined(GGML_USE_CUBLAS)
 if (ggml_v2_cuda_can_mul_mat(src0, src1, dst)) {
 if (params->ith == 0 && params->type == GGML_V2_TASK_COMPUTE) {
 ggml_v2_cuda_mul_mat(src0, src1, dst, params->wdata, params->wsize);

@@ -9645,7 +9645,7 @@ static void ggml_v2_compute_forward_mul_mat_f16_f32(
 // nb01 >= nb00 - src0 is not transposed
 // compute by src0 rows

-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
+#if defined(GGML_USE_CUBLAS)
 if (ggml_v2_cuda_can_mul_mat(src0, src1, dst)) {
 if (params->ith == 0 && params->type == GGML_V2_TASK_COMPUTE) {
 ggml_v2_cuda_mul_mat(src0, src1, dst, params->wdata, params->wsize);

@@ -9884,7 +9884,7 @@ static void ggml_v2_compute_forward_mul_mat_q_f32(
 // nb01 >= nb00 - src0 is not transposed
 // compute by src0 rows

-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
+#if defined(GGML_USE_CUBLAS)
 if (ggml_v2_cuda_can_mul_mat(src0, src1, dst)) {
 if (params->ith == 0 && params->type == GGML_V2_TASK_COMPUTE) {
 ggml_v2_cuda_mul_mat(src0, src1, dst, params->wdata, params->wsize);

@@ -14064,7 +14064,7 @@ void ggml_v2_graph_compute(struct ggml_v2_context * ctx, struct ggml_v2_cgraph *

 size_t cur = 0;

-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS)
+#if defined(GGML_USE_CUBLAS)
 if (ggml_v2_cuda_can_mul_mat(node->src0, node->src1, node)) {
 node->n_tasks = 1; // TODO: this actually is doing nothing
 // the threads are still spinning

@@ -15562,7 +15562,7 @@ int ggml_v2_cpu_has_wasm_simd(void) {
 }

 int ggml_v2_cpu_has_blas(void) {
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_HIPBLAS) || defined(GGML_USE_CLBLAST)
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
 return 1;
 #else
 return 0;

otherarch/llama_v2-util.h (+1 -1)

@@ -415,7 +415,7 @@ struct llama_v2_buffer {
 llama_v2_buffer& operator=(llama_v2_buffer&&) = delete;
 };

-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 #include "ggml_v2-cuda.h"
 struct llama_v2_ctx_buffer {
 uint8_t * addr = NULL;

otherarch/llama_v2.cpp (+2 -2)

@@ -9,7 +9,7 @@
 #include "llama_v2.h"

 #include "ggml_v2.h"
-#if defined GGML_USE_CUBLAS || defined GGML_USE_HIPBLAS
+#ifdef GGML_USE_CUBLAS
 #include "ggml_v2-cuda.h"
 #elif defined(GGML_USE_CLBLAST)
 #include "ggml_v2-opencl.h"

@@ -3088,4 +3088,4 @@ std::vector<llama_token> llama_v2_tokenize(struct llama_v2_context * ctx, const
 res.resize(n);

 return res;
-}
+}
