Skip to content

Commit f8e3fc6

Browse files
committed
rocblas init stuff
1 parent d2ade63 commit f8e3fc6

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

ggml-cuda.cu

+10 -3
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,10 @@
1010
#include <hip/hip_runtime.h>
1111
#include <hipblas/hipblas.h>
1212
#include <hip/hip_fp16.h>
13+
#ifdef __HIP_PLATFORM_AMD__
14+
// for rocblas_initialize()
1315
#include "rocblas/rocblas.h"
16+
#endif
1417
#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
1518
#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
1619
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
@@ -2746,10 +2749,14 @@ void ggml_init_cublas() {
27462749
static bool initialized = false;
27472750

27482751
if (!initialized) {
2749-
#ifdef GGML_USE_HIPBLAS
2750-
rocblas_initialize();
2751-
hipDeviceSynchronize();
2752+
2753+
#ifdef __HIP_PLATFORM_AMD__
2754+
// Workaround for a rocBLAS bug when using multiple graphics cards:
2755+
// https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
2756+
rocblas_initialize();
2757+
CUDA_CHECK(cudaDeviceSynchronize());
27522758
#endif
2759+
27532760
CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
27542761
GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
27552762
int64_t total_vram = 0;

0 commit comments

Comments
 (0)