File tree 1 file changed +10
-3
lines changed
1 file changed +10
-3
lines changed Original file line number Diff line number Diff line change 10
10
#include <hip/hip_runtime.h>
11
11
#include <hipblas/hipblas.h>
12
12
#include <hip/hip_fp16.h>
13
+ #ifdef __HIP_PLATFORM_AMD__
14
+ // for rocblas_initialize()
13
15
#include "rocblas/rocblas.h"
16
+ #endif
14
17
#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F
15
18
#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F
16
19
#define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
@@ -2746,10 +2749,14 @@ void ggml_init_cublas() {
2746
2749
static bool initialized = false ;
2747
2750
2748
2751
if (!initialized) {
2749
- #ifdef GGML_USE_HIPBLAS
2750
- rocblas_initialize ();
2751
- hipDeviceSynchronize ();
2752
+
2753
+ #ifdef __HIP_PLATFORM_AMD__
2754
+ // Workaround for a rocBLAS bug when using multiple graphics cards:
2755
+ // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
2756
+ rocblas_initialize ();
2757
+ CUDA_CHECK (cudaDeviceSynchronize ());
2752
2758
#endif
2759
+
2753
2760
CUDA_CHECK (cudaGetDeviceCount (&g_device_count));
2754
2761
GGML_ASSERT (g_device_count <= GGML_CUDA_MAX_DEVICES);
2755
2762
int64_t total_vram = 0 ;
You can’t perform that action at this time.
0 commit comments