Skip to content

Commit ac7ebc3

Browse files
committed
add hipBLAS name scheme to GUI and update README
1 parent 7f85cc5 commit ac7ebc3

File tree

2 files changed

+17
-11
lines changed

2 files changed

+17
-11
lines changed

README.md

+10-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
# koboldcpp-ROCM
22

3-
To install, run
4-
```make LLAMA_HIPBLAS=1```
5-
To use ROCM, set GPU layers with --gpulayers when starting koboldcpp
6-
Original [llama.cpp rocm port](https://github.com/ggerganov/llama.cpp/pull/1087) by SlyEcho, ported to koboldcpp by yellowrosecx
3+
To install, open a Terminal, navigate to the folder you want to download into, and run
4+
```
5+
git clone https://github.com/YellowRoseCx/koboldcpp-rocm.git -b main --depth 1 && \
6+
cd koboldcpp-rocm && \
7+
make LLAMA_HIPBLAS=1 -j4 && \
8+
./koboldcpp.py
9+
```
10+
When the KoboldCPP GUI appears, make sure to select "Use CuBLAS/hipBLAS" and set GPU layers
11+
12+
Original [llama.cpp rocm port](https://github.com/ggerganov/llama.cpp/pull/1087) by SlyEcho, modified and ported to koboldcpp by YellowRoseCx
713

814
Comparison with OpenCL using 6800xt
915
| Model | Offloading Method | Time Taken - Processing 593 tokens| Time Taken - Generating 200 tokens| Total Time | Perf. Diff.

koboldcpp.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,7 @@ def show_new_gui():
662662
blasbatchsize_values = ["-1", "32", "64", "128", "256", "512", "1024"]
663663
blasbatchsize_text = ["Don't Batch BLAS","32","64","128","256","512","1024"]
664664
contextsize_text = ["512", "1024", "2048", "3072", "4096", "6144", "8192"]
665-
runopts = ["Use OpenBLAS","Use CLBlast", "Use CuBLAS", "Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"]
665+
runopts = ["Use OpenBLAS","Use CLBlast", "Use CuBLAS/hipBLAS", "Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"]
666666

667667
def tabbuttonaction(name):
668668
for t in tabcontent:
@@ -781,13 +781,13 @@ def getfilename(var, text):
781781

782782
def changerunmode(a,b,c):
783783
index = runopts_var.get()
784-
if index == "Use CLBlast" or index == "Use CuBLAS":
784+
if index == "Use CLBlast" or index == "Use CuBLAS/hipBLAS":
785785
gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
786786
quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw")
787787
if index == "Use CLBlast":
788788
gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
789789
quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
790-
elif index == "Use CuBLAS":
790+
elif index == "Use CuBLAS/hipBLAS":
791791
CUDA_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
792792
CUDA_quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw")
793793
else:
@@ -798,14 +798,14 @@ def changerunmode(a,b,c):
798798
quick_gpu_selector_box.grid_forget()
799799
CUDA_quick_gpu_selector_box.grid_forget()
800800

801-
if index == "Use CuBLAS":
801+
if index == "Use CuBLAS/hipBLAS":
802802
lowvram_box.grid(row=4, column=0, padx=8, pady=1, stick="nw")
803803
quick_lowvram_box.grid(row=4, column=0, padx=8, pady=1, stick="nw")
804804
else:
805805
lowvram_box.grid_forget()
806806
quick_lowvram_box.grid_forget()
807807

808-
if index == "Use CLBlast" or index == "Use CuBLAS":
808+
if index == "Use CLBlast" or index == "Use CuBLAS/hipBLAS":
809809
gpu_layers_label.grid(row=5, column=0, padx = 8, pady=1, stick="nw")
810810
gpu_layers_entry.grid(row=5, column=1, padx=8, pady=1, stick="nw")
811811
quick_gpu_layers_label.grid(row=5, column=0, padx = 8, pady=1, stick="nw")
@@ -1155,7 +1155,7 @@ def guilaunch():
11551155
blaschoice = tk.StringVar()
11561156
blaschoice.set("BLAS = 512")
11571157

1158-
runopts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use CuBLAS GPU","Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"]
1158+
runopts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use CuBLAS/hipBLAS GPU","Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"]
11591159
runchoice = tk.StringVar()
11601160
runchoice.set("Use OpenBLAS")
11611161

@@ -1449,7 +1449,7 @@ def main(args):
14491449
compatgroup = parser.add_mutually_exclusive_group()
14501450
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
14511451
compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
1452-
compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs.", nargs='*',metavar=('[lowvram|normal] [main GPU ID]'), choices=['normal', 'lowvram', '0', '1', '2'])
1452+
compatgroup.add_argument("--usecublas", help="Use CuBLAS/hipBLAS for GPU Acceleration. Requires CUDA. Select lowvram to not allocate VRAM scratch buffer. Enter a number afterwards to select and use 1 GPU. Leaving no number will use all GPUs.", nargs='*',metavar=('[lowvram|normal] [main GPU ID]'), choices=['normal', 'lowvram', '0', '1', '2'])
14531453
parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
14541454
args = parser.parse_args()
14551455
main(args)

0 commit comments

Comments
 (0)