-
Notifications
You must be signed in to change notification settings - Fork 9
/
model_definitions.py
47 lines (43 loc) · 1.45 KB
/
model_definitions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from typing import Dict
from llama_api.schemas.models import ExllamaModel, LlamaCppModel
# ================== LLaMA.cpp models ================== #
airoboros_l2_13b_gguf = LlamaCppModel(
model_path="TheBloke/Airoboros-L2-13B-2.1-GGUF", # automatic download
max_total_tokens=8192,
rope_freq_base=26000,
rope_freq_scale=0.5,
n_gpu_layers=30,
n_batch=8192,
)
mythomax_l2_kimiko_13b_gguf = LlamaCppModel(
model_path="mythomax-l2-kimiko-v2-13b.Q4_K_S.gguf", # manual download
max_total_tokens=4096,
n_gpu_layers=40,
n_batch=4096,
)
open_llama_3b_v2_gguf = LlamaCppModel(
model_path="open-llama-3b-v2-q4_0.gguf", # manual download
max_total_tokens=2048,
n_gpu_layers=-1,
)
# ================== ExLLaMa models ================== #
orca_mini_7b = ExllamaModel(
model_path="orca_mini_7b", # manual download
max_total_tokens=4096,
compress_pos_emb=2.0,
)
chronos_hermes_13b_v2 = ExllamaModel(
model_path="Austism/chronos-hermes-13b-v2-GPTQ", # automatic download
max_total_tokens=4096,
)
mythomax_l2_13b_gptq = ExllamaModel(
model_path="TheBloke/MythoMax-L2-13B-GPTQ", # automatic download
max_total_tokens=4096,
)
# Define a mapping from OpenAI model names to LLaMA models.
# e.g. If you request API model "gpt-3.5-turbo",
# the API will load the LLaMA model "orca_mini_3b"
openai_replacement_models: Dict[str, str] = {
"gpt-3.5-turbo": "airoboros_l2_13b_gguf",
"gpt-4": "mythomax_l2_13b_gptq",
}