# Config.toml
# Server configuration: model, tokenizer, BNF, GPU adapter, listener and WebUI settings.
# Model loading and inference settings.
[model]
embed_device = "Cpu" # Device to put the embed tensor on ("Cpu" or "Gpu").
max_batch = 8 # Maximum number of batches that are cached on the GPU.
name = "RWKV-x070-World-0.1B-v2.8-20241210-ctx4096.st" # File name of the model (presumably looked up inside `path` below; TODO confirm).
path = "assets/models" # Path to the folder containing all models.
precision = "Fp16" # Precision for intermediate tensors ("Fp16" or "Fp32"). "Fp32" yields better outputs but is slower.
quant = 0 # Layers to be quantized (presumably a layer count, so 0 would mean no quantization; TODO confirm).
quant_type = "Int8" # Quantization type ("Int8" or "NF4").
stop = ["\n\n"] # Additional stop words in generation.
token_chunk_size = 128 # Size of the token chunk that is inferred at once. For high-end GPUs, this could be 64 or 128 (faster).
# Optional examples. Uncommenting a `[[state]]` or `[[lora]]` header starts a new
# array-of-tables entry, so every key that follows belongs to that entry until the next header.
# [[state]] # State-tuned initial state.
# id = "fd7a60ed-7807-449f-8256-bccae3246222" # UUID for this state, which is used to specify which one to use in the APIs.
# name = "x060-3B" # Given name for this state (optional).
# path = "rwkv-x060-chn_single_round_qa-3B-20240505-ctx1024.state" # State file (TODO confirm which folder this is resolved against).
# [[state]] # Load another initial state.
# id = "6a9c60a4-0f4c-40b1-a31f-987f73e20315" # UUID for this state.
# path = "rwkv-x060-chn_single_round_qa-3B-20240502-ctx1024.state"
# [[lora]] # LoRA and blend factors.
# alpha = 192 # Presumably the blend factor for this LoRA (TODO confirm).
# path = "assets/models/rwkv-x060-3b.lora"
# Tokenizer settings.
[tokenizer]
path = "assets/tokenizer/rwkv_vocab_v20230424.json" # Path to the tokenizer vocabulary file (JSON).
# BNF schema settings.
[bnf]
enable_bytes_cache = true # Enable the cache that accelerates the expansion of certain short schemas.
start_nonterminal = "start" # Name of the initial nonterminal of the BNF schemas.
# GPU selection. `Auto` and `Manual` look like alternatives: keep exactly one of them
# uncommented (NOTE(review): confirm against the consumer's schema).
[adapter]
Auto = {} # Choose the best GPU.
# Manual = 0 # Manually specify which GPU to use (presumably a GPU index; TODO confirm).
# Network listener and app-key settings.
[listen]
acme = false # NOTE(review): undocumented; presumably toggles ACME certificate provisioning. Confirm.
domain = "local" # NOTE(review): undocumented; presumably the domain used together with `acme`/`tls`. Confirm.
ip = "0.0.0.0" # Use IPv4 ("0.0.0.0" is the IPv4 wildcard address).
# ip = "::" # Use IPv6 ("::" is the IPv6 wildcard address).
force_pass = true # NOTE(review): undocumented; the name suggests an authentication check may be skipped. Confirm before exposing the server.
port = 65530 # Port to listen on.
slot = "permisionkey" # NOTE(review): undocumented; the spelling may be what the consumer expects, so do not "fix" it without checking.
tls = false # NOTE(review): undocumented; presumably enables TLS. Confirm.
[[listen.app_keys]] # Allow multiple app keys (repeat this header to add another entry).
app_id = "admin"
secret_key = "ai00_is_good" # Sample credential stored in plain text; replace it before deployment.
[web] # Remove this table to disable the WebUI.
path = "assets/www/index.zip" # Path to the WebUI archive.
# [embed] # Uncomment this table to enable embedding models (via fast-embedding ONNX models).
# endpoint = "https://hf-mirror.com"
# home = "assets/models/hf"
# lib = "assets/ort/onnxruntime.dll" # Only used on Windows.
# name = { MultilingualE5Small = {} }