forked from openshift-psap/llm-load-test
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
29 lines (29 loc) · 973 Bytes
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
---
# llm-load-test configuration: where results go, what dataset/load to run,
# and which inference plugin/endpoint to target.
output:
  format: "json"  # Maybe add option for pickle?
  dir: "./output/"
  file: "output-{concurrency:03d}.json"
storage:  # TODO
  type: local
dataset:
  file: "datasets/openorca_large_subset_011.jsonl"
  max_queries: 1000
  min_input_tokens: 0
  max_input_tokens: 1024
  min_output_tokens: 0
  max_output_tokens: 1024
  max_sequence_tokens: 2048  # system_prompt tokens not counted towards filters
  custom_prompt_format: null  # Sample: "{system_prompt}\n\n{prompt}"
load_options:
  type: constant  # Future options: loadgen, stair-step
  concurrency: 1  # can also be a list [1,2,4]
  duration: 20  # In seconds. Maybe in future support "100s", "10m", etc...
plugin: "openai_plugin"
plugin_options:
  use_tls: false  # Use true if querying an SSL grpc endpoint over https
  streaming: true
  model_name: "flan-t5-small"
  host: "http://route.to.host"
  endpoint: "/v1/completions"
  authorization: ""  # Set if host requires Authorization Token
extra_metadata:
  replicas: 1