Commit fb671c3

Add resolve_model_relative_to_config_file config option

Signed-off-by: Leon Kiefer <leon.k97@gmx.de>

1 parent: 52c1c3c

File tree: 4 files changed, +45 −5 lines

README.md

Lines changed: 4 additions & 0 deletions
@@ -127,6 +127,10 @@ Specifically,
 and
 [here](https://github.com/vllm-project/vllm/blob/ee8217e5bee5860469204ee57077a91138c9af02/vllm/engine/arg_utils.py#L201).
 
+When using local model files, specify the path to the model in the `model` field.
+By default, relative paths are resolved relative to the working directory of the Triton server process.
+To specify a path relative to the `model.json` file, set the `resolve_model_relative_to_config_file` field to `true`.
+
 For multi-GPU support, EngineArgs like tensor_parallel_size can be specified in
 [model.json](samples/model_repository/vllm_model/1/model.json).
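For example, a model.json that loads model files from a local_model directory placed next to the config file might look like the following (the directory name and the extra engine argument are illustrative, not required):

```json
{
    "model": "./local_model",
    "resolve_model_relative_to_config_file": true,
    "disable_log_requests": true
}
```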

ci/L0_backend_vllm/vllm_backend/test.sh

Lines changed: 15 additions & 0 deletions
@@ -39,12 +39,26 @@ SAMPLE_MODELS_REPO="../../../samples/model_repository"
 EXPECTED_NUM_TESTS=3
 
 rm -rf models && mkdir -p models
+
+# operational vllm model
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_opt
 
+# python model
 mkdir -p models/add_sub/1/
 wget -P models/add_sub/1/ https://raw.githubusercontent.com/triton-inference-server/python_backend/main/examples/add_sub/model.py
 wget -P models/add_sub https://raw.githubusercontent.com/triton-inference-server/python_backend/main/examples/add_sub/config.pbtxt
 
+# local vllm model
+cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_local
+sed -i 's/"facebook\/opt-125m"/".\/local_model"/' models/vllm_local/1/model.json
+sed -i '/"model": /a "resolve_model_relative_to_config_file": true,' models/vllm_local/1/model.json
+wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/config.json
+wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/merges.txt
+wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/pytorch_model.bin
+wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/special_tokens_map.json
+wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/tokenizer_config.json
+wget -P models/vllm_local/1/local_model https://huggingface.co/facebook/opt-125m/resolve/main/vocab.json
+
 # Invalid model attribute
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_1/
 sed -i 's/"disable_log_requests"/"invalid_attribute"/' models/vllm_invalid_1/1/model.json
@@ -53,6 +67,7 @@ sed -i 's/"disable_log_requests"/"invalid_attribute"/' models/vllm_invalid_1/1/m
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_2/
 sed -i 's/"facebook\/opt-125m"/"invalid_model"/' models/vllm_invalid_2/1/model.json
 
+
 RET=0
 
 run_server
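As a quick sanity check, the vllm_local layout created above can be listed before starting the server (output shown is approximate):

```sh
ls -R models/vllm_local/1
# models/vllm_local/1:
# local_model  model.json
#
# models/vllm_local/1/local_model:
# config.json  merges.txt  pytorch_model.bin  special_tokens_map.json
# tokenizer_config.json  vocab.json
```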

ci/L0_backend_vllm/vllm_backend/vllm_backend_test.py

Lines changed: 18 additions & 5 deletions
@@ -41,6 +41,7 @@ def setUp(self):
         self.triton_client = grpcclient.InferenceServerClient(url="localhost:8001")
         self.vllm_model_name = "vllm_opt"
         self.python_model_name = "add_sub"
+        self.local_vllm_model_name = "vllm_local"
 
     def test_vllm_triton_backend(self):
         # Load both vllm and add_sub models
@@ -60,9 +61,21 @@ def test_vllm_triton_backend(self):
         self.assertFalse(self.triton_client.is_model_ready(self.python_model_name))
 
         # Test vllm model and unload vllm model
-        self._test_vllm_model(send_parameters_as_tensor=True)
-        self._test_vllm_model(send_parameters_as_tensor=False)
+        self._test_vllm_model(self.vllm_model_name, send_parameters_as_tensor=True)
+        self._test_vllm_model(self.vllm_model_name, send_parameters_as_tensor=False)
         self.triton_client.unload_model(self.vllm_model_name)
+
+    def test_local_vllm_model(self):
+        # Load local vllm model
+        self.triton_client.load_model(self.local_vllm_model_name)
+        self.assertTrue(self.triton_client.is_model_ready(self.local_vllm_model_name))
+
+        # Test local vllm model
+        self._test_vllm_model(self.local_vllm_model_name, send_parameters_as_tensor=True)
+        self._test_vllm_model(self.local_vllm_model_name, send_parameters_as_tensor=False)
+
+        # Unload local vllm model
+        self.triton_client.unload_model(self.local_vllm_model_name)
 
     def test_model_with_invalid_attributes(self):
         model_name = "vllm_invalid_1"
@@ -74,7 +87,7 @@ def test_vllm_invalid_model_name(self):
         with self.assertRaises(InferenceServerException):
             self.triton_client.load_model(model_name)
 
-    def _test_vllm_model(self, send_parameters_as_tensor):
+    def _test_vllm_model(self, model_name, send_parameters_as_tensor):
         user_data = UserData()
         stream = False
         prompts = [
@@ -92,11 +105,11 @@ def _test_vllm_model(self, send_parameters_as_tensor):
                 i,
                 stream,
                 sampling_parameters,
-                self.vllm_model_name,
+                model_name,
                 send_parameters_as_tensor,
             )
             self.triton_client.async_stream_infer(
-                model_name=self.vllm_model_name,
+                model_name=model_name,
                 request_id=request_data["request_id"],
                 inputs=request_data["inputs"],
                 outputs=request_data["outputs"],

src/model.py

Lines changed: 8 additions & 0 deletions
@@ -112,6 +112,14 @@ def initialize(self, args):
         with open(engine_args_filepath) as file:
             vllm_engine_config = json.load(file)
 
+        # Resolve the model path relative to the config file
+        if vllm_engine_config.pop("resolve_model_relative_to_config_file", False):
+            vllm_engine_config["model"] = os.path.abspath(
+                os.path.join(
+                    pb_utils.get_model_dir(), vllm_engine_config["model"]
+                )
+            )
+
         # Create an AsyncLLMEngine from the config from JSON
         self.llm_engine = AsyncLLMEngine.from_engine_args(
             AsyncEngineArgs(**vllm_engine_config)
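As a standalone sketch of the resolution step above (the model directory path below is a hypothetical example; at runtime the backend uses the directory reported by pb_utils.get_model_dir()):

```python
import os

# Hypothetical directory that contains model.json inside a Triton model repository.
model_dir = "/opt/tritonserver/models/vllm_local/1"
# Relative path taken from the "model" field of model.json.
relative_model = "./local_model"

# Join the relative path onto the model directory and normalize it,
# mirroring the os.path.abspath(os.path.join(...)) call in src/model.py.
resolved = os.path.abspath(os.path.join(model_dir, relative_model))
print(resolved)  # /opt/tritonserver/models/vllm_local/1/local_model
```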
