From 11134b69ae6a395b35ec76d3517d1bcf54be3850 Mon Sep 17 00:00:00 2001
From: Kuntai Du
Date: Thu, 16 Jan 2025 23:46:18 +0800
Subject: [PATCH 1/2] adjust the path

Signed-off-by: Kuntai Du
---
 examples/online_serving/disaggregated_prefill.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/online_serving/disaggregated_prefill.sh b/examples/online_serving/disaggregated_prefill.sh
index 87155273a81d1..82288ccb74f68 100644
--- a/examples/online_serving/disaggregated_prefill.sh
+++ b/examples/online_serving/disaggregated_prefill.sh
@@ -69,7 +69,7 @@ wait_for_server 8200
 # instance
 # NOTE: the usage of this API is subject to change --- in the future we will
 # introduce "vllm connect" to connect between prefill and decode instances
-python3 ../benchmarks/disagg_benchmarks/disagg_prefill_proxy_server.py &
+python3 ../../benchmarks/disagg_benchmarks/disagg_prefill_proxy_server.py &
 sleep 1
 
 # serve two example requests

From 4b888cf8d6714f6ca4858630dd9a53fdd6f485a1 Mon Sep 17 00:00:00 2001
From: Kuntai Du
Date: Thu, 16 Jan 2025 23:53:11 +0800
Subject: [PATCH 2/2] add -xe flag to make sure the script crashes when error happens

Signed-off-by: Kuntai Du
---
 examples/online_serving/disaggregated_prefill.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/online_serving/disaggregated_prefill.sh b/examples/online_serving/disaggregated_prefill.sh
index 82288ccb74f68..2bb2824c6c86f 100644
--- a/examples/online_serving/disaggregated_prefill.sh
+++ b/examples/online_serving/disaggregated_prefill.sh
@@ -3,6 +3,8 @@
 # We will launch 2 vllm instances (1 for prefill and 1 for decode),
 # and then transfer the KV cache between them.
 
+set -xe
+
 echo "🚧🚧 Warning: The usage of disaggregated prefill is experimental and subject to change 🚧🚧"
 sleep 1
 