From 0c7af7a8eaccda546736654691ab19c5575ae806 Mon Sep 17 00:00:00 2001 From: gabrielmbmb Date: Wed, 11 Sep 2024 18:13:49 +0200 Subject: [PATCH 1/2] Update entrypoint --- candle-holder-serve/entrypoint.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/candle-holder-serve/entrypoint.sh b/candle-holder-serve/entrypoint.sh index 32e1f2f..335f2b3 100644 --- a/candle-holder-serve/entrypoint.sh +++ b/candle-holder-serve/entrypoint.sh @@ -23,6 +23,10 @@ if [ ! -z "$CANDLE_HOLDER_DTYPE" ]; then ARGS+=("--dtype" "$CANDLE_HOLDER_DTYPE") fi +if [ ! -z "$CANDLE_HOLDER_NUM_WORKERS" ]; then + ARGS+=("--num-workers" "$CANDLE_HOLDER_NUM_WORKERS") +fi + CANDLE_HOLDER_HOST=${CANDLE_HOLDER_HOST:-0.0.0.0:8080} ARGS+=("--host" "$CANDLE_HOLDER_HOST") From 5a261747317f57c4dd5a248357e870840baeddb2 Mon Sep 17 00:00:00 2001 From: gabrielmbmb Date: Wed, 11 Sep 2024 18:22:37 +0200 Subject: [PATCH 2/2] Add `--buffer-size` argument --- candle-holder-serve/entrypoint-cuda.sh | 4 ++++ candle-holder-serve/entrypoint.sh | 4 ++++ candle-holder-serve/src/cli.rs | 8 ++++++++ candle-holder-serve/src/router_macro.rs | 3 ++- 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/candle-holder-serve/entrypoint-cuda.sh b/candle-holder-serve/entrypoint-cuda.sh index 895eb53..cf4101e 100644 --- a/candle-holder-serve/entrypoint-cuda.sh +++ b/candle-holder-serve/entrypoint-cuda.sh @@ -35,6 +35,10 @@ if [ ! -z "$CANDLE_HOLDER_NUM_WORKERS" ]; then ARGS+=("--num-workers" "$CANDLE_HOLDER_NUM_WORKERS") fi +if [ ! -z "$CANDLE_HOLDER_BUFFER_SIZE" ]; then + ARGS+=("--buffer-size" "$CANDLE_HOLDER_BUFFER_SIZE") +fi + CANDLE_HOLDER_HOST=${CANDLE_HOLDER_HOST:-0.0.0.0:8080} ARGS+=("--host" "$CANDLE_HOLDER_HOST") diff --git a/candle-holder-serve/entrypoint.sh b/candle-holder-serve/entrypoint.sh index 335f2b3..4cec613 100644 --- a/candle-holder-serve/entrypoint.sh +++ b/candle-holder-serve/entrypoint.sh @@ -27,6 +27,10 @@ if [ ! -z "$CANDLE_HOLDER_NUM_WORKERS" ]; then ARGS+=("--num-workers" "$CANDLE_HOLDER_NUM_WORKERS") fi +if [ ! -z "$CANDLE_HOLDER_BUFFER_SIZE" ]; then + ARGS+=("--buffer-size" "$CANDLE_HOLDER_BUFFER_SIZE") +fi + CANDLE_HOLDER_HOST=${CANDLE_HOLDER_HOST:-0.0.0.0:8080} ARGS+=("--host" "$CANDLE_HOLDER_HOST") diff --git a/candle-holder-serve/src/cli.rs b/candle-holder-serve/src/cli.rs index 7947877..13ae5b0 100644 --- a/candle-holder-serve/src/cli.rs +++ b/candle-holder-serve/src/cli.rs @@ -30,6 +30,10 @@ pub(crate) struct Cli { /// The number of workers to use for inference. #[arg(long, default_value = "1")] num_workers: usize, + + /// Channel buffer size for the inference worker. + #[arg(long, default_value = "32")] + buffer_size: usize, } impl Cli { @@ -49,6 +53,10 @@ impl Cli { self.num_workers } + pub fn buffer_size(&self) -> usize { + self.buffer_size + } + /// Get the [`candle_core::Device`] corresponding to the selected device option. /// /// # Errors diff --git a/candle-holder-serve/src/router_macro.rs b/candle-holder-serve/src/router_macro.rs index 64c078d..8b4e4a6 100644 --- a/candle-holder-serve/src/router_macro.rs +++ b/candle-holder-serve/src/router_macro.rs @@ -32,8 +32,9 @@ macro_rules! generate_router { tracing::error!("Failed to warm up the model: {}", e); }); + tracing::info!("Channel buffer size: {}", args.buffer_size()); let (tx, rx) = - mpsc::channel::>>(32); + mpsc::channel::>>(args.buffer_size()); tokio::spawn(task_distributor::< $pipeline,