diff --git a/optimum_benchmark/backends/py_txi/backend.py b/optimum_benchmark/backends/py_txi/backend.py index 6e637a31..1b02277a 100644 --- a/optimum_benchmark/backends/py_txi/backend.py +++ b/optimum_benchmark/backends/py_txi/backend.py @@ -53,7 +53,6 @@ def download_pretrained_model(self) -> None: def prepare_generation_config(self) -> None: self.generation_config.eos_token_id = None self.generation_config.pad_token_id = None - model_cache_folder = f"models/{self.config.model}".replace("/", "--") model_cache_path = f"{self.volume}/{model_cache_folder}" snapshot_file = f"{model_cache_path}/refs/{self.config.model_kwargs.get('revision', 'main')}" @@ -95,8 +94,7 @@ def create_no_weights_model(self) -> None: def load_model_with_no_weights(self) -> None: original_volumes, self.config.volumes = self.config.volumes, {self.tmpdir.name: {"bind": "/data", "mode": "rw"}} - original_model, self.config.model = self.config.model, "/data/no_weights_model" - self.logger.info("\t+ Loading no weights model") + original_model, self.config.model = self.config.model, "/data/no_weights_model/" self.load_model_from_pretrained() self.config.model, self.config.volumes = original_model, original_volumes diff --git a/optimum_benchmark/backends/py_txi/config.py b/optimum_benchmark/backends/py_txi/config.py index dae410c4..2bf6c04c 100644 --- a/optimum_benchmark/backends/py_txi/config.py +++ b/optimum_benchmark/backends/py_txi/config.py @@ -22,7 +22,7 @@ class PyTXIConfig(BackendConfig): # Image to use for the container image: Optional[str] = None # Shared memory size for the container - shm_size: str = "1g" + shm_size: Optional[str] = None # List of custom devices to forward to the container e.g. ["/dev/kfd", "/dev/dri"] for ROCm devices: Optional[List[str]] = None # NVIDIA-docker GPU device options e.g. "all" (all) or "0,1,2,3" (ids) or 4 (count) @@ -41,9 +41,13 @@ class PyTXIConfig(BackendConfig): metadata={"help": "List of environment variables to forward to the container from the host."}, ) + # first connection/request + connection_timeout: int = 60 + first_request_timeout: int = 60 + max_concurrent_requests: Optional[int] = None + # Common options dtype: Optional[str] = None - max_concurrent_requests: Optional[int] = None # TGI specific sharded: Optional[str] = None @@ -72,13 +76,6 @@ def __post_init__(self): renderDs = [file for file in os.listdir("/dev/dri") if file.startswith("renderD")] self.devices = ["/dev/kfd"] + [f"/dev/dri/{renderDs[i]}" for i in ids] - # Common options - if self.max_concurrent_requests is None: - if self.task in TEXT_GENERATION_TASKS: - self.max_concurrent_requests = 128 - elif self.task in TEXT_EMBEDDING_TASKS: - self.max_concurrent_requests = 512 - # TGI specific if self.task in TEXT_GENERATION_TASKS: if self.trust_remote_code is None: diff --git a/tests/configs/cpu_inference_py_txi_gpt2.yaml b/tests/configs/cpu_inference_py_txi_gpt2.yaml index 76e90775..1aef598e 100644 --- a/tests/configs/cpu_inference_py_txi_gpt2.yaml +++ b/tests/configs/cpu_inference_py_txi_gpt2.yaml @@ -3,6 +3,7 @@ defaults: - _base_ # inherits from base config - _cpu_ # inherits from cpu config - _inference_ # inherits from inference config + - _no_weights_ # inherits from no weights config - _gpt2_ # inherits from gpt2 config - _self_ # hydra 1.1 compatibility - override backend: py-txi diff --git a/tests/configs/cpu_inference_py_txi_st_bert.yaml b/tests/configs/cpu_inference_py_txi_st_bert.yaml index 2650e1bf..99e571b5 100644 --- a/tests/configs/cpu_inference_py_txi_st_bert.yaml +++ b/tests/configs/cpu_inference_py_txi_st_bert.yaml @@ -3,6 +3,7 @@ defaults: - _base_ # inherits from base config - _cpu_ # inherits from cpu config - _inference_ # inherits from inference config + - _no_weights_ # inherits from no weights config - _st_bert_ # inherits from bert config - _self_ # hydra 1.1 compatibility - override backend: py-txi diff --git a/tests/configs/cuda_inference_py_txi_gpt2.yaml b/tests/configs/cuda_inference_py_txi_gpt2.yaml index 73a5c10a..1c93ac36 100644 --- a/tests/configs/cuda_inference_py_txi_gpt2.yaml +++ b/tests/configs/cuda_inference_py_txi_gpt2.yaml @@ -3,6 +3,7 @@ defaults: - _base_ # inherits from base config - _cuda_ # inherits from cuda config - _inference_ # inherits from inference config + - _no_weights_ # inherits from no weights config - _gpt2_ # inherits from gpt2 config - _self_ # hydra 1.1 compatibility - override backend: py-txi diff --git a/tests/configs/cuda_inference_py_txi_st_bert.yaml b/tests/configs/cuda_inference_py_txi_st_bert.yaml index 8ae494e7..5bb38528 100644 --- a/tests/configs/cuda_inference_py_txi_st_bert.yaml +++ b/tests/configs/cuda_inference_py_txi_st_bert.yaml @@ -3,6 +3,7 @@ defaults: - _base_ # inherits from base config - _cuda_ # inherits from cuda config - _inference_ # inherits from inference config + - _no_weights_ # inherits from no weights config - _st_bert_ # inherits from bert config - _self_ # hydra 1.1 compatibility - override backend: py-txi