Skip to content

Commit

Permalink
no_weights for TXI
Browse files — browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil committed Dec 13, 2024
1 parent cc2f77c commit 0b2f878
Show file tree
Hide file tree
Showing 6 changed files with 11 additions and 12 deletions.
4 changes: 1 addition & 3 deletions optimum_benchmark/backends/py_txi/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ def download_pretrained_model(self) -> None:
def prepare_generation_config(self) -> None:
self.generation_config.eos_token_id = None
self.generation_config.pad_token_id = None

model_cache_folder = f"models/{self.config.model}".replace("/", "--")
model_cache_path = f"{self.volume}/{model_cache_folder}"
snapshot_file = f"{model_cache_path}/refs/{self.config.model_kwargs.get('revision', 'main')}"
Expand Down Expand Up @@ -95,8 +94,7 @@ def create_no_weights_model(self) -> None:

def load_model_with_no_weights(self) -> None:
original_volumes, self.config.volumes = self.config.volumes, {self.tmpdir.name: {"bind": "/data", "mode": "rw"}}
original_model, self.config.model = self.config.model, "/data/no_weights_model"
self.logger.info("\t+ Loading no weights model")
original_model, self.config.model = self.config.model, "/data/no_weights_model/"
self.load_model_from_pretrained()
self.config.model, self.config.volumes = original_model, original_volumes

Expand Down
15 changes: 6 additions & 9 deletions optimum_benchmark/backends/py_txi/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class PyTXIConfig(BackendConfig):
# Image to use for the container
image: Optional[str] = None
# Shared memory size for the container
shm_size: str = "1g"
shm_size: Optional[str] = None
# List of custom devices to forward to the container e.g. ["/dev/kfd", "/dev/dri"] for ROCm
devices: Optional[List[str]] = None
# NVIDIA-docker GPU device options e.g. "all" (all) or "0,1,2,3" (ids) or 4 (count)
Expand All @@ -41,9 +41,13 @@ class PyTXIConfig(BackendConfig):
metadata={"help": "List of environment variables to forward to the container from the host."},
)

# first connection/request
connection_timeout: int = 60
first_request_timeout: int = 60
max_concurrent_requests: Optional[int] = None

# Common options
dtype: Optional[str] = None
max_concurrent_requests: Optional[int] = None

# TGI specific
sharded: Optional[str] = None
Expand Down Expand Up @@ -72,13 +76,6 @@ def __post_init__(self):
renderDs = [file for file in os.listdir("/dev/dri") if file.startswith("renderD")]
self.devices = ["/dev/kfd"] + [f"/dev/dri/{renderDs[i]}" for i in ids]

# Common options
if self.max_concurrent_requests is None:
if self.task in TEXT_GENERATION_TASKS:
self.max_concurrent_requests = 128
elif self.task in TEXT_EMBEDDING_TASKS:
self.max_concurrent_requests = 512

# TGI specific
if self.task in TEXT_GENERATION_TASKS:
if self.trust_remote_code is None:
Expand Down
1 change: 1 addition & 0 deletions tests/configs/cpu_inference_py_txi_gpt2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ defaults:
- _base_ # inherits from base config
- _cpu_ # inherits from cpu config
- _inference_ # inherits from inference config
- _no_weights_ # inherits from no weights config
- _gpt2_ # inherits from gpt2 config
- _self_ # hydra 1.1 compatibility
- override backend: py-txi
Expand Down
1 change: 1 addition & 0 deletions tests/configs/cpu_inference_py_txi_st_bert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ defaults:
- _base_ # inherits from base config
- _cpu_ # inherits from cpu config
- _inference_ # inherits from inference config
- _no_weights_ # inherits from no weights config
- _st_bert_ # inherits from bert config
- _self_ # hydra 1.1 compatibility
- override backend: py-txi
Expand Down
1 change: 1 addition & 0 deletions tests/configs/cuda_inference_py_txi_gpt2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ defaults:
- _base_ # inherits from base config
- _cuda_ # inherits from cuda config
- _inference_ # inherits from inference config
- _no_weights_ # inherits from no weights config
- _gpt2_ # inherits from gpt2 config
- _self_ # hydra 1.1 compatibility
- override backend: py-txi
Expand Down
1 change: 1 addition & 0 deletions tests/configs/cuda_inference_py_txi_st_bert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ defaults:
- _base_ # inherits from base config
- _cuda_ # inherits from cuda config
- _inference_ # inherits from inference config
- _no_weights_ # inherits from no weights config
- _st_bert_ # inherits from bert config
- _self_ # hydra 1.1 compatibility
- override backend: py-txi
Expand Down

0 comments on commit 0b2f878

Please sign in to comment.