Skip to content

Commit

Permalink
no_weights for TXI
Browse files — browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil committed Dec 13, 2024
1 parent cc2f77c commit 0b2f878
Show file tree
Hide file tree
Showing 6 changed files with 11 additions and 12 deletions.
4 changes: 1 addition & 3 deletions optimum_benchmark/backends/py_txi/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ def download_pretrained_model(self) -> None:
def prepare_generation_config(self) -> None:
self.generation_config.eos_token_id = None
self.generation_config.pad_token_id = None

model_cache_folder = f"models/{self.config.model}".replace("/", "--")
model_cache_path = f"{self.volume}/{model_cache_folder}"
snapshot_file = f"{model_cache_path}/refs/{self.config.model_kwargs.get('revision', 'main')}"
Expand Down Expand Up @@ -95,8 +94,7 @@ def create_no_weights_model(self) -> None:

def load_model_with_no_weights(self) -> None:
original_volumes, self.config.volumes = self.config.volumes, {self.tmpdir.name: {"bind": "/data", "mode": "rw"}}
original_model, self.config.model = self.config.model, "/data/no_weights_model"
self.logger.info("\t+ Loading no weights model")
original_model, self.config.model = self.config.model, "/data/no_weights_model/"
self.load_model_from_pretrained()
self.config.model, self.config.volumes = original_model, original_volumes

Expand Down
15 changes: 6 additions & 9 deletions optimum_benchmark/backends/py_txi/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class PyTXIConfig(BackendConfig):
# Image to use for the container
image: Optional[str] = None
# Shared memory size for the container
shm_size: str = "1g"
shm_size: Optional[str] = None
# List of custom devices to forward to the container e.g. ["/dev/kfd", "/dev/dri"] for ROCm
devices: Optional[List[str]] = None
# NVIDIA-docker GPU device options e.g. "all" (all) or "0,1,2,3" (ids) or 4 (count)
Expand All @@ -41,9 +41,13 @@ class PyTXIConfig(BackendConfig):
metadata={"help": "List of environment variables to forward to the container from the host."},
)

# first connection/request
connection_timeout: int = 60
first_request_timeout: int = 60
max_concurrent_requests: Optional[int] = None

# Common options
dtype: Optional[str] = None
max_concurrent_requests: Optional[int] = None

# TGI specific
sharded: Optional[str] = None
Expand Down Expand Up @@ -72,13 +76,6 @@ def __post_init__(self):
renderDs = [file for file in os.listdir("/dev/dri") if file.startswith("renderD")]
self.devices = ["/dev/kfd"] + [f"/dev/dri/{renderDs[i]}" for i in ids]

# Common options
if self.max_concurrent_requests is None:
if self.task in TEXT_GENERATION_TASKS:
self.max_concurrent_requests = 128
elif self.task in TEXT_EMBEDDING_TASKS:
self.max_concurrent_requests = 512

# TGI specific
if self.task in TEXT_GENERATION_TASKS:
if self.trust_remote_code is None:
Expand Down
1 change: 1 addition & 0 deletions tests/configs/cpu_inference_py_txi_gpt2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ defaults:
- _base_ # inherits from base config
- _cpu_ # inherits from cpu config
- _inference_ # inherits from inference config
- _no_weights_ # inherits from no weights config
- _gpt2_ # inherits from gpt2 config
- _self_ # hydra 1.1 compatibility
- override backend: py-txi
Expand Down
1 change: 1 addition & 0 deletions tests/configs/cpu_inference_py_txi_st_bert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ defaults:
- _base_ # inherits from base config
- _cpu_ # inherits from cpu config
- _inference_ # inherits from inference config
- _no_weights_ # inherits from no weights config
- _st_bert_ # inherits from bert config
- _self_ # hydra 1.1 compatibility
- override backend: py-txi
Expand Down
1 change: 1 addition & 0 deletions tests/configs/cuda_inference_py_txi_gpt2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ defaults:
- _base_ # inherits from base config
- _cuda_ # inherits from cuda config
- _inference_ # inherits from inference config
- _no_weights_ # inherits from no weights config
- _gpt2_ # inherits from gpt2 config
- _self_ # hydra 1.1 compatibility
- override backend: py-txi
Expand Down
1 change: 1 addition & 0 deletions tests/configs/cuda_inference_py_txi_st_bert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ defaults:
- _base_ # inherits from base config
- _cuda_ # inherits from cuda config
- _inference_ # inherits from inference config
- _no_weights_ # inherits from no weights config
- _st_bert_ # inherits from bert config
- _self_ # hydra 1.1 compatibility
- override backend: py-txi
Expand Down

0 comments on commit 0b2f878

Please sign in to comment.