From 1dcb38d7f2c03ffa800aedf48745f646e42f0dc4 Mon Sep 17 00:00:00 2001
From: Shawn Tan
Date: Mon, 19 Aug 2024 19:11:19 +0000
Subject: [PATCH] Formatting & adding `quant_config`

---
 vllm/model_executor/models/granite.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/granite.py b/vllm/model_executor/models/granite.py
index 07a71109f4895..892ad8a58b8f6 100644
--- a/vllm/model_executor/models/granite.py
+++ b/vllm/model_executor/models/granite.py
@@ -278,6 +278,7 @@ def __init__(
                 self.vocab_size,
                 config.hidden_size,
                 org_num_embeddings=config.vocab_size,
+                quant_config=quant_config,
             )
         else:
             self.embed_tokens = PPMissingLayer()
@@ -424,8 +425,9 @@ def forward(
                                   attn_metadata, intermediate_tensors)
         return model_output
 
-    def compute_logits(self, hidden_states: torch.Tensor,
-                       sampling_metadata: SamplingMetadata) -> Optional[torch.Tensor]:
+    def compute_logits(
+            self, hidden_states: torch.Tensor,
+            sampling_metadata: SamplingMetadata) -> Optional[torch.Tensor]:
         logits = self.logits_processor(self.lm_head, hidden_states,
                                        sampling_metadata)
         logits /= self.config.logits_scaling