@@ -28,6 +28,7 @@
 
 from tqdm import tqdm
 
+from lighteval.models.model_output import ModelResponse
 from lighteval.utils.imports import is_litellm_available, is_openai_available, is_vllm_available
 
 
@@ -195,20 +196,30 @@ def __call_litellm(self, prompts):
         def __call_api(prompt):
             for _ in range(self.API_MAX_RETRY):
                 try:
-                    response = litellm.completion(
-                        model=self.model,
-                        messages=prompt,
-                        response_format={"type": "text"},
-                        max_tokens=512,
-                        n=1,
-                        caching=True,
-                    )
+                    kwargs = {
+                        "model": self.model,
+                        "messages": prompt,
+                        "response_format": {"type": "text"},
+                        "max_tokens": 512,
+                        "n": 1,
+                        "caching": True,
+                    }
+                    response = litellm.completion(**kwargs)
                     text = response.choices[0].message.content
+                    if not text or response.failed:
+                        kwargs["caching"] = False
+                        response = litellm.completion(**kwargs)
+                        text = response.choices[0].message.content
+                        if not text or response.failed:
+                            # Just return an error response if the second attempt fails too
+                            return ModelResponse(
+                                text="Failed to get response from the API.", model=self.model, failed=True
+                            )
                     return text
                 except Exception as e:
                     logger.warning(f"{type(e), e}")
                     time.sleep(self.API_RETRY_SLEEP)
-            raise Exception("Failed to get response from the API")
+            return ModelResponse(text="Failed to get response from the API.", model=self.model, failed=True)
 
         results = []
         with ThreadPoolExecutor(100) as executor:
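In short, the change builds the request as a reusable kwargs dict so the call can be replayed: if the cached completion comes back empty or flagged as failed, the judge retries once with caching disabled, and if that also fails it returns a ModelResponse with failed=True instead of raising, so one bad prompt no longer has to abort the batch. Below is a minimal standalone sketch of that cache-fallback pattern, assuming only that litellm is installed; the helper name, model, and messages are illustrative placeholders, and the sketch checks only for empty text, since a stock litellm response object may not expose the "failed" flag the diff consults:

import litellm

def complete_with_cache_fallback(model: str, messages: list) -> str | None:
    # Hypothetical helper, not part of lighteval: the first attempt uses
    # litellm's cache; an empty answer triggers one uncached retry.
    kwargs = {
        "model": model,
        "messages": messages,
        "response_format": {"type": "text"},
        "max_tokens": 512,
        "n": 1,
        "caching": True,  # may serve a stale or empty cached entry
    }
    response = litellm.completion(**kwargs)
    text = response.choices[0].message.content
    if not text:
        kwargs["caching"] = False  # bypass the cache on the second attempt
        response = litellm.completion(**kwargs)
        text = response.choices[0].message.content
    return text or None  # caller maps None to a failed ModelResponse

Returning a sentinel instead of raising fits the surrounding code: the prompts are dispatched through a ThreadPoolExecutor with 100 workers, where an unhandled exception in one task would otherwise surface only when results are collected and could take down the rest of the run.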