Skip to content

Commit

Permalink
Merge pull request exo-explore#482 from exo-explore/is_finished_fixes
Browse files (browse the repository at this point in the history)
fix end of request behaviour and add back broadcasting tokens to other nodes
  • Loading branch information
AlexCheema authored Nov 21, 2024
2 parents c773552 + 72c3fda commit 93d38e2
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion exo/orchestration/standard_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,11 @@ async def process_inference_result(
token = await self.inference_engine.sample(result)
await self.inference_engine.ensure_shard(shard)
self.buffered_token_output[request_id][0].append(token.item())
self.trigger_on_token_callbacks(request_id, self.buffered_token_output[request_id][0], is_finished)
if DEBUG >= 2: print(f"[{request_id}] result size: {result.size}, is finished: {is_finished}, buffered tokens: {len(self.buffered_token_output[request_id][0])}")
is_finished = token.item() == self.inference_engine.tokenizer.eos_token_id
forward = token.reshape(1, -1)
self.trigger_on_token_callbacks(request_id, self.buffered_token_output[request_id][0], is_finished)
asyncio.create_task(self.broadcast_result(request_id, self.buffered_token_output[request_id][0], is_finished))
else:
forward = result

Expand Down

0 comments on commit 93d38e2

Please sign in to comment.