Skip to content

Commit

Permalink
feat: avg tokens / second. (#44)
Browse files (browse the repository at this point in the history)
  • Loading branch information
b4rtaz authored May 9, 2024
1 parent e93d1e6 commit 0f3c9e9
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ void generate(Inference* inference, SocketPool* socketPool, Tokenizer *tokenizer
char* piece = tokenizer->decode(token, next);

if (args->benchmark)
printf("🔶 %4d G %4ld ms I %4ld ms T %4ld ms S %6ld kB R %6ld kB ", pos, generationTime, inferenceTime, transferTime, sentBytes / 1024, recvBytes / 1024);
printf("🔶 G %4ld ms I %4ld ms T %4ld ms S %6ld kB R %6ld kB ", generationTime, inferenceTime, transferTime, sentBytes / 1024, recvBytes / 1024);
safePrintf(piece); // same as printf("%s", piece), but skips "unsafe" bytes
if (args->benchmark)
printf("\n");
Expand All @@ -120,8 +120,10 @@ void generate(Inference* inference, SocketPool* socketPool, Tokenizer *tokenizer
free(promptTokens);

if (!args->benchmark) printf("\n");
double avgGenerationTime = totalGenerationTime / (double)pos;
printf("Generated tokens: %d\n", pos);
printf("Avg generation time: %.2f ms\n", totalGenerationTime / (double)pos);
printf("Avg tokens / second: %.2f\n", 1000.0 / avgGenerationTime);
printf("Avg generation time: %.2f ms\n", avgGenerationTime);
printf("Avg inference time: %.2f ms\n", totalInferenceTime / (double)pos);
printf("Avg transfer time: %.2f ms\n", totalTransferTime / (double)pos);
}
Expand Down

0 comments on commit 0f3c9e9

Please sign in to comment.