Skip to content

Commit 0f3c9e9

Browse files
authored
feat: avg tokens / second. (#44)
1 parent e93d1e6 commit 0f3c9e9

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

src/main.cpp

+4 -2
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,7 @@ void generate(Inference* inference, SocketPool* socketPool, Tokenizer *tokenizer
109109
char* piece = tokenizer->decode(token, next);
110110

111111
if (args->benchmark)
112-
printf("🔶 %4d G %4ld ms I %4ld ms T %4ld ms S %6ld kB R %6ld kB ", pos, generationTime, inferenceTime, transferTime, sentBytes / 1024, recvBytes / 1024);
112+
printf("🔶 G %4ld ms I %4ld ms T %4ld ms S %6ld kB R %6ld kB ", generationTime, inferenceTime, transferTime, sentBytes / 1024, recvBytes / 1024);
113113
safePrintf(piece); // same as printf("%s", piece), but skips "unsafe" bytes
114114
if (args->benchmark)
115115
printf("\n");
@@ -120,8 +120,10 @@ void generate(Inference* inference, SocketPool* socketPool, Tokenizer *tokenizer
120120
free(promptTokens);
121121

122122
if (!args->benchmark) printf("\n");
123+
double avgGenerationTime = totalGenerationTime / (double)pos;
123124
printf("Generated tokens: %d\n", pos);
124-
printf("Avg generation time: %.2f ms\n", totalGenerationTime / (double)pos);
125+
printf("Avg tokens / second: %.2f\n", 1000.0 / avgGenerationTime);
126+
printf("Avg generation time: %.2f ms\n", avgGenerationTime);
125127
printf("Avg inference time: %.2f ms\n", totalInferenceTime / (double)pos);
126128
printf("Avg transfer time: %.2f ms\n", totalTransferTime / (double)pos);
127129
}

0 commit comments

Comments
 (0)