Skip to content

Commit b5283f7

Browse files
committed
fix: position calculation
1 parent 121bc8c commit b5283f7

File tree

2 files changed

+7
-6
lines changed

2 files changed

+7
-6
lines changed

src/dllama-api.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -386,8 +386,8 @@ class ApiServer {
386386

387387
inference->setBatchSize(batchSize);
388388
inference->setPosition(pos);
389-
for (NnSize i = 0; i < batchSize; i++)
390-
inference->setToken(i, promptTokens[i]);
389+
for (NnSize j = 0; j < batchSize; j++)
390+
inference->setToken(j, promptTokens[i + j]);
391391

392392
inference->forward();
393393

src/dllama.cpp

+5-4
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ static void chat(AppInferenceContext *context) {
152152
context->tokenizer->encode((char*)inputPrompt.c_str(), inputTokens, &nInputTokens, addBos, true);
153153

154154
NnSize userPromptEndPos = (NnSize)std::min<unsigned int>(seqLen, pos + nInputTokens - 1);
155-
for (;;) {
155+
for (NnSize i = 0; ;) {
156156
int remainingTokens = userPromptEndPos - pos;
157157
if (remainingTokens <= 0)
158158
break;
@@ -162,13 +162,14 @@ static void chat(AppInferenceContext *context) {
162162

163163
context->inference->setBatchSize(batchSize);
164164
context->inference->setPosition(pos);
165-
for (NnSize i = 0; i < batchSize; i++)
166-
context->inference->setToken(i, inputTokens[i]);
165+
for (NnSize j = 0; j < batchSize; j++)
166+
context->inference->setToken(j, inputTokens[i + j]);
167167

168168
context->inference->forward();
169169

170+
i += batchSize;
170171
pos += batchSize;
171-
token = inputTokens[pos + 1];
172+
token = inputTokens[i + 1];
172173
}
173174

174175
context->inference->setBatchSize(1);

0 commit comments

Comments (0)