Skip to content

Commit d01fabe

Browse files
authored
fix maximum token count (#553)
* fix maximum token count * fix maximum token count 1 * fix maximum token count 2 原来补全最多只有4096啊... * fix maximum token count 3 重新梳理一下逻辑 --------- Signed-off-by: San <zhujunsan@gmail.com>
1 parent 63a0f44 commit d01fabe

File tree

1 file changed

+27
-14
lines changed

1 file changed

+27
-14
lines changed

service/src/chatgpt/index.ts

Lines changed: 27 additions & 14 deletions
Original file line number · Diff line number · Diff line change
@@ -57,31 +57,44 @@ export async function initApi(key: KeyConfig, chatModel: string, maxContextCount
5757

5858
// Set the token limits based on the model's type. This is because different models have different token limits.
5959
// The token limit includes the token count from both the message array sent and the model response.
60-
// 'gpt-35-turbo' has a limit of 4096 tokens, 'gpt-4' and 'gpt-4-32k' have limits of 8192 and 32768 tokens respectively.
61-
// Check if the model type is GPT-4-turbo
62-
if (model.toLowerCase().includes('gpt-4o') || model.toLowerCase().includes('gpt-4-turbo') || model.toLowerCase().includes('1106-preview') || model.toLowerCase().includes('0125-preview')) {
63-
// If it's a 'gpt-4o'/'gpt-4-turbo'/'1106-preview'/'0125-preview' model, set the maxModelTokens to 128000
60+
61+
// Check if the model type is GPT-4-turbo or newer
62+
if (model.toLowerCase().includes('gpt-4o') || model.toLowerCase().includes('gpt-4-turbo') || model.toLowerCase().includes('-preview')) {
63+
// If it's a 'gpt-4o'/'gpt-4-turbo'/'xxxx-preview' model, set the maxModelTokens to 128000
6464
options.maxModelTokens = 128000
65-
options.maxResponseTokens = 32768
65+
options.maxResponseTokens = 4096
6666
}
67-
// Check if the model type includes '16k'
68-
if (model.toLowerCase().includes('16k')) {
69-
// If it's a '16k' model, set the maxModelTokens to 16384 and maxResponseTokens to 4096
70-
options.maxModelTokens = 16384
67+
else if (model.toLowerCase().includes('gpt-4')) {
68+
// If it's a 'gpt-4' model, set the maxModelTokens and maxResponseTokens to 8192 and 2048 respectively
69+
options.maxModelTokens = 8192
70+
options.maxResponseTokens = 2048
71+
}
72+
// Check if the model type includes 'gpt-3.5-turbo'
73+
else if (model.toLowerCase().includes('gpt-3.5-turbo-instruct') || model.toLowerCase().includes('gpt-3.5-turbo-0613')) {
74+
// If it's a old 'gpt-3.5-turbo' model, set the maxModelTokens to 4096 and maxResponseTokens to 1024
75+
options.maxModelTokens = 4096
76+
options.maxResponseTokens = 1024
77+
}
78+
// Check if the model type includes 'gpt-3.5-turbo'
79+
else if (model.toLowerCase().includes('gpt-3.5-turbo')) {
80+
// If it's a 'gpt-3.5-turbo' model, set the maxModelTokens to 16385 and maxResponseTokens to 4096
81+
options.maxModelTokens = 16385
7182
options.maxResponseTokens = 4096
7283
}
84+
// Check if the model type includes '32k'
7385
else if (model.toLowerCase().includes('32k')) {
7486
// If it's a '32k' model, set the maxModelTokens to 32768 and maxResponseTokens to 8192
7587
options.maxModelTokens = 32768
7688
options.maxResponseTokens = 8192
7789
}
78-
else if (model.toLowerCase().includes('gpt-4')) {
79-
// If it's a 'gpt-4' model, set the maxModelTokens and maxResponseTokens to 8192 and 2048 respectively
80-
options.maxModelTokens = 8192
81-
options.maxResponseTokens = 2048
90+
// Check if the model type includes '16k'
91+
else if (model.toLowerCase().includes('16k')) {
92+
// If it's a '16k' model, set the maxModelTokens to 16385 and maxResponseTokens to 4096
93+
options.maxModelTokens = 16385
94+
options.maxResponseTokens = 4096
8295
}
96+
// If none of the above, use the default values
8397
else {
84-
// If none of the above, use the default values, set the maxModelTokens and maxResponseTokens to 8192 and 2048 respectively
8598
options.maxModelTokens = 4096
8699
options.maxResponseTokens = 1024
87100
}

0 commit comments

Comments (0)