You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
logger.error(f"Failed to build the chat prompt. The input is too long for the available context length.\n\nTruncation length: {state['truncation_length']}\nmax_new_tokens: {state['max_new_tokens']} (is it too high?)\nAvailable context length: {max_length}\n")
210
+
raiseValueError
201
211
else:
202
-
left=mid
212
+
logger.warning(f"The input has been truncated. Context length: {state['truncation_length']}, max_new_tokens: {state['max_new_tokens']}, available context length: {max_length}.")
213
+
break
203
214
204
-
messages[-1]['content'] =user_message[right:]
205
215
prompt=make_prompt(messages)
206
216
encoded_length=get_encoded_length(prompt)
207
-
ifencoded_length>max_length:
208
-
logger.error(f"Failed to build the chat prompt. The input is too long for the available context length.\n\nTruncation length: {state['truncation_length']}\nmax_new_tokens: {state['max_new_tokens']} (is it too high?)\nAvailable context length: {max_length}\n")
209
-
raiseValueError
210
-
else:
211
-
logger.warning(f"The input has been truncated. Context length: {state['truncation_length']}, max_new_tokens: {state['max_new_tokens']}, available context length: {max_length}.")
# If the user is using an alternative loader for the same model type, let them keep using it
228
-
ifnot (loader=='ExLlamav2_HF'andstate['loader'] in ['GPTQ-for-LLaMa', 'ExLlamav2', 'AutoGPTQ']) andnot (loader=='llama.cpp'andstate['loader'] in ['llamacpp_HF', 'ctransformers']):
230
+
ifnot (loader=='ExLlamav2_HF'andstate['loader'] in ['GPTQ-for-LLaMa', 'ExLlamav2', 'AutoGPTQ']) andnot (loader=='llama.cpp'andstate['loader'] in ['ctransformers']):
0 commit comments