feat: Add "think" parameter for Ollama #1948

base: main

Changes from all commits
Chat generator changes:

```diff
@@ -109,7 +109,6 @@ def _convert_ollama_response_to_chatmessage(ollama_response: "ChatResponse") ->
     response_dict = ollama_response.model_dump()

     ollama_message = response_dict["message"]

     text = ollama_message["content"]

     tool_calls = []
@@ -122,6 +121,12 @@ def _convert_ollama_response_to_chatmessage(ollama_response: "ChatResponse") ->
     message = ChatMessage.from_assistant(text=text, tool_calls=tool_calls)

     message._meta = _convert_ollama_meta_to_openai_format(response_dict)

+    thinking = ollama_message.get("thinking", None)
+
+    if thinking is not None:
+        message._meta["thinking"] = thinking
+
     return message
```
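For context, `thinking` here is a field on the message object returned by the `ollama` Python client. A minimal sketch of the payload shape the converter consumes (illustrative values only; exact fields vary by model and client version, and `thinking` is only present when the request asked for it):

```python
# Hypothetical shape of ollama_response.model_dump() for a thinking-capable model.
response_dict = {
    "model": "qwen3:1.7b",
    "message": {
        "role": "assistant",
        "content": "The letter 'r' appears 3 times in 'strawberry'.",
        "thinking": "s-t-r-a-w-b-e-r-r-y -> 'r' at positions 3, 8, 9 -> 3 occurrences",
    },
    # ... timing and token-count fields omitted
}

ollama_message = response_dict["message"]
thinking = ollama_message.get("thinking", None)  # None when the model sent no trace
```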
```diff
@@ -156,6 +161,7 @@ def __init__(
         url: str = "http://localhost:11434",
         generation_kwargs: Optional[Dict[str, Any]] = None,
         timeout: int = 120,
+        think=False,
         keep_alive: Optional[Union[float, str]] = None,
         streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
         tools: Optional[List[Tool]] = None,
```
```diff
@@ -172,6 +178,8 @@ def __init__(
             [Ollama docs](https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values).
         :param timeout:
             The number of seconds before throwing a timeout error from the Ollama API.
+        :param think
+            Enables the model's "thinking" process.
```
Review comment: I would expand this explanation to something like the following suggested change.
```diff
         :param keep_alive:
             The option that controls how long the model will stay loaded into memory following the request.
             If not set, it will use the default value from the Ollama (5 minutes).
```
```diff
@@ -200,6 +208,7 @@ def __init__(
         self.generation_kwargs = generation_kwargs or {}
         self.url = url
         self.model = model
+        self.think = think
         self.keep_alive = keep_alive
         self.streaming_callback = streaming_callback
         self.tools = tools
```
```diff
@@ -329,6 +338,7 @@ def run(
             messages=ollama_messages,
             tools=ollama_tools,
             stream=stream,
+            think=self.think,
             keep_alive=self.keep_alive,
             options=generation_kwargs,
             format=self.response_format,
```
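Taken together, these changes would let a caller opt into thinking at construction time and read the trace back from the reply metadata. A minimal usage sketch, not part of the diff (the model name is borrowed from the integration test below, and `ChatMessage.meta` is assumed to expose the same dict the converter writes to `_meta`):

```python
from haystack.dataclasses import ChatMessage
from haystack_integrations.components.generators.ollama import OllamaChatGenerator

# Requires a local Ollama server and a thinking-capable model.
generator = OllamaChatGenerator(model="qwen3:1.7b", think=True)

result = generator.run([ChatMessage.from_user("How many 'r's are in 'strawberry'?")])
reply = result["replies"][0]

print(reply.text)                  # final answer
print(reply.meta.get("thinking"))  # reasoning trace, if the model returned one
```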
Generator changes:

```diff
@@ -36,6 +36,7 @@ def __init__(
         template: Optional[str] = None,
         raw: bool = False,
         timeout: int = 120,
+        think: bool = False,
```
Review comment: We are trying to introduce new features in the Chat Generators only. In the long run, we may deprecate Generators and keep only Chat Generators. For this reason, I won't introduce support for thinking in Generators.
```diff
         keep_alive: Optional[Union[float, str]] = None,
         streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
     ):
```
```diff
@@ -57,6 +58,8 @@ def __init__(
             if you are specifying a full templated prompt in your API request.
         :param timeout:
             The number of seconds before throwing a timeout error from the Ollama API.
+        :param think
+            Enables the model's "thinking" process.
         :param streaming_callback:
             A callback function that is called when a new token is received from the stream.
             The callback function accepts StreamingChunk as an argument.
```
```diff
@@ -75,6 +78,7 @@ def __init__(
         self.system_prompt = system_prompt
         self.model = model
         self.url = url
+        self.think = think
         self.keep_alive = keep_alive
         self.generation_kwargs = generation_kwargs or {}
         self.streaming_callback = streaming_callback
```
```diff
@@ -194,6 +198,7 @@ def run(
             model=self.model,
             prompt=prompt,
             stream=stream,
+            think=self.think,
             keep_alive=self.keep_alive,
             options=generation_kwargs,
         )
```
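For reference, the `think` keyword forwarded here maps directly onto the underlying `ollama` Python client. A sketch of the equivalent raw client call, assuming ollama-python >= 0.5.0 (the release that added `think`) and a thinking-capable model:

```python
from ollama import Client

client = Client(host="http://localhost:11434")
response = client.generate(
    model="qwen3:1.7b",
    prompt="How many 'r's are in 'strawberry'?",
    think=True,  # request a separate reasoning trace
)

print(response.thinking)  # the trace, or None if the model produced none
print(response.response)  # the final answer text
```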
Test changes:

```diff
@@ -508,6 +508,17 @@ def test_run_with_chat_history(self):
             city.lower() in response["replies"][-1].text.lower() for city in ["Manchester", "Birmingham", "Glasgow"]
         )

+    @pytest.mark.integration
+    def test_live_run_with_thinking(self):
+        chat_generator = OllamaChatGenerator(model="qwen3:1.7b", think=True)
```
Review comment: To use this model in an integration test, you should also change the following line. However, I would recommend using
```diff
+
+        message = ChatMessage.from_user("How many times does the letter 'r' appear in the word 'strawberry'?")
+        response = chat_generator.run([message])
+
+        assert isinstance(response, dict)
+        assert isinstance(response["replies"], list)
+        assert "thinking" in response["replies"][0]._meta
+
     @pytest.mark.integration
     def test_run_model_unavailable(self):
         component = OllamaChatGenerator(model="unknown_model")
```
Review comment: I would put this new parameter at the end, to make this change non-breaking.
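A sketch of what that reordering might look like for the chat generator's `__init__`, with `think` appended after the existing keyword parameters so existing positional calls keep working (import paths and the surrounding class are assumptions; `...` stands in for the existing body):

```python
from typing import Any, Callable, Dict, List, Optional, Union

from haystack.dataclasses import StreamingChunk
from haystack.tools import Tool


class OllamaChatGenerator:
    def __init__(
        self,
        model: str,
        url: str = "http://localhost:11434",
        generation_kwargs: Optional[Dict[str, Any]] = None,
        timeout: int = 120,
        keep_alive: Optional[Union[float, str]] = None,
        streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
        tools: Optional[List[Tool]] = None,
        think: bool = False,  # new parameter last: existing positional calls are unaffected
    ):
        ...
```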