@@ -810,7 +810,7 @@ def check_llm_command(
810
810
811
811
The format of the JSON object is:
812
812
{{
813
- "action": "execute_command" | "invoke_tool" | "answer_question" | "pass_to_npc" | "execute_sequence",
813
+ "action": "execute_command" | "invoke_tool" | "answer_question" | "pass_to_npc" | "execute_sequence" | "request_input" ,
814
814
"tool_name": "<tool_name(s)_if_applicable>",
815
815
"explanation": "<your_explanation>",
816
816
"npc_name": "<npc_name_if_applicable>"
@@ -1438,3 +1438,199 @@ def get_data_response(
1438
1438
failures .append (str (e ))
1439
1439
1440
1440
return {"response" : "Max retries exceeded" , "code" : 400 }
1441
+
1442
+
1443
def enter_reasoning_human_in_the_loop(
    messages: List[Dict[str, str]],
    reasoning_model: str = NPCSH_REASONING_MODEL,
    reasoning_provider: str = NPCSH_REASONING_PROVIDER,
    chat_model: str = NPCSH_CHAT_MODEL,
    chat_provider: str = NPCSH_CHAT_PROVIDER,
    npc: Any = None,
    answer_only: bool = False,
) -> Generator[str, None, None]:
    """
    Stream reasoning-model output, pausing for human input when the model's
    <think> block indicates that extra context is required.

    Args:
        messages: Conversation history; the last entry must be the user turn.
        reasoning_model: Model used for the thinking pass.
        reasoning_provider: Provider of the reasoning model.
        chat_model: Model used to analyze the thoughts for needed input.
        chat_provider: Provider of the chat model.
        npc: NPC instance if applicable.
        answer_only: When True, strip the think instruction and stream a
            direct answer (used by the recursive second pass).

    Yields:
        Raw streamed response chunks from the underlying provider.
    """
    if answer_only:
        # Second pass: remove the think instruction and ask for a plain answer.
        messages[-1]["content"] = (
            messages[-1]["content"].replace(
                "Think first though and use <think> tags", ""
            )
            + " Do not think just answer. "
        )
    else:
        messages[-1]["content"] = (
            messages[-1]["content"] + " Think first though and use <think> tags. "
        )

    response_stream = get_stream(
        messages, model=reasoning_model, provider=reasoning_provider, npc=npc
    )

    thoughts = []
    # Accumulate the streamed text once instead of re-joining the whole
    # chunk list on every iteration (the original was O(n^2) per stream).
    accumulated = ""
    in_think_block = False

    for chunk in response_stream:
        # Extract plain-text content in a provider-specific way.
        if reasoning_provider == "ollama":
            chunk_content = chunk.get("message", {}).get("content", "")
        elif reasoning_provider in ("openai", "deepseek"):
            chunk_content = "".join(
                choice.delta.content
                for choice in chunk.choices
                if choice.delta.content is not None
            )
        elif reasoning_provider == "anthropic":
            chunk_content = (
                chunk.delta.text if chunk.type == "content_block_delta" else ""
            )
        else:
            # Default extraction for unknown providers.
            chunk_content = str(chunk)

        accumulated += chunk_content

        if answer_only:
            yield chunk
            continue

        # Enter the think block once an opening tag has streamed in; match on
        # the "<th" prefix because a tag can be split across chunks.
        if "<th" in accumulated and "/th" not in accumulated:
            in_think_block = True

        if in_think_block:
            thoughts.append(chunk_content)
            yield chunk  # Show the thoughts as they come

        if "</th" in accumulated:
            thought_text = "".join(thoughts)
            # Decide whether the thoughts call for extra user input.
            input_needed = analyze_thoughts_for_input(
                thought_text, model=chat_model, provider=chat_provider
            )

            if input_needed:
                # Collect the requested input and fold it into the context.
                user_input = request_user_input(input_needed)

                messages.append(
                    {
                        "role": "assistant",
                        "content": f"""its clear that extra input is required.
                        could you please provide it? Here is the reason:

                        {input_needed['reason']},

                        and the prompt: {input_needed['prompt']}""",
                    }
                )
                messages.append({"role": "user", "content": user_input})
            else:
                # No input needed: record the thoughts and re-ask the original
                # question (messages[-2] after the append) answer-only.
                messages.append({"role": "assistant", "content": thought_text})
                messages.append(
                    {"role": "user", "content": messages[-2]["content"]}
                )

            # Both branches restart identically with the enriched context.
            yield from enter_reasoning_human_in_the_loop(
                messages,
                reasoning_model=reasoning_model,
                reasoning_provider=reasoning_provider,
                chat_model=chat_model,
                chat_provider=chat_provider,
                npc=npc,
                answer_only=True,
            )
            return  # Stop the original stream in either case
1571
+
1572
def analyze_thoughts_for_input(
    thought_text: str,
    model: str = NPCSH_CHAT_MODEL,
    provider: str = NPCSH_CHAT_PROVIDER,
) -> Optional[Dict[str, str]]:
    """
    Ask the chat model whether the accumulated thoughts warrant user input.

    Args:
        thought_text: Accumulated text from the model's think block.
        model: Chat model used for the analysis.
        provider: Provider of the chat model.

    Returns:
        A dict with ``reason`` and ``prompt`` keys when input is needed,
        ``None`` otherwise.
    """
    prompt = (
        f"""
    Analyze these thoughts:
    {thought_text}
    and determine if additional user input would be helpful.
    Return a JSON object with:"""
        + """
    {
        "input_needed": boolean,
        "request_reason": string explaining why input is needed,
        "request_prompt": string to show user if input needed
    }
    Consider things like:
    - Ambiguity in the user's request
    - Missing context that would help provide a better response
    - Clarification needed about user preferences/requirements
    Only request input if it would meaningfully improve the response.
    Do not include any additional markdown formatting or leading ```json tags. Your response
    must be a valid JSON object.
    """
    )

    llm_reply = get_llm_response(
        prompt, model=model, provider=provider, messages=[], format="json"
    )

    # Providers may hand back either a parsed dict or a raw JSON string.
    parsed = llm_reply.get("response", {})
    if isinstance(parsed, str):
        parsed = json.loads(parsed)

    if not parsed.get("input_needed"):
        return None

    return {
        "reason": parsed["request_reason"],
        "prompt": parsed["request_prompt"],
    }
1625
def request_user_input(input_request: Dict[str, str]) -> str:
    """
    Prompt the user on stdin for the additional input the model requested.

    Args:
        input_request: Dict with ``reason`` and ``prompt`` keys describing
            why input is needed and what to ask the user.

    Returns:
        The text the user typed.
    """
    reason = input_request["reason"]
    question = input_request["prompt"]
    print(f"\nAdditional input needed: {reason}")
    return input(f"{question}: ")
0 commit comments