
Commit 44688c5

html agent: add self-reflection to model, logic to parse answers from structured outputs
1 parent be78632 commit 44688c5

File tree

server/agent/html_agent.py
server/model.py

2 files changed: +113 -28 lines changed


server/agent/html_agent.py

+37-25
@@ -6,14 +6,15 @@
 
 from server.partition import get_processor
 from server.partition.html_processor import HTMLProcessingSettings
-
+from server.model import JsonFieldStreamProcessor
 
 SYSTEM_PROMPT = """You are an intelligent browser assistant that helps users analyze and work with content from the currently active browser tab. Your main tasks are:
 
-1. Understand and process content only from the current active tab (HTML, PDF, plain text)
-2. Provide relevant information and answers based on the given context
-3. Help users find specific information within the current page
-4. Generate summaries, explanations, or analyses as requested
+1. Reflect on the information you have and what answers you will give to the question
+2. Understand and process content only from the current active tab (HTML, PDF, plain text)
+3. Provide relevant information and answers based on the given context
+4. Help users find specific information within the current page
+5. Generate summaries, explanations, or analyses as requested
 
 Important rules:
 - Always respond in the same language the user's question is asked in
@@ -29,16 +30,18 @@
 
 CHUNK_PROCESSING_PROMPT = """You are processing a part of a webpage. Your task is to:
 
-1. Extract only relevant information from this chunk that relates to the user's question
-2. Provide a focused, self-contained response about this specific part
-3. Consider previous findings when analyzing new information
-4. Keep the response concise and factual
-5. Format the response so it can be easily combined with other parts
+1. Reflect on the information you have and what answers you will give to the question
+2. Extract only relevant information from this chunk that relates to the user's question
+3. Provide a focused, self-contained response about this specific part
+4. Consider previous findings when analyzing new information
+5. Keep the response concise and factual
+6. Format the response so it can be easily combined with other parts
 
 Remember:
 - This is part of an iterative analysis process
 - Focus on new relevant information in this chunk
 - Avoid repeating information already found in previous parts
+- Always respond in the same language the user's question is asked in
 - Maintain the user's original language in the response
 - If you find information that complements or contradicts previous findings, note this
 
@@ -47,15 +50,15 @@
 
 
 class AnswerGeneratorWithRelevanceScore(BaseModel):
-    reflection: str
+    reflections: str
     answer: str
     answer_relevance_score_to_question: float = Field(
         default=None, description="Relevance to the question (0-1)"
     )
 
 
 class AnswerGenerator(BaseModel):
-    reflection: str
+    reflections: str
     answer: str
 
 

@@ -69,6 +72,7 @@ def __init__(
         self.client = llm_client
 
         self.content_processor = get_processor()
+        self.answer_processor = JsonFieldStreamProcessor(field_name="answer")
 
     def get_relevant_info(
         self, question, dialog_history, context, url, processing_settings
@@ -79,7 +83,7 @@ def get_relevant_info(
         messages += [
             {
                 "role": "user" if conv.role == "user" else "assistant",
-                "content": conv.message,
+                "content": f"{conv.message} Page Url: ```{conv.url}```",
             }
             for conv in dialog_history
         ]
@@ -128,7 +132,8 @@ def get_relevant_info(
             messages_parting += [
                 {
                     "role": "user",
-                    "content": f"{question} \n\n Page Url: ```{url}``` \n\nPart of web page \n\n {doc} \n\nYour response format: {AnswerGeneratorWithRelevanceScore.model_json_schema()}",
+                    "content": f"{question} \n\n Page Url: ```{url}``` \n\nPart of web page \n\n {doc} \n\n"
+                    + f"Your response format: {AnswerGeneratorWithRelevanceScore.model_json_schema()}",
                 },
             ]
 

@@ -147,31 +152,38 @@ def get_relevant_info(
                     "content": f"My question: {question} \n\n {selected_content}. The content has already been submitted part by part here are the answers to my question in parts with reflection: \n\n```{self.content_processor.make_page(documents, relevant_chunks, processing_settings)}```",
                 },
             ]
-            response_from_model = self.client.generate(messages, stream=True)
+            response_from_model = self.client.generate(
+                messages,
+                stream=True,
+                schema=AnswerGenerator.model_json_schema(),
+                stream_processor=self.answer_processor,
+            )
         else:
             print("\n\nSINGLE RUN\n\n")
             print(str(documents))
 
             messages += [
                 {
                     "role": "user",
-                    "content": f"{question} \n\n Page url: ```{url}```\n\n {selected_content} \n\n ```{str(documents)}```",
-                    # Your response format: {AnswerGenerator.model_json_schema()}",
+                    "content": f"Question: {question} \n\n Page url: ```{url}```\n\n {selected_content} \n\n ```{str(documents)}```"
+                    + f"\nYour response format: {AnswerGenerator.model_json_schema()}",
                 },
             ]
             response_from_model = self.client.generate(
                 messages,
-                stream=True,
-                # schema=AnswerGenerator.model_json_schema()
+                stream=True,
+                schema=AnswerGenerator.model_json_schema(),
+                stream_processor=self.answer_processor,
             )
         return response_from_model
 
     def generate_chat_response(self, dialog_history):
         messages = [{"role": "system", "content": SYSTEM_PROMPT}]
-        for conv in dialog_history:
-            if conv.role == "user":
-                messages.append({"role": "user", "content": conv.message})
-            else:
-                messages.append({"role": "assistant", "content": conv.message})
-
+        messages += [
+            {
+                "role": "user" if conv.role == "user" else "assistant",
+                "content": f"{conv.message} Page Url: ```{conv.url}```",
+            }
+            for conv in dialog_history
+        ]
         return self.client.generate(messages, stream=True)
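Taken together, both branches of get_relevant_info now constrain generation to the AnswerGenerator schema and stream only the answer field back to the caller. A minimal sketch of the resulting call pattern, assuming a hypothetical LlamaCppWrapper(model_path=...) constructor (its real arguments are not shown in this commit):

    # Sketch only: the LlamaCppWrapper constructor arguments are assumed.
    from server.agent.html_agent import AnswerGenerator
    from server.model import JsonFieldStreamProcessor, LlamaCppWrapper

    client = LlamaCppWrapper(model_path="model.gguf")  # hypothetical arguments
    answer_processor = JsonFieldStreamProcessor(field_name="answer")

    messages = [
        {
            "role": "user",
            "content": "Question: What is this page about?\n"
            + f"Your response format: {AnswerGenerator.model_json_schema()}",
        },
    ]

    # Generation is constrained to AnswerGenerator-shaped JSON; the stream
    # processor then yields only the text inside the "answer" field.
    for piece in client.generate(
        messages,
        stream=True,
        schema=AnswerGenerator.model_json_schema(),
        stream_processor=answer_processor,
    ):
        print(piece, end="", flush=True)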

server/model.py

+76-3
@@ -4,6 +4,71 @@
 from tqdm import tqdm
 
 
+from abc import ABC, abstractmethod
+from typing import Generator, Any, Optional, Callable
+
+
+class StreamProcessor(ABC):
+    @abstractmethod
+    def process_stream(self, stream: Generator) -> Generator:
+        pass
+
+
+class JsonFieldStreamProcessor(StreamProcessor):
+    def __init__(self, field_name: str):
+        self.field_name = field_name
+        self.buffer = ""
+        self.in_field = False
+        self.json_started = False
+
+    def process_stream(self, stream: Generator) -> Generator:
+        for token in stream:
+            content = token["choices"][0]["delta"].get("content", "")
+            if not content:
+                continue
+
+            self.buffer += content
+
+            if not self.json_started and "{" in self.buffer:
+                self.json_started = True
+
+            if not self.json_started:
+                continue
+
+            field_marker = f'"{self.field_name}": "'
+            if field_marker in self.buffer and not self.in_field:
+                self.in_field = True
+                self.buffer = self.buffer.split(field_marker)[1]
+
+            if self.in_field:
+                index = 0
+                while index < len(self.buffer):
+                    if self.buffer[index] == '"':
+                        if index > 0 and self.buffer[index - 1] == '\\':
+                            index += 1
+                            continue
+
+                        field_content = self.buffer[:index]
+                        self.buffer = self.buffer[index + 1:]
+
+                        if field_content:
+                            yield field_content
+
+                        self.in_field = False
+                        break
+                    index += 1
+
+                if self.in_field and self.buffer:
+                    yield self.buffer
+                    self.buffer = ""
+
+
+class DefaultStreamProcessor(StreamProcessor):
+    def process_stream(self, stream: Generator) -> Generator:
+        for token in stream:
+            yield token["choices"][0]["delta"].get("content", "")
+
+
 class LlamaCppWrapper:
     def __init__(
         self,
@@ -50,7 +115,13 @@ def get_params(self):
     def tokenize(self, text):
         return self.model.tokenize(text.encode("utf8"))
 
-    def generate(self, template, stream=False, schema=None):
+    def generate(
+        self,
+        template,
+        stream=False,
+        schema=None,
+        stream_processor=None,
+    ):
         if schema:
             response_generator = self.model.create_chat_completion(
                 template,
@@ -70,8 +141,10 @@ def generate(self, template, stream=False, schema=None):
         if stream:
 
             def generate():
-                for token in response_generator:
-                    yield token["choices"][0]["delta"].get("content", "")
+                processor = (
+                    stream_processor if stream_processor else DefaultStreamProcessor()
+                )
+                return processor.process_stream(response_generator)
 
             return generate()
         return response_generator["choices"][0]["message"]["content"]
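For reference, a self-contained sketch of what JsonFieldStreamProcessor does, run against a hand-built stream shaped like llama-cpp-python's streaming chat deltas (the chunk text is invented for the example):

    # Illustrative only: simulate a streamed structured response and check that
    # JsonFieldStreamProcessor yields just the contents of the "answer" field.
    from server.model import JsonFieldStreamProcessor


    def fake_stream(chunks):
        # Mimic the token dicts produced by a streaming chat completion API.
        for chunk in chunks:
            yield {"choices": [{"delta": {"content": chunk}}]}


    chunks = [
        '{"reflections": "The page names a capital city.", ',
        '"answer": "Paris is ',
        'the capital of France.',
        '"}',
    ]

    processor = JsonFieldStreamProcessor(field_name="answer")
    print("".join(processor.process_stream(fake_stream(chunks))))
    # -> Paris is the capital of France.

Note that the field marker match relies on the model emitting "answer": " with exactly one space after the colon; schema-constrained decoding normally produces that layout, but free-form JSON with different whitespace would slip past the parser.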
