@@ -7,7 +7,6 @@
 
 from fastapi_app.api_models import (
     AIChatRoles,
-    ChatRequestOverrides,
     Message,
     RAGContext,
     RetrievalResponse,
@@ -63,10 +62,15 @@ async def generate_search_query(
 
         return query_messages, query_text, filters
 
-    async def retrieve_and_build_context(
-        self, chat_params: ChatParams, query_text: str | Any | None, filters: list
-    ) -> tuple[list[ChatCompletionMessageParam], list[Item]]:
-        """Retrieve relevant items from the database and build a context for the chat model."""
+    async def prepare_context(
+        self, chat_params: ChatParams
+    ) -> tuple[list[ChatCompletionMessageParam], list[Item], list[ThoughtStep]]:
+        query_messages, query_text, filters = await self.generate_search_query(
+            original_user_query=chat_params.original_user_query,
+            past_messages=chat_params.past_messages,
+            query_response_token_limit=500,
+        )
+
         # Retrieve relevant items from the database with the GPT optimized query
         results = await self.searcher.search_and_embed(
             query_text,
@@ -88,28 +92,41 @@ async def retrieve_and_build_context(
             max_tokens=self.chat_token_limit - chat_params.response_token_limit,
             fallback_to_default=True,
         )
-        return contextual_messages, results
 
-    async def run(
+        thoughts = [
+            ThoughtStep(
+                title="Prompt to generate search arguments",
+                description=[str(message) for message in query_messages],
+                props=(
+                    {"model": self.chat_model, "deployment": self.chat_deployment}
+                    if self.chat_deployment
+                    else {"model": self.chat_model}
+                ),
+            ),
+            ThoughtStep(
+                title="Search using generated search arguments",
+                description=query_text,
+                props={
+                    "top": chat_params.top,
+                    "vector_search": chat_params.enable_vector_search,
+                    "text_search": chat_params.enable_text_search,
+                    "filters": filters,
+                },
+            ),
+            ThoughtStep(
+                title="Search results",
+                description=[result.to_dict() for result in results],
+            ),
+        ]
+        return contextual_messages, results, thoughts
+
+    async def answer(
         self,
-        messages: list[ChatCompletionMessageParam],
-        overrides: ChatRequestOverrides,
+        chat_params: ChatParams,
+        contextual_messages: list[ChatCompletionMessageParam],
+        results: list[Item],
+        earlier_thoughts: list[ThoughtStep],
     ) -> RetrievalResponse:
-        chat_params = self.get_params(messages, overrides)
-
-        # Generate an optimized keyword search query based on the chat history and the last question
-        query_messages, query_text, filters = await self.generate_search_query(
-            original_user_query=chat_params.original_user_query,
-            past_messages=chat_params.past_messages,
-            query_response_token_limit=500,
-        )
-
-        # Retrieve relevant items from the database with the GPT optimized query
-        # Generate a contextual and content specific answer using the search results and chat history
-        contextual_messages, results = await self.retrieve_and_build_context(
-            chat_params=chat_params, query_text=query_text, filters=filters
-        )
-
         chat_completion_response: ChatCompletion = await self.openai_chat_client.chat.completions.create(
             # Azure OpenAI takes the deployment name as the model name
             model=self.chat_deployment if self.chat_deployment else self.chat_model,
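With this hunk, prepare_context owns query generation, retrieval, and the first three ThoughtSteps, so the old run body reduces to plain orchestration. The calling code is not part of this diff; a minimal sketch of what a caller might look like, assuming a run wrapper that keeps the removed get_params signature (everything here except prepare_context and answer is an assumption):

    # Hypothetical orchestrator reconstructed from the removed `run` body;
    # only prepare_context/answer are confirmed by this diff.
    async def run(
        self,
        messages: list[ChatCompletionMessageParam],
        overrides: ChatRequestOverrides,
    ) -> RetrievalResponse:
        chat_params = self.get_params(messages, overrides)  # as in the removed code
        contextual_messages, results, thoughts = await self.prepare_context(chat_params)
        return await self.answer(chat_params, contextual_messages, results, thoughts)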
@@ -126,30 +143,8 @@ async def run(
             ),
             context=RAGContext(
                 data_points={item.id: item.to_dict() for item in results},
-                thoughts=[
-                    ThoughtStep(
-                        title="Prompt to generate search arguments",
-                        description=[str(message) for message in query_messages],
-                        props=(
-                            {"model": self.chat_model, "deployment": self.chat_deployment}
-                            if self.chat_deployment
-                            else {"model": self.chat_model}
-                        ),
-                    ),
-                    ThoughtStep(
-                        title="Search using generated search arguments",
-                        description=query_text,
-                        props={
-                            "top": chat_params.top,
-                            "vector_search": chat_params.enable_vector_search,
-                            "text_search": chat_params.enable_text_search,
-                            "filters": filters,
-                        },
-                    ),
-                    ThoughtStep(
-                        title="Search results",
-                        description=[result.to_dict() for result in results],
-                    ),
+                thoughts=earlier_thoughts
+                + [
                     ThoughtStep(
                         title="Prompt to generate answer",
                         description=[str(message) for message in contextual_messages],
@@ -163,23 +158,13 @@ async def run(
             ),
         )
 
-    async def run_stream(
+    async def answer_stream(
         self,
-        messages: list[ChatCompletionMessageParam],
-        overrides: ChatRequestOverrides,
+        chat_params: ChatParams,
+        contextual_messages: list[ChatCompletionMessageParam],
+        results: list[Item],
+        earlier_thoughts: list[ThoughtStep],
     ) -> AsyncGenerator[RetrievalResponseDelta, None]:
-        chat_params = self.get_params(messages, overrides)
-
-        query_messages, query_text, filters = await self.generate_search_query(
-            original_user_query=chat_params.original_user_query,
-            past_messages=chat_params.past_messages,
-            query_response_token_limit=500,
-        )
-
-        contextual_messages, results = await self.retrieve_and_build_context(
-            chat_params=chat_params, query_text=query_text, filters=filters
-        )
-
         chat_completion_async_stream: AsyncStream[
             ChatCompletionChunk
         ] = await self.openai_chat_client.chat.completions.create(
@@ -192,38 +177,11 @@ async def run_stream(
             stream=True,
         )
 
-        # Forcefully close the database session before yielding the response
-        # Yielding keeps the connection open while streaming the response until the end
-        # The connection closes when it returns back to the context manager in the dependencies
-        await self.searcher.db_session.close()
-
         yield RetrievalResponseDelta(
             context=RAGContext(
                 data_points={item.id: item.to_dict() for item in results},
-                thoughts=[
-                    ThoughtStep(
-                        title="Prompt to generate search arguments",
-                        description=[str(message) for message in query_messages],
-                        props=(
-                            {"model": self.chat_model, "deployment": self.chat_deployment}
-                            if self.chat_deployment
-                            else {"model": self.chat_model}
-                        ),
-                    ),
-                    ThoughtStep(
-                        title="Search using generated search arguments",
-                        description=query_text,
-                        props={
-                            "top": chat_params.top,
-                            "vector_search": chat_params.enable_vector_search,
-                            "text_search": chat_params.enable_text_search,
-                            "filters": filters,
-                        },
-                    ),
-                    ThoughtStep(
-                        title="Search results",
-                        description=[result.to_dict() for result in results],
-                    ),
+                thoughts=earlier_thoughts
+                + [
                     ThoughtStep(
                         title="Prompt to generate answer",
                         description=[str(message) for message in contextual_messages],
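The streaming path mirrors the same split. Note that the forceful db_session.close() before yielding is dropped in this hunk, which suggests the session lifetime is now managed by whatever calls answer_stream rather than inside it. A sketch of such a caller, under the same assumptions as the run sketch above (run_stream and get_params are not part of this diff):

    # Hypothetical streaming orchestrator; only prepare_context/answer_stream
    # appear in this diff, the rest is assumed for illustration.
    async def run_stream(
        self,
        messages: list[ChatCompletionMessageParam],
        overrides: ChatRequestOverrides,
    ) -> AsyncGenerator[RetrievalResponseDelta, None]:
        chat_params = self.get_params(messages, overrides)
        contextual_messages, results, thoughts = await self.prepare_context(chat_params)
        async for chunk in self.answer_stream(chat_params, contextual_messages, results, thoughts):
            yield chunk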