@@ -19,8 +19,10 @@ class QdrantEmbeddingRetriever:
19
19
":memory:",
20
20
recreate_index=True,
21
21
return_embedding=True,
22
- wait_result_from_api=True,
23
22
)
23
+
24
+ document_store.write_documents([Document(content="test", embedding=[0.5]*768)])
25
+
24
26
retriever = QdrantEmbeddingRetriever(document_store=document_store)
25
27
26
28
# using a fake vector to keep the example simple
@@ -112,7 +114,7 @@ def run(
112
114
The retrieved documents.
113
115
114
116
"""
115
- docs = self ._document_store .query_by_embedding (
117
+ docs = self ._document_store ._query_by_embedding (
116
118
query_embedding = query_embedding ,
117
119
filters = filters or self ._filters ,
118
120
top_k = top_k or self ._top_k ,
@@ -136,10 +138,14 @@ class QdrantSparseEmbeddingRetriever:
136
138
137
139
document_store = QdrantDocumentStore(
138
140
":memory:",
141
+ use_sparse_embeddings=True,
139
142
recreate_index=True,
140
143
return_embedding=True,
141
- wait_result_from_api=True,
142
144
)
145
+
146
+ doc = Document(content="test", sparse_embedding=SparseEmbedding(indices=[0, 3, 5], values=[0.1, 0.5, 0.12]))
147
+ document_store.write_documents([doc])
148
+
143
149
retriever = QdrantSparseEmbeddingRetriever(document_store=document_store)
144
150
sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
145
151
retriever.run(query_sparse_embedding=sparse_embedding)
@@ -196,7 +202,7 @@ def to_dict(self) -> Dict[str, Any]:
196
202
return d
197
203
198
204
@classmethod
199
- def from_dict (cls , data : Dict [str , Any ]) -> "QdrantEmbeddingRetriever " :
205
+ def from_dict (cls , data : Dict [str , Any ]) -> "QdrantSparseEmbeddingRetriever " :
200
206
"""
201
207
Deserializes the component from a dictionary.
202
208
@@ -230,7 +236,7 @@ def run(
230
236
The retrieved documents.
231
237
232
238
"""
233
- docs = self ._document_store .query_by_sparse (
239
+ docs = self ._document_store ._query_by_sparse (
234
240
query_sparse_embedding = query_sparse_embedding ,
235
241
filters = filters or self ._filters ,
236
242
top_k = top_k or self ._top_k ,
@@ -239,3 +245,124 @@ def run(
239
245
)
240
246
241
247
return {"documents" : docs }
248
+
249
+
250
@component
class QdrantHybridRetriever:
    """
    A component for retrieving documents from a QdrantDocumentStore using both dense and sparse vectors
    and fusing the results using Reciprocal Rank Fusion.

    Usage example:
    ```python
    from haystack.dataclasses import Document
    from haystack.dataclasses.sparse_embedding import SparseEmbedding
    from haystack_integrations.components.retrievers.qdrant import QdrantHybridRetriever
    from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

    document_store = QdrantDocumentStore(
        ":memory:",
        use_sparse_embeddings=True,
        recreate_index=True,
        return_embedding=True,
        wait_result_from_api=True,
    )

    doc = Document(content="test",
                   embedding=[0.5]*768,
                   sparse_embedding=SparseEmbedding(indices=[0, 3, 5], values=[0.1, 0.5, 0.12]))

    document_store.write_documents([doc])

    retriever = QdrantHybridRetriever(document_store=document_store)
    embedding = [0.1]*768
    sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
    retriever.run(query_embedding=embedding, query_sparse_embedding=sparse_embedding)
    ```
    """

    def __init__(
        self,
        document_store: QdrantDocumentStore,
        filters: Optional[Dict[str, Any]] = None,
        top_k: int = 10,
        return_embedding: bool = False,
    ):
        """
        Create a QdrantHybridRetriever component.

        :param document_store: An instance of QdrantDocumentStore.
        :param filters: A dictionary with filters to narrow down the search space.
        :param top_k: The maximum number of documents to retrieve.
        :param return_embedding: Whether to return the embeddings of the retrieved Documents.

        :raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
        """

        if not isinstance(document_store, QdrantDocumentStore):
            msg = "document_store must be an instance of QdrantDocumentStore"
            raise ValueError(msg)

        self._document_store = document_store
        self._filters = filters
        self._top_k = top_k
        self._return_embedding = return_embedding

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        return default_to_dict(
            self,
            document_store=self._document_store.to_dict(),
            filters=self._filters,
            top_k=self._top_k,
            return_embedding=self._return_embedding,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "QdrantHybridRetriever":
        """
        Deserializes the component from a dictionary.

        Note that the serialized document store under
        ``data["init_parameters"]["document_store"]`` is replaced in place by the
        deserialized QdrantDocumentStore instance.

        :param data:
            Dictionary to deserialize from.
        :returns:
            Deserialized component.
        """
        document_store = QdrantDocumentStore.from_dict(data["init_parameters"]["document_store"])
        data["init_parameters"]["document_store"] = document_store
        return default_from_dict(cls, data)

    @component.output_types(documents=List[Document])
    def run(
        self,
        query_embedding: List[float],
        query_sparse_embedding: SparseEmbedding,
        filters: Optional[Dict[str, Any]] = None,
        top_k: Optional[int] = None,
        return_embedding: Optional[bool] = None,
    ):
        """
        Run the Hybrid Retriever on the given input data.

        :param query_embedding: Dense embedding of the query.
        :param query_sparse_embedding: Sparse embedding of the query.
        :param filters: A dictionary with filters to narrow down the search space.
            Falls back to the filters given at construction time when empty or omitted.
        :param top_k: The maximum number of documents to return.
            Falls back to the value given at construction time when omitted.
        :param return_embedding: Whether to return the embedding of the retrieved Documents.
            Falls back to the value given at construction time only when omitted (``None``).
        :returns:
            A dictionary with a single key ``documents`` holding the retrieved documents.

        """
        # `x or default` would silently discard an explicit `return_embedding=False`
        # whenever the component was constructed with `return_embedding=True`,
        # so only fall back to the configured value when the caller passed nothing.
        if return_embedding is None:
            return_embedding = self._return_embedding

        docs = self._document_store._query_hybrid(
            query_embedding=query_embedding,
            query_sparse_embedding=query_sparse_embedding,
            filters=filters or self._filters,
            top_k=top_k or self._top_k,
            return_embedding=return_embedding,
        )

        return {"documents": docs}
0 commit comments