upgrade query for image search (#534)

badmonster0 · web-flow · commit ab81d5f5bfcc · 2025-05-20T22:08:13.000-07:00
diff --git a/examples/image_search/README.md b/examples/image_search/README.md
@@ -2,31 +2,29 @@
 
 ![image](https://github.com/user-attachments/assets/3a696344-c9b4-46e8-9413-6229dbb8672a)
 
-- QDrant for Vector Storage
+- Qdrant for Vector Storage
 - Ollama Gemma3 (Image to Text)
 - CLIP ViT-L/14 - Embeddings Model
 - Live Update
 
 ## Make sure Postgres and Qdrant are running
 ```
-docker run -d --name qdrant -p 6334:6334 qdrant/qdrant:latest
+docker run -d -p 6334:6334 -p 6333:6333 qdrant/qdrant
 export COCOINDEX_DATABASE_URL="postgres://cocoindex:cocoindex@localhost/cocoindex"
 ```
 
-## Create QDrant Collection
+## Create Qdrant Collection
 ```
-curl -X PUT
-  'http://localhost:6333/collections/image_search' \
-  --header 'Content-Type: application/json' \
-  --data-raw '{
+curl -X PUT 'http://localhost:6333/collections/image_search' \
+  -H 'Content-Type: application/json' \
+  -d '{
     "vectors": {
       "embedding": {
         "size": 768,
         "distance": "Cosine"
       }
     }
   }'
-
 ```
 
 ## Run Ollama
@@ -35,29 +33,28 @@ ollama pull gemma3
 ollama serve
 ```
 
-## Create virtual environment and install dependencies
-```
-python -m venv .venv
-source .venv/bin/activate
-pip install -r requirements.txt
-```
-
 ### Place your images in the `img` directory.
 - No need to update manually. CocoIndex will automatically update the index as new images are added to the directory.
 
 
 ## Run Backend
-```
-cocoindex setup main.py
-uvicorn main:app --reload --host 0.0.0.0 --port 8000
-```
-
-## Run Frontend
-```
-cd frontend
-npm install
-npm run dev
-```
+- Install dependencies:
+  ```
+  pip install -e .
+  ```
+
+- Run Backend:
+  ```
+  cocoindex setup main.py
+  uvicorn main:app --reload --host 0.0.0.0 --port 8000
+  ```
+
+- Run Frontend:
+  ```
+  cd frontend
+  npm install
+  npm run dev
+  ```
 
 Go to `http://localhost:5174` to search.
 
diff --git a/examples/image_search/main.py b/examples/image_search/main.py
@@ -7,9 +7,11 @@
 from fastapi import FastAPI, Query
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
+from qdrant_client import QdrantClient
 
 OLLAMA_URL = "http://localhost:11434/api/generate"
 OLLAMA_MODEL = "gemma3"
+QDRANT_GRPC_URL = os.getenv("QDRANT_GRPC_URL", "http://localhost:6334/")
 
 # 1. Extract caption from image using Ollama vision model
 @cocoindex.op.function(cache=True, behavior_version=1)
@@ -42,7 +44,12 @@ def get_image_caption(img_bytes: bytes) -> str:
 
 
 # 2. Embed the caption string
-def caption_to_embedding(caption: cocoindex.DataSlice) -> cocoindex.DataSlice:
+@cocoindex.transform_flow()
+def caption_to_embedding(caption: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[list[float]]:
+    """
+    Embed the caption using a CLIP model.
+    This is shared logic between indexing and querying.
+    """
     return caption.transform(
         cocoindex.functions.SentenceTransformerEmbed(
             model="clip-ViT-L-14",
@@ -70,7 +77,7 @@ def image_object_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope:
         "img_embeddings",
         cocoindex.storages.Qdrant(
             collection_name="image_search",
-            grpc_url=os.getenv("QDRANT_GRPC_URL", "http://localhost:6334/"),
+            grpc_url=QDRANT_GRPC_URL,
         ),
         primary_key_fields=["id"],
         setup_by_user=True,
@@ -93,26 +100,31 @@ def image_object_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope:
 def startup_event():
     load_dotenv()
     cocoindex.init()
-    app.state.query_handler = cocoindex.query.SimpleSemanticsQueryHandler(
-        name="ImageObjectSearch",
-        flow=image_object_embedding_flow,
-        target_name="img_embeddings",
-        query_transform_flow=caption_to_embedding,
-        default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
+    # Initialize Qdrant client
+    app.state.qdrant_client = QdrantClient(
+        url=QDRANT_GRPC_URL,
+        prefer_grpc=True
     )
     app.state.live_updater = cocoindex.FlowLiveUpdater(image_object_embedding_flow)
     app.state.live_updater.start()
 
 @app.get("/search")
 def search(q: str = Query(..., description="Search query"), limit: int = Query(5, description="Number of results")):
-    query_handler = app.state.query_handler
-    results, _ = query_handler.search(q, limit, "embedding")
+    # Get the embedding for the query
+    query_embedding = caption_to_embedding.eval(q)
+    
+    # Search in Qdrant
+    search_results = app.state.qdrant_client.search(
+        collection_name="image_search",
+        query_vector=("embedding", query_embedding),
+        limit=limit
+    )
+    
+    # Format results
     out = []
-    for result in results:
-        row = dict(result.data)
-        # Only include filename and score
+    for result in search_results:
         out.append({
-            "filename": row["filename"],
+            "filename": result.payload["filename"],
             "score": result.score
         })
     return {"results": out}
diff --git a/examples/image_search/pyproject.toml b/examples/image_search/pyproject.toml
@@ -0,0 +1,9 @@
+[project]
+name = "image-search"
+version = "0.1.0"
+description = "Simple example for cocoindex: build embedding index based on images."
+requires-python = ">=3.11"
+dependencies = ["cocoindex>=0.1.42", "python-dotenv>=1.0.1", "fastapi>=0.100.0", "qdrant-client>=1.6.0", "uvicorn>=0.29.0"]
+
+[tool.setuptools]
+packages = []