Skip to content

Commit ab81d5f

Browse files
authored
upgrade query for image search (#534)
1 parent 22eeb59 commit ab81d5f

File tree

3 files changed

+58
-40
lines changed

3 files changed

+58
-40
lines changed

examples/image_search/README.md

Lines changed: 23 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,29 @@
22

33
![image](https://github.com/user-attachments/assets/3a696344-c9b4-46e8-9413-6229dbb8672a)
44

5-
- QDrant for Vector Storage
5+
- Qdrant for Vector Storage
66
- Ollama Gemma3 (Image to Text)
77
- CLIP ViT-L/14 - Embeddings Model
88
- Live Update
99

1010
## Make sure Postgres and Qdrant are running
1111
```
12-
docker run -d --name qdrant -p 6334:6334 qdrant/qdrant:latest
12+
docker run -d -p 6334:6334 -p 6333:6333 qdrant/qdrant
1313
export COCOINDEX_DATABASE_URL="postgres://cocoindex:cocoindex@localhost/cocoindex"
1414
```
1515

16-
## Create QDrant Collection
16+
## Create Qdrant Collection
1717
```
18-
curl -X PUT
19-
'http://localhost:6333/collections/image_search' \
20-
--header 'Content-Type: application/json' \
21-
--data-raw '{
18+
curl -X PUT 'http://localhost:6333/collections/image_search' \
19+
-H 'Content-Type: application/json' \
20+
-d '{
2221
"vectors": {
2322
"embedding": {
2423
"size": 768,
2524
"distance": "Cosine"
2625
}
2726
}
2827
}'
29-
3028
```
3129

3230
## Run Ollama
@@ -35,29 +33,28 @@ ollama pull gemma3
3533
ollama serve
3634
```
3735

38-
## Create virtual environment and install dependencies
39-
```
40-
python -m venv .venv
41-
source .venv/bin/activate
42-
pip install -r requirements.txt
43-
```
44-
4536
### Place your images in the `img` directory.
4637
- No need to update manually. CocoIndex will automatically update the index as new images are added to the directory.
4738

4839

4940
## Run Backend
50-
```
51-
cocoindex setup main.py
52-
uvicorn main:app --reload --host 0.0.0.0 --port 8000
53-
```
54-
55-
## Run Frontend
56-
```
57-
cd frontend
58-
npm install
59-
npm run dev
60-
```
41+
- Install dependencies:
42+
```
43+
pip install -e .
44+
```
45+
46+
- Run Backend:
47+
```
48+
cocoindex setup main.py
49+
uvicorn main:app --reload --host 0.0.0.0 --port 8000
50+
```
51+
52+
- Run Frontend:
53+
```
54+
cd frontend
55+
npm install
56+
npm run dev
57+
```
6158

6259
Go to `http://localhost:5174` to search.
6360

examples/image_search/main.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77
from fastapi import FastAPI, Query
88
from fastapi.middleware.cors import CORSMiddleware
99
from fastapi.staticfiles import StaticFiles
10+
from qdrant_client import QdrantClient
1011

1112
OLLAMA_URL = "http://localhost:11434/api/generate"
1213
OLLAMA_MODEL = "gemma3"
14+
QDRANT_GRPC_URL = os.getenv("QDRANT_GRPC_URL", "http://localhost:6334/")
1315

1416
# 1. Extract caption from image using Ollama vision model
1517
@cocoindex.op.function(cache=True, behavior_version=1)
@@ -42,7 +44,12 @@ def get_image_caption(img_bytes: bytes) -> str:
4244

4345

4446
# 2. Embed the caption string
45-
def caption_to_embedding(caption: cocoindex.DataSlice) -> cocoindex.DataSlice:
47+
@cocoindex.transform_flow()
48+
def caption_to_embedding(caption: cocoindex.DataSlice[str]) -> cocoindex.DataSlice[list[float]]:
49+
"""
50+
Embed the caption using a CLIP model.
51+
This is shared logic between indexing and querying.
52+
"""
4653
return caption.transform(
4754
cocoindex.functions.SentenceTransformerEmbed(
4855
model="clip-ViT-L-14",
@@ -70,7 +77,7 @@ def image_object_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope:
7077
"img_embeddings",
7178
cocoindex.storages.Qdrant(
7279
collection_name="image_search",
73-
grpc_url=os.getenv("QDRANT_GRPC_URL", "http://localhost:6334/"),
80+
grpc_url=QDRANT_GRPC_URL,
7481
),
7582
primary_key_fields=["id"],
7683
setup_by_user=True,
@@ -93,26 +100,31 @@ def image_object_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope:
93100
def startup_event():
94101
load_dotenv()
95102
cocoindex.init()
96-
app.state.query_handler = cocoindex.query.SimpleSemanticsQueryHandler(
97-
name="ImageObjectSearch",
98-
flow=image_object_embedding_flow,
99-
target_name="img_embeddings",
100-
query_transform_flow=caption_to_embedding,
101-
default_similarity_metric=cocoindex.VectorSimilarityMetric.COSINE_SIMILARITY,
103+
# Initialize Qdrant client
104+
app.state.qdrant_client = QdrantClient(
105+
url=QDRANT_GRPC_URL,
106+
prefer_grpc=True
102107
)
103108
app.state.live_updater = cocoindex.FlowLiveUpdater(image_object_embedding_flow)
104109
app.state.live_updater.start()
105110

106111
@app.get("/search")
107112
def search(q: str = Query(..., description="Search query"), limit: int = Query(5, description="Number of results")):
108-
query_handler = app.state.query_handler
109-
results, _ = query_handler.search(q, limit, "embedding")
113+
# Get the embedding for the query
114+
query_embedding = caption_to_embedding.eval(q)
115+
116+
# Search in Qdrant
117+
search_results = app.state.qdrant_client.search(
118+
collection_name="image_search",
119+
query_vector=("embedding", query_embedding),
120+
limit=limit
121+
)
122+
123+
# Format results
110124
out = []
111-
for result in results:
112-
row = dict(result.data)
113-
# Only include filename and score
125+
for result in search_results:
114126
out.append({
115-
"filename": row["filename"],
127+
"filename": result.payload["filename"],
116128
"score": result.score
117129
})
118130
return {"results": out}

examples/image_search/pyproject.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
[project]
2+
name = "image-search"
3+
version = "0.1.0"
4+
description = "Simple example for cocoindex: build embedding index based on images."
5+
requires-python = ">=3.11"
6+
dependencies = ["cocoindex>=0.1.42", "python-dotenv>=1.0.1", "fastapi>=0.100.0", "qdrant-client", "uvicorn"]
7+
8+
[tool.setuptools]
9+
packages = []

0 commit comments

Comments
 (0)