Skip to content

Commit 995c0a1

Browse files
authored
Integration tests for collections (#299)
## Problem We received a bug report that creation of indexes using `PodSpec` fails if `source_collection` is specified. ## Solution - The fix for the bug was a one-line change. - Added several integration tests to exercise the `index --> collection --> index` path and error cases. - Restructured integration tests so that tests using pod-based indexes reside in `tests/integration/control/pod` and can be run separately from the serverless index tests in `tests/integration/control/serverless`. This allows for greater parallelism in CI. - Adjusted CI configs to run these tests in parallel with the integration tests that use serverless indexes. The collections tests are quite slow due to the waiting required for pod indexes and collections to become ready for use. ## Type of Change - [x] Bug fix (non-breaking change which fixes an issue) - [x] Infrastructure change (CI configs, etc)
1 parent 5eb002e commit 995c0a1

21 files changed

+474
-77
lines changed

.github/workflows/testing-integration.yaml

+52-8
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,54 @@ jobs:
4545
# spec: '${{ matrix.spec }}'
4646
# PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'
4747

48-
control-rest:
49-
name: control plane
48+
# Pod/collection control-plane tests. Runs separately from the serverless job.
control-rest-pod:
  name: control plane pod/collection tests
  runs-on: ubuntu-latest
  strategy:
    matrix:
      pineconeEnv:
        - prod
      testConfig:
        - python-version: 3.8
          pod: { environment: 'us-east1-gcp'}
        - python-version: 3.11
          pod: { environment: 'us-east4-gcp'}
    fail-fast: false
  steps:
    - uses: actions/checkout@v4
    - name: 'Set up Python ${{ matrix.testConfig.python-version }}'
      uses: actions/setup-python@v4
      with:
        python-version: '${{ matrix.testConfig.python-version }}'
    - name: Setup Poetry
      uses: ./.github/actions/setup-poetry
    - name: 'Run integration tests (REST, prod)'
      if: matrix.pineconeEnv == 'prod'
      # This job must target the pod suite; the serverless suite has its own job.
      run: poetry run pytest tests/integration/control/pod -s -v
      env:
        PINECONE_DEBUG_CURL: 'true'
        PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'
        PINECONE_ENVIRONMENT: '${{ matrix.testConfig.pod.environment }}'
        GITHUB_BUILD_NUMBER: '${{ github.run_number }}-s-${{ matrix.testConfig.python-version}}'
        DIMENSION: 1536
        METRIC: 'cosine'
    - name: 'Run integration tests (REST, staging)'
      if: matrix.pineconeEnv == 'staging'
      # This job must target the pod suite; the serverless suite has its own job.
      run: poetry run pytest tests/integration/control/pod -s -v
      env:
        PINECONE_DEBUG_CURL: 'true'
        PINECONE_CONTROLLER_HOST: 'https://api-staging.pinecone.io'
        PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY_STAGING }}'
        PINECONE_ENVIRONMENT: '${{ matrix.testConfig.pod.environment }}'
        GITHUB_BUILD_NUMBER: '${{ github.run_number }}-p-${{ matrix.testConfig.python-version}}'
        DIMENSION: 1536
        METRIC: 'cosine'
90+
91+
92+
93+
94+
control-rest-serverless:
95+
name: control plane serverless
5096
runs-on: ubuntu-latest
5197
strategy:
5298
matrix:
@@ -59,7 +105,6 @@ jobs:
59105
- python-version: 3.11
60106
pod: { environment: 'us-east1-gcp'}
61107
serverless: { cloud: 'aws', region: 'us-west-2'}
62-
max-parallel: 1
63108
fail-fast: false
64109
steps:
65110
- uses: actions/checkout@v4
@@ -71,21 +116,20 @@ jobs:
71116
uses: ./.github/actions/setup-poetry
72117
- name: 'Run integration tests (REST, prod)'
73118
if: matrix.pineconeEnv == 'prod'
74-
run: poetry run pytest tests/integration/control -s -v
119+
run: poetry run pytest tests/integration/control/serverless -s -vv
75120
env:
76-
PINECONE_CONTROLLER_HOST: 'https://api.pinecone.io'
121+
PINECONE_DEBUG_CURL: 'true'
77122
PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'
78123
GITHUB_BUILD_NUMBER: '${{ github.run_number }}-p-${{ matrix.testConfig.python-version}}'
79-
POD_ENVIRONMENT: '${{ matrix.testConfig.pod.environment }}'
80124
SERVERLESS_CLOUD: '${{ matrix.testConfig.serverless.cloud }}'
81125
SERVERLESS_REGION: '${{ matrix.testConfig.serverless.region }}'
82126
- name: 'Run integration tests (REST, staging)'
83127
if: matrix.pineconeEnv == 'staging'
84-
run: poetry run pytest tests/integration -s -v
128+
run: poetry run pytest tests/integration/control/serverless -s -vv
85129
env:
130+
PINECONE_DEBUG_CURL: 'true'
86131
PINECONE_CONTROLLER_HOST: 'https://api-staging.pinecone.io'
87132
PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY_STAGING }}'
88133
GITHUB_BUILD_NUMBER: '${{ github.run_number }}-s-${{ matrix.testConfig.python-version}}'
89-
POD_ENVIRONMENT: '${{ matrix.testConfig.pod.environment }}'
90134
SERVERLESS_CLOUD: '${{ matrix.testConfig.serverless.cloud }}'
91135
SERVERLESS_REGION: '${{ matrix.testConfig.serverless.region }}'

pinecone/models/pod_spec.py

+5
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ class PodSpec(NamedTuple):
6060
{'indexed': ['field1', 'field2']}
6161
"""
6262

63+
source_collection: Optional[str] = None
64+
"""
65+
The name of the collection to use as the source for the pod index. This configuration is only used when creating a pod index from an existing collection.
66+
"""
67+
6368
def asdict(self):
6469
"""
6570
Returns the PodSpec as a dictionary.

scripts/delete-all-collections.py

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import os
2+
from pinecone import Pinecone
3+
4+
def read_env_var(name):
    """Return the value of environment variable ``name``.

    Raises:
        Exception: if the variable is not present in the environment.
    """
    if name not in os.environ:
        raise Exception(f'Environment variable {name} is not set')
    return os.environ[name]
9+
10+
def main():
    """Delete each non-empty-named collection returned by ``list_collections()``.

    The client is built from the PINECONE_API_KEY environment variable.
    """
    pc = Pinecone(api_key=read_env_var('PINECONE_API_KEY'))

    for collection_name in pc.list_collections().names():
        if collection_name == "":
            # Nothing to delete for a blank name.
            continue
        pc.delete_collection(collection_name)


if __name__ == '__main__':
    main()
20+

tests/integration/control/pod/__init__.py

Whitespace-only changes.
+149
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
import pytest
2+
import random
3+
import string
4+
import time
5+
from pinecone import Pinecone, PodSpec
6+
from ...helpers import generate_index_name, get_environment_var
7+
8+
@pytest.fixture()
def client():
    """Return a Pinecone client built from PINECONE_API_KEY and tagged with the sdk-test-suite header."""
    suite_headers = {'sdk-test-suite': 'pinecone-python-client'}
    return Pinecone(
        api_key=get_environment_var('PINECONE_API_KEY'),
        additional_headers=suite_headers,
    )
15+
16+
@pytest.fixture()
def environment():
    """Return the value of the PINECONE_ENVIRONMENT setting."""
    pod_environment = get_environment_var('PINECONE_ENVIRONMENT')
    return pod_environment
19+
20+
@pytest.fixture()
def dimension():
    """Return the DIMENSION setting converted to an int."""
    raw_dimension = get_environment_var('DIMENSION')
    return int(raw_dimension)
23+
24+
@pytest.fixture()
def create_index_params(index_name, environment, dimension, metric):
    """Return keyword arguments for ``create_index`` describing a p1.x1 pod index.

    ``timeout`` defaults to -1; fixtures that consume this dict may override it.
    """
    pod_config = {'environment': environment, 'pod_type': 'p1.x1'}
    return {
        'name': index_name,
        'dimension': dimension,
        'metric': metric,
        'spec': {'pod': pod_config},
        'timeout': -1,
    }
39+
40+
@pytest.fixture()
def metric():
    """Return the value of the METRIC setting."""
    configured_metric = get_environment_var('METRIC')
    return configured_metric
43+
44+
@pytest.fixture()
def random_vector(dimension):
    """Return a factory that builds a list of ``dimension`` random floats drawn with ``random.uniform(0, 1)``."""
    def make_vector():
        values = []
        for _ in range(dimension):
            values.append(random.uniform(0, 1))
        return values

    return make_vector
49+
50+
@pytest.fixture()
def index_name(request):
    """Return an index name generated from the requesting test's node name."""
    return generate_index_name(request.node.name)
54+
55+
@pytest.fixture()
def ready_index(client, index_name, create_index_params):
    """Create the test index with ``timeout=None``, yield its name, then request its deletion."""
    create_index_params.update({'timeout': None})
    client.create_index(**create_index_params)
    # Extra wait, since status is sometimes inaccurate
    time.sleep(10)

    yield index_name

    client.delete_index(index_name, -1)
62+
63+
@pytest.fixture()
def notready_index(client, index_name, create_index_params):
    """Create the test index with ``timeout=-1`` and yield its name; no teardown is done here."""
    create_index_params['timeout'] = -1
    client.create_index(**create_index_params)
    yield index_name
68+
69+
def index_exists(index_name, client):
    """Return True if ``index_name`` is among the names reported by ``client.list_indexes()``."""
    existing_names = client.list_indexes().names()
    return index_name in existing_names
71+
72+
73+
def random_string():
    """Return a string of 10 randomly chosen lowercase ASCII letters."""
    letters = [random.choice(string.ascii_lowercase) for _ in range(10)]
    return ''.join(letters)
75+
76+
@pytest.fixture(scope='session')
def reusable_collection():
    """
    Create one collection for the whole test session and yield its name.

    Setup creates a temporary pod index, upserts 10 random vectors into it,
    creates a collection from that index, polls until the collection reports
    a ready status (sleeping at most 120 seconds), and deletes the temporary
    index. The collection is deleted after the session's tests have run.

    The temporary index is deleted even when setup fails, and a collection
    created before the failure is deleted on a best-effort basis, so a failed
    setup does not leave resources behind.

    Raises:
        Exception: if the collection is still not ready after 120 seconds.
    """
    pc = Pinecone(
        api_key=get_environment_var('PINECONE_API_KEY'),
        additional_headers={'sdk-test-suite': 'pinecone-python-client'}
    )
    index_name = 'temp-index-' + random_string()
    dimension = int(get_environment_var('DIMENSION'))
    print(f"Creating index {index_name} to prepare a collection...")
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric=get_environment_var('METRIC'),
        spec=PodSpec(
            environment=get_environment_var('PINECONE_ENVIRONMENT'),
        )
    )

    collection_name = None
    try:
        print(f"Created index {index_name}. Waiting 10 seconds to make sure it's ready...")
        time.sleep(10)

        num_vectors = 10
        vectors = [
            (str(i), [random.uniform(0, 1) for _ in range(dimension)])
            for i in range(num_vectors)
        ]
        pc.Index(index_name).upsert(vectors=vectors)

        # Record the name before the call so a partially created collection
        # is still cleaned up if create_collection itself raises.
        collection_name = 'reused-coll-' + random_string()
        pc.create_collection(
            name=collection_name,
            source=index_name
        )

        time_waited = 0
        status = pc.describe_collection(collection_name)['status']
        while status.lower() != 'ready' and time_waited < 120:
            print(f"Waiting for collection {collection_name} to be ready. Waited {time_waited} seconds...")
            time.sleep(5)
            time_waited += 5
            status = pc.describe_collection(collection_name)['status']

        # Decide on the observed status, not the elapsed time: the collection
        # may have become ready on the very last poll.
        if status.lower() != 'ready':
            raise Exception(f"Collection {collection_name} is not ready after 120 seconds")

        print(f"Collection {collection_name} is ready. Deleting index {index_name}...")
    except BaseException:
        if collection_name is not None:
            try:
                pc.delete_collection(collection_name)
            except Exception as e:
                print(f"Unable to delete collection {collection_name}: {e}")
        raise
    finally:
        # The temporary index is only needed to seed the collection.
        pc.delete_index(index_name)

    yield collection_name

    print(f"Deleting collection {collection_name}...")
    pc.delete_collection(collection_name)
129+
130+
@pytest.fixture(autouse=True)
def cleanup(client, index_name):
    """
    After every test, delete the test's index if it still exists.

    Retries every 5 seconds, because a delete attempt can fail (the failure
    is printed and the loop tries again). Gives up once 120 seconds have been
    spent waiting and the index is still listed.

    Raises:
        Exception: if the index still exists after 120 seconds of retries.
    """
    yield

    time_waited = 0
    while index_exists(index_name, client):
        # Only fail while the index is still present; a delete that succeeds
        # on the last attempt, or an index that is already gone, is not an error.
        if time_waited >= 120:
            raise Exception(f"Index {index_name} could not be deleted after 120 seconds")

        print(f"Waiting for index {index_name} to be ready to delete. Waited {time_waited} seconds..")
        time_waited += 5
        time.sleep(5)
        try:
            print(f"Attempting delete of index {index_name}")
            client.delete_index(index_name, -1)
            print(f"Deleted index {index_name}")
            break
        except Exception as e:
            # Best effort: report the failure and retry on the next pass.
            print(f"Unable to delete index {index_name}: {e}")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import string
2+
import random
3+
import pytest
4+
import time
5+
from pinecone import PodSpec
6+
7+
def random_string():
    """Return 10 random lowercase ASCII letters joined into one string."""
    alphabet = string.ascii_lowercase
    return ''.join([random.choice(alphabet) for _ in range(10)])
9+
10+
class TestCollectionsHappyPath:
    """Integration tests for the index -> collection -> index workflow on pod indexes."""

    @staticmethod
    def _wait_for_collection_ready(client, collection_name, desc, max_wait=120, poll_interval=5):
        """Poll ``describe_collection`` until the status is ready or ``max_wait`` seconds were slept.

        ``desc`` is the description already fetched by the caller. Returns the
        most recent description; the caller checks its status.
        """
        time_waited = 0
        while desc['status'].lower() != 'ready' and time_waited < max_wait:
            print(f"Waiting for collection {collection_name} to be ready. Waited {time_waited} seconds...")
            time.sleep(poll_interval)
            time_waited += poll_interval
            desc = client.describe_collection(collection_name)
        return desc

    @staticmethod
    def _delete_index_if_present(client, index_name, *delete_args):
        """Delete ``index_name`` only if it is listed, so cleanup works after a failed create."""
        if index_name in client.list_indexes().names():
            client.delete_index(index_name, *delete_args)

    def test_index_to_collection_to_index_happy_path(self, client, environment, dimension, metric, ready_index, random_vector):
        """Upsert vectors, create a collection from the index, then build a new index from that collection."""
        index = client.Index(ready_index)
        num_vectors = 10
        vectors = [(str(i), random_vector()) for i in range(num_vectors)]
        index.upsert(vectors=vectors)

        collection_name = 'coll1-' + random_string()
        index_name = 'index-from-collection-' + collection_name
        client.create_collection(name=collection_name, source=ready_index)

        # Everything after creation runs under try/finally so the collection
        # and the derived index are removed even when an assertion fails.
        try:
            desc = client.describe_collection(collection_name)
            assert desc['name'] == collection_name
            assert desc['environment'] == environment
            assert desc['status'] == 'Initializing'

            desc = self._wait_for_collection_ready(client, collection_name, desc)

            assert collection_name in client.list_collections().names()

            # Judge by the observed status: the collection may have become
            # ready on the final poll.
            if desc['status'].lower() != 'ready':
                raise Exception(f"Collection {collection_name} is not ready after 120 seconds")

            # After collection ready, these should all be defined
            assert desc['name'] == collection_name
            assert desc['status'] == 'Ready'
            assert desc['environment'] == environment
            assert desc['dimension'] == dimension
            assert desc['vector_count'] == num_vectors
            assert desc['size'] is not None
            assert desc['size'] > 0

            # Create index from collection
            print(f"Creating index {index_name} from collection {collection_name}...")
            client.create_index(
                name=index_name,
                dimension=dimension,
                metric=metric,
                spec=PodSpec(
                    environment=environment,
                    source_collection=collection_name
                )
            )
            print(f"Created index {index_name} from collection {collection_name}. Waiting a little more to make sure it's ready...")
            time.sleep(30)
            desc = client.describe_index(index_name)
            assert desc['name'] == index_name
            assert desc['status']['ready'] is True

            new_index = client.Index(index_name)

            # Verify stats reflect the vectors present in the collection
            stats = new_index.describe_index_stats()
            print(stats)
            assert stats.total_vector_count == num_vectors

            # Verify the vectors from the collection can be fetched
            results = new_index.fetch(ids=[vector_id for vector_id, _ in vectors])
            print(results)
            for vector_id, values in vectors:
                assert results.vectors[vector_id].id == vector_id
                assert results.vectors[vector_id].values == pytest.approx(values, rel=0.01)
        finally:
            # Cleanup
            try:
                client.delete_collection(collection_name)
            finally:
                self._delete_index_if_present(client, index_name)

    def test_create_index_with_different_metric_from_orig_index(self, client, dimension, metric, environment, reusable_collection):
        """Create an index from the shared collection using a metric other than the configured one."""
        metrics = ['cosine', 'euclidean', 'dotproduct']
        target_metric = random.choice([x for x in metrics if x != metric])

        index_name = 'from-coll-' + random_string()
        try:
            client.create_index(
                name=index_name,
                dimension=dimension,
                metric=target_metric,
                spec=PodSpec(
                    environment=environment,
                    source_collection=reusable_collection
                )
            )
            time.sleep(10)
        finally:
            # Remove the index even if create_index raised after creating it.
            self._delete_index_if_present(client, index_name, -1)

0 commit comments

Comments
 (0)