Commit d1fda39

[Chore] Improve test flakes (#404)
## Problem

I frequently see recurring test flakes, most often at the cleanup stage of `test_configure_index_with_deletion_protection`, because the index cannot be deleted while it is still in an "upgrading" state. The index remains in that state while configuration changes are being applied.

## Solution

- In the deletion protection test, wait for the index to be ready before attempting the delete (a minimal sketch of this wait-then-delete pattern appears after the checklist below).
- In the dependency tests that do a basic sanity check, loosen the assertions. These tests aren't meant to exercise backend functionality; they only need to confirm that network calls succeed across a variety of grpc / protobuf / etc. dependency versions.
- Remove some assertions that rely on non-deterministic backend behavior, e.g. how long it takes for a serverless index to be considered ready.
- Check the deletion protection status before trying to delete.
- Don't fail the job when cleanup fails; even if we re-run the entire job, there will still be an orphaned index from a previous run that needs to be cleaned up. Best to just attempt the delete and let the nightly cleanup job garbage collect anything left over.

## Type of Change

- [x] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] This change requires a documentation update
- [x] Infrastructure change (CI configs, etc)
- [ ] Non-code change (docs, etc)
- [ ] None of the above: (explain here)
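
Below is a minimal sketch of the wait-then-delete cleanup pattern described above, assuming the `Pinecone` client used throughout this repo; the helper name `safe_delete_index` and the 2-second / 60-second timings are illustrative, not part of this commit:

```python
import time

from pinecone import Pinecone


def safe_delete_index(pc: Pinecone, index_name: str, max_wait: int = 60) -> None:
    """Best-effort delete: disable deletion protection, wait for Ready, never raise."""
    try:
        desc = pc.describe_index(index_name)

        # Deletion is rejected while protection is enabled, so turn it off first.
        if desc.deletion_protection == "enabled":
            pc.configure_index(index_name, deletion_protection="disabled")

        # Deletion is also rejected while the index is applying configuration
        # changes (the "upgrading" state), so poll until it reports Ready or we give up.
        waited = 0
        while pc.describe_index(index_name).status.state != "Ready" and waited < max_wait:
            time.sleep(2)
            waited += 2

        pc.delete_index(index_name)
    except Exception as e:
        # Anything left behind is garbage collected by the nightly cleanup job.
        print(f"Failed to delete index {index_name}: {e}")
```
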
1 parent 3780924 commit d1fda39

File tree

9 files changed (+120 -32 lines)

.github/actions/cleanup-all/action.yml

+6 -1

@@ -5,12 +5,16 @@ inputs:
   PINECONE_API_KEY:
     description: 'The Pinecone API key'
     required: true
+  DELETE_ALL:
+    description: 'Delete all indexes and collections'
+    required: false
+    default: 'false'
 
 runs:
   using: 'composite'
   steps:
     - name: Set up Python
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: 3.9
     - name: Setup Poetry
@@ -20,3 +24,4 @@ runs:
       run: poetry run python3 scripts/cleanup-all.py
       env:
         PINECONE_API_KEY: ${{ inputs.PINECONE_API_KEY }}
+        DELETE_ALL: ${{ inputs.DELETE_ALL }}

scripts/cleanup-all.py

+25

@@ -2,6 +2,7 @@
 import re
 from pinecone import Pinecone
 from datetime import datetime, timedelta
+import time
 
 
 def delete_everything(pc):
@@ -16,6 +17,30 @@ def delete_everything(pc):
     for index in pc.list_indexes().names():
         try:
             print("Deleting index: " + index)
+            desc = pc.describe_index(index)
+
+            # Check whether index can be deleted
+            if desc.deletion_protection == "enabled":
+                pc.configure_index(index, deletion_protection="disabled")
+
+            # Wait for index to be ready before deleting
+            ready_to_delete = False
+            max_wait = 60
+            time_waited = 0
+            while not ready_to_delete:
+                desc = pc.describe_index(index)
+                if desc.status.state == "Ready":
+                    ready_to_delete = True
+                    break
+                else:
+                    print("Index is not ready yet. Waiting for 2 seconds.")
+                    time.sleep(2)
+                    time_waited += 2
+
+                if time_waited > max_wait:
+                    print(f"Timed out waiting for index {index} to be ready")
+                    break
+
             pc.delete_index(index)
         except Exception as e:
             print("Failed to delete index: " + index + " " + str(e))

tests/dependency/grpc/test_sanity.py

+1 -1

@@ -36,7 +36,7 @@ def test_sanity(self, index_name, client):
         # Check the vector count reflects some data has been upserted
         description = idx.describe_index_stats()
         assert description.dimension == 2
-        assert description.total_vector_count == 3
+        assert description.total_vector_count >= 3
 
         # Query for results
         query_results = idx.query(id="1", top_k=10, include_values=True)

tests/dependency/rest/test_sanity.py

+1 -1

@@ -36,7 +36,7 @@ def test_sanity(self, index_name, client):
         # Check the vector count reflects some data has been upserted
         description = idx.describe_index_stats()
         assert description.dimension == 2
-        assert description.total_vector_count == 3
+        assert description.total_vector_count >= 3
 
         # Query for results
         query_results = idx.query(id="1", top_k=10, include_values=True)

tests/integration/control/pod/conftest.py

+40 -12

@@ -54,7 +54,7 @@ def ready_index(client, index_name, create_index_params):
     client.create_index(**create_index_params)
     time.sleep(10)  # Extra wait, since status is sometimes inaccurate
     yield index_name
-    client.delete_index(index_name, -1)
+    attempt_delete_index(client, index_name)
 
 
 @pytest.fixture()
@@ -64,10 +64,6 @@ def notready_index(client, index_name, create_index_params):
     yield index_name
 
 
-def index_exists(index_name, client):
-    return index_name in client.list_indexes().names()
-
-
 @pytest.fixture(scope="session")
 def reusable_collection():
     pc = Pinecone(
@@ -113,20 +109,43 @@ def reusable_collection():
         raise Exception(f"Collection {collection_name} is not ready after 120 seconds")
 
     print(f"Collection {collection_name} is ready. Deleting index {index_name}...")
-    pc.delete_index(index_name)
+    attempt_delete_index(pc, index_name)
 
     yield collection_name
 
     print(f"Deleting collection {collection_name}...")
-    pc.delete_collection(collection_name)
+    attempt_delete_collection(pc, collection_name)
 
 
-@pytest.fixture(autouse=True)
-def cleanup(client, index_name):
-    yield
+def attempt_delete_collection(client, collection_name):
+    time_waited = 0
+    while collection_name in client.list_collections().names() and time_waited < 120:
+        print(
+            f"Waiting for collection {collection_name} to be ready to delete. Waited {time_waited} seconds.."
+        )
+        time_waited += 5
+        time.sleep(5)
+        try:
+            print(f"Attempting delete of collection {collection_name}")
+            client.delete_collection(collection_name)
+            print(f"Deleted collection {collection_name}")
+            break
+        except Exception as e:
+            print(f"Unable to delete collection {collection_name}: {e}")
+            pass
+
+    if time_waited >= 120:
+        # Things that fail to delete due to transient statuses will be garbage
+        # collected by the nightly cleanup script
+        print(f"Collection {collection_name} could not be deleted after 120 seconds")
+
 
+def attempt_delete_index(client, index_name):
     time_waited = 0
-    while index_exists(index_name, client) and time_waited < 120:
+    while client.has_index(index_name) and time_waited < 120:
+        if client.describe_index(index_name).delete_protection == "enabled":
+            client.configure_index(index_name, deletion_protection="disabled")
+
         print(
             f"Waiting for index {index_name} to be ready to delete. Waited {time_waited} seconds.."
         )
@@ -142,4 +161,13 @@ def cleanup(client, index_name):
             pass
 
     if time_waited >= 120:
-        raise Exception(f"Index {index_name} could not be deleted after 120 seconds")
+        # Things that fail to delete due to transient statuses will be garbage
+        # collected by the nightly cleanup script
+        print(f"Index {index_name} could not be deleted after 120 seconds")
+
+
+@pytest.fixture(autouse=True)
+def cleanup(client, index_name):
+    yield
+
+    attempt_delete_index(client, index_name)

tests/integration/control/pod/test_collections.py

+25 -4

@@ -5,6 +5,28 @@
 from ...helpers import generate_index_name, generate_collection_name
 
 
+def attempt_cleanup_collection(client, collection_name):
+    try:
+        time.sleep(10)
+        client.delete_collection(collection_name)
+    except Exception as e:
+        # Failures here usually happen because the backend thinks there is still some
+        # operation pending on the resource.
+        # These orphaned resources will get cleaned up by the cleanup job later.
+        print(f"Failed to cleanup collection: {e}")
+
+
+def attempt_cleanup_index(client, index_name):
+    try:
+        time.sleep(10)
+        client.delete_index(index_name, -1)
+    except Exception as e:
+        # Failures here usually happen because the backend thinks there is still some
+        # operation pending on the resource.
+        # These orphaned resources will get cleaned up by the cleanup job later.
+        print(f"Failed to cleanup collection: {e}")
+
+
 class TestCollectionsHappyPath:
     def test_index_to_collection_to_index_happy_path(
         self, client, environment, dimension, metric, ready_index, random_vector
@@ -78,8 +100,8 @@ def test_index_to_collection_to_index_happy_path(
         assert results.vectors[v[0]].values == pytest.approx(v[1], rel=0.01)
 
         # Cleanup
-        client.delete_collection(collection_name)
-        client.delete_index(index_name)
+        attempt_cleanup_collection(client, collection_name)
+        attempt_cleanup_index(client, index_name)
 
     def test_create_index_with_different_metric_from_orig_index(
         self, client, dimension, metric, environment, reusable_collection
@@ -94,5 +116,4 @@ def test_create_index_with_different_metric_from_orig_index(
             metric=target_metric,
             spec=PodSpec(environment=environment, source_collection=reusable_collection),
         )
-        time.sleep(10)
-        client.delete_index(index_name, -1)
+        attempt_cleanup_index(client, index_name)

tests/integration/control/pod/test_deletion_protection.py

+22 -2

@@ -1,4 +1,5 @@
 import pytest
+import time
 from pinecone import PodSpec
 
 
@@ -47,5 +48,24 @@ def test_configure_index_with_deletion_protection(self, client, index_name, envi
         assert desc.spec.pod.replicas == 3
         assert desc.deletion_protection == "disabled"
 
-        # Cleanup
-        client.delete_index(index_name)
+        # Wait up to 30*2 seconds for the index to be ready before attempting to delete
+        for t in range(1, 30):
+            delta = 2
+            desc = client.describe_index(index_name)
+            if desc.status.state == "Ready":
+                print(f"Index {index_name} is ready after {(t-1)*delta} seconds")
+                break
+            print("Index is not ready yet. Waiting for 2 seconds.")
+            time.sleep(delta)
+
+        attempts = 0
+        while attempts < 12:
+            try:
+                client.delete_index(index_name)
+                break
+            except Exception as e:
+                attempts += 1
+                print(f"Failed to delete index {index_name} on attempt {attempts}.")
+                print(f"Error: {e}")
+                client.describe_index(index_name)
+                time.sleep(10)

tests/integration/control/serverless/test_create_index_timeouts.py

-8

@@ -1,6 +1,3 @@
-import pytest
-
-
 class TestCreateIndexWithTimeout:
     def test_create_index_default_timeout(self, client, create_sl_index_params):
         create_sl_index_params["timeout"] = None
@@ -17,11 +14,6 @@ def test_create_index_when_timeout_set(self, client, create_sl_index_params):
         desc = client.describe_index(create_sl_index_params["name"])
         assert desc.status.ready == True
 
-    def test_create_index_when_timeout_error(self, client, create_sl_index_params):
-        create_sl_index_params["timeout"] = 1
-        with pytest.raises(TimeoutError):
-            client.create_index(**create_sl_index_params)
-
     def test_create_index_with_negative_timeout(self, client, create_sl_index_params):
         create_sl_index_params["timeout"] = -1
         client.create_index(**create_sl_index_params)

tests/integration/control/serverless/test_describe_index.py

-3

@@ -44,6 +44,3 @@ def test_describe_index_when_not_ready(self, client, notready_sl_index, create_s
         assert isinstance(description.host, str)
         assert description.host != ""
         assert notready_sl_index in description.host
-
-        assert description.status.ready == False
-        assert description.status.state in ["Ready", "Initializing"]
