Skip to content

Commit 3c10c03

Browse files
authored
Litellm 1.66.0 (#94)
* bump litellm * client v2.5.4 * vector_stores no longer in beta * comment out response_format=json_object from tests * comment out response_format=json_object from tests * embedding to_dict * client v2.5.5 * Update inference_utils.py
1 parent 3f43c19 commit 3c10c03

12 files changed

+66
-40
lines changed

client/astra_assistants/patch.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,10 @@ def patched_create(self, *args, **kwargs):
403403
):
404404
# TODO figure out how to get the model from the tool resources
405405
vector_store_id = assistant.tool_resources.file_search.vector_store_ids[0]
406-
file_list_paginator = client.beta.vector_stores.files.list(vector_store_id=vector_store_id)
406+
try:
407+
file_list_paginator = client.beta.vector_stores.files.list(vector_store_id=vector_store_id)
408+
except Exception as e:
409+
file_list_paginator = client.vector_stores.files.list(vector_store_id=vector_store_id)
407410
vs_file = async_helper.run_async(fetch_first_page(file_list_paginator))
408411
if vs_file is not None:
409412
# use the first file
@@ -453,7 +456,10 @@ def patched_create(self, *args, **kwargs):
453456
):
454457
# TODO figure out how to get the model from the tool resources
455458
vector_store_id = assistant.tool_resources.file_search.vector_store_ids[0]
456-
vs_files = client.beta.vector_stores.files.list(vector_store_id=vector_store_id).data
459+
try:
460+
vs_files = client.beta.vector_stores.files.list(vector_store_id=vector_store_id).data
461+
except Exception as e:
462+
vs_files = client.vector_stores.files.list(vector_store_id=vector_store_id).data
457463
if len(vs_files) > 0:
458464
# use the first file
459465
vs_file: VectorStoreFile= vs_files[0]
@@ -503,7 +509,10 @@ async def patched_create(self, *args, **kwargs):
503509
):
504510
# TODO figure out how to get the model from the tool resources
505511
vector_store_id = assistant.tool_resources.file_search.vector_store_ids[0]
506-
vs_files = await client.beta.vector_stores.files.list(vector_store_id=vector_store_id).data
512+
try:
513+
vs_files = await client.beta.vector_stores.files.list(vector_store_id=vector_store_id).data
514+
except Exception as e:
515+
vs_files = await client.vector_stores.files.list(vector_store_id=vector_store_id).data
507516
if len(vs_files) > 0:
508517
# use the first file
509518
vs_file: VectorStoreFile= vs_files[0]

client/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "astra-assistants"
3-
version = "2.5.4"
3+
version = "2.5.5"
44
description = "Astra Assistants API - drop in replacement for OpenAI Assistants, powered by AstraDB"
55
authors = ["phact <estevezsebastian@gmail.com>"]
66
readme = "README.md"

client/tests/astra-assistants/test_chat_completion.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def print_chat_completion(model, client):
4242
{"role": "system", "content": "You are an amazing json generator."},
4343
{"role": "user", "content": prompt}
4444
],
45-
response_format={"type": "json_object"},
45+
#response_format={"type": "json_object"},
4646
)
4747

4848
logger.info(f'prompt> {prompt}')

client/tests/astra-assistants/test_run_retreival_v2.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,17 @@ def run_with_assistant(assistant, client, file_path, embedding_model):
3232
pass
3333

3434

35-
vector_store = client.beta.vector_stores.create(
35+
vector_store = client.vector_stores.create(
3636
name="papers",
3737
file_ids=[file.id]
3838
)
3939

40-
vs_list = client.beta.vector_stores.list()
40+
vs_list = client.vector_stores.list()
4141

4242
assert len(vs_list.data) > 0, "vector store list is empty"
4343

4444
# TODO support vector store file creation
45-
#file = client.beta.vector_stores.files.create_and_poll(
45+
#file = client.vector_stores.files.create_and_poll(
4646
# vector_store_id=vector_store.id,
4747
# file_id=file2.id
4848
#)
@@ -54,7 +54,7 @@ def run_with_assistant(assistant, client, file_path, embedding_model):
5454

5555
# Use the upload and poll SDK helper to upload the files, add them to the vector store,
5656
# and poll the status of the file batch for completion.
57-
#file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
57+
#file_batch = client.vector_stores.file_batches.upload_and_poll(
5858
# vector_store_id=vector_store.id, files=file_streams
5959
#)
6060

@@ -116,10 +116,10 @@ def run_with_assistant(assistant, client, file_path, embedding_model):
116116
response = client.beta.threads.messages.list(thread_id=thread.id)
117117
logger.info(response.data[0].content[0].text.value)
118118

119-
vs_files = client.beta.vector_stores.files.list(vector_store_id=vector_store.id)
119+
vs_files = client.vector_stores.files.list(vector_store_id=vector_store.id)
120120
for vsf in vs_files.data:
121-
client.beta.vector_stores.files.delete(file_id=vsf.id, vector_store_id=vector_store.id)
122-
client.beta.vector_stores.delete(vector_store.id)
121+
client.vector_stores.files.delete(file_id=vsf.id, vector_store_id=vector_store.id)
122+
client.vector_stores.delete(vector_store.id)
123123

124124

125125

client/tests/astra-assistants/test_streaming_run_retrieval_async_v2.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@ async def run_with_assistant(assistant, client):
2323
purpose="assistants",
2424
)
2525

26-
vector_store = await client.beta.vector_stores.create(
26+
vector_store = await client.vector_stores.create(
2727
name="papers",
2828
file_ids=[file.id]
2929
)
3030

3131
# TODO support vector store file creation
32-
#file = await client.beta.vector_stores.files.create_and_poll(
32+
#file = await client.vector_stores.files.create_and_poll(
3333
# vector_store_id=vector_store.id,
3434
# file_id=file2.id
3535
#)
@@ -41,7 +41,7 @@ async def run_with_assistant(assistant, client):
4141

4242
# Use the upload and poll SDK helper to upload the files, add them to the vector store,
4343
# and poll the status of the file batch for completion.
44-
#file_batch = await client.beta.vector_stores.file_batches.upload_and_poll(
44+
#file_batch = await client.vector_stores.file_batches.upload_and_poll(
4545
# vector_store_id=vector_store.id, files=file_streams
4646
#)
4747

client/tests/astra-assistants/test_streaming_run_retrieval_v2.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@ def run_with_assistant(assistant, client):
1919
purpose="assistants",
2020
)
2121

22-
vector_store = client.beta.vector_stores.create(
22+
vector_store = client.vector_stores.create(
2323
name="papers",
2424
file_ids=[file.id]
2525
)
2626

2727
# TODO support vector store file creation
28-
#file = client.beta.vector_stores.files.create_and_poll(
28+
#file = client.vector_stores.files.create_and_poll(
2929
# vector_store_id=vector_store.id,
3030
# file_id=file2.id
3131
#)
@@ -37,7 +37,7 @@ def run_with_assistant(assistant, client):
3737

3838
# Use the upload and poll SDK helper to upload the files, add them to the vector store,
3939
# and poll the status of the file batch for completion.
40-
#file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
40+
#file_batch = client.vector_stores.file_batches.upload_and_poll(
4141
# vector_store_id=vector_store.id, files=file_streams
4242
#)
4343

client/tests/openai-sdk/test_assistants_v2.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def test_assistants_crud(openai_client):
4747
assert asst.response_format == response_format
4848
assert len(asst.tool_resources.file_search.vector_store_ids[0]) > 0
4949

50-
vs = openai_client.beta.vector_stores.retrieve(asst.tool_resources.file_search.vector_store_ids[0])
50+
vs = openai_client.vector_stores.retrieve(asst.tool_resources.file_search.vector_store_ids[0])
5151
assert vs.id == asst.tool_resources.file_search.vector_store_ids[0]
5252

5353
assert asst.name == "Math Tutor"

impl/routes/stateless.py

+2
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,8 @@ async def create_embedding(
323323

324324
data = []
325325
for datum in embedding_response.data:
326+
if hasattr(datum, "to_dict"):
327+
datum = datum.to_dict()
326328
embedding = Embedding(**datum)
327329
data.append(embedding)
328330

impl/services/inference_utils.py

+21-7
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ def get_embeddings(
8383
return [result["embedding"] for result in response.data]
8484

8585

86+
import asyncio
87+
from litellm.exceptions import RateLimitError
88+
8689
async def get_async_chat_completion_response(
8790
messages: List[Dict[str, Any]],
8891
model: Optional[str] = None,
@@ -116,13 +119,24 @@ async def get_async_chat_completion_response(
116119
else:
117120
litellm_kwargs[key] = type_hints[key](value)
118121

119-
completion = await acompletion(
120-
model=model,
121-
messages=messages,
122-
deployment_id=deployment_id,
123-
**litellm_kwargs
124-
)
125-
return completion
122+
max_retries = 5
123+
for attempt in range(max_retries):
124+
try:
125+
# Your existing logic to get the response
126+
completion = await acompletion(
127+
model=model,
128+
messages=messages,
129+
deployment_id=deployment_id,
130+
**litellm_kwargs
131+
)
132+
return completion
133+
except RateLimitError as e:
134+
if attempt < max_retries - 1:
135+
backoff_time = 2 ** attempt # Exponential backoff
136+
await asyncio.sleep(backoff_time)
137+
else:
138+
raise HTTPException(status_code=429, detail=f"Rate limit exceeded: {e}")
139+
126140
except Exception as e:
127141
if "LLM Provider NOT provided" in e.args[0]:
128142
logger.error(f"Error: error {model} is not currently supported")

poetry.lock

+13-12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ tiktoken = "^0.7.0"
5050
docx2txt = "^0.8"
5151
pypdf2 = "^3.0.1"
5252
python-pptx = "^0.6.23"
53-
litellm = "1.42.5"
53+
litellm = "1.66.0"
5454
boto3 = "^1.29.6"
5555
prometheus-fastapi-instrumentator = "^6.1.0"
5656
google-cloud-aiplatform = "^1.38.0"

tests/http/test_chat_api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def test_create_chat_completion(client: TestClient):
4141
"n":2,
4242
"top_p":1,
4343
"frequency_penalty":-1.6796687238155954,
44-
"response_format":{"type":"json_object"},
44+
#"response_format":{"type":"json_object"},
4545
"stream":False,
4646
"temperature":1,
4747
"messages":[

0 commit comments

Comments (0)