Skip to content

Commit 8a69de5

Browse files
authored
openai[patch]: ignore file blocks when counting tokens (#30601)
OpenAI does not appear to document how it transforms PDF pages to images, which determines how tokens are counted (see https://platform.openai.com/docs/guides/pdf-files?api-mode=chat#usage-considerations ). Currently, `file` content blocks raise a ValueError inside `get_num_tokens_from_messages`. This change emits a warning for such blocks and continues counting, ignoring the file input.
1 parent 5581911 commit 8a69de5

File tree

2 files changed

+25
-0
lines changed
  • libs/partners/openai

2 files changed

+25
-0
lines changed

libs/partners/openai/langchain_openai/chat_models/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1298,6 +1298,12 @@ def get_num_tokens_from_messages(
12981298
encoding.encode(val["function"]["arguments"])
12991299
)
13001300
num_tokens += len(encoding.encode(val["function"]["name"]))
1301+
elif val["type"] == "file":
1302+
warnings.warn(
1303+
"Token counts for file inputs are not supported. "
1304+
"Ignoring file inputs."
1305+
)
1306+
pass
13011307
else:
13021308
raise ValueError(
13031309
f"Unrecognized content block type\n\n{val}"

libs/partners/openai/tests/unit_tests/chat_models/test_base.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,25 @@ def test_get_num_tokens_from_messages() -> None:
752752
actual = llm.get_num_tokens_from_messages(messages)
753753
assert expected == actual
754754

755+
# Test file inputs
756+
messages = [
757+
HumanMessage(
758+
[
759+
"Summarize this document.",
760+
{
761+
"type": "file",
762+
"file": {
763+
"filename": "my file",
764+
"file_data": "data:application/pdf;base64,<data>",
765+
},
766+
},
767+
]
768+
)
769+
]
770+
with pytest.warns(match="file inputs are not supported"):
771+
actual = llm.get_num_tokens_from_messages(messages)
772+
assert actual == 13
773+
755774

756775
class Foo(BaseModel):
757776
bar: int

0 commit comments

Comments
 (0)