From ab31212442439852bf272e7d42671a050654ae1d Mon Sep 17 00:00:00 2001 From: Dave Rigby Date: Mon, 29 Jan 2024 10:10:43 +0000 Subject: [PATCH] gRPC: parse_query_response: Skip parsing empty Usage When parsing the result of a gRPC query() call, we unconditionally create a Usage model object, even if no usage information was returned (e.g. non-serverless index). This adds a small but not insignificant cost to every query() call - mostly due to the fact we use OpenAPI auto-generated model code for the Usage and QueryResponse objects. Benchmarks using a simple PineconeGRPC-based program making query() calls against a p2.x4 pod show a 1.05x improvement in QPS by only constructing a Usage class (and associating it to QueryResponse) if a 'usage' field is present in the protobuf response: Before: Type Name # reqs # fails | Avg Min Max Med | req/s failures/s --------|----------------------------------------------------------------------------|-------|-------------|-------|-------|-------|-------|--------|----------- grpc query_pinecone_no_filter 3223 0(0.00%) | 17 17 139 18 | 55.01 0.00 --------|----------------------------------------------------------------------------|-------|-------------|-------|-------|-------|-------|--------|----------- After: Type Name # reqs # fails | Avg Min Max Med | req/s failures/s --------|----------------------------------------------------------------------------|-------|-------------|-------|-------|-------|-------|--------|----------- grpc query_pinecone_no_filter 3408 0(0.00%) | 17 16 96 17 | 57.55 0.00 --------|----------------------------------------------------------------------------|-------|-------------|-------|-------|-------|-------|--------|----------- --- pinecone/grpc/utils.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/pinecone/grpc/utils.py b/pinecone/grpc/utils.py index c1f69bbb..c03cfc0f 100644 --- a/pinecone/grpc/utils.py +++ b/pinecone/grpc/utils.py @@ -50,13 +50,13 @@ def 
parse_fetch_response(response: dict): return FetchResponse( vectors=vd, namespace=namespace, - usage=parse_usage(response), + usage=parse_usage(response.get("usage", {})), _check_type=False ) -def parse_usage(response): - u = response.get("usage", {}) - return Usage(read_units=int(u.get("readUnits", 0))) + +def parse_usage(usage: dict): + return Usage(read_units=int(usage.get("readUnits", 0))) def parse_query_response(response: dict, _check_type: bool = False): @@ -72,13 +72,16 @@ def parse_query_response(response: dict, _check_type: bool = False): ) matches.append(sc) - return QueryResponse( - namespace=response.get("namespace", ""), - matches=matches, - usage = parse_usage(response), - _check_type=_check_type - ) - + # Due to OpenAPI model classes / actual parsing cost, we want to avoid + # creating empty `Usage` objects and then passing them into QueryResponse + # when they are not actually present in the response from the server. + args = {'namespace': response.get("namespace", ""), + 'matches': matches, + '_check_type': _check_type} + usage = response.get("usage") + if usage: + args['usage'] = parse_usage(usage) + return QueryResponse(**args) def parse_stats_response(response: dict): fullness = response.get("indexFullness", 0.0)