From ab31212442439852bf272e7d42671a050654ae1d Mon Sep 17 00:00:00 2001
From: Dave Rigby <dave.r@pinecone.io>
Date: Mon, 29 Jan 2024 10:10:43 +0000
Subject: [PATCH] gRPC: parse_query_response: Skip parsing empty Usage

When parsing the result of a gRPC query() call, we unconditionally
create a Usage model object, even if no usage information was returned
(e.g. non-serverless index).

This adds a small but not insignificant cost to every query() call -
mostly due to the fact we use OpenAPI auto-generated model code for
the Usage and QueryResponse objects.

Benchmarks using a simple PineconeGRPC-based program making query()
calls against a p2.x4 pod show a 1.05x improvement in QPS by only
constructing a Usage object (and associating it with QueryResponse) if a
'usage' field is present in the protobuf response:

Before:

    Type     Name                                                                          # reqs      # fails |    Avg     Min     Max    Med |   req/s  failures/s
    --------|----------------------------------------------------------------------------|-------|-------------|-------|-------|-------|-------|--------|-----------
    grpc     query_pinecone_no_filter                                                        3223     0(0.00%) |     17      17     139     18 |   55.01        0.00
    --------|----------------------------------------------------------------------------|-------|-------------|-------|-------|-------|-------|--------|-----------

After:

    Type     Name                                                                          # reqs      # fails |    Avg     Min     Max    Med |   req/s  failures/s
    --------|----------------------------------------------------------------------------|-------|-------------|-------|-------|-------|-------|--------|-----------
    grpc     query_pinecone_no_filter                                                        3408     0(0.00%) |     17      16      96     17 |   57.55        0.00
    --------|----------------------------------------------------------------------------|-------|-------------|-------|-------|-------|-------|--------|-----------
---
 pinecone/grpc/utils.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/pinecone/grpc/utils.py b/pinecone/grpc/utils.py
index c1f69bbb..c03cfc0f 100644
--- a/pinecone/grpc/utils.py
+++ b/pinecone/grpc/utils.py
@@ -50,13 +50,13 @@ def parse_fetch_response(response: dict):
     return FetchResponse(
         vectors=vd, 
         namespace=namespace,
-        usage=parse_usage(response),
+        usage=parse_usage(response.get("usage", {})),
         _check_type=False
     )
 
-def parse_usage(response):
-    u = response.get("usage", {})
-    return Usage(read_units=int(u.get("readUnits", 0)))
+
+def parse_usage(usage: dict):
+    return Usage(read_units=int(usage.get("readUnits", 0)))
 
 
 def parse_query_response(response: dict, _check_type: bool = False):
@@ -72,13 +72,16 @@ def parse_query_response(response: dict, _check_type: bool = False):
         )
         matches.append(sc)
 
-    return QueryResponse(
-        namespace=response.get("namespace", ""), 
-        matches=matches,
-        usage = parse_usage(response),
-        _check_type=_check_type
-    )
-
+    # Due to OpenAPI model classes / actual parsing cost, we want to avoid
+    # creating empty `Usage` objects and then passing them into QueryResponse
+    # when they are not actually present in the response from the server.
+    args = {'namespace': response.get("namespace", ""),
+            'matches': matches,
+            '_check_type': _check_type}
+    usage = response.get("usage")
+    if usage:
+        args['usage'] = parse_usage(usage)
+    return QueryResponse(**args)
 
 def parse_stats_response(response: dict):
     fullness = response.get("indexFullness", 0.0)