From 4926ffe078b24865398de42b04a8e20fecccad70 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Mon, 10 Mar 2025 11:46:31 -0400 Subject: [PATCH 1/3] Add pdoc readme, fix relative links --- .github/actions/build-docs/action.yml | 8 + .../actions/build-docs/fix-relative-links.py | 66 +++++ pdoc/README.md | 225 ++++++++++++++++++ pinecone/__init__.py | 2 +- pinecone/data/features/inference/inference.py | 20 ++ .../features/inference/inference_asyncio.py | 19 +- pinecone/grpc/index_grpc.py | 1 + poetry.lock | 35 ++- pyproject.toml | 1 + 9 files changed, 374 insertions(+), 3 deletions(-) create mode 100644 .github/actions/build-docs/fix-relative-links.py create mode 100644 pdoc/README.md diff --git a/.github/actions/build-docs/action.yml b/.github/actions/build-docs/action.yml index 91454c42..e6952f07 100644 --- a/.github/actions/build-docs/action.yml +++ b/.github/actions/build-docs/action.yml @@ -17,8 +17,16 @@ runs: uses: ./.github/actions/setup-poetry with: include_grpc: 'true' + include_dev: 'true' - name: Build html documentation shell: bash run: | poetry run pdoc pinecone '!pinecone.core' '!pinecone.utils' --favicon ./favicon-32x32.png --docformat google -o ./pdoc + + - name: Fix relative links + shell: bash + run: | + poetry run python3 fix-relative-links.py ./pdoc ./pdoc + env: + BASE_URL: "https://github.com/pinecone-io/pinecone-python-client/blob/main/" diff --git a/.github/actions/build-docs/fix-relative-links.py b/.github/actions/build-docs/fix-relative-links.py new file mode 100644 index 00000000..d54a3f19 --- /dev/null +++ b/.github/actions/build-docs/fix-relative-links.py @@ -0,0 +1,66 @@ +import os +import sys +from bs4 import BeautifulSoup +from urllib.parse import urljoin + +# Base URL to prepend to relative links +BASE_URL = os.environ.get( + "BASE_URL", "https://github.com/pinecone-io/pinecone-python-client/blob/main/" +) + + +def replace_relative_links(html): + soup = BeautifulSoup(html, "html.parser") + + # Find all anchor tags with an href attribute 
+ for a in soup.find_all("a", href=True): + href = a["href"] + # Skip if the link is already absolute or an anchor link + if href.startswith(("http://", "https://", "#")): + continue + + # Skip if the link is not a markdown file + if not href.endswith(".md"): + continue + + # Replace the relative link with an absolute URL + new_href = urljoin(BASE_URL, href) + print(f"{href} => {new_href}") + a["href"] = new_href + return str(soup) + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python script.py input.html [output.html]") + sys.exit(1) + + input_dir = sys.argv[1] + output_dir = sys.argv[2] if len(sys.argv) > 2 else None + + # Recursively process all html files in the input directory + for root, dirs, files in os.walk(input_dir): + for file in files: + if not file.endswith(".html"): + continue + + print(f"Processing {file}") + input_path = os.path.join(root, file) + + with open(input_path, "r", encoding="utf-8") as f: + html = f.read() + + updated_html = replace_relative_links(html) + + if output_dir: + # Get the relative path from input_dir to maintain folder structure + rel_path = os.path.relpath(input_path, input_dir) + output_path = os.path.join(output_dir, rel_path) + + # Create the necessary subdirectories + os.makedirs(os.path.dirname(output_path), exist_ok=True) + + with open(output_path, "w", encoding="utf-8") as f: + f.write(updated_html) + else: + print(updated_html) diff --git a/pdoc/README.md b/pdoc/README.md new file mode 100644 index 00000000..06dea43a --- /dev/null +++ b/pdoc/README.md @@ -0,0 +1,225 @@ +# Pinecone Python SDK +![License](https://img.shields.io/github/license/pinecone-io/pinecone-python-client?color=orange) + +The official Pinecone Python SDK. 
+ +## Documentation + +- [**Conceptual docs and guides**](https://docs.pinecone.io) +- [**GitHub Source**](https://github.com/pinecone-io/pinecone-python-client) + +## Points of interest + +**DB control plane** + +- [`Pinecone`](./pinecone/control/pinecone.html) +- [`PineconeAsyncio`](./pinecone/control/pinecone_asyncio.html) +- [`PineconeGRPC`](./pinecone/grpc/pinecone.html) + +**DB data operations** +- [`Index`](./pinecone/data/index.html) +- [`IndexAsyncio`](./pinecone/data/index_asyncio.html) +- [`GRPCIndex`](./pinecone/grpc/index_grpc.html) + +**Inference API** +- [`Inference`](./pinecone/data/features/inference/inference.html) +- [`AsyncioInference`](./pinecone/data/features/inference/inference_asyncio.html) + +## Upgrading the SDK + +> [!NOTE] +> The official SDK package was renamed from `pinecone-client` to `pinecone` beginning in version `5.1.0`. +> Please remove `pinecone-client` from your project dependencies and add `pinecone` instead to get +> the latest updates. + +For notes on changes between major versions, see [Upgrading](./docs/upgrading.md). + +## Prerequisites + +- The Pinecone Python SDK is compatible with Python 3.9 and greater. It has been tested with CPython versions from 3.9 to 3.13. +- Before you can use the Pinecone SDK, you must sign up for an account and find your API key in the Pinecone console dashboard at [https://app.pinecone.io](https://app.pinecone.io). + +## Installation + +The Pinecone Python SDK is distributed on PyPI using the package name `pinecone`. By default the `pinecone` package has a minimal set of dependencies, but you can install some extras to unlock additional functionality. + +Available extras: + +- `pinecone[asyncio]` will add a dependency on `aiohttp` and enable usage of `PineconeAsyncio`, the asyncio-enabled version of the client for use with highly asynchronous modern web frameworks such as FastAPI. 
+- `pinecone[grpc]` will add dependencies on `grpcio` and related libraries needed to make pinecone data calls such as `upsert` and `query` over [GRPC](https://grpc.io/) for a modest performance improvement. See the guide on [tuning performance](https://docs.pinecone.io/docs/performance-tuning). + +#### Installing with pip + +```shell +# Install the latest version +pip3 install pinecone + +# Install the latest version, with optional dependencies +pip3 install "pinecone[asyncio,grpc]" +``` + +#### Installing with uv + +[uv](https://docs.astral.sh/uv/) is a modern package manager that runs 10-100x faster than pip and supports most pip syntax. + +```shell +# Install the latest version +uv add pinecone + +# Install the latest version, optional dependencies +uv add "pinecone[asyncio,grpc]" +``` + +#### Installing with [poetry](https://python-poetry.org/) + +```shell +# Install the latest version +poetry add pinecone + +# Install the latest version, with optional dependencies +poetry add pinecone --extras asyncio --extras grpc +``` + +# Quickstart + +## Bringing your own vectors to Pinecone + +```python +from pinecone import ( + Pinecone, + ServerlessSpec, + CloudProvider, + AwsRegion, + VectorType +) + +# 1. Instantiate the Pinecone client +pc = Pinecone(api_key='YOUR_API_KEY') + +# 2. Create an index +index_config = pc.create_index( + name="index-name", + dimension=1536, + spec=ServerlessSpec( + cloud=CloudProvider.AWS, + region=AwsRegion.US_EAST_1 + ), + vector_type=VectorType.DENSE +) + +# 3. Instantiate an Index client +idx = pc.Index(host=index_config.host) + +# 4. Upsert embeddings +idx.upsert( + vectors=[ + ("id1", [0.1, 0.2, 0.3, 0.4, ...], {"metadata_key": "value1"}), + ("id2", [0.2, 0.3, 0.4, 0.5, ...], {"metadata_key": "value2"}), + ], + namespace="example-namespace" +) + +# 5. Query your index using an embedding +query_embedding = [...] 
# list should have length == index dimension +idx.query( + vector=query_embedding, + top_k=10, + include_metadata=True, + filter={"metadata_key": { "$eq": "value1" }} +) +``` + +## Bring your own data using Pinecone integrated inference + +```python +from pinecone import ( + Pinecone, + CloudProvider, + AwsRegion, + EmbedModel, IndexEmbed, +) + +# 1. Instantiate the Pinecone client +pc = Pinecone(api_key="YOUR_API_KEY") + +# 2. Create an index configured for use with a particular model +index_config = pc.create_index_for_model( + name="my-model-index", + cloud=CloudProvider.AWS, + region=AwsRegion.US_EAST_1, + embed=IndexEmbed( + model=EmbedModel.Multilingual_E5_Large, + field_map={"text": "my_text_field"} + ) +) + +# 3. Instantiate an Index client +idx = pc.Index(host=index_config.host) + +# 4. Upsert records +idx.upsert_records( + namespace="my-namespace", + records=[ + { + "_id": "test1", + "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.", + }, + { + "_id": "test2", + "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.", + }, + { + "_id": "test3", + "my_text_field": "Many people enjoy eating apples as a healthy snack.", + }, + { + "_id": "test4", + "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.", + }, + { + "_id": "test5", + "my_text_field": "An apple a day keeps the doctor away, as the saying goes.", + }, + { + "_id": "test6", + "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.", + }, + ], +) + +# 5. 
Search for similar records +from pinecone import SearchQuery, SearchRerank, RerankModel + +response = idx.search_records( + namespace="my-namespace", + query=SearchQuery( + inputs={ + "text": "Apple corporation", + }, + top_k=3 + ), + rerank=SearchRerank( + model=RerankModel.Bge_Reranker_V2_M3, + rank_fields=["my_text_field"], + top_n=3, + ), +) +``` + +## More information on usage + +Detailed information on specific ways of using the SDK is covered in these other pages. + + +- [Serverless Indexes](./docs/db_control/serverless-indexes.md) +- [Pod Indexes](./docs/db_control/pod-indexes.md) +- [Working with vectors](./docs/db_data/index-usage-byov.md) +- [Inference API](./docs/inference-api.md) +- [FAQ](./docs/faq.md) + + +# Issues & Bugs + +If you notice bugs or have feedback, please [file an issue](https://github.com/pinecone-io/pinecone-python-client/issues). + +You can also get help in the [Pinecone Community Forum](https://community.pinecone.io/). diff --git a/pinecone/__init__.py b/pinecone/__init__.py index d1f2b132..13a65bd1 100644 --- a/pinecone/__init__.py +++ b/pinecone/__init__.py @@ -1,5 +1,5 @@ """ -.. include:: ../README.md +.. include:: ../pdoc/README.md """ from .deprecated_plugins import check_for_deprecated_plugins diff --git a/pinecone/data/features/inference/inference.py b/pinecone/data/features/inference/inference.py index 8fbaaa0c..71ada564 100644 --- a/pinecone/data/features/inference/inference.py +++ b/pinecone/data/features/inference/inference.py @@ -23,6 +23,20 @@ class Inference(PluginAware): The `Inference` class configures and uses the Pinecone Inference API to generate embeddings and rank documents. + It is generally not instantiated directly, but rather accessed through a parent `Pinecone` client + object that is responsible for managing shared configurations. 
+ + ```python + from pinecone import Pinecone + + pc = Pinecone() + embeddings = pc.inference.embed( + model="text-embedding-3-small", + inputs=["Hello, world!"], + parameters={"input_type": "passage", "truncate": "END"} + ) + ``` + :param config: A `pinecone.config.Config` object, configured and built in the Pinecone class. :type config: `pinecone.config.Config`, required """ @@ -32,8 +46,13 @@ class Inference(PluginAware): def __init__(self, config, openapi_config, **kwargs) -> None: self.config = config + """ @private """ + self.openapi_config = openapi_config + """ @private """ + self.pool_threads = kwargs.get("pool_threads", 1) + """ @private """ self.__inference_api = setup_openapi_client( api_client_klass=ApiClient, @@ -43,6 +62,7 @@ def __init__(self, config, openapi_config, **kwargs) -> None: pool_threads=kwargs.get("pool_threads", 1), api_version=API_VERSION, ) + self.load_plugins( config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads ) diff --git a/pinecone/data/features/inference/inference_asyncio.py b/pinecone/data/features/inference/inference_asyncio.py index a78f1d6e..06ec7388 100644 --- a/pinecone/data/features/inference/inference_asyncio.py +++ b/pinecone/data/features/inference/inference_asyncio.py @@ -12,9 +12,23 @@ class AsyncioInference: """ - The `Inference` class configures and uses the Pinecone Inference API to generate embeddings and + The `AsyncioInference` class configures and uses the Pinecone Inference API to generate embeddings and rank documents. + This class is generally not instantiated directly, but rather accessed through a parent `PineconeAsyncio` client + object that is responsible for managing shared configurations. 
+ + ```python + from pinecone import PineconeAsyncio + + pc = PineconeAsyncio() + embeddings = await pc.inference.embed( + model="text-embedding-3-small", + inputs=["Hello, world!"], + parameters={"input_type": "passage", "truncate": "END"} + ) + ``` + :param config: A `pinecone.config.Config` object, configured and built in the Pinecone class. :type config: `pinecone.config.Config`, required """ @@ -24,7 +38,10 @@ class AsyncioInference: def __init__(self, api_client, **kwargs) -> None: self.api_client = api_client + """ @private """ + self.__inference_api = AsyncioInferenceApi(api_client) + """ @private """ async def embed( self, diff --git a/pinecone/grpc/index_grpc.py b/pinecone/grpc/index_grpc.py index eeac20ff..9b68c0b6 100644 --- a/pinecone/grpc/index_grpc.py +++ b/pinecone/grpc/index_grpc.py @@ -62,6 +62,7 @@ class GRPCIndex(GRPCIndexBase): @property def stub_class(self): + """@private""" return VectorServiceStub def upsert( diff --git a/poetry.lock b/poetry.lock index ef453757..8805fbf9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -153,6 +153,28 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] +[[package]] +name = "beautifulsoup4" +version = "4.13.3" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"}, + {file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"}, +] + +[package.dependencies] +soupsieve = ">1.2" +typing-extensions = ">=4.0.0" + +[package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] +html5lib = ["html5lib"] +lxml = ["lxml"] 
+ [[package]] name = "certifi" version = "2024.7.4" @@ -1607,6 +1629,17 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "soupsieve" +version = "2.6" +description = "A modern CSS selector implementation for Beautiful Soup." +optional = false +python-versions = ">=3.8" +files = [ + {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, + {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, +] + [[package]] name = "tomli" version = "2.0.1" @@ -1849,4 +1882,4 @@ grpc = ["googleapis-common-protos", "grpcio", "grpcio", "grpcio", "lz4", "protob [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "77c1844df36ad78e1d4609f1a135804cf35da0ef5a8df8eaf0c7b177c663139a" +content-hash = "bf919c64ad5ce7179d48c97ab8b608e49671ce20db9ed660121e99eea9d574cc" diff --git a/pyproject.toml b/pyproject.toml index 7f01dc77..9f81f8c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,6 +92,7 @@ pytest-benchmark = [ urllib3_mock = "0.3.3" responses = ">=0.8.1" ruff = "^0.9.3" +beautifulsoup4 = "^4.13.3" [tool.poetry.extras] From a3eb2ec3c59da3aafa3e532aa07280915baf0864 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Mon, 10 Mar 2025 11:55:19 -0400 Subject: [PATCH 2/3] Adjust path --- .github/actions/build-docs/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/build-docs/action.yml b/.github/actions/build-docs/action.yml index e6952f07..95d47fc9 100644 --- a/.github/actions/build-docs/action.yml +++ b/.github/actions/build-docs/action.yml @@ -27,6 +27,6 @@ runs: - name: Fix relative links shell: bash run: | - poetry run python3 fix-relative-links.py ./pdoc ./pdoc + poetry run python3 ./.github/actions/build-docs/fix-relative-links.py ./pdoc ./pdoc env: BASE_URL: 
"https://github.com/pinecone-io/pinecone-python-client/blob/main/" From 6dae06f3a43e45d3e7a445ce385ecc9b28307eb4 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Mon, 10 Mar 2025 13:04:11 -0400 Subject: [PATCH 3/3] Adjust docstring in script --- .github/actions/build-docs/fix-relative-links.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/build-docs/fix-relative-links.py b/.github/actions/build-docs/fix-relative-links.py index d54a3f19..3ed29d46 100644 --- a/.github/actions/build-docs/fix-relative-links.py +++ b/.github/actions/build-docs/fix-relative-links.py @@ -32,7 +32,7 @@ def replace_relative_links(html): if __name__ == "__main__": if len(sys.argv) < 2: - print("Usage: python script.py input.html [output.html]") + print("Usage: python fix-relative-links.py input-dir [output-dir]") sys.exit(1) input_dir = sys.argv[1]