Fix relative links in reference documentation (#468)

jhamon · web-flow · commit 938a5c8c0e74 · 2025-03-10T13:22:16.000-04:00
## Problem

In the rendered static site at https://sdk.pinecone.io/python, links from within the README to other markdown files in the GitHub repo are currently broken.

## Solution

- Add `beautifulsoup4` as a dev dependency to enable parsing the generated docs pages
- After the docs build, find relative links pointing to markdown files and replace each path with a GitHub URL.
- Set up a different flavor of the README specifically for pdoc that links to the most important parts of the reference docs
- Make a few docs tweaks (mainly adding `@private` annotations in a few places to hide clutter)

## Type of Change

- [x] Infrastructure change (CI configs, etc.)
diff --git a/.github/actions/build-docs/action.yml b/.github/actions/build-docs/action.yml
@@ -17,8 +17,16 @@ runs:
       uses: ./.github/actions/setup-poetry
       with:
         include_grpc: 'true'
+        include_dev: 'true'
 
     - name: Build html documentation
       shell: bash
       run: |
         poetry run pdoc pinecone '!pinecone.core' '!pinecone.utils' --favicon ./favicon-32x32.png --docformat google -o ./pdoc
+
+    - name: Fix relative links
+      shell: bash
+      run: |
+        poetry run python3 ./.github/actions/build-docs/fix-relative-links.py ./pdoc ./pdoc
+      env:
+        BASE_URL: "https://github.com/pinecone-io/pinecone-python-client/blob/main/"
diff --git a/.github/actions/build-docs/fix-relative-links.py b/.github/actions/build-docs/fix-relative-links.py
@@ -0,0 +1,66 @@
+import os
+import sys
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin
+
+# Base URL to prepend to relative links
+BASE_URL = os.environ.get(
+    "BASE_URL", "https://github.com/pinecone-io/pinecone-python-client/blob/main/"
+)
+
+
+def replace_relative_links(html):
+    soup = BeautifulSoup(html, "html.parser")
+
+    # Find all anchor tags with an href attribute
+    for a in soup.find_all("a", href=True):
+        href = a["href"]
+        # Skip if the link is already absolute or an anchor link
+        if href.startswith(("http://", "https://", "#")):
+            continue
+
+        # Skip if the link is not a markdown file
+        if not href.endswith(".md"):
+            continue
+
+        # Replace the relative link with an absolute URL
+        new_href = urljoin(BASE_URL, href)
+        print(f"{href} => {new_href}")
+        a["href"] = new_href
+    return str(soup)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: python fix-relative-links.py input-dir [output-dir]")
+        sys.exit(1)
+
+    input_dir = sys.argv[1]
+    output_dir = sys.argv[2] if len(sys.argv) > 2 else None
+
+    # Recursively process all html files in the input directory
+    for root, dirs, files in os.walk(input_dir):
+        for file in files:
+            if not file.endswith(".html"):
+                continue
+
+            print(f"Processing {file}")
+            input_path = os.path.join(root, file)
+
+            with open(input_path, "r", encoding="utf-8") as f:
+                html = f.read()
+
+            updated_html = replace_relative_links(html)
+
+            if output_dir:
+                # Get the relative path from input_dir to maintain folder structure
+                rel_path = os.path.relpath(input_path, input_dir)
+                output_path = os.path.join(output_dir, rel_path)
+
+                # Create the necessary subdirectories
+                os.makedirs(os.path.dirname(output_path), exist_ok=True)
+
+                with open(output_path, "w", encoding="utf-8") as f:
+                    f.write(updated_html)
+            else:
+                print(updated_html)
diff --git a/pdoc/README.md b/pdoc/README.md
@@ -0,0 +1,225 @@
+# Pinecone Python SDK
+![License](https://img.shields.io/github/license/pinecone-io/pinecone-python-client?color=orange)
+
+The official Pinecone Python SDK.
+
+## Documentation
+
+- [**Conceptual docs and guides**](https://docs.pinecone.io)
+- [**Github Source**](https://github.com/pinecone-io/pinecone-python-client)
+
+## Points of interest
+
+**DB control plane**
+
+- [`Pinecone`](./pinecone/control/pinecone.html)
+- [`PineconeAsyncio`](./pinecone/control/pinecone_asyncio.html)
+- [`PineconeGRPC`](./pinecone/grpc/pinecone.html)
+
+**DB data operations**
+- [`Index`](./pinecone/data/index.html)
+- [`IndexAsyncio`](./pinecone/data/index_asyncio.html)
+- [`IndexGRPC`](./pinecone/grpc/index_grpc.html)
+
+**Inference API**
+- [`Inference`](./pinecone/data/features/inference/inference.html)
+- [`AsyncioInference`](./pinecone/data/features/inference/inference_asyncio.html)
+
+## Upgrading the SDK
+
+> [!NOTE]
+> The official SDK package was renamed from `pinecone-client` to `pinecone` beginning in version `5.1.0`.
+> Please remove `pinecone-client` from your project dependencies and add `pinecone` instead to get
+> the latest updates.
+
+For notes on changes between major versions, see [Upgrading](./docs/upgrading.md)
+
+## Prerequisites
+
+- The Pinecone Python SDK is compatible with Python 3.9 and greater. It has been tested with CPython versions from 3.9 to 3.13.
+- Before you can use the Pinecone SDK, you must sign up for an account and find your API key in the Pinecone console dashboard at [https://app.pinecone.io](https://app.pinecone.io).
+
+## Installation
+
+The Pinecone Python SDK is distributed on PyPI using the package name `pinecone`. By default, the `pinecone` package has a minimal set of dependencies, but you can install some extras to unlock additional functionality.
+
+Available extras:
+
+- `pinecone[asyncio]` will add a dependency on `aiohttp` and enable usage of `PineconeAsyncio`, the asyncio-enabled version of the client for use with highly asynchronous modern web frameworks such as FastAPI.
+- `pinecone[grpc]` will add dependencies on `grpcio` and related libraries needed to make pinecone data calls such as `upsert` and `query` over [GRPC](https://grpc.io/) for a modest performance improvement. See the guide on [tuning performance](https://docs.pinecone.io/docs/performance-tuning).
+
+#### Installing with pip
+
+```shell
+# Install the latest version
+pip3 install pinecone
+
+# Install the latest version, with optional dependencies
+pip3 install "pinecone[asyncio,grpc]"
+```
+
+#### Installing with uv
+
+[uv](https://docs.astral.sh/uv/) is a modern package manager that runs 10-100x faster than pip and supports most pip syntax.
+
+```shell
+# Install the latest version
+uv add pinecone
+
+# Install the latest version, with optional dependencies
+uv add "pinecone[asyncio,grpc]"
+```
+
+#### Installing with [poetry](https://python-poetry.org/)
+
+```shell
+# Install the latest version
+poetry add pinecone
+
+# Install the latest version, with optional dependencies
+poetry add pinecone --extras asyncio --extras grpc
+```
+
+# Quickstart
+
+## Bringing your own vectors to Pinecone
+
+```python
+from pinecone import (
+    Pinecone,
+    ServerlessSpec,
+    CloudProvider,
+    AwsRegion,
+    VectorType
+)
+
+# 1. Instantiate the Pinecone client
+pc = Pinecone(api_key='YOUR_API_KEY')
+
+# 2. Create an index
+index_config = pc.create_index(
+    name="index-name",
+    dimension=1536,
+    spec=ServerlessSpec(
+        cloud=CloudProvider.AWS,
+        region=AwsRegion.US_EAST_1
+    ),
+    vector_type=VectorType.DENSE
+)
+
+# 3. Instantiate an Index client
+idx = pc.Index(host=index_config.host)
+
+# 4. Upsert embeddings
+idx.upsert(
+    vectors=[
+        ("id1", [0.1, 0.2, 0.3, 0.4, ...], {"metadata_key": "value1"}),
+        ("id2", [0.2, 0.3, 0.4, 0.5, ...], {"metadata_key": "value2"}),
+    ],
+    namespace="example-namespace"
+)
+
+# 5. Query your index using an embedding
+query_embedding = [...] # list should have length == index dimension
+idx.query(
+    vector=query_embedding,
+    top_k=10,
+    include_metadata=True,
+    filter={"metadata_key": { "$eq": "value1" }}
+)
+```
+
+## Bring your own data using Pinecone integrated inference
+
+```python
+from pinecone import (
+    Pinecone,
+    CloudProvider,
+    AwsRegion,
+    EmbedModel,
+    IndexEmbed,
+)
+
+# 1. Instantiate the Pinecone client
+pc = Pinecone(api_key="<<PINECONE_API_KEY>>")
+
+# 2. Create an index configured for use with a particular model
+index_config = pc.create_index_for_model(
+    name="my-model-index",
+    cloud=CloudProvider.AWS,
+    region=AwsRegion.US_EAST_1,
+    embed=IndexEmbed(
+        model=EmbedModel.Multilingual_E5_Large,
+        field_map={"text": "my_text_field"}
+    )
+)
+
+# 3. Instantiate an Index client
+idx = pc.Index(host=index_config.host)
+
+# 4. Upsert records
+idx.upsert_records(
+    namespace="my-namespace",
+    records=[
+        {
+            "_id": "test1",
+            "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
+        },
+        {
+            "_id": "test2",
+            "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
+        },
+        {
+            "_id": "test3",
+            "my_text_field": "Many people enjoy eating apples as a healthy snack.",
+        },
+        {
+            "_id": "test4",
+            "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
+        },
+        {
+            "_id": "test5",
+            "my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
+        },
+        {
+            "_id": "test6",
+            "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
+        },
+    ],
+)
+
+# 5. Search for similar records
+from pinecone import SearchQuery, SearchRerank, RerankModel
+
+response = idx.search_records(
+    namespace="my-namespace",
+    query=SearchQuery(
+        inputs={
+            "text": "Apple corporation",
+        },
+        top_k=3
+    ),
+    rerank=SearchRerank(
+        model=RerankModel.Bge_Reranker_V2_M3,
+        rank_fields=["my_text_field"],
+        top_n=3,
+    ),
+)
+```
+
+## More information on usage
+
+Detailed information on specific ways of using the SDK is covered in these other pages.
+
+
+- [Serverless Indexes](./docs/db_control/serverless-indexes.md)
+- [Pod Indexes](./docs/db_control/pod-indexes.md)
+- [Working with vectors](./docs/db_data/index-usage-byov.md)
+- [Inference API](./docs/inference-api.md)
+- [FAQ](./docs/faq.md)
+
+
+# Issues & Bugs
+
+If you notice bugs or have feedback, please [file an issue](https://github.com/pinecone-io/pinecone-python-client/issues).
+
+You can also get help in the [Pinecone Community Forum](https://community.pinecone.io/).
diff --git a/pinecone/__init__.py b/pinecone/__init__.py
@@ -1,5 +1,5 @@
 """
-.. include:: ../README.md
+.. include:: ../pdoc/README.md
 """
 
 from .deprecated_plugins import check_for_deprecated_plugins
diff --git a/pinecone/data/features/inference/inference.py b/pinecone/data/features/inference/inference.py
@@ -23,6 +23,20 @@ class Inference(PluginAware):
     The `Inference` class configures and uses the Pinecone Inference API to generate embeddings and
     rank documents.
 
+    It is generally not instantiated directly, but rather accessed through a parent `Pinecone` client
+    object that is responsible for managing shared configurations.
+
+    ```python
+    from pinecone import Pinecone
+
+    pc = Pinecone()
+    embeddings = pc.inference.embed(
+        model="text-embedding-3-small",
+        inputs=["Hello, world!"],
+        parameters={"input_type": "passage", "truncate": "END"}
+    )
+    ```
+
     :param config: A `pinecone.config.Config` object, configured and built in the Pinecone class.
     :type config: `pinecone.config.Config`, required
     """
@@ -32,8 +46,13 @@ class Inference(PluginAware):
 
     def __init__(self, config, openapi_config, **kwargs) -> None:
         self.config = config
+        """ @private """
+
         self.openapi_config = openapi_config
+        """ @private """
+
         self.pool_threads = kwargs.get("pool_threads", 1)
+        """ @private """
 
         self.__inference_api = setup_openapi_client(
             api_client_klass=ApiClient,
@@ -43,6 +62,7 @@ def __init__(self, config, openapi_config, **kwargs) -> None:
             pool_threads=kwargs.get("pool_threads", 1),
             api_version=API_VERSION,
         )
+
         self.load_plugins(
             config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads
         )
diff --git a/pinecone/data/features/inference/inference_asyncio.py b/pinecone/data/features/inference/inference_asyncio.py
@@ -12,9 +12,23 @@
 
 class AsyncioInference:
     """
-    The `Inference` class configures and uses the Pinecone Inference API to generate embeddings and
+    The `AsyncioInference` class configures and uses the Pinecone Inference API to generate embeddings and
     rank documents.
 
+    This class is generally not instantiated directly, but rather accessed through a parent `PineconeAsyncio` client
+    object that is responsible for managing shared configurations.
+
+    ```python
+    from pinecone import PineconeAsyncio
+
+    pc = PineconeAsyncio()
+    embeddings = await pc.inference.embed(
+        model="text-embedding-3-small",
+        inputs=["Hello, world!"],
+        parameters={"input_type": "passage", "truncate": "END"}
+    )
+    ```
+
     :param config: A `pinecone.config.Config` object, configured and built in the Pinecone class.
     :type config: `pinecone.config.Config`, required
     """
@@ -24,7 +38,10 @@ class AsyncioInference:
 
     def __init__(self, api_client, **kwargs) -> None:
         self.api_client = api_client
+        """ @private """
+
         self.__inference_api = AsyncioInferenceApi(api_client)
+        """ @private """
 
     async def embed(
         self,
diff --git a/pinecone/grpc/index_grpc.py b/pinecone/grpc/index_grpc.py
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml