Skip to content

Commit 938a5c8

Browse files
authored
Fix relative links in reference documentation (#468)
## Problem In the rendered static site at https://sdk.pinecone.io/python, links from within the README to other markdown files in the github repo are currently broken links. ## Solution - Add `beautifulsoup4` as dev dependency to enable parsing generated docs pages - After the docs build, find relative links pointing to markdown files and replace the path with a github url. - Setup a different flavor of the README specifically for pdoc to link to the most important parts of the reference docs - Make a few docs tweaks (mainly adding `@private` annotations in a few places to hide clutter) ## Type of Change - [x] Infrastructure change (CI configs, etc)
2 parents cf20e5d + 6dae06f commit 938a5c8

File tree

9 files changed

+374
-3
lines changed

9 files changed

+374
-3
lines changed

.github/actions/build-docs/action.yml

+8
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,16 @@ runs:
1717
uses: ./.github/actions/setup-poetry
1818
with:
1919
include_grpc: 'true'
20+
include_dev: 'true'
2021

2122
- name: Build html documentation
2223
shell: bash
2324
run: |
2425
poetry run pdoc pinecone '!pinecone.core' '!pinecone.utils' --favicon ./favicon-32x32.png --docformat google -o ./pdoc
26+
27+
- name: Fix relative links
28+
shell: bash
29+
run: |
30+
poetry run python3 ./.github/actions/build-docs/fix-relative-links.py ./pdoc ./pdoc
31+
env:
32+
BASE_URL: "https://github.com/pinecone-io/pinecone-python-client/blob/main/"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import os
2+
import sys
3+
from bs4 import BeautifulSoup
4+
from urllib.parse import urljoin
5+
6+
# Base URL to prepend to relative links
7+
BASE_URL = os.environ.get(
8+
"BASE_URL", "https://github.com/pinecone-io/pinecone-python-client/blob/main/"
9+
)
10+
11+
12+
def replace_relative_links(html):
    """Rewrite relative links to markdown files as absolute URLs under BASE_URL.

    Every ``<a>`` tag with an ``href`` is inspected. Links that are already
    absolute, pure ``#anchor`` links, and links whose path does not end in
    ``.md`` are left untouched. Everything else is resolved against
    ``BASE_URL`` with ``urljoin`` and each rewrite is printed to stdout as
    ``old => new``.

    :param html: HTML document source as a string.
    :return: The document re-serialized by BeautifulSoup with links rewritten.
    """
    # Imported locally so the function is self-contained; urlsplit separates
    # the path from any ?query / #fragment component.
    from urllib.parse import urlsplit

    # urljoin() replaces the last path segment of a base URL that has no
    # trailing slash, so always treat the base as a directory.
    base_url = BASE_URL if BASE_URL.endswith("/") else BASE_URL + "/"

    soup = BeautifulSoup(html, "html.parser")

    # Find all anchor tags with an href attribute
    for a in soup.find_all("a", href=True):
        href = a["href"]

        # Skip if the link is already absolute or an anchor link
        if href.startswith(("http://", "https://", "#")):
            continue

        parts = urlsplit(href)

        # Skip any other scheme (mailto:, ftp:, ...) or protocol-relative link
        if parts.scheme or parts.netloc:
            continue

        # Skip if the link is not a markdown file. Only the path is checked so
        # that links such as "docs/faq.md#section" are still recognised.
        if not parts.path.endswith(".md"):
            continue

        # Replace the relative link with an absolute URL (fragment/query kept)
        new_href = urljoin(base_url, href)
        print(f"{href} => {new_href}")
        a["href"] = new_href

    return str(soup)
31+
32+
33+
if __name__ == "__main__":
    # Command-line entry point: fix-relative-links.py input-dir [output-dir]
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python fix-relative-links.py input-dir [output-dir]")
        sys.exit(1)

    input_dir = cli_args[0]
    # Without an output directory the rewritten pages are printed to stdout.
    output_dir = cli_args[1] if len(cli_args) > 1 else None

    # Walk the input tree and handle every .html file found in it.
    for current_dir, _subdirs, filenames in os.walk(input_dir):
        html_names = [name for name in filenames if name.endswith(".html")]

        for html_name in html_names:
            print(f"Processing {html_name}")
            source_path = os.path.join(current_dir, html_name)

            with open(source_path, "r", encoding="utf-8") as source_file:
                page = source_file.read()

            rewritten_page = replace_relative_links(page)

            if not output_dir:
                print(rewritten_page)
                continue

            # Mirror the file's location relative to input_dir under output_dir.
            relative_path = os.path.relpath(source_path, input_dir)
            target_path = os.path.join(output_dir, relative_path)

            # Make sure the destination folder exists before writing.
            os.makedirs(os.path.dirname(target_path), exist_ok=True)

            with open(target_path, "w", encoding="utf-8") as target_file:
                target_file.write(rewritten_page)

pdoc/README.md

+225
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
# Pinecone Python SDK
2+
![License](https://img.shields.io/github/license/pinecone-io/pinecone-python-client?color=orange)
3+
4+
The official Pinecone Python SDK.
5+
6+
## Documentation
7+
8+
- [**Conceptual docs and guides**](https://docs.pinecone.io)
9+
- [**GitHub Source**](https://github.com/pinecone-io/pinecone-python-client)
10+
11+
## Points of interest
12+
13+
**DB control plane**
14+
15+
- [`Pinecone`](./pinecone/control/pinecone.html)
16+
- [`PineconeAsyncio`](./pinecone/control/pinecone_asyncio.html)
17+
- [`PineconeGRPC`](./pinecone/grpc/pinecone.html)
18+
19+
**DB data operations**
20+
- [`Index`](./pinecone/data/index.html)
21+
- [`IndexAsyncio`](./pinecone/data/index_asyncio.html)
22+
- [`IndexGRPC`](./pinecone/grpc/index_grpc.html)
23+
24+
**Inference API**
25+
- [`Inference`](./pinecone/data/features/inference/inference.html)
26+
- [`InferenceAsyncio`](./pinecone/data/features/inference/inference_asyncio.html)
27+
28+
## Upgrading the SDK
29+
30+
> [!NOTE]
31+
> The official SDK package was renamed from `pinecone-client` to `pinecone` beginning in version `5.1.0`.
32+
> Please remove `pinecone-client` from your project dependencies and add `pinecone` instead to get
33+
> the latest updates.
34+
35+
For notes on changes between major versions, see [Upgrading](./docs/upgrading.md)
36+
37+
## Prerequisites
38+
39+
- The Pinecone Python SDK is compatible with Python 3.9 and greater. It has been tested with CPython versions from 3.9 to 3.13.
40+
- Before you can use the Pinecone SDK, you must sign up for an account and find your API key in the Pinecone console dashboard at [https://app.pinecone.io](https://app.pinecone.io).
41+
42+
## Installation
43+
44+
The Pinecone Python SDK is distributed on PyPI using the package name `pinecone`. By default the `pinecone` package has a minimal set of dependencies, but you can install some extras to unlock additional functionality.
45+
46+
Available extras:
47+
48+
- `pinecone[asyncio]` will add a dependency on `aiohttp` and enable usage of `PineconeAsyncio`, the asyncio-enabled version of the client for use with highly asynchronous modern web frameworks such as FastAPI.
49+
- `pinecone[grpc]` will add dependencies on `grpcio` and related libraries needed to make pinecone data calls such as `upsert` and `query` over [GRPC](https://grpc.io/) for a modest performance improvement. See the guide on [tuning performance](https://docs.pinecone.io/docs/performance-tuning).
50+
51+
#### Installing with pip
52+
53+
```shell
54+
# Install the latest version
55+
pip3 install pinecone
56+
57+
# Install the latest version, with optional dependencies
58+
pip3 install "pinecone[asyncio,grpc]"
59+
```
60+
61+
#### Installing with uv
62+
63+
[uv](https://docs.astral.sh/uv/) is a modern package manager that runs 10-100x faster than pip and supports most pip syntax.
64+
65+
```shell
66+
# Install the latest version
67+
uv add pinecone
68+
69+
# Install the latest version, with optional dependencies
70+
uv add "pinecone[asyncio,grpc]"
71+
```
72+
73+
#### Installing with [poetry](https://python-poetry.org/)
74+
75+
```shell
76+
# Install the latest version
77+
poetry add pinecone
78+
79+
# Install the latest version, with optional dependencies
80+
poetry add pinecone --extras asyncio --extras grpc
81+
```
82+
83+
# Quickstart
84+
85+
## Bringing your own vectors to Pinecone
86+
87+
```python
88+
from pinecone import (
89+
Pinecone,
90+
ServerlessSpec,
91+
CloudProvider,
92+
AwsRegion,
93+
VectorType
94+
)
95+
96+
# 1. Instantiate the Pinecone client
97+
pc = Pinecone(api_key='YOUR_API_KEY')
98+
99+
# 2. Create an index
100+
index_config = pc.create_index(
101+
name="index-name",
102+
dimension=1536,
103+
spec=ServerlessSpec(
104+
cloud=CloudProvider.AWS,
105+
region=AwsRegion.US_EAST_1
106+
),
107+
vector_type=VectorType.DENSE
108+
)
109+
110+
# 3. Instantiate an Index client
111+
idx = pc.Index(host=index_config.host)
112+
113+
# 4. Upsert embeddings
114+
idx.upsert(
115+
vectors=[
116+
("id1", [0.1, 0.2, 0.3, 0.4, ...], {"metadata_key": "value1"}),
117+
("id2", [0.2, 0.3, 0.4, 0.5, ...], {"metadata_key": "value2"}),
118+
],
119+
namespace="example-namespace"
120+
)
121+
122+
# 5. Query your index using an embedding
123+
query_embedding = [...] # list should have length == index dimension
124+
idx.query(
125+
vector=query_embedding,
126+
top_k=10,
127+
include_metadata=True,
128+
filter={"metadata_key": { "$eq": "value1" }}
129+
)
130+
```
131+
132+
## Bring your own data using Pinecone integrated inference
133+
134+
```python
135+
from pinecone import (
136+
Pinecone,
137+
CloudProvider,
138+
AwsRegion,
139+
EmbedModel, IndexEmbed,
140+
)
141+
142+
# 1. Instantiate the Pinecone client
143+
pc = Pinecone(api_key="<<PINECONE_API_KEY>>")
144+
145+
# 2. Create an index configured for use with a particular model
146+
index_config = pc.create_index_for_model(
147+
name="my-model-index",
148+
cloud=CloudProvider.AWS,
149+
region=AwsRegion.US_EAST_1,
150+
embed=IndexEmbed(
151+
model=EmbedModel.Multilingual_E5_Large,
152+
field_map={"text": "my_text_field"}
153+
)
154+
)
155+
156+
# 3. Instantiate an Index client
157+
idx = pc.Index(host=index_config.host)
158+
159+
# 4. Upsert records
160+
idx.upsert_records(
161+
namespace="my-namespace",
162+
records=[
163+
{
164+
"_id": "test1",
165+
"my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
166+
},
167+
{
168+
"_id": "test2",
169+
"my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
170+
},
171+
{
172+
"_id": "test3",
173+
"my_text_field": "Many people enjoy eating apples as a healthy snack.",
174+
},
175+
{
176+
"_id": "test4",
177+
"my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
178+
},
179+
{
180+
"_id": "test5",
181+
"my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
182+
},
183+
{
184+
"_id": "test6",
185+
"my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
186+
},
187+
],
188+
)
189+
190+
# 5. Search for similar records
191+
from pinecone import SearchQuery, SearchRerank, RerankModel
192+
193+
response = idx.search_records(
194+
namespace="my-namespace",
195+
query=SearchQuery(
196+
inputs={
197+
"text": "Apple corporation",
198+
},
199+
top_k=3
200+
),
201+
rerank=SearchRerank(
202+
model=RerankModel.Bge_Reranker_V2_M3,
203+
rank_fields=["my_text_field"],
204+
top_n=3,
205+
),
206+
)
207+
```
208+
209+
## More information on usage
210+
211+
Detailed information on specific ways of using the SDK is covered in these other pages.
212+
213+
214+
- [Serverless Indexes](./docs/db_control/serverless-indexes.md)
215+
- [Pod Indexes](./docs/db_control/pod-indexes.md)
216+
- [Working with vectors](./docs/db_data/index-usage-byov.md)
217+
- [Inference API](./docs/inference-api.md)
218+
- [FAQ](./docs/faq.md)
219+
220+
221+
# Issues & Bugs
222+
223+
If you notice bugs or have feedback, please [file an issue](https://github.com/pinecone-io/pinecone-python-client/issues).
224+
225+
You can also get help in the [Pinecone Community Forum](https://community.pinecone.io/).

pinecone/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
.. include:: ../README.md
2+
.. include:: ../pdoc/README.md
33
"""
44

55
from .deprecated_plugins import check_for_deprecated_plugins

pinecone/data/features/inference/inference.py

+20
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,20 @@ class Inference(PluginAware):
2323
The `Inference` class configures and uses the Pinecone Inference API to generate embeddings and
2424
rank documents.
2525
26+
It is generally not instantiated directly, but rather accessed through a parent `Pinecone` client
27+
object that is responsible for managing shared configurations.
28+
29+
```python
30+
from pinecone import Pinecone
31+
32+
pc = Pinecone()
33+
embeddings = pc.inference.embed(
34+
model="text-embedding-3-small",
35+
inputs=["Hello, world!"],
36+
parameters={"input_type": "passage", "truncate": "END"}
37+
)
38+
```
39+
2640
:param config: A `pinecone.config.Config` object, configured and built in the Pinecone class.
2741
:type config: `pinecone.config.Config`, required
2842
"""
@@ -32,8 +46,13 @@ class Inference(PluginAware):
3246

3347
def __init__(self, config, openapi_config, **kwargs) -> None:
3448
self.config = config
49+
""" @private """
50+
3551
self.openapi_config = openapi_config
52+
""" @private """
53+
3654
self.pool_threads = kwargs.get("pool_threads", 1)
55+
""" @private """
3756

3857
self.__inference_api = setup_openapi_client(
3958
api_client_klass=ApiClient,
@@ -43,6 +62,7 @@ def __init__(self, config, openapi_config, **kwargs) -> None:
4362
pool_threads=kwargs.get("pool_threads", 1),
4463
api_version=API_VERSION,
4564
)
65+
4666
self.load_plugins(
4767
config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads
4868
)

pinecone/data/features/inference/inference_asyncio.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,23 @@
1212

1313
class AsyncioInference:
1414
"""
15-
The `Inference` class configures and uses the Pinecone Inference API to generate embeddings and
15+
The `AsyncioInference` class configures and uses the Pinecone Inference API to generate embeddings and
1616
rank documents.
1717
18+
This class is generally not instantiated directly, but rather accessed through a parent `PineconeAsyncio` client
19+
object that is responsible for managing shared configurations.
20+
21+
```python
22+
from pinecone import PineconeAsyncio
23+
24+
pc = PineconeAsyncio()
25+
embeddings = await pc.inference.embed(
26+
model="text-embedding-3-small",
27+
inputs=["Hello, world!"],
28+
parameters={"input_type": "passage", "truncate": "END"}
29+
)
30+
```
31+
1832
:param config: A `pinecone.config.Config` object, configured and built in the Pinecone class.
1933
:type config: `pinecone.config.Config`, required
2034
"""
@@ -24,7 +38,10 @@ class AsyncioInference:
2438

2539
def __init__(self, api_client, **kwargs) -> None:
2640
self.api_client = api_client
41+
""" @private """
42+
2743
self.__inference_api = AsyncioInferenceApi(api_client)
44+
""" @private """
2845

2946
async def embed(
3047
self,

0 commit comments

Comments
 (0)