Skip to content

Commit

Permalink
reformat python code samples (#380)
Browse files Browse the repository at this point in the history
Co-authored-by: Max Shkutnyk <max@lightsonsoftware.com>
Co-authored-by: trentfowlercohere <141260477+trentfowlercohere@users.noreply.github.com>
  • Loading branch information
3 people authored Jan 23, 2025
1 parent 60d68b8 commit 847b2ef
Show file tree
Hide file tree
Showing 12 changed files with 201 additions and 142 deletions.
97 changes: 54 additions & 43 deletions fern/pages/-ARCHIVE-/old-tutorials/semantic-search.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ You can find the code in the <a target="_blank" href="https://github.com/cohere-
### 1. Download the Dependencies

```python PYTHON
# Install Cohere for embeddings, Umap to reduce embeddings to 2 dimensions,
# Install Cohere for embeddings, Umap to reduce embeddings to 2 dimensions,
# Altair for visualization, Annoy for approximate nearest neighbor search
pip install cohere umap-learn altair annoy datasets tqdm
!pip install cohere umap-learn altair annoy datasets tqdm
```

And if you're running an older version of the SDK, you might need to upgrade it like so:
Expand All @@ -63,7 +63,7 @@ Get your Cohere API key by [signing up here](https://os.cohere.ai/register). Pas
#### 1a. Import the Necessary Dependencies to Run this Example

```python PYTHON
#title Import libraries (Run this cell to execute required code) {display-mode: "form"}
# title Import libraries (Run this cell to execute required code) {display-mode: "form"}

import cohere
import numpy as np
Expand All @@ -76,8 +76,9 @@ import altair as alt
from sklearn.metrics.pairwise import cosine_similarity
from annoy import AnnoyIndex
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_colwidth', None)

warnings.filterwarnings("ignore")
pd.set_option("display.max_colwidth", None)
```

### 2. Get the Archive of Questions
Expand Down Expand Up @@ -128,9 +129,11 @@ input_type_embed = "search_document"
co = cohere.Client(api_key)

# Get the embeddings
embeds = co.embed(texts=list(df['text']),
model=model_name,
input_type=input_type_embed).embeddings
embeds = co.embed(
texts=list(df["text"]),
model=model_name,
input_type=input_type_embed,
).embeddings
```

### 4. Build the Index, Search Using an Index, and Conduct Nearest Neighbour Search
Expand All @@ -141,13 +144,13 @@ Let's build an index using the library called annoy. Annoy is a library created

```python PYTHON
# Create the search index, pass the size of embedding
search_index = AnnoyIndex(np.array(embeds).shape[1], 'angular')
search_index = AnnoyIndex(np.array(embeds).shape[1], "angular")

# Add all the vectors to the search index
for i in range(len(embeds)):
search_index.add_item(i, embeds[i])
search_index.build(10) # 10 trees
search_index.save('test.ann')
search_index.build(10) # 10 trees
search_index.save("test.ann")
```

After building the index, we can use it to retrieve the nearest neighbours either of existing questions (section 3.1), or of new questions that we embed (section 3.2).
Expand All @@ -161,15 +164,21 @@ If we're only interested in measuring the similarities between the questions in
example_id = 92

# Retrieve nearest neighbors
similar_item_ids = search_index.get_nns_by_item(example_id,10,
include_distances=True)
similar_item_ids = search_index.get_nns_by_item(
example_id, 10, include_distances=True
)

# Format and print the text and distances
results = pd.DataFrame(data={'texts': df.iloc[similar_item_ids[0]]['text'],
'distance': similar_item_ids[1]}).drop(example_id)

results = pd.DataFrame(
data={
"texts": df.iloc[similar_item_ids[0]]["text"],
"distance": similar_item_ids[1],
}
).drop(example_id)

# NOTE: Your results might look slightly different to ours.
print(f"Question:'{df.iloc[example_id]['text']}'\nNearest neighbors:")
print(results) # NOTE: Your results might look slightly different to ours.
print(results)
```

```
Expand Down Expand Up @@ -199,20 +208,26 @@ query = "What is the tallest mountain in the world?"
input_type_query = "search_query"

# Get the query's embedding
query_embed = co.embed(texts=[query],
model=model_name,
input_type=input_type_query).embeddings
query_embed = co.embed(
texts=[query], model=model_name, input_type=input_type_query
).embeddings

# Retrieve the nearest neighbors
similar_item_ids = search_index.get_nns_by_vector(query_embed[0],10,
include_distances=True)
similar_item_ids = search_index.get_nns_by_vector(
query_embed[0], 10, include_distances=True
)
# Format the results
query_results = pd.DataFrame(data={'texts': df.iloc[similar_item_ids[0]]['text'],
'distance': similar_item_ids[1]})
query_results = pd.DataFrame(
data={
"texts": df.iloc[similar_item_ids[0]]["text"],
"distance": similar_item_ids[1],
}
)


# NOTE: Your results might look slightly different to ours.
print(f"Query:'{query}'\nNearest neighbors:")
print(query_results) # NOTE: Your results might look slightly different to ours.
print(query_results)
```

| | texts | distance |
Expand All @@ -233,32 +248,28 @@ print(query_results) # NOTE: Your results might look slightly different to ours.
Use the code below to create a visualization of the embedded archive. As written, this code will only run in a jupyter notebook.

```python PYTHON
#@title Plot the archive {display-mode: "form"}
# @title Plot the archive {display-mode: "form"}

# UMAP reduces the dimensions from 1024 to 2 dimensions that we can plot
reducer = umap.UMAP(n_neighbors=20)
reducer = umap.UMAP(n_neighbors=20)
umap_embeds = reducer.fit_transform(embeds)

# Prepare the data to plot and interactive visualization
# using Altair
df_explore = pd.DataFrame(data={'text': df['text']})
df_explore['x'] = umap_embeds[:,0]
df_explore['y'] = umap_embeds[:,1]
df_explore = pd.DataFrame(data={"text": df["text"]})
df_explore["x"] = umap_embeds[:, 0]
df_explore["y"] = umap_embeds[:, 1]

# Plot
chart = alt.Chart(df_explore).mark_circle(size=60).encode(
x=#'x',
alt.X('x',
scale=alt.Scale(zero=False)
),
y=
alt.Y('y',
scale=alt.Scale(zero=False)
),
tooltip=['text']
).properties(
width=700,
height=400
chart = (
alt.Chart(df_explore)
.mark_circle(size=60)
.encode(
x=alt.X("x", scale=alt.Scale(zero=False)), #'x',
y=alt.Y("y", scale=alt.Scale(zero=False)),
tooltip=["text"],
)
.properties(width=700, height=400)
)
chart.interactive()
```
Expand Down
38 changes: 22 additions & 16 deletions fern/pages/deployment-options/cohere-on-aws/amazon-bedrock.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -45,24 +45,29 @@ co = cohere.BedrockClient(
)

# Input parameters for embed. In this example we are embedding hacker news post titles.
texts = ["Interesting (Non software) books?",
"Non-tech books that have helped you grow professionally?",
"I sold my company last month for $5m. What do I do with the money?",
"How are you getting through (and back from) burning out?",
"I made $24k over the last month. Now what?",
"What kind of personal financial investment do you do?",
"Should I quit the field of software development?"]
texts = [
"Interesting (Non software) books?",
"Non-tech books that have helped you grow professionally?",
"I sold my company last month for $5m. What do I do with the money?",
"How are you getting through (and back from) burning out?",
"I made $24k over the last month. Now what?",
"What kind of personal financial investment do you do?",
"Should I quit the field of software development?",
]
input_type = "clustering"
truncate = "NONE" # optional
model_id = "cohere.embed-english-v3" # or "cohere.embed-multilingual-v3"
truncate = "NONE" # optional
model_id = (
"cohere.embed-english-v3" # or "cohere.embed-multilingual-v3"
)


# Invoke the model and print the response
result = co.embed(
model=model_id,
input_type=input_type,
texts=texts,
truncate=truncate) # aws_client.invoke_model(**params)
model=model_id,
input_type=input_type,
texts=texts,
truncate=truncate,
) # aws_client.invoke_model(**params)

print(result)
```
Expand All @@ -81,9 +86,10 @@ co = cohere.BedrockClient(
aws_session_token="...",
)

result = co.chat(message="Write a LinkedIn post about starting a career in tech:",
model='cohere.command-r-plus-v1:0' # or 'cohere.command-r-v1:0'
)
result = co.chat(
message="Write a LinkedIn post about starting a career in tech:",
model="cohere.command-r-plus-v1:0", # or 'cohere.command-r-v1:0'
)

print(result)
```
Original file line number Diff line number Diff line change
Expand Up @@ -55,24 +55,27 @@ co = cohere.SageMakerClient(
)

# Input parameters for embed. In this example we are embedding hacker news post titles.
texts = ["Interesting (Non software) books?",
"Non-tech books that have helped you grow professionally?",
"I sold my company last month for $5m. What do I do with the money?",
"How are you getting through (and back from) burning out?",
"I made $24k over the last month. Now what?",
"What kind of personal financial investment do you do?",
"Should I quit the field of software development?"]
texts = [
"Interesting (Non software) books?",
"Non-tech books that have helped you grow professionally?",
"I sold my company last month for $5m. What do I do with the money?",
"How are you getting through (and back from) burning out?",
"I made $24k over the last month. Now what?",
"What kind of personal financial investment do you do?",
"Should I quit the field of software development?",
]
input_type = "clustering"
truncate = "NONE" # optional
model_id = "<YOUR ENDPOINT NAME>" # On SageMaker, you create a model name that you'll pass here.
truncate = "NONE" # optional
model_id = "<YOUR ENDPOINT NAME>" # On SageMaker, you create a model name that you'll pass here.


# Invoke the model and print the response
result = co.embed(
model=model_id,
input_type=input_type,
texts=texts,
truncate=truncate)
model=model_id,
input_type=input_type,
texts=texts,
truncate=truncate,
)

print(result)
```
Expand Down
31 changes: 20 additions & 11 deletions fern/pages/deployment-options/cohere-on-microsoft-azure.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,19 @@ data = {
body = str.encode(json.dumps(data))

# Replace the url with your API endpoint
url = "https://your-endpoint.inference.ai.azure.com/v1/chat/completions"
url = (
"https://your-endpoint.inference.ai.azure.com/v1/chat/completions"
)

# Replace this with the key for the endpoint
api_key = "your-auth-key"
if not api_key:
raise Exception("API Key is missing")

headers = {"Content-Type": "application/json", "Authorization": (api_key)}
headers = {
"Content-Type": "application/json",
"Authorization": (api_key),
}

req = urllib.request.Request(url, body, headers)

Expand Down Expand Up @@ -111,9 +116,7 @@ import urllib.request
import json

# Configure payload data sending to API endpoint
data = {
"input": ["hi"]
}
data = {"input": ["hi"]}

body = str.encode(json.dumps(data))

Expand All @@ -125,7 +128,10 @@ api_key = "your-auth-key"
if not api_key:
raise Exception("API Key is missing")

headers = {"Content-Type": "application/json", "Authorization": (api_key)}
headers = {
"Content-Type": "application/json",
"Authorization": (api_key),
}

req = urllib.request.Request(url, body, headers)

Expand All @@ -149,7 +155,7 @@ import cohere

co = cohere.Client(
base_url="https://<endpoint>.<region>.inference.ai.azure.com/v1",
api_key="<key>"
api_key="<key>",
)

documents = [
Expand Down Expand Up @@ -214,19 +220,19 @@ import cohere
# For Command models
co_chat = cohere.Client(
api_key="AZURE_INFERENCE_CREDENTIAL",
base_url="AZURE_MODEL_ENDPOINT", # Example - https://Cohere-command-r-plus-08-2024-xyz.eastus.models.ai.azure.com/
base_url="AZURE_MODEL_ENDPOINT", # Example - https://Cohere-command-r-plus-08-2024-xyz.eastus.models.ai.azure.com/
)

# For Embed models
co_embed = cohere.Client(
api_key="AZURE_INFERENCE_CREDENTIAL",
    base_url="AZURE_MODEL_ENDPOINT",  # Example - https://cohere-embed-v3-multilingual-xyz.eastus.models.ai.azure.com/
base_url="AZURE_MODEL_ENDPOINT", # Example - hhttps://cohere-embed-v3-multilingual-xyz.eastus.models.ai.azure.com/
)

# For Rerank models
co_rerank = cohere.Client(
api_key="AZURE_INFERENCE_CREDENTIAL",
base_url="AZURE_MODEL_ENDPOINT", # Example - hhttps://cohere-rerank-v3-multilingual-xyz.eastus.models.ai.azure.com/
    base_url="AZURE_MODEL_ENDPOINT",  # Example - https://cohere-rerank-v3-multilingual-xyz.eastus.models.ai.azure.com/
)
```

Expand Down Expand Up @@ -286,7 +292,10 @@ faqs_short = [
query = "Are there fitness-related perks?"

results = co_rerank.rerank(
query=query, documents=faqs_short, top_n=2, model="rerank-english-v3.0"
query=query,
documents=faqs_short,
top_n=2,
model="rerank-english-v3.0",
)
```

Expand Down
9 changes: 4 additions & 5 deletions fern/pages/deployment-options/cohere-works-everywhere.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,7 @@ const cohere = new CohereClient({
```python PYTHON
import cohere

co = cohere.Client(api_key="",
base_url="<YOUR_DEPLOYMENT_URL>")
co = cohere.Client(api_key="", base_url="<YOUR_DEPLOYMENT_URL>")

response = co.chat(
chat_history=[
Expand Down Expand Up @@ -320,7 +319,7 @@ co = cohere.BedrockClient(
)

response = co.chat(
model="cohere.command-r-plus-v1:0",
model="cohere.command-r-plus-v1:0",
chat_history=[
{"role": "USER", "message": "Who discovered gravity?"},
{
Expand Down Expand Up @@ -513,8 +512,8 @@ const cohere = new CohereClient({
import cohere

co = cohere.Client(
api_key="<azure token>",
base_url="https://Cohere-command-r-plus-phulf-serverless.eastus2.inference.ai.azure.com/v1",
api_key="<azure token>",
base_url="https://Cohere-command-r-plus-phulf-serverless.eastus2.inference.ai.azure.com/v1",
)

response = co.chat(
Expand Down
Loading

0 comments on commit 847b2ef

Please sign in to comment.