Apply formatting by black to text-embedding pages (#343)

Co-authored-by: Max Shkutnyk <max@lightsonsoftware.com>
cohere-ai · Jan 13, 2025 · 8b2ded7 · 8b2ded7
1 parent 5f21680
commit 8b2ded7
Show file tree

Hide file tree

Showing 8 changed files with 223 additions and 176 deletions.
diff --git a/fern/pages/v2/text-embeddings/embed-jobs-api.mdx b/fern/pages/v2/text-embeddings/embed-jobs-api.mdx
@@ -116,17 +116,23 @@ Since we’d like to search over these embeddings and we can think of them as co
 The output of embed jobs is a dataset object which you can download or pipe directly to a database of your choice:
 
 ```python PYTHON
-output_dataset_response = co.datasets.get(id=embed_job.output_dataset_id)
+output_dataset_response = co.datasets.get(
+    id=embed_job.output_dataset_id
+)
 output_dataset = output_dataset_response.dataset
 co.utils.save_dataset(
-    dataset=output_dataset, filepath="/content/embed_job_output.csv", format="csv"
+    dataset=output_dataset,
+    filepath="/content/embed_job_output.csv",
+    format="csv",
 )
 ```
 
 Alternatively, if you would like to pass the dataset into a downstream function, you can do the following:
 
 ```python PYTHON
-output_dataset_response = co.datasets.get(id=embed_job.output_dataset_id)
+output_dataset_response = co.datasets.get(
+    id=embed_job.output_dataset_id
+)
 output_dataset = output_dataset_response.dataset
 results = []
 for record in output_dataset:

diff --git a/fern/pages/v2/text-embeddings/embeddings.mdx b/fern/pages/v2/text-embeddings/embeddings.mdx
@@ -28,22 +28,26 @@ co = cohere.ClientV2(api_key="YOUR_API_KEY")
 # get the embeddings
 phrases = ["i love soup", "soup is my favorite", "london is far away"]
 
-model="embed-english-v3.0"
-input_type="search_query"
+model = "embed-english-v3.0"
+input_type = "search_query"
 
-res = co.embed(texts=phrases,
-                model=model,
-                input_type=input_type,
-                embedding_types=['float'])
+res = co.embed(
+    texts=phrases,
+    model=model,
+    input_type=input_type,
+    embedding_types=["float"],
+)
 
 (soup1, soup2, london) = res.embeddings.float
 
+
 # compare them
 def calculate_similarity(a, b):
-  return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
+    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
+
 
-calculate_similarity(soup1, soup2) # 0.85 - very similar!
-calculate_similarity(soup1, london) # 0.16 - not similar!
+calculate_similarity(soup1, soup2)  # 0.85 - very similar!
+calculate_similarity(soup1, london)  # 0.16 - not similar!
 ```
 
 ## The `input_type` parameter
@@ -60,24 +64,31 @@ Cohere embeddings are optimized for different types of inputs.
 In addition to `embed-english-v3.0` we offer a best-in-class multilingual model [embed-multilingual-v3.0](/docs/embed-2#multi-lingual-models)  with support for over 100 languages, including Chinese, Spanish, and French. This model can be used with the Embed API, just like its English counterpart:
 
 ```python PYTHON
-import cohere  
+import cohere
+
 co = cohere.ClientV2(api_key="<YOUR API KEY>")
 
-texts = [  
-   'Hello from Cohere!', 'مرحبًا من كوهير!', 'Hallo von Cohere!',  
-   'Bonjour de Cohere!', '¡Hola desde Cohere!', 'Olá do Cohere!',  
-   'Ciao da Cohere!', '您好，来自 Cohere！', 'कोहेरे से नमस्ते!'  
-]  
+texts = [
+    "Hello from Cohere!",
+    "مرحبًا من كوهير!",
+    "Hallo von Cohere!",
+    "Bonjour de Cohere!",
+    "¡Hola desde Cohere!",
+    "Olá do Cohere!",
+    "Ciao da Cohere!",
+    "您好，来自 Cohere！",
+    "कोहेरे से नमस्ते!",
+]
 
 response = co.embed(
-  model='embed-multilingual-v3.0',
-  texts=texts, 
-  input_type='classification',
-  embedding_types=['float']) 
-
-embeddings = response.embeddings.float # All text embeddings 
-print(embeddings[0][:5]) # Print embeddings for the first text
-
+    model="embed-multilingual-v3.0",
+    texts=texts,
+    input_type="classification",
+    embedding_types=["float"],
+)
+
+embeddings = response.embeddings.float  # All text embeddings
+print(embeddings[0][:5])  # Print embeddings for the first text
 ```
 
 ## Image Embeddings
@@ -98,12 +109,13 @@ Be aware that image embedding has the following restrictions:
 import cohere
 from PIL import Image
 from io import BytesIO
-import base64 
-  
+import base64
+
 co = cohere.ClientV2(api_key="<YOUR API KEY>")
 
 # The model accepts input in base64 as a Data URL
 
+
 def image_to_base64_data_url(image_path):
     # Open the image file
     with Image.open(image_path) as img:
@@ -112,18 +124,23 @@ def image_to_base64_data_url(image_path):
         # Save the image as PNG to the BytesIO object
         img.save(buffered, format="PNG")
         # Encode the image data in base64
-        img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
-
+        img_base64 = base64.b64encode(buffered.getvalue()).decode(
+            "utf-8"
+        )
+
     # Create the Data URL and assumes the original image file type was png
     data_url = f"data:image/png;base64,{img_base64}"
     return data_url
-
+
+
 processed_image = image_to_base64_data_url("<PATH_TO_IMAGE>")
-
-ret = co.embed(images=[processed_image],
-               model='embed-english-v3.0',
-               embedding_types= ["float"],
-               input_type='image')
+
+ret = co.embed(
+    images=[processed_image],
+    model="embed-english-v3.0",
+    embedding_types=["float"],
+    input_type="image",
+)
 
 ret.embeddings.float
 ```
@@ -145,25 +162,29 @@ The following embedding types are supported:
  To specify an embedding type, pass one of the types from the list above as a list containing a string:
 
 ```python PYTHON
-ret = co.embed(texts=phrases,
-               model=model,
-               input_type=input_type,
-               embedding_types= ["float"])
-
-ret.embeddings # This contains the float embeddings
+ret = co.embed(
+    texts=phrases,
+    model=model,
+    input_type=input_type,
+    embedding_types=["float"],
+)
+
+ret.embeddings  # This contains the float embeddings
 ```
 
 You can specify multiple embedding types in a single call. For example, the following call will return both `int8` and `float` embeddings:
 
 ```python PYTHON
-ret = co.embed(texts=phrases,
-               model=model,
-               input_type=input_type,
-               embedding_types=["int8", "float"])
-
-ret.embeddings.int8 # This contains your int8 embeddings
-ret.embeddings.float # This contains your float embeddings
-ret.embeddings.uint8 # This will be empty
-ret.embeddings.ubinary # This will be empty
-ret.embeddings.binary # This will be empty
+ret = co.embed(
+    texts=phrases,
+    model=model,
+    input_type=input_type,
+    embedding_types=["int8", "float"],
+)
+
+ret.embeddings.int8  # This contains your int8 embeddings
+ret.embeddings.float  # This contains your float embeddings
+ret.embeddings.uint8  # This will be empty
+ret.embeddings.ubinary  # This will be empty
+ret.embeddings.binary  # This will be empty
 ```
diff --git a/fern/pages/v2/text-embeddings/multimodal-embeddings.mdx b/fern/pages/v2/text-embeddings/multimodal-embeddings.mdx
@@ -35,30 +35,33 @@ The Embed API takes in images with the following file formats: `png`, `jpeg`,`We
 import os
 import base64
 
+
 # Defining the function to convert an image to a base 64 Data URL
 def image_to_base64_data_url(image_path):
-  _, file_extension = os.path.splitext(image_path)
-  file_type=(file_extension[1:])
-
-  with open(image_path, "rb") as f:
-    enc_img = base64.b64encode(f.read()).decode('utf-8')
-    enc_img = f"data:image/{file_type};base64,{enc_img}"
-  return enc_img
-
-image_path='<YOUR IMAGE PATH>'
-processed_image=image_to_base64_data_url(image_path)
+    _, file_extension = os.path.splitext(image_path)
+    file_type = file_extension[1:]
+
+    with open(image_path, "rb") as f:
+        enc_img = base64.b64encode(f.read()).decode("utf-8")
+        enc_img = f"data:image/{file_type};base64,{enc_img}"
+    return enc_img
+
+
+image_path = "<YOUR IMAGE PATH>"
+processed_image = image_to_base64_data_url(image_path)
 ```
 #### 2\. Call the Embed Endpoint
 ```python PYTHON
 # Import the necessary packages
 import cohere
+
 co = cohere.ClientV2(api_key="<YOUR API KEY>")
 
 co.embed(
-    model='embed-english-v3.0',
+    model="embed-english-v3.0",
     images=[processed_image],
-    input_type='image',
-    embedding_types=['float']
+    input_type="image",
+    embedding_types=["float"],
 )
 ```
 ## Sample Output

diff --git a/fern/pages/v2/text-embeddings/reranking/overview.mdx b/fern/pages/v2/text-embeddings/reranking/overview.mdx
@@ -27,22 +27,21 @@ In this example, the documents being passed in are a list of strings:
 
 ```python PYTHON
 import cohere
+
 co = cohere.ClientV2()
 
 query = "What is the capital of the United States?"
 docs = [
-  "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.",
-  "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.",
-  "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.",
-  "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.",
-  "Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment."
+    "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.",
+    "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.",
+    "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.",
+    "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.",
+    "Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.",
 ]
 
 results = co.rerank(
-	model="rerank-v3.5",
-	query=query,
-	documents=docs,
-	top_n=5)
+    model="rerank-v3.5", query=query, documents=docs, top_n=5
+)
 ```
 
 **Response**
@@ -97,35 +96,32 @@ co = cohere.ClientV2()
 
 query = "What is the capital of the United States?"
 docs = [
-  {
-	"Title": "Facts about Carson City",
-	"Content": "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274."
-  },
-  {
-    "Title": "The Commonwealth of Northern Mariana Islands",
-	"Content": "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan."
-  },
-  {
-	"Title": "The Capital of United States Virgin Islands",
-	"Content": "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas."
-  },
-  {
-	"Title": "Washington D.C.",
-	"Content":"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America."
-  },
-  {
-	"Title": "Capital Punishment in the US",
-	"Content": "Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment."
-  }
+    {
+        "Title": "Facts about Carson City",
+        "Content": "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.",
+    },
+    {
+        "Title": "The Commonwealth of Northern Mariana Islands",
+        "Content": "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.",
+    },
+    {
+        "Title": "The Capital of United States Virgin Islands",
+        "Content": "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.",
+    },
+    {
+        "Title": "Washington D.C.",
+        "Content": "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.",
+    },
+    {
+        "Title": "Capital Punishment in the US",
+        "Content": "Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.",
+    },
 ]
 
-yaml_docs = [yaml.dump(doc, sort_keys=False) for doc in docs] 
+yaml_docs = [yaml.dump(doc, sort_keys=False) for doc in docs]
 
 results = co.rerank(
-	model="rerank-v3.5",
-	query=query,
-	documents=yaml_docs,
-	top_n=5
+    model="rerank-v3.5", query=query, documents=yaml_docs, top_n=5
 )
 ```