From 31747f6be071f2d8300791383a2da4f740b53de4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Mon, 28 Oct 2024 14:31:42 +0100
Subject: [PATCH] =?UTF-8?q?Revert=20"[DOCS]=20Documents=20that=20ELSER=20i?=
 =?UTF-8?q?s=20the=20default=20service=20for=20`semantic=5Ftext=E2=80=A6"?=
 =?UTF-8?q?=20(#115748)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 541bcf30e5d03944cace8deec24559fc63c8bcb5.
---
 .../mapping/types/semantic-text.asciidoc      | 26 +--------
 .../semantic-search-semantic-text.asciidoc    | 57 ++++++++++++++++---
 2 files changed, 50 insertions(+), 33 deletions(-)

diff --git a/docs/reference/mapping/types/semantic-text.asciidoc b/docs/reference/mapping/types/semantic-text.asciidoc
index 893e2c6cff8ed..ac23c153e01a3 100644
--- a/docs/reference/mapping/types/semantic-text.asciidoc
+++ b/docs/reference/mapping/types/semantic-text.asciidoc
@@ -13,47 +13,25 @@ Long passages are <> to smaller secti
 The `semantic_text` field type specifies an inference endpoint identifier that will be used to generate embeddings.
 You can create the inference endpoint by using the <>.
 This field type and the <> type make it simpler to perform semantic search on your data.
-If you don't specify an inference endpoint, the <> is used by default.
 
 Using `semantic_text`, you won't need to specify how to generate embeddings for your data, or how to index it.
 The {infer} endpoint automatically determines the embedding generation, indexing, and query to use.
 
-If you use the ELSER service, you can set up `semantic_text` with the following API request:
-
 [source,console]
 ------------------------------------------------------------
 PUT my-index-000001
-{
-  "mappings": {
-    "properties": {
-      "inference_field": {
-        "type": "semantic_text"
-      }
-    }
-  }
-}
-------------------------------------------------------------
-
-NOTE: In Serverless, you must create an {infer} endpoint using the <> and reference it when setting up `semantic_text` even if you use the ELSER service.
-
-If you use a service other than ELSER, you must create an {infer} endpoint using the <> and reference it when setting up `semantic_text` as the following example demonstrates:
-
-[source,console]
-------------------------------------------------------------
-PUT my-index-000002
 {
   "mappings": {
     "properties": {
       "inference_field": {
         "type": "semantic_text",
-        "inference_id": "my-openai-endpoint" <1>
+        "inference_id": "my-elser-endpoint"
       }
     }
   }
 }
 ------------------------------------------------------------
 // TEST[skip:Requires inference endpoint]
-<1> The `inference_id` of the {infer} endpoint to use to generate embeddings.
 
 The recommended way to use semantic_text is by having dedicated {infer} endpoints for ingestion and search.
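The next hunk references dedicated {infer} endpoints for ingestion and search, but only the opening lines of that mapping survive in the diff context. A minimal sketch of what such a mapping can look like, assuming two hypothetical endpoint names (`my-elser-endpoint-for-ingest` and `my-elser-endpoint-for-search`) and the `search_inference_id` mapping parameter:

[source,console]
------------------------------------------------------------
PUT my-index-000002
{
  "mappings": {
    "properties": {
      "inference_field": {
        "type": "semantic_text",
        "inference_id": "my-elser-endpoint-for-ingest", <1>
        "search_inference_id": "my-elser-endpoint-for-search" <2>
      }
    }
  }
}
------------------------------------------------------------
// TEST[skip:Requires inference endpoint]
<1> Hypothetical endpoint used at ingest time to generate embeddings.
<2> Hypothetical endpoint used at query time; both names are placeholders.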
@@ -62,7 +40,7 @@ After creating dedicated {infer} endpoints for both, you can reference them usin
 
 [source,console]
 ------------------------------------------------------------
-PUT my-index-000003
+PUT my-index-000002
 {
   "mappings": {
     "properties": {
diff --git a/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc b/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc
index 45546d20c3327..6c3f3b2128740 100644
--- a/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc
+++ b/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc
@@ -21,11 +21,45 @@ This tutorial uses the <> for demonstra
 [[semantic-text-requirements]]
 ==== Requirements
 
-This tutorial uses the <> for demonstration, which is created automatically as needed.
-To use the `semantic_text` field type with an {infer} service other than ELSER, you must create an inference endpoint using the <>.
+To use the `semantic_text` field type, you must have an {infer} endpoint deployed in
+your cluster using the <>.
 
-NOTE: In Serverless, you must create an {infer} endpoint using the <> and reference it when setting up `semantic_text` even if you use the ELSER service.
+[discrete]
+[[semantic-text-infer-endpoint]]
+==== Create the {infer} endpoint
+
+Create an inference endpoint by using the <>:
+[source,console]
+------------------------------------------------------------
+PUT _inference/sparse_embedding/my-elser-endpoint <1>
+{
+  "service": "elser", <2>
+  "service_settings": {
+    "adaptive_allocations": { <3>
+      "enabled": true,
+      "min_number_of_allocations": 3,
+      "max_number_of_allocations": 10
+    },
+    "num_threads": 1
+  }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+<1> The task type is `sparse_embedding` in the path as the `elser` service will
+be used and ELSER creates sparse vectors. The `inference_id` is
+`my-elser-endpoint`.
+<2> The `elser` service is used in this example.
+<3> This setting enables and configures {ml-docs}/ml-nlp-auto-scale.html#nlp-model-adaptive-allocations[adaptive allocations].
+Adaptive allocations make it possible for ELSER to automatically scale up or down resources based on the current load on the process.
+
+[NOTE]
+====
+You might see a 502 bad gateway error in the response when using the {kib} Console.
+This error usually just reflects a timeout, while the model downloads in the background.
+You can check the download progress in the {ml-app} UI.
+If using the Python client, you can set the `timeout` parameter to a higher value.
+====
 
 [discrete]
 [[semantic-text-index-mapping]]
@@ -41,7 +75,8 @@ PUT semantic-embeddings
   "mappings": {
     "properties": {
       "content": { <1>
-        "type": "semantic_text" <2>
+        "type": "semantic_text", <2>
+        "inference_id": "my-elser-endpoint" <3>
      }
    }
  }
}
@@ -50,15 +85,19 @@ PUT semantic-embeddings
 // TEST[skip:TBD]
 <1> The name of the field to contain the generated embeddings.
 <2> The field to contain the embeddings is a `semantic_text` field.
-Since no `inference_id` is provided, the <> is used by default.
-To use a different {infer} service, you must create an {infer} endpoint first using the <> and then specify it in the `semantic_text` field mapping using the `inference_id` parameter.
+<3> The `inference_id` is the inference endpoint you created in the previous step.
+It will be used to generate the embeddings based on the input text.
+Every time you ingest data into the related `semantic_text` field, this endpoint will be used for creating the vector representation of the text.
 
 [NOTE]
 ====
-If you're using web crawlers or connectors to generate indices, you have to <> for these indices to include the `semantic_text` field.
-Once the mapping is updated, you'll need to run a full web crawl or a full connector sync.
-This ensures that all existing documents are reprocessed and updated with the new semantic embeddings, enabling semantic search on the updated data.
+If you're using web crawlers or connectors to generate indices, you have to
+<> for these indices to
+include the `semantic_text` field. Once the mapping is updated, you'll need to run
+a full web crawl or a full connector sync. This ensures that all existing
+documents are reprocessed and updated with the new semantic embeddings,
+enabling semantic search on the updated data.
 ====
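The note above points at the update mapping API. A minimal sketch of adding a `semantic_text` field to an existing crawler or connector index, assuming a hypothetical index name `my-crawler-index` and the `my-elser-endpoint` endpoint created earlier in the tutorial:

[source,console]
------------------------------------------------------------
PUT my-crawler-index/_mapping
{
  "properties": {
    "content": { <1>
      "type": "semantic_text",
      "inference_id": "my-elser-endpoint"
    }
  }
}
------------------------------------------------------------
// TEST[skip:TBD]
<1> Adds a new field to the existing mapping; existing documents still need a full crawl or sync to be reprocessed with embeddings.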
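Because embeddings are generated each time data is ingested into the `semantic_text` field, a single indexing request is enough to produce them. A minimal sketch against the `semantic-embeddings` index from the tutorial, with placeholder document text:

[source,console]
------------------------------------------------------------
POST semantic-embeddings/_doc
{
  "content": "A placeholder passage whose embeddings are generated at ingest time by the endpoint configured in the field mapping."
}
------------------------------------------------------------
// TEST[skip:TBD]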
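The patch stops before the tutorial's search step, but once documents are indexed, the field can be searched with the `semantic` query. A minimal sketch, assuming the `semantic-embeddings` index and `content` field above:

[source,console]
------------------------------------------------------------
GET semantic-embeddings/_search
{
  "query": {
    "semantic": {
      "field": "content", <1>
      "query": "How are embeddings generated at ingest time?" <2>
    }
  }
}
------------------------------------------------------------
// TEST[skip:TBD]
<1> The `semantic_text` field to search.
<2> A placeholder query string; the endpoint configured for the field generates its embedding at search time.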