diff --git a/docs/changelog/117176.yaml b/docs/changelog/117176.yaml new file mode 100644 index 0000000000000..26e0d3635bc9e --- /dev/null +++ b/docs/changelog/117176.yaml @@ -0,0 +1,5 @@ +pr: 117176 +summary: Integrate IBM watsonx to Inference API for re-ranking task +area: Experiences +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 603fc6a32f078..8b25a6d1106ec 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -177,6 +177,7 @@ static TransportVersion def(int id) { public static final TransportVersion LINEAR_RETRIEVER_SUPPORT = def(8_837_00_0); public static final TransportVersion TIMEOUT_GET_PARAM_FOR_RESOLVE_CLUSTER = def(8_838_00_0); public static final TransportVersion INFERENCE_REQUEST_ADAPTIVE_RATE_LIMITING = def(8_839_00_0); + public static final TransportVersion ML_INFERENCE_IBM_WATSONX_RERANK_ADDED = def(8_840_00_0); /* * STOP! READ THIS FIRST! No, really, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java index 6fc9870034018..e8dc763116707 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java @@ -75,6 +75,8 @@ import org.elasticsearch.xpack.inference.services.huggingface.HuggingFaceServiceSettings; import org.elasticsearch.xpack.inference.services.huggingface.elser.HuggingFaceElserServiceSettings; import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsServiceSettings; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankServiceSettings; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankTaskSettings; import org.elasticsearch.xpack.inference.services.jinaai.JinaAIServiceSettings; import org.elasticsearch.xpack.inference.services.jinaai.embeddings.JinaAIEmbeddingsServiceSettings; import org.elasticsearch.xpack.inference.services.jinaai.embeddings.JinaAIEmbeddingsTaskSettings; @@ -364,6 +366,17 @@ private static void addIbmWatsonxNamedWritables(List namedWriteables) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/ibmwatsonx/IbmWatsonxActionCreator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/ibmwatsonx/IbmWatsonxActionCreator.java index 7cad7c42bdcf1..6b1097256e97f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/ibmwatsonx/IbmWatsonxActionCreator.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/ibmwatsonx/IbmWatsonxActionCreator.java @@ -12,9 +12,11 @@ import org.elasticsearch.xpack.inference.external.action.ExecutableAction; import org.elasticsearch.xpack.inference.external.action.SenderExecutableAction; import org.elasticsearch.xpack.inference.external.http.sender.IbmWatsonxEmbeddingsRequestManager; +import org.elasticsearch.xpack.inference.external.http.sender.IbmWatsonxRerankRequestManager; import org.elasticsearch.xpack.inference.external.http.sender.Sender; import org.elasticsearch.xpack.inference.services.ServiceComponents; import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel; import java.util.Map; import java.util.Objects; @@ -22,7 +24,6 @@ import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage; public class IbmWatsonxActionCreator implements IbmWatsonxActionVisitor { - private final Sender sender; private final ServiceComponents serviceComponents; @@ -41,6 +42,17 @@ public ExecutableAction create(IbmWatsonxEmbeddingsModel model, Map taskSettings) { + var overriddenModel = IbmWatsonxRerankModel.of(model, taskSettings); + var requestCreator = IbmWatsonxRerankRequestManager.of(overriddenModel, serviceComponents.threadPool()); + var failedToSendRequestErrorMessage = constructFailedToSendRequestMessage( + overriddenModel.getServiceSettings().uri(), + "Ibm Watsonx rerank" + ); + return new SenderExecutableAction(sender, requestCreator, failedToSendRequestErrorMessage); + } + protected IbmWatsonxEmbeddingsRequestManager getEmbeddingsRequestManager( IbmWatsonxEmbeddingsModel model, Truncator truncator, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/ibmwatsonx/IbmWatsonxActionVisitor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/ibmwatsonx/IbmWatsonxActionVisitor.java index 0a13ec2fb4645..474533040e0c3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/ibmwatsonx/IbmWatsonxActionVisitor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/ibmwatsonx/IbmWatsonxActionVisitor.java @@ -9,9 +9,12 @@ import org.elasticsearch.xpack.inference.external.action.ExecutableAction; import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel; import java.util.Map; public interface IbmWatsonxActionVisitor { ExecutableAction create(IbmWatsonxEmbeddingsModel model, Map taskSettings); + + ExecutableAction create(IbmWatsonxRerankModel model, Map taskSettings); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/IbmWatsonxRerankRequestManager.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/IbmWatsonxRerankRequestManager.java new file mode 100644 index 0000000000000..f503771510e72 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/IbmWatsonxRerankRequestManager.java @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.http.sender; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.inference.external.http.retry.RequestSender; +import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler; +import org.elasticsearch.xpack.inference.external.ibmwatsonx.IbmWatsonxResponseHandler; +import org.elasticsearch.xpack.inference.external.request.ibmwatsonx.IbmWatsonxRerankRequest; +import org.elasticsearch.xpack.inference.external.response.ibmwatsonx.IbmWatsonxRankedResponseEntity; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel; + +import java.util.List; +import java.util.Objects; +import java.util.function.Supplier; + +public class IbmWatsonxRerankRequestManager extends IbmWatsonxRequestManager { + private static final Logger logger = LogManager.getLogger(IbmWatsonxRerankRequestManager.class); + private static final ResponseHandler HANDLER = createIbmWatsonxResponseHandler(); + + private static ResponseHandler createIbmWatsonxResponseHandler() { + return new IbmWatsonxResponseHandler( + "ibm watsonx rerank", + (request, response) -> IbmWatsonxRankedResponseEntity.fromResponse(response) + ); + } + + public static IbmWatsonxRerankRequestManager of(IbmWatsonxRerankModel model, ThreadPool threadPool) { + return new IbmWatsonxRerankRequestManager(Objects.requireNonNull(model), Objects.requireNonNull(threadPool)); + } + + private final IbmWatsonxRerankModel model; + + public IbmWatsonxRerankRequestManager(IbmWatsonxRerankModel model, ThreadPool threadPool) { + super(threadPool, model); + this.model = model; + } + + @Override + public void execute( + InferenceInputs inferenceInputs, + RequestSender requestSender, + Supplier hasRequestCompletedFunction, + ActionListener listener + ) { + var rerankInput = QueryAndDocsInputs.of(inferenceInputs); + + execute( + new ExecutableInferenceRequest( + requestSender, + logger, + getRerankRequest(rerankInput.getQuery(), rerankInput.getChunks(), model), + HANDLER, + hasRequestCompletedFunction, + listener + ) + ); + } + + protected IbmWatsonxRerankRequest getRerankRequest(String query, List chunks, IbmWatsonxRerankModel model) { + return new IbmWatsonxRerankRequest(query, chunks, model); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/ibmwatsonx/IbmWatsonxResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/ibmwatsonx/IbmWatsonxResponseHandler.java index 019d72a381c27..f5512c311c5d6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/ibmwatsonx/IbmWatsonxResponseHandler.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/ibmwatsonx/IbmWatsonxResponseHandler.java @@ -17,7 +17,6 @@ import static org.elasticsearch.core.Strings.format; public class IbmWatsonxResponseHandler extends BaseResponseHandler { - public IbmWatsonxResponseHandler(String requestType, ResponseParser parseFunction) { super(requestType, parseFunction, IbmWatsonxErrorResponseEntity::fromResponse); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/IbmWatsonxRerankRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/IbmWatsonxRerankRequest.java new file mode 100644 index 0000000000000..cfc1f367be45c --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/IbmWatsonxRerankRequest.java @@ -0,0 +1,110 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.request.ibmwatsonx; + +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.ByteArrayEntity; +import org.elasticsearch.common.Strings; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.inference.external.request.HttpRequest; +import org.elasticsearch.xpack.inference.external.request.Request; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankTaskSettings; + +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Objects; + +public class IbmWatsonxRerankRequest implements IbmWatsonxRequest { + + private final String query; + private final List input; + private final IbmWatsonxRerankTaskSettings taskSettings; + private final IbmWatsonxRerankModel model; + + public IbmWatsonxRerankRequest(String query, List input, IbmWatsonxRerankModel model) { + Objects.requireNonNull(model); + + this.input = Objects.requireNonNull(input); + this.query = Objects.requireNonNull(query); + taskSettings = model.getTaskSettings(); + this.model = model; + } + + @Override + public HttpRequest createHttpRequest() { + URI uri; + + try { + uri = new URI(model.uri().toString()); + } catch (URISyntaxException ex) { + throw new IllegalArgumentException("cannot parse URI patter"); + } + + HttpPost httpPost = new HttpPost(uri); + + ByteArrayEntity byteEntity = new ByteArrayEntity( + Strings.toString( + new IbmWatsonxRerankRequestEntity( + query, + input, + taskSettings, + model.getServiceSettings().modelId(), + model.getServiceSettings().projectId() + ) + ).getBytes(StandardCharsets.UTF_8) + ); + + httpPost.setEntity(byteEntity); + httpPost.setHeader(HttpHeaders.CONTENT_TYPE, XContentType.JSON.mediaType()); + + decorateWithAuth(httpPost); + + return new HttpRequest(httpPost, getInferenceEntityId()); + } + + public void decorateWithAuth(HttpPost httpPost) { + IbmWatsonxRequest.decorateWithBearerToken(httpPost, model.getSecretSettings(), model.getInferenceEntityId()); + } + + @Override + public String getInferenceEntityId() { + return model.getInferenceEntityId(); + } + + @Override + public URI getURI() { + return model.uri(); + } + + @Override + public Request truncate() { + return this; + } + + public String getQuery() { + return query; + } + + public List getInput() { + return input; + } + + public IbmWatsonxRerankModel getModel() { + return model; + } + + @Override + public boolean[] getTruncationInfo() { + return null; + } + +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/IbmWatsonxRerankRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/IbmWatsonxRerankRequestEntity.java new file mode 100644 index 0000000000000..36e5951ebdc15 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/IbmWatsonxRerankRequestEntity.java @@ -0,0 +1,77 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.request.ibmwatsonx; + +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankTaskSettings; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +public record IbmWatsonxRerankRequestEntity( + String query, + List inputs, + IbmWatsonxRerankTaskSettings taskSettings, + String modelId, + String projectId +) implements ToXContentObject { + + private static final String INPUTS_FIELD = "inputs"; + private static final String QUERY_FIELD = "query"; + private static final String MODEL_ID_FIELD = "model_id"; + private static final String PROJECT_ID_FIELD = "project_id"; + + public IbmWatsonxRerankRequestEntity { + Objects.requireNonNull(query); + Objects.requireNonNull(inputs); + Objects.requireNonNull(modelId); + Objects.requireNonNull(projectId); + Objects.requireNonNull(taskSettings); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + + builder.field(MODEL_ID_FIELD, modelId); + builder.field(QUERY_FIELD, query); + builder.startArray(INPUTS_FIELD); + for (String input : inputs) { + builder.startObject(); + builder.field("text", input); + builder.endObject(); + } + builder.endArray(); + builder.field(PROJECT_ID_FIELD, projectId); + + builder.startObject("parameters"); + { + if (taskSettings.getTruncateInputTokens() != null) { + builder.field("truncate_input_tokens", taskSettings.getTruncateInputTokens()); + } + + builder.startObject("return_options"); + { + if (taskSettings.getDoesReturnDocuments() != null) { + builder.field("inputs", taskSettings.getDoesReturnDocuments()); + } + if (taskSettings.getTopNDocumentsOnly() != null) { + builder.field("top_n", taskSettings.getTopNDocumentsOnly()); + } + } + builder.endObject(); + } + builder.endObject(); + + builder.endObject(); + + return builder; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/IbmWatsonxUtils.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/IbmWatsonxUtils.java index a506a33385dfb..91679288e5ae3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/IbmWatsonxUtils.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/IbmWatsonxUtils.java @@ -13,6 +13,7 @@ public class IbmWatsonxUtils { public static final String V1 = "v1"; public static final String TEXT = "text"; public static final String EMBEDDINGS = "embeddings"; + public static final String RERANKS = "reranks"; private IbmWatsonxUtils() {} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/ibmwatsonx/IbmWatsonxRankedResponseEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/ibmwatsonx/IbmWatsonxRankedResponseEntity.java new file mode 100644 index 0000000000000..05f369bd8961e --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/ibmwatsonx/IbmWatsonxRankedResponseEntity.java @@ -0,0 +1,157 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + * + * this file was contributed to by a generative AI + */ + +package org.elasticsearch.xpack.inference.external.response.ibmwatsonx; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentParserConfiguration; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.inference.results.RankedDocsResults; +import org.elasticsearch.xpack.inference.external.http.HttpResult; + +import java.io.IOException; + +import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; +import static org.elasticsearch.common.xcontent.XContentParserUtils.parseList; +import static org.elasticsearch.common.xcontent.XContentParserUtils.throwUnknownField; +import static org.elasticsearch.common.xcontent.XContentParserUtils.throwUnknownToken; +import static org.elasticsearch.xpack.inference.external.response.XContentUtils.moveToFirstToken; +import static org.elasticsearch.xpack.inference.external.response.XContentUtils.positionParserAtTokenAfterField; + +public class IbmWatsonxRankedResponseEntity { + + private static final Logger logger = LogManager.getLogger(IbmWatsonxRankedResponseEntity.class); + + /** + * Parses the Ibm Watsonx ranked response. + * + * For a request like: + * "model": "rerank-english-v2.0", + * "query": "database", + * "return_documents": true, + * "top_n": 3, + * "input": ["greenland", "google","john", "mysql","potter", "grammar"] + *

+ * The response will look like (without whitespace): + * { + * "rerank": [ + * { + * "index": 3, + * "relevance_score": 0.7989932 + * }, + * { + * "index": 5, + * "relevance_score": 0.61281824 + * }, + * { + * "index": 1, + * "relevance_score": 0.5762553 + * }, + * { + * "index": 4, + * "relevance_score": 0.47395563 + * }, + * { + * "index": 0, + * "relevance_score": 0.4338926 + * }, + * { + * "index": 2, + * "relevance_score": 0.42638257 + * } + * ], + * } + * + * @param response the http response from ibm watsonx + * @return the parsed response + * @throws IOException if there is an error parsing the response + */ + public static InferenceServiceResults fromResponse(HttpResult response) throws IOException { + var parserConfig = XContentParserConfiguration.EMPTY.withDeprecationHandler(LoggingDeprecationHandler.INSTANCE); + + try (XContentParser jsonParser = XContentFactory.xContent(XContentType.JSON).createParser(parserConfig, response.body())) { + moveToFirstToken(jsonParser); + + XContentParser.Token token = jsonParser.currentToken(); + ensureExpectedToken(XContentParser.Token.START_OBJECT, token, jsonParser); + + positionParserAtTokenAfterField(jsonParser, "results", FAILED_TO_FIND_FIELD_TEMPLATE); // TODO error message + + token = jsonParser.currentToken(); + if (token == XContentParser.Token.START_ARRAY) { + return new RankedDocsResults(parseList(jsonParser, IbmWatsonxRankedResponseEntity::parseRankedDocObject)); + } else { + throwUnknownToken(token, jsonParser); + } + + // This should never be reached. The above code should either return successfully or hit the throwUnknownToken + // or throw a parsing exception + throw new IllegalStateException("Reached an invalid state while parsing the Watsonx response"); + } + } + + private static RankedDocsResults.RankedDoc parseRankedDocObject(XContentParser parser) throws IOException { + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser); + int index = -1; + float score = -1; + String documentText = null; + parser.nextToken(); + while (parser.currentToken() != XContentParser.Token.END_OBJECT) { + if (parser.currentToken() == XContentParser.Token.FIELD_NAME) { + switch (parser.currentName()) { + case "index": + parser.nextToken(); // move to VALUE_NUMBER + index = parser.intValue(); + parser.nextToken(); // move to next FIELD_NAME or END_OBJECT + break; + case "score": + parser.nextToken(); // move to VALUE_NUMBER + score = parser.floatValue(); + parser.nextToken(); // move to next FIELD_NAME or END_OBJECT + break; + case "input": + parser.nextToken(); // move to START_OBJECT; document text is wrapped in an object + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser); + do { + if (parser.currentToken() == XContentParser.Token.FIELD_NAME && parser.currentName().equals("text")) { + parser.nextToken(); // move to VALUE_STRING + documentText = parser.text(); + } + } while (parser.nextToken() != XContentParser.Token.END_OBJECT); + parser.nextToken();// move past END_OBJECT + // parser should now be at the next FIELD_NAME or END_OBJECT + break; + default: + throwUnknownField(parser.currentName(), parser); + } + } else { + parser.nextToken(); + } + } + + if (index == -1) { + logger.warn("Failed to find required field [index] in Watsonx rerank response"); + } + if (score == -1) { + logger.warn("Failed to find required field [relevance_score] in Watsonx rerank response"); + } + // documentText may or may not be present depending on the request parameter + + return new RankedDocsResults.RankedDoc(index, score, documentText); + } + + private IbmWatsonxRankedResponseEntity() {} + + static String FAILED_TO_FIND_FIELD_TEMPLATE = "Failed to find required field [%s] in Watsonx rerank response"; +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxModel.java index 4f0b425cdaa51..09706f70e3684 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxModel.java @@ -12,6 +12,7 @@ import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.ModelSecrets; import org.elasticsearch.inference.ServiceSettings; +import org.elasticsearch.inference.TaskSettings; import org.elasticsearch.xpack.inference.external.action.ExecutableAction; import org.elasticsearch.xpack.inference.external.action.ibmwatsonx.IbmWatsonxActionVisitor; @@ -38,6 +39,12 @@ public IbmWatsonxModel(IbmWatsonxModel model, ServiceSettings serviceSettings) { rateLimitServiceSettings = model.rateLimitServiceSettings(); } + public IbmWatsonxModel(IbmWatsonxModel model, TaskSettings taskSettings) { + super(model, taskSettings); + + rateLimitServiceSettings = model.rateLimitServiceSettings(); + } + public abstract ExecutableAction accept(IbmWatsonxActionVisitor creator, Map taskSettings, InputType inputType); public IbmWatsonxRateLimitServiceSettings rateLimitServiceSettings() { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java index 477225f00d22b..3fa423c2dae19 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java @@ -41,6 +41,7 @@ import org.elasticsearch.xpack.inference.services.ServiceUtils; import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel; import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsServiceSettings; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel; import org.elasticsearch.xpack.inference.services.validation.ModelValidatorBuilder; import java.util.EnumSet; @@ -138,6 +139,15 @@ private static IbmWatsonxModel createModel( secretSettings, context ); + case RERANK -> new IbmWatsonxRerankModel( + inferenceEntityId, + taskType, + NAME, + serviceSettings, + taskSettings, + secretSettings, + context + ); default -> throw new ElasticsearchStatusException(failureMessage, RestStatus.BAD_REQUEST); }; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankModel.java new file mode 100644 index 0000000000000..cb4c509d88c2b --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankModel.java @@ -0,0 +1,121 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank; + +import org.apache.http.client.utils.URIBuilder; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.InputType; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ModelSecrets; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.xpack.inference.external.action.ExecutableAction; +import org.elasticsearch.xpack.inference.external.action.ibmwatsonx.IbmWatsonxActionVisitor; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxModel; +import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Map; + +import static org.elasticsearch.xpack.inference.external.request.ibmwatsonx.IbmWatsonxUtils.ML; +import static org.elasticsearch.xpack.inference.external.request.ibmwatsonx.IbmWatsonxUtils.RERANKS; +import static org.elasticsearch.xpack.inference.external.request.ibmwatsonx.IbmWatsonxUtils.TEXT; +import static org.elasticsearch.xpack.inference.external.request.ibmwatsonx.IbmWatsonxUtils.V1; + +public class IbmWatsonxRerankModel extends IbmWatsonxModel { + public static IbmWatsonxRerankModel of(IbmWatsonxRerankModel model, Map taskSettings) { + var requestTaskSettings = IbmWatsonxRerankTaskSettings.fromMap(taskSettings); + return new IbmWatsonxRerankModel(model, IbmWatsonxRerankTaskSettings.of(model.getTaskSettings(), requestTaskSettings)); + } + + public IbmWatsonxRerankModel( + String modelId, + TaskType taskType, + String service, + Map serviceSettings, + Map taskSettings, + @Nullable Map secrets, + ConfigurationParseContext context + ) { + this( + modelId, + taskType, + service, + IbmWatsonxRerankServiceSettings.fromMap(serviceSettings, context), + IbmWatsonxRerankTaskSettings.fromMap(taskSettings), + DefaultSecretSettings.fromMap(secrets) + ); + } + + // should only be used for testing + IbmWatsonxRerankModel( + String modelId, + TaskType taskType, + String service, + IbmWatsonxRerankServiceSettings serviceSettings, + IbmWatsonxRerankTaskSettings taskSettings, + @Nullable DefaultSecretSettings secretSettings + ) { + super( + new ModelConfigurations(modelId, taskType, service, serviceSettings, taskSettings), + new ModelSecrets(secretSettings), + serviceSettings + ); + } + + private IbmWatsonxRerankModel(IbmWatsonxRerankModel model, IbmWatsonxRerankTaskSettings taskSettings) { + super(model, taskSettings); + } + + @Override + public IbmWatsonxRerankServiceSettings getServiceSettings() { + return (IbmWatsonxRerankServiceSettings) super.getServiceSettings(); + } + + @Override + public IbmWatsonxRerankTaskSettings getTaskSettings() { + return (IbmWatsonxRerankTaskSettings) super.getTaskSettings(); + } + + @Override + public DefaultSecretSettings getSecretSettings() { + return (DefaultSecretSettings) super.getSecretSettings(); + } + + public URI uri() { + URI uri; + try { + uri = buildUri(this.getServiceSettings().uri().toString(), this.getServiceSettings().apiVersion()); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + + return uri; + } + + /** + * Accepts a visitor to create an executable action. The returned action will not return documents in the response. + * @param visitor _ + * @param taskSettings _ + * @param inputType ignored for rerank task + * @return the rerank action + */ + @Override + public ExecutableAction accept(IbmWatsonxActionVisitor visitor, Map taskSettings, InputType inputType) { + return visitor.create(this, taskSettings); + } + + public static URI buildUri(String uri, String apiVersion) throws URISyntaxException { + return new URIBuilder().setScheme("https") + .setHost(uri) + .setPathSegments(ML, V1, TEXT, RERANKS) + .setParameter("version", apiVersion) + .build(); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankServiceSettings.java new file mode 100644 index 0000000000000..969622f9ba54f --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankServiceSettings.java @@ -0,0 +1,190 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ServiceSettings; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxRateLimitServiceSettings; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxService; +import org.elasticsearch.xpack.inference.services.settings.FilteredXContentObject; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; + +import java.io.IOException; +import java.net.URI; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID; +import static org.elasticsearch.xpack.inference.services.ServiceFields.URL; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.convertToUri; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.createUri; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredString; +import static org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxServiceFields.API_VERSION; +import static org.elasticsearch.xpack.inference.services.ibmwatsonx.IbmWatsonxServiceFields.PROJECT_ID; + +public class IbmWatsonxRerankServiceSettings extends FilteredXContentObject implements ServiceSettings, IbmWatsonxRateLimitServiceSettings { + public static final String NAME = "ibm_watsonx_rerank_service_settings"; + + /** + * Rate limits are defined at + * Watson Machine Learning plans. + * For Lite plan, you've 120 requests per minute. + */ + private static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(120); + + public static IbmWatsonxRerankServiceSettings fromMap(Map map, ConfigurationParseContext context) { + ValidationException validationException = new ValidationException(); + + String url = extractRequiredString(map, URL, ModelConfigurations.SERVICE_SETTINGS, validationException); + URI uri = convertToUri(url, URL, ModelConfigurations.SERVICE_SETTINGS, validationException); + String apiVersion = extractRequiredString(map, API_VERSION, ModelConfigurations.SERVICE_SETTINGS, validationException); + + String modelId = extractRequiredString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); + String projectId = extractRequiredString(map, PROJECT_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); + + RateLimitSettings rateLimitSettings = RateLimitSettings.of( + map, + DEFAULT_RATE_LIMIT_SETTINGS, + validationException, + IbmWatsonxService.NAME, + context + ); + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + + return new IbmWatsonxRerankServiceSettings(uri, apiVersion, modelId, projectId, rateLimitSettings); + } + + private final URI uri; + + private final String apiVersion; + + private final String modelId; + + private final String projectId; + + private final RateLimitSettings rateLimitSettings; + + public IbmWatsonxRerankServiceSettings( + URI uri, + String apiVersion, + String modelId, + String projectId, + @Nullable RateLimitSettings rateLimitSettings + ) { + this.uri = uri; + this.apiVersion = apiVersion; + this.projectId = projectId; + this.modelId = modelId; + this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS); + } + + public IbmWatsonxRerankServiceSettings(StreamInput in) throws IOException { + this.uri = createUri(in.readString()); + this.apiVersion = in.readString(); + this.modelId = in.readString(); + this.projectId = in.readString(); + this.rateLimitSettings = new RateLimitSettings(in); + + } + + public URI uri() { + return uri; + } + + public String apiVersion() { + return apiVersion; + } + + @Override + public String modelId() { + return modelId; + } + + public String projectId() { + return projectId; + } + + @Override + public RateLimitSettings rateLimitSettings() { + return rateLimitSettings; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + + toXContentFragmentOfExposedFields(builder, params); + + builder.endObject(); + return builder; + } + + @Override + protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder builder, Params params) throws IOException { + builder.field(URL, uri.toString()); + + builder.field(API_VERSION, apiVersion); + + builder.field(MODEL_ID, modelId); + + builder.field(PROJECT_ID, projectId); + + rateLimitSettings.toXContent(builder, params); + + return builder; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ML_INFERENCE_IBM_WATSONX_RERANK_ADDED; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(uri.toString()); + out.writeString(apiVersion); + + out.writeString(modelId); + out.writeString(projectId); + + rateLimitSettings.writeTo(out); + } + + @Override + public boolean equals(Object object) { + if (this == object) return true; + if (object == null || getClass() != object.getClass()) return false; + IbmWatsonxRerankServiceSettings that = (IbmWatsonxRerankServiceSettings) object; + return Objects.equals(uri, that.uri) + && Objects.equals(apiVersion, that.apiVersion) + && Objects.equals(modelId, that.modelId) + && Objects.equals(projectId, that.projectId) + && Objects.equals(rateLimitSettings, that.rateLimitSettings); + } + + @Override + public int hashCode() { + return Objects.hash(uri, apiVersion, modelId, projectId, rateLimitSettings); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankTaskSettings.java new file mode 100644 index 0000000000000..12f4b8f6fa33e --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankTaskSettings.java @@ -0,0 +1,192 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.InputType; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.TaskSettings; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalBoolean; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger; + +public class IbmWatsonxRerankTaskSettings implements TaskSettings { + + public static final String NAME = "ibm_watsonx_rerank_task_settings"; + public static final String RETURN_DOCUMENTS = "return_documents"; + public static final String TOP_N_DOCS_ONLY = "top_n"; + public static final String TRUNCATE_INPUT_TOKENS = "truncate_input_tokens"; + + static final IbmWatsonxRerankTaskSettings EMPTY_SETTINGS = new IbmWatsonxRerankTaskSettings(null, null, null); + + public static IbmWatsonxRerankTaskSettings fromMap(Map map) { + ValidationException validationException = new ValidationException(); + + if (map == null || map.isEmpty()) { + return EMPTY_SETTINGS; + } + + Boolean returnDocuments = extractOptionalBoolean(map, RETURN_DOCUMENTS, validationException); + Integer topNDocumentsOnly = extractOptionalPositiveInteger( + map, + TOP_N_DOCS_ONLY, + ModelConfigurations.TASK_SETTINGS, + validationException + ); + Integer truncateInputTokens = extractOptionalPositiveInteger( + map, + TRUNCATE_INPUT_TOKENS, + ModelConfigurations.TASK_SETTINGS, + validationException + ); + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + + return of(topNDocumentsOnly, returnDocuments, truncateInputTokens); + } + + /** + * Creates a new {@link IbmWatsonxRerankTaskSettings} + * by preferring non-null fields from the request settings over the original settings. + * + * @param originalSettings the settings stored as part of the inference entity configuration + * @param requestTaskSettings the settings passed in within the task_settings field of the request + * @return a constructed {@link IbmWatsonxRerankTaskSettings} + */ + public static IbmWatsonxRerankTaskSettings of( + IbmWatsonxRerankTaskSettings originalSettings, + IbmWatsonxRerankTaskSettings requestTaskSettings + ) { + return new IbmWatsonxRerankTaskSettings( + requestTaskSettings.getTopNDocumentsOnly() != null + ? requestTaskSettings.getTopNDocumentsOnly() + : originalSettings.getTopNDocumentsOnly(), + requestTaskSettings.getReturnDocuments() != null + ? requestTaskSettings.getReturnDocuments() + : originalSettings.getReturnDocuments(), + requestTaskSettings.getTruncateInputTokens() != null + ? requestTaskSettings.getTruncateInputTokens() + : originalSettings.getTruncateInputTokens() + ); + } + + public static IbmWatsonxRerankTaskSettings of(Integer topNDocumentsOnly, Boolean returnDocuments, Integer maxChunksPerDoc) { + return new IbmWatsonxRerankTaskSettings(topNDocumentsOnly, returnDocuments, maxChunksPerDoc); + } + + private final Integer topNDocumentsOnly; + private final Boolean returnDocuments; + private final Integer truncateInputTokens; + + public IbmWatsonxRerankTaskSettings(StreamInput in) throws IOException { + this(in.readOptionalInt(), in.readOptionalBoolean(), in.readOptionalInt()); + } + + public IbmWatsonxRerankTaskSettings( + @Nullable Integer topNDocumentsOnly, + @Nullable Boolean doReturnDocuments, + @Nullable Integer truncateInputTokens + ) { + this.topNDocumentsOnly = topNDocumentsOnly; + this.returnDocuments = doReturnDocuments; + this.truncateInputTokens = truncateInputTokens; + } + + @Override + public boolean isEmpty() { + return topNDocumentsOnly == null && returnDocuments == null && truncateInputTokens == null; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (topNDocumentsOnly != null) { + builder.field(TOP_N_DOCS_ONLY, topNDocumentsOnly); + } + if (returnDocuments != null) { + builder.field(RETURN_DOCUMENTS, returnDocuments); + } + if (truncateInputTokens != null) { + builder.field(TRUNCATE_INPUT_TOKENS, truncateInputTokens); + } + builder.endObject(); + return builder; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ML_INFERENCE_IBM_WATSONX_RERANK_ADDED; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeOptionalInt(topNDocumentsOnly); + out.writeOptionalBoolean(returnDocuments); + out.writeOptionalInt(truncateInputTokens); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + IbmWatsonxRerankTaskSettings that = (IbmWatsonxRerankTaskSettings) o; + return Objects.equals(returnDocuments, that.returnDocuments) + && Objects.equals(topNDocumentsOnly, that.topNDocumentsOnly) + && Objects.equals(truncateInputTokens, that.truncateInputTokens); + } + + @Override + public int hashCode() { + return Objects.hash(returnDocuments, topNDocumentsOnly, truncateInputTokens); + } + + public static String invalidInputTypeMessage(InputType inputType) { + return Strings.format("received invalid input type value [%s]", inputType.toString()); + } + + public Boolean getDoesReturnDocuments() { + return returnDocuments; + } + + public Integer getTopNDocumentsOnly() { + return topNDocumentsOnly; + } + + public Boolean getReturnDocuments() { + return returnDocuments; + } + + public Integer getTruncateInputTokens() { + return truncateInputTokens; + } + + @Override + public TaskSettings updatedTaskSettings(Map newSettings) { + IbmWatsonxRerankTaskSettings updatedSettings = IbmWatsonxRerankTaskSettings.fromMap(new HashMap<>(newSettings)); + return IbmWatsonxRerankTaskSettings.of(this, updatedSettings); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/rerank/IbmWatsonxRerankRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/rerank/IbmWatsonxRerankRequestEntityTests.java new file mode 100644 index 0000000000000..8278b76a1cee4 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/rerank/IbmWatsonxRerankRequestEntityTests.java @@ -0,0 +1,60 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.request.ibmwatsonx.rerank; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.inference.external.request.ibmwatsonx.IbmWatsonxRerankRequestEntity; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankTaskSettings; + +import java.io.IOException; +import java.util.List; + +import static org.elasticsearch.xpack.inference.MatchersUtils.equalToIgnoringWhitespaceInJsonString; + +public class IbmWatsonxRerankRequestEntityTests extends ESTestCase { + public void testXContent_Request() throws IOException { + IbmWatsonxRerankTaskSettings taskSettings = new IbmWatsonxRerankTaskSettings(5, true, 100); + var entity = new IbmWatsonxRerankRequestEntity( + "database", + List.of("greenland", "google", "john", "mysql", "potter", "grammar"), + taskSettings, + "model", + "project_id" + ); + + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + entity.toXContent(builder, null); + String xContentResult = Strings.toString(builder); + + assertThat(xContentResult, equalToIgnoringWhitespaceInJsonString(""" + {"model_id":"model", + "query":"database", + "inputs":[ + {"text":"greenland"}, + {"text":"google"}, + {"text":"john"}, + {"text":"mysql"}, + {"text":"potter"}, + {"text":"grammar"} + ], + "project_id":"project_id", + "parameters":{ + "truncate_input_tokens":100, + "return_options":{ + "inputs":true, + "top_n":5 + } + } + } + """)); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/rerank/IbmWatsonxRerankRequestTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/rerank/IbmWatsonxRerankRequestTests.java new file mode 100644 index 0000000000000..8c95a01bc3230 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/ibmwatsonx/rerank/IbmWatsonxRerankRequestTests.java @@ -0,0 +1,107 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.request.ibmwatsonx.rerank; + +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.HttpPost; +import org.elasticsearch.core.Strings; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.inference.external.request.ibmwatsonx.IbmWatsonxRerankRequest; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModelTests; + +import java.io.IOException; +import java.net.URI; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xpack.inference.external.http.Utils.entityAsMap; +import static org.hamcrest.Matchers.aMapWithSize; +import static org.hamcrest.Matchers.endsWith; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; + +public class IbmWatsonxRerankRequestTests extends ESTestCase { + private static final String AUTH_HEADER_VALUE = "foo"; + + public void testCreateRequest() throws IOException { + var model = "model"; + var projectId = "project_id"; + URI uri = null; + try { + uri = new URI("http://abc.com"); + } catch (Exception ignored) {} + var apiVersion = "2023-05-04"; + var apiKey = "api_key"; + var query = "database"; + List input = List.of("greenland", "google", "john", "mysql", "potter", "grammar"); + + var request = createRequest(model, projectId, uri, apiVersion, apiKey, query, input); + var httpRequest = request.createHttpRequest(); + + assertThat(httpRequest.httpRequestBase(), instanceOf(HttpPost.class)); + var httpPost = (HttpPost) httpRequest.httpRequestBase(); + + assertThat(httpPost.getURI().toString(), endsWith(Strings.format("%s=%s", "version", apiVersion))); + assertThat(httpPost.getLastHeader(HttpHeaders.CONTENT_TYPE).getValue(), is(XContentType.JSON.mediaType())); + + var requestMap = entityAsMap(httpPost.getEntity().getContent()); + assertThat(requestMap, aMapWithSize(5)); + assertThat( + requestMap, + is( + + Map.of( + "project_id", + "project_id", + "model_id", + "model", + "inputs", + List.of( + Map.of("text", "greenland"), + Map.of("text", "google"), + Map.of("text", "john"), + Map.of("text", "mysql"), + Map.of("text", "potter"), + Map.of("text", "grammar") + ), + "query", + "database", + "parameters", + Map.of("return_options", Map.of("top_n", 2, "inputs", true), "truncate_input_tokens", 100) + ) + ) + ); + } + + public static IbmWatsonxRerankRequest createRequest( + String model, + String projectId, + URI uri, + String apiVersion, + String apiKey, + String query, + List input + ) { + var embeddingsModel = IbmWatsonxRerankModelTests.createModel(model, projectId, uri, apiVersion, apiKey); + + return new IbmWatsonxRerankWithoutAuthRequest(query, input, embeddingsModel); + } + + private static class IbmWatsonxRerankWithoutAuthRequest extends IbmWatsonxRerankRequest { + IbmWatsonxRerankWithoutAuthRequest(String query, List input, IbmWatsonxRerankModel model) { + super(query, input, model); + } + + @Override + public void decorateWithAuth(HttpPost httpPost) { + httpPost.setHeader(HttpHeaders.AUTHORIZATION, AUTH_HEADER_VALUE); + } + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/response/ibmwatsonx/IbmWatsonxRankedResponseEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/response/ibmwatsonx/IbmWatsonxRankedResponseEntityTests.java new file mode 100644 index 0000000000000..6b59f25896a48 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/response/ibmwatsonx/IbmWatsonxRankedResponseEntityTests.java @@ -0,0 +1,166 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +package org.elasticsearch.xpack.inference.external.response.ibmwatsonx; + +import org.apache.http.HttpResponse; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.core.inference.results.RankedDocsResults; +import org.elasticsearch.xpack.inference.external.http.HttpResult; +import org.hamcrest.MatcherAssert; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; +import static org.mockito.Mockito.mock; + +public class IbmWatsonxRankedResponseEntityTests extends ESTestCase { + + public void testResponseLiteral() throws IOException { + InferenceServiceResults parsedResults = IbmWatsonxRankedResponseEntity.fromResponse( + new HttpResult(mock(HttpResponse.class), responseLiteral.getBytes(StandardCharsets.UTF_8)) + ); + + MatcherAssert.assertThat(parsedResults, instanceOf(RankedDocsResults.class)); + List expected = responseLiteralDocs(); + for (int i = 0; i < ((RankedDocsResults) parsedResults).getRankedDocs().size(); i++) { + assertEquals(((RankedDocsResults) parsedResults).getRankedDocs().get(i).index(), expected.get(i).index()); + } + } + + public void testGeneratedResponse() throws IOException { + int numDocs = randomIntBetween(1, 10); + + List expected = new ArrayList<>(numDocs); + StringBuilder responseBuilder = new StringBuilder(); + + responseBuilder.append("{"); + responseBuilder.append("\"results\": ["); + List indices = linear(numDocs); + List scores = linearFloats(numDocs); + for (int i = 0; i < numDocs; i++) { + int index = indices.remove(randomInt(indices.size() - 1)); + + responseBuilder.append("{"); + responseBuilder.append("\"index\":").append(index).append(","); + responseBuilder.append("\"score\":").append(scores.get(i).toString()).append("}"); + expected.add(new RankedDocsResults.RankedDoc(index, scores.get(i), null)); + if (i < numDocs - 1) { + responseBuilder.append(","); + } + } + responseBuilder.append("]"); + + responseBuilder.append(randomIntBetween(1, 10)).append("}"); + + InferenceServiceResults parsedResults = IbmWatsonxRankedResponseEntity.fromResponse( + new HttpResult(mock(HttpResponse.class), responseBuilder.toString().getBytes(StandardCharsets.UTF_8)) + ); + MatcherAssert.assertThat(parsedResults, instanceOf(RankedDocsResults.class)); + for (int i = 0; i < ((RankedDocsResults) parsedResults).getRankedDocs().size(); i++) { + assertEquals(((RankedDocsResults) parsedResults).getRankedDocs().get(i).index(), expected.get(i).index()); + } + } + + private ArrayList responseLiteralDocs() { + var list = new ArrayList(); + + list.add(new RankedDocsResults.RankedDoc(2, 0.98005307F, null)); + list.add(new RankedDocsResults.RankedDoc(3, 0.27904198F, null)); + list.add(new RankedDocsResults.RankedDoc(0, 0.10194652F, null)); + return list; + } + + private final String responseLiteral = """ + { + "results": [ + { + "index": 2, + "score": 0.98005307 + }, + { + "index": 3, + "score": 0.27904198 + }, + { + "index": 0, + "score": 0.10194652 + } + ] + } + """; + + public void testResponseLiteralWithDocuments() throws IOException { + InferenceServiceResults parsedResults = IbmWatsonxRankedResponseEntity.fromResponse( + new HttpResult(mock(HttpResponse.class), responseLiteralWithDocuments.getBytes(StandardCharsets.UTF_8)) + ); + + MatcherAssert.assertThat(parsedResults, instanceOf(RankedDocsResults.class)); + MatcherAssert.assertThat(((RankedDocsResults) parsedResults).getRankedDocs(), is(responseLiteralDocsWithText)); + } + + private final String responseLiteralWithDocuments = """ + { + "results": [ + { + "input": { + "text": "Washington, D.C.." + }, + "index": 2, + "score": 0.98005307 + }, + { + "input": { + "text": "Capital punishment has existed in the United States since before the United States was a country. " + }, + "index": 3, + "score": 0.27904198 + }, + { + "input": { + "text": "Carson City is the capital city of the American state of Nevada." + }, + "index": 0, + "score": 0.10194652 + } + ] + } + """; + + private final List responseLiteralDocsWithText = List.of( + new RankedDocsResults.RankedDoc(2, 0.98005307F, "Washington, D.C.."), + new RankedDocsResults.RankedDoc( + 3, + 0.27904198F, + "Capital punishment has existed in the United States since before the United States was a country. " + ), + new RankedDocsResults.RankedDoc(0, 0.10194652F, "Carson City is the capital city of the American state of Nevada.") + ); + + private ArrayList linear(int n) { + ArrayList list = new ArrayList<>(); + for (int i = 0; i <= n; i++) { + list.add(i); + } + return list; + } + + // creates a list of doubles of monotonically decreasing magnitude + private ArrayList linearFloats(int n) { + ArrayList list = new ArrayList<>(); + float startValue = 1.0f; + float decrement = startValue / n + 1; + for (int i = 0; i <= n; i++) { + list.add(startValue - (i * decrement)); + } + return list; + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java index ff99101fc4ee5..99b7b3868b7f4 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java @@ -50,6 +50,7 @@ import org.elasticsearch.xpack.inference.services.ServiceFields; import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModel; import org.elasticsearch.xpack.inference.services.ibmwatsonx.embeddings.IbmWatsonxEmbeddingsModelTests; +import org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank.IbmWatsonxRerankModel; import org.elasticsearch.xpack.inference.services.openai.completion.OpenAiChatCompletionModelTests; import org.hamcrest.MatcherAssert; import org.hamcrest.Matchers; @@ -154,6 +155,42 @@ public void testParseRequestConfig_CreatesAIbmWatsonxEmbeddingsModel() throws IO } } + public void testParseRequestConfig_CreatesAIbmWatsonxRerankModel() throws IOException { + try (var service = createIbmWatsonxService()) { + ActionListener modelListener = ActionListener.wrap(model -> { + assertThat(model, instanceOf(IbmWatsonxRerankModel.class)); + + var rerankModel = (IbmWatsonxRerankModel) model; + assertThat(rerankModel.getServiceSettings().modelId(), is(modelId)); + assertThat(rerankModel.getServiceSettings().projectId(), is(projectId)); + assertThat(rerankModel.getServiceSettings().apiVersion(), is(apiVersion)); + assertThat(rerankModel.getSecretSettings().apiKey().toString(), is(apiKey)); + }, e -> fail("Model parsing should have succeeded, but failed: " + e.getMessage())); + + service.parseRequestConfig( + "id", + TaskType.RERANK, + getRequestConfigMap( + new HashMap<>( + Map.of( + ServiceFields.MODEL_ID, + modelId, + IbmWatsonxServiceFields.PROJECT_ID, + projectId, + ServiceFields.URL, + url, + IbmWatsonxServiceFields.API_VERSION, + apiVersion + ) + ), + new HashMap<>(Map.of()), + getSecretSettingsMap(apiKey) + ), + modelListener + ); + } + } + public void testParseRequestConfig_CreatesAIbmWatsonxEmbeddingsModelWhenChunkingSettingsProvided() throws IOException { try (var service = createIbmWatsonxService()) { ActionListener modelListener = ActionListener.wrap(model -> { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankModelTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankModelTests.java new file mode 100644 index 0000000000000..0138952c11e07 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/rerank/IbmWatsonxRerankModelTests.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.ibmwatsonx.rerank; + +import org.elasticsearch.common.settings.SecureString; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettings; + +import java.net.URI; + +public class IbmWatsonxRerankModelTests extends ESTestCase { + public static IbmWatsonxRerankModel createModel(String model, String projectId, URI uri, String apiVersion, String apiKey) { + return new IbmWatsonxRerankModel( + "id", + TaskType.RERANK, + "service", + new IbmWatsonxRerankServiceSettings(uri, apiVersion, model, projectId, null), + new IbmWatsonxRerankTaskSettings(2, true, 100), + new DefaultSecretSettings(new SecureString(apiKey.toCharArray())) + ); + } +}