Skip to content

Commit 6f404b6

Browse files
Semantic_text match_all with Highlighter (#128702) (#128922)
* initial implementation for match_all * reformat * [CI] Auto commit changes from spotless * Excluding matchAll interceptor * Adding matchAllDocs support for vector fields * [CI] Auto commit changes from spotless * Remove previous implementation * Adding yaml tests for match_all * fixed yaml tests * Update docs/changelog/128702.yaml * Update changelog * changelog - update summary * Fix wrong inference names for the yaml tests --------- Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co> Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com> (cherry picked from commit d1b5532) # Conflicts: # x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java # x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml # x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml
1 parent 7521556 commit 6f404b6

File tree

5 files changed

+115
-1
lines changed

5 files changed

+115
-1
lines changed

docs/changelog/128702.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 128702
2+
summary: Fix missing highlighting in `match_all` queries for `semantic_text` fields
3+
area: Search
4+
type: bug
5+
issues: []

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ public class InferenceFeatures implements FeatureSpecification {
2727

2828
private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER = new NodeFeature("semantic_text.highlighter");
2929
private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT = new NodeFeature("semantic_text.highlighter.default");
30+
private static final NodeFeature SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER = new NodeFeature("semantic_text.match_all_highlighter");
3031

3132
@Override
3233
public Set<NodeFeature> getTestFeatures() {
@@ -45,7 +46,8 @@ public Set<NodeFeature> getTestFeatures() {
4546
TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX,
4647
SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT,
4748
SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT,
48-
SEMANTIC_KNN_FILTER_FIX
49+
SEMANTIC_KNN_FILTER_FIX,
50+
SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER
4951
);
5052
}
5153
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/highlight/SemanticTextHighlighter.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.apache.lucene.search.IndexSearcher;
1616
import org.apache.lucene.search.KnnByteVectorQuery;
1717
import org.apache.lucene.search.KnnFloatVectorQuery;
18+
import org.apache.lucene.search.MatchAllDocsQuery;
1819
import org.apache.lucene.search.Query;
1920
import org.apache.lucene.search.QueryVisitor;
2021
import org.apache.lucene.search.ScoreMode;
@@ -267,6 +268,8 @@ public void visitLeaf(Query query) {
267268
queries.add(fieldType.createExactKnnQuery(VectorData.fromFloats(knnQuery.getTargetCopy()), null));
268269
} else if (query instanceof KnnByteVectorQuery knnQuery) {
269270
queries.add(fieldType.createExactKnnQuery(VectorData.fromBytes(knnQuery.getTargetCopy()), null));
271+
} else if (query instanceof MatchAllDocsQuery) {
272+
queries.add(new MatchAllDocsQuery());
270273
}
271274
}
272275
});
@@ -293,6 +296,13 @@ public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
293296
}
294297
return this;
295298
}
299+
300+
@Override
301+
public void visitLeaf(Query query) {
302+
if (query instanceof MatchAllDocsQuery) {
303+
queries.add(new MatchAllDocsQuery());
304+
}
305+
}
296306
});
297307
return queries;
298308
}

x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter.yml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,3 +291,44 @@ setup:
291291
- match: { hits.hits.0._id: "doc_1" }
292292
- not_exists: hits.hits.0.highlight.title
293293

294+
---
295+
"Highlighting with match_all query":
296+
- requires:
297+
cluster_features: "semantic_text.match_all_highlighter"
298+
reason: semantic text field supports match_all query with semantic highlighter.
299+
300+
- do:
301+
search:
302+
index: test-sparse-index
303+
body:
304+
query:
305+
match_all: {}
306+
highlight:
307+
fields:
308+
body:
309+
type: "semantic"
310+
number_of_fragments: 2
311+
312+
- match: { hits.total.value: 1 }
313+
- match: { hits.hits.0._id: "doc_1" }
314+
- length: { hits.hits.0.highlight.body: 2 }
315+
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
316+
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
317+
318+
- do:
319+
search:
320+
index: test-dense-index
321+
body:
322+
query:
323+
match_all: {}
324+
highlight:
325+
fields:
326+
body:
327+
type: "semantic"
328+
number_of_fragments: 2
329+
330+
- match: { hits.total.value: 1 }
331+
- match: { hits.hits.0._id: "doc_1" }
332+
- length: { hits.hits.0.highlight.body: 2 }
333+
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
334+
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }

x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/90_semantic_text_highlighter_bwc.yml

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,4 +243,60 @@ setup:
243243
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
244244
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
245245

246+
---
247+
"Highlighting with match_all query":
248+
- requires:
249+
cluster_features: "semantic_text.match_all_highlighter"
250+
reason: semantic text field supports match_all query with semantic highlighter
246251

252+
- do:
253+
index:
254+
index: test-sparse-index
255+
id: doc_1
256+
body:
257+
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
258+
refresh: true
259+
260+
- do:
261+
search:
262+
index: test-sparse-index
263+
body:
264+
query:
265+
match_all: {}
266+
highlight:
267+
fields:
268+
body:
269+
type: "semantic"
270+
number_of_fragments: 2
271+
272+
- match: { hits.total.value: 1 }
273+
- match: { hits.hits.0._id: "doc_1" }
274+
- length: { hits.hits.0.highlight.body: 2 }
275+
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
276+
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
277+
278+
- do:
279+
index:
280+
index: test-dense-index
281+
id: doc_1
282+
body:
283+
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
284+
refresh: true
285+
286+
- do:
287+
search:
288+
index: test-dense-index
289+
body:
290+
query:
291+
match_all: {}
292+
highlight:
293+
fields:
294+
body:
295+
type: "semantic"
296+
number_of_fragments: 2
297+
298+
- match: { hits.total.value: 1 }
299+
- match: { hits.hits.0._id: "doc_1" }
300+
- length: { hits.hits.0.highlight.body: 2 }
301+
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
302+
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }

0 commit comments

Comments
 (0)