Skip to content

Commit fd2bac4

Browse files
authored
[8.19] Fix NPE in flat_bbq scorer when all vectors are missing (#129548) (#129624)
* Fix NPE in flat_bbq scorer when all vectors are missing (#129548) It is possible to get all the way down to the knn format reader and find that there are no vectors in the index. This execution path is possible when using nested queries (which bypass the higher-level checks in `KnnFloatVectorQuery#approximateSearch`). bbq_flat should check for the existence of vectors before attempting to create the scorer. (cherry picked from commit 80667d0) * fixing comp
1 parent 14e7037 commit fd2bac4

File tree

7 files changed

+143
-2
lines changed

7 files changed

+143
-2
lines changed

docs/changelog/129548.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 129548
2+
summary: Fix NPE in `flat_bbq` scorer when all vectors are missing
3+
area: Vector Search
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ public RandomVectorScorer getRandomVectorScorer(
5959
float[] target
6060
) throws IOException {
6161
if (vectorValues instanceof RandomAccessBinarizedByteVectorValues binarizedVectors) {
62+
assert binarizedVectors.getQuantizer() != null
63+
: "BinarizedByteVectorValues must have a quantizer for ES816BinaryFlatVectorsScorer";
64+
assert binarizedVectors.size() > 0 : "BinarizedByteVectorValues must have at least one vector for ES816BinaryFlatVectorsScorer";
6265
BinaryQuantizer quantizer = binarizedVectors.getQuantizer();
6366
float[] centroid = binarizedVectors.getCentroid();
6467
// FIXME: precompute this once?

server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ static void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
154154
@Override
155155
public RandomVectorScorer getRandomVectorScorer(String field, float[] target) throws IOException {
156156
FieldEntry fi = fields.get(field);
157-
if (fi == null) {
157+
if (fi == null || fi.size() == 0) {
158158
return null;
159159
}
160160
return vectorScorer.getRandomVectorScorer(

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryFlatVectorsScorer.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ public RandomVectorScorer getRandomVectorScorer(
6565
float[] target
6666
) throws IOException {
6767
if (vectorValues instanceof RandomAccessBinarizedByteVectorValues binarizedVectors) {
68+
assert binarizedVectors.getQuantizer() != null
69+
: "BinarizedByteVectorValues must have a quantizer for ES818BinaryFlatVectorsScorer";
70+
assert binarizedVectors.size() > 0 : "BinarizedByteVectorValues must have at least one vector for ES818BinaryFlatVectorsScorer";
6871
OptimizedScalarQuantizer quantizer = binarizedVectors.getQuantizer();
6972
float[] centroid = binarizedVectors.getCentroid();
7073
// We make a copy as the quantization process mutates the input

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ static void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
154154
@Override
155155
public RandomVectorScorer getRandomVectorScorer(String field, float[] target) throws IOException {
156156
FieldEntry fi = fields.get(field);
157-
if (fi == null) {
157+
if (fi == null || fi.size() == 0) {
158158
return null;
159159
}
160160
return vectorScorer.getRandomVectorScorer(

server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,25 +24,38 @@
2424
import org.apache.lucene.codecs.KnnVectorsFormat;
2525
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
2626
import org.apache.lucene.document.Document;
27+
import org.apache.lucene.document.Field;
2728
import org.apache.lucene.document.KnnFloatVectorField;
2829
import org.apache.lucene.index.DirectoryReader;
2930
import org.apache.lucene.index.FloatVectorValues;
3031
import org.apache.lucene.index.IndexReader;
3132
import org.apache.lucene.index.IndexWriter;
3233
import org.apache.lucene.index.IndexWriterConfig;
3334
import org.apache.lucene.index.LeafReader;
35+
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
36+
import org.apache.lucene.index.Term;
3437
import org.apache.lucene.index.VectorSimilarityFunction;
38+
import org.apache.lucene.search.FieldExistsQuery;
3539
import org.apache.lucene.search.IndexSearcher;
3640
import org.apache.lucene.search.KnnFloatVectorQuery;
41+
import org.apache.lucene.search.MatchAllDocsQuery;
3742
import org.apache.lucene.search.Query;
43+
import org.apache.lucene.search.TermQuery;
3844
import org.apache.lucene.search.TopDocs;
3945
import org.apache.lucene.search.TotalHits;
46+
import org.apache.lucene.search.join.BitSetProducer;
47+
import org.apache.lucene.search.join.CheckJoinIndex;
48+
import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
49+
import org.apache.lucene.search.join.QueryBitSetProducer;
4050
import org.apache.lucene.store.Directory;
4151
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
4252
import org.elasticsearch.common.logging.LogConfigurator;
4353
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
4454

4555
import java.io.IOException;
56+
import java.util.ArrayList;
57+
import java.util.Arrays;
58+
import java.util.List;
4659
import java.util.Locale;
4760

4861
import static java.lang.String.format;
@@ -67,6 +80,58 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
6780
};
6881
}
6982

83+
static String encodeInts(int[] i) {
84+
return Arrays.toString(i);
85+
}
86+
87+
static BitSetProducer parentFilter(IndexReader r) throws IOException {
88+
// Create a filter that defines "parent" documents in the index
89+
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
90+
CheckJoinIndex.check(r, parentsFilter);
91+
return parentsFilter;
92+
}
93+
94+
Document makeParent(int[] children) {
95+
Document parent = new Document();
96+
parent.add(newStringField("docType", "_parent", Field.Store.NO));
97+
parent.add(newStringField("id", encodeInts(children), Field.Store.YES));
98+
return parent;
99+
}
100+
101+
public void testEmptyDiversifiedChildSearch() throws Exception {
102+
String fieldName = "field";
103+
int dims = random().nextInt(4, 65);
104+
float[] vector = randomVector(dims);
105+
VectorSimilarityFunction similarityFunction = VectorSimilarityFunction.EUCLIDEAN;
106+
try (Directory d = newDirectory()) {
107+
IndexWriterConfig iwc = newIndexWriterConfig().setCodec(getCodec());
108+
iwc.setMergePolicy(new SoftDeletesRetentionMergePolicy("soft_delete", MatchAllDocsQuery::new, iwc.getMergePolicy()));
109+
try (IndexWriter w = new IndexWriter(d, iwc)) {
110+
List<Document> toAdd = new ArrayList<>();
111+
for (int j = 1; j <= 5; j++) {
112+
Document doc = new Document();
113+
doc.add(new KnnFloatVectorField(fieldName, vector, similarityFunction));
114+
doc.add(newStringField("id", Integer.toString(j), Field.Store.YES));
115+
toAdd.add(doc);
116+
}
117+
toAdd.add(makeParent(new int[] { 1, 2, 3, 4, 5 }));
118+
w.addDocuments(toAdd);
119+
w.addDocuments(List.of(makeParent(new int[] { 6, 7, 8, 9, 10 })));
120+
w.deleteDocuments(new FieldExistsQuery(fieldName), new TermQuery(new Term("id", encodeInts(new int[] { 1, 2, 3, 4, 5 }))));
121+
w.flush();
122+
w.commit();
123+
w.forceMerge(1);
124+
try (IndexReader reader = DirectoryReader.open(w)) {
125+
IndexSearcher searcher = new IndexSearcher(reader);
126+
BitSetProducer parentFilter = parentFilter(searcher.getIndexReader());
127+
Query query = new DiversifyingChildrenFloatKnnVectorQuery(fieldName, vector, null, 1, parentFilter);
128+
assertTrue(searcher.search(query, 1).scoreDocs.length == 0);
129+
}
130+
}
131+
132+
}
133+
}
134+
70135
public void testSearch() throws Exception {
71136
String fieldName = "field";
72137
int numVectors = random().nextInt(99, 500);

server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,26 +24,39 @@
2424
import org.apache.lucene.codecs.KnnVectorsFormat;
2525
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
2626
import org.apache.lucene.document.Document;
27+
import org.apache.lucene.document.Field;
2728
import org.apache.lucene.document.KnnFloatVectorField;
2829
import org.apache.lucene.index.DirectoryReader;
2930
import org.apache.lucene.index.FloatVectorValues;
3031
import org.apache.lucene.index.IndexReader;
3132
import org.apache.lucene.index.IndexWriter;
3233
import org.apache.lucene.index.IndexWriterConfig;
3334
import org.apache.lucene.index.LeafReader;
35+
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
36+
import org.apache.lucene.index.Term;
3437
import org.apache.lucene.index.VectorSimilarityFunction;
38+
import org.apache.lucene.search.FieldExistsQuery;
3539
import org.apache.lucene.search.IndexSearcher;
3640
import org.apache.lucene.search.KnnFloatVectorQuery;
41+
import org.apache.lucene.search.MatchAllDocsQuery;
3742
import org.apache.lucene.search.Query;
43+
import org.apache.lucene.search.TermQuery;
3844
import org.apache.lucene.search.TopDocs;
3945
import org.apache.lucene.search.TotalHits;
46+
import org.apache.lucene.search.join.BitSetProducer;
47+
import org.apache.lucene.search.join.CheckJoinIndex;
48+
import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
49+
import org.apache.lucene.search.join.QueryBitSetProducer;
4050
import org.apache.lucene.store.Directory;
4151
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
4252
import org.elasticsearch.common.logging.LogConfigurator;
4353
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
4454
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
4555

4656
import java.io.IOException;
57+
import java.util.ArrayList;
58+
import java.util.Arrays;
59+
import java.util.List;
4760
import java.util.Locale;
4861

4962
import static java.lang.String.format;
@@ -68,6 +81,58 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
6881
};
6982
}
7083

84+
static String encodeInts(int[] i) {
85+
return Arrays.toString(i);
86+
}
87+
88+
static BitSetProducer parentFilter(IndexReader r) throws IOException {
89+
// Create a filter that defines "parent" documents in the index
90+
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
91+
CheckJoinIndex.check(r, parentsFilter);
92+
return parentsFilter;
93+
}
94+
95+
Document makeParent(int[] children) {
96+
Document parent = new Document();
97+
parent.add(newStringField("docType", "_parent", Field.Store.NO));
98+
parent.add(newStringField("id", encodeInts(children), Field.Store.YES));
99+
return parent;
100+
}
101+
102+
public void testEmptyDiversifiedChildSearch() throws Exception {
103+
String fieldName = "field";
104+
int dims = random().nextInt(4, 65);
105+
float[] vector = randomVector(dims);
106+
VectorSimilarityFunction similarityFunction = VectorSimilarityFunction.EUCLIDEAN;
107+
try (Directory d = newDirectory()) {
108+
IndexWriterConfig iwc = newIndexWriterConfig().setCodec(getCodec());
109+
iwc.setMergePolicy(new SoftDeletesRetentionMergePolicy("soft_delete", MatchAllDocsQuery::new, iwc.getMergePolicy()));
110+
try (IndexWriter w = new IndexWriter(d, iwc)) {
111+
List<Document> toAdd = new ArrayList<>();
112+
for (int j = 1; j <= 5; j++) {
113+
Document doc = new Document();
114+
doc.add(new KnnFloatVectorField(fieldName, vector, similarityFunction));
115+
doc.add(newStringField("id", Integer.toString(j), Field.Store.YES));
116+
toAdd.add(doc);
117+
}
118+
toAdd.add(makeParent(new int[] { 1, 2, 3, 4, 5 }));
119+
w.addDocuments(toAdd);
120+
w.addDocuments(List.of(makeParent(new int[] { 6, 7, 8, 9, 10 })));
121+
w.deleteDocuments(new FieldExistsQuery(fieldName), new TermQuery(new Term("id", encodeInts(new int[] { 1, 2, 3, 4, 5 }))));
122+
w.flush();
123+
w.commit();
124+
w.forceMerge(1);
125+
try (IndexReader reader = DirectoryReader.open(w)) {
126+
IndexSearcher searcher = new IndexSearcher(reader);
127+
BitSetProducer parentFilter = parentFilter(searcher.getIndexReader());
128+
Query query = new DiversifyingChildrenFloatKnnVectorQuery(fieldName, vector, null, 1, parentFilter);
129+
assertTrue(searcher.search(query, 1).scoreDocs.length == 0);
130+
}
131+
}
132+
133+
}
134+
}
135+
71136
public void testSearch() throws Exception {
72137
String fieldName = "field";
73138
int numVectors = random().nextInt(99, 500);

0 commit comments

Comments
 (0)