Skip to content

Commit 155c0da

Browse files
authored
Vector test tools (#128934)
This adds some testing tools for verifying vector recall and latency directly, without having to spin up an entire ES node and run a Rally track. It's pretty barebones and takes inspiration from lucene-util, but I wanted access to our own formats and tooling to make our lives easier. Here is an example config file. This will build the initial index, run queries at `num_candidates: 50`, then again at `num_candidates: 100` (without reindexing, and re-using the cached nearest neighbors). ``` [{ "doc_vectors" : "path", "query_vectors" : "path", "num_docs" : 10000, "num_queries" : 10, "index_type" : "hnsw", "num_candidates" : 50, "k" : 10, "hnsw_m" : 16, "hnsw_ef_construction" : 200, "index_threads" : 4, "reindex" : true, "force_merge" : false, "vector_space" : "maximum_inner_product", "dimensions" : 768 }, { "doc_vectors" : "path", "query_vectors" : "path", "num_docs" : 10000, "num_queries" : 10, "index_type" : "hnsw", "num_candidates" : 100, "k" : 10, "hnsw_m" : 16, "hnsw_ef_construction" : 200, "vector_space" : "maximum_inner_product", "dimensions" : 768 } ] ``` To execute: ``` ./gradlew :qa:vector:checkVec --args="/Path/to/knn_tester_config.json" ``` Calling `./gradlew :qa:vector:checkVecHelp` gives some guidance on how to use it, additionally providing a way to run it via java directly (useful to bypass gradlew guff).
1 parent ffa8927 commit 155c0da

17 files changed

+2312
-6
lines changed

qa/vector/build.gradle

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
// Apply the 'elasticsearch.java' and 'elasticsearch.build' plugins to this project.
apply(plugin: 'elasticsearch.java')
apply(plugin: 'elasticsearch.build')
12+
13+
14+
// Map every dependency whose name matches lucene-.* onto the single name 'lucene'
// for the dependencyLicenses task.
tasks.named("dependencyLicenses") {
  mapping(from: /lucene-.*/, to: 'lucene')
}
17+
18+
// Disable the forbiddenApisMain task for this project.
tasks.named('forbiddenApisMain') {
  enabled = false
}
21+
22+
dependencies {
  // Lucene modules, all at the version taken from `versions.lucene`.
  api("org.apache.lucene:lucene-core:${versions.lucene}")
  api("org.apache.lucene:lucene-queries:${versions.lucene}")
  api("org.apache.lucene:lucene-codecs:${versions.lucene}")

  // Sibling projects from this build.
  implementation(project(':libs:logging'))
  implementation(project(':server'))
}
29+
/**
 * Registers the `checkVec` JavaExec task, which launches
 * org.elasticsearch.test.knn.KnnIndexTester on the main runtime classpath.
 * Program arguments are supplied by the caller (e.g. via `--args`).
 */
tasks.register("checkVec", JavaExec) {
  group = "Execution"
  description = "Runs KnnIndexTester with the provided parameters to validate recall and performance."
  mainClass.set("org.elasticsearch.test.knn.KnnIndexTester")
  classpath = sourceSets.main.runtimeClasspath

  // Configure logging to console
  systemProperty("es.logger.out", "console")
  systemProperty("es.logger.level", "INFO") // Change to DEBUG if needed

  // The heap, module and diagnostic flags below are only added when the
  // runtime JDK's major version is 21 or newer.
  def runtimeMajorVersion = buildParams.getRuntimeJavaVersion().map { it.majorVersion.toInteger() }.get()
  if (runtimeMajorVersion >= 21) {
    jvmArgs(
      '-Xms4g',
      '-Xmx4g',
      '--add-modules=jdk.incubator.vector',
      '--enable-native-access=ALL-UNNAMED',
      '-Djava.util.concurrent.ForkJoinPool.common.parallelism=8',
      '-XX:+UnlockDiagnosticVMOptions',
      '-XX:+DebugNonSafepoints',
      '-XX:+HeapDumpOnOutOfMemoryError'
    )
  }
}
45+
46+
/**
 * Registers the `checkVecHelp` JavaExec task: it runs KnnIndexTester with
 * `--help` and afterwards prints usage notes for launching the tester through
 * Gradle or directly with `java`.
 */
tasks.register("checkVecHelp", JavaExec) {
  group = "Help"
  description = "Prints help for the KnnIndexTester task."
  classpath = sourceSets.main.runtimeClasspath
  mainClass.set("org.elasticsearch.test.knn.KnnIndexTester")
  args = ["--help"]
  doLast {
    // Escaping inside this Groovy GString: `\$` prints a literal `$` and `\\`
    // prints ONE backslash. Every shell line-continuation must therefore be
    // written as exactly `\\`; the `-Xms4g` line previously used `\\\\`, which
    // printed two backslashes and broke the copy-pasted command.
    //
    // NOTE(review): the checkVec task passes fixed -Xms4g/-Xmx4g jvmArgs to the
    // forked JVM; confirm that the "Adjust heap size" / GRADLE_OPTS advice below
    // really changes the tester's heap rather than only Gradle's own JVM.
    println """
=============================================================================
KnnIndexTester Help
=============================================================================

Run with Gradle:
----------------
# Using default configuration file
./gradlew :qa:vector:checkVec

# Using custom configuration file
./gradlew :qa:vector:checkVec --args="path/to/your/config.json"

# Adjust heap size
./gradlew :qa:vector:checkVec -Dorg.gradle.jvmargs="-Xmx8g" --args="path/to/your/config.json"

# Set environment variable for more extensive JVM options
export GRADLE_OPTS="-Xmx8g -XX:+UseG1GC -XX:MaxGCPauseMillis=100"
./gradlew :qa:vector:checkVec


Run directly with Java:
----------------------
# Generate classpath (run once to create the file)
./gradlew :qa:vector:printClasspath

# Then use the classpath file with java
java -cp "\$(cat build/vector_classpath.txt)" \\
--add-modules=jdk.incubator.vector \\
--enable-native-access=ALL-UNNAMED \\
-Djava.util.concurrent.ForkJoinPool.common.parallelism=8 \\
-Xmx4g \\
-Xms4g \\
org.elasticsearch.test.knn.KnnIndexTester path/to/your/config.json
"""
  }
}
90+
91+
/**
 * Registers the `printClasspath` task, which writes the main source set's
 * runtime classpath (as a single path string) to `vector_classpath.txt` in this
 * project's build directory and prints the file's absolute location.
 */
tasks.register("printClasspath") {
  group = "Help"
  description = "Prints the classpath needed to run KnnIndexTester directly with java"

  doLast {
    // Resolve the output file through the ProjectLayout API instead of the
    // deprecated `Project.buildDir` accessor plus string concatenation.
    def classpathFile = project.layout.buildDirectory.file("vector_classpath.txt").get().asFile
    classpathFile.parentFile.mkdirs()
    // NOTE(review): this only records the classpath entries; nothing here
    // declares a dependency on building them — confirm callers compile first.
    classpathFile.text = sourceSets.main.runtimeClasspath.asPath
    println "Classpath written to: ${classpathFile.absolutePath}"
  }
}

0 commit comments

Comments
 (0)