Skip to content

Commit 67d688e

Browse files
authored
ESQL: Speed up TO_IP (#126338) (#126431)
Speed up the TO_IP method by converting directly from utf-8 encoded strings to the ip encoding. Previously we did: ``` utf-8 -> String -> INetAddress -> ip encoding ``` In a step towards solving #125460 this creates three IP parsing functions, one the rejects leading zeros, one that interprets leading zeros as decimal numbers, and one the interprets leading zeros as octal numbers. IPs have historically been parsed in all three of those ways. This plugs the "rejects leading zeros" parser into `TO_IP` because that's the behavior it had before. Here is the performance: ``` Benchmark Score Error Units leadingZerosAreDecimal 14.007 ± 0.093 ns/op leadingZerosAreOctal 15.020 ± 0.373 ns/op leadingZerosRejected 14.176 ± 3.861 ns/op original 32.950 ± 1.062 ns/op ``` So this is roughly 45% faster than what we had. This includes a big chunk of #124676 - but not the behavior change - just the code that allowed it.
1 parent 749bc36 commit 67d688e

File tree

123 files changed

+2810
-1069
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

123 files changed

+2810
-1069
lines changed

Diff for: benchmarks/README.md

+31-5
Original file line numberDiff line numberDiff line change
@@ -82,19 +82,21 @@ To get realistic results, you should exercise care when running benchmarks. Here
8282
NOTE: Linux only. Sorry Mac and Windows.
8383

8484
Disassembling is fun! Maybe not always useful, but always fun! Generally, you'll want to install `perf` and the JDK's `hsdis`.
85-
`perf` is generally available via `apg-get install perf` or `pacman -S perf`. `hsdis` you'll want to compile from source. is a little more involved. This worked
85+
`perf` is generally available via `apg-get install perf` or `pacman -S perf linux-tools`. `hsdis` you'll want to compile from source. is a little more involved. This worked
8686
on 2020-08-01:
8787

8888
```
8989
git clone git@github.com:openjdk/jdk.git
9090
cd jdk
91-
git checkout jdk-17-ga
92-
cd src/utils/hsdis
91+
git checkout jdk-24-ga
9392
# Get a known good binutils
9493
wget https://ftp.gnu.org/gnu/binutils/binutils-2.35.tar.gz
9594
tar xf binutils-2.35.tar.gz
96-
make BINUTILS=binutils-2.35 ARCH=amd64
97-
sudo cp build/linux-amd64/hsdis-amd64.so /usr/lib/jvm/java-17-openjdk/lib/server/
95+
bash configure --with-hsdis=binutils --with-binutils-src=binutils-2.35 \
96+
--with-boot-jdk=~/.gradle/jdks/oracle_corporation-24-amd64-linux.2
97+
make build-hsdis
98+
cp ./build/linux-x86_64-server-release/jdk/lib/hsdis-amd64.so \
99+
~/.gradle/jdks/oracle_corporation-24-amd64-linux.2/lib/hsdis.so
98100
```
99101

100102
If you want to disassemble a single method do something like this:
@@ -105,6 +107,30 @@ gradlew -p benchmarks run --args ' MemoryStatsBenchmark -jvmArgs "-XX:+UnlockDia
105107

106108
If you want `perf` to find the hot methods for you, then do add `-prof perfasm`.
107109

110+
NOTE: `perfasm` will need more access:
111+
```
112+
sudo bash
113+
echo -1 > /proc/sys/kernel/perf_event_paranoid
114+
exit
115+
```
116+
117+
If you get warnings like:
118+
```
119+
The perf event count is suspiciously low (0).
120+
```
121+
then check if you are bumping into [this](https://man.archlinux.org/man/perf-stat.1.en#INTEL_HYBRID_SUPPORT)
122+
by running:
123+
```
124+
perf stat -B dd if=/dev/zero of=/dev/null count=1000000
125+
```
126+
127+
If you see lines like:
128+
```
129+
765019980 cpu_atom/cycles/ # 1.728 GHz (0.60%)
130+
2258845959 cpu_core/cycles/ # 5.103 GHz (99.18%)
131+
```
132+
then `perf` is just not going to work for you.
133+
108134
## Async Profiler
109135

110136
Note: Linux and Mac only. Sorry Windows.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.benchmark.compute.operator;
11+
12+
import org.apache.lucene.document.InetAddressPoint;
13+
import org.apache.lucene.util.BytesRef;
14+
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
15+
import org.elasticsearch.common.network.InetAddresses;
16+
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
17+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ParseIp;
18+
import org.openjdk.jmh.annotations.Benchmark;
19+
import org.openjdk.jmh.annotations.BenchmarkMode;
20+
import org.openjdk.jmh.annotations.Fork;
21+
import org.openjdk.jmh.annotations.Measurement;
22+
import org.openjdk.jmh.annotations.Mode;
23+
import org.openjdk.jmh.annotations.OutputTimeUnit;
24+
import org.openjdk.jmh.annotations.Scope;
25+
import org.openjdk.jmh.annotations.State;
26+
import org.openjdk.jmh.annotations.Warmup;
27+
28+
import java.net.InetAddress;
29+
import java.util.concurrent.TimeUnit;
30+
31+
@Warmup(iterations = 5)
32+
@Measurement(iterations = 7)
33+
@BenchmarkMode(Mode.AverageTime)
34+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
35+
@State(Scope.Thread)
36+
@Fork(1)
37+
public class ParseIpBenchmark {
38+
private final BytesRef ip = new BytesRef("192.168.0.1");
39+
private final BreakingBytesRefBuilder scratch = ParseIp.buildScratch(new NoopCircuitBreaker("request"));
40+
41+
@Benchmark
42+
public BytesRef leadingZerosRejected() {
43+
return ParseIp.leadingZerosRejected(ip, scratch);
44+
}
45+
46+
@Benchmark
47+
public BytesRef leadingZerosAreDecimal() {
48+
return ParseIp.leadingZerosAreDecimal(ip, scratch);
49+
}
50+
51+
@Benchmark
52+
public BytesRef leadingZerosAreOctal() {
53+
return ParseIp.leadingZerosAreOctal(ip, scratch);
54+
}
55+
56+
@Benchmark
57+
public BytesRef original() {
58+
InetAddress inetAddress = InetAddresses.forString(ip.utf8ToString());
59+
return new BytesRef(InetAddressPoint.encode(inetAddress));
60+
}
61+
}

Diff for: docs/changelog/126338.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 126338
2+
summary: Speed up TO_IP
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

Diff for: x-pack/plugin/esql/compute/gen/src/main/java/org/elasticsearch/compute/gen/AggregatorFunctionSupplierImplementer.java

+7-4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import static org.elasticsearch.compute.gen.Types.LIST_AGG_FUNC_DESC;
3535
import static org.elasticsearch.compute.gen.Types.LIST_INTEGER;
3636
import static org.elasticsearch.compute.gen.Types.STRING;
37+
import static org.elasticsearch.compute.gen.Types.WARNINGS;
3738

3839
/**
3940
* Implements "AggregationFunctionSupplier" from a class annotated with both
@@ -161,8 +162,9 @@ private MethodSpec aggregator() {
161162

162163
if (hasWarnings) {
163164
builder.addStatement(
164-
"var warnings = Warnings.createWarnings(driverContext.warningsMode(), "
165-
+ "warningsLineNumber, warningsColumnNumber, warningsSourceText)"
165+
"var warnings = $T.createWarnings(driverContext.warningsMode(), "
166+
+ "warningsLineNumber, warningsColumnNumber, warningsSourceText)",
167+
WARNINGS
166168
);
167169
}
168170

@@ -187,8 +189,9 @@ private MethodSpec groupingAggregator() {
187189

188190
if (hasWarnings) {
189191
builder.addStatement(
190-
"var warnings = Warnings.createWarnings(driverContext.warningsMode(), "
191-
+ "warningsLineNumber, warningsColumnNumber, warningsSourceText)"
192+
"var warnings = $T.createWarnings(driverContext.warningsMode(), "
193+
+ "warningsLineNumber, warningsColumnNumber, warningsSourceText)",
194+
WARNINGS
192195
);
193196
}
194197

Diff for: x-pack/plugin/esql/compute/gen/src/main/java/org/elasticsearch/compute/gen/ConsumeProcessor.java

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ public Set<String> getSupportedAnnotationTypes() {
4242
"org.elasticsearch.xpack.esql.expression.function.MapParam",
4343
"org.elasticsearch.rest.ServerlessScope",
4444
"org.elasticsearch.xcontent.ParserConstructor",
45+
"org.elasticsearch.core.UpdateForV9",
4546
Fixed.class.getName()
4647
);
4748
}

Diff for: x-pack/plugin/esql/compute/gen/src/main/java/org/elasticsearch/compute/gen/ConvertEvaluatorImplementer.java

+54-60
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import javax.lang.model.type.TypeMirror;
2424
import javax.lang.model.util.Elements;
2525

26-
import static org.elasticsearch.compute.gen.Methods.appendMethod;
2726
import static org.elasticsearch.compute.gen.Methods.buildFromFactory;
2827
import static org.elasticsearch.compute.gen.Methods.getMethod;
2928
import static org.elasticsearch.compute.gen.Types.ABSTRACT_CONVERT_FUNCTION_EVALUATOR;
@@ -41,27 +40,34 @@
4140
public class ConvertEvaluatorImplementer {
4241

4342
private final TypeElement declarationType;
44-
private final ExecutableElement processFunction;
43+
private final EvaluatorImplementer.ProcessFunction processFunction;
4544
private final String extraName;
4645
private final ClassName implementation;
4746
private final TypeName argumentType;
48-
private final TypeName resultType;
4947
private final List<TypeMirror> warnExceptions;
5048

5149
public ConvertEvaluatorImplementer(
5250
Elements elements,
51+
javax.lang.model.util.Types types,
5352
ExecutableElement processFunction,
5453
String extraName,
5554
List<TypeMirror> warnExceptions
5655
) {
5756
this.declarationType = (TypeElement) processFunction.getEnclosingElement();
58-
this.processFunction = processFunction;
59-
if (processFunction.getParameters().size() != 1) {
60-
throw new IllegalArgumentException("processing function should have exactly one parameter");
57+
this.processFunction = new EvaluatorImplementer.ProcessFunction(types, processFunction, warnExceptions);
58+
59+
if (this.processFunction.args.getFirst() instanceof EvaluatorImplementer.StandardProcessFunctionArg == false) {
60+
throw new IllegalArgumentException("first argument must be the field to process");
61+
}
62+
for (int a = 1; a < this.processFunction.args.size(); a++) {
63+
if (this.processFunction.args.get(a) instanceof EvaluatorImplementer.FixedProcessFunctionArg == false) {
64+
throw new IllegalArgumentException("fixed function args supported after the first");
65+
// TODO support more function types when we need them
66+
}
6167
}
68+
6269
this.extraName = extraName;
6370
this.argumentType = TypeName.get(processFunction.getParameters().get(0).asType());
64-
this.resultType = TypeName.get(processFunction.getReturnType());
6571
this.warnExceptions = warnExceptions;
6672

6773
this.implementation = ClassName.get(
@@ -87,29 +93,36 @@ private TypeSpec type() {
8793
builder.addModifiers(Modifier.PUBLIC, Modifier.FINAL);
8894
builder.superclass(ABSTRACT_CONVERT_FUNCTION_EVALUATOR);
8995

96+
for (EvaluatorImplementer.ProcessFunctionArg a : processFunction.args) {
97+
a.declareField(builder);
98+
}
9099
builder.addMethod(ctor());
91-
builder.addMethod(name());
100+
builder.addMethod(next());
92101
builder.addMethod(evalVector());
93102
builder.addMethod(evalValue(true));
94103
builder.addMethod(evalBlock());
95104
builder.addMethod(evalValue(false));
105+
builder.addMethod(processFunction.toStringMethod(implementation));
106+
builder.addMethod(processFunction.close());
96107
builder.addType(factory());
97108
return builder.build();
98109
}
99110

100111
private MethodSpec ctor() {
101112
MethodSpec.Builder builder = MethodSpec.constructorBuilder().addModifiers(Modifier.PUBLIC);
102-
builder.addParameter(EXPRESSION_EVALUATOR, "field");
103113
builder.addParameter(SOURCE, "source");
114+
builder.addStatement("super(driverContext, source)");
115+
for (EvaluatorImplementer.ProcessFunctionArg a : processFunction.args) {
116+
a.implementCtor(builder);
117+
}
104118
builder.addParameter(DRIVER_CONTEXT, "driverContext");
105-
builder.addStatement("super(driverContext, field, source)");
106119
return builder.build();
107120
}
108121

109-
private MethodSpec name() {
110-
MethodSpec.Builder builder = MethodSpec.methodBuilder("name").addModifiers(Modifier.PUBLIC);
111-
builder.addAnnotation(Override.class).returns(String.class);
112-
builder.addStatement("return $S", declarationType.getSimpleName() + extraName);
122+
private MethodSpec next() {
123+
MethodSpec.Builder builder = MethodSpec.methodBuilder("next").addAnnotation(Override.class).addModifiers(Modifier.PUBLIC);
124+
builder.returns(EXPRESSION_EVALUATOR);
125+
builder.addStatement("return $N", ((EvaluatorImplementer.StandardProcessFunctionArg) processFunction.args.getFirst()).name());
113126
return builder.build();
114127
}
115128

@@ -129,7 +142,7 @@ private MethodSpec evalVector() {
129142
builder.beginControlFlow("if (vector.isConstant())");
130143
{
131144
catchingWarnExceptions(builder, () -> {
132-
var constVectType = blockType(resultType);
145+
var constVectType = processFunction.resultDataType(true);
133146
builder.addStatement(
134147
"return driverContext.blockFactory().newConstant$TWith($N, positionCount)",
135148
constVectType,
@@ -139,7 +152,7 @@ private MethodSpec evalVector() {
139152
}
140153
builder.endControlFlow();
141154

142-
ClassName resultBuilderType = builderType(blockType(resultType));
155+
ClassName resultBuilderType = builderType(processFunction.resultDataType(true));
143156
builder.beginControlFlow(
144157
"try ($T builder = driverContext.blockFactory().$L(positionCount))",
145158
resultBuilderType,
@@ -150,7 +163,11 @@ private MethodSpec evalVector() {
150163
{
151164
catchingWarnExceptions(
152165
builder,
153-
() -> builder.addStatement("builder.$L($N)", appendMethod(resultType), evalValueCall("vector", "p", scratchPadName)),
166+
() -> builder.addStatement(
167+
"builder.$L($N)",
168+
processFunction.appendMethod(),
169+
evalValueCall("vector", "p", scratchPadName)
170+
),
154171
() -> builder.addStatement("builder.appendNull()")
155172
);
156173
}
@@ -185,7 +202,7 @@ private MethodSpec evalBlock() {
185202
TypeName blockType = blockType(argumentType);
186203
builder.addStatement("$T block = ($T) b", blockType, blockType);
187204
builder.addStatement("int positionCount = block.getPositionCount()");
188-
TypeName resultBuilderType = builderType(blockType(resultType));
205+
TypeName resultBuilderType = builderType(processFunction.resultDataType(true));
189206
builder.beginControlFlow(
190207
"try ($T builder = driverContext.blockFactory().$L(positionCount))",
191208
resultBuilderType,
@@ -196,19 +213,18 @@ private MethodSpec evalBlock() {
196213
builder.addStatement("BytesRef $N = new BytesRef()", scratchPadName);
197214
}
198215

199-
String appendMethod = appendMethod(resultType);
216+
String appendMethod = processFunction.appendMethod();
200217
builder.beginControlFlow("for (int p = 0; p < positionCount; p++)");
201218
{
202219
builder.addStatement("int valueCount = block.getValueCount(p)");
203220
builder.addStatement("int start = block.getFirstValueIndex(p)");
204221
builder.addStatement("int end = start + valueCount");
205222
builder.addStatement("boolean positionOpened = false");
206223
builder.addStatement("boolean valuesAppended = false");
207-
// builder.addStatement("builder.beginPositionEntry()");
208224
builder.beginControlFlow("for (int i = start; i < end; i++)");
209225
{
210226
catchingWarnExceptions(builder, () -> {
211-
builder.addStatement("$T value = $N", resultType, evalValueCall("block", "i", scratchPadName));
227+
builder.addStatement("$T value = $N", processFunction.returnType(), evalValueCall("block", "i", scratchPadName));
212228
builder.beginControlFlow("if (positionOpened == false && valueCount > 1)");
213229
{
214230
builder.addStatement("builder.beginPositionEntry()");
@@ -253,8 +269,8 @@ private String evalValueCall(String container, String index, String scratchPad)
253269

254270
private MethodSpec evalValue(boolean forVector) {
255271
MethodSpec.Builder builder = MethodSpec.methodBuilder("evalValue")
256-
.addModifiers(Modifier.PRIVATE, Modifier.STATIC)
257-
.returns(resultType);
272+
.addModifiers(Modifier.PRIVATE)
273+
.returns(processFunction.returnType());
258274

259275
if (forVector) {
260276
builder.addParameter(vectorType(argumentType), "container");
@@ -269,8 +285,17 @@ private MethodSpec evalValue(boolean forVector) {
269285
builder.addStatement("$T value = container.$N(index)", argumentType, getMethod(argumentType));
270286
}
271287

272-
builder.addStatement("return $T.$N(value)", declarationType, processFunction.getSimpleName());
273-
288+
StringBuilder pattern = new StringBuilder();
289+
List<Object> args = new ArrayList<>();
290+
pattern.append("return $T.$N(value");
291+
args.add(declarationType);
292+
args.add(processFunction.function.getSimpleName());
293+
for (int a = 1; a < processFunction.args.size(); a++) {
294+
pattern.append(", ");
295+
processFunction.args.get(a).buildInvocation(pattern, args, false /* block style parameter should be unused */);
296+
}
297+
pattern.append(")");
298+
builder.addStatement(pattern.toString(), args.toArray());
274299
return builder.build();
275300
}
276301

@@ -280,42 +305,11 @@ private TypeSpec factory() {
280305
builder.addModifiers(Modifier.PUBLIC, Modifier.STATIC);
281306

282307
builder.addField(SOURCE, "source", Modifier.PRIVATE, Modifier.FINAL);
283-
builder.addField(EXPRESSION_EVALUATOR_FACTORY, "field", Modifier.PRIVATE, Modifier.FINAL);
284-
285-
builder.addMethod(factoryCtor());
286-
builder.addMethod(factoryGet());
287-
builder.addMethod(factoryToString());
288-
return builder.build();
289-
}
290-
291-
private MethodSpec factoryCtor() {
292-
MethodSpec.Builder builder = MethodSpec.constructorBuilder().addModifiers(Modifier.PUBLIC);
293-
builder.addParameter(EXPRESSION_EVALUATOR_FACTORY, "field");
294-
builder.addParameter(SOURCE, "source");
295-
builder.addStatement("this.field = field");
296-
builder.addStatement("this.source = source");
297-
return builder.build();
298-
}
299-
300-
private MethodSpec factoryGet() {
301-
MethodSpec.Builder builder = MethodSpec.methodBuilder("get").addAnnotation(Override.class);
302-
builder.addModifiers(Modifier.PUBLIC);
303-
builder.addParameter(DRIVER_CONTEXT, "context");
304-
builder.returns(implementation);
305-
306-
List<String> args = new ArrayList<>();
307-
args.add("field.get(context)");
308-
args.add("source");
309-
args.add("context");
310-
builder.addStatement("return new $T($L)", implementation, args.stream().collect(Collectors.joining(", ")));
311-
return builder.build();
312-
}
308+
processFunction.args.forEach(a -> a.declareFactoryField(builder));
313309

314-
private MethodSpec factoryToString() {
315-
MethodSpec.Builder builder = MethodSpec.methodBuilder("toString").addAnnotation(Override.class);
316-
builder.addModifiers(Modifier.PUBLIC);
317-
builder.returns(String.class);
318-
builder.addStatement("return $S + field + $S", declarationType.getSimpleName() + extraName + "Evaluator[field=", "]");
310+
builder.addMethod(processFunction.factoryCtor());
311+
builder.addMethod(processFunction.factoryGet(implementation));
312+
builder.addMethod(processFunction.toStringMethod(implementation));
319313
return builder.build();
320314
}
321315
}

0 commit comments

Comments
 (0)