Skip to content

Commit d59e267

Browse files
Move Text class to libs/xcontent (#128780) (#128926)
This PR is a precursor to #126492. It does three things:
1. Move org.elasticsearch.common.text.Text from :server to org.elasticsearch.xcontent.Text in :libs:x-content.
2. Refactor the Text class to use a new EncodedBytes record (named UTF8Bytes in this diff) instead of the Elasticsearch BytesReference.
3. Add the XContentString interface, with the Text class implementing that interface.

These changes were originally implemented in #127666 and #128316; however, they were reverted in #128484 due to problems caused by the mutable nature of Java ByteBuffers. This is resolved by instead using a new immutable EncodedBytes record.

(cherry picked from commit de40ac4)

# Conflicts:
#   server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java
#   server/src/test/java/org/elasticsearch/common/xcontent/BaseXContentTestCase.java
#   server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightFieldTests.java
#   test/framework/src/main/java/org/elasticsearch/search/SearchResponseUtils.java
1 parent 7e0aacf commit d59e267

File tree

42 files changed

+374
-79
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+374
-79
lines changed

server/src/main/java/org/elasticsearch/common/text/Text.java renamed to libs/x-content/src/main/java/org/elasticsearch/xcontent/Text.java

Lines changed: 48 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,17 @@
66
* your election, the "Elastic License 2.0", the "GNU Affero General Public
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
9-
package org.elasticsearch.common.text;
10-
11-
import org.apache.lucene.util.BytesRef;
12-
import org.elasticsearch.common.bytes.BytesArray;
13-
import org.elasticsearch.common.bytes.BytesReference;
14-
import org.elasticsearch.xcontent.ToXContentFragment;
15-
import org.elasticsearch.xcontent.XContentBuilder;
9+
package org.elasticsearch.xcontent;
1610

1711
import java.io.IOException;
12+
import java.nio.ByteBuffer;
1813
import java.nio.charset.StandardCharsets;
1914

2015
/**
21-
* Both {@link String} and {@link BytesReference} representation of the text. Starts with one of those, and if
22-
* the other is requests, caches the other one in a local reference so no additional conversion will be needed.
16+
* Both {@link String} and {@link UTF8Bytes} representation of the text. Starts with one of those, and if
17+
* the other is requested, caches the other one in a local reference so no additional conversion will be needed.
2318
*/
24-
public final class Text implements Comparable<Text>, ToXContentFragment {
19+
public final class Text implements XContentString, Comparable<Text>, ToXContentFragment {
2520

2621
public static final Text[] EMPTY_ARRAY = new Text[0];
2722

@@ -36,31 +31,46 @@ public static Text[] convertFromStringArray(String[] strings) {
3631
return texts;
3732
}
3833

39-
private BytesReference bytes;
40-
private String text;
34+
private UTF8Bytes bytes;
35+
private String string;
4136
private int hash;
37+
private int stringLength = -1;
38+
39+
/**
40+
* Construct a Text from encoded UTF8Bytes. Since no string length is specified, {@link #stringLength()}
41+
* will perform a string conversion to measure the string length.
42+
*/
43+
public Text(UTF8Bytes bytes) {
44+
this.bytes = bytes;
45+
}
4246

43-
public Text(BytesReference bytes) {
47+
/**
48+
* Construct a Text from encoded UTF8Bytes and an explicit string length. Used to avoid string conversion
49+
* in {@link #stringLength()}. The provided stringLength should match the value that would
50+
* be calculated by {@link Text#Text(UTF8Bytes)}.
51+
*/
52+
public Text(UTF8Bytes bytes, int stringLength) {
4453
this.bytes = bytes;
54+
this.stringLength = stringLength;
4555
}
4656

47-
public Text(String text) {
48-
this.text = text;
57+
public Text(String string) {
58+
this.string = string;
4959
}
5060

5161
/**
52-
* Whether a {@link BytesReference} view of the data is already materialized.
62+
* Whether a {@link UTF8Bytes} view of the data is already materialized.
5363
*/
5464
public boolean hasBytes() {
5565
return bytes != null;
5666
}
5767

58-
/**
59-
* Returns a {@link BytesReference} view of the data.
60-
*/
61-
public BytesReference bytes() {
68+
@Override
69+
public UTF8Bytes bytes() {
6270
if (bytes == null) {
63-
bytes = new BytesArray(text.getBytes(StandardCharsets.UTF_8));
71+
var byteBuff = StandardCharsets.UTF_8.encode(string);
72+
assert byteBuff.hasArray();
73+
bytes = new UTF8Bytes(byteBuff.array(), byteBuff.arrayOffset() + byteBuff.position(), byteBuff.remaining());
6474
}
6575
return bytes;
6676
}
@@ -69,14 +79,25 @@ public BytesReference bytes() {
6979
* Whether a {@link String} view of the data is already materialized.
7080
*/
7181
public boolean hasString() {
72-
return text != null;
82+
return string != null;
7383
}
7484

75-
/**
76-
* Returns a {@link String} view of the data.
77-
*/
85+
@Override
7886
public String string() {
79-
return text == null ? bytes.utf8ToString() : text;
87+
if (string == null) {
88+
var byteBuff = ByteBuffer.wrap(bytes.bytes(), bytes.offset(), bytes.length());
89+
string = StandardCharsets.UTF_8.decode(byteBuff).toString();
90+
assert (stringLength < 0) || (string.length() == stringLength);
91+
}
92+
return string;
93+
}
94+
95+
@Override
96+
public int stringLength() {
97+
if (stringLength < 0) {
98+
stringLength = string().length();
99+
}
100+
return stringLength;
80101
}
81102

82103
@Override
@@ -115,8 +136,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
115136
} else {
116137
// TODO: TextBytesOptimization we can use a buffer here to convert it? maybe add a
117138
// request to jackson to support InputStream as well?
118-
BytesRef br = this.bytes().toBytesRef();
119-
return builder.utf8Value(br.bytes, br.offset, br.length);
139+
return builder.utf8Value(bytes.bytes(), bytes.offset(), bytes.length());
120140
}
121141
}
122142
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.xcontent;
11+
12+
import java.nio.ByteBuffer;
13+
14+
/**
 * A piece of text that can be viewed either as a {@link String} or as UTF-8 encoded bytes.
 */
public interface XContentString {
    /**
     * A slice of a byte array holding UTF-8 encoded text: {@code length} bytes of {@code bytes}
     * starting at {@code offset}.
     * <p>
     * The array is referenced, not copied, so callers must not modify it after handing it over.
     * Equality, hashing and ordering are all defined on the contents of the slice only; the bytes
     * outside {@code [offset, offset + length)} are ignored.
     *
     * @param bytes  backing array containing the encoded text
     * @param offset index of the first byte of the slice within {@code bytes}
     * @param length number of bytes in the slice
     */
    record UTF8Bytes(byte[] bytes, int offset, int length) implements Comparable<UTF8Bytes> {
        /**
         * Wraps an entire array, i.e. a slice starting at offset 0 that spans {@code bytes.length} bytes.
         *
         * @param bytes the UTF-8 encoded text
         */
        public UTF8Bytes(byte[] bytes) {
            this(bytes, 0, bytes.length);
        }

        /**
         * Orders slices lexicographically by their <em>unsigned</em> byte values, with a shorter slice
         * sorting before any longer slice it is a prefix of.
         * <p>
         * Bytes must be treated as unsigned here: every byte of a multi-byte UTF-8 sequence is
         * {@code >= 0x80}, which is negative as a signed Java {@code byte}. A signed comparison would
         * sort all non-ASCII text before ASCII text, whereas unsigned byte order of UTF-8 matches
         * Unicode code point order.
         *
         * @param o the slice to compare against
         * @return a negative value, zero, or a positive value if this slice sorts before, equal to,
         *         or after {@code o}
         */
        @Override
        public int compareTo(UTF8Bytes o) {
            // Same array and same window: trivially equal, no need to scan the bytes.
            if (this.bytes == o.bytes && this.offset == o.offset && this.length == o.length) {
                return 0;
            }

            // Fully qualified so that this block does not depend on a new file-level import.
            return java.util.Arrays.compareUnsigned(bytes, offset, offset + length, o.bytes, o.offset, o.offset + o.length);
        }

        /**
         * Two slices are equal when they contain the same byte sequence, regardless of which array
         * backs them or where in that array the slice starts.
         */
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }

            return this.compareTo((UTF8Bytes) o) == 0;
        }

        /**
         * Content-based hash of the slice, consistent with {@link #equals(Object)}.
         */
        @Override
        public int hashCode() {
            return ByteBuffer.wrap(bytes, offset, length).hashCode();
        }
    }

    /**
     * Returns a {@link String} view of the data.
     */
    String string();

    /**
     * Returns an encoded {@link UTF8Bytes} view of the data.
     */
    UTF8Bytes bytes();

    /**
     * Returns the number of characters in the represented string.
     */
    int stringLength();
}
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.xcontent;
11+
12+
import org.elasticsearch.test.ESTestCase;
13+
14+
import java.nio.charset.StandardCharsets;
15+
16+
public class TextTests extends ESTestCase {
17+
public void testConvertToBytes() {
18+
String value = randomUnicodeOfLength(randomInt(128));
19+
byte[] encodedArr = value.getBytes(StandardCharsets.UTF_8);
20+
var encoded = new XContentString.UTF8Bytes(encodedArr);
21+
22+
var text = new Text(value);
23+
assertTrue(text.hasString());
24+
assertFalse(text.hasBytes());
25+
26+
assertEquals(value, text.string());
27+
assertEquals(encoded, text.bytes());
28+
29+
assertTrue(text.hasString());
30+
assertTrue(text.hasBytes());
31+
32+
// Ensure the conversion didn't mess up subsequent calls
33+
assertEquals(value, text.string());
34+
assertEquals(encoded, text.bytes());
35+
36+
assertSame(text.bytes(), text.bytes());
37+
}
38+
39+
public void testConvertToString() {
40+
String value = randomUnicodeOfLength(randomInt(128));
41+
byte[] encodedArr = value.getBytes(StandardCharsets.UTF_8);
42+
var encoded = new XContentString.UTF8Bytes(encodedArr);
43+
44+
var text = new Text(encoded);
45+
assertFalse(text.hasString());
46+
assertTrue(text.hasBytes());
47+
48+
assertEquals(value, text.string());
49+
assertEquals(encoded, text.bytes());
50+
51+
assertTrue(text.hasString());
52+
assertTrue(text.hasBytes());
53+
54+
// Ensure the conversion didn't mess up subsequent calls
55+
assertEquals(value, text.string());
56+
assertEquals(encoded, text.bytes());
57+
58+
assertSame(encoded, text.bytes());
59+
}
60+
61+
public void testStringLength() {
62+
int stringLength = randomInt(128);
63+
String value = randomUnicodeOfLength(stringLength);
64+
byte[] encodedArr = value.getBytes(StandardCharsets.UTF_8);
65+
var encoded = new XContentString.UTF8Bytes(encodedArr);
66+
67+
{
68+
var text = new Text(value);
69+
assertTrue(text.hasString());
70+
assertEquals(stringLength, text.stringLength());
71+
}
72+
73+
{
74+
var text = new Text(encoded);
75+
assertFalse(text.hasString());
76+
assertEquals(stringLength, text.stringLength());
77+
assertTrue(text.hasString());
78+
}
79+
80+
{
81+
var text = new Text(encoded, stringLength);
82+
assertFalse(text.hasString());
83+
assertEquals(stringLength, text.stringLength());
84+
assertFalse(text.hasString());
85+
}
86+
}
87+
88+
public void testEquals() {
89+
String value = randomUnicodeOfLength(randomInt(128));
90+
byte[] encodedArr = value.getBytes(StandardCharsets.UTF_8);
91+
var encoded = new XContentString.UTF8Bytes(encodedArr);
92+
93+
{
94+
var text1 = new Text(value);
95+
var text2 = new Text(value);
96+
assertTrue(text1.equals(text2));
97+
}
98+
99+
{
100+
var text1 = new Text(value);
101+
var text2 = new Text(encoded);
102+
assertTrue(text1.equals(text2));
103+
}
104+
105+
{
106+
var text1 = new Text(encoded);
107+
var text2 = new Text(encoded);
108+
assertTrue(text1.equals(text2));
109+
}
110+
}
111+
112+
public void testCompareTo() {
113+
String value1 = randomUnicodeOfLength(randomInt(128));
114+
byte[] encodedArr1 = value1.getBytes(StandardCharsets.UTF_8);
115+
var encoded1 = new XContentString.UTF8Bytes(encodedArr1);
116+
117+
{
118+
var text1 = new Text(value1);
119+
var text2 = new Text(value1);
120+
assertEquals(0, text1.compareTo(text2));
121+
}
122+
123+
{
124+
var text1 = new Text(value1);
125+
var text2 = new Text(encoded1);
126+
assertEquals(0, text1.compareTo(text2));
127+
}
128+
129+
{
130+
var text1 = new Text(encoded1);
131+
var text2 = new Text(encoded1);
132+
assertEquals(0, text1.compareTo(text2));
133+
}
134+
135+
String value2 = randomUnicodeOfLength(randomInt(128));
136+
byte[] encodedArr2 = value2.getBytes(StandardCharsets.UTF_8);
137+
var encoded2 = new XContentString.UTF8Bytes(encodedArr2);
138+
139+
int compSign = (int) Math.signum(encoded1.compareTo(encoded2));
140+
141+
{
142+
var text1 = new Text(value1);
143+
var text2 = new Text(value2);
144+
assertEquals(compSign, (int) Math.signum(text1.compareTo(text2)));
145+
}
146+
147+
{
148+
var text1 = new Text(value1);
149+
var text2 = new Text(encoded2);
150+
assertEquals(compSign, (int) Math.signum(text1.compareTo(text2)));
151+
}
152+
153+
{
154+
var text1 = new Text(encoded1);
155+
var text2 = new Text(value2);
156+
assertEquals(compSign, (int) Math.signum(text1.compareTo(text2)));
157+
}
158+
159+
{
160+
var text1 = new Text(encoded1);
161+
var text2 = new Text(encoded2);
162+
assertEquals(compSign, (int) Math.signum(text1.compareTo(text2)));
163+
}
164+
}
165+
166+
public void testRandomized() {
167+
int stringLength = randomInt(128);
168+
String value = randomUnicodeOfLength(stringLength);
169+
byte[] encodedArr = value.getBytes(StandardCharsets.UTF_8);
170+
var encoded = new XContentString.UTF8Bytes(encodedArr);
171+
172+
Text text = switch (randomInt(2)) {
173+
case 0 -> new Text(value);
174+
case 1 -> new Text(encoded);
175+
default -> new Text(encoded, stringLength);
176+
};
177+
178+
for (int i = 0; i < 20; i++) {
179+
switch (randomInt(5)) {
180+
case 0 -> assertEquals(encoded, text.bytes());
181+
case 1 -> assertSame(text.bytes(), text.bytes());
182+
case 2 -> assertEquals(value, text.string());
183+
case 3 -> assertEquals(value, text.toString());
184+
case 4 -> assertEquals(stringLength, text.stringLength());
185+
case 5 -> assertEquals(new Text(value), text);
186+
}
187+
}
188+
}
189+
190+
}

plugins/examples/custom-suggester/src/main/java/org/elasticsearch/example/customsuggester/CustomSuggester.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
import org.apache.lucene.search.IndexSearcher;
1313
import org.apache.lucene.util.CharsRefBuilder;
14-
import org.elasticsearch.common.text.Text;
14+
import org.elasticsearch.xcontent.Text;
1515
import org.elasticsearch.search.suggest.Suggest;
1616
import org.elasticsearch.search.suggest.Suggester;
1717

plugins/examples/custom-suggester/src/main/java/org/elasticsearch/example/customsuggester/CustomSuggestion.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
import org.elasticsearch.common.io.stream.StreamInput;
1313
import org.elasticsearch.common.io.stream.StreamOutput;
14-
import org.elasticsearch.common.text.Text;
14+
import org.elasticsearch.xcontent.Text;
1515
import org.elasticsearch.search.suggest.Suggest;
1616
import org.elasticsearch.xcontent.ParseField;
1717
import org.elasticsearch.xcontent.XContentBuilder;

0 commit comments

Comments
 (0)