Skip to content

Commit b32641f

Browse files
authored
feat: Barrage Refactor Read/Write Chunk Factories (#6065)
This is an overhaul of type mapping from Arrow to Deephaven columns. We can now coerce from many kinds of types, enabling widening / truncating and conversion between types. For example, uint64_t is now mapped to BigInteger by default, but can be explicitly mapped to `long.class` (may overflow). To set an explicit mapping, set the schema's column attribute "deephaven:type" to the canonical name of the class. Fixes #58 (custom type serialization / deserialization). Fixes #936 (ColumnConversionModes is being replaced with easy-to-integrate custom serialization). Fixes #2984 (refactoring has good interface documentation). Fixes #3403 (by supporting a variety of mappings, these now must match client wiring). Fixes #5258 (snapshot/subscribe methods with default w2w options). Fixes #5453 (support other Timestamp arrow wire encodings). Fixes #5864 (support for uint64_t). Fixes #6114 (supports ObjectVector<Boolean> properly).
1 parent 4d57893 commit b32641f

File tree

154 files changed

+15562
-5555
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

154 files changed

+15562
-5555
lines changed

cpp-client/build.gradle

+1
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ def testCppClient = Docker.registerDockerTask(project, 'testCppClient') {
114114
environmentVariable 'DH_HOST', deephavenDocker.containerName.get()
115115
environmentVariable 'DH_PORT', '10000'
116116
}
117+
waitTimeMinutes = 1
117118
containerDependencies.dependsOn = [deephavenDocker.healthyTask]
118119
containerDependencies.finalizedBy = deephavenDocker.endTask
119120
network = deephavenDocker.networkName.get()

cpp-client/deephaven/tests/src/time_unit_test.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ TEST_CASE("Uploaded Arrow Timestamp units get normalized to nanos at FillChunk t
7272
}
7373
}
7474

75-
TEST_CASE("Uploaded Arrow Time64 units get normalized to nanos at FillChunk time", "[timeunit][.hidden]") {
75+
TEST_CASE("Uploaded Arrow Time64 units get normalized to nanos at FillChunk time", "[timeunit]") {
7676
auto tm = TableMakerForTests::Create();
7777

7878
std::vector<std::optional<InternalLocalTime<arrow::TimeUnit::MICRO>>> lt_micro;

engine/api/src/main/java/io/deephaven/engine/table/ColumnDefinition.java

+9
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,15 @@ public <Other> ColumnDefinition<Other> withDataType(@NotNull final Class<Other>
400400
: fromGenericType(name, newDataType, componentType, columnType);
401401
}
402402

403+
public <Other> ColumnDefinition<Other> withDataType(
404+
@NotNull final Class<Other> newDataType,
405+
@Nullable final Class<?> newComponentType) {
406+
// noinspection unchecked
407+
return dataType == newDataType && componentType == newComponentType
408+
? (ColumnDefinition<Other>) this
409+
: fromGenericType(name, newDataType, newComponentType, columnType);
410+
}
411+
403412
public ColumnDefinition<?> withName(@NotNull final String newName) {
404413
return newName.equals(name) ? this : new ColumnDefinition<>(newName, dataType, componentType, columnType);
405414
}

engine/api/src/main/java/io/deephaven/engine/table/Table.java

+6
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,12 @@ public interface Table extends
223223
* Set this attribute to enable collection of barrage performance stats.
224224
*/
225225
String BARRAGE_PERFORMANCE_KEY_ATTRIBUTE = "BarragePerformanceTableKey";
226+
/**
227+
* Set this attribute to an Apache Arrow POJO Schema to control the column encoding used for barrage serialization.
228+
* <p>
229+
* See {@code org.apache.arrow.vector.types.pojo.Schema}.
230+
*/
231+
String BARRAGE_SCHEMA_ATTRIBUTE = "BarrageSchema";
226232

227233
// -----------------------------------------------------------------------------------------------------------------
228234
// ColumnSources for fetching data by row key

engine/chunk/src/main/java/io/deephaven/chunk/BooleanChunk.java

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
// @formatter:off
88
package io.deephaven.chunk;
99

10+
import io.deephaven.util.QueryConstants;
1011
import io.deephaven.util.type.ArrayTypeUtils;
1112
import io.deephaven.chunk.attributes.Any;
1213

@@ -74,6 +75,12 @@ public final boolean get(int index) {
7475
return data[offset + index];
7576
}
7677

78+
// region isNull
79+
public final boolean isNull(int index) {
80+
return false;
81+
}
82+
// endregion isNull
83+
7784
@Override
7885
public BooleanChunk<ATTR> slice(int offset, int capacity) {
7986
ChunkHelpers.checkSliceArgs(size, offset, capacity);

engine/chunk/src/main/java/io/deephaven/chunk/ByteChunk.java

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
// @formatter:off
88
package io.deephaven.chunk;
99

10+
import io.deephaven.util.QueryConstants;
1011
import io.deephaven.util.type.ArrayTypeUtils;
1112
import io.deephaven.chunk.attributes.Any;
1213

@@ -78,6 +79,12 @@ public final byte get(int index) {
7879
return data[offset + index];
7980
}
8081

82+
// region isNull
83+
public final boolean isNull(int index) {
84+
return data[offset + index] == QueryConstants.NULL_BYTE;
85+
}
86+
// endregion isNull
87+
8188
@Override
8289
public ByteChunk<ATTR> slice(int offset, int capacity) {
8390
ChunkHelpers.checkSliceArgs(size, offset, capacity);

engine/chunk/src/main/java/io/deephaven/chunk/CharChunk.java

+7
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
//
44
package io.deephaven.chunk;
55

6+
import io.deephaven.util.QueryConstants;
67
import io.deephaven.util.type.ArrayTypeUtils;
78
import io.deephaven.chunk.attributes.Any;
89

@@ -73,6 +74,12 @@ public final char get(int index) {
7374
return data[offset + index];
7475
}
7576

77+
// region isNull
78+
public final boolean isNull(int index) {
79+
return data[offset + index] == QueryConstants.NULL_CHAR;
80+
}
81+
// endregion isNull
82+
7683
@Override
7784
public CharChunk<ATTR> slice(int offset, int capacity) {
7885
ChunkHelpers.checkSliceArgs(size, offset, capacity);

engine/chunk/src/main/java/io/deephaven/chunk/DoubleChunk.java

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
// @formatter:off
88
package io.deephaven.chunk;
99

10+
import io.deephaven.util.QueryConstants;
1011
import io.deephaven.util.type.ArrayTypeUtils;
1112
import io.deephaven.chunk.attributes.Any;
1213

@@ -77,6 +78,12 @@ public final double get(int index) {
7778
return data[offset + index];
7879
}
7980

81+
// region isNull
82+
public final boolean isNull(int index) {
83+
return data[offset + index] == QueryConstants.NULL_DOUBLE;
84+
}
85+
// endregion isNull
86+
8087
@Override
8188
public DoubleChunk<ATTR> slice(int offset, int capacity) {
8289
ChunkHelpers.checkSliceArgs(size, offset, capacity);

engine/chunk/src/main/java/io/deephaven/chunk/FloatChunk.java

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
// @formatter:off
88
package io.deephaven.chunk;
99

10+
import io.deephaven.util.QueryConstants;
1011
import io.deephaven.util.type.ArrayTypeUtils;
1112
import io.deephaven.chunk.attributes.Any;
1213

@@ -77,6 +78,12 @@ public final float get(int index) {
7778
return data[offset + index];
7879
}
7980

81+
// region isNull
82+
public final boolean isNull(int index) {
83+
return data[offset + index] == QueryConstants.NULL_FLOAT;
84+
}
85+
// endregion isNull
86+
8087
@Override
8188
public FloatChunk<ATTR> slice(int offset, int capacity) {
8289
ChunkHelpers.checkSliceArgs(size, offset, capacity);

engine/chunk/src/main/java/io/deephaven/chunk/IntChunk.java

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
// @formatter:off
88
package io.deephaven.chunk;
99

10+
import io.deephaven.util.QueryConstants;
1011
import io.deephaven.util.type.ArrayTypeUtils;
1112
import io.deephaven.chunk.attributes.Any;
1213

@@ -77,6 +78,12 @@ public final int get(int index) {
7778
return data[offset + index];
7879
}
7980

81+
// region isNull
82+
public final boolean isNull(int index) {
83+
return data[offset + index] == QueryConstants.NULL_INT;
84+
}
85+
// endregion isNull
86+
8087
@Override
8188
public IntChunk<ATTR> slice(int offset, int capacity) {
8289
ChunkHelpers.checkSliceArgs(size, offset, capacity);

engine/chunk/src/main/java/io/deephaven/chunk/LongChunk.java

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
// @formatter:off
88
package io.deephaven.chunk;
99

10+
import io.deephaven.util.QueryConstants;
1011
import io.deephaven.util.type.ArrayTypeUtils;
1112
import io.deephaven.chunk.attributes.Any;
1213

@@ -77,6 +78,12 @@ public final long get(int index) {
7778
return data[offset + index];
7879
}
7980

81+
// region isNull
82+
public final boolean isNull(int index) {
83+
return data[offset + index] == QueryConstants.NULL_LONG;
84+
}
85+
// endregion isNull
86+
8087
@Override
8188
public LongChunk<ATTR> slice(int offset, int capacity) {
8289
ChunkHelpers.checkSliceArgs(size, offset, capacity);

engine/chunk/src/main/java/io/deephaven/chunk/ObjectChunk.java

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
// @formatter:off
88
package io.deephaven.chunk;
99

10+
import io.deephaven.util.QueryConstants;
1011
import io.deephaven.util.type.ArrayTypeUtils;
1112
import io.deephaven.chunk.attributes.Any;
1213

@@ -77,6 +78,12 @@ public final T get(int index) {
7778
return data[offset + index];
7879
}
7980

81+
// region isNull
82+
public final boolean isNull(int index) {
83+
return data[offset + index] == null;
84+
}
85+
// endregion isNull
86+
8087
@Override
8188
public ObjectChunk<T, ATTR> slice(int offset, int capacity) {
8289
ChunkHelpers.checkSliceArgs(size, offset, capacity);

engine/chunk/src/main/java/io/deephaven/chunk/ResettableReadOnlyChunk.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
* {@link Chunk} that may have its backing storage reset to a slice of that belonging to another {@link Chunk} or a
1111
* native array.
1212
*/
13-
public interface ResettableReadOnlyChunk<ATTR_BASE extends Any> extends ResettableChunk<ATTR_BASE>, PoolableChunk {
13+
public interface ResettableReadOnlyChunk<ATTR_BASE extends Any>
14+
extends ResettableChunk<ATTR_BASE>, PoolableChunk<ATTR_BASE> {
1415

1516
/**
1617
* Reset the data and bounds of this chunk to a range or sub-range of the specified {@link Chunk}.

engine/chunk/src/main/java/io/deephaven/chunk/ResettableWritableChunk.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* {@link WritableChunk} or a native array.
1212
*/
1313
public interface ResettableWritableChunk<ATTR_BASE extends Any>
14-
extends ResettableChunk<ATTR_BASE>, WritableChunk<ATTR_BASE>, PoolableChunk {
14+
extends ResettableChunk<ATTR_BASE>, WritableChunk<ATTR_BASE>, PoolableChunk<ATTR_BASE> {
1515

1616
@Override
1717
<ATTR extends ATTR_BASE> WritableChunk<ATTR> resetFromChunk(WritableChunk<ATTR> other, int offset, int capacity);

engine/chunk/src/main/java/io/deephaven/chunk/ShortChunk.java

+7
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
// @formatter:off
88
package io.deephaven.chunk;
99

10+
import io.deephaven.util.QueryConstants;
1011
import io.deephaven.util.type.ArrayTypeUtils;
1112
import io.deephaven.chunk.attributes.Any;
1213

@@ -77,6 +78,12 @@ public final short get(int index) {
7778
return data[offset + index];
7879
}
7980

81+
// region isNull
82+
public final boolean isNull(int index) {
83+
return data[offset + index] == QueryConstants.NULL_SHORT;
84+
}
85+
// endregion isNull
86+
8087
@Override
8188
public ShortChunk<ATTR> slice(int offset, int capacity) {
8289
ChunkHelpers.checkSliceArgs(size, offset, capacity);

engine/chunk/src/main/java/io/deephaven/chunk/WritableChunk.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
*
1515
* @param <ATTR> Descriptive attribute that applies to the elements stored within this WritableChunk
1616
*/
17-
public interface WritableChunk<ATTR extends Any> extends Chunk<ATTR>, PoolableChunk {
17+
public interface WritableChunk<ATTR extends Any> extends Chunk<ATTR>, PoolableChunk<ATTR> {
1818
@Override
1919
WritableChunk<ATTR> slice(int offset, int capacity);
2020

engine/chunk/src/main/java/io/deephaven/chunk/sized/SizedChunk.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public WritableChunk<T> get() {
3737

3838
/**
3939
* Ensure the underlying chunk has a capacity of at least {@code capacity}.
40-
*
40+
* <p>
4141
* The data and size of the returned chunk are undefined.
4242
*
4343
* @param capacity the minimum capacity for the chunk.
@@ -56,9 +56,9 @@ public WritableChunk<T> ensureCapacity(int capacity) {
5656

5757
/**
5858
* Ensure the underlying chunk has a capacity of at least {@code capacity}.
59-
*
59+
* <p>
6060
* If the chunk has existing data, then it is copied to the new chunk.
61-
*
61+
* <p>
6262
* If the underlying chunk already exists, then the size of the chunk is the original size. If the chunk did not
6363
* exist, then the size of the returned chunk is zero.
6464
*

engine/chunk/src/main/java/io/deephaven/chunk/util/pools/PoolableChunk.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44
package io.deephaven.chunk.util.pools;
55

66
import io.deephaven.chunk.Chunk;
7+
import io.deephaven.chunk.attributes.Any;
78
import io.deephaven.util.SafeCloseable;
89

910
/**
1011
* Marker interface for {@link Chunk} subclasses that can be kept within a {@link ChunkPool}, and whose
1112
* {@link #close()} method will return them to the appropriate pool.
1213
*/
13-
public interface PoolableChunk extends SafeCloseable {
14+
public interface PoolableChunk<ATTR extends Any> extends Chunk<ATTR>, SafeCloseable {
1415
}

engine/table/src/main/java/io/deephaven/engine/table/impl/BaseTable.java

+9
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,15 @@ public enum CopyAttributeOperation {
360360
CopyAttributeOperation.Flatten, // add flatten for now because web flattens all views
361361
CopyAttributeOperation.Preview));
362362

363+
tempMap.put(BARRAGE_SCHEMA_ATTRIBUTE, EnumSet.of(
364+
CopyAttributeOperation.Filter,
365+
CopyAttributeOperation.FirstBy,
366+
CopyAttributeOperation.Flatten,
367+
CopyAttributeOperation.LastBy,
368+
CopyAttributeOperation.PartitionBy,
369+
CopyAttributeOperation.Reverse,
370+
CopyAttributeOperation.Sort));
371+
363372
attributeToCopySet = Collections.unmodifiableMap(tempMap);
364373
}
365374

engine/table/src/main/java/io/deephaven/engine/table/impl/sources/ReinterpretUtils.java

+25
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
package io.deephaven.engine.table.impl.sources;
55

66
import io.deephaven.chunk.ChunkType;
7+
import io.deephaven.engine.table.ColumnDefinition;
78
import io.deephaven.engine.table.ColumnSource;
89
import io.deephaven.engine.table.WritableColumnSource;
910
import org.jetbrains.annotations.NotNull;
@@ -212,6 +213,27 @@ public static ColumnSource<?>[] maybeConvertToPrimitive(@NotNull final ColumnSou
212213
return result;
213214
}
214215

216+
/**
217+
* If {@code columnDefinition.getDataType()} or {@code columnDefinition.getComponentType()} are something that we
218+
* prefer to handle as a primitive, do the appropriate conversion.
219+
*
220+
* @param columnDefinition The column definition to convert
221+
* @return if possible, {@code columnDefinition} converted to a primitive, otherwise {@code columnDefinition}
222+
*/
223+
@NotNull
224+
public static ColumnDefinition<?> maybeConvertToPrimitive(@NotNull final ColumnDefinition<?> columnDefinition) {
225+
final Class<?> dataType = ReinterpretUtils.maybeConvertToPrimitiveDataType(columnDefinition.getDataType());
226+
Class<?> componentType = columnDefinition.getComponentType();
227+
if (componentType != null) {
228+
componentType = ReinterpretUtils.maybeConvertToPrimitiveDataType(componentType);
229+
}
230+
if (columnDefinition.getDataType() == dataType
231+
&& columnDefinition.getComponentType() == componentType) {
232+
return columnDefinition;
233+
}
234+
return columnDefinition.withDataType(dataType, componentType);
235+
}
236+
215237
/**
216238
* If {@code source} is something that we prefer to handle as a primitive, do the appropriate conversion.
217239
*
@@ -265,6 +287,7 @@ public static ChunkType maybeConvertToWritablePrimitiveChunkType(@NotNull final
265287
}
266288
if (dataType == Instant.class) {
267289
// Note that storing ZonedDateTime as a primitive is lossy on the time zone.
290+
// TODO (https://github.com/deephaven/deephaven-core/issues/5241): Inconsistent handling of ZonedDateTime
268291
return ChunkType.Long;
269292
}
270293
return ChunkType.fromElementType(dataType);
@@ -283,6 +306,8 @@ public static Class<?> maybeConvertToPrimitiveDataType(@NotNull final Class<?> d
283306
return byte.class;
284307
}
285308
if (dataType == Instant.class || dataType == ZonedDateTime.class) {
309+
// Note: not all ZonedDateTime sources are convertible to long, so this doesn't match column source behavior
310+
// TODO (https://github.com/deephaven/deephaven-core/issues/5241): Inconsistent handling of ZonedDateTime
286311
return long.class;
287312
}
288313
return dataType;

engine/table/src/main/java/io/deephaven/engine/updategraph/impl/PeriodicUpdateGraph.java

+7-3
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@ public static Builder newBuilder(final String name) {
100100

101101
public static final String DEFAULT_TARGET_CYCLE_DURATION_MILLIS_PROP =
102102
"PeriodicUpdateGraph.targetCycleDurationMillis";
103+
104+
public static int getDefaultTargetCycleDurationMillis() {
105+
return Configuration.getInstance().getIntegerWithDefault(DEFAULT_TARGET_CYCLE_DURATION_MILLIS_PROP, 1000);
106+
}
107+
103108
private final long defaultTargetCycleDurationMillis;
104109
private volatile long targetCycleDurationMillis;
105110
private final ThreadInitializationFactory threadInitializationFactory;
@@ -252,7 +257,7 @@ public boolean isCycleOnBudget(long cycleTimeNanos) {
252257
* Resets the run cycle time to the default target configured via the {@link Builder} setting.
253258
*
254259
* @implNote If the {@link Builder#targetCycleDurationMillis(long)} property is not set, this value defaults to
255-
* {@link Builder#DEFAULT_TARGET_CYCLE_DURATION_MILLIS_PROP} which defaults to 1000ms.
260+
* {@link #DEFAULT_TARGET_CYCLE_DURATION_MILLIS_PROP} which defaults to 1000ms.
256261
*/
257262
@SuppressWarnings("unused")
258263
public void resetTargetCycleDuration() {
@@ -1166,8 +1171,7 @@ public static PeriodicUpdateGraph getInstance(final String name) {
11661171
public static final class Builder {
11671172
private final boolean allowUnitTestMode =
11681173
Configuration.getInstance().getBooleanWithDefault(ALLOW_UNIT_TEST_MODE_PROP, false);
1169-
private long targetCycleDurationMillis =
1170-
Configuration.getInstance().getIntegerWithDefault(DEFAULT_TARGET_CYCLE_DURATION_MILLIS_PROP, 1000);
1174+
private long targetCycleDurationMillis = getDefaultTargetCycleDurationMillis();
11711175
private long minimumCycleDurationToLogNanos = DEFAULT_MINIMUM_CYCLE_DURATION_TO_LOG_NANOSECONDS;
11721176

11731177
private String name;

extensions/barrage/BarrageTypeMapping.md

Whitespace-only changes.

0 commit comments

Comments
 (0)