|
10 | 10 | import io.deephaven.base.FileUtils;
|
11 | 11 | import io.deephaven.base.verify.Assert;
|
12 | 12 | import io.deephaven.engine.context.ExecutionContext;
|
| 13 | +import io.deephaven.engine.liveness.LivenessScopeStack; |
13 | 14 | import io.deephaven.engine.primitive.function.ByteConsumer;
|
14 | 15 | import io.deephaven.engine.primitive.function.CharConsumer;
|
15 | 16 | import io.deephaven.engine.primitive.function.FloatConsumer;
|
|
58 | 59 | import io.deephaven.test.types.OutOfBandTest;
|
59 | 60 | import io.deephaven.time.DateTimeUtils;
|
60 | 61 | import io.deephaven.util.QueryConstants;
|
| 62 | +import io.deephaven.util.SafeCloseable; |
61 | 63 | import io.deephaven.util.codec.SimpleByteArrayCodec;
|
62 | 64 | import io.deephaven.util.compare.DoubleComparisons;
|
63 | 65 | import io.deephaven.util.compare.FloatComparisons;
|
|
88 | 90 | import java.math.BigInteger;
|
89 | 91 | import java.net.URI;
|
90 | 92 | import java.nio.file.Files;
|
| 93 | +import java.nio.file.Path; |
91 | 94 | import java.nio.file.StandardCopyOption;
|
92 | 95 | import java.time.Instant;
|
93 | 96 | import java.time.LocalDate;
|
@@ -337,6 +340,102 @@ public void vectorParquetFormat() {
|
337 | 340 | groupedTable("largeAggParquet", LARGE_TABLE_SIZE, false);
|
338 | 341 | }
|
339 | 342 |
|
| 343 | + @Test |
| 344 | + public void indexRetentionThroughGC() { |
| 345 | + final String destPath = Path.of(rootFile.getPath(), "ParquetTest_indexRetention_test").toString(); |
| 346 | + final int tableSize = 10_000; |
| 347 | + final Table testTable = TableTools.emptyTable(tableSize).update( |
| 348 | + "symbol = randomInt(0,4)", |
| 349 | + "price = randomInt(0,10000) * 0.01", |
| 350 | + "str_id = `str_` + String.format(`%08d`, randomInt(0,1_000_000))", |
| 351 | + "indexed_val = ii % 10_000"); |
| 352 | + final ParquetInstructions writeInstructions = ParquetInstructions.builder() |
| 353 | + .setGenerateMetadataFiles(true) |
| 354 | + .addIndexColumns("indexed_val") |
| 355 | + .build(); |
| 356 | + final PartitionedTable partitionedTable = testTable.partitionBy("symbol"); |
| 357 | + ParquetTools.writeKeyValuePartitionedTable(partitionedTable, destPath, writeInstructions); |
| 358 | + final Table child; |
| 359 | + |
| 360 | + // We don't need this liveness scope for liveness management, but rather to opt out of the enclosing scope's |
| 361 | + // enforceStrongReachability |
| 362 | + try (final SafeCloseable ignored = LivenessScopeStack.open()) { |
| 363 | + // Read from disk and validate the indexes through GC. |
| 364 | + Table parent = ParquetTools.readTable(destPath); |
| 365 | + child = parent.update("new_val = indexed_val + 1") |
| 366 | + .update("new_val = new_val + 1") |
| 367 | + .update("new_val = new_val + 1") |
| 368 | + .update("new_val = new_val + 1"); |
| 369 | + |
| 370 | + // These indexes will survive GC because the parent table is holding strong references. |
| 371 | + System.gc(); |
| 372 | + |
| 373 | + // The parent table should have the indexes. |
| 374 | + Assert.eqTrue(DataIndexer.hasDataIndex(parent, "symbol"), "hasDataIndex -> symbol"); |
| 375 | + Assert.eqTrue(DataIndexer.hasDataIndex(parent, "indexed_val"), "hasDataIndex -> indexed_val"); |
| 376 | + |
| 377 | + // The child table should have the indexes while the parent is retained. |
| 378 | + Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol"); |
| 379 | + Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val"); |
| 380 | + |
| 381 | + // Force the parent to null to allow GC to collect it. |
| 382 | + parent = null; |
| 383 | + } |
| 384 | + |
| 385 | + // After a GC, the child table should still have access to the indexes. |
| 386 | + System.gc(); |
| 387 | + Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol"); |
| 388 | + Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val"); |
| 389 | + } |
| 390 | + |
| 391 | + @Test |
| 392 | + public void remappedIndexRetentionThroughGC() { |
| 393 | + final String destPath = |
| 394 | + Path.of(rootFile.getPath(), "ParquetTest_remappedIndexRetention_test.parquet").toString(); |
| 395 | + final int tableSize = 10_000; |
| 396 | + final Table testTable = TableTools.emptyTable(tableSize).update( |
| 397 | + "symbol = randomInt(0,4)", |
| 398 | + "price = randomInt(0,10000) * 0.01", |
| 399 | + "str_id = `str_` + String.format(`%08d`, randomInt(0,1_000_000))", |
| 400 | + "indexed_val = ii % 10_000"); |
| 401 | + final ParquetInstructions writeInstructions = ParquetInstructions.builder() |
| 402 | + .setGenerateMetadataFiles(true) |
| 403 | + .addIndexColumns("symbol") |
| 404 | + .addIndexColumns("indexed_val") |
| 405 | + .build(); |
| 406 | + ParquetTools.writeTable(testTable, destPath, writeInstructions); |
| 407 | + final Table child; |
| 408 | + |
| 409 | + // We don't need this liveness scope for liveness management, but rather to opt out of the enclosing scope's |
| 410 | + // enforceStrongReachability |
| 411 | + try (final SafeCloseable ignored = LivenessScopeStack.open()) { |
| 412 | + // Read from disk and validate the indexes through GC. |
| 413 | + Table parent = ParquetTools.readTable(destPath); |
| 414 | + |
| 415 | + // select() produces in-memory column sources, triggering the remapping of the indexes. |
| 416 | + child = parent.select(); |
| 417 | + |
| 418 | + // These indexes will survive GC because the parent table is holding strong references. |
| 419 | + System.gc(); |
| 420 | + |
| 421 | + // The parent table should have the indexes. |
| 422 | + Assert.eqTrue(DataIndexer.hasDataIndex(parent, "symbol"), "hasDataIndex -> symbol"); |
| 423 | + Assert.eqTrue(DataIndexer.hasDataIndex(parent, "indexed_val"), "hasDataIndex -> indexed_val"); |
| 424 | + |
| 425 | + // The child table should have the indexes while the parent is retained. |
| 426 | + Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol"); |
| 427 | + Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val"); |
| 428 | + |
| 429 | + // Force the parent to null to allow GC to collect it. |
| 430 | + parent = null; |
| 431 | + } |
| 432 | + |
| 433 | + // After a GC, the child table should still have access to the indexes. |
| 434 | + System.gc(); |
| 435 | + Assert.eqTrue(DataIndexer.hasDataIndex(child, "symbol"), "hasDataIndex -> symbol"); |
| 436 | + Assert.eqTrue(DataIndexer.hasDataIndex(child, "indexed_val"), "hasDataIndex -> indexed_val"); |
| 437 | + } |
| 438 | + |
340 | 439 | @Test
|
341 | 440 | public void indexByLongKey() {
|
342 | 441 | final TableDefinition definition = TableDefinition.of(
|
|
0 commit comments