34
34
import org .apache .iceberg .catalog .Namespace ;
35
35
import org .apache .iceberg .catalog .TableIdentifier ;
36
36
import org .apache .iceberg .types .Types ;
37
- import org .apache .parquet .column .ColumnDescriptor ;
38
37
import org .apache .parquet .hadoop .metadata .ParquetMetadata ;
38
+ import org .apache .parquet .schema .LogicalTypeAnnotation ;
39
+ import org .apache .parquet .schema .MessageType ;
39
40
import org .jetbrains .annotations .Nullable ;
40
41
import org .junit .jupiter .api .AfterEach ;
41
42
import org .junit .jupiter .api .BeforeEach ;
53
54
import java .util .List ;
54
55
import java .util .stream .Collectors ;
55
56
import static io .deephaven .engine .testutil .TstUtils .assertTableEquals ;
57
+ import static org .apache .parquet .schema .LogicalTypeAnnotation .intType ;
58
+ import static org .apache .parquet .schema .PrimitiveType .PrimitiveTypeName .DOUBLE ;
59
+ import static org .apache .parquet .schema .PrimitiveType .PrimitiveTypeName .INT32 ;
60
+ import static org .apache .parquet .schema .Types .buildMessage ;
61
+ import static org .apache .parquet .schema .Types .optional ;
56
62
import static org .assertj .core .api .Assertions .assertThat ;
57
63
import static org .assertj .core .api .Assertions .failBecauseExceptionWasNotThrown ;
58
64
@@ -416,8 +422,12 @@ void testColumnRenameWhileWriting() throws URISyntaxException {
416
422
{
417
423
final List <String > parquetFiles = getAllParquetFilesFromDataFiles (tableIdentifier );
418
424
assertThat (parquetFiles ).hasSize (1 );
419
- verifyFieldIdsFromParquetFile (parquetFiles .get (0 ), originalDefinition .getColumnNames (),
420
- nameToFieldIdFromSchema );
425
+ final MessageType expectedSchema = buildMessage ()
426
+ .addFields (
427
+ optional (INT32 ).id (1 ).as (intType (32 , true )).named ("intCol" ),
428
+ optional (DOUBLE ).id (2 ).named ("doubleCol" ))
429
+ .named ("root" );
430
+ verifySchema (parquetFiles .get (0 ), expectedSchema );
421
431
}
422
432
423
433
final Table moreData = TableTools .emptyTable (5 )
@@ -442,10 +452,18 @@ void testColumnRenameWhileWriting() throws URISyntaxException {
442
452
443
453
final List <String > parquetFiles = getAllParquetFilesFromDataFiles (tableIdentifier );
444
454
assertThat (parquetFiles ).hasSize (2 );
445
- verifyFieldIdsFromParquetFile (parquetFiles .get (0 ), moreData .getDefinition ().getColumnNames (),
446
- newNameToFieldId );
447
- verifyFieldIdsFromParquetFile (parquetFiles .get (1 ), originalDefinition .getColumnNames (),
448
- nameToFieldIdFromSchema );
455
+ final MessageType expectedSchema0 = buildMessage ()
456
+ .addFields (
457
+ optional (INT32 ).id (1 ).as (intType (32 , true )).named ("newIntCol" ),
458
+ optional (DOUBLE ).id (2 ).named ("newDoubleCol" ))
459
+ .named ("root" );
460
+ final MessageType expectedSchema1 = buildMessage ()
461
+ .addFields (
462
+ optional (INT32 ).id (1 ).as (intType (32 , true )).named ("intCol" ),
463
+ optional (DOUBLE ).id (2 ).named ("doubleCol" ))
464
+ .named ("root" );
465
+ verifySchema (parquetFiles .get (0 ), expectedSchema0 );
466
+ verifySchema (parquetFiles .get (1 ), expectedSchema1 );
449
467
}
450
468
451
469
// TODO: This is failing because we don't map columns based on the column ID when reading. Uncomment this
@@ -455,31 +473,13 @@ void testColumnRenameWhileWriting() throws URISyntaxException {
455
473
// moreData.renameColumns("intCol = newIntCol", "doubleCol = newDoubleCol")), fromIceberg);
456
474
}
457
475
458
- /**
459
- * Verify that the schema of the parquet file read from the provided path has the provided column and corresponding
460
- * field IDs.
461
- */
462
- private void verifyFieldIdsFromParquetFile (
463
- final String path ,
464
- final List <String > columnNames ,
465
- final Map <String , Integer > nameToFieldId ) throws URISyntaxException {
476
/**
 * Asserts that the Parquet file at the given path carries exactly the expected schema.
 *
 * The file metadata is obtained through a ParquetTableLocationKey built from the path URI and from
 * ParquetInstructions whose special instructions come from dataInstructions(). The schema found in that
 * metadata is then compared to the expected one with a single AssertJ isEqualTo check.
 *
 * NOTE(review): the callers visible in this test build the expectation with field names, field IDs,
 * primitive types and logical type annotations; presumably all of these take part in the equality
 * check - confirm against the MessageType equals implementation.
 *
 * @param path location of the Parquet file, parsed with new URI(path)
 * @param expectedSchema the schema the file is expected to have
 * @throws URISyntaxException if the path cannot be parsed as a URI
 */
+ private void verifySchema (String path , MessageType expectedSchema ) throws URISyntaxException {
466
477
final ParquetMetadata metadata =
467
478
new ParquetTableLocationKey (new URI (path ), 0 , null , ParquetInstructions .builder ()
468
479
.setSpecialInstructions (dataInstructions ())
469
480
.build ())
470
481
.getMetadata ();
471
- final List <ColumnDescriptor > columnsMetadata = metadata .getFileMetaData ().getSchema ().getColumns ();
472
-
473
- final int numColumns = columnNames .size ();
474
- for (int colIdx = 0 ; colIdx < numColumns ; colIdx ++) {
475
- final String columnName = columnNames .get (colIdx );
476
- final String columnNameFromParquetFile = columnsMetadata .get (colIdx ).getPath ()[0 ];
477
- assertThat (columnName ).isEqualTo (columnNameFromParquetFile );
478
-
479
- final int expectedFieldId = nameToFieldId .get (columnName );
480
- final int fieldIdFromParquetFile = columnsMetadata .get (colIdx ).getPrimitiveType ().getId ().intValue ();
481
- assertThat (fieldIdFromParquetFile ).isEqualTo (expectedFieldId );
482
- }
482
// One whole-schema equality assertion replaces the removed per-column loop over names and field IDs.
+ assertThat (metadata .getFileMetaData ().getSchema ()).isEqualTo (expectedSchema );
483
483
}
484
484
485
485
/**
0 commit comments