48
48
import org .apache .iceberg .types .Types ;
49
49
import org .apache .parquet .hadoop .metadata .ParquetMetadata ;
50
50
import org .apache .parquet .schema .MessageType ;
51
+ import org .jetbrains .annotations .NotNull ;
51
52
import org .jetbrains .annotations .Nullable ;
52
53
import org .junit .jupiter .api .AfterEach ;
53
54
import org .junit .jupiter .api .BeforeEach ;
@@ -1061,6 +1062,15 @@ private void verifySortOrder(
1061
1062
final IcebergTableAdapter tableAdapter ,
1062
1063
final TableIdentifier tableIdentifier ,
1063
1064
final List <List <SortColumn >> expectedSortOrders ) {
1065
+ verifySortOrder (tableAdapter , tableIdentifier , expectedSortOrders ,
1066
+ ParquetInstructions .EMPTY .withTableDefinition (tableAdapter .definition ()));
1067
+ }
1068
+
1069
+ private void verifySortOrder (
1070
+ @ NotNull final IcebergTableAdapter tableAdapter ,
1071
+ @ NotNull final TableIdentifier tableIdentifier ,
1072
+ @ NotNull final List <List <SortColumn >> expectedSortOrders ,
1073
+ @ NotNull final ParquetInstructions readInstructions ) {
1064
1074
final org .apache .iceberg .Table icebergTable = tableAdapter .icebergTable ();
1065
1075
final String uriScheme = locationUri (icebergTable ).getScheme ();
1066
1076
final SeekableChannelsProvider seekableChannelsProvider =
@@ -1078,9 +1088,9 @@ private void verifySortOrder(
1078
1088
StandaloneTableKey .getInstance (),
1079
1089
new IcebergTableParquetLocationKey (
1080
1090
null , null , tableIdentifier , manifestFile , dataFile ,
1081
- dataFileUri (icebergTable , dataFile ), 0 , Map .of (), ParquetInstructions . EMPTY ,
1091
+ dataFileUri (icebergTable , dataFile ), 0 , Map .of (), readInstructions ,
1082
1092
seekableChannelsProvider ),
1083
- ParquetInstructions . EMPTY );
1093
+ readInstructions );
1084
1094
actualSortOrders .add (tableLocation .getSortedColumns ());
1085
1095
}
1086
1096
}
@@ -1347,6 +1357,112 @@ void testFailIfSortOrderUnmapped() {
1347
1357
assertTableEquals (source , fromIceberg );
1348
1358
}
1349
1359
1360
+ @ Test
1361
+ void testSortOrderWithColumnRename () {
1362
+ final Table source = TableTools .newTable (
1363
+ intCol ("intCol" , 15 , 0 , 32 , 33 , 19 ),
1364
+ doubleCol ("doubleCol" , 10.5 , 2.5 , 3.5 , 40.5 , 0.5 ),
1365
+ longCol ("longCol" , 20L , 50L , 0L , 10L , 5L ));
1366
+ final TableIdentifier tableIdentifier = TableIdentifier .parse ("MyNamespace.MyTable" );
1367
+ final IcebergTableAdapter tableAdapter = catalogAdapter .createTable (tableIdentifier , source .getDefinition ());
1368
+
1369
+ // Update the default sort order of the underlying iceberg table
1370
+ final org .apache .iceberg .Table icebergTable = tableAdapter .icebergTable ();
1371
+ icebergTable .replaceSortOrder ().asc ("intCol" ).desc ("doubleCol" ).commit ();
1372
+
1373
+ // Append data to the table
1374
+ final IcebergTableWriter tableWriterWithSorting = tableAdapter .tableWriter (writerOptionsBuilder ()
1375
+ .tableDefinition (source .getDefinition ())
1376
+ .build ());
1377
+ tableWriterWithSorting .append (IcebergWriteInstructions .builder ()
1378
+ .addTables (source )
1379
+ .build ());
1380
+
1381
+ // Now read a table with a column rename
1382
+ final IcebergReadInstructions readInstructions = IcebergReadInstructions .builder ()
1383
+ .putColumnRenames ("intCol" , "renamedIntCol" )
1384
+ .build ();
1385
+ final Table fromIceberg = tableAdapter .table (readInstructions );
1386
+ final Table expected = source .renameColumns ("renamedIntCol = intCol" )
1387
+ .sort (List .of (SortColumn .asc (ColumnName .of ("renamedIntCol" )),
1388
+ SortColumn .desc (ColumnName .of ("doubleCol" ))));
1389
+ assertTableEquals (expected , fromIceberg );
1390
+
1391
+ // Verify that the sort order is still applied
1392
+ final ParquetInstructions parquetInstructions = ParquetInstructions .builder ()
1393
+ .addColumnNameMapping ("intCol" , "renamedIntCol" )
1394
+ .setTableDefinition (expected .getDefinition ())
1395
+ .build ();
1396
+ verifySortOrder (tableAdapter , tableIdentifier , List .of (
1397
+ List .of (SortColumn .asc (ColumnName .of ("renamedIntCol" )), SortColumn .desc (ColumnName .of ("doubleCol" )))),
1398
+ parquetInstructions );
1399
+ }
1400
+
1401
+ @ Test
1402
+ void testSortOrderWithTableDefinition () {
1403
+ final Table source = TableTools .newTable (
1404
+ intCol ("intCol" , 15 , 0 , 32 , 33 , 19 ),
1405
+ doubleCol ("doubleCol" , 10.5 , 2.5 , 3.5 , 40.5 , 0.5 ),
1406
+ longCol ("longCol" , 20L , 50L , 0L , 10L , 5L ));
1407
+ final TableIdentifier tableIdentifier = TableIdentifier .parse ("MyNamespace.MyTable" );
1408
+ final IcebergTableAdapter tableAdapter = catalogAdapter .createTable (tableIdentifier , source .getDefinition ());
1409
+
1410
+ // Update the default sort order of the underlying iceberg table
1411
+ final org .apache .iceberg .Table icebergTable = tableAdapter .icebergTable ();
1412
+ icebergTable .replaceSortOrder ().asc ("intCol" ).desc ("doubleCol" ).commit ();
1413
+
1414
+ // Append data to the table
1415
+ final IcebergTableWriter tableWriterWithSorting = tableAdapter .tableWriter (writerOptionsBuilder ()
1416
+ .tableDefinition (source .getDefinition ())
1417
+ .build ());
1418
+ tableWriterWithSorting .append (IcebergWriteInstructions .builder ()
1419
+ .addTables (source )
1420
+ .build ());
1421
+
1422
+ {
1423
+ // Now read a table with a different table definition skipping the "doubleCol"
1424
+ final TableDefinition tableDefinition = TableDefinition .of (
1425
+ ColumnDefinition .ofInt ("intCol" ),
1426
+ ColumnDefinition .ofLong ("longCol" ));
1427
+ final IcebergReadInstructions readInstructions = IcebergReadInstructions .builder ()
1428
+ .tableDefinition (tableDefinition )
1429
+ .build ();
1430
+ final Table fromIceberg = tableAdapter .table (readInstructions );
1431
+ final Table expected = source .dropColumns ("doubleCol" )
1432
+ .sort (List .of (SortColumn .asc (ColumnName .of ("intCol" ))));
1433
+ assertTableEquals (expected , fromIceberg );
1434
+
1435
+ // Verify that the sort order is still applied for the first column
1436
+ final ParquetInstructions parquetInstructions = ParquetInstructions .builder ()
1437
+ .setTableDefinition (tableDefinition )
1438
+ .build ();
1439
+ verifySortOrder (tableAdapter , tableIdentifier , List .of (
1440
+ List .of (SortColumn .asc (ColumnName .of ("intCol" )))),
1441
+ parquetInstructions );
1442
+ }
1443
+
1444
+ {
1445
+ // Now read the table with a different table definition skipping the "intCol"
1446
+ final TableDefinition tableDefinition = TableDefinition .of (
1447
+ ColumnDefinition .ofDouble ("doubleCol" ),
1448
+ ColumnDefinition .ofLong ("longCol" ));
1449
+ final IcebergReadInstructions readInstructions = IcebergReadInstructions .builder ()
1450
+ .tableDefinition (tableDefinition )
1451
+ .build ();
1452
+ final Table fromIceberg = tableAdapter .table (readInstructions );
1453
+ final Table expected = source
1454
+ .sort (List .of (SortColumn .asc (ColumnName .of ("intCol" )), SortColumn .desc (ColumnName .of ("doubleCol" ))))
1455
+ .dropColumns ("intCol" );
1456
+ assertTableEquals (expected , fromIceberg );
1457
+
1458
+ // Verify that the sort order is not applied for any columns since the first sorted column is skipped
1459
+ final ParquetInstructions parquetInstructions = ParquetInstructions .builder ()
1460
+ .setTableDefinition (tableDefinition )
1461
+ .build ();
1462
+ verifySortOrder (tableAdapter , tableIdentifier , List .of (List .of ()), parquetInstructions );
1463
+ }
1464
+ }
1465
+
1350
1466
@ Test
1351
1467
void appendTableWithAndWithoutDataInstructionsTest () {
1352
1468
final Table source = TableTools .newTable (
0 commit comments