//
package io.deephaven.iceberg.util;

+import io.deephaven.engine.table.Table;
+import io.deephaven.engine.util.TableTools;
import io.deephaven.extensions.s3.S3Instructions;
+import io.deephaven.iceberg.base.IcebergUtils;
import io.deephaven.iceberg.junit5.SqliteCatalogBase;
+import io.deephaven.iceberg.sqlite.SqliteHelper;
import org.apache.iceberg.CatalogProperties;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DataFiles;
import org.apache.iceberg.aws.s3.S3FileIO;
import org.junit.jupiter.api.TestInfo;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.api.Test;
+import org.apache.iceberg.catalog.TableIdentifier;
import software.amazon.awssdk.services.s3.S3AsyncClient;
import software.amazon.awssdk.services.s3.model.CreateBucketRequest;
+import software.amazon.awssdk.services.s3.model.HeadBucketRequest;
+import software.amazon.awssdk.services.s3.model.NoSuchBucketException;

import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
+import java.util.stream.Collectors;

+import static io.deephaven.engine.testutil.TstUtils.assertTableEquals;
+import static io.deephaven.engine.util.TableTools.doubleCol;
+import static io.deephaven.engine.util.TableTools.intCol;
import static io.deephaven.extensions.s3.testlib.S3Helper.TIMEOUT_SECONDS;
+import static io.deephaven.iceberg.base.IcebergUtils.dataFileUri;
+import static io.deephaven.iceberg.base.IcebergUtils.locationUri;
+import static org.assertj.core.api.Assertions.assertThat;

abstract class S3WarehouseSqliteCatalogBase extends SqliteCatalogBase {
@@ -32,17 +52,131 @@ public final Object dataInstructions() {
    }

    @Override
-    protected IcebergCatalogAdapter catalogAdapter(TestInfo testInfo, Path rootDir, Map<String, String> properties)
+    protected IcebergCatalogAdapter catalogAdapter(
+            final TestInfo testInfo,
+            final Path rootDir,
+            final Map<String, String> properties)
+            throws ExecutionException, InterruptedException, TimeoutException {
+        return catalogAdapterForScheme(testInfo, properties, "s3");
+    }
+
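+    // Central helper for building a catalog adapter whose warehouse URI uses the given scheme;
+    // the default adapter above uses "s3", while the tests below exercise "s3a" and "s3n".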
+    private IcebergCatalogAdapter catalogAdapterForScheme(
+            final TestInfo testInfo,
+            final Map<String, String> properties,
+            final String scheme)
            throws ExecutionException, InterruptedException, TimeoutException {
        final String methodName = testInfo.getTestMethod().orElseThrow().getName();
        final String catalogName = methodName + "-catalog";
        final String bucket = methodName.toLowerCase(Locale.US) + "-bucket";
        try (final S3AsyncClient client = s3AsyncClient()) {
-            client.createBucket(CreateBucketRequest.builder().bucket(bucket).build())
-                    .get(TIMEOUT_SECONDS, TimeUnit.SECONDS);
+            if (!doesBucketExist(client, bucket)) {
+                client.createBucket(CreateBucketRequest.builder().bucket(bucket).build())
+                        .get(TIMEOUT_SECONDS, TimeUnit.SECONDS);
+            }
        }
-        properties.put(CatalogProperties.WAREHOUSE_LOCATION, "s3://" + bucket + "/warehouse");
+        properties.put(CatalogProperties.WAREHOUSE_LOCATION, scheme + "://" + bucket + "/warehouse");
        properties.put(CatalogProperties.FILE_IO_IMPL, S3FileIO.class.getName());
        return IcebergToolsS3.createAdapter(catalogName, properties, Map.of(), s3Instructions());
    }
+
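+    // Existence probe so bucket creation above is idempotent. HeadBucket is the lightweight way
+    // to ask whether a bucket exists; with the async client, the service's 404 surfaces as a
+    // NoSuchBucketException nested inside the ExecutionException thrown by Future.get().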
82
+ private boolean doesBucketExist (final S3AsyncClient client , final String bucketName )
83
+ throws ExecutionException , InterruptedException , TimeoutException {
84
+ try {
85
+ client .headBucket (HeadBucketRequest .builder ().bucket (bucketName ).build ())
86
+ .get (TIMEOUT_SECONDS , TimeUnit .SECONDS );
87
+ return true ;
88
+ } catch (ExecutionException e ) {
89
+ if (e .getCause () instanceof NoSuchBucketException ) {
90
+ return false ;
91
+ }
92
+ throw e ;
93
+ }
94
+ }
95
+
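+    // "s3a" (the current Hadoop S3 filesystem scheme) and "s3n" (its deprecated predecessor)
+    // address the same storage as plain "s3"; tables whose metadata uses either should still be
+    // readable.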
+    @Test
+    void testIcebergTablesWithS3AScheme(TestInfo testInfo, @TempDir Path rootDir)
+            throws ExecutionException, InterruptedException, TimeoutException {
+        testIcebergTablesWithCustomScheme("s3a", testInfo, rootDir);
+    }
+
+    @Test
+    void testIcebergTablesWithS3NScheme(TestInfo testInfo, @TempDir Path rootDir)
+            throws ExecutionException, InterruptedException, TimeoutException {
+        testIcebergTablesWithCustomScheme("s3n", testInfo, rootDir);
+    }
+
+    private void testIcebergTablesWithCustomScheme(final String scheme, final TestInfo testInfo, final Path rootDir)
+            throws ExecutionException, InterruptedException, TimeoutException {
+        final Map<String, String> properties = new HashMap<>();
+        SqliteHelper.setJdbcCatalogProperties(properties, rootDir);
+        final IcebergCatalogAdapter catalogAdapter = catalogAdapterForScheme(testInfo, properties, scheme);
+
+        final TableIdentifier tableIdentifier = TableIdentifier.parse("MyNamespace.MyTable");
+
+        final Table data = TableTools.newTable(
+                intCol("intCol", 2, 4, 6, 8, 10),
+                doubleCol("doubleCol", 2.5, 5.0, 7.5, 10.0, 12.5));
+
+        // Create a new Iceberg table
+        final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, data.getDefinition());
+        final org.apache.iceberg.Table icebergTable = tableAdapter.icebergTable();
+
+        // Verify that the table location has the right scheme
+        assertThat(locationUri(icebergTable).getScheme()).isEqualTo(scheme);
+
+        // Add data to the table
+        final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder()
+                .tableDefinition(data.getDefinition())
+                .build());
+        tableWriter.append(IcebergWriteInstructions.builder()
+                .addTables(data, data)
+                .build());
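+        // addTables(data, data) stages the same Deephaven table twice, so the commit should
+        // produce two data files with identical contents.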
+
+        // Verify all data files have the right scheme
+        final List<DataFile> dataFiles = IcebergUtils.allDataFiles(icebergTable, icebergTable.currentSnapshot())
+                .collect(Collectors.toList());
+        assertThat(dataFiles).hasSize(2);
+        assertThat(dataFiles).allMatch(dataFile -> dataFileUri(icebergTable, dataFile).getScheme().equals(scheme));
+
+        // Verify the data is correct
+        Table fromIceberg = tableAdapter.table();
+        Table expected = TableTools.merge(data, data);
+        assertTableEquals(expected, fromIceberg);
+
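+        // A table's metadata may legitimately mix URI schemes that point at the same storage;
+        // simulate that by re-registering an existing data file under "s3://".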
+        // Create a new data file, but with the s3 scheme
+        final DataFile existingDataFile = dataFiles.get(0);
+        final String existingDataFileLocation = existingDataFile.location();
+        assertThat(existingDataFileLocation).startsWith(scheme);
+        final String newLocation = existingDataFileLocation.replace(scheme + "://", "s3://");
+        final DataFile newDataFile = DataFiles.builder(icebergTable.spec())
+                .withPath(newLocation)
+                .withFormat(existingDataFile.format())
+                .withRecordCount(existingDataFile.recordCount())
+                .withFileSizeInBytes(existingDataFile.fileSizeInBytes())
+                .build();
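+        // Only the location differs: the new entry points at the same physical Parquet object as
+        // the original, so the rows it contributes duplicate the first data file.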
+
+        // Append the new data file to the table
+        icebergTable.newAppend().appendFile(newDataFile).commit();
+
+        // Verify the new data file has the right scheme
+        final List<DataFile> newDataFiles = IcebergUtils.allDataFiles(icebergTable, icebergTable.currentSnapshot())
+                .collect(Collectors.toList());
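+        // Expect three entries now: the two written with the custom scheme plus the one
+        // re-registered under "s3".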
+        int s3DataFiles = 0;
+        int nonS3DataFiles = 0;
+        for (final DataFile dataFile : newDataFiles) {
+            if (dataFileUri(icebergTable, dataFile).getScheme().equals(scheme)) {
+                nonS3DataFiles++;
+            } else {
+                assertThat(dataFileUri(icebergTable, dataFile).getScheme()).isEqualTo("s3");
+                s3DataFiles++;
+            }
+        }
+        assertThat(s3DataFiles).isEqualTo(1);
+        assertThat(nonS3DataFiles).isEqualTo(2);
+
+        // Verify the data is correct
+        fromIceberg = tableAdapter.table();
+        expected = TableTools.merge(expected, data);
+        assertTableEquals(expected, fromIceberg);
+    }
}