17
17
import io .deephaven .util .channel .SeekableChannelsProvider ;
18
18
import io .deephaven .util .channel .SeekableChannelsProviderLoader ;
19
19
import org .apache .iceberg .*;
20
+ import org .apache .iceberg .catalog .Catalog ;
21
+ import org .apache .iceberg .catalog .TableIdentifier ;
20
22
import org .apache .iceberg .io .FileIO ;
21
23
import org .jetbrains .annotations .NotNull ;
22
24
import org .jetbrains .annotations .Nullable ;
23
25
24
26
import java .net .URI ;
25
- import java .util .HashMap ;
26
27
import java .util .List ;
27
28
import java .util .Map ;
29
+ import java .util .UUID ;
28
30
import java .util .function .Consumer ;
29
31
30
32
public abstract class IcebergBaseLayout implements TableLocationKeyFinder <IcebergTableLocationKey > {
@@ -33,6 +35,22 @@ public abstract class IcebergBaseLayout implements TableLocationKeyFinder<Iceber
33
35
*/
34
36
final IcebergTableAdapter tableAdapter ;
35
37
38
+ /**
39
+ * The UUID of the table, if available.
40
+ */
41
+ @ Nullable
42
+ private final UUID tableUuid ;
43
+
44
+ /**
45
+ * Name of the {@link Catalog} used to access this table, if available.
46
+ */
47
+ @ Nullable
48
+ private final String catalogName ;
49
+
50
+ /**
51
+ * The table identifier used to access this table.
52
+ */
53
+ private final TableIdentifier tableIdentifier ;
36
54
/**
37
55
* The {@link TableDefinition} that will be used for the life of this table. Although the Iceberg table schema may change,
38
56
* schema changes are not supported in Deephaven.
@@ -45,9 +63,9 @@ public abstract class IcebergBaseLayout implements TableLocationKeyFinder<Iceber
45
63
private final String uriScheme ;
46
64
47
65
/**
48
- * A cache of {@link IcebergTableLocationKey IcebergTableLocationKeys} keyed by the URI of the file they represent .
66
+ * The {@link Snapshot} from which to discover data files.
49
67
*/
50
- private final Map < URI , IcebergTableLocationKey > cache ;
68
+ Snapshot snapshot ;
51
69
52
70
/**
53
71
* The {@link ParquetInstructions} object that will be used to read any Parquet data files in this table.
@@ -60,17 +78,28 @@ public abstract class IcebergBaseLayout implements TableLocationKeyFinder<Iceber
60
78
*/
61
79
private final SeekableChannelsProvider channelsProvider ;
62
80
81
+
63
82
/**
64
- * The {@link Snapshot} from which to discover data files.
83
+ * Create a new {@link IcebergTableLocationKey} for the given {@link ManifestFile}, {@link DataFile} and
84
+ * {@link URI}.
85
+ *
86
+ * @param manifestFile The manifest file from which the data file was discovered
87
+ * @param dataFile The data file that backs the keyed location
88
+ * @param fileUri The {@link URI} for the file that backs the keyed location
89
+ * @param partitions The table partitions enclosing the table location keyed by the returned key. If {@code null},
90
+ * the location will be a member of no partitions.
91
+ *
92
+ * @return A new {@link IcebergTableLocationKey}
65
93
*/
66
- Snapshot snapshot ;
67
-
68
94
protected IcebergTableLocationKey locationKey (
69
- final org .apache .iceberg .FileFormat format ,
70
- final URI fileUri ,
95
+ @ NotNull final ManifestFile manifestFile ,
96
+ @ NotNull final DataFile dataFile ,
97
+ @ NotNull final URI fileUri ,
71
98
@ Nullable final Map <String , Comparable <?>> partitions ) {
99
+ final org .apache .iceberg .FileFormat format = dataFile .format ();
72
100
if (format == org .apache .iceberg .FileFormat .PARQUET ) {
73
- return new IcebergTableParquetLocationKey (fileUri , 0 , partitions , parquetInstructions , channelsProvider );
101
+ return new IcebergTableParquetLocationKey (catalogName , tableUuid , tableIdentifier , manifestFile , dataFile ,
102
+ fileUri , 0 , partitions , parquetInstructions , channelsProvider );
74
103
}
75
104
throw new UnsupportedOperationException (String .format ("%s:%d - an unsupported file format %s for URI '%s'" ,
76
105
tableAdapter , snapshot .snapshotId (), format , fileUri ));
@@ -85,6 +114,20 @@ public IcebergBaseLayout(
85
114
@ NotNull final IcebergReadInstructions instructions ,
86
115
@ NotNull final DataInstructionsProviderLoader dataInstructionsProvider ) {
87
116
this .tableAdapter = tableAdapter ;
117
+ {
118
+ UUID uuid ;
119
+ try {
120
+ uuid = tableAdapter .icebergTable ().uuid ();
121
+ } catch (final RuntimeException e ) {
122
+ // The UUID method is unsupported for v1 Iceberg tables since uuid is optional for v1 tables.
123
+ uuid = null ;
124
+ }
125
+ this .tableUuid = uuid ;
126
+ }
127
+
128
+ this .catalogName = tableAdapter .catalog ().name ();
129
+ this .tableIdentifier = tableAdapter .tableIdentifier ();
130
+
88
131
this .snapshot = tableAdapter .getSnapshot (instructions );
89
132
this .tableDef = tableAdapter .definition (instructions );
90
133
this .uriScheme = locationUri (tableAdapter .icebergTable ()).getScheme ();
@@ -111,10 +154,9 @@ public IcebergBaseLayout(
111
154
this .parquetInstructions = builder .build ();
112
155
}
113
156
this .channelsProvider = SeekableChannelsProviderLoader .getInstance ().load (uriScheme , specialInstructions );
114
- this .cache = new HashMap <>();
115
157
}
116
158
117
- abstract IcebergTableLocationKey keyFromDataFile (DataFile df , URI fileUri );
159
/**
 * Produce the {@link IcebergTableLocationKey} for one data file read from a manifest.
 * <p>
 * {@code findKeys} calls this once for each {@link DataFile} it reads from a {@link ManifestFile}, after checking
 * that the file's URI scheme matches this layout's scheme. A {@code null} result is permitted; in that case
 * nothing is passed to the location key observer for that file.
 *
 * @param manifestFile The manifest file from which the data file was read
 * @param dataFile The data file that backs the keyed location
 * @param fileUri The {@link URI} resolved for {@code dataFile}
 * @return The location key for the data file, or {@code null} to skip it
 */
+ abstract IcebergTableLocationKey keyFromDataFile (ManifestFile manifestFile , DataFile dataFile , URI fileUri );
118
160
119
161
private static String path (String path , FileIO io ) {
120
162
return io instanceof RelativeFileIO ? ((RelativeFileIO ) io ).absoluteLocation (path ) : path ;
@@ -145,15 +187,14 @@ public synchronized void findKeys(@NotNull final Consumer<IcebergTableLocationKe
145
187
table , snapshot .snapshotId (), manifestFile .content ()));
146
188
}
147
189
try (final ManifestReader <DataFile > reader = ManifestFiles .read (manifestFile , table .io ())) {
148
- for (DataFile df : reader ) {
149
- final URI fileUri = dataFileUri (table , df );
190
+ for (final DataFile dataFile : reader ) {
191
+ final URI fileUri = dataFileUri (table , dataFile );
150
192
if (!uriScheme .equals (fileUri .getScheme ())) {
151
193
throw new TableDataException (String .format (
152
194
"%s:%d - multiple URI schemes are not currently supported. uriScheme=%s, fileUri=%s" ,
153
195
table , snapshot .snapshotId (), uriScheme , fileUri ));
154
196
}
155
- final IcebergTableLocationKey locationKey =
156
- cache .computeIfAbsent (fileUri , uri -> keyFromDataFile (df , fileUri ));
197
+ final IcebergTableLocationKey locationKey = keyFromDataFile (manifestFile , dataFile , fileUri );
157
198
if (locationKey != null ) {
158
199
locationKeyObserver .accept (locationKey );
159
200
}
0 commit comments