39
39
40
40
class IcebergUpdateMode (JObjectWrapper ):
41
41
"""
42
- :class:`. IcebergUpdateMode` specifies the update mode for an Iceberg table to be loaded into Deephaven. The modes
42
+ ` IcebergUpdateMode` specifies the update mode for an Iceberg table to be loaded into Deephaven. The modes
43
43
are:
44
44
45
45
- :py:func:`static() <IcebergUpdateMode.static>`: The table is loaded once and does not change
@@ -87,7 +87,7 @@ def j_object(self) -> jpy.JType:
87
87
88
88
class IcebergReadInstructions (JObjectWrapper ):
89
89
"""
90
- :class:`. IcebergReadInstructions` specifies the instructions for reading an Iceberg table into Deephaven. These
90
+ ` IcebergReadInstructions` specifies the instructions for reading an Iceberg table into Deephaven. These
91
91
include column rename instructions and table definitions, as well as special data instructions for loading data
92
92
files from the cloud.
93
93
"""
@@ -149,7 +149,7 @@ def j_object(self) -> jpy.JType:
149
149
150
150
class IcebergWriteInstructions (JObjectWrapper ):
151
151
"""
152
- :class:`. IcebergWriteInstructions` provides instructions intended for writing deephaven tables as partitions to Iceberg
152
+ ` IcebergWriteInstructions` provides instructions intended for writing Deephaven tables as partitions to Iceberg
153
153
tables.
154
154
"""
155
155
@@ -164,13 +164,13 @@ def __init__(self,
164
164
Args:
165
165
tables (Union[Table, Sequence[Table]]): The Deephaven tables to write.
166
166
partition_paths (Optional[Union[str, Sequence[str]]]): The partition paths where each table will be written.
167
- For example, if the iceberg table is partitioned by "year" and "month", a partition path could be
167
+ For example, if the Iceberg table is partitioned by "year" and "month", a partition path could be
168
168
"year=2021/month=01".
169
- If writing to a partitioned iceberg table, users must provide partition path for each table in tables
169
+ If writing to a partitioned Iceberg table, users must provide a partition path for each table in the tables
170
170
argument in the same order.
171
171
Else when writing to a non-partitioned table, users should not provide any partition paths.
172
172
Defaults to `None`, which means the Deephaven tables will be written to the root data directory of the
173
- iceberg table.
173
+ Iceberg table.
174
174
175
175
Raises:
176
176
DHError: If unable to build the instructions object.
@@ -204,7 +204,7 @@ def j_object(self) -> jpy.JType:
204
204
205
205
class SchemaProvider (JObjectWrapper ):
206
206
"""
207
- :class:`. SchemaProvider` is used to extract the schema from an Iceberg table. Users can specify multiple ways to do
207
+ ` SchemaProvider` is used to extract the schema from an Iceberg table. Users can specify multiple ways to do
208
208
so, for example, by schema ID, snapshot ID, current schema, etc. This can be useful for passing a schema when
209
209
writing to an Iceberg table.
210
210
"""
@@ -213,10 +213,10 @@ class SchemaProvider(JObjectWrapper):
213
213
214
214
def __init__ (self , _j_object : jpy .JType ):
215
215
"""
216
- Initializes the :class:`. SchemaProvider` object.
216
+ Initializes the ` SchemaProvider` object.
217
217
218
218
Args:
219
- _j_object (SchemaProvider): the Java :class:`. SchemaProvider` object.
219
+ _j_object (SchemaProvider): the Java ` SchemaProvider` object.
220
220
"""
221
221
self ._j_object = _j_object
222
222
@@ -230,7 +230,7 @@ def from_current(cls) -> 'SchemaProvider':
230
230
Used for extracting the current schema from the table.
231
231
232
232
Returns:
233
- the SchemaProvider object.
233
+ the ` SchemaProvider` object.
234
234
"""
235
235
return cls (_JSchemaProvider .fromCurrent ())
236
236
@@ -243,7 +243,7 @@ def from_schema_id(cls, schema_id: int) -> 'SchemaProvider':
243
243
schema_id (int): the schema id to use.
244
244
245
245
Returns:
246
- the :class:`. SchemaProvider` object.
246
+ the ` SchemaProvider` object.
247
247
"""
248
248
return cls (_JSchemaProvider .fromSchemaId (schema_id ))
249
249
@@ -256,7 +256,7 @@ def from_snapshot_id(cls, snapshot_id: int) -> 'SchemaProvider':
256
256
snapshot_id (int): the snapshot id to use.
257
257
258
258
Returns:
259
- the :class:`. SchemaProvider` object.
259
+ the ` SchemaProvider` object.
260
260
"""
261
261
return cls (_JSchemaProvider .fromSnapshotId (snapshot_id ))
262
262
@@ -273,19 +273,20 @@ def from_current_snapshot(cls) -> 'SchemaProvider':
273
273
274
274
class SortOrderProvider (JObjectWrapper ):
275
275
"""
276
- :class:`.SortOrderProvider` is used for providing SortOrder to be used for sorting new data while writing to an
277
- iceberg table using this writer. Users can specify multiple ways to do so, for example, by sort ID, table default,
278
- etc.
276
+ `SortOrderProvider` is used to specify the sort order for new data when writing to an Iceberg table. More details
277
+ about sort order can be found in the Iceberg spec: https://iceberg.apache.org/spec/#sorting.
278
+ Users can specify the sort order in multiple ways, such as by providing a sort ID or using the table's default sort
279
+ order. This class consists of factory methods to create different sort order providers.
279
280
"""
280
281
281
282
j_object_type = _JSortOrderProvider
282
283
283
284
def __init__ (self , _j_object : jpy .JType ):
284
285
"""
285
- Initializes the :class:`. SortOrderProvider` object.
286
+ Initializes the ` SortOrderProvider` object.
286
287
287
288
Args:
288
- _j_object (SortOrderProvider): the Java :class:`. SortOrderProvider` object.
289
+ _j_object (SortOrderProvider): the Java ` SortOrderProvider` object.
289
290
"""
290
291
self ._j_object = _j_object
291
292
@@ -296,10 +297,10 @@ def j_object(self) -> jpy.JType:
296
297
@classmethod
297
298
def unsorted (cls ) -> 'SortOrderProvider' :
298
299
"""
299
- Used to disable sorting while writing new data to the iceberg table.
300
+ Used to disable sorting while writing new data to the Iceberg table.
300
301
301
302
Returns:
302
- the SortOrderProvider object.
303
+ the ` SortOrderProvider` object.
303
304
"""
304
305
return cls (_JSortOrderProvider .unsorted ())
305
306
@@ -310,54 +311,55 @@ def use_table_default(cls) -> 'SortOrderProvider':
310
311
will be done.
311
312
312
313
Returns:
313
- the :class:`. SortOrderProvider` object.
314
+ the ` SortOrderProvider` object.
314
315
"""
315
316
return cls (_JSortOrderProvider .useTableDefault ())
316
317
317
318
@classmethod
318
319
def from_sort_id (cls , sort_order_id : int ) -> 'SortOrderProvider' :
319
320
"""
320
- Use the sort order with the given ID to sort new data while writing to the iceberg table.
321
+ Use the sort order with the given ID to sort new data while writing to the Iceberg table.
321
322
322
323
Args:
323
324
sort_order_id (int): the id of the sort order to use.
324
325
325
326
Returns:
326
- the :class: `.SortOrderProvider` object.
327
+ the `SortOrderProvider` object.
327
328
"""
328
329
return cls (_JSortOrderProvider .fromSortId (sort_order_id ))
329
330
330
331
def with_id (self , sort_order_id : int ) -> 'SortOrderProvider' :
331
332
"""
332
- Returns a sort order provider that delegates to this provider for computing the columns to sort on, but writes a
333
- different sort order ID to the iceberg table.
334
- For example, this provider might return fields {A, B, C} to sort on , but the ID written to iceberg corresponds
335
- to sort order with fields {A, B}.
333
+ Returns a sort order provider that uses the current provider to determine the columns to sort on, but writes a
334
+ different sort order ID to the Iceberg table.
335
+ For example, this provider might sort by columns {A, B, C}, but the ID written to Iceberg corresponds to a sort
336
+ order with columns {A, B}.
336
337
337
338
Args:
338
- sort_order_id (int): the sort order ID to write to the iceberg table.
339
+ sort_order_id (int): the sort order ID to write to the Iceberg table.
339
340
340
341
Returns:
341
- the :class:`. SortOrderProvider` object.
342
+ the ` SortOrderProvider` object.
342
343
"""
343
344
return SortOrderProvider (self ._j_object .withId (sort_order_id ))
344
345
345
346
def with_fail_on_unmapped (self , fail_on_unmapped : bool ) -> 'SortOrderProvider' :
346
347
"""
347
- Returns a sort order provider which will fail, if for any reason, the sort order cannot be applied to the
348
- tables being written. By default, the provider will not fail if the sort order cannot be applied.
348
+ Returns a sort order provider that will fail if the sort order cannot be applied to the tables being written.
349
+ By default, if the sort order cannot be applied, the tables will be written without sorting.
349
350
350
351
Args:
351
352
fail_on_unmapped (bool): whether to fail if the sort order cannot be applied to the tables being written
352
353
353
354
Returns:
354
- the :class:`. SortOrderProvider` object.
355
+ the ` SortOrderProvider` object.
355
356
"""
356
357
return SortOrderProvider (self ._j_object .withFailOnUnmapped (fail_on_unmapped ))
357
358
359
+
358
360
class TableParquetWriterOptions (JObjectWrapper ):
359
361
"""
360
- :class:`. TableParquetWriterOptions` provides specialized instructions for configuring :class:`. IcebergTableWriter`
362
+ ` TableParquetWriterOptions` provides specialized instructions for configuring ` IcebergTableWriter`
361
363
instances.
362
364
"""
363
365
@@ -380,7 +382,7 @@ def __init__(self,
380
382
table_definition (TableDefinitionLike): The table definition to use when writing Iceberg data files using
381
383
this writer instance. This definition can be used to skip some columns or add additional columns with
382
384
null values. The provided definition should have at least one column.
383
- schema_provider: Optional[SchemaProvider]: Used to extract a Schema from a iceberg table. This schema will
385
+ schema_provider (Optional[SchemaProvider]): Used to extract a Schema from an Iceberg table. This schema will
384
386
be used in conjunction with the field_id_to_column_name to map Deephaven columns from table_definition
385
387
to Iceberg columns.
386
388
Defaults to `None`, which means use the current schema from the table.
@@ -398,10 +400,12 @@ def __init__(self,
398
400
`None`, which means use 2^20 (1,048,576)
399
401
target_page_size (Optional[int]): the target Parquet file page size in bytes. Defaults to
400
402
`None`, which means use 2^20 bytes (1 MiB)
401
- sort_order_provider: Optional[SortOrderProvider]: Used to provide SortOrder to be used for sorting new data
402
- while writing to an iceberg table using this writer. Note that we select the sort order of the Table at
403
- the time the writer is constructed, and it does not change if the table's sort order changes. Defaults
404
- to `None`, which means use the table's default sort order.
403
+ sort_order_provider (Optional[SortOrderProvider]): Specifies the sort order to use for sorting new data
404
+ when writing to an Iceberg table with this writer. The sort order is determined at the time the writer
405
+ is created and does not change if the table's sort order changes later. Defaults to `None`, which means
406
+ the table's default sort order is used. More details about sort order can be found in the Iceberg
407
+ spec: https://iceberg.apache.org/spec/#sorting
408
+
405
409
406
410
Raises:
407
411
DHError: If unable to build the object.
@@ -449,7 +453,7 @@ def j_object(self) -> jpy.JType:
449
453
450
454
class IcebergTable (Table ):
451
455
"""
452
- :class:`. IcebergTable` is a subclass of Table that allows users to dynamically update the table with new snapshots
456
+ ` IcebergTable` is a subclass of Table that allows users to dynamically update the table with new snapshots
453
457
from the Iceberg catalog.
454
458
"""
455
459
j_object_type = _JIcebergTable
@@ -488,8 +492,8 @@ def j_object(self) -> jpy.JType:
488
492
489
493
class IcebergTableWriter (JObjectWrapper ):
490
494
"""
491
- :class:`. IcebergTableWriter` is responsible for writing Deephaven tables to an Iceberg table. Each
492
- :class:`. IcebergTableWriter` instance associated with a single :class:`. IcebergTableAdapter` and can be used to
495
+ ` IcebergTableWriter` is responsible for writing Deephaven tables to an Iceberg table. Each
496
+ ` IcebergTableWriter` instance is associated with a single ` IcebergTableAdapter` and can be used to
493
497
write multiple Deephaven tables to this Iceberg table.
494
498
"""
495
499
j_object_type = _JIcebergTableWriter or type (None )
@@ -504,7 +508,7 @@ def append(self, instructions: IcebergWriteInstructions):
504
508
partition paths where each table will be written using the :attr:`.IcebergWriteInstructions.partition_paths`
505
509
parameter.
506
510
This method will not perform any compatibility checks between the existing schema and the provided Deephaven
507
- tables. All such checks happen at the time of creation of the :class:`. IcebergTableWriter` instance.
511
+ tables. All such checks happen at the time of creation of the ` IcebergTableWriter` instance.
508
512
509
513
Args:
510
514
instructions (IcebergWriteInstructions): the customization instructions for write.
@@ -518,7 +522,7 @@ def j_object(self) -> jpy.JType:
518
522
519
523
class IcebergTableAdapter (JObjectWrapper ):
520
524
"""
521
- :class:`. IcebergTableAdapter` provides an interface for interacting with Iceberg tables. It allows the user to list
525
+ ` IcebergTableAdapter` provides an interface for interacting with Iceberg tables. It allows the user to list
522
526
snapshots, retrieve table definitions, and read Iceberg tables into Deephaven tables.
523
527
"""
524
528
j_object_type = _JIcebergTableAdapter or type (None )
@@ -579,7 +583,7 @@ def table(self, instructions: Optional[IcebergReadInstructions] = None) -> Icebe
579
583
580
584
def table_writer (self , writer_options : TableParquetWriterOptions ) -> IcebergTableWriter :
581
585
"""
582
- Create a new :class:`. IcebergTableWriter` for this Iceberg table using the provided writer options.
586
+ Create a new ` IcebergTableWriter` for this Iceberg table using the provided writer options.
583
587
This method will perform schema validation to ensure that the provided table definition from the writer options
584
588
is compatible with the Iceberg table schema. All further writes performed by the returned writer will not be
585
589
validated against the table's schema, and thus will be faster.
@@ -599,7 +603,7 @@ def j_object(self) -> jpy.JType:
599
603
600
604
class IcebergCatalogAdapter (JObjectWrapper ):
601
605
"""
602
- :class:`. IcebergCatalogAdapter` provides an interface for interacting with Iceberg catalogs. It allows listing
606
+ ` IcebergCatalogAdapter` provides an interface for interacting with Iceberg catalogs. It allows listing
603
607
namespaces, tables and snapshots, as well as reading Iceberg tables into Deephaven tables.
604
608
"""
605
609
j_object_type = _JIcebergCatalogAdapter or type (None )
@@ -660,7 +664,7 @@ def create_table(self, table_identifier: str, table_definition: TableDefinitionL
660
664
table_definition (TableDefinitionLike): the table definition of the new table.
661
665
662
666
Returns:
663
- :class:`. IcebergTableAdapter`: the table adapter for the new Iceberg table.
667
+ ` IcebergTableAdapter`: the table adapter for the new Iceberg table.
664
668
"""
665
669
666
670
return IcebergTableAdapter (self .j_object .createTable (table_identifier ,
@@ -700,7 +704,7 @@ def adapter_s3_rest(
700
704
need to set this; it is most useful when connecting to non-AWS, S3-compatible APIs.
701
705
702
706
Returns:
703
- :class:`. IcebergCatalogAdapter`: the catalog adapter for the provided S3 REST catalog.
707
+ ` IcebergCatalogAdapter`: the catalog adapter for the provided S3 REST catalog.
704
708
705
709
Raises:
706
710
DHError: If unable to build the catalog adapter.
@@ -738,7 +742,7 @@ def adapter_aws_glue(
738
742
catalog URI.
739
743
740
744
Returns:
741
- :class:`. IcebergCatalogAdapter`: the catalog adapter for the provided AWS Glue catalog.
745
+ ` IcebergCatalogAdapter`: the catalog adapter for the provided AWS Glue catalog.
742
746
743
747
Raises:
744
748
DHError: If unable to build the catalog adapter.
@@ -834,7 +838,7 @@ def adapter(
834
838
hadoop_config (Optional[Dict[str, str]]): hadoop configuration properties for the catalog to load
835
839
s3_instructions (Optional[s3.S3Instructions]): the S3 instructions if applicable
836
840
Returns:
837
- :class:`. IcebergCatalogAdapter`: the catalog adapter created from the provided properties
841
+ ` IcebergCatalogAdapter`: the catalog adapter created from the provided properties
838
842
839
843
Raises:
840
844
DHError: If unable to build the catalog adapter
0 commit comments