Skip to content

Commit af24391

Browse files
Python review Part 1
1 parent dcc0b99 commit af24391

File tree

2 files changed

+52
-51
lines changed

2 files changed

+52
-51
lines changed

extensions/iceberg/src/main/java/io/deephaven/iceberg/location/IcebergTableParquetLocation.java

-3
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,6 @@ private static List<SortColumn> computeSortedColumns(
7474
} else if (field.nullOrder() == NullOrder.NULLS_LAST && field.direction() == SortDirection.DESC) {
7575
sortColumn = SortColumn.desc(columnName);
7676
} else {
77-
// TODO Check with Devin if this is okay, The assumption here is that deephaven sorts nulls first for
78-
// ascending order and nulls last for descending, so if we don't have the correct nulls order, we
79-
// cannot use the column as a sort column
8077
break;
8178
}
8279
sortColumns.add(sortColumn);

py/server/deephaven/experimental/iceberg.py

+52-48
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939

4040
class IcebergUpdateMode(JObjectWrapper):
4141
"""
42-
:class:`.IcebergUpdateMode` specifies the update mode for an Iceberg table to be loaded into Deephaven. The modes
42+
`IcebergUpdateMode` specifies the update mode for an Iceberg table to be loaded into Deephaven. The modes
4343
are:
4444
4545
- :py:func:`static() <IcebergUpdateMode.static>`: The table is loaded once and does not change
@@ -87,7 +87,7 @@ def j_object(self) -> jpy.JType:
8787

8888
class IcebergReadInstructions(JObjectWrapper):
8989
"""
90-
:class:`.IcebergReadInstructions` specifies the instructions for reading an Iceberg table into Deephaven. These
90+
`IcebergReadInstructions` specifies the instructions for reading an Iceberg table into Deephaven. These
9191
include column rename instructions and table definitions, as well as special data instructions for loading data
9292
files from the cloud.
9393
"""
@@ -149,7 +149,7 @@ def j_object(self) -> jpy.JType:
149149

150150
class IcebergWriteInstructions(JObjectWrapper):
151151
"""
152-
:class:`.IcebergWriteInstructions` provides instructions intended for writing deephaven tables as partitions to Iceberg
152+
`IcebergWriteInstructions` provides instructions intended for writing deephaven tables as partitions to Iceberg
153153
tables.
154154
"""
155155

@@ -164,13 +164,13 @@ def __init__(self,
164164
Args:
165165
tables (Union[Table, Sequence[Table]]): The deephaven tables to write.
166166
partition_paths (Optional[Union[str, Sequence[str]]]): The partition paths where each table will be written.
167-
For example, if the iceberg table is partitioned by "year" and "month", a partition path could be
167+
For example, if the Iceberg table is partitioned by "year" and "month", a partition path could be
168168
"year=2021/month=01".
169-
If writing to a partitioned iceberg table, users must provide partition path for each table in tables
169+
If writing to a partitioned Iceberg table, users must provide partition path for each table in tables
170170
argument in the same order.
171171
Else when writing to a non-partitioned table, users should not provide any partition paths.
172172
Defaults to `None`, which means the deephaven tables will be written to the root data directory of the
173-
iceberg table.
173+
Iceberg table.
174174
175175
Raises:
176176
DHError: If unable to build the instructions object.
@@ -204,7 +204,7 @@ def j_object(self) -> jpy.JType:
204204

205205
class SchemaProvider(JObjectWrapper):
206206
"""
207-
:class:`.SchemaProvider` is used to extract the schema from an Iceberg table. Users can specify multiple ways to do
207+
`SchemaProvider` is used to extract the schema from an Iceberg table. Users can specify multiple ways to do
208208
so, for example, by schema ID, snapshot ID, current schema, etc. This can be useful for passing a schema when
209209
writing to an Iceberg table.
210210
"""
@@ -213,10 +213,10 @@ class SchemaProvider(JObjectWrapper):
213213

214214
def __init__(self, _j_object: jpy.JType):
215215
"""
216-
Initializes the :class:`.SchemaProvider` object.
216+
Initializes the `SchemaProvider` object.
217217
218218
Args:
219-
_j_object (SchemaProvider): the Java :class:`.SchemaProvider` object.
219+
_j_object (SchemaProvider): the Java `SchemaProvider` object.
220220
"""
221221
self._j_object = _j_object
222222

@@ -230,7 +230,7 @@ def from_current(cls) -> 'SchemaProvider':
230230
Used for extracting the current schema from the table.
231231
232232
Returns:
233-
the SchemaProvider object.
233+
the `SchemaProvider` object.
234234
"""
235235
return cls(_JSchemaProvider.fromCurrent())
236236

@@ -243,7 +243,7 @@ def from_schema_id(cls, schema_id: int) -> 'SchemaProvider':
243243
schema_id (int): the schema id to use.
244244
245245
Returns:
246-
the :class:`.SchemaProvider` object.
246+
the `SchemaProvider` object.
247247
"""
248248
return cls(_JSchemaProvider.fromSchemaId(schema_id))
249249

@@ -256,7 +256,7 @@ def from_snapshot_id(cls, snapshot_id: int) -> 'SchemaProvider':
256256
snapshot_id (int): the snapshot id to use.
257257
258258
Returns:
259-
the :class:`.SchemaProvider` object.
259+
the `SchemaProvider` object.
260260
"""
261261
return cls(_JSchemaProvider.fromSnapshotId(snapshot_id))
262262

@@ -273,19 +273,20 @@ def from_current_snapshot(cls) -> 'SchemaProvider':
273273

274274
class SortOrderProvider(JObjectWrapper):
275275
"""
276-
:class:`.SortOrderProvider` is used for providing SortOrder to be used for sorting new data while writing to an
277-
iceberg table using this writer. Users can specify multiple ways to do so, for example, by sort ID, table default,
278-
etc.
276+
`SortOrderProvider` is used to specify the sort order for new data when writing to an Iceberg table. More details
277+
about sort order can be found in the Iceberg spec: https://iceberg.apache.org/spec/#sorting.
278+
Users can specify the sort order in multiple ways, such as by providing a sort ID or using the table's default sort
279+
order. This class consists of factory methods to create different sort order providers.
279280
"""
280281

281282
j_object_type = _JSortOrderProvider
282283

283284
def __init__(self, _j_object: jpy.JType):
284285
"""
285-
Initializes the :class:`.SortOrderProvider` object.
286+
Initializes the `SortOrderProvider` object.
286287
287288
Args:
288-
_j_object (SortOrderProvider): the Java :class:`.SortOrderProvider` object.
289+
_j_object (SortOrderProvider): the Java `SortOrderProvider` object.
289290
"""
290291
self._j_object = _j_object
291292

@@ -296,10 +297,10 @@ def j_object(self) -> jpy.JType:
296297
@classmethod
297298
def unsorted(cls) -> 'SortOrderProvider':
298299
"""
299-
Used to disable sorting while writing new data to the iceberg table.
300+
Used to disable sorting while writing new data to the Iceberg table.
300301
301302
Returns:
302-
the SortOrderProvider object.
303+
the `SortOrderProvider` object.
303304
"""
304305
return cls(_JSortOrderProvider.unsorted())
305306

@@ -310,54 +311,55 @@ def use_table_default(cls) -> 'SortOrderProvider':
310311
will be done.
311312
312313
Returns:
313-
the :class:`.SortOrderProvider` object.
314+
the `SortOrderProvider` object.
314315
"""
315316
return cls(_JSortOrderProvider.useTableDefault())
316317

317318
@classmethod
318319
def from_sort_id(cls, sort_order_id: int) -> 'SortOrderProvider':
319320
"""
320-
Use the sort order with the given ID to sort new data while writing to the iceberg table.
321+
Use the sort order with the given ID to sort new data while writing to the Iceberg table.
321322
322323
Args:
323324
sort_order_id (int): the id of the sort order to use.
324325
325326
Returns:
326-
the :class:`.SortOrderProvider` object.
327+
the `.SortOrderProvider` object.
327328
"""
328329
return cls(_JSortOrderProvider.fromSortId(sort_order_id))
329330

330331
def with_id(self, sort_order_id: int) -> 'SortOrderProvider':
331332
"""
332-
Returns a sort order provider that delegates to this provider for computing the columns to sort on, but writes a
333-
different sort order ID to the iceberg table.
334-
For example, this provider might return fields {A, B, C} to sort on, but the ID written to iceberg corresponds
335-
to sort order with fields {A, B}.
333+
Returns a sort order provider that uses the current provider to determine the columns to sort on, but writes a
334+
different sort order ID to the Iceberg table.
335+
For example, this provider might sort by columns {A, B, C}, but the ID written to Iceberg corresponds to a sort
336+
order with columns {A, B}.
336337
337338
Args:
338-
sort_order_id (int): the sort order ID to write to the iceberg table.
339+
sort_order_id (int): the sort order ID to write to the Iceberg table.
339340
340341
Returns:
341-
the :class:`.SortOrderProvider` object.
342+
the `SortOrderProvider` object.
342343
"""
343344
return SortOrderProvider(self._j_object.withId(sort_order_id))
344345

345346
def with_fail_on_unmapped(self, fail_on_unmapped: bool) -> 'SortOrderProvider':
346347
"""
347-
Returns a sort order provider which will fail, if for any reason, the sort order cannot be applied to the
348-
tables being written. By default, the provider will not fail if the sort order cannot be applied.
348+
Returns a sort order provider that will fail if the sort order cannot be applied to the tables being written.
349+
By default, if the sort order cannot be applied, the tables will be written without sorting.
349350
350351
Args:
351352
fail_on_unmapped: whether to fail if the sort order cannot be applied to the tables being written
352353
353354
Returns:
354-
the :class:`.SortOrderProvider` object.
355+
the `SortOrderProvider` object.
355356
"""
356357
return SortOrderProvider(self._j_object.withFailOnUnmapped(fail_on_unmapped))
357358

359+
358360
class TableParquetWriterOptions(JObjectWrapper):
359361
"""
360-
:class:`.TableParquetWriterOptions` provides specialized instructions for configuring :class:`.IcebergTableWriter`
362+
`TableParquetWriterOptions` provides specialized instructions for configuring `IcebergTableWriter`
361363
instances.
362364
"""
363365

@@ -380,7 +382,7 @@ def __init__(self,
380382
table_definition: TableDefinitionLike: The table definition to use when writing Iceberg data files using
381383
this writer instance. This definition can be used to skip some columns or add additional columns with
382384
null values. The provided definition should have at least one column.
383-
schema_provider: Optional[SchemaProvider]: Used to extract a Schema from a iceberg table. This schema will
385+
schema_provider: Optional[SchemaProvider]: Used to extract a Schema from an Iceberg table. This schema will
384386
be used in conjunction with the field_id_to_column_name to map Deephaven columns from table_definition
385387
to Iceberg columns.
386388
Defaults to `None`, which means use the current schema from the table.
@@ -398,10 +400,12 @@ def __init__(self,
398400
`None`, which means use 2^20 (1,048,576)
399401
target_page_size (Optional[int]): the target Parquet file page size in bytes, if not specified. Defaults to
400402
`None`, which means use 2^20 bytes (1 MiB)
401-
sort_order_provider: Optional[SortOrderProvider]: Used to provide SortOrder to be used for sorting new data
402-
while writing to an iceberg table using this writer. Note that we select the sort order of the Table at
403-
the time the writer is constructed, and it does not change if the table's sort order changes. Defaults
404-
to `None`, which means use the table's default sort order.
403+
sort_order_provider (Optional[SortOrderProvider]): Specifies the sort order to use for sorting new data
404+
when writing to an Iceberg table with this writer. The sort order is determined at the time the writer
405+
is created and does not change if the table's sort order changes later. Defaults to `None`, which means
406+
the table's default sort order is used. More details about sort order can be found in the Iceberg
407+
spec: https://iceberg.apache.org/spec/#sorting
408+
405409
406410
Raises:
407411
DHError: If unable to build the object.
@@ -449,7 +453,7 @@ def j_object(self) -> jpy.JType:
449453

450454
class IcebergTable(Table):
451455
"""
452-
:class:`.IcebergTable` is a subclass of Table that allows users to dynamically update the table with new snapshots
456+
`IcebergTable` is a subclass of Table that allows users to dynamically update the table with new snapshots
453457
from the Iceberg catalog.
454458
"""
455459
j_object_type = _JIcebergTable
@@ -488,8 +492,8 @@ def j_object(self) -> jpy.JType:
488492

489493
class IcebergTableWriter(JObjectWrapper):
490494
"""
491-
:class:`.IcebergTableWriter` is responsible for writing Deephaven tables to an Iceberg table. Each
492-
:class:`.IcebergTableWriter` instance associated with a single :class:`.IcebergTableAdapter` and can be used to
495+
`IcebergTableWriter` is responsible for writing Deephaven tables to an Iceberg table. Each
496+
`IcebergTableWriter` instance associated with a single `IcebergTableAdapter` and can be used to
493497
write multiple Deephaven tables to this Iceberg table.
494498
"""
495499
j_object_type = _JIcebergTableWriter or type(None)
@@ -504,7 +508,7 @@ def append(self, instructions: IcebergWriteInstructions):
504508
partition paths where each table will be written using the :attr:`.IcebergWriteInstructions.partition_paths`
505509
parameter.
506510
This method will not perform any compatibility checks between the existing schema and the provided Deephaven
507-
tables. All such checks happen at the time of creation of the :class:`.IcebergTableWriter` instance.
511+
tables. All such checks happen at the time of creation of the `IcebergTableWriter` instance.
508512
509513
Args:
510514
instructions (IcebergWriteInstructions): the customization instructions for write.
@@ -518,7 +522,7 @@ def j_object(self) -> jpy.JType:
518522

519523
class IcebergTableAdapter(JObjectWrapper):
520524
"""
521-
:class:`.IcebergTableAdapter` provides an interface for interacting with Iceberg tables. It allows the user to list
525+
`IcebergTableAdapter` provides an interface for interacting with Iceberg tables. It allows the user to list
522526
snapshots, retrieve table definitions and reading Iceberg tables into Deephaven tables.
523527
"""
524528
j_object_type = _JIcebergTableAdapter or type(None)
@@ -579,7 +583,7 @@ def table(self, instructions: Optional[IcebergReadInstructions] = None) -> Icebe
579583

580584
def table_writer(self, writer_options: TableParquetWriterOptions) -> IcebergTableWriter:
581585
"""
582-
Create a new :class:`.IcebergTableWriter` for this Iceberg table using the provided writer options.
586+
Create a new `IcebergTableWriter` for this Iceberg table using the provided writer options.
583587
This method will perform schema validation to ensure that the provided table definition from the writer options
584588
is compatible with the Iceberg table schema. All further writes performed by the returned writer will not be
585589
validated against the table's schema, and thus will be faster.
@@ -599,7 +603,7 @@ def j_object(self) -> jpy.JType:
599603

600604
class IcebergCatalogAdapter(JObjectWrapper):
601605
"""
602-
:class:`.IcebergCatalogAdapter` provides an interface for interacting with Iceberg catalogs. It allows listing
606+
`IcebergCatalogAdapter` provides an interface for interacting with Iceberg catalogs. It allows listing
603607
namespaces, tables and snapshots, as well as reading Iceberg tables into Deephaven tables.
604608
"""
605609
j_object_type = _JIcebergCatalogAdapter or type(None)
@@ -660,7 +664,7 @@ def create_table(self, table_identifier: str, table_definition: TableDefinitionL
660664
table_definition (TableDefinitionLike): the table definition of the new table.
661665
662666
Returns:
663-
:class:`.IcebergTableAdapter`: the table adapter for the new Iceberg table.
667+
`IcebergTableAdapter`: the table adapter for the new Iceberg table.
664668
"""
665669

666670
return IcebergTableAdapter(self.j_object.createTable(table_identifier,
@@ -700,7 +704,7 @@ def adapter_s3_rest(
700704
need to set this; it is most useful when connecting to non-AWS, S3-compatible APIs.
701705
702706
Returns:
703-
:class:`.IcebergCatalogAdapter`: the catalog adapter for the provided S3 REST catalog.
707+
`IcebergCatalogAdapter`: the catalog adapter for the provided S3 REST catalog.
704708
705709
Raises:
706710
DHError: If unable to build the catalog adapter.
@@ -738,7 +742,7 @@ def adapter_aws_glue(
738742
catalog URI.
739743
740744
Returns:
741-
:class:`.IcebergCatalogAdapter`: the catalog adapter for the provided AWS Glue catalog.
745+
`IcebergCatalogAdapter`: the catalog adapter for the provided AWS Glue catalog.
742746
743747
Raises:
744748
DHError: If unable to build the catalog adapter.
@@ -834,7 +838,7 @@ def adapter(
834838
hadoop_config (Optional[Dict[str, str]]): hadoop configuration properties for the catalog to load
835839
s3_instructions (Optional[s3.S3Instructions]): the S3 instructions if applicable
836840
Returns:
837-
:class:`.IcebergCatalogAdapter`: the catalog adapter created from the provided properties
841+
`IcebergCatalogAdapter`: the catalog adapter created from the provided properties
838842
839843
Raises:
840844
DHError: If unable to build the catalog adapter

0 commit comments

Comments
 (0)