Commit 9650135

fix: DH-18740: Make the table_type argument default to None (deephaven#6656)
Fixes DH-18740

1. changed the default values to None
2. fixed a bug in cdc.py
3. black formatted consumer.py

Before: https://github.com/user-attachments/assets/e67e049b-f8e1-403a-a0a6-77d681cdbf55
After: https://github.com/user-attachments/assets/33d05fe7-1f20-4158-933d-361e660e0ba8
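With this change, callers that omit table_type still get a blink table; the default is simply resolved inside the function instead of in the signature. A minimal sketch of the updated call, assuming a running Deephaven server, a Kafka broker at a hypothetical localhost:9092, and a hypothetical "orders" topic:

    from deephaven import dtypes
    from deephaven.stream.kafka import consumer as kc

    # table_type is omitted, so it defaults to None and is resolved to
    # TableType.blink() inside consume(), matching the previous behavior.
    orders = kc.consume(
        {"bootstrap.servers": "localhost:9092"},   # hypothetical broker address
        "orders",                                   # hypothetical topic name
        key_spec=kc.KeyValueSpec.IGNORE,
        value_spec=kc.simple_spec("Payload", dtypes.string),
    )
    # Equivalent to passing table_type=kc.TableType.blink() explicitly.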
1 parent b32641f commit 9650135

1 file changed: py/server/deephaven/stream/kafka/consumer.py (+74, -61 lines)
@@ -72,7 +72,7 @@ class TableType(JObjectWrapper):
     j_object_type = jpy.get_type("io.deephaven.kafka.KafkaTools$TableType")

     @staticmethod
-    def blink():
+    def blink() -> 'TableType':
         """ Consume all partitions into a single interleaved blink table, which will present only newly-available rows
         to downstream operations and visualizations."""
         return TableType(TableType.j_object_type.blink())
@@ -85,12 +85,12 @@ def stream():
         return TableType.blink()

     @staticmethod
-    def append():
+    def append() -> 'TableType':
         """ Consume all partitions into a single interleaved in-memory append-only table."""
         return TableType(TableType.j_object_type.append())

     @staticmethod
-    def ring(capacity: int):
+    def ring(capacity: int) -> 'TableType':
         """ Consume all partitions into a single in-memory ring table."""
         return TableType(TableType.j_object_type.ring(capacity))

@@ -101,14 +101,18 @@ def __init__(self, j_table_type: jpy.JType):
     def j_object(self) -> jpy.JType:
         return self._j_table_type

+
 # TODO (https://github.com/deephaven/deephaven-core/issues/3853): Delete this attribute
 TableType.Stream = TableType.blink()
 """ Deprecated, prefer TableType.blink(). Consume all partitions into a single interleaved blink table, which will
 present only newly-available rows to downstream operations and visualizations."""

 # TODO (https://github.com/deephaven/deephaven-core/issues/3853): Delete this attribute
 TableType.Append = TableType.append()
-""" Deprecated, prefer TableType.append(). Consume all partitions into a single interleaved in-memory append-only table."""
+""" Deprecated, prefer TableType.append(). Consume all partitions into a single interleaved in-memory append-only
+table."""
+
+TableType.Blink = TableType.blink()


 def j_partitions(partitions):
@@ -134,8 +138,8 @@ def consume(
     offsets: Dict[int, int] = None,
     key_spec: KeyValueSpec = None,
     value_spec: KeyValueSpec = None,
-    table_type: TableType = TableType.blink(),
-) -> Table:
+    table_type: TableType = None,
+) -> Table:
     """Consume from Kafka to a Deephaven table.

     Args:
@@ -158,17 +162,19 @@ def consume(
             It can be the result of calling one of the functions: simple_spec(),avro_spec() or json_spec() in this
             module, or the predefined KeyValueSpec.IGNORE or KeyValueSpec.FROM_PROPERTIES. The default is None which
             works the same as KeyValueSpec.FROM_PROPERTIES, in which case, the kafka_config param should include values
-            for dictionary keys 'deephaven.value.column.name' and 'deephaven.value.column.type', for the single resulting
+            for dictionary keys 'deephaven.value.column.name' and 'deephaven.value.column.type', for the single
+            resulting
             column name and type
-        table_type (TableType): a TableType, default is TableType.blink()
+        table_type (TableType): a TableType, default is None, meaning to use TableType.blink()

     Returns:
         a Deephaven live table that will update based on Kafka messages consumed for the given topic

     Raises:
         DHError
     """
-
+    if table_type is None:
+        table_type = TableType.blink()
     return _consume(kafka_config, topic, partitions, offsets, key_spec, value_spec, table_type, to_partitioned=False)


@@ -179,8 +185,8 @@ def consume_to_partitioned_table(
     offsets: Dict[int, int] = None,
     key_spec: KeyValueSpec = None,
     value_spec: KeyValueSpec = None,
-    table_type: TableType = TableType.blink(),
-) -> PartitionedTable:
+    table_type: TableType = None,
+) -> PartitionedTable:
     """Consume from Kafka to a Deephaven partitioned table.

     Args:
@@ -203,10 +209,11 @@ def consume_to_partitioned_table(
             It can be the result of calling one of the functions: simple_spec(),avro_spec() or json_spec() in this
             module, or the predefined KeyValueSpec.IGNORE or KeyValueSpec.FROM_PROPERTIES. The default is None which
             works the same as KeyValueSpec.FROM_PROPERTIES, in which case, the kafka_config param should include values
-            for dictionary keys 'deephaven.value.column.name' and 'deephaven.value.column.type', for the single resulting
+            for dictionary keys 'deephaven.value.column.name' and 'deephaven.value.column.type', for the single
+            resulting
             column name and type
         table_type (TableType): a TableType, specifying the type of the expected result's constituent tables,
-            default is TableType.blink()
+            default is None, meaning to use TableType.blink()

     Returns:
         a Deephaven live partitioned table that will update based on Kafka messages consumed for the given topic,
@@ -216,7 +223,8 @@ def consume_to_partitioned_table(
     Raises:
         DHError
     """
-
+    if table_type is None:
+        table_type = TableType.blink()
     return _consume(kafka_config, topic, partitions, offsets, key_spec, value_spec, table_type, to_partitioned=True)


@@ -229,7 +237,7 @@ def _consume(
     value_spec: KeyValueSpec = None,
     table_type: TableType = TableType.blink(),
     to_partitioned: bool = False,
-) -> Union[Table, PartitionedTable]:
+) -> Union[Table, PartitionedTable]:
     try:
         partitions = j_partitions(partitions)

@@ -243,8 +251,8 @@ def _consume(
             partitions_array = jpy.array("int", list(offsets.keys()))
             offsets_array = jpy.array("long", list(offsets.values()))
             offsets = _JKafkaTools.partitionToOffsetFromParallelArrays(
-                partitions_array, offsets_array
-            )
+                partitions_array, offsets_array
+            )

         key_spec = KeyValueSpec.FROM_PROPERTIES if key_spec is None else key_spec
         value_spec = KeyValueSpec.FROM_PROPERTIES if value_spec is None else value_spec
@@ -255,29 +263,32 @@ def _consume(
         kafka_config = j_properties(kafka_config)
         if not to_partitioned:
             return Table(
-                j_table=_JKafkaTools.consumeToTable(
-                    kafka_config,
-                    topic,
-                    partitions,
-                    offsets,
-                    key_spec.j_object,
-                    value_spec.j_object,
-                    table_type.j_object,
-                )
-            )
+                j_table=_JKafkaTools.consumeToTable(
+                    kafka_config,
+                    topic,
+                    partitions,
+                    offsets,
+                    key_spec.j_object,
+                    value_spec.j_object,
+                    table_type.j_object,
+                )
+            )
         else:
-            return PartitionedTable(j_partitioned_table=_JKafkaTools.consumeToPartitionedTable(
-                kafka_config,
-                topic,
-                partitions,
-                offsets,
-                key_spec.j_object,
-                value_spec.j_object,
-                table_type.j_object,
-            ))
+            return PartitionedTable(
+                j_partitioned_table=_JKafkaTools.consumeToPartitionedTable(
+                    kafka_config,
+                    topic,
+                    partitions,
+                    offsets,
+                    key_spec.j_object,
+                    value_spec.j_object,
+                    table_type.j_object,
+                )
+            )
     except Exception as e:
         raise DHError(e, "failed to consume a Kafka stream.") from e

+
 class ProtobufProtocol(JObjectWrapper):
     """The protobuf serialization / deserialization protocol."""

@@ -310,7 +321,7 @@ def protobuf_spec(
     message_class: Optional[str] = None,
     include: Optional[List[str]] = None,
     protocol: Optional[ProtobufProtocol] = None,
-) -> KeyValueSpec:
+) -> KeyValueSpec:
     """Creates a spec for parsing a Kafka protobuf stream into a Deephaven table. Uses the schema, schema_version, and
     schema_message_name to fetch the schema from the schema registry; or uses message_class to to get the schema from
     the classpath.
@@ -344,13 +355,13 @@ def protobuf_spec(
     parser_options_builder = _JProtobufDescriptorParserOptions.builder()
     if include is not None:
         parser_options_builder.fieldOptions(
-            _JFieldOptions.includeIf(
-                _JFieldPath.anyMatches(j_array_list(include))
-            )
-        )
+            _JFieldOptions.includeIf(
+                _JFieldPath.anyMatches(j_array_list(include))
+            )
+        )
     pb_consume_builder = (
-        _JProtobufConsumeOptions.builder()
-        .parserOptions(parser_options_builder.build())
+        _JProtobufConsumeOptions.builder()
+        .parserOptions(parser_options_builder.build())
     )
     if message_class:
         if schema or schema_version or schema_message_name:
@@ -368,16 +379,16 @@ def protobuf_spec(
     if protocol:
         pb_consume_builder.protocol(protocol.j_object)
     return KeyValueSpec(
-        j_spec=_JKafkaTools_Consume.protobufSpec(pb_consume_builder.build())
-    )
+        j_spec=_JKafkaTools_Consume.protobufSpec(pb_consume_builder.build())
+    )


 def avro_spec(
     schema: str,
     schema_version: str = "latest",
     mapping: Dict[str, str] = None,
     mapped_only: bool = False,
-) -> KeyValueSpec:
+) -> KeyValueSpec:
     """Creates a spec for how to use an Avro schema when consuming a Kafka stream to a Deephaven table.

     Args:
@@ -388,7 +399,8 @@ def avro_spec(
             the value of the Schema Server URL for fetching the schema definition
         schema_version (str): the schema version to fetch from schema service, default is 'latest'
         mapping (Dict[str, str]): a mapping from Avro field name to Deephaven table column name; the fields specified in
-            the mapping will have their column names defined by it; if 'mapped_only' parameter is False, any other fields
+            the mapping will have their column names defined by it; if 'mapped_only' parameter is False,
+            any other fields
             not mentioned in the mapping will use the same Avro field name for Deephaven table column; otherwise, these
             unmapped fields will be ignored and will not be present in the resulting table. default is None
         mapped_only (bool): whether to ignore Avro fields not present in the 'mapping' argument, default is False
@@ -407,22 +419,22 @@ def avro_spec(
             jschema = _JKafkaTools.getAvroSchema(schema);
             if mapping:
                 return KeyValueSpec(
-                    j_spec=_JKafkaTools_Consume.avroSpec(jschema, mapping)
-                )
+                    j_spec=_JKafkaTools_Consume.avroSpec(jschema, mapping)
+                )
             else:
                 return KeyValueSpec(
-                    j_spec=_JKafkaTools_Consume.avroSpec(jschema)
-                )
+                    j_spec=_JKafkaTools_Consume.avroSpec(jschema)
+                )

         else:
             if mapping:
                 return KeyValueSpec(
-                    j_spec=_JKafkaTools_Consume.avroSpec(schema, schema_version, mapping)
-                )
+                    j_spec=_JKafkaTools_Consume.avroSpec(schema, schema_version, mapping)
+                )
             else:
                 return KeyValueSpec(
-                    j_spec=_JKafkaTools_Consume.avroSpec(schema, schema_version)
-                )
+                    j_spec=_JKafkaTools_Consume.avroSpec(schema, schema_version)
+                )
     except Exception as e:
         raise DHError(e, "failed to create a Kafka key/value spec") from e

@@ -457,10 +469,11 @@ def json_spec(col_defs: Union[TableDefinitionLike, List[Tuple[str, DType]]], map
         col_defs = [col.j_column_definition for col in table_def.values()]
     else:
         warn(
-            'json_spec col_defs for List[Tuple[str, DType]] is deprecated for removal, prefer TableDefinitionLike',
-            DeprecationWarning,
-            stacklevel=2,
-        )
+            'json_spec col_defs for List[Tuple[str, DType]] is deprecated for removal, '
+            'prefer TableDefinitionLike',
+            DeprecationWarning,
+            stacklevel=2,
+        )
         col_defs = [col_def(*t).j_column_definition for t in col_defs]

     if mapping is None:
@@ -489,8 +502,8 @@ def simple_spec(col_name: str, data_type: DType = None) -> KeyValueSpec:
         if data_type is None:
             return KeyValueSpec(j_spec=_JKafkaTools_Consume.simpleSpec(col_name))
         return KeyValueSpec(
-            j_spec=_JKafkaTools_Consume.simpleSpec(col_name, data_type.qst_type.clazz())
-        )
+            j_spec=_JKafkaTools_Consume.simpleSpec(col_name, data_type.qst_type.clazz())
+        )
     except Exception as e:
         raise DHError(e, "failed to create a Kafka key/value spec") from e

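Explicit table_type arguments behave exactly as before; only the default moved into the function body. A sketch of an explicit override, again with a hypothetical broker and topic, using TableType.ring(capacity), one of the factories that gained a return annotation in this change:

    from deephaven import dtypes
    from deephaven.stream.kafka import consumer as kc

    # Keep only the most recent 1024 rows by requesting a ring table
    # explicitly rather than relying on the blink-table default.
    recent_orders = kc.consume(
        {"bootstrap.servers": "localhost:9092"},   # hypothetical broker address
        "orders",                                   # hypothetical topic name
        key_spec=kc.KeyValueSpec.IGNORE,
        value_spec=kc.simple_spec("Payload", dtypes.string),
        table_type=kc.TableType.ring(1024),
    )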