|
5 | 5 | """ This module provides Java compatibility support including convenience functions to create some widely used Java
|
6 | 6 | data structures from corresponding Python ones in order to be able to call Java methods. """
|
7 | 7 |
|
8 |
| -from typing import Any, Callable, Dict, Iterable, List, Sequence, Set, TypeVar, Union, Tuple, Literal, Optional |
| 8 | +from typing import Any, Callable, Dict, Iterable, List, Sequence, Set, TypeVar, Union, Optional |
9 | 9 |
|
10 | 10 | import jpy
|
11 | 11 | import numpy as np
|
12 | 12 | import pandas as pd
|
13 | 13 |
|
14 | 14 | from deephaven import dtypes, DHError
|
15 | 15 | from deephaven._wrapper import unwrap, wrap_j_object, JObjectWrapper
|
16 |
| -from deephaven.dtypes import DType, _PRIMITIVE_DTYPE_NULL_MAP, _J_ARRAY_NP_TYPE_MAP |
| 16 | +from deephaven.dtypes import DType, _PRIMITIVE_DTYPE_NULL_MAP |
17 | 17 |
|
18 | 18 | _NULL_BOOLEAN_AS_BYTE = jpy.get_type("io.deephaven.util.BooleanUtils").NULL_BOOLEAN_AS_BYTE
|
19 | 19 | _JPrimitiveArrayConversionUtility = jpy.get_type("io.deephaven.integrations.common.PrimitiveArrayConversionUtility")
|
@@ -216,14 +216,8 @@ def _j_array_to_numpy_array(dtype: DType, j_array: jpy.JType, conv_null: bool, t
|
216 | 216 | dtype (DType): The dtype of the Java array
|
217 | 217 | j_array (jpy.JType): The Java array to convert
|
218 | 218 | conv_null (bool): If True, convert nulls to the null value for the dtype
|
219 |
| - type_promotion (bool): Ignored when conv_null is False. When type_promotion is False, (1) input Java integer, |
220 |
| - boolean, or character arrays containing Deephaven nulls yield an exception, (2) input Java float or double |
221 |
| - arrays containing Deephaven nulls have null values converted to np.nan, and (3) input Java arrays without |
222 |
| - Deephaven nulls are converted to the target type. When type_promotion is True, (1) input Java integer, |
223 |
| - boolean, or character arrays containing Deephaven nulls are converted to np.float64 arrays and Deephaven |
224 |
| - null values are converted to np.nan, (2) input Java float or double arrays containing Deephaven nulls have |
225 |
| - null values converted to np.nan, and (3) input Java arrays without Deephaven nulls are converted to the |
226 |
| - target type. Defaults to False. |
| 219 | + type_promotion (bool): Ignored when conv_null is False. When conv_null is True, see the description of the |
| 220 | + parameter of the same name in dh_null_to_nan(). |
227 | 221 |
|
228 | 222 | Returns:
|
229 | 223 | np.ndarray: The numpy array or None if the Java array is None
|
@@ -255,26 +249,49 @@ def _j_array_to_numpy_array(dtype: DType, j_array: jpy.JType, conv_null: bool, t
|
255 | 249 | np_array = np.array(j_array, np.object_)
|
256 | 250 |
|
257 | 251 | if conv_null:
|
258 |
| - if dh_null := _PRIMITIVE_DTYPE_NULL_MAP.get(dtype): |
259 |
| - if dtype in (dtypes.float32, dtypes.float64): |
260 |
| - np_array = np.copy(np_array) |
261 |
| - np_array[np_array == dh_null] = np.nan |
262 |
| - else: |
263 |
| - if dtype is dtypes.bool_: # needs to change its type to byte for dh null detection |
264 |
| - np_array = np.frombuffer(np_array, np.byte) |
265 |
| - |
266 |
| - if any(np_array[np_array == dh_null]): |
267 |
| - if not type_promotion: |
268 |
| - raise DHError(f"Problem creating numpy array. Java {dtype} array contains Deephaven null values, but numpy {np_array.dtype} array does not support null values") |
269 |
| - np_array = np_array.astype(np.float64) |
270 |
| - np_array[np_array == dh_null] = np.nan |
271 |
| - else: |
272 |
| - if dtype is dtypes.bool_: # needs to change its type back to bool |
273 |
| - np_array = np.frombuffer(np_array, np.bool_) |
274 |
| - return np_array |
| 252 | + return dh_null_to_nan(np_array, type_promotion) |
275 | 253 |
|
276 | 254 | return np_array
|
277 | 255 |
|
def dh_null_to_nan(np_array: np.ndarray, type_promotion: bool = False) -> np.ndarray:
    """Converts Deephaven primitive null values in the given numpy array to np.nan. No conversion is performed on
    non-primitive types.

    Note, the input numpy array is never modified. For arrays of a float or double type, a copy with the Deephaven
    nulls replaced by np.nan is returned. For integer, boolean, or character arrays, a new np.float64 array is
    returned when type_promotion is True. Arrays whose dtype has no entry in the primitive null map are returned
    as is (the very same object).

    Args:
        np_array (np.ndarray): The numpy array to convert
        type_promotion (bool): When False, integer, boolean, or character arrays will cause an exception to be raised.
            When True, integer, boolean, or character arrays are converted to new np.float64 arrays and Deephaven null
            values in them are converted to np.nan. Numpy arrays of float or double types are not affected by this flag
            and Deephaven nulls in them are always converted to np.nan in the returned copy. Defaults to False.

    Returns:
        np.ndarray: The numpy array with Deephaven nulls converted to np.nan.

    Raises:
        DHError: If np_array is not a numpy array, or if it is an integer, boolean, or character array and
            type_promotion is False.
    """
    if not isinstance(np_array, np.ndarray):
        raise DHError(message="The given np_array argument is not a numpy array.")

    dtype = dtypes.from_np_dtype(np_array.dtype)
    dh_null = _PRIMITIVE_DTYPE_NULL_MAP.get(dtype)
    if not dh_null:
        # No Deephaven null sentinel is registered for this dtype; nothing to convert.
        return np_array

    if dtype in (dtypes.float32, dtypes.float64):
        # Work on a copy so the caller's array (which may be a buffer-backed view) is left untouched.
        np_array = np.copy(np_array)
        np_array[np_array == dh_null] = np.nan
        return np_array

    if not type_promotion:
        raise DHError(message=f"failed to convert DH nulls to np.nan in the numpy array. The array is "
                              f"of {np_array.dtype.type} type but type_promotion is False")

    if dtype is dtypes.bool_:
        # Reinterpret the booleans as bytes so the byte-valued null sentinel can be detected. A view (unlike
        # np.frombuffer) keeps the array's shape and also works for non-contiguous arrays.
        np_array = np_array.view(np.byte)

    # astype always allocates a new array, so the assignment below never touches the input.
    np_array = np_array.astype(np.float64)
    np_array[np_array == dh_null] = np.nan

    return np_array
278 | 295 |
|
279 | 296 | def _j_array_to_series(dtype: DType, j_array: jpy.JType, conv_null: bool) -> pd.Series:
|
280 | 297 | """Produce a copy of the specified Java array as a pandas.Series object.
|
|
0 commit comments