|
4 | 4 | import random
|
5 | 5 | import unittest
|
6 | 6 |
|
7 |
| -from typing import Optional |
| 7 | +from typing import Optional, Union |
8 | 8 | import numpy as np
|
9 | 9 |
|
10 | 10 | from deephaven import DHError, empty_table, dtypes
|
|
15 | 15 | from deephaven._udf import _dh_vectorize as dh_vectorize
|
16 | 16 | from tests.testbase import BaseTestCase
|
17 | 17 |
|
| 18 | +from tests.test_udf_numpy_args import _J_TYPE_NULL_MAP, _J_TYPE_NP_DTYPE_MAP, _J_TYPE_J_ARRAY_TYPE_MAP |
| 19 | + |
18 | 20 |
|
19 | 21 | class VectorizationTestCase(BaseTestCase):
|
20 | 22 | def setUp(self):
|
@@ -278,6 +280,93 @@ def pyfunc(p1: np.int32, p2: np.int32, p3: Optional[np.int32]) -> Optional[int]:
|
278 | 280 | self.assertEqual(t.columns[1].data_type, dtypes.long)
|
279 | 281 | self.assertEqual(t.columns[2].data_type, dtypes.long)
|
280 | 282 |
|
def test_1d_array_args_no_null(self):
    """Check UDFs taking/returning 1-D primitive arrays that hold no nulls.

    For every Java primitive type listed in ``_J_TYPE_NP_DTYPE_MAP`` a
    grouped array column is built, passed to a generated UDF annotated with
    the paired ``np.ndarray`` dtype, and the resulting column types plus
    ``_udf.vectorized_count`` are verified.
    """
    col1_formula = "Col1 = i % 3"
    for j_dtype, np_dtype in _J_TYPE_NP_DTYPE_MAP.items():
        col2_formula = f"Col2 = ({j_dtype})i"
        with self.subTest(j_dtype):
            try:
                tbl = (
                    empty_table(10)
                    .update([col1_formula, col2_formula])
                    .group_by("Col1")
                    .update("Col2 = Col2.toArray()")
                )

                # The UDF is generated from source so that its annotations
                # can vary per dtype; it is exec'd into module globals, which
                # is where the query string below resolves `test_udf`.
                func_str = f"""
def test_udf(col1, col2: np.ndarray[{np_dtype}]) -> np.ndarray[{np_dtype}]:
    return col2 + 5
"""
                exec(func_str, globals())

                res = tbl.update("Col3 = test_udf(Col1, Col2)")
                j_array_type = _J_TYPE_J_ARRAY_TYPE_MAP[j_dtype]
                self.assertEqual(res.columns[0].data_type, dtypes.int32)
                self.assertEqual(res.columns[1].data_type, j_array_type)
                self.assertEqual(res.columns[2].data_type, j_array_type)

                self.assertEqual(_udf.vectorized_count, 1)
            finally:
                # Reset even when an assertion above fails; otherwise the
                # stale counter makes every later subtest fail as well.
                _udf.vectorized_count = 0
| 304 | + |
def test_1d_array_args_null(self):
    """Check UDFs taking 1-D primitive arrays that contain Deephaven nulls.

    For every Java primitive type listed in ``_J_TYPE_NULL_MAP`` a grouped
    array column containing that type's null constant is passed to a
    generated UDF. Floating point types are expected to succeed; all other
    types are expected to raise ``DHError``.
    """
    col1_formula = "Col1 = i % 3"
    for j_dtype, null_name in _J_TYPE_NULL_MAP.items():
        col2_formula = f"Col2 = i % 3 == 0? {null_name} : ({j_dtype})i"
        with self.subTest(j_dtype):
            try:
                tbl = (
                    empty_table(10)
                    .update([col1_formula, col2_formula])
                    .group_by("Col1")
                    .update("Col2 = Col2.toArray()")
                )

                # Generated from source so the annotations can vary per
                # dtype; exec'd into module globals, where the query string
                # below resolves `test_udf`.
                np_dtype = _J_TYPE_NP_DTYPE_MAP[j_dtype]
                func_str = f"""
def test_udf(col1, col2: np.ndarray[{np_dtype}]) -> np.ndarray[{np_dtype}]:
    return col2 + 5
"""
                exec(func_str, globals())

                # for floating point types, DH nulls are auto converted to np.nan
                # for integer types, DH nulls in the array raise exceptions
                if j_dtype in ("float", "double"):
                    res = tbl.update("Col3 = test_udf(Col1, Col2)")
                    j_array_type = _J_TYPE_J_ARRAY_TYPE_MAP[j_dtype]
                    self.assertEqual(res.columns[0].data_type, dtypes.int32)
                    self.assertEqual(res.columns[1].data_type, j_array_type)
                    self.assertEqual(res.columns[2].data_type, j_array_type)
                else:
                    with self.assertRaises(DHError):
                        tbl.update("Col3 = test_udf(Col1, Col2)")

                self.assertEqual(_udf.vectorized_count, 1)
            finally:
                # Reset even when an assertion above fails; otherwise the
                # stale counter makes every later subtest fail as well.
                _udf.vectorized_count = 0
| 331 | + |
def test_1d_str_bool_datetime_array(self):
    """Check UDFs taking 1-D string, datetime and boolean array arguments.

    Each subtest defines a local UDF, applies it to a grouped array column
    and verifies the result and ``_udf.vectorized_count``. The boolean
    subtest additionally expects a ``DHError`` when the array holds nulls.

    The UDFs stay local to this method and keep the names ``f1``/``f2``/``f3``
    because the query strings refer to them by name.
    """
    with self.subTest("str"):
        def f1(p1: np.ndarray[str]) -> bool:
            return (p1 == 'None').any()

        try:
            t = empty_table(10).update(["X = i % 3", "Y = i % 2 == 0? `deephaven`: null"]).group_by("X").update("Y = Y.toArray()")
            t1 = t.update(["X1 = f1(Y)"])
            self.assertEqual(t1.columns[2].data_type, dtypes.bool_)
            self.assertEqual(3, t1.to_string().count("true"))
            self.assertEqual(_udf.vectorized_count, 1)
        finally:
            # Always reset so a failure here cannot leak into later subtests.
            _udf.vectorized_count = 0

    with self.subTest("datetime"):
        def f2(p1: np.ndarray[np.datetime64]) -> bool:
            return np.isnat(p1).any()

        try:
            t = empty_table(10).update(["X = i % 3", "Y = i % 2 == 0? now() : null"]).group_by("X").update("Y = Y.toArray()")
            t1 = t.update(["X1 = f2(Y)"])
            self.assertEqual(t1.columns[2].data_type, dtypes.bool_)
            self.assertEqual(3, t1.to_string().count("true"))
            self.assertEqual(_udf.vectorized_count, 1)
        finally:
            # Always reset so a failure here cannot leak into later subtests.
            _udf.vectorized_count = 0

    with self.subTest("boolean"):
        def f3(p1: np.ndarray[np.bool_]) -> np.ndarray[np.bool_]:
            return np.invert(p1)

        try:
            # Null-free boolean array: the call is expected to succeed.
            t = empty_table(10).update(["X = i % 3", "Y = i % 2 == 0? true : false"]).group_by("X").update("Y = Y.toArray()")
            t.update(["X1 = f3(Y)"])
            self.assertEqual(_udf.vectorized_count, 1)
            _udf.vectorized_count = 0

            # Boolean array containing nulls: the call is expected to fail.
            t = empty_table(10).update(["X = i % 3", "Y = i % 2 == 0? true : null"]).group_by("X").update("Y = Y.toArray()")
            with self.assertRaises(DHError) as cm:
                t.update(["X1 = f3(Y)"])
            self.assertIn("Java java.lang.Boolean array contains Deephaven null values, but numpy int8 array does not support null values", str(cm.exception))
            self.assertEqual(_udf.vectorized_count, 1)
        finally:
            # Always reset so a failure here cannot leak into later tests.
            _udf.vectorized_count = 0
281 | 370 |
|
# Run this module's tests via unittest when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
|
0 commit comments