Skip to content

Commit 3990a7a

Browse files
author
Roman Shanin
committed
add test to check projected field predicate evaluator
1 parent 11d54a9 commit 3990a7a

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

pyiceberg/expressions/visitors.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -860,7 +860,9 @@ class _ColumnNameTranslator(BooleanExpressionVisitor[BooleanExpression]):
860860
861861
Args:
862862
file_schema (Schema): The schema of the file.
863+
projected_schema (Schema): The schema to project onto the data files.
863864
case_sensitive (bool): Whether to consider case when binding a reference to a field in a schema, defaults to True.
865+
projected_missing_fields (dict[str, Any]): Map of field name to value for fields that are missing from file_schema but present as partition values.
864866
865867
Raises:
866868
TypeError: In the case of an UnboundPredicate.

tests/expressions/test_visitors.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,12 @@
6868
BooleanExpressionVisitor,
6969
BoundBooleanExpressionVisitor,
7070
_ManifestEvalVisitor,
71+
bind,
7172
expression_evaluator,
7273
expression_to_plain_format,
7374
rewrite_not,
7475
rewrite_to_dnf,
76+
translate_column_names,
7577
visit,
7678
visit_bound_predicate,
7779
)
@@ -1623,3 +1625,22 @@ def test_expression_evaluator_null() -> None:
16231625
assert expression_evaluator(schema, LessThan("a", 1), case_sensitive=True)(struct) is False
16241626
assert expression_evaluator(schema, StartsWith("a", 1), case_sensitive=True)(struct) is False
16251627
assert expression_evaluator(schema, NotStartsWith("a", 1), case_sensitive=True)(struct) is True
1628+
1629+
@pytest.mark.parametrize(
    "before_expression,after_expression",
    [
        # Predicates on "id" alone: "id" is supplied with the value 1 below.
        (In("id", {1, 2, 3}), AlwaysTrue()),
        (EqualTo("id", 3), AlwaysFalse()),
        # "id" predicate on the left-hand side of a conjunction.
        (
            And(EqualTo("id", 1), EqualTo("all_same_value_or_null", "string")),
            And(AlwaysTrue(), EqualTo("all_same_value_or_null", "string")),
        ),
        # "id" predicate on the right-hand side of a conjunction.
        (
            And(EqualTo("all_same_value_or_null", "string"), GreaterThan("id", 2)),
            And(EqualTo("all_same_value_or_null", "string"), AlwaysFalse()),
        ),
        # Disjunction of the two conjunctions above.
        (
            Or(
                And(EqualTo("id", 1), EqualTo("all_same_value_or_null", "string")),
                And(EqualTo("all_same_value_or_null", "string"), GreaterThan("id", 2)),
            ),
            Or(
                And(AlwaysTrue(), EqualTo("all_same_value_or_null", "string")),
                And(EqualTo("all_same_value_or_null", "string"), AlwaysFalse()),
            ),
        ),
    ],
)
def test_translate_column_names_eval_projected_fields(
    schema: Schema,
    before_expression: BooleanExpression,
    after_expression: BooleanExpression,
) -> None:
    """Check that predicates on a projected-only field are evaluated during translation.

    The ``id`` column is removed from the file schema and passed instead as a
    projected missing field with the value ``1``. Each ``before_expression`` is
    bound against the full schema, translated, and compared with the expected
    ``after_expression``, in which every predicate on ``id`` has been replaced
    by ``AlwaysTrue()`` or ``AlwaysFalse()``.
    """
    # Drop "id" from the file schema, pretending it is only available as a partition value.
    schema_without_id = Schema(*(column for column in schema.columns if column.name != "id"))
    partition_values = {"id": 1}

    bound_expression = bind(schema, before_expression, True)
    translated = translate_column_names(bound_expression, schema_without_id, schema, True, partition_values)

    assert translated == after_expression

0 commit comments

Comments
 (0)