Fix filters by adding surrounding parenthesis (GoogleCloudDataproc#1166)
* Fix filters by adding surrounding parenthesis.

* add changes.md

---------

Co-authored-by: Thomas Powell <thomas.samuel.powell@gmail.com>
Co-authored-by: Vishal Karve <vishalkarve15@gmail.com>
3 people authored Jan 22, 2024
Parent: c03a3fd · Commit: bd51cb1
Showing 3 changed files with 53 additions and 17 deletions.
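Why the parentheses matter: the compiled pushdown filters are joined with " AND " (see Collectors.joining(" AND ") in the diff below), and because AND binds tighter than OR in SQL, an un-parenthesized OR at the top level of one filter can silently regroup against the neighbouring condition. A minimal sketch of the failure mode, using hypothetical filter strings rather than actual connector output:

    // Joining two filter fragments with " AND " without wrapping them first.
    String pushed = "`c1` = 1 OR `c2` = 2";   // hypothetical compiled pushdown filter
    String config = "`c3` = 3";               // hypothetical user-supplied filter option
    String naive  = pushed + " AND " + config;
    // -> `c1` = 1 OR `c2` = 2 AND `c3` = 3
    //    which BigQuery evaluates as `c1` = 1 OR (`c2` = 2 AND `c3` = 3)
    String safe   = "(" + pushed + ") AND (" + config + ")";
    // -> (`c1` = 1 OR `c2` = 2) AND (`c3` = 3), the intended conjunction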
CHANGES.md (1 addition, 0 deletions)
@@ -4,6 +4,7 @@

* PR #1155: allow lazy materialization of query on load
* PR #1163: Added config to set the BigQuery Job timeout
+ * PR #1166: Fix filters by adding surrounding parenthesis. Thanks @tom-s-powell !
* PR #1171: fix read, write issues with Timestamp
* Issue #1116: BigQuery write fails with MessageSize is too large

@@ -211,11 +211,12 @@ public static String getCompiledFilter(
handledFilters(pushAllFilters, readDataFormat, ImmutableList.copyOf(pushedFilters)));
return Stream.of(
configFilter,
- compiledPushedFilter.length() == 0
-     ? Optional.empty()
+ compiledPushedFilter.isEmpty()
+     ? Optional.<String>empty()
: Optional.of(compiledPushedFilter))
.filter(Optional::isPresent)
- .map(filter -> "(" + filter.get() + ")")
+ .map(Optional::get)
+ .map(filter -> filter.startsWith("(") && filter.endsWith(")") ? filter : "(" + filter + ")")
.collect(Collectors.joining(" AND "));
}

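The new .map step above wraps a compiled filter in parentheses only when it does not already both start with "(" and end with ")", so fragments that are already wrapped are not double-wrapped. Roughly equivalent logic, pulled out as a standalone helper purely for illustration (the helper name is hypothetical, not part of the connector):

    static String wrapIfNeeded(String filter) {
      // Add one surrounding pair of parentheses unless the string already
      // starts with "(" and ends with ")".
      return filter.startsWith("(") && filter.endsWith(")") ? filter : "(" + filter + ")";
    }
    // wrapIfNeeded("`foo` IS NULL")   -> "(`foo` IS NULL)"
    // wrapIfNeeded("(`foo` IS NULL)") -> "(`foo` IS NULL)"   (unchanged)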
@@ -268,15 +269,15 @@ public static String compileFilter(Filter filter) {
}
if (filter instanceof And) {
And and = (And) filter;
- return format("((%s) AND (%s))", compileFilter(and.left()), compileFilter(and.right()));
+ return format("(%s) AND (%s)", compileFilter(and.left()), compileFilter(and.right()));
}
if (filter instanceof Or) {
Or or = (Or) filter;
- return format("((%s) OR (%s))", compileFilter(or.left()), compileFilter(or.right()));
+ return format("(%s) OR (%s)", compileFilter(or.left()), compileFilter(or.right()));
}
if (filter instanceof Not) {
Not not = (Not) filter;
- return format("(NOT (%s))", compileFilter(not.child()));
+ return format("NOT (%s)", compileFilter(not.child()));
}
if (filter instanceof StringStartsWith) {
StringStartsWith stringStartsWith = (StringStartsWith) filter;
@@ -301,6 +302,7 @@ public static String compileFilters(Iterable<Filter> filters) {
return StreamSupport.stream(filters.spliterator(), false)
.map(SparkFilterUtils::compileFilter)
.sorted()
+ .map(filter -> "(" + filter + ")")
.collect(Collectors.joining(" AND "));
}

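With the added .map in compileFilters, each compiled filter is parenthesized before the conjuncts are joined, while compileFilter for AND, OR, and NOT no longer adds the extra outer pair. A small usage sketch, assuming SparkFilterUtils and the Spark Filter classes (org.apache.spark.sql.sources.*) are on the classpath; the expected string follows the behaviour exercised in the tests below:

    import java.util.Arrays;
    import java.util.List;
    import org.apache.spark.sql.sources.Filter;
    import org.apache.spark.sql.sources.IsNotNull;
    import org.apache.spark.sql.sources.IsNull;

    List<Filter> filters = Arrays.asList(IsNull.apply("foo"), IsNotNull.apply("bar"));
    // compileFilter yields "`foo` IS NULL" and "`bar` IS NOT NULL"; compileFilters
    // sorts them, wraps each in parentheses, and joins them with " AND ".
    String compiled = SparkFilterUtils.compileFilters(filters);
    // expected under these assumptions: (`bar` IS NOT NULL) AND (`foo` IS NULL)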
@@ -187,12 +187,12 @@ public void testNumericAndNullFilters() {
.isEqualTo("`foo` IS NOT NULL");
assertThat(
SparkFilterUtils.compileFilter(And.apply(IsNull.apply("foo"), IsNotNull.apply("bar"))))
- .isEqualTo("((`foo` IS NULL) AND (`bar` IS NOT NULL))");
+ .isEqualTo("(`foo` IS NULL) AND (`bar` IS NOT NULL)");
assertThat(
SparkFilterUtils.compileFilter(Or.apply(IsNull.apply("foo"), IsNotNull.apply("bar"))))
- .isEqualTo("((`foo` IS NULL) OR (`bar` IS NOT NULL))");
+ .isEqualTo("(`foo` IS NULL) OR (`bar` IS NOT NULL)");
assertThat(SparkFilterUtils.compileFilter(Not.apply(IsNull.apply("foo"))))
- .isEqualTo("(NOT (`foo` IS NULL))");
+ .isEqualTo("NOT (`foo` IS NULL)");
}

@Test
@@ -261,9 +261,9 @@ public void testFiltersWithNestedOrAnd_1() {
pushAllFilters,
dataFormat,
"",
- "(((((`c1` >= 100) OR (`c1` <= 700))) OR (`c2` <= 900)) "
-     + "AND ((((`c1` >= 500) OR (`c1` <= 70))) OR (((`c1` >= 900) OR "
-     + "(`c3` <= 50)))) AND ((`c1` >= 5000) OR (`c1` <= 701)))",
+ "(((`c1` >= 100) OR (`c1` <= 700)) OR (`c2` <= 900)) "
+     + "AND (((`c1` >= 500) OR (`c1` <= 70)) OR ((`c1` >= 900) OR "
+     + "(`c3` <= 50))) AND ((`c1` >= 5000) OR (`c1` <= 701))",
Optional.empty(),
part1,
part2,
@@ -289,7 +289,7 @@ public void testFiltersWithNestedOrAnd_2() {
pushAllFilters,
dataFormat,
"",
- "(((((`c1` >= 500) AND (`c2` <= 300))) OR (((`c1` <= 800) AND (`c3` >= 230)))))",
+ "(((`c1` >= 500) AND (`c2` <= 300)) OR ((`c1` <= 800) AND (`c3` >= 230)))",
Optional.empty(),
filter);
}
@@ -329,11 +329,44 @@ public void testFiltersWithNestedOrAnd_3() {
pushAllFilters,
dataFormat,
"",
- "(((((((`c1` >= 5000) OR (`c1` <= 701))) AND "
-     + "(((`c2` >= 150) OR (`c3` >= 100))))) OR (((((`c1` >= 50) OR "
-     + "(`c1` <= 71))) AND (((`c2` >= 15) OR (`c3` >= 10)))))) AND "
+ "((((`c1` >= 5000) OR (`c1` <= 701)) AND "
+     + "((`c2` >= 150) OR (`c3` >= 100))) OR (((`c1` >= 50) OR "
+     + "(`c1` <= 71)) AND ((`c2` >= 15) OR (`c3` >= 10)))) AND "
      + "((`c1` >= 500) OR (`c1` <= 70)) AND ((`c1` >= 900) OR "
-     + "(((`c3` <= 50) AND (((`c2` >= 20) OR (`c3` > 200)))))))",
+     + "((`c3` <= 50) AND ((`c2` >= 20) OR (`c3` > 200))))",
Optional.empty(),
part1,
part2,
part3);
}

+ @Test
+ public void testFiltersWithNestedOrAnd_4() {
+   if (dataFormat == ARROW && !pushAllFilters) {
+     // If pushAllFilters isn't true, the following test won't pass for ARROW.
+     return;
+   }
+
+   Filter part1 =
+       Or.apply(
+           Not.apply(EqualNullSafe.apply("c1", 500)), Not.apply(EqualNullSafe.apply("c2", 500)));
+
+   Filter part2 = EqualNullSafe.apply("c1", 500);
+
+   Filter part3 = EqualNullSafe.apply("c2", 500);
+
+   checkFilters(
+       pushAllFilters,
+       dataFormat,
+       "",
+       "((NOT (`c1` IS NULL AND 500 IS NULL OR `c1` IS NOT NULL "
+           + "AND 500 IS NOT NULL AND `c1` = 500)) "
+           + "OR (NOT (`c2` IS NULL AND 500 IS NULL OR `c2` IS NOT NULL "
+           + "AND 500 IS NOT NULL AND `c2` = 500))) "
+           + "AND (`c1` IS NULL AND 500 IS NULL OR `c1` IS NOT NULL "
+           + "AND 500 IS NOT NULL AND `c1` = 500) "
+           + "AND (`c2` IS NULL AND 500 IS NULL OR `c2` IS NOT NULL "
+           + "AND 500 IS NOT NULL AND `c2` = 500)",
+       Optional.empty(),
+       part1,
+       part2,
