17
17
18
18
//! Helper functions for the table implementation
19
19
20
+ use std:: collections:: HashMap ;
20
21
use std:: sync:: Arc ;
21
22
22
23
use super :: PartitionedFile ;
23
24
use crate :: datasource:: listing:: ListingTableUrl ;
24
25
use crate :: execution:: context:: SessionState ;
26
+ use crate :: logical_expr:: { BinaryExpr , Operator } ;
25
27
use crate :: { error:: Result , scalar:: ScalarValue } ;
26
28
27
29
use arrow:: {
@@ -185,9 +187,17 @@ async fn list_partitions(
185
187
store : & dyn ObjectStore ,
186
188
table_path : & ListingTableUrl ,
187
189
max_depth : usize ,
190
+ partition_prefix : Option < Path > ,
188
191
) -> Result < Vec < Partition > > {
189
192
let partition = Partition {
190
- path : table_path. prefix ( ) . clone ( ) ,
193
+ path : match partition_prefix {
194
+ Some ( prefix) => Path :: from_iter (
195
+ Path :: from ( table_path. prefix ( ) . as_ref ( ) )
196
+ . parts ( )
197
+ . chain ( Path :: from ( prefix. as_ref ( ) ) . parts ( ) ) ,
198
+ ) ,
199
+ None => table_path. prefix ( ) . clone ( ) ,
200
+ } ,
191
201
depth : 0 ,
192
202
files : None ,
193
203
} ;
@@ -321,6 +331,81 @@ async fn prune_partitions(
321
331
Ok ( filtered)
322
332
}
323
333
334
/// What the analysed filter says about one column.
#[derive(Debug)]
enum PartitionValue {
    /// The column was compared for equality with exactly one literal;
    /// the payload is that literal rendered as a string.
    Single(String),
    /// The column appeared in more than one equality comparison, so no
    /// single value can be assumed for it.
    Multi,
}
339
+
340
+ fn populate_partition_values < ' a > (
341
+ partition_values : & mut HashMap < & ' a str , PartitionValue > ,
342
+ filter : & ' a Expr ,
343
+ ) {
344
+ if let Expr :: BinaryExpr ( BinaryExpr {
345
+ ref left,
346
+ op,
347
+ ref right,
348
+ } ) = filter
349
+ {
350
+ match op {
351
+ Operator :: Eq => match ( left. as_ref ( ) , right. as_ref ( ) ) {
352
+ ( Expr :: Column ( Column { ref name, .. } ) , Expr :: Literal ( val) )
353
+ | ( Expr :: Literal ( val) , Expr :: Column ( Column { ref name, .. } ) ) => {
354
+ if partition_values
355
+ . insert ( name, PartitionValue :: Single ( val. to_string ( ) ) )
356
+ . is_some ( )
357
+ {
358
+ partition_values. insert ( name, PartitionValue :: Multi ) ;
359
+ }
360
+ }
361
+ _ => { }
362
+ } ,
363
+ Operator :: And => {
364
+ populate_partition_values ( partition_values, left) ;
365
+ populate_partition_values ( partition_values, right) ;
366
+ }
367
+ _ => { }
368
+ }
369
+ }
370
+ }
371
+
372
+ fn evaluate_partition_prefix < ' a > (
373
+ partition_cols : & ' a [ ( String , DataType ) ] ,
374
+ filters : & ' a [ Expr ] ,
375
+ ) -> Option < Path > {
376
+ let mut partition_values = HashMap :: new ( ) ;
377
+
378
+ if filters. len ( ) > 1 {
379
+ return None ;
380
+ }
381
+
382
+ for filter in filters {
383
+ populate_partition_values ( & mut partition_values, filter) ;
384
+ }
385
+
386
+ if partition_values. is_empty ( ) {
387
+ return None ;
388
+ }
389
+
390
+ let mut parts = vec ! [ ] ;
391
+ for ( p, _) in partition_cols {
392
+ match partition_values. get ( p. as_str ( ) ) {
393
+ Some ( PartitionValue :: Single ( val) ) => {
394
+ parts. push ( format ! ( "{p}={val}" ) ) ;
395
+ }
396
+ _ => {
397
+ break ;
398
+ }
399
+ }
400
+ }
401
+
402
+ if parts. is_empty ( ) {
403
+ None
404
+ } else {
405
+ Some ( Path :: from_iter ( parts) )
406
+ }
407
+ }
408
+
324
409
/// Discover the partitions on the given path and prune out files
325
410
/// that belong to irrelevant partitions using `filters` expressions.
326
411
/// `filters` might contain expressions that can be resolved only at the
@@ -343,7 +428,10 @@ pub async fn pruned_partition_list<'a>(
343
428
) ) ;
344
429
}
345
430
346
- let partitions = list_partitions ( store, table_path, partition_cols. len ( ) ) . await ?;
431
+ let partition_prefix = evaluate_partition_prefix ( partition_cols, filters) ;
432
+ let partitions =
433
+ list_partitions ( store, table_path, partition_cols. len ( ) , partition_prefix)
434
+ . await ?;
347
435
debug ! ( "Listed {} partitions" , partitions. len( ) ) ;
348
436
349
437
let pruned =
@@ -433,7 +521,7 @@ mod tests {
433
521
434
522
use futures:: StreamExt ;
435
523
436
- use crate :: logical_expr:: { case, col, lit} ;
524
+ use crate :: logical_expr:: { case, col, lit, Expr , Operator } ;
437
525
use crate :: test:: object_store:: make_test_store_and_state;
438
526
439
527
use super :: * ;
@@ -692,4 +780,127 @@ mod tests {
692
780
// this helper function
693
781
assert ! ( expr_applicable_for_cols( & [ ] , & lit( true ) ) ) ;
694
782
}
783
+
784
#[test]
fn test_evaluate_partition_prefix() {
    let partitions = &[
        ("a".to_string(), DataType::Utf8),
        ("b".to_string(), DataType::Int16),
        ("c".to_string(), DataType::Boolean),
    ];
    // Shorthand: evaluate a filter list against the three columns above.
    let prefix_of =
        |filters: &[Expr]| evaluate_partition_prefix(partitions, filters);

    // equality on the first column gives a one-segment prefix
    assert_eq!(
        prefix_of(&[col("a").eq(lit("foo"))]),
        Some(Path::from("a=foo")),
    );

    // equalities on the first two columns, joined by AND
    assert_eq!(
        prefix_of(&[col("a").eq(lit("foo")).and(col("b").eq(lit("bar")))]),
        Some(Path::from("a=foo/b=bar")),
    );

    // nested AND covering all three columns
    assert_eq!(
        prefix_of(&[col("a")
            .eq(lit("foo"))
            .and(col("b").eq(lit("1")).and(col("c").eq(lit("true"))))]),
        Some(Path::from("a=foo/b=1/c=true")),
    );

    // no prefix when the filter list is empty
    assert_eq!(prefix_of(&[]), None);

    // b=foo alone gives no prefix because a is not restricted
    assert_eq!(prefix_of(&[col("b").eq(lit("foo"))]), None);

    // a=foo and c=baz only gives prefix a=foo because b is not restricted
    assert_eq!(
        prefix_of(&[col("a").eq(lit("foo")).and(col("c").eq(lit("baz")))]),
        Some(Path::from("a=foo")),
    );

    // more than one filter expression in the list gives no prefix
    assert_eq!(
        prefix_of(&[col("a").eq(lit("foo")), col("b").eq(lit("bar"))]),
        None,
    );

    // a column compared with two literals gives no prefix
    assert_eq!(
        prefix_of(&[col("a").eq(lit("foo")).and(col("a").eq(lit("bar")))]),
        None,
    );

    // OR does not restrict a to a single literal, so no prefix
    assert_eq!(
        prefix_of(&[col("a").eq(lit("foo")).or(col("a").eq(lit("bar")))]),
        None,
    );
}
879
+
880
#[test]
fn test_evaluate_date_partition_prefix() {
    // Date32 literal: the expected prefix uses the date's string rendering.
    let date32_cols = &[("a".to_string(), DataType::Date32)];
    let date32_filter = col("a").eq(Expr::Literal(ScalarValue::Date32(Some(3))));
    assert_eq!(
        evaluate_partition_prefix(date32_cols, &[date32_filter]),
        Some(Path::from("a=1970-01-04")),
    );

    // Date64 literal built from four days expressed in milliseconds.
    let date64_cols = &[("a".to_string(), DataType::Date64)];
    let date64_filter = col("a").eq(Expr::Literal(ScalarValue::Date64(Some(
        4 * 24 * 60 * 60 * 1000,
    ))));
    assert_eq!(
        evaluate_partition_prefix(date64_cols, &[date64_filter]),
        Some(Path::from("a=1970-01-05")),
    );
}
695
906
}
0 commit comments