@@ -52,11 +52,6 @@ class UnsupportedFileReferenceError(UnsupportedFilteringFeatureError):
52
52
feature = "File references"
53
53
54
54
55
- class UnsupportedChromFieldError (UnsupportedFilteringFeatureError ):
56
- issue = "178"
57
- feature = "CHROM field"
58
-
59
-
60
55
class UnsupportedFunctionsError (UnsupportedFilteringFeatureError ):
61
56
issue = "190"
62
57
feature = "Function evaluation"
@@ -117,9 +112,7 @@ def __init__(self, tokens):
117
112
class Identifier (EvaluationNode ):
118
113
def __init__ (self , mapper , tokens ):
119
114
token = tokens [0 ]
120
- if token == "CHROM" :
121
- raise UnsupportedChromFieldError ()
122
- elif token == "GT" :
115
+ if token == "GT" :
123
116
raise UnsupportedGenotypeValuesError ()
124
117
self .field_name = mapper (token )
125
118
logger .debug (f"Mapped { token } to { self .field_name } " )
@@ -292,6 +285,47 @@ def referenced_fields(self):
292
285
return self .op1 .referenced_fields () | self .op2 .referenced_fields ()
293
286
294
287
288
# CHROM field expressions are handled by dedicated operators: the quoted
# contig name is translated to its index in "contig_id" so that string
# comparisons are not performed for every variant site.
290
+
291
+
292
class ChromString(Constant):
    """Quoted contig name used as the right-hand side of a CHROM comparison.

    Evaluates to the position of the name within ``data["contig_id"]``
    rather than to the string itself. Construction is inherited unchanged
    from ``Constant``.
    """

    def eval(self, data):
        """Return the index of this contig name in ``data["contig_id"]``.

        Returns -1 when the name is not present.
        """
        # NOTE(review): self.tokens is presumably the parsed string stored by
        # the base-class constructor - confirm against EvaluationNode.
        known_contigs = list(data["contig_id"])
        try:
            contig_index = known_contigs.index(self.tokens)
        except ValueError:
            # won't match anything
            contig_index = -1
        return contig_index

    def referenced_fields(self):
        """Return the set of data fields this node reads during ``eval``."""
        return frozenset(["contig_id"])
305
+
306
+
307
class ChromFieldOperator(EvaluationNode):
    """Equality / inequality comparison between two operand nodes.

    Built from a three-token parse result ``(op1, op, op2)`` where ``op`` is
    one of the keys of ``op_map``.
    """

    # Both "=" and "==" mean equality.
    op_map = {"=": operator.eq, "==": operator.eq, "!=": operator.ne}

    def __init__(self, tokens):
        """Split ``tokens`` into operands and operator and pick the comparator.

        Raises ``KeyError`` if the operator is not in ``op_map``.
        """
        super().__init__(tokens)
        # Unpack the raw ``tokens`` argument here, not ``self.tokens``.
        left, symbol, right = tokens
        self.op1 = left
        self.op = symbol
        self.op2 = right
        self.comparison_fn = self.op_map[symbol]

    def eval(self, data):
        """Evaluate both operands against ``data`` and compare the results."""
        lhs = self.op1.eval(data)
        rhs = self.op2.eval(data)
        return self.comparison_fn(lhs, rhs)

    def __repr__(self):
        return f"({self.op1!r}){self.op}({self.op2!r})"

    def referenced_fields(self):
        """Return the union of the fields referenced by both operands."""
        left_fields = self.op1.referenced_fields()
        right_fields = self.op2.referenced_fields()
        return left_fields | right_fields
327
+
328
+
295
329
# FILTER field expressions have special set-like semantics
296
330
# so they are handled by dedicated operators.
297
331
@@ -382,6 +416,14 @@ def make_bcftools_filter_parser(all_fields=None, map_vcf_identifiers=True):
382
416
if map_vcf_identifiers :
383
417
name_mapper = functools .partial (vcf_name_to_vcz_name , all_fields )
384
418
419
+ chrom_field_identifier = pp .Literal ("CHROM" )
420
+ chrom_field_identifier = chrom_field_identifier .set_parse_action (
421
+ functools .partial (Identifier , name_mapper )
422
+ )
423
+ chrom_string = pp .QuotedString ('"' ).set_parse_action (ChromString )
424
+ chrom_field_expr = chrom_field_identifier + pp .one_of ("= == !=" ) + chrom_string
425
+ chrom_field_expr = chrom_field_expr .set_parse_action (ChromFieldOperator )
426
+
385
427
filter_field_identifier = pp .Literal ("FILTER" )
386
428
filter_field_identifier = filter_field_identifier .set_parse_action (
387
429
functools .partial (Identifier , name_mapper )
@@ -416,7 +458,8 @@ def make_bcftools_filter_parser(all_fields=None, map_vcf_identifiers=True):
416
458
417
459
comp_op = pp .oneOf ("< = == > >= <= !=" )
418
460
filter_expression = pp .infix_notation (
419
- filter_field_expr
461
+ chrom_field_expr
462
+ | filter_field_expr
420
463
| function
421
464
| constant
422
465
| indexed_identifier
0 commit comments