Skip to content

Commit 33be756

Browse files
committed
fix oscar filter
1 parent 0d70701 commit 33be756

File tree

1 file changed

+2
-3
lines changed

1 file changed

+2
-3
lines changed

Diff for: src/datatrove/pipeline/filters/oscar_filter.py

+2 -3
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,19 @@
2626
class OSCARFilter(BaseFilter):
2727
name = "🗑 OSCAR"
2828

29-
def __init__(self, regex_exp: str,
29+
def __init__(self,
3030
exclusion_writer: DiskWriter = None,
3131
min_harmful_ppl: float = DEFAULT_OSCAR_MIN_HARMFUL_PP,
3232
max_harmful_ppl: float = DEFAULT_OSCAR_MAX_HARMFUL_PP,
3333
exclude_categories: set = DEFAULT_EXCLUDE_CATEGORIES):
3434
"""
35-
filters if regex finds at least one match
35+
filters data based on OSCAR annotations
3636
3737
Args:
3838
regex_exp: regex expression
3939
exclusion_writer:
4040
"""
4141
super().__init__(exclusion_writer)
42-
self.regex = re.compile(regex_exp)
4342
self.min_harmful_ppl = min_harmful_ppl
4443
self.max_harmful_ppl = max_harmful_ppl
4544
self.exclude_categories = exclude_categories

0 commit comments

Comments
 (0)