Skip to content

Commit 6592514

Browse files
committed
Add replacement formatter
1 parent 98f48eb commit 6592514

File tree

2 files changed

+14
-0
lines changed

2 files changed

+14
-0
lines changed

Diff for: src/datatrove/pipeline/formatters/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .ftfy import FTFYFormatter
22
from .pii import PIIFormatter
33
from .symbol_lines_remover import SymbolLinesFormatter
4+
from .custom import CustomFormatter

Diff for: src/datatrove/pipeline/formatters/custom.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from .base import BaseFormatter
2+
3+
4+
class CustomFormatter(BaseFormatter):
    """Formatter that rewrites text through a series of substring replacements."""

    def __init__(self, replacements: list):
        """Remember the replacement rules to apply.

        Args:
            replacements: Rules to apply; each item is indexed at ``[0]`` for
                the substring to search for and at ``[1]`` for the text that
                takes its place. The list is stored as given (not copied).
        """
        super().__init__()
        self.replacements = replacements

    def format(self, text: str) -> str:
        """Return ``text`` after applying every stored replacement rule.

        Rules run in the order they appear in ``self.replacements``, each one
        operating on the output of the previous rule, so earlier rules can
        affect what later rules match.
        """
        result = text
        for rule in self.replacements:
            old, new = rule[0], rule[1]
            result = result.replace(old, new)
        return result

0 commit comments

Comments
 (0)