Skip to content

Commit 9cfcc0a

Browse files
committed
chore(cleaners): add type hints
1 parent a1495d4 commit 9cfcc0a

File tree

1 file changed

+20
-17
lines changed

1 file changed

+20
-17
lines changed

TTS/tts/utils/text/cleaners.py

+20 -17
Original file line number | Diff line number | Diff line change
@@ -18,34 +18,37 @@
1818
_whitespace_re = re.compile(r"\s+")
1919

2020

21-
def expand_abbreviations(text, lang="en"):
21+
def expand_abbreviations(text: str, lang: str = "en") -> str:
2222
if lang == "en":
2323
_abbreviations = abbreviations_en
2424
elif lang == "fr":
2525
_abbreviations = abbreviations_fr
26+
else:
27+
msg = f"Language {lang} not supported in expand_abbreviations"
28+
raise ValueError(msg)
2629
for regex, replacement in _abbreviations:
2730
text = re.sub(regex, replacement, text)
2831
return text
2932

3033

31-
def lowercase(text):
34+
def lowercase(text: str) -> str:
3235
return text.lower()
3336

3437

35-
def collapse_whitespace(text):
38+
def collapse_whitespace(text: str) -> str:
3639
return re.sub(_whitespace_re, " ", text).strip()
3740

3841

39-
def convert_to_ascii(text):
42+
def convert_to_ascii(text: str) -> str:
4043
return anyascii(text)
4144

4245

43-
def remove_aux_symbols(text):
46+
def remove_aux_symbols(text: str) -> str:
4447
text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text)
4548
return text
4649

4750

48-
def replace_symbols(text, lang: Optional[str] = "en"):
51+
def replace_symbols(text: str, lang: Optional[str] = "en") -> str:
4952
"""Replace symbols based on the language tag.
5053
5154
Args:
@@ -78,38 +81,38 @@ def replace_symbols(text, lang: Optional[str] = "en"):
7881
return text
7982

8083

81-
def basic_cleaners(text):
84+
def basic_cleaners(text: str) -> str:
8285
"""Basic pipeline that lowercases and collapses whitespace without transliteration."""
8386
text = lowercase(text)
8487
text = collapse_whitespace(text)
8588
return text
8689

8790

88-
def transliteration_cleaners(text):
91+
def transliteration_cleaners(text: str) -> str:
8992
"""Pipeline for non-English text that transliterates to ASCII."""
9093
# text = convert_to_ascii(text)
9194
text = lowercase(text)
9295
text = collapse_whitespace(text)
9396
return text
9497

9598

96-
def basic_german_cleaners(text):
99+
def basic_german_cleaners(text: str) -> str:
97100
"""Pipeline for German text"""
98101
text = lowercase(text)
99102
text = collapse_whitespace(text)
100103
return text
101104

102105

103106
# TODO: elaborate it
104-
def basic_turkish_cleaners(text):
107+
def basic_turkish_cleaners(text: str) -> str:
105108
"""Pipeline for Turkish text"""
106109
text = text.replace("I", "ı")
107110
text = lowercase(text)
108111
text = collapse_whitespace(text)
109112
return text
110113

111114

112-
def english_cleaners(text):
115+
def english_cleaners(text: str) -> str:
113116
"""Pipeline for English text, including number and abbreviation expansion."""
114117
# text = convert_to_ascii(text)
115118
text = lowercase(text)
@@ -122,7 +125,7 @@ def english_cleaners(text):
122125
return text
123126

124127

125-
def phoneme_cleaners(text):
128+
def phoneme_cleaners(text: str) -> str:
126129
"""Pipeline for phonemes mode, including number and abbreviation expansion.
127130
128131
NB: This cleaner converts numbers into English words, for other languages
@@ -136,15 +139,15 @@ def phoneme_cleaners(text):
136139
return text
137140

138141

139-
def multilingual_phoneme_cleaners(text):
142+
def multilingual_phoneme_cleaners(text: str) -> str:
140143
"""Pipeline for phonemes mode, including number and abbreviation expansion."""
141144
text = replace_symbols(text, lang=None)
142145
text = remove_aux_symbols(text)
143146
text = collapse_whitespace(text)
144147
return text
145148

146149

147-
def french_cleaners(text):
150+
def french_cleaners(text: str) -> str:
148151
"""Pipeline for French text. There is no need to expand numbers, phonemizer already does that"""
149152
text = expand_abbreviations(text, lang="fr")
150153
text = lowercase(text)
@@ -154,7 +157,7 @@ def french_cleaners(text):
154157
return text
155158

156159

157-
def portuguese_cleaners(text):
160+
def portuguese_cleaners(text: str) -> str:
158161
"""Basic pipeline for Portuguese text. There is no need to expand abbreviation and
159162
numbers, phonemizer already does that"""
160163
text = lowercase(text)
@@ -170,7 +173,7 @@ def chinese_mandarin_cleaners(text: str) -> str:
170173
return text
171174

172175

173-
def multilingual_cleaners(text):
176+
def multilingual_cleaners(text: str) -> str:
174177
"""Pipeline for multilingual text"""
175178
text = lowercase(text)
176179
text = replace_symbols(text, lang=None)
@@ -179,7 +182,7 @@ def multilingual_cleaners(text):
179182
return text
180183

181184

182-
def no_cleaners(text):
185+
def no_cleaners(text: str) -> str:
183186
# remove newline characters
184187
text = text.replace("\n", "")
185188
return text

0 commit comments

Comments
 (0)