Skip to content

Commit 70baa6c

Browse files
committed
Add Korean TN for cardinal numbers
Signed-off-by: Jinwoo Bae <34386414+bbae0312@users.noreply.github.com>
1 parent 3e4ac3e commit 70baa6c

File tree

6 files changed

+83
-0
lines changed

6 files changed

+83
-0
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from nemo_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
16+
from nemo_text_processing.text_normalization.en.verbalizers.verbalize import VerbalizeFst
17+
from nemo_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pynini
16+
from pynini.examples import plurals
17+
from pynini.lib import pynutil
18+

nemo_text_processing/text_normalization/kr/taggers/tokenize_and_classify.py

Whitespace-only changes.
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pynini
16+
from pynini.lib import pynutil
17+
18+
from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
19+
20+
21+
class CardinalFst(GraphFst):
22+
"""
23+
Finite state transducer for verbalizing cardinal, e.g.
24+
cardinal { negative: "true" integer: "23" } -> minus twenty three
25+
26+
Args:
27+
deterministic: if True will provide a single transduction option,
28+
for False multiple options (used for audio-based normalization)
29+
"""
30+
31+
def __init__(self, deterministic: bool = True):
32+
super().__init__(name="cardinal", kind="verbalize", deterministic=deterministic)
33+
34+
self.optional_sign = pynini.cross("negative: \"true\"", "minus ")
35+
if not deterministic:
36+
self.optional_sign |= pynini.cross("negative: \"true\"", "negative ")
37+
self.optional_sign |= pynini.cross("negative: \"true\"", "dash ")
38+
39+
self.optional_sign = pynini.closure(self.optional_sign + delete_space, 0, 1)
40+
41+
integer = pynini.closure(NEMO_NOT_QUOTE)
42+
43+
self.integer = delete_space + pynutil.delete("\"") + integer + pynutil.delete("\"")
44+
integer = pynutil.delete("integer:") + self.integer
45+
46+
self.numbers = self.optional_sign + integer
47+
delete_tokens = self.delete_tokens(self.numbers)
48+
self.fst = delete_tokens.optimize()

nemo_text_processing/text_normalization/kr/verbalizers/verbalize.py

Whitespace-only changes.

nemo_text_processing/text_normalization/kr/verbalizers/verbalize_final.py

Whitespace-only changes.

0 commit comments

Comments
 (0)