From fa304a251a31c5f0efe20dee7997f521eb0b9e39 Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Tue, 13 May 2025 10:54:38 -0700 Subject: [PATCH 1/8] First draft of Korean Cardinal ITN Sparrowhawk testing is not done yet. Signed-off-by: hmlee245 --- .../inverse_normalize.py | 7 +- .../inverse_text_normalization/ko/__init__.py | 17 + .../ko/clean_eval_data.py | 361 ++++++++++++++++++ .../ko/data/__init__.py | 13 + .../ko/data/numbers/__init__.py | 13 + .../ko/data/numbers/digit.tsv | 9 + .../ko/data/numbers/thousands.tsv | 11 + .../ko/data/numbers/zero.tsv | 1 + .../ko/graph_utils.py | 292 ++++++++++++++ .../ko/taggers/__init__.py | 17 + .../ko/taggers/cardinal.py | 104 +++++ .../ko/taggers/tokenize_and_classify.py | 76 ++++ .../ko/taggers/word.py | 32 ++ .../inverse_text_normalization/ko/utils.py | 23 ++ .../ko/verbalizers/__init__.py | 17 + .../ko/verbalizers/cardinal.py | 54 +++ .../ko/verbalizers/verbalize.py | 36 ++ .../ko/verbalizers/verbalize_final.py | 49 +++ .../ko/verbalizers/word.py | 34 ++ .../run_evaluate.py | 2 +- tests/nemo_text_processing/ko/__init__.py | 13 + .../test_cases_cardinal.txt | 27 ++ .../nemo_text_processing/ko/test_cardinal.py | 39 ++ ..._sparrowhawk_inverse_text_normalization.sh | 34 ++ .../pynini_export.py | 8 + 25 files changed, 1287 insertions(+), 2 deletions(-) create mode 100644 nemo_text_processing/inverse_text_normalization/ko/__init__.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/__init__.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/__init__.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/digit.tsv create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv create mode 100644 nemo_text_processing/inverse_text_normalization/ko/graph_utils.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/taggers/word.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/utils.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py create mode 100644 nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py create mode 100644 tests/nemo_text_processing/ko/__init__.py create mode 100644 tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt create mode 100644 tests/nemo_text_processing/ko/test_cardinal.py create mode 100644 tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh diff --git a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py index c10819908..e505a8ad0 100644 --- a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py +++ 
b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py @@ -131,6 +131,11 @@ def __init__( from nemo_text_processing.inverse_text_normalization.ja.verbalizers.verbalize_final import ( VerbalizeFinalFst, ) + elif lang == 'ko': # Korean + from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst + from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import ( + VerbalizeFinalFst, + ) self.tagger = ClassifyFst( cache_dir=cache_dir, whitelist=whitelist, overwrite_cache=overwrite_cache, input_case=input_case @@ -175,7 +180,7 @@ def parse_args(): parser.add_argument( "--language", help="language", - choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja'], + choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja','ko'], default="en", type=str, ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/__init__.py new file mode 100644 index 000000000..f541211af --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py b/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py new file mode 100644 index 000000000..3c1193333 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py @@ -0,0 +1,361 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from argparse import ArgumentParser +from typing import List + +import regex as re + +from nemo_text_processing.text_normalization.data_loader_utils import ( + EOS_TYPE, + Instance, + load_files, + training_data_to_sentences, +) + +""" +This file is for evaluation purposes. +filter_loaded_data() cleans data (list of instances) for inverse text normalization. Filters and cleaners can be specified for each semiotic class individually. 
+For example, normalized text should only include characters and whitespace characters but no punctuation. + Cardinal unnormalized instances should contain at least one integer and all other characters are removed. +""" + + +class Filter: + """ + Filter class + + Args: + class_type: semiotic class used in dataset + process_func: function to transform text + filter_func: function to filter text + + """ + + def __init__(self, class_type: str, process_func: object, filter_func: object): + self.class_type = class_type + self.process_func = process_func + self.filter_func = filter_func + + def filter(self, instance: Instance) -> bool: + """ + filter function + + Args: + filters given instance with filter function + + Returns: True if given instance fulfills criteria or does not belong to class type + """ + if instance.token_type != self.class_type: + return True + return self.filter_func(instance) + + def process(self, instance: Instance) -> Instance: + """ + process function + + Args: + processes given instance with process function + + Returns: processed instance if instance belongs to expected class type or original instance + """ + if instance.token_type != self.class_type: + return instance + return self.process_func(instance) + + +def filter_cardinal_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_cardinal_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + un_normalized = re.sub(r"[^0-9]", "", un_normalized) + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_ordinal_1(instance: Instance) -> bool: + ok = re.search(r"(st|nd|rd|th)\s*$", instance.un_normalized) + return ok + + +def process_ordinal_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + un_normalized = re.sub(r"[,\s]", "", un_normalized) + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_decimal_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_decimal_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + un_normalized = re.sub(r",", "", un_normalized) + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_measure_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_measure_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + un_normalized = re.sub(r",", "", un_normalized) + un_normalized = re.sub(r"m2", "m²", un_normalized) + un_normalized = re.sub(r"(\d)([^\d.\s])", r"\1 \2", un_normalized) + normalized = re.sub(r"[^a-z\s]", "", normalized) + normalized = re.sub(r"per ([a-z\s]*)s$", r"per \1", normalized) + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_money_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_money_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + 
un_normalized = re.sub(r",", "", un_normalized) + un_normalized = re.sub(r"a\$", r"$", un_normalized) + un_normalized = re.sub(r"us\$", r"$", un_normalized) + un_normalized = re.sub(r"(\d)m\s*$", r"\1 million", un_normalized) + un_normalized = re.sub(r"(\d)bn?\s*$", r"\1 billion", un_normalized) + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_time_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_time_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + un_normalized = re.sub(r": ", ":", un_normalized) + un_normalized = re.sub(r"(\d)\s?a\s?m\s?", r"\1 a.m.", un_normalized) + un_normalized = re.sub(r"(\d)\s?p\s?m\s?", r"\1 p.m.", un_normalized) + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_plain_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_plain_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_punct_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_punct_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_date_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_date_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + un_normalized = re.sub(r",", "", un_normalized) + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_letters_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_letters_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_verbatim_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_verbatim_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_digit_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_digit_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_telephone_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_telephone_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, 
un_normalized=un_normalized, normalized=normalized) + + +def filter_electronic_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_electronic_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_fraction_1(instance: Instance) -> bool: + ok = re.search(r"[0-9]", instance.un_normalized) + return ok + + +def process_fraction_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +def filter_address_1(instance: Instance) -> bool: + ok = True + return ok + + +def process_address_1(instance: Instance) -> Instance: + un_normalized = instance.un_normalized + normalized = instance.normalized + normalized = re.sub(r"[^a-z ]", "", normalized) + return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) + + +filters = [] +filters.append(Filter(class_type="CARDINAL", + process_func=process_cardinal_1, filter_func=filter_cardinal_1)) +filters.append(Filter(class_type="ORDINAL", + process_func=process_ordinal_1, filter_func=filter_ordinal_1)) +filters.append(Filter(class_type="DECIMAL", + process_func=process_decimal_1, filter_func=filter_decimal_1)) +filters.append(Filter(class_type="MEASURE", + process_func=process_measure_1, filter_func=filter_measure_1)) +filters.append(Filter(class_type="MONEY", + process_func=process_money_1, filter_func=filter_money_1)) +filters.append(Filter(class_type="TIME", + process_func=process_time_1, filter_func=filter_time_1)) + +filters.append(Filter(class_type="DATE", + process_func=process_date_1, filter_func=filter_date_1)) +filters.append(Filter(class_type="PLAIN", + process_func=process_plain_1, filter_func=filter_plain_1)) +filters.append(Filter(class_type="PUNCT", + process_func=process_punct_1, filter_func=filter_punct_1)) +filters.append(Filter(class_type="LETTERS", + process_func=process_letters_1, filter_func=filter_letters_1)) +filters.append(Filter(class_type="VERBATIM", + process_func=process_verbatim_1, filter_func=filter_verbatim_1)) +filters.append(Filter(class_type="DIGIT", + process_func=process_digit_1, filter_func=filter_digit_1)) +filters.append(Filter(class_type="TELEPHONE", + process_func=process_telephone_1, filter_func=filter_telephone_1)) +filters.append(Filter(class_type="ELECTRONIC", + process_func=process_electronic_1, filter_func=filter_electronic_1)) +filters.append(Filter(class_type="FRACTION", + process_func=process_fraction_1, filter_func=filter_fraction_1)) +filters.append(Filter(class_type="ADDRESS", + process_func=process_address_1, filter_func=filter_address_1)) +filters.append(Filter(class_type=EOS_TYPE, + process_func=lambda x: x, filter_func=lambda x: True)) + + +def filter_loaded_data(data: List[Instance], verbose: bool = False) -> List[Instance]: + """ + Filters list of instances + + Args: + data: list of instances + + Returns: filtered and transformed list of instances + """ + updates_instances = [] + for instance in data: + updated_instance = False + for fil in filters: + if fil.class_type == instance.token_type and fil.filter(instance): + instance = fil.process(instance) + updated_instance = True 
+ if updated_instance: + if verbose: + print(instance) + updates_instances.append(instance) + return updates_instances + + +def parse_args(): + parser = ArgumentParser() + parser.add_argument("--input", help="input file path", + type=str, default='./en_with_types/output-00001-of-00100') + parser.add_argument( + "--verbose", help="print filtered instances", action='store_true') + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + file_path = args.input + + print("Loading training data: " + file_path) + instance_list = load_files([file_path]) # List of instances + filtered_instance_list = filter_loaded_data(instance_list, args.verbose) + training_data_to_sentences(filtered_instance_list) diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/data/__init__.py new file mode 100644 index 000000000..341a77c5b --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/__init__.py new file mode 100644 index 000000000..341a77c5b --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
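A minimal sketch of how the filter pipeline in clean_eval_data.py above is meant to be driven (hedged: Instance is assumed to be the token_type/un_normalized/normalized named tuple from data_loader_utils, and the sample strings are illustrative only):

    from nemo_text_processing.text_normalization.data_loader_utils import Instance
    from nemo_text_processing.inverse_text_normalization.ko.clean_eval_data import filter_loaded_data

    # A CARDINAL instance passes filter_cardinal_1 (it contains a digit); process_cardinal_1
    # then strips non-digits from the written side and non-letters from the spoken side.
    data = [Instance(token_type="CARDINAL", un_normalized="1,234", normalized="one thousand two hundred thirty four")]
    print(filter_loaded_data(data, verbose=False))
    # [Instance(token_type='CARDINAL', un_normalized='1234', normalized='one thousand two hundred thirty four')]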
diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/digit.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/digit.tsv new file mode 100644 index 000000000..9871cb9cf --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/digit.tsv @@ -0,0 +1,9 @@ +일 1 +이 2 +삼 3 +사 4 +오 5 +육 6 +칠 7 +팔 8 +구 9 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv new file mode 100644 index 000000000..541752211 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv @@ -0,0 +1,11 @@ +억 +조 +경 +해 +자 +양 +구 +간 +정 +재 +극 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv new file mode 100644 index 000000000..43baac7c1 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv @@ -0,0 +1 @@ +영 0 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py b/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py new file mode 100644 index 000000000..7a9fd8720 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py @@ -0,0 +1,292 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
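+
+# A hedged usage sketch of the token helpers that the GraphFst base class at
+# the bottom of this module provides (all names come from this file; the
+# printed strings are assumed, not tested output):
+#
+#   import pynini
+#   g = GraphFst(name="cardinal", kind="classify")
+#   tagged = g.add_tokens(pynini.cross("이", "2"))
+#   print(pynini.shortestpath("이" @ tagged).string())  # -> cardinal { 2 }
+#
+#   v = GraphFst(name="cardinal", kind="verbalize")
+#   untagged = v.delete_tokens(pynini.accep("2"))
+#   print(pynini.shortestpath("cardinal { 2 }" @ untagged).string())  # -> 2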
+ +import logging +import os +import string +from pathlib import Path +from typing import Dict + +import pynini +from pynini import Far +from pynini.examples import plurals +from pynini.export import export +from pynini.lib import byte, pynutil, utf8 + +from nemo_text_processing.text_normalization.en.utils import get_abs_path, load_labels + +NEMO_CHAR = utf8.VALID_UTF8_CHAR + +NEMO_NARROW_NON_BREAK_SPACE = "\u202f" +NEMO_DIGIT = byte.DIGIT +NEMO_LOWER = pynini.union(*string.ascii_lowercase).optimize() +NEMO_UPPER = pynini.union(*string.ascii_uppercase).optimize() +NEMO_ALPHA = pynini.union(NEMO_LOWER, NEMO_UPPER).optimize() +NEMO_ALNUM = pynini.union(NEMO_DIGIT, NEMO_ALPHA).optimize() +NEMO_HEX = pynini.union(*string.hexdigits).optimize() +NEMO_NON_BREAKING_SPACE = "\u00a0" +NEMO_SPACE = " " +NEMO_WHITE_SPACE = pynini.union(" ", "\t", "\n", "\r", u"\u00a0").optimize() +NEMO_NOT_SPACE = pynini.difference(NEMO_CHAR, NEMO_WHITE_SPACE).optimize() +NEMO_NOT_QUOTE = pynini.difference(NEMO_CHAR, r'"').optimize() + +NEMO_PUNCT = pynini.union(*map(pynini.escape, string.punctuation)).optimize() +NEMO_GRAPH = pynini.union(NEMO_ALNUM, NEMO_PUNCT).optimize() + +NEMO_SIGMA = pynini.closure(NEMO_CHAR) + +NEMO_NOT_ALPHA = pynini.difference(NEMO_SIGMA, NEMO_ALPHA).optimize() +NEMO_LOWER_NOT_A = pynini.union( + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", +).optimize() + +delete_space = pynutil.delete(pynini.closure(NEMO_WHITE_SPACE)) +delete_zero_or_one_space = pynutil.delete(pynini.closure(NEMO_WHITE_SPACE, 0, 1)) +insert_space = pynutil.insert(" ") +delete_extra_space = pynini.cross(pynini.closure(NEMO_WHITE_SPACE, 1), " ") +delete_preserve_order = pynini.closure( + pynutil.delete(" preserve_order: true") + | (pynutil.delete(" field_order: \"") + NEMO_NOT_QUOTE + pynutil.delete("\"")) +) + +suppletive = pynini.string_file(get_abs_path("data/suppletive.tsv")) +# _v = pynini.union("a", "e", "i", "o", "u") +_c = pynini.union( + "b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v", "w", "x", "y", "z" +) +_ies = NEMO_SIGMA + _c + pynini.cross("y", "ies") +_es = NEMO_SIGMA + pynini.union("s", "sh", "ch", "x", "z") + pynutil.insert("es") +_s = NEMO_SIGMA + pynutil.insert("s") + +graph_plural = plurals._priority_union( + suppletive, plurals._priority_union(_ies, plurals._priority_union(_es, _s, NEMO_SIGMA), NEMO_SIGMA), NEMO_SIGMA +).optimize() + +SINGULAR_TO_PLURAL = graph_plural +PLURAL_TO_SINGULAR = pynini.invert(graph_plural) +TO_LOWER = pynini.union(*[pynini.cross(x, y) for x, y in zip(string.ascii_uppercase, string.ascii_lowercase)]) +TO_UPPER = pynini.invert(TO_LOWER) +MIN_NEG_WEIGHT = -0.0001 +MIN_POS_WEIGHT = 0.0001 +INPUT_CASED = "cased" +INPUT_LOWER_CASED = "lower_cased" +MINUS = pynini.union("minus", "Minus").optimize() + + +def capitalized_input_graph( + graph: 'pynini.FstLike', original_graph_weight: float = None, capitalized_graph_weight: float = None +) -> 'pynini.FstLike': + """ + Allow graph input to be capitalized, e.g. 
for ITN) + + Args: + graph: FstGraph + original_graph_weight: weight to add to the original `graph` + capitalized_graph_weight: weight to add to the capitalized graph + """ + capitalized_graph = pynini.compose(TO_LOWER + NEMO_SIGMA, graph).optimize() + + if original_graph_weight is not None: + graph = pynutil.add_weight(graph, weight=original_graph_weight) + + if capitalized_graph_weight is not None: + capitalized_graph = pynutil.add_weight(capitalized_graph, weight=capitalized_graph_weight) + + graph |= capitalized_graph + return graph + + +def generator_main(file_name: str, graphs: Dict[str, 'pynini.FstLike']): + """ + Exports graph as OpenFst finite state archive (FAR) file with given file name and rule name. + + Args: + file_name: exported file name + graphs: Mapping of a rule name and Pynini WFST graph to be exported + """ + exporter = export.Exporter(file_name) + for rule, graph in graphs.items(): + exporter[rule] = graph.optimize() + exporter.close() + logging.info(f'Created {file_name}') + + +def get_plurals(fst): + """ + Given singular returns plurals + + Args: + fst: Fst + + Returns plurals to given singular forms + """ + return SINGULAR_TO_PLURAL @ fst + + +def get_singulars(fst): + """ + Given plural returns singulars + + Args: + fst: Fst + + Returns singulars to given plural forms + """ + return PLURAL_TO_SINGULAR @ fst + + +def convert_space(fst) -> 'pynini.FstLike': + """ + Converts space to nonbreaking space. + Used only in tagger grammars for transducing token values within quotes, e.g. name: "hello kitty" + This is making transducer significantly slower, so only use when there could be potential spaces within quotes, otherwise leave it. + + Args: + fst: input fst + + Returns output fst where breaking spaces are converted to non breaking spaces + """ + return fst @ pynini.cdrewrite(pynini.cross(NEMO_SPACE, NEMO_NON_BREAKING_SPACE), "", "", NEMO_SIGMA) + + +def string_map_cased(input_file: str, input_case: str = INPUT_LOWER_CASED): + labels = load_labels(input_file) + + if input_case == INPUT_CASED: + additional_labels = [] + for written, spoken, *weight in labels: + written_capitalized = written[0].upper() + written[1:] + additional_labels.extend( + [ + [written_capitalized, spoken.capitalize()], # first letter capitalized + [ + written_capitalized, + spoken.upper().replace(" AND ", " and "), + ], # # add pairs with the all letters capitalized + ] + ) + + spoken_no_space = spoken.replace(" ", "") + # add abbreviations without spaces (both lower and upper case), i.e. "BMW" not "B M W" + if len(spoken) == (2 * len(spoken_no_space) - 1): + logging.debug(f"This is weight {weight}") + if len(weight) == 0: + additional_labels.extend( + [[written, spoken_no_space], [written_capitalized, spoken_no_space.upper()]] + ) + else: + additional_labels.extend( + [ + [written, spoken_no_space, weight[0]], + [written_capitalized, spoken_no_space.upper(), weight[0]], + ] + ) + labels += additional_labels + + whitelist = pynini.string_map(labels).invert().optimize() + return whitelist + + +class GraphFst: + """ + Base class for all grammar fsts. 
+ + Args: + name: name of grammar class + kind: either 'classify' or 'verbalize' + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, name: str, kind: str, deterministic: bool = True): + self.name = name + self.kind = kind + self._fst = None + self.deterministic = deterministic + + self.far_path = Path(os.path.dirname(__file__) + '/grammars/' + kind + '/' + name + '.far') + if self.far_exist(): + self._fst = Far(self.far_path, mode="r", arc_type="standard", far_type="default").get_fst() + + def far_exist(self) -> bool: + """ + Returns true if FAR can be loaded + """ + return self.far_path.exists() + + @property + def fst(self) -> 'pynini.FstLike': + return self._fst + + @fst.setter + def fst(self, fst): + self._fst = fst + + def add_tokens(self, fst) -> 'pynini.FstLike': + """ + Wraps class name around to given fst + + Args: + fst: input fst + + Returns: + Fst: fst + """ + return pynutil.insert(f"{self.name} {{ ") + fst + pynutil.insert(" }") + + def delete_tokens(self, fst) -> 'pynini.FstLike': + """ + Deletes class name wrap around output of given fst + + Args: + fst: input fst + + Returns: + Fst: fst + """ + res = ( + pynutil.delete(f"{self.name}") + + delete_space + + pynutil.delete("{") + + delete_space + + fst + + delete_space + + pynutil.delete("}") + ) + return res @ pynini.cdrewrite(pynini.cross(u"\u00a0", " "), "", "", NEMO_SIGMA) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py new file mode 100644 index 000000000..f541211af --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py new file mode 100644 index 000000000..df5804fc0 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -0,0 +1,104 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pynini
+from pynini.lib import pynutil
+
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_DIGIT, GraphFst, delete_space
+from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path
+
+class CardinalFst(GraphFst):
+    """
+    Finite state transducer for classifying cardinals
+        e.g. 마이너스 이십삼 -> cardinal { negative: "-" integer: "23" }
+
+    Covers Sino-Korean numerals from 영 (zero) up through the "경" (10^16)
+    scale unit; takes no constructor arguments.
+    """
+
+    def __init__(self):
+        super().__init__(name="cardinal", kind="classify")
+
+        graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
+        graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
+        # zero.tsv maps 영 -> 0; digit.tsv maps 일-구 -> 1-9
+
+        graph_negative = pynini.cross("마이너스", "-")
+        graph_negative += delete_space
+
+        ten = pynutil.delete("십")
+        ten_alt = pynini.cross("십", "1")
+        ### Tens digit of a two-digit number, e.g. the 2 in 20
+        graph_ten_component = pynini.union((graph_digit + ten) | ten_alt, pynutil.insert("0"))
+        ### Ones digit of the number, e.g. 1, 2, 3, ...
+        graph_ten_component += graph_digit | pynutil.insert("0")
+
+        hundred = pynutil.delete("백")
+        hundred_alt = pynini.cross("백", "1")
+        graph_hundred_component = pynini.union(((graph_digit + hundred) | hundred_alt), pynutil.insert("0"))
+        graph_hundred_component += graph_ten_component
+
+        thousand = pynutil.delete("천")
+        thousand_alt = pynini.cross("천", "1")
+        graph_thousand_component = pynini.union(((graph_digit + thousand) | thousand_alt), pynutil.insert("0"))
+        graph_thousand_component += graph_hundred_component
+
+        tenthousand = pynutil.delete("만")
+        tenthousand_alt = pynini.cross("만", "1")
+        ### "만" covers the next four digits up to the next unit "억", so insert "0000" to reserve four digits' worth of space
+        ### From "만" on, keep adding four-digit groups (graph_thousand_component, 0000-9999), because Korean units step up every four digits
+        graph_tenthousand_component = pynini.union(((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000"))
+        graph_tenthousand_component += graph_thousand_component
+
+        hundredmillion = pynutil.delete("억")
+        hundredmillion_alt = pynini.cross("억", "1")
+        graph_hundredmillion_component = pynini.union(((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000"))
+        graph_hundredmillion_component += graph_tenthousand_component
+
+        trillion = pynutil.delete("조")
+        trillion_alt = pynini.cross("조", "1")
+        graph_trillion_component = pynini.union(((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000"))
+        graph_trillion_component += graph_hundredmillion_component
+
+        tenquadrillion = pynutil.delete("경")
+        tenquadrillion_alt = pynini.cross("경", "1")
+        graph_tenquadrillion_component = pynini.union(((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000"))
+        graph_tenquadrillion_component += graph_trillion_component
+
+
+        graph = pynini.union(
+            ### From biggest unit to smallest, everything is included
+            graph_tenquadrillion_component |
+            graph_zero
+        )
+
+        leading_zero = (
+            pynutil.delete(pynini.closure("0")) + pynini.difference(NEMO_DIGIT, "0") + pynini.closure(NEMO_DIGIT)
+        )
+        # compose nonzero numbers with leading_zero to strip leading zeros,
+        # then re-admit the bare zero form
+        graph_nonzero = graph @ leading_zero
+        graph = pynini.union(graph_nonzero, graph_zero)
+
+        self.just_cardinals = graph
+
+        optional_sign = pynini.closure(
+            (pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space + pynutil.insert(" "), 0, 1
+        )
+
+        final_graph = optional_sign + pynutil.insert("integer: \"") + graph + pynutil.insert("\"")
+
+        final_graph = self.add_tokens(final_graph)
+        self.fst = final_graph.optimize()
\ No newline at end of file
diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py
new file mode 100644
index 000000000..760ce6829
--- /dev/null
+++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2015 and onwards Google, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+
+import pynini
+from pynini.lib import pynutil
+
+from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst
+from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import (
+    INPUT_LOWER_CASED,
+    GraphFst,
+    delete_extra_space,
+    delete_space,
+    generator_main,
+)
+
+
+class ClassifyFst(GraphFst):
+    """
+    Final class that composes all other classification grammars. This class can process an entire sentence, that is lower cased.
+    For deployment, this grammar will be compiled and exported to OpenFst Finite State Archive (FAR) File.
+    More details to deployment at NeMo/tools/text_processing_deployment.
+
+    Args:
+        input_case: accepting either "lower_cased" or "cased" input.
+        cache_dir: path to a dir with .far grammar file. Set to None to avoid using cache.
+        overwrite_cache: set to True to overwrite .far files
+        whitelist: path to a file with whitelist replacements
+    """
+
+    def __init__(
+        self,
+        input_case: str = INPUT_LOWER_CASED,
+        cache_dir: str = None,
+        overwrite_cache: bool = False,
+        whitelist: str = None,
+    ):
+        super().__init__(name="tokenize_and_classify", kind="classify")
+
+        far_file = None
+        if cache_dir is not None and cache_dir != "None":
+            os.makedirs(cache_dir, exist_ok=True)
+            far_file = os.path.join(cache_dir, f"ko_itn_{input_case}.far")
+        if not overwrite_cache and far_file and os.path.exists(far_file):
+            self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
+            logging.info(f"ClassifyFst.fst was restored from {far_file}.")
+        else:
+            logging.info(f"Creating ClassifyFst grammars.")
+            cardinal = CardinalFst()
+            cardinal_graph = cardinal.fst
+            word_graph = WordFst().fst
+            classify = (pynutil.add_weight(cardinal_graph, 1.1)| pynutil.add_weight(word_graph, 100))
+
+            token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ")
+            tagger = pynini.closure(token, 1)
+
+            self.fst = tagger
+
+            if far_file:
+                generator_main(far_file, {"tokenize_and_classify": self.fst})
+                logging.info(f"ClassifyFst grammars are saved to {far_file}.")
\ No newline at end of file
diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py
new file mode 100644
index 000000000..0d6ccd5c5
--- /dev/null
+++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2015 and onwards Google, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pynini
+from pynini.lib import pynutil
+
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_SPACE, GraphFst
+
+
+class WordFst(GraphFst):
+    """
+    Finite state transducer for classifying plain tokens, that do not belong to any special class. This can be considered as the default class.
+    e.g. sleep -> tokens { name: "sleep" }
+    """
+
+    def __init__(self):
+        super().__init__(name="word", kind="classify")
+        word = pynutil.insert(
+            "name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"")
+        self.fst = word.optimize()
diff --git a/nemo_text_processing/inverse_text_normalization/ko/utils.py b/nemo_text_processing/inverse_text_normalization/ko/utils.py
new file mode 100644
index 000000000..0222cc0b8
--- /dev/null
+++ b/nemo_text_processing/inverse_text_normalization/ko/utils.py
@@ -0,0 +1,23 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + + + +def get_abs_path(rel_path): + + return os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path + + diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py new file mode 100644 index 000000000..da950f35e --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py new file mode 100644 index 000000000..1800a6dc8 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py @@ -0,0 +1,54 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( + NEMO_NOT_QUOTE, + GraphFst, + delete_space, +) + + +class CardinalFst(GraphFst): + """ + Finite state transducer for verbalizing cardinal + e.g. 
cardinal { negative: "-" integer: "23" } -> -23 + """ + + def __init__(self): + super().__init__(name="cardinal", kind="verbalize") + negative_sign = ( + pynutil.delete("negative:") + + delete_space + + pynutil.delete("\"") + + pynini.accep("-") + + pynutil.delete("\"") + ) + + optional_sign_output = pynini.closure(negative_sign + delete_space, 0, 1) + + digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) + integer_cardinal = ( + pynutil.delete("integer:") + + delete_space + + pynutil.delete("\"") + + digits_from_tag + + pynutil.delete("\"") + ) + + graph = integer_cardinal + final_graph = optional_sign_output + graph + self.fst = self.delete_tokens(final_graph).optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py new file mode 100644 index 000000000..9d750d757 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -0,0 +1,36 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst + + +class VerbalizeFst(GraphFst): + """ + Composes other verbalizer grammars. + For deployment, this grammar will be compiled and exported to OpenFst Finite State Archive (FAR) File. + More details to deployment at NeMo/tools/text_processing_deployment. + """ + + def __init__(self): + super().__init__(name="verbalize", kind="verbalize") + cardinal = CardinalFst() + cardinal_graph = cardinal.fst + word_graph = WordFst().fst + + graph = (cardinal_graph|word_graph) + self.fst = graph + diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py new file mode 100644 index 000000000..8554fc161 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -0,0 +1,49 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright 2015 and onwards Google, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
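+
+# A hedged end-to-end sketch of how this final verbalizer is driven once built
+# (names from this module; the tagged string follows the cardinal tagger's
+# output format in taggers/cardinal.py):
+#
+#   import pynini
+#   fst = VerbalizeFinalFst(cache_dir=None).fst
+#   tagged = 'tokens { cardinal { integer: "23" } }'
+#   print(pynini.shortestpath(tagged @ fst).string())  # -> 23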
+
+import os
+
+import pynini
+from pynini.lib import pynutil
+
+from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst
+from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space
+
+
+class VerbalizeFinalFst(GraphFst):
+    """
+    Finite state transducer that verbalizes an entire sentence, e.g.
+    tokens { cardinal { negative: "-" integer: "23" } } -> -23
+    """
+    def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False):
+        super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic)
+        far_file = None
+        if cache_dir is not None and cache_dir != "None":
+            os.makedirs(cache_dir, exist_ok=True)
+            far_file = os.path.join(cache_dir, f"ko_itn_{deterministic}_deterministic_verbalizer.far")
+        if not overwrite_cache and far_file and os.path.exists(far_file):
+            self.fst = pynini.Far(far_file, mode="r")["verbalize"]
+        else:
+            # token_graph = VerbalizeFst(deterministic=deterministic)
+            token_graph = VerbalizeFst().fst
+            token_verbalizer = (
+                pynutil.delete("tokens {") + delete_space + token_graph + delete_space + pynutil.delete(" }")
+            )
+            verbalizer = pynini.closure(delete_space + token_verbalizer + delete_space)
+
+            self.fst = (verbalizer).optimize()
+            if far_file:
+                generator_main(far_file, {"verbalize": self.fst})
diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py
new file mode 100644
index 000000000..d79957ca8
--- /dev/null
+++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2015 and onwards Google, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import pynini
+from pynini.lib import pynutil
+
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
+
+
+
+class WordFst(GraphFst):
+    '''
+    tokens { name: "사과" } -> 사과
+    '''
+
+    def __init__(self, deterministic: bool = True, lm: bool = False):
+        super().__init__(name="word", kind="verbalize", deterministic=deterministic)
+
+        graph = pynutil.delete("name: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")
+
+        self.fst = graph.optimize()
diff --git a/nemo_text_processing/inverse_text_normalization/run_evaluate.py b/nemo_text_processing/inverse_text_normalization/run_evaluate.py
index 0852329d6..7bfdd3399 100644
--- a/nemo_text_processing/inverse_text_normalization/run_evaluate.py
+++ b/nemo_text_processing/inverse_text_normalization/run_evaluate.py
@@ -35,7 +35,7 @@ def parse_args():
     parser.add_argument(
         "--lang",
         help="language",
-        choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", 'ja'],
+        choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja","ko"],
         default="en",
         type=str,
     )
diff --git a/tests/nemo_text_processing/ko/__init__.py b/tests/nemo_text_processing/ko/__init__.py
new file mode 100644
index 000000000..341a77c5b
--- /dev/null
+++ b/tests/nemo_text_processing/ko/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt
new file mode 100644
index 000000000..007273e5e
--- /dev/null
+++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt
@@ -0,0 +1,27 @@
+영~0
+구~9
+십~10
+십칠~17
+오십삼~53
+백~100
+백오~105
+삼백이십~320
+구백팔십칠~987
+천~1000
+천육~1006
+천오백~1500
+오천사백삼십이~5432
+만~10000
+만천이백~11200
+삼만오천칠백~35700
+십이만~120000
+백오십만삼천~1503000
+천만~10000000
+오천이백칠십만육천백~52706100
+억~100000000
+삼억오천만~350000000
+십이억천만~1210000000
+백오십억칠천만~15070000000
+오천억~500000000000
+일조~1000000000000
+이조오천억~2500000000000
\ No newline at end of file
diff --git a/tests/nemo_text_processing/ko/test_cardinal.py b/tests/nemo_text_processing/ko/test_cardinal.py
new file mode 100644
index 000000000..9fd366ea6
--- /dev/null
+++ b/tests/nemo_text_processing/ko/test_cardinal.py
@@ -0,0 +1,39 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from parameterized import parameterized + +from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer +from nemo_text_processing.text_normalization.normalize import Normalizer +from nemo_text_processing.text_normalization.normalize_with_audio import NormalizerWithAudio + +from ..utils import CACHE_DIR, RUN_AUDIO_BASED_TESTS, parse_test_case_file + + +class TestCardinal: + inverse_normalizer_ko = InverseNormalizer(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) + + @parameterized.expand(parse_test_case_file('ko/data_inverse_text_normalization/test_cases_cardinal.txt')) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_denorm(self, test_input, expected): + pred = self.inverse_normalizer_ko.inverse_normalize(test_input, verbose=False) + assert pred == expected + + normalizer_with_audio_ko = ( + NormalizerWithAudio(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) + if RUN_AUDIO_BASED_TESTS + else None + ) \ No newline at end of file diff --git a/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh new file mode 100644 index 000000000..c44f4a703 --- /dev/null +++ b/tests/nemo_text_processing/ko/test_sparrowhawk_inverse_text_normalization.sh @@ -0,0 +1,34 @@ +#! /bin/sh + +GRAMMARS_DIR=${1:-"/workspace/sparrowhawk/documentation/grammars"} +TEST_DIR=${2:-"/workspace/tests/ko"} + +runtest () { + input=$1 + echo "INPUT is $input" + cd ${GRAMMARS_DIR} + + # read test file + while read testcase; do + IFS='~' read spoken written <<< $testcase + denorm_pred=$(echo $spoken | normalizer_main --config=sparrowhawk_configuration.ascii_proto 2>&1 | tail -n 1) + + # trim white space + written="$(echo -e "${written}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + denorm_pred="$(echo -e "${denorm_pred}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + + # input expected actual + assertEquals "$spoken" "$written" "$denorm_pred" + done < "$input" +} + +testITNCardinal() { + input=$TEST_DIR/data_inverse_text_normalization/test_cases_cardinal.txt + runtest $input +} + +# Remove all command-line arguments +shift $# + +# Load shUnit2 +. 
/workspace/shunit2/shunit2 \ No newline at end of file diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py index 6b82dfbec..0df099774 100644 --- a/tools/text_processing_deployment/pynini_export.py +++ b/tools/text_processing_deployment/pynini_export.py @@ -106,6 +106,7 @@ def parse_args(): 'mr', 'ja', 'rw', + 'ko' ], type=str, default='en', @@ -307,6 +308,13 @@ def parse_args(): PostProcessingFst as TNPostProcessingFst, ) from nemo_text_processing.text_normalization.ja.verbalizers.verbalize import VerbalizeFst as TNVerbalizeFst + elif args.language == 'ko': + from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ( + ClassifyFst as ITNClassifyFst, + ) + from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import ( + VerbalizeFst as ITNVerbalizeFst, + ) elif args.language == 'rw': from nemo_text_processing.text_normalization.rw.taggers.tokenize_and_classify import ( ClassifyFst as TNClassifyFst, From 77da79d12b1378502cc2b382cd6933b02e7c2545 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 18:46:22 +0000 Subject: [PATCH 2/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../inverse_normalize.py | 4 +- .../ko/clean_eval_data.py | 59 +++++++------------ .../ko/taggers/cardinal.py | 38 +++++++----- .../ko/taggers/tokenize_and_classify.py | 12 ++-- .../ko/taggers/word.py | 3 +- .../inverse_text_normalization/ko/utils.py | 3 - .../ko/verbalizers/__init__.py | 2 +- .../ko/verbalizers/cardinal.py | 18 ++---- .../ko/verbalizers/verbalize.py | 7 +-- .../ko/verbalizers/verbalize_final.py | 3 +- .../ko/verbalizers/word.py | 1 - .../run_evaluate.py | 2 +- .../nemo_text_processing/ko/test_cardinal.py | 6 +- .../pynini_export.py | 2 +- 14 files changed, 68 insertions(+), 92 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py index e505a8ad0..acda8b7f9 100644 --- a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py +++ b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py @@ -135,7 +135,7 @@ def __init__( from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import ( VerbalizeFinalFst, - ) + ) self.tagger = ClassifyFst( cache_dir=cache_dir, whitelist=whitelist, overwrite_cache=overwrite_cache, input_case=input_case @@ -180,7 +180,7 @@ def parse_args(): parser.add_argument( "--language", help="language", - choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja','ko'], + choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja', 'ko'], default="en", type=str, ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py b/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py index 3c1193333..bc429e858 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py +++ b/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py @@ -282,41 +282,24 @@ def process_address_1(instance: Instance) -> Instance: filters = [] -filters.append(Filter(class_type="CARDINAL", - process_func=process_cardinal_1, 
filter_func=filter_cardinal_1)) -filters.append(Filter(class_type="ORDINAL", - process_func=process_ordinal_1, filter_func=filter_ordinal_1)) -filters.append(Filter(class_type="DECIMAL", - process_func=process_decimal_1, filter_func=filter_decimal_1)) -filters.append(Filter(class_type="MEASURE", - process_func=process_measure_1, filter_func=filter_measure_1)) -filters.append(Filter(class_type="MONEY", - process_func=process_money_1, filter_func=filter_money_1)) -filters.append(Filter(class_type="TIME", - process_func=process_time_1, filter_func=filter_time_1)) - -filters.append(Filter(class_type="DATE", - process_func=process_date_1, filter_func=filter_date_1)) -filters.append(Filter(class_type="PLAIN", - process_func=process_plain_1, filter_func=filter_plain_1)) -filters.append(Filter(class_type="PUNCT", - process_func=process_punct_1, filter_func=filter_punct_1)) -filters.append(Filter(class_type="LETTERS", - process_func=process_letters_1, filter_func=filter_letters_1)) -filters.append(Filter(class_type="VERBATIM", - process_func=process_verbatim_1, filter_func=filter_verbatim_1)) -filters.append(Filter(class_type="DIGIT", - process_func=process_digit_1, filter_func=filter_digit_1)) -filters.append(Filter(class_type="TELEPHONE", - process_func=process_telephone_1, filter_func=filter_telephone_1)) -filters.append(Filter(class_type="ELECTRONIC", - process_func=process_electronic_1, filter_func=filter_electronic_1)) -filters.append(Filter(class_type="FRACTION", - process_func=process_fraction_1, filter_func=filter_fraction_1)) -filters.append(Filter(class_type="ADDRESS", - process_func=process_address_1, filter_func=filter_address_1)) -filters.append(Filter(class_type=EOS_TYPE, - process_func=lambda x: x, filter_func=lambda x: True)) +filters.append(Filter(class_type="CARDINAL", process_func=process_cardinal_1, filter_func=filter_cardinal_1)) +filters.append(Filter(class_type="ORDINAL", process_func=process_ordinal_1, filter_func=filter_ordinal_1)) +filters.append(Filter(class_type="DECIMAL", process_func=process_decimal_1, filter_func=filter_decimal_1)) +filters.append(Filter(class_type="MEASURE", process_func=process_measure_1, filter_func=filter_measure_1)) +filters.append(Filter(class_type="MONEY", process_func=process_money_1, filter_func=filter_money_1)) +filters.append(Filter(class_type="TIME", process_func=process_time_1, filter_func=filter_time_1)) + +filters.append(Filter(class_type="DATE", process_func=process_date_1, filter_func=filter_date_1)) +filters.append(Filter(class_type="PLAIN", process_func=process_plain_1, filter_func=filter_plain_1)) +filters.append(Filter(class_type="PUNCT", process_func=process_punct_1, filter_func=filter_punct_1)) +filters.append(Filter(class_type="LETTERS", process_func=process_letters_1, filter_func=filter_letters_1)) +filters.append(Filter(class_type="VERBATIM", process_func=process_verbatim_1, filter_func=filter_verbatim_1)) +filters.append(Filter(class_type="DIGIT", process_func=process_digit_1, filter_func=filter_digit_1)) +filters.append(Filter(class_type="TELEPHONE", process_func=process_telephone_1, filter_func=filter_telephone_1)) +filters.append(Filter(class_type="ELECTRONIC", process_func=process_electronic_1, filter_func=filter_electronic_1)) +filters.append(Filter(class_type="FRACTION", process_func=process_fraction_1, filter_func=filter_fraction_1)) +filters.append(Filter(class_type="ADDRESS", process_func=process_address_1, filter_func=filter_address_1)) +filters.append(Filter(class_type=EOS_TYPE, process_func=lambda x: x, 
filter_func=lambda x: True)) def filter_loaded_data(data: List[Instance], verbose: bool = False) -> List[Instance]: @@ -344,10 +327,8 @@ def filter_loaded_data(data: List[Instance], verbose: bool = False) -> List[Inst def parse_args(): parser = ArgumentParser() - parser.add_argument("--input", help="input file path", - type=str, default='./en_with_types/output-00001-of-00100') - parser.add_argument( - "--verbose", help="print filtered instances", action='store_true') + parser.add_argument("--input", help="input file path", type=str, default='./en_with_types/output-00001-of-00100') + parser.add_argument("--verbose", help="print filtered instances", action='store_true') return parser.parse_args() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index df5804fc0..09cc03909 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -19,6 +19,7 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_DIGIT, GraphFst, delete_space from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path + class CardinalFst(GraphFst): """ Finite state transducer for classifying cardinals @@ -37,14 +38,14 @@ def __init__(self): graph_negative = pynini.cross("마이너스", "-") graph_negative += delete_space - + ten = pynutil.delete("십") ten_alt = pynini.cross("십", "1") ### Responsible for second digit of two digit number. ex) 20's 2 graph_ten_component = pynini.union((graph_digit + ten) | ten_alt, pynutil.insert("0")) ### Responsible for the first digit of number. ex) 1,2,3,4,5,,, graph_ten_component += graph_digit | pynutil.insert("0") - + hundred = pynutil.delete("백") hundred_alt = pynini.cross("백", "1") graph_hundred_component = pynini.union(((graph_digit + hundred) | hundred_alt), pynutil.insert("0")) @@ -59,29 +60,36 @@ def __init__(self): tenthousand_alt = pynini.cross("만", "1") ### "만" can express next four digits of numbers until the next unit "억", so insert "0000" to allocate four digit worth of space ### From "만", keep adding four digits and graph_thousand_component(0000-9999), because Korean units increase every four digits - graph_tenthousand_component = pynini.union(((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000")) + graph_tenthousand_component = pynini.union( + ((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000") + ) graph_tenthousand_component += graph_thousand_component hundredmillion = pynutil.delete("억") hundredmillion_alt = pynini.cross("억", "1") - graph_hundredmillion_component = pynini.union(((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000")) - graph_hundredmillion_component += graph_tenthousand_component - + graph_hundredmillion_component = pynini.union( + ((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000") + ) + graph_hundredmillion_component += graph_tenthousand_component + trillion = pynutil.delete("조") trillion_alt = pynini.cross("조", "1") - graph_trillion_component = pynini.union(((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000")) + graph_trillion_component = pynini.union( + ((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000") + ) graph_trillion_component += graph_hundredmillion_component tenquadrillion = pynutil.delete("경") 
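# Aside: a minimal plain-Python sketch of the four-digit grouping scheme
# these graphs encode, for readers unfamiliar with Sino-Korean numerals.
# Korean large-number units advance every 10^4 (만=10^4, 억=10^8, 조=10^12,
# 경=10^16), which is why each unit above pads a four-digit block via
# pynutil.insert("0000"). Illustrative only, not part of the patch: the
# dictionaries are assumptions based on standard Sino-Korean numerals, not
# values read from the grammar's .tsv files, and 영 (zero) is left to the
# separate graph_zero path just as in the FST.
KO_DIGITS = {"일": 1, "이": 2, "삼": 3, "사": 4, "오": 5, "육": 6, "칠": 7, "팔": 8, "구": 9}
KO_SMALL = {"십": 10, "백": 100, "천": 1000}
KO_BIG = {"만": 10**4, "억": 10**8, "조": 10**12, "경": 10**16}

def ko_to_int(text: str) -> int:
    total = block = digit = 0
    for ch in text:
        if ch in KO_DIGITS:
            digit = KO_DIGITS[ch]
        elif ch in KO_SMALL:
            block += max(digit, 1) * KO_SMALL[ch]  # bare 십/백/천 means 1x
            digit = 0
        elif ch in KO_BIG:
            block += digit
            total += max(block, 1) * KO_BIG[ch]  # bare 만/억/조/경 means 1x
            block = digit = 0
    return total + block + digit

assert ko_to_int("삼만오천") == 35000
assert ko_to_int("백이십삼") == 123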
tenquadrillion_alt = pynini.cross("경", "1") - graph_tenquadrillion_component = pynini.union(((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000")) + graph_tenquadrillion_component = pynini.union( + ((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000") + ) graph_tenquadrillion_component += graph_trillion_component - graph = pynini.union( ### From biggest unit to smallest, everything is included - graph_tenquadrillion_component| - graph_zero + graph_tenquadrillion_component + | graph_zero ) leading_zero = ( @@ -89,16 +97,18 @@ def __init__(self): ) graph_nonzero = graph @ leading_zero graph = pynini.union(graph_nonzero, graph_zero) - + graph = graph @ leading_zero | graph_zero self.just_cardinals = graph - optional_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1) + optional_sign = pynini.closure( + (pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space, 0, 1 + ) final_graph = ( optional_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"") ) | (pynutil.insert("integer: \"") + graph + pynutil.insert("\"")) final_graph = self.add_tokens(final_graph) - self.fst = final_graph.optimize() \ No newline at end of file + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 760ce6829..2842a4167 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -19,15 +19,15 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst -from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( INPUT_LOWER_CASED, GraphFst, delete_extra_space, delete_space, generator_main, ) +from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst +from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst class ClassifyFst(GraphFst): @@ -64,8 +64,8 @@ def __init__( cardinal = CardinalFst() cardinal_graph = cardinal.fst word_graph = WordFst().fst - classify = (pynutil.add_weight(cardinal_graph, 1.1)| pynutil.add_weight(word_graph, 100)) - + classify = pynutil.add_weight(cardinal_graph, 1.1) | pynutil.add_weight(word_graph, 100) + token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ") tagger = pynini.closure(token, 1) @@ -73,4 +73,4 @@ def __init__( if far_file: generator_main(far_file, {"tokenize_and_classify": self.fst}) - logging.info(f"ClassifyFst grammars are saved to {far_file}.") \ No newline at end of file + logging.info(f"ClassifyFst grammars are saved to {far_file}.") diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py index 0d6ccd5c5..0e4dbb93c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py @@ -27,6 +27,5 @@ class WordFst(GraphFst): def __init__(self): 
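# Aside (illustrative, not part of the patch): WordFst is the catch-all
# tagger. It wraps any run of non-space characters as a plain token, e.g.
#   "안녕" -> name: "안녕"
# so text the cardinal grammar does not claim still round-trips unchanged.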
super().__init__(name="word", kind="classify") - word = pynutil.insert( - "name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"") + word = pynutil.insert("name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"") self.fst = word.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/utils.py b/nemo_text_processing/inverse_text_normalization/ko/utils.py index 0222cc0b8..d198c3835 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/utils.py +++ b/nemo_text_processing/inverse_text_normalization/ko/utils.py @@ -15,9 +15,6 @@ import os - def get_abs_path(rel_path): return os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path - - diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py index da950f35e..f541211af 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py @@ -14,4 +14,4 @@ from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst \ No newline at end of file +from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py index 1800a6dc8..fb9a76d8e 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py @@ -15,11 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( - NEMO_NOT_QUOTE, - GraphFst, - delete_space, -) +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space class CardinalFst(GraphFst): @@ -34,21 +30,17 @@ def __init__(self): pynutil.delete("negative:") + delete_space + pynutil.delete("\"") - + pynini.accep("-") + + pynini.accep("-") + pynutil.delete("\"") ) optional_sign_output = pynini.closure(negative_sign + delete_space, 0, 1) - digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) + digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) integer_cardinal = ( - pynutil.delete("integer:") - + delete_space - + pynutil.delete("\"") - + digits_from_tag - + pynutil.delete("\"") + pynutil.delete("integer:") + delete_space + pynutil.delete("\"") + digits_from_tag + pynutil.delete("\"") ) graph = integer_cardinal final_graph = optional_sign_output + graph - self.fst = self.delete_tokens(final_graph).optimize() \ No newline at end of file + self.fst = self.delete_tokens(final_graph).optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 9d750d757..d8851e206 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -13,9 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
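# Aside (illustrative, not part of the patch): VerbalizeFst below is just
# the union cardinal_graph | word_graph, so each tagged token is rendered
# by whichever verbalizer accepts it -- cardinals become digit strings and
# everything else falls through the word verbalizer untouched.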
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst class VerbalizeFst(GraphFst): @@ -30,7 +30,6 @@ def __init__(self): cardinal = CardinalFst() cardinal_graph = cardinal.fst word_graph = WordFst().fst - - graph = (cardinal_graph|word_graph) + + graph = cardinal_graph | word_graph self.fst = graph - diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py index 8554fc161..09b4cbc8b 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -18,9 +18,9 @@ import pynini from pynini.lib import pynutil +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space class VerbalizeFinalFst(GraphFst): @@ -28,6 +28,7 @@ class VerbalizeFinalFst(GraphFst): Finite state transducer that verbalizes an entire sentence, e.g. tokens { name: "its" } tokens { time { hours: "12" minutes: "30" } } tokens { name: "now" } -> its 12:30 now """ + def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False): super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic) far_file = None diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py index d79957ca8..c134fe63a 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py @@ -20,7 +20,6 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space - class WordFst(GraphFst): ''' tokens { name: "一" } -> 一 diff --git a/nemo_text_processing/inverse_text_normalization/run_evaluate.py b/nemo_text_processing/inverse_text_normalization/run_evaluate.py index 7bfdd3399..133474940 100644 --- a/nemo_text_processing/inverse_text_normalization/run_evaluate.py +++ b/nemo_text_processing/inverse_text_normalization/run_evaluate.py @@ -35,7 +35,7 @@ def parse_args(): parser.add_argument( "--lang", help="language", - choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja","ko"], + choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja", "ko"], default="en", type=str, ) diff --git a/tests/nemo_text_processing/ko/test_cardinal.py b/tests/nemo_text_processing/ko/test_cardinal.py index 9fd366ea6..526747668 100644 --- a/tests/nemo_text_processing/ko/test_cardinal.py +++ b/tests/nemo_text_processing/ko/test_cardinal.py @@ -33,7 +33,5 @@ def test_denorm(self, test_input, expected): assert pred == expected normalizer_with_audio_ko = ( - NormalizerWithAudio(lang='ko', cache_dir=CACHE_DIR, 
overwrite_cache=False)
-        if RUN_AUDIO_BASED_TESTS
-        else None
-    )
\ No newline at end of file
+        NormalizerWithAudio(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) if RUN_AUDIO_BASED_TESTS else None
+    )
diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py
index 0df099774..d1ba34a37 100644
--- a/tools/text_processing_deployment/pynini_export.py
+++ b/tools/text_processing_deployment/pynini_export.py
@@ -106,7 +106,7 @@ def parse_args():
         'mr',
         'ja',
         'rw',
-        'ko'
+        'ko',
     ],
     type=str,
     default='en',

From 9f7e876841b518a5b4d3d5e68df760cb7126729c Mon Sep 17 00:00:00 2001
From: hmlee245
Date: Fri, 16 May 2025 13:10:40 -0700
Subject: [PATCH 3/8] Fixing all review feedback

Signed-off-by: hmlee245
---
 .../ko/clean_eval_data.py                     | 361 ------------------
 .../ko/data/numbers/zero.tsv                  |   1 -
 .../ko/graph_utils.py                         |   2 +-
 .../ko/taggers/__init__.py                    |   3 -
 .../ko/taggers/cardinal.py                    |   6 +-
 .../ko/taggers/tokenize_and_classify.py       |   2 -
 .../ko/verbalizers/__init__.py                |   4 -
 .../ko/verbalizers/verbalize_final.py         |   1 -
 .../ko/verbalizers/word.py                    |   4 +-
 .../nemo_text_processing/ko/test_cardinal.py  |  12 +-
 10 files changed, 5 insertions(+), 391 deletions(-)
 delete mode 100644 nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py
 delete mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv

diff --git a/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py b/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py
deleted file mode 100644
index 3c1193333..000000000
--- a/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py
+++ /dev/null
@@ -1,361 +0,0 @@
-# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from argparse import ArgumentParser
-from typing import List
-
-import regex as re
-
-from nemo_text_processing.text_normalization.data_loader_utils import (
-    EOS_TYPE,
-    Instance,
-    load_files,
-    training_data_to_sentences,
-)
-
-"""
-This file is for evaluation purposes.
-filter_loaded_data() cleans data (list of instances) for inverse text normalization. Filters and cleaners can be specified for each semiotic class individually.
-For example, normalized text should only include characters and whitespace characters but no punctuation.
- Cardinal unnormalized instances should contain at least one integer and all other characters are removed.
-""" - - -class Filter: - """ - Filter class - - Args: - class_type: semiotic class used in dataset - process_func: function to transform text - filter_func: function to filter text - - """ - - def __init__(self, class_type: str, process_func: object, filter_func: object): - self.class_type = class_type - self.process_func = process_func - self.filter_func = filter_func - - def filter(self, instance: Instance) -> bool: - """ - filter function - - Args: - filters given instance with filter function - - Returns: True if given instance fulfills criteria or does not belong to class type - """ - if instance.token_type != self.class_type: - return True - return self.filter_func(instance) - - def process(self, instance: Instance) -> Instance: - """ - process function - - Args: - processes given instance with process function - - Returns: processed instance if instance belongs to expected class type or original instance - """ - if instance.token_type != self.class_type: - return instance - return self.process_func(instance) - - -def filter_cardinal_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_cardinal_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - un_normalized = re.sub(r"[^0-9]", "", un_normalized) - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_ordinal_1(instance: Instance) -> bool: - ok = re.search(r"(st|nd|rd|th)\s*$", instance.un_normalized) - return ok - - -def process_ordinal_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - un_normalized = re.sub(r"[,\s]", "", un_normalized) - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_decimal_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_decimal_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - un_normalized = re.sub(r",", "", un_normalized) - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_measure_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_measure_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - un_normalized = re.sub(r",", "", un_normalized) - un_normalized = re.sub(r"m2", "m²", un_normalized) - un_normalized = re.sub(r"(\d)([^\d.\s])", r"\1 \2", un_normalized) - normalized = re.sub(r"[^a-z\s]", "", normalized) - normalized = re.sub(r"per ([a-z\s]*)s$", r"per \1", normalized) - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_money_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_money_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - un_normalized = re.sub(r",", "", un_normalized) - un_normalized = re.sub(r"a\$", r"$", un_normalized) - un_normalized = re.sub(r"us\$", r"$", un_normalized) - un_normalized = re.sub(r"(\d)m\s*$", r"\1 million", 
un_normalized) - un_normalized = re.sub(r"(\d)bn?\s*$", r"\1 billion", un_normalized) - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_time_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_time_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - un_normalized = re.sub(r": ", ":", un_normalized) - un_normalized = re.sub(r"(\d)\s?a\s?m\s?", r"\1 a.m.", un_normalized) - un_normalized = re.sub(r"(\d)\s?p\s?m\s?", r"\1 p.m.", un_normalized) - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_plain_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_plain_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_punct_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_punct_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_date_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_date_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - un_normalized = re.sub(r",", "", un_normalized) - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_letters_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_letters_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_verbatim_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_verbatim_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_digit_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_digit_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_telephone_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_telephone_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_electronic_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_electronic_1(instance: Instance) -> Instance: - 
un_normalized = instance.un_normalized - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_fraction_1(instance: Instance) -> bool: - ok = re.search(r"[0-9]", instance.un_normalized) - return ok - - -def process_fraction_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -def filter_address_1(instance: Instance) -> bool: - ok = True - return ok - - -def process_address_1(instance: Instance) -> Instance: - un_normalized = instance.un_normalized - normalized = instance.normalized - normalized = re.sub(r"[^a-z ]", "", normalized) - return Instance(token_type=instance.token_type, un_normalized=un_normalized, normalized=normalized) - - -filters = [] -filters.append(Filter(class_type="CARDINAL", - process_func=process_cardinal_1, filter_func=filter_cardinal_1)) -filters.append(Filter(class_type="ORDINAL", - process_func=process_ordinal_1, filter_func=filter_ordinal_1)) -filters.append(Filter(class_type="DECIMAL", - process_func=process_decimal_1, filter_func=filter_decimal_1)) -filters.append(Filter(class_type="MEASURE", - process_func=process_measure_1, filter_func=filter_measure_1)) -filters.append(Filter(class_type="MONEY", - process_func=process_money_1, filter_func=filter_money_1)) -filters.append(Filter(class_type="TIME", - process_func=process_time_1, filter_func=filter_time_1)) - -filters.append(Filter(class_type="DATE", - process_func=process_date_1, filter_func=filter_date_1)) -filters.append(Filter(class_type="PLAIN", - process_func=process_plain_1, filter_func=filter_plain_1)) -filters.append(Filter(class_type="PUNCT", - process_func=process_punct_1, filter_func=filter_punct_1)) -filters.append(Filter(class_type="LETTERS", - process_func=process_letters_1, filter_func=filter_letters_1)) -filters.append(Filter(class_type="VERBATIM", - process_func=process_verbatim_1, filter_func=filter_verbatim_1)) -filters.append(Filter(class_type="DIGIT", - process_func=process_digit_1, filter_func=filter_digit_1)) -filters.append(Filter(class_type="TELEPHONE", - process_func=process_telephone_1, filter_func=filter_telephone_1)) -filters.append(Filter(class_type="ELECTRONIC", - process_func=process_electronic_1, filter_func=filter_electronic_1)) -filters.append(Filter(class_type="FRACTION", - process_func=process_fraction_1, filter_func=filter_fraction_1)) -filters.append(Filter(class_type="ADDRESS", - process_func=process_address_1, filter_func=filter_address_1)) -filters.append(Filter(class_type=EOS_TYPE, - process_func=lambda x: x, filter_func=lambda x: True)) - - -def filter_loaded_data(data: List[Instance], verbose: bool = False) -> List[Instance]: - """ - Filters list of instances - - Args: - data: list of instances - - Returns: filtered and transformed list of instances - """ - updates_instances = [] - for instance in data: - updated_instance = False - for fil in filters: - if fil.class_type == instance.token_type and fil.filter(instance): - instance = fil.process(instance) - updated_instance = True - if updated_instance: - if verbose: - print(instance) - updates_instances.append(instance) - return updates_instances - - -def parse_args(): - parser = ArgumentParser() - parser.add_argument("--input", help="input file path", - type=str, 
default='./en_with_types/output-00001-of-00100') - parser.add_argument( - "--verbose", help="print filtered instances", action='store_true') - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - file_path = args.input - - print("Loading training data: " + file_path) - instance_list = load_files([file_path]) # List of instances - filtered_instance_list = filter_loaded_data(instance_list, args.verbose) - training_data_to_sentences(filtered_instance_list) diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv deleted file mode 100644 index 43baac7c1..000000000 --- a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv +++ /dev/null @@ -1 +0,0 @@ -영 0 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py b/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py index 7a9fd8720..50f1eb3b9 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py +++ b/nemo_text_processing/inverse_text_normalization/ko/graph_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. # Copyright 2015 and onwards Google, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py index f541211af..f6e3c3795 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py @@ -12,6 +12,3 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index df5804fc0..7253019f0 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -31,13 +31,9 @@ class CardinalFst(GraphFst): def __init__(self): super().__init__(name="cardinal", kind="classify") - graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv")) - graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) graph_zero = pynini.cross("영", "0") + graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) - graph_negative = pynini.cross("마이너스", "-") - graph_negative += delete_space - ten = pynutil.delete("십") ten_alt = pynini.cross("십", "1") ### Responsible for second digit of two digit number. 
ex) 20's 2 diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 760ce6829..bb6b35d41 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -24,8 +24,6 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( INPUT_LOWER_CASED, GraphFst, - delete_extra_space, - delete_space, generator_main, ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py index da950f35e..341a77c5b 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py @@ -11,7 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py index 8554fc161..8d40d2804 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -19,7 +19,6 @@ from pynini.lib import pynutil from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py index d79957ca8..a423d5d0c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py @@ -13,11 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
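# Aside (illustrative, not part of the patch): this verbalizer is the
# inverse of the word tagger -- it unwraps tokens { name: "안녕" } back to
# 안녕, which is why only NEMO_NOT_QUOTE and GraphFst remain imported after
# this cleanup.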
- -import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst diff --git a/tests/nemo_text_processing/ko/test_cardinal.py b/tests/nemo_text_processing/ko/test_cardinal.py index 9fd366ea6..872a5aa2a 100644 --- a/tests/nemo_text_processing/ko/test_cardinal.py +++ b/tests/nemo_text_processing/ko/test_cardinal.py @@ -16,10 +16,8 @@ from parameterized import parameterized from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer -from nemo_text_processing.text_normalization.normalize import Normalizer -from nemo_text_processing.text_normalization.normalize_with_audio import NormalizerWithAudio -from ..utils import CACHE_DIR, RUN_AUDIO_BASED_TESTS, parse_test_case_file +from ..utils import CACHE_DIR, parse_test_case_file class TestCardinal: @@ -30,10 +28,4 @@ class TestCardinal: @pytest.mark.unit def test_denorm(self, test_input, expected): pred = self.inverse_normalizer_ko.inverse_normalize(test_input, verbose=False) - assert pred == expected - - normalizer_with_audio_ko = ( - NormalizerWithAudio(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) - if RUN_AUDIO_BASED_TESTS - else None - ) \ No newline at end of file + assert pred == expected \ No newline at end of file From 4df2965feae682f7762f3c6f292613339869a89b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 20:23:32 +0000 Subject: [PATCH 4/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../inverse_text_normalization/ko/taggers/__init__.py | 1 - .../ko/taggers/tokenize_and_classify.py | 6 +----- .../ko/verbalizers/verbalize_final.py | 5 ++++- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py index f6e3c3795..341a77c5b 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/__init__.py @@ -11,4 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 30e0f5df4..75e3f6f20 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -19,11 +19,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( - INPUT_LOWER_CASED, - GraphFst, - generator_main, -) +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py index 648285758..09c917d00 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -20,10 +20,13 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst + <<<<<<< HEAD -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main + ======= from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst + >>>>>>> 77da79d12b1378502cc2b382cd6933b02e7c2545 From 41ac59d791511cd82c03b242e8ec671c91360c6e Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Fri, 16 May 2025 13:36:00 -0700 Subject: [PATCH 5/8] This reverts commit f893d89bd8890e1b46df1e40054cc9176ac7ce7a, reversing changes made to 9f7e876841b518a5b4d3d5e68df760cb7126729c. 
Signed-off-by: hmlee245 --- .../inverse_normalize.py | 4 +- .../ko/taggers/cardinal.py | 42 ++++++------------- .../ko/taggers/tokenize_and_classify.py | 12 ++++-- .../ko/taggers/word.py | 3 +- .../inverse_text_normalization/ko/utils.py | 3 ++ .../ko/verbalizers/__init__.py | 7 ---- .../ko/verbalizers/cardinal.py | 18 +++++--- .../ko/verbalizers/verbalize.py | 7 ++-- .../ko/verbalizers/verbalize_final.py | 11 +---- .../ko/verbalizers/word.py | 1 + .../run_evaluate.py | 2 +- .../nemo_text_processing/ko/test_cardinal.py | 10 +---- .../pynini_export.py | 2 +- 13 files changed, 50 insertions(+), 72 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py index acda8b7f9..e505a8ad0 100644 --- a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py +++ b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py @@ -135,7 +135,7 @@ def __init__( from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import ( VerbalizeFinalFst, - ) + ) self.tagger = ClassifyFst( cache_dir=cache_dir, whitelist=whitelist, overwrite_cache=overwrite_cache, input_case=input_case @@ -180,7 +180,7 @@ def parse_args(): parser.add_argument( "--language", help="language", - choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja', 'ko'], + choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja','ko'], default="en", type=str, ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index f3fa597e3..7253019f0 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -19,7 +19,6 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_DIGIT, GraphFst, delete_space from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path - class CardinalFst(GraphFst): """ Finite state transducer for classifying cardinals @@ -35,19 +34,13 @@ def __init__(self): graph_zero = pynini.cross("영", "0") graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) -<<<<<<< HEAD -======= - graph_negative = pynini.cross("마이너스", "-") - graph_negative += delete_space - ->>>>>>> 77da79d12b1378502cc2b382cd6933b02e7c2545 ten = pynutil.delete("십") ten_alt = pynini.cross("십", "1") ### Responsible for second digit of two digit number. ex) 20's 2 graph_ten_component = pynini.union((graph_digit + ten) | ten_alt, pynutil.insert("0")) ### Responsible for the first digit of number. 
ex) 1,2,3,4,5,,, graph_ten_component += graph_digit | pynutil.insert("0") - + hundred = pynutil.delete("백") hundred_alt = pynini.cross("백", "1") graph_hundred_component = pynini.union(((graph_digit + hundred) | hundred_alt), pynutil.insert("0")) @@ -62,36 +55,29 @@ def __init__(self): tenthousand_alt = pynini.cross("만", "1") ### "만" can express next four digits of numbers until the next unit "억", so insert "0000" to allocate four digit worth of space ### From "만", keep adding four digits and graph_thousand_component(0000-9999), because Korean units increase every four digits - graph_tenthousand_component = pynini.union( - ((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000") - ) + graph_tenthousand_component = pynini.union(((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000")) graph_tenthousand_component += graph_thousand_component hundredmillion = pynutil.delete("억") hundredmillion_alt = pynini.cross("억", "1") - graph_hundredmillion_component = pynini.union( - ((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000") - ) - graph_hundredmillion_component += graph_tenthousand_component - + graph_hundredmillion_component = pynini.union(((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000")) + graph_hundredmillion_component += graph_tenthousand_component + trillion = pynutil.delete("조") trillion_alt = pynini.cross("조", "1") - graph_trillion_component = pynini.union( - ((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000") - ) + graph_trillion_component = pynini.union(((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000")) graph_trillion_component += graph_hundredmillion_component tenquadrillion = pynutil.delete("경") tenquadrillion_alt = pynini.cross("경", "1") - graph_tenquadrillion_component = pynini.union( - ((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000") - ) + graph_tenquadrillion_component = pynini.union(((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000")) graph_tenquadrillion_component += graph_trillion_component + graph = pynini.union( ### From biggest unit to smallest, everything is included - graph_tenquadrillion_component - | graph_zero + graph_tenquadrillion_component| + graph_zero ) leading_zero = ( @@ -99,18 +85,16 @@ def __init__(self): ) graph_nonzero = graph @ leading_zero graph = pynini.union(graph_nonzero, graph_zero) - + graph = graph @ leading_zero | graph_zero self.just_cardinals = graph - optional_sign = pynini.closure( - (pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space, 0, 1 - ) + optional_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1) final_graph = ( optional_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"") ) | (pynutil.insert("integer: \"") + graph + pynutil.insert("\"")) final_graph = self.add_tokens(final_graph) - self.fst = final_graph.optimize() + self.fst = final_graph.optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index 75e3f6f20..bb6b35d41 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ 
b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -19,9 +19,13 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( + INPUT_LOWER_CASED, + GraphFst, + generator_main, +) class ClassifyFst(GraphFst): @@ -58,8 +62,8 @@ def __init__( cardinal = CardinalFst() cardinal_graph = cardinal.fst word_graph = WordFst().fst - classify = pynutil.add_weight(cardinal_graph, 1.1) | pynutil.add_weight(word_graph, 100) - + classify = (pynutil.add_weight(cardinal_graph, 1.1)| pynutil.add_weight(word_graph, 100)) + token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ") tagger = pynini.closure(token, 1) @@ -67,4 +71,4 @@ def __init__( if far_file: generator_main(far_file, {"tokenize_and_classify": self.fst}) - logging.info(f"ClassifyFst grammars are saved to {far_file}.") + logging.info(f"ClassifyFst grammars are saved to {far_file}.") \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py index 0e4dbb93c..0d6ccd5c5 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py @@ -27,5 +27,6 @@ class WordFst(GraphFst): def __init__(self): super().__init__(name="word", kind="classify") - word = pynutil.insert("name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"") + word = pynutil.insert( + "name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"") self.fst = word.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/utils.py b/nemo_text_processing/inverse_text_normalization/ko/utils.py index d198c3835..0222cc0b8 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/utils.py +++ b/nemo_text_processing/inverse_text_normalization/ko/utils.py @@ -15,6 +15,9 @@ import os + def get_abs_path(rel_path): return os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path + + diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py index b8e634eef..341a77c5b 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py @@ -11,10 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
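# Aside (illustrative, not part of the patch): in the classify union
# restored above, pynutil.add_weight attaches tropical weights and
# shortest-path search keeps the cheapest parse, so the cardinal grammar
# (weight 1.1) is strongly preferred over the catch-all word path
# (weight 100) whenever both grammars accept the input.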
-<<<<<<< HEAD -======= - -from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst ->>>>>>> 77da79d12b1378502cc2b382cd6933b02e7c2545 diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py index fb9a76d8e..1800a6dc8 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py @@ -15,7 +15,11 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( + NEMO_NOT_QUOTE, + GraphFst, + delete_space, +) class CardinalFst(GraphFst): @@ -30,17 +34,21 @@ def __init__(self): pynutil.delete("negative:") + delete_space + pynutil.delete("\"") - + pynini.accep("-") + + pynini.accep("-") + pynutil.delete("\"") ) optional_sign_output = pynini.closure(negative_sign + delete_space, 0, 1) - digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) + digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) integer_cardinal = ( - pynutil.delete("integer:") + delete_space + pynutil.delete("\"") + digits_from_tag + pynutil.delete("\"") + pynutil.delete("integer:") + + delete_space + + pynutil.delete("\"") + + digits_from_tag + + pynutil.delete("\"") ) graph = integer_cardinal final_graph = optional_sign_output + graph - self.fst = self.delete_tokens(final_graph).optimize() + self.fst = self.delete_tokens(final_graph).optimize() \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index d8851e206..9d750d757 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -13,9 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
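# Aside: a rough plain-Python equivalent of what the cardinal verbalizer
# above does -- strip the tagger's serialization back to a signed digit
# string. The regexes assume the exact tag layout the tagger emits
# (negative before integer); a sketch only, not part of the patch.
import re

def verbalize_cardinal(tag: str) -> str:
    # 'negative: "-" integer: "15000"' -> "-15000"
    sign = "-" if re.search(r'negative:\s*"-"', tag) else ""
    integer = re.search(r'integer:\s*"([^"]*)"', tag)
    return sign + (integer.group(1) if integer else "")

assert verbalize_cardinal('integer: "123"') == "123"
assert verbalize_cardinal('negative: "-" integer: "15000"') == "-15000"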
-from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst class VerbalizeFst(GraphFst): @@ -30,6 +30,7 @@ def __init__(self): cardinal = CardinalFst() cardinal_graph = cardinal.fst word_graph = WordFst().fst - - graph = cardinal_graph | word_graph + + graph = (cardinal_graph|word_graph) self.fst = graph + diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py index 09c917d00..8d40d2804 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -18,16 +18,8 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst - -<<<<<<< HEAD -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main - -======= -from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst - ->>>>>>> 77da79d12b1378502cc2b382cd6933b02e7c2545 +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space class VerbalizeFinalFst(GraphFst): @@ -35,7 +27,6 @@ class VerbalizeFinalFst(GraphFst): Finite state transducer that verbalizes an entire sentence, e.g. 
tokens { name: "its" } tokens { time { hours: "12" minutes: "30" } } tokens { name: "now" } -> its 12:30 now """ - def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False): super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic) far_file = None diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py index ecf62bfe3..a423d5d0c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py @@ -18,6 +18,7 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst + class WordFst(GraphFst): ''' tokens { name: "一" } -> 一 diff --git a/nemo_text_processing/inverse_text_normalization/run_evaluate.py b/nemo_text_processing/inverse_text_normalization/run_evaluate.py index 133474940..7bfdd3399 100644 --- a/nemo_text_processing/inverse_text_normalization/run_evaluate.py +++ b/nemo_text_processing/inverse_text_normalization/run_evaluate.py @@ -35,7 +35,7 @@ def parse_args(): parser.add_argument( "--lang", help="language", - choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja", "ko"], + choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja","ko"], default="en", type=str, ) diff --git a/tests/nemo_text_processing/ko/test_cardinal.py b/tests/nemo_text_processing/ko/test_cardinal.py index ff5950f2a..872a5aa2a 100644 --- a/tests/nemo_text_processing/ko/test_cardinal.py +++ b/tests/nemo_text_processing/ko/test_cardinal.py @@ -28,12 +28,4 @@ class TestCardinal: @pytest.mark.unit def test_denorm(self, test_input, expected): pred = self.inverse_normalizer_ko.inverse_normalize(test_input, verbose=False) -<<<<<<< HEAD - assert pred == expected -======= - assert pred == expected - - normalizer_with_audio_ko = ( - NormalizerWithAudio(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) if RUN_AUDIO_BASED_TESTS else None - ) ->>>>>>> 77da79d12b1378502cc2b382cd6933b02e7c2545 + assert pred == expected \ No newline at end of file diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py index d1ba34a37..0df099774 100644 --- a/tools/text_processing_deployment/pynini_export.py +++ b/tools/text_processing_deployment/pynini_export.py @@ -106,7 +106,7 @@ def parse_args(): 'mr', 'ja', 'rw', - 'ko', + 'ko' ], type=str, default='en', From a5164dc157fdfd6af8aeca449eb7875c80ba6aae Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 20:55:36 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../inverse_normalize.py | 4 +-- .../ko/taggers/cardinal.py | 36 ++++++++++++------- .../ko/taggers/tokenize_and_classify.py | 12 +++---- .../ko/taggers/word.py | 3 +- .../inverse_text_normalization/ko/utils.py | 3 -- .../ko/verbalizers/cardinal.py | 18 +++------- .../ko/verbalizers/verbalize.py | 7 ++-- .../ko/verbalizers/verbalize_final.py | 3 +- .../ko/verbalizers/word.py | 1 - .../run_evaluate.py | 2 +- .../nemo_text_processing/ko/test_cardinal.py | 2 +- .../pynini_export.py | 2 +- 12 files changed, 43 insertions(+), 50 deletions(-) diff --git a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py 
b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py index e505a8ad0..acda8b7f9 100644 --- a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py +++ b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py @@ -135,7 +135,7 @@ def __init__( from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import ( VerbalizeFinalFst, - ) + ) self.tagger = ClassifyFst( cache_dir=cache_dir, whitelist=whitelist, overwrite_cache=overwrite_cache, input_case=input_case @@ -180,7 +180,7 @@ def parse_args(): parser.add_argument( "--language", help="language", - choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja','ko'], + choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja', 'ko'], default="en", type=str, ) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index 7253019f0..14172b4e9 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -19,6 +19,7 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_DIGIT, GraphFst, delete_space from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path + class CardinalFst(GraphFst): """ Finite state transducer for classifying cardinals @@ -40,7 +41,7 @@ def __init__(self): graph_ten_component = pynini.union((graph_digit + ten) | ten_alt, pynutil.insert("0")) ### Responsible for the first digit of number. 
ex) 1,2,3,4,5,,, graph_ten_component += graph_digit | pynutil.insert("0") - + hundred = pynutil.delete("백") hundred_alt = pynini.cross("백", "1") graph_hundred_component = pynini.union(((graph_digit + hundred) | hundred_alt), pynutil.insert("0")) @@ -55,29 +56,36 @@ def __init__(self): tenthousand_alt = pynini.cross("만", "1") ### "만" can express next four digits of numbers until the next unit "억", so insert "0000" to allocate four digit worth of space ### From "만", keep adding four digits and graph_thousand_component(0000-9999), because Korean units increase every four digits - graph_tenthousand_component = pynini.union(((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000")) + graph_tenthousand_component = pynini.union( + ((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000") + ) graph_tenthousand_component += graph_thousand_component hundredmillion = pynutil.delete("억") hundredmillion_alt = pynini.cross("억", "1") - graph_hundredmillion_component = pynini.union(((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000")) - graph_hundredmillion_component += graph_tenthousand_component - + graph_hundredmillion_component = pynini.union( + ((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000") + ) + graph_hundredmillion_component += graph_tenthousand_component + trillion = pynutil.delete("조") trillion_alt = pynini.cross("조", "1") - graph_trillion_component = pynini.union(((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000")) + graph_trillion_component = pynini.union( + ((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000") + ) graph_trillion_component += graph_hundredmillion_component tenquadrillion = pynutil.delete("경") tenquadrillion_alt = pynini.cross("경", "1") - graph_tenquadrillion_component = pynini.union(((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000")) + graph_tenquadrillion_component = pynini.union( + ((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000") + ) graph_tenquadrillion_component += graph_trillion_component - graph = pynini.union( ### From biggest unit to smallest, everything is included - graph_tenquadrillion_component| - graph_zero + graph_tenquadrillion_component + | graph_zero ) leading_zero = ( @@ -85,16 +93,18 @@ def __init__(self): ) graph_nonzero = graph @ leading_zero graph = pynini.union(graph_nonzero, graph_zero) - + graph = graph @ leading_zero | graph_zero self.just_cardinals = graph - optional_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1) + optional_sign = pynini.closure( + (pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space, 0, 1 + ) final_graph = ( optional_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"") ) | (pynutil.insert("integer: \"") + graph + pynutil.insert("\"")) final_graph = self.add_tokens(final_graph) - self.fst = final_graph.optimize() \ No newline at end of file + self.fst = final_graph.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py index bb6b35d41..75e3f6f20 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py +++ 
b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py @@ -19,13 +19,9 @@ import pynini from pynini.lib import pynutil +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( - INPUT_LOWER_CASED, - GraphFst, - generator_main, -) class ClassifyFst(GraphFst): @@ -62,8 +58,8 @@ def __init__( cardinal = CardinalFst() cardinal_graph = cardinal.fst word_graph = WordFst().fst - classify = (pynutil.add_weight(cardinal_graph, 1.1)| pynutil.add_weight(word_graph, 100)) - + classify = pynutil.add_weight(cardinal_graph, 1.1) | pynutil.add_weight(word_graph, 100) + token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ") tagger = pynini.closure(token, 1) @@ -71,4 +67,4 @@ def __init__( if far_file: generator_main(far_file, {"tokenize_and_classify": self.fst}) - logging.info(f"ClassifyFst grammars are saved to {far_file}.") \ No newline at end of file + logging.info(f"ClassifyFst grammars are saved to {far_file}.") diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py index 0d6ccd5c5..0e4dbb93c 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py @@ -27,6 +27,5 @@ class WordFst(GraphFst): def __init__(self): super().__init__(name="word", kind="classify") - word = pynutil.insert( - "name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"") + word = pynutil.insert("name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"") self.fst = word.optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/utils.py b/nemo_text_processing/inverse_text_normalization/ko/utils.py index 0222cc0b8..d198c3835 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/utils.py +++ b/nemo_text_processing/inverse_text_normalization/ko/utils.py @@ -15,9 +15,6 @@ import os - def get_abs_path(rel_path): return os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path - - diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py index 1800a6dc8..fb9a76d8e 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py @@ -15,11 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( - NEMO_NOT_QUOTE, - GraphFst, - delete_space, -) +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space class CardinalFst(GraphFst): @@ -34,21 +30,17 @@ def __init__(self): pynutil.delete("negative:") + delete_space + pynutil.delete("\"") - + pynini.accep("-") + + pynini.accep("-") + pynutil.delete("\"") ) optional_sign_output = pynini.closure(negative_sign + delete_space, 0, 1) - digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) + digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) integer_cardinal = ( - pynutil.delete("integer:") - + delete_space - + pynutil.delete("\"") - + digits_from_tag - + 
pynutil.delete("\"") + pynutil.delete("integer:") + delete_space + pynutil.delete("\"") + digits_from_tag + pynutil.delete("\"") ) graph = integer_cardinal final_graph = optional_sign_output + graph - self.fst = self.delete_tokens(final_graph).optimize() \ No newline at end of file + self.fst = self.delete_tokens(final_graph).optimize() diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py index 9d750d757..d8851e206 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py @@ -13,9 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst class VerbalizeFst(GraphFst): @@ -30,7 +30,6 @@ def __init__(self): cardinal = CardinalFst() cardinal_graph = cardinal.fst word_graph = WordFst().fst - - graph = (cardinal_graph|word_graph) + + graph = cardinal_graph | word_graph self.fst = graph - diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py index 8d40d2804..17f547740 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py @@ -18,8 +18,8 @@ import pynini from pynini.lib import pynutil +from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst -from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space class VerbalizeFinalFst(GraphFst): @@ -27,6 +27,7 @@ class VerbalizeFinalFst(GraphFst): Finite state transducer that verbalizes an entire sentence, e.g. 
tokens { name: "its" } tokens { time { hours: "12" minutes: "30" } } tokens { name: "now" } -> its 12:30 now """ + def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False): super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic) far_file = None diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py index a423d5d0c..ecf62bfe3 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py +++ b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py @@ -18,7 +18,6 @@ from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst - class WordFst(GraphFst): ''' tokens { name: "一" } -> 一 diff --git a/nemo_text_processing/inverse_text_normalization/run_evaluate.py b/nemo_text_processing/inverse_text_normalization/run_evaluate.py index 7bfdd3399..133474940 100644 --- a/nemo_text_processing/inverse_text_normalization/run_evaluate.py +++ b/nemo_text_processing/inverse_text_normalization/run_evaluate.py @@ -35,7 +35,7 @@ def parse_args(): parser.add_argument( "--lang", help="language", - choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja","ko"], + choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja", "ko"], default="en", type=str, ) diff --git a/tests/nemo_text_processing/ko/test_cardinal.py b/tests/nemo_text_processing/ko/test_cardinal.py index 872a5aa2a..f95d74107 100644 --- a/tests/nemo_text_processing/ko/test_cardinal.py +++ b/tests/nemo_text_processing/ko/test_cardinal.py @@ -28,4 +28,4 @@ class TestCardinal: @pytest.mark.unit def test_denorm(self, test_input, expected): pred = self.inverse_normalizer_ko.inverse_normalize(test_input, verbose=False) - assert pred == expected \ No newline at end of file + assert pred == expected diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py index 0df099774..d1ba34a37 100644 --- a/tools/text_processing_deployment/pynini_export.py +++ b/tools/text_processing_deployment/pynini_export.py @@ -106,7 +106,7 @@ def parse_args(): 'mr', 'ja', 'rw', - 'ko' + 'ko', ], type=str, default='en', From 7842d1324e32a40bd522b99eba726f962dafc742 Mon Sep 17 00:00:00 2001 From: hmlee245 Date: Fri, 23 May 2025 16:31:36 -0700 Subject: [PATCH 7/8] Third draft of Korean ITN work.
Mainly fixing minor issues and adding test cases Signed-off-by: hmlee245 --- Jenkinsfile | 22 +++++++++++++++++++ .../ko/data/numbers/thousands.tsv | 11 ---------- .../ko/data/numbers/zero.tsv | 1 + .../ko/taggers/cardinal.py | 8 +++---- .../test_cases_cardinal.txt | 12 +++++++++- 5 files changed, 37 insertions(+), 17 deletions(-) delete mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv create mode 100644 nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv diff --git a/Jenkinsfile b/Jenkinsfile index c94c107c6..32375f28f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -28,6 +28,7 @@ pipeline { MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1' JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1' HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-03-25-1' + KO_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/05-21-25-0' DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0' } stages { @@ -318,6 +319,22 @@ pipeline { } } } + stage('L0: Create KO ITN Grammars') { + when { + anyOf { + branch 'main' + changeRequest target: 'main' + } + } + failFast true + parallel { + stage('L0: KO ITN grammars') { + steps { + sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ko --text="100" --cache_dir ${KO_TN_CACHE}' + } + } + } + } // L1 Tests starts here @@ -406,6 +423,11 @@ pipeline { sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/hy/ -m "not pleasefixme" --cpu --tn_cache_dir ${HY_TN_CACHE}' } } + stage('L1: Run all KO TN/ITN tests (restore grammars from cache)') { + steps { + sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/ko/ -m "not pleasefixme" --cpu --tn_cache_dir ${KO_TN_CACHE}' + } + } } } diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv deleted file mode 100644 index 541752211..000000000 --- a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/thousands.tsv +++ /dev/null @@ -1,11 +0,0 @@ -억 -조 -경 -해 -자 -양 -구 -간 -정 -재 -극 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv new file mode 100644 index 000000000..cbf967001 --- /dev/null +++ b/nemo_text_processing/inverse_text_normalization/ko/data/numbers/zero.tsv @@ -0,0 +1 @@ +영 0 \ No newline at end of file diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index 7253019f0..a1cf1012f 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -31,7 +31,7 @@ class CardinalFst(GraphFst): def __init__(self): super().__init__(name="cardinal", kind="classify") - graph_zero = pynini.cross("영", "0") + graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv")) graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")) ten = pynutil.delete("십") @@ -85,15 +85,13 @@ def __init__(self): ) graph_nonzero = graph @ leading_zero graph = pynini.union(graph_nonzero, graph_zero) - - graph = graph @ leading_zero | graph_zero self.just_cardinals = graph - optional_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 
'negative: "-"')) + delete_space,0, 1) + negative_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1) final_graph = ( - optional_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"") + negative_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"") ) | (pynutil.insert("integer: \"") + graph + pynutil.insert("\"")) final_graph = self.add_tokens(final_graph) diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt index 007273e5e..4f64116e5 100644 --- a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt +++ b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_cardinal.txt @@ -24,4 +24,14 @@ 백오십억칠천만~15070000000 오천억~500000000000 일조~1000000000000 -이조오천억~2500000000000 \ No newline at end of file +이조오천억~2500000000000 +영영영~000 +영영백이십삼~00123 +만천~11000 +만천백십일~11111 +경~10000000000000000 +마이너스일~-1 +마이너스 일~-1 +- 일~-1 +마이너스일억사천이백칠십구만구천팔십이~-142799082 +마이너스 칠백삼십오~-735 \ No newline at end of file From ff52238330b0fe3f0974a3a883127c7eeba6624d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 May 2025 22:53:51 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../inverse_text_normalization/ko/taggers/cardinal.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py index 1c78f6000..13d6271df 100644 --- a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py +++ b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py @@ -96,7 +96,9 @@ def __init__(self): self.just_cardinals = graph - negative_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1) + negative_sign = pynini.closure( + (pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space, 0, 1 + ) final_graph = ( negative_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"")