@@ -24,15 +24,15 @@ class CardinalFst(GraphFst):
24
24
def __init__ (self , deterministic : bool = True ):
25
25
super ().__init__ (name = "cardinal" , kind = "classify" , deterministic = deterministic )
26
26
# Load base .tsv files
27
- graph_zero = pynini .string_file (get_abs_path ("data/number/zero.tsv" ))
28
- graph_digit = pynini .string_file (get_abs_path ("data/number/digit.tsv" ))
29
-
27
+ graph_zero = pynini .string_file (get_abs_path ("data/number/zero.tsv" ))
28
+ graph_digit = pynini .string_file (get_abs_path ("data/number/digit.tsv" ))
29
+
30
30
digit_except_one = pynini .difference (NEMO_DIGIT , "1" )
31
31
digit_except_zero_one = pynini .difference (digit_except_one , "0" )
32
-
32
+
33
33
graph_digit_alt = digit_except_zero_one @ graph_digit
34
34
graph_ty = pynini .string_file (get_abs_path ("data/number/ty.tsv" ))
35
- graph_teen = pynini .string_file (get_abs_path ("data/number/teen.tsv" ))
35
+ graph_teen = pynini .string_file (get_abs_path ("data/number/teen.tsv" ))
36
36
37
37
# Compose all basic number forms
38
38
graph_all = (graph_ty + (graph_digit | pynutil .delete ('0' ))) | graph_teen | graph_digit
@@ -50,7 +50,7 @@ def __init__(self, deterministic: bool = True):
50
50
(pynini .closure (pynutil .delete ('0' )) + graph_all ),
51
51
)
52
52
graph_thousand = thousands @ graph_thousand_component
53
-
53
+
54
54
ten_thousands = NEMO_DIGIT ** 5
55
55
graph_ten_thousand_component = (pynini .cross ('1' , '만' ) | (graph_digit + pynutil .insert ('만' ))) + pynini .union (
56
56
pynini .closure (pynutil .delete ('0' )),
@@ -59,16 +59,16 @@ def __init__(self, deterministic: bool = True):
59
59
(pynini .closure (pynutil .delete ('0' )) + graph_all ),
60
60
)
61
61
graph_ten_thousand = ten_thousands @ graph_ten_thousand_component
62
-
62
+
63
63
hundred_thousands = NEMO_DIGIT ** 6
64
- graph_hundred_thousand_component = ((NEMO_DIGIT ** 2 @ graph_all ) + pynutil .insert ('만' )) + pynini .union (
64
+ graph_hundred_thousand_component = ((NEMO_DIGIT ** 2 @ graph_all ) + pynutil .insert ('만' )) + pynini .union (
65
65
pynini .closure (pynutil .delete ('0' )),
66
66
graph_thousand_component ,
67
67
(pynutil .delete ('0' ) + graph_hundred_component ),
68
68
(pynini .closure (pynutil .delete ('0' )) + graph_all ),
69
69
)
70
70
graph_hundred_thousand = hundred_thousands @ graph_hundred_thousand_component
71
-
71
+
72
72
millions = NEMO_DIGIT ** 7
73
73
graph_million_component = ((NEMO_DIGIT ** 3 @ graph_hundred_component ) + pynutil .insert ('만' )) + pynini .union (
74
74
pynini .closure (pynutil .delete ('0' )),
@@ -79,15 +79,17 @@ def __init__(self, deterministic: bool = True):
79
79
graph_million = millions @ graph_million_component
80
80
81
81
ten_millions = NEMO_DIGIT ** 8
82
- graph_ten_million_component = ((NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('만' )) + pynini .union (
82
+ graph_ten_million_component = (
83
+ (NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('만' )
84
+ ) + pynini .union (
83
85
pynini .closure (pynutil .delete ('0' )),
84
86
graph_thousand_component ,
85
87
(pynutil .delete ('0' ) + graph_hundred_component ),
86
88
(pynini .closure (pynutil .delete ('0' )) + graph_all ),
87
89
)
88
90
graph_ten_million = ten_millions @ graph_ten_million_component
89
-
90
- hundred_millions = NEMO_DIGIT ** 9
91
+
92
+ hundred_millions = NEMO_DIGIT ** 9
91
93
graph_hundred_million_component = (graph_digit + pynutil .insert ('억' )) + pynini .union (
92
94
pynini .closure (pynutil .delete ('0' )),
93
95
graph_ten_million_component ,
@@ -127,7 +129,9 @@ def __init__(self, deterministic: bool = True):
127
129
graph_billions = billions @ graph_billions_component
128
130
129
131
ten_billions = NEMO_DIGIT ** 12
130
- graph_ten_billions_component = ((NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('억' )) + pynini .union (
132
+ graph_ten_billions_component = (
133
+ (NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('억' )
134
+ ) + pynini .union (
131
135
pynini .closure (pynutil .delete ('0' )),
132
136
graph_ten_million_component ,
133
137
(pynutil .delete ('0' ) + graph_million_component ),
@@ -138,7 +142,7 @@ def __init__(self, deterministic: bool = True):
138
142
(pynini .closure (pynutil .delete ('0' )) + graph_all ),
139
143
)
140
144
graph_ten_billions = ten_billions @ graph_ten_billions_component
141
-
145
+
142
146
hundred_billions = NEMO_DIGIT ** 13
143
147
graph_hundred_billions_component = (graph_digit + pynutil .insert ('조' )) + pynini .union (
144
148
pynini .closure (pynutil .delete ('0' )),
@@ -155,79 +159,91 @@ def __init__(self, deterministic: bool = True):
155
159
(pynini .closure (pynutil .delete ('0' )) + graph_all ),
156
160
)
157
161
graph_hundred_billions = hundred_billions @ graph_hundred_billions_component
158
-
162
+
159
163
trillion = NEMO_DIGIT ** 14
160
- graph_trillion_component = ((NEMO_DIGIT ** 2 @ graph_all ) + pynutil .insert ('조' ) + pynini .union (
161
- pynini .closure (pynutil .delete ('0' )),
162
- graph_ten_billions_component ,
163
- pynutil .delete ('0' ) + graph_billions_component ,
164
- pynutil .delete ('00' ) + graph_thousand_million_component ,
165
- pynutil .delete ('000' ) + graph_hundred_million_component ,
166
- pynutil .delete ('0000' ) + graph_ten_million_component ,
167
- pynutil .delete ('00000' ) + graph_million_component ,
168
- pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
169
- pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
170
- pynutil .delete ('00000000' ) + graph_thousand_component ,
171
- pynutil .delete ('000000000' ) + graph_hundred_component ,
172
- (pynini .closure (pynutil .delete ('0' )) + graph_all )
164
+ graph_trillion_component = (
165
+ (NEMO_DIGIT ** 2 @ graph_all )
166
+ + pynutil .insert ('조' )
167
+ + pynini .union (
168
+ pynini .closure (pynutil .delete ('0' )),
169
+ graph_ten_billions_component ,
170
+ pynutil .delete ('0' ) + graph_billions_component ,
171
+ pynutil .delete ('00' ) + graph_thousand_million_component ,
172
+ pynutil .delete ('000' ) + graph_hundred_million_component ,
173
+ pynutil .delete ('0000' ) + graph_ten_million_component ,
174
+ pynutil .delete ('00000' ) + graph_million_component ,
175
+ pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
176
+ pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
177
+ pynutil .delete ('00000000' ) + graph_thousand_component ,
178
+ pynutil .delete ('000000000' ) + graph_hundred_component ,
179
+ (pynini .closure (pynutil .delete ('0' )) + graph_all ),
173
180
)
174
181
)
175
182
graph_trillions = trillion @ graph_trillion_component
176
183
177
184
ten_trillions = NEMO_DIGIT ** 15
178
- graph_ten_trillions_component = ((NEMO_DIGIT ** 3 @ graph_hundred_component ) + pynutil .insert ('조' ) + pynini .union (
179
- pynini .closure (pynutil .delete ('0' )),
180
- graph_ten_billions_component ,
181
- pynutil .delete ('0' ) + graph_billions_component ,
182
- pynutil .delete ('00' ) + graph_thousand_million_component ,
183
- pynutil .delete ('000' ) + graph_hundred_million_component ,
184
- pynutil .delete ('0000' ) + graph_ten_million_component ,
185
- pynutil .delete ('00000' ) + graph_million_component ,
186
- pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
187
- pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
188
- pynutil .delete ('00000000' ) + graph_thousand_component ,
189
- pynutil .delete ('000000000' ) + graph_hundred_component ,
190
- (pynini .closure (pynutil .delete ('0' )) + graph_all )
191
- )
185
+ graph_ten_trillions_component = (
186
+ (NEMO_DIGIT ** 3 @ graph_hundred_component )
187
+ + pynutil .insert ('조' )
188
+ + pynini .union (
189
+ pynini .closure (pynutil .delete ('0' )),
190
+ graph_ten_billions_component ,
191
+ pynutil .delete ('0' ) + graph_billions_component ,
192
+ pynutil .delete ('00' ) + graph_thousand_million_component ,
193
+ pynutil .delete ('000' ) + graph_hundred_million_component ,
194
+ pynutil .delete ('0000' ) + graph_ten_million_component ,
195
+ pynutil .delete ('00000' ) + graph_million_component ,
196
+ pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
197
+ pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
198
+ pynutil .delete ('00000000' ) + graph_thousand_component ,
199
+ pynutil .delete ('000000000' ) + graph_hundred_component ,
200
+ (pynini .closure (pynutil .delete ('0' )) + graph_all ),
201
+ )
192
202
)
193
203
graph_ten_trillions = ten_trillions @ graph_ten_trillions_component
194
204
195
205
hundred_trillions = NEMO_DIGIT ** 16
196
- graph_hundred_trillions_component = ((NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('조' ) + pynini .union (
197
- pynini .closure (pynutil .delete ('0' )),
198
- graph_ten_billions_component ,
199
- pynutil .delete ('0' ) + graph_billions_component ,
200
- pynutil .delete ('00' ) + graph_thousand_million_component ,
201
- pynutil .delete ('000' ) + graph_hundred_million_component ,
202
- pynutil .delete ('0000' ) + graph_ten_million_component ,
203
- pynutil .delete ('00000' ) + graph_million_component ,
204
- pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
205
- pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
206
- pynutil .delete ('00000000' ) + graph_thousand_component ,
207
- pynutil .delete ('000000000' ) + graph_hundred_component ,
208
- (pynini .closure (pynutil .delete ('0' )) + graph_all )
206
+ graph_hundred_trillions_component = (
207
+ (NEMO_DIGIT ** 4 @ graph_thousand_component )
208
+ + pynutil .insert ('조' )
209
+ + pynini .union (
210
+ pynini .closure (pynutil .delete ('0' )),
211
+ graph_ten_billions_component ,
212
+ pynutil .delete ('0' ) + graph_billions_component ,
213
+ pynutil .delete ('00' ) + graph_thousand_million_component ,
214
+ pynutil .delete ('000' ) + graph_hundred_million_component ,
215
+ pynutil .delete ('0000' ) + graph_ten_million_component ,
216
+ pynutil .delete ('00000' ) + graph_million_component ,
217
+ pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
218
+ pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
219
+ pynutil .delete ('00000000' ) + graph_thousand_component ,
220
+ pynutil .delete ('000000000' ) + graph_hundred_component ,
221
+ (pynini .closure (pynutil .delete ('0' )) + graph_all ),
209
222
)
210
223
)
211
224
graph_hundred_trillions = hundred_trillions @ graph_hundred_trillions_component
212
225
213
226
thousand_trillions = NEMO_DIGIT ** 17
214
- graph_thousand_trillions_component = (graph_digit + pynutil .insert ('경' ) + pynini .union (
215
- pynini .closure (pynutil .delete ('0' )),
216
- graph_hundred_trillions_component ,
217
- pynutil .delete ('0' ) + graph_ten_trillions_component ,
218
- pynutil .delete ('00' ) + graph_trillion_component ,
219
- pynutil .delete ('000' ) + graph_hundred_billions_component ,
220
- pynutil .delete ('0000' ) + graph_ten_billions_component ,
221
- pynutil .delete ('00000' ) + graph_billions_component ,
222
- pynutil .delete ('000000' ) + graph_thousand_million_component ,
223
- pynutil .delete ('0000000' ) + graph_hundred_million_component ,
224
- pynutil .delete ('00000000' ) + graph_ten_million_component ,
225
- pynutil .delete ('000000000' ) + graph_million_component ,
226
- pynutil .delete ('0000000000' ) + graph_hundred_thousand_component ,
227
- pynutil .delete ('00000000000' ) + graph_ten_thousand_component ,
228
- pynutil .delete ('000000000000' ) + graph_thousand_component ,
229
- pynutil .delete ('0000000000000' ) + graph_hundred_component ,
230
- (pynini .closure (pynutil .delete ('0' )) + graph_all )
227
+ graph_thousand_trillions_component = (
228
+ graph_digit
229
+ + pynutil .insert ('경' )
230
+ + pynini .union (
231
+ pynini .closure (pynutil .delete ('0' )),
232
+ graph_hundred_trillions_component ,
233
+ pynutil .delete ('0' ) + graph_ten_trillions_component ,
234
+ pynutil .delete ('00' ) + graph_trillion_component ,
235
+ pynutil .delete ('000' ) + graph_hundred_billions_component ,
236
+ pynutil .delete ('0000' ) + graph_ten_billions_component ,
237
+ pynutil .delete ('00000' ) + graph_billions_component ,
238
+ pynutil .delete ('000000' ) + graph_thousand_million_component ,
239
+ pynutil .delete ('0000000' ) + graph_hundred_million_component ,
240
+ pynutil .delete ('00000000' ) + graph_ten_million_component ,
241
+ pynutil .delete ('000000000' ) + graph_million_component ,
242
+ pynutil .delete ('0000000000' ) + graph_hundred_thousand_component ,
243
+ pynutil .delete ('00000000000' ) + graph_ten_thousand_component ,
244
+ pynutil .delete ('000000000000' ) + graph_thousand_component ,
245
+ pynutil .delete ('0000000000000' ) + graph_hundred_component ,
246
+ (pynini .closure (pynutil .delete ('0' )) + graph_all ),
231
247
)
232
248
)
233
249
graph_thousand_trillions = thousand_trillions @ graph_thousand_trillions_component
@@ -254,14 +270,7 @@ def __init__(self, deterministic: bool = True):
254
270
).optimize ()
255
271
256
272
# Sign and final formatting
257
- optional_sign = pynini .closure (
258
- pynutil .insert ('negative: "true" ' ) + pynini .cross ("-" , "" ), 0 , 1
259
- )
260
- final_graph = (
261
- optional_sign
262
- + pynutil .insert ('integer: "' )
263
- + graph_num
264
- + pynutil .insert ('"' )
265
- )
273
+ optional_sign = pynini .closure (pynutil .insert ('negative: "true" ' ) + pynini .cross ("-" , "" ), 0 , 1 )
274
+ final_graph = optional_sign + pynutil .insert ('integer: "' ) + graph_num + pynutil .insert ('"' )
266
275
final_graph = self .add_tokens (final_graph )
267
276
self .fst = final_graph .optimize ()
0 commit comments