@@ -28,26 +28,31 @@ def __init__(self, deterministic: bool = True):
28
28
graph_digit = pynini .string_file (get_abs_path ("data/number/digit.tsv" ))
29
29
30
30
digit_except_one = pynini .difference (NEMO_DIGIT , "1" )
31
- digit_except_zero_one = pynini .difference (digit_except_one , "0" )
31
+ digit_except_zero_one = pynini .difference (digit_except_one , "0" ) << < << << HEAD
32
32
33
33
graph_digit_alt = digit_except_zero_one @ graph_digit
34
34
graph_ty = pynini .string_file (get_abs_path ("data/number/ty.tsv" ))
35
35
graph_teen = pynini .string_file (get_abs_path ("data/number/teen.tsv" ))
36
+ == == == =
37
+
38
+ graph_digit_no_zero_one = digit_except_zero_one @ graph_digit
39
+ graph_ty = pynini .string_file (get_abs_path ("data/number/ty.tsv" ))
40
+ >> >> >> > 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
36
41
37
42
# Compose all basic number forms
38
- graph_all = (graph_ty + (graph_digit | pynutil .delete ('0' ))) | graph_teen | graph_digit
43
+ graph_1_to_99 = (graph_ty + (graph_digit | pynutil .delete ('0' ))) | graph_digit
39
44
40
45
hundreds = NEMO_DIGIT ** 3
41
- graph_hundred_component = (pynini .cross ('1' , '백' ) | (graph_digit_alt + pynutil .insert ('백' ))) + pynini .union (
42
- pynini .closure (pynutil .delete ('0' )), (pynini .closure (pynutil .delete ('0' )) + graph_all )
46
+ graph_hundred_component = (pynini .cross ('1' , '백' ) | (graph_digit_no_zero_one + pynutil .insert ('백' ))) + pynini .union (
47
+ pynini .closure (pynutil .delete ('0' )), (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 )
43
48
)
44
49
graph_hundred = hundreds @ graph_hundred_component
45
50
46
51
thousands = NEMO_DIGIT ** 4
47
- graph_thousand_component = (pynini .cross ('1' , '천' ) | (graph_digit_alt + pynutil .insert ('천' ))) + pynini .union (
52
+ graph_thousand_component = (pynini .cross ('1' , '천' ) | (graph_digit_no_zero_one + pynutil .insert ('천' ))) + pynini .union (
48
53
pynini .closure (pynutil .delete ('0' )),
49
54
graph_hundred_component ,
50
- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
55
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
51
56
)
52
57
graph_thousand = thousands @ graph_thousand_component
53
58
@@ -56,36 +61,44 @@ def __init__(self, deterministic: bool = True):
56
61
pynini .closure (pynutil .delete ('0' )),
57
62
graph_thousand_component ,
58
63
(pynutil .delete ('0' ) + graph_hundred_component ),
59
- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
64
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
60
65
)
61
66
graph_ten_thousand = ten_thousands @ graph_ten_thousand_component
62
67
63
68
hundred_thousands = NEMO_DIGIT ** 6
69
+ < << << << HEAD
64
70
graph_hundred_thousand_component = ((NEMO_DIGIT ** 2 @ graph_all ) + pynutil .insert ('만' )) + pynini .union (
71
+ == == == =
72
+ graph_hundred_thousand_component = ((NEMO_DIGIT ** 2 @ graph_1_to_99 ) + pynutil .insert ('만' )) + pynini .union (
73
+ >> >> >> > 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
65
74
pynini .closure (pynutil .delete ('0' )),
66
75
graph_thousand_component ,
67
76
(pynutil .delete ('0' ) + graph_hundred_component ),
68
- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
77
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
69
78
)
70
79
graph_hundred_thousand = hundred_thousands @ graph_hundred_thousand_component
71
80
72
81
millions = NEMO_DIGIT ** 7
73
- graph_million_component = ((NEMO_DIGIT ** 3 @ graph_hundred_component ) + pynutil .insert ('만' )) + pynini .union (
82
+ graph_million_component = ((graph_hundred ) + pynutil .insert ('만' )) + pynini .union (
74
83
pynini .closure (pynutil .delete ('0' )),
75
84
graph_thousand_component ,
76
85
(pynutil .delete ('0' ) + graph_hundred_component ),
77
- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
86
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
78
87
)
79
88
graph_million = millions @ graph_million_component
80
89
81
90
ten_millions = NEMO_DIGIT ** 8
91
+ << < << << HEAD
82
92
graph_ten_million_component = (
83
93
(NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('만' )
84
94
) + pynini .union (
95
+ == == == =
96
+ graph_ten_million_component = ((graph_thousand ) + pynutil .insert ('만' )) + pynini .union (
97
+ >> >> >> > 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
85
98
pynini .closure (pynutil .delete ('0' )),
86
99
graph_thousand_component ,
87
100
(pynutil .delete ('0' ) + graph_hundred_component ),
88
- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
101
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
89
102
)
90
103
graph_ten_million = ten_millions @ graph_ten_million_component
91
104
@@ -98,48 +111,52 @@ def __init__(self, deterministic: bool = True):
98
111
(pynutil .delete ('000' ) + graph_ten_thousand_component ),
99
112
(pynutil .delete ('0000' ) + graph_thousand_component ),
100
113
((pynutil .delete ('00000' ) + graph_hundred_component )),
101
- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
114
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
102
115
)
103
116
graph_hundred_million = hundred_millions @ graph_hundred_million_component
104
117
105
118
thousand_millions = NEMO_DIGIT ** 10
106
- graph_thousand_million_component = ((NEMO_DIGIT ** 2 @ graph_all ) + pynutil .insert ('억' )) + pynini .union (
119
+ graph_thousand_million_component = ((NEMO_DIGIT ** 2 @ graph_1_to_99 ) + pynutil .insert ('억' )) + pynini .union (
107
120
pynini .closure (pynutil .delete ('0' )),
108
121
graph_ten_million_component ,
109
122
(pynutil .delete ('0' ) + graph_million_component ),
110
123
(pynutil .delete ('00' ) + graph_hundred_thousand_component ),
111
124
(pynutil .delete ('000' ) + graph_ten_thousand_component ),
112
125
(pynutil .delete ('0000' ) + graph_thousand_component ),
113
126
((pynutil .delete ('00000' ) + graph_hundred_component )),
114
- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
127
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
115
128
)
116
129
graph_thousand_million = thousand_millions @ graph_thousand_million_component
117
130
118
131
billions = NEMO_DIGIT ** 11
119
- graph_billions_component = ((NEMO_DIGIT ** 3 @ graph_hundred_component ) + pynutil .insert ('억' )) + pynini .union (
132
+ graph_billions_component = ((graph_hundred ) + pynutil .insert ('억' )) + pynini .union (
120
133
pynini .closure (pynutil .delete ('0' )),
121
134
graph_ten_million_component ,
122
135
(pynutil .delete ('0' ) + graph_million_component ),
123
136
(pynutil .delete ('00' ) + graph_hundred_thousand_component ),
124
137
(pynutil .delete ('000' ) + graph_ten_thousand_component ),
125
138
(pynutil .delete ('0000' ) + graph_thousand_component ),
126
139
((pynutil .delete ('00000' ) + graph_hundred_component )),
127
- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
140
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
128
141
)
129
142
graph_billions = billions @ graph_billions_component
130
143
131
144
ten_billions = NEMO_DIGIT ** 12
145
+ << < << << HEAD
132
146
graph_ten_billions_component = (
133
147
(NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('억' )
134
148
) + pynini .union (
149
+ == == == =
150
+ graph_ten_billions_component = ((graph_thousand ) + pynutil .insert ('억' )) + pynini .union (
151
+ >> >> >> > 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
135
152
pynini .closure (pynutil .delete ('0' )),
136
153
graph_ten_million_component ,
137
154
(pynutil .delete ('0' ) + graph_million_component ),
138
155
(pynutil .delete ('00' ) + graph_hundred_thousand_component ),
139
156
(pynutil .delete ('000' ) + graph_ten_thousand_component ),
140
157
(pynutil .delete ('0000' ) + graph_thousand_component ),
141
158
((pynutil .delete ('00000' ) + graph_hundred_component )),
142
- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
159
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
143
160
)
144
161
graph_ten_billions = ten_billions @ graph_ten_billions_component
145
162
@@ -156,11 +173,12 @@ def __init__(self, deterministic: bool = True):
156
173
pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
157
174
pynutil .delete ('00000000' ) + graph_thousand_component ,
158
175
pynutil .delete ('000000000' ) + graph_hundred_component ,
159
- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
176
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
160
177
)
161
178
graph_hundred_billions = hundred_billions @ graph_hundred_billions_component
162
179
163
180
trillion = NEMO_DIGIT ** 14
181
+ << < << << HEAD
164
182
graph_trillion_component = (
165
183
(NEMO_DIGIT ** 2 @ graph_all )
166
184
+ pynutil .insert ('조' )
@@ -177,11 +195,27 @@ def __init__(self, deterministic: bool = True):
177
195
pynutil .delete ('00000000' ) + graph_thousand_component ,
178
196
pynutil .delete ('000000000' ) + graph_hundred_component ,
179
197
(pynini .closure (pynutil .delete ('0' )) + graph_all ),
198
+ == == == =
199
+ graph_trillion_component = ((NEMO_DIGIT ** 2 @ graph_1_to_99 ) + pynutil .insert ('조' ) + pynini .union (
200
+ pynini .closure (pynutil .delete ('0' )),
201
+ graph_ten_billions_component ,
202
+ pynutil .delete ('0' ) + graph_billions_component ,
203
+ pynutil .delete ('00' ) + graph_thousand_million_component ,
204
+ pynutil .delete ('000' ) + graph_hundred_million_component ,
205
+ pynutil .delete ('0000' ) + graph_ten_million_component ,
206
+ pynutil .delete ('00000' ) + graph_million_component ,
207
+ pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
208
+ pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
209
+ pynutil .delete ('00000000' ) + graph_thousand_component ,
210
+ pynutil .delete ('000000000' ) + graph_hundred_component ,
211
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 )
212
+ >> > >> >> 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
180
213
)
181
214
)
182
215
graph_trillions = trillion @ graph_trillion_component
183
216
184
217
ten_trillions = NEMO_DIGIT ** 15
218
+ << < << << HEAD
185
219
graph_ten_trillions_component = (
186
220
(NEMO_DIGIT ** 3 @ graph_hundred_component )
187
221
+ pynutil .insert ('조' )
@@ -199,10 +233,27 @@ def __init__(self, deterministic: bool = True):
199
233
pynutil .delete ('000000000' ) + graph_hundred_component ,
200
234
(pynini .closure (pynutil .delete ('0' )) + graph_all ),
201
235
)
236
+ == == == =
237
+ graph_ten_trillions_component = ((graph_hundred ) + pynutil .insert ('조' ) + pynini .union (
238
+ pynini .closure (pynutil .delete ('0' )),
239
+ graph_ten_billions_component ,
240
+ pynutil .delete ('0' ) + graph_billions_component ,
241
+ pynutil .delete ('00' ) + graph_thousand_million_component ,
242
+ pynutil .delete ('000' ) + graph_hundred_million_component ,
243
+ pynutil .delete ('0000' ) + graph_ten_million_component ,
244
+ pynutil .delete ('00000' ) + graph_million_component ,
245
+ pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
246
+ pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
247
+ pynutil .delete ('00000000' ) + graph_thousand_component ,
248
+ pynutil .delete ('000000000' ) + graph_hundred_component ,
249
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 )
250
+ )
251
+ >> > >> >> 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
202
252
)
203
253
graph_ten_trillions = ten_trillions @ graph_ten_trillions_component
204
254
205
255
hundred_trillions = NEMO_DIGIT ** 16
256
+ << < << << HEAD
206
257
graph_hundred_trillions_component = (
207
258
(NEMO_DIGIT ** 4 @ graph_thousand_component )
208
259
+ pynutil .insert ('조' )
@@ -219,11 +270,27 @@ def __init__(self, deterministic: bool = True):
219
270
pynutil .delete ('00000000' ) + graph_thousand_component ,
220
271
pynutil .delete ('000000000' ) + graph_hundred_component ,
221
272
(pynini .closure (pynutil .delete ('0' )) + graph_all ),
273
+ == == == =
274
+ graph_hundred_trillions_component = ((graph_thousand ) + pynutil .insert ('조' ) + pynini .union (
275
+ pynini .closure (pynutil .delete ('0' )),
276
+ graph_ten_billions_component ,
277
+ pynutil .delete ('0' ) + graph_billions_component ,
278
+ pynutil .delete ('00' ) + graph_thousand_million_component ,
279
+ pynutil .delete ('000' ) + graph_hundred_million_component ,
280
+ pynutil .delete ('0000' ) + graph_ten_million_component ,
281
+ pynutil .delete ('00000' ) + graph_million_component ,
282
+ pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
283
+ pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
284
+ pynutil .delete ('00000000' ) + graph_thousand_component ,
285
+ pynutil .delete ('000000000' ) + graph_hundred_component ,
286
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 )
287
+ >> > >> >> 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
222
288
)
223
289
)
224
290
graph_hundred_trillions = hundred_trillions @ graph_hundred_trillions_component
225
291
226
292
thousand_trillions = NEMO_DIGIT ** 17
293
+ << < << << HEAD
227
294
graph_thousand_trillions_component = (
228
295
graph_digit
229
296
+ pynutil .insert ('경' )
@@ -244,6 +311,25 @@ def __init__(self, deterministic: bool = True):
244
311
pynutil .delete ('000000000000' ) + graph_thousand_component ,
245
312
pynutil .delete ('0000000000000' ) + graph_hundred_component ,
246
313
(pynini .closure (pynutil .delete ('0' )) + graph_all ),
314
+ == == == =
315
+ graph_thousand_trillions_component = (graph_digit + pynutil .insert ('경' ) + pynini .union (
316
+ pynini .closure (pynutil .delete ('0' )),
317
+ graph_hundred_trillions_component ,
318
+ pynutil .delete ('0' ) + graph_ten_trillions_component ,
319
+ pynutil .delete ('00' ) + graph_trillion_component ,
320
+ pynutil .delete ('000' ) + graph_hundred_billions_component ,
321
+ pynutil .delete ('0000' ) + graph_ten_billions_component ,
322
+ pynutil .delete ('00000' ) + graph_billions_component ,
323
+ pynutil .delete ('000000' ) + graph_thousand_million_component ,
324
+ pynutil .delete ('0000000' ) + graph_hundred_million_component ,
325
+ pynutil .delete ('00000000' ) + graph_ten_million_component ,
326
+ pynutil .delete ('000000000' ) + graph_million_component ,
327
+ pynutil .delete ('0000000000' ) + graph_hundred_thousand_component ,
328
+ pynutil .delete ('00000000000' ) + graph_ten_thousand_component ,
329
+ pynutil .delete ('000000000000' ) + graph_thousand_component ,
330
+ pynutil .delete ('0000000000000' ) + graph_hundred_component ,
331
+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 )
332
+ >> > >> >> 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
247
333
)
248
334
)
249
335
graph_thousand_trillions = thousand_trillions @ graph_thousand_trillions_component
@@ -265,7 +351,7 @@ def __init__(self, deterministic: bool = True):
265
351
graph_ten_thousand ,
266
352
graph_thousand ,
267
353
graph_hundred ,
268
- graph_all ,
354
+ graph_1_to_99 ,
269
355
graph_zero ,
270
356
).optimize ()
271
357
0 commit comments