@@ -254,29 +254,29 @@ def test_split_without_origin_tracking() -> None:
254
254
255
255
def test_html_escapes_with_origin_tracking() -> None:
    """HTML character escapes are replaced in the token text while origin_spans
    map each output character back to the start of its source escape."""
    # NOTE(review): this literal was reconstructed from the recorded origin_spans
    # (escapes of length 6, 8 and 5 starting at offsets 2, 9 and 18: hex, named
    # and decimal forms). It reproduces the expected txt exactly, but confirm
    # against the repository source — the paste had already rendered the entities.
    test_string = "xy&#x61;z&aacute;w&#97;b"
    tokens = list(tokenizer.generate_raw_tokens(test_string, replace_html_escapes=True))
    # The whole string collapses into a single raw token.
    assert len(tokens) == 1
    # Spans: x→0, y→1, a→2 (&#x61;), z→8, á→9 (&aacute;), w→17, a→18 (&#97;), b→23.
    assert tokens[0] == Tok(
        kind=TOK.RAW,
        txt="xyazáwab",
        val=None,
        original=test_string,
        origin_spans=[0, 1, 2, 8, 9, 17, 18, 23],
    )
def test_unicode_escapes_with_origin_tracking() -> None:
    """Combining marks (ACCENT, UMLAUT) are fused with the preceding letter into
    a single composed glyph; origin_spans point at the base letter's position."""
    test_string = "xya" + ACCENT + "zu" + ACCENT + "wo" + UMLAUT + "b"
    tokens = list(tokenizer.generate_raw_tokens(test_string, replace_composite_glyphs=True))
    # The whole string collapses into a single raw token.
    assert len(tokens) == 1
    # Spans: x→0, y→1, á→2 (a+ACCENT), z→4, ú→5 (u+ACCENT), w→7, ö→8 (o+UMLAUT), b→10.
    assert tokens[0] == Tok(
        kind=TOK.RAW,
        txt="xyázúwöb",
        val=None,
        original=test_string,
        origin_spans=[0, 1, 2, 4, 5, 7, 8, 10],
    )
def test_unicode_escapes_that_are_removed() -> None:
    """Characters that are deleted outright (soft hyphen, U+00AD) leave gaps in
    origin_spans: surviving characters keep their original offsets."""
    # NOTE(review): spaces shown around \xad in the recovered paste were
    # rendering artifacts; origin_spans [0, 2, 4] require this 5-char string.
    test_string = "a\xadb\xadc"
    tokens = list(tokenizer.generate_raw_tokens(test_string, replace_composite_glyphs=True))
    # The whole string collapses into a single raw token.
    assert len(tokens) == 1
    # a→0, b→2, c→4 — offsets 1 and 3 (the soft hyphens) are skipped.
    assert tokens[0] == Tok(
        kind=TOK.RAW, txt="abc", val=None, original=test_string, origin_spans=[0, 2, 4]
    )
def test_html_unicode_mix() -> None:
    """Composite-glyph replacement and HTML-escape replacement compose: both the
    combining marks and the &aacute; escape are resolved, with spans for each."""
    test_string = "xya" + ACCENT + "zu" + ACCENT + "w&aacute;o" + UMLAUT + "b"
    # Source offsets:  012    3       45     6      7 8......15 16  17         18
    tokens = list(
        tokenizer.generate_raw_tokens(
            test_string, replace_composite_glyphs=True, replace_html_escapes=True
        )
    )
    # The whole string collapses into a single raw token.
    assert len(tokens) == 1
    # Spans: x→0, y→1, á→2 (a+ACCENT), z→4, ú→5 (u+ACCENT), w→7,
    # á→8 (&aacute;, 8 chars), ö→16 (o+UMLAUT), b→18.
    assert tokens[0] == Tok(
        kind=TOK.RAW,
        txt="xyázúwáöb",
        val=None,
        original=test_string,
        origin_spans=[0, 1, 2, 4, 5, 7, 8, 16, 18],
    )