Skip to content

Commit 7d3cad8

Browse files
Update script to download latest or a specific version.
1 parent d6efe94 commit 7d3cad8

File tree

1 file changed

+30
-10
lines changed

1 file changed

+30
-10
lines changed

Diff for: tools/unicode_properties_parse/download_unicode_data_files.py

+30-10
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,41 @@
11
# Copyright (c) Microsoft Corporation.
22
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
33

4+
from pathlib import PurePosixPath
5+
import sys
46
from urllib.request import urlretrieve
57

68

7-
Unicode_data_files = {
8-
"DerivedCoreProperties.txt": "https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt",
9-
"DerivedGeneralCategory.txt": "https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt",
10-
"EastAsianWidth.txt": "https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt",
11-
"GraphemeBreakProperty.txt": "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt",
12-
"GraphemeBreakTest.txt": "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt",
13-
"emoji-data.txt": "https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt",
14-
}
9+
def get_base_url():
    """Return the unicode.org base URL selected by the command-line argument.

    Exactly one argument is read from ``sys.argv``: either the literal
    ``latest`` or a three-part version number such as ``15.0.0``.

    Returns:
        The base URL, ending in ``/``, to which the relative data-file paths
        are appended.

    Raises:
        SystemExit: with a usage message when the argument count is wrong, or
            with an error message when the argument is neither ``latest`` nor
            a dotted ``X.Y.Z`` version.
    """
    import re  # local import so the block does not depend on file-level changes

    if len(sys.argv) != 2:
        sys.exit(f"Usage: python {sys.argv[0]} [latest|<VERSION LIKE 15.0.0>]")

    version = sys.argv[1]

    if version == "latest":
        return "https://unicode.org/Public/UCD/latest/"

    # The version is spliced directly into the URL path, so refuse anything
    # that is not a plain dotted version (empty strings, "../", slashes, ...)
    # instead of building a malformed URL that only fails later on download.
    if re.fullmatch(r"[0-9]+\.[0-9]+\.[0-9]+", version) is None:
        sys.exit(
            f"Invalid version {version!r}: expected 'latest' or a version like 15.0.0"
        )

    return f"https://unicode.org/Public/{version}/"
19+
20+
21+
# Data files to fetch, given as paths relative to the base URL returned by
# get_base_url(). Only the last path component is used as the local file name.
Unicode_data_files = [
    "ucd/DerivedCoreProperties.txt",
    "ucd/extracted/DerivedGeneralCategory.txt",
    "ucd/EastAsianWidth.txt",
    "ucd/auxiliary/GraphemeBreakProperty.txt",
    "ucd/auxiliary/GraphemeBreakTest.txt",
    "ucd/emoji/emoji-data.txt",
]


def download_unicode_data_files():
    """Fetch every entry of ``Unicode_data_files`` from the selected base URL.

    The base URL comes from ``get_base_url()``. Each file is saved under its
    bare file name (a relative path, so it resolves against the working
    directory), and the base URL and every download URL are printed as
    progress output.
    """
    base_url = get_base_url()
    print(f" Base URL: {base_url}")

    for relative_path in Unicode_data_files:
        url = f"{base_url}{relative_path}"
        print(f"Downloading: {url}")
        # Drop the "ucd/..." directories; keep only the final file name.
        urlretrieve(url, PurePosixPath(relative_path).name)
2040

2141

0 commit comments

Comments
 (0)