-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtranslate_localization.py
263 lines (195 loc) · 9.63 KB
/
translate_localization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
#!/usr/bin/env python3
#
# Marius Montebaur
#
# October 2023
#
import os
import glob
import json
import argparse
from dataclasses import dataclass
from typing import Dict, List
from chat_gpt_interface import ChatGPT
from common import get_app_context, get_openapi_token, add_common_args, user_approved_overwrite_warning
def get_task_desc(source_lang: str, target_lang: str) -> str:
    """
    Build the task description (system prompt) that tells the model how to
    translate the phrases of a query.

    Args:
        source_lang: Language the phrases are written in.
        target_lang: Language the phrases should be translated to.

    Returns:
        The instruction text. It starts and ends with a newline and contains
        one instruction per line without any leading indentation.
    """
    # The prompt is assembled from individual, unindented lines instead of an
    # indented triple-quoted literal that is "dedented" with str.replace.
    # Replacing space runs also hits spaces inside the sentences and garbles
    # the prompt.
    lines = (
        "",
        f"I want you to translate some text from {source_lang} to {target_lang}.",
        "This text will be used to offer an iOS app in different languages.",
        "The input given to you will consist of three lines for each phrase that needs to be translated.",
        f"First, the phrase in {source_lang}.",
        "Second, a comment that describes in which context the phrase is occurring in the application's UI. Make sure that the translation you provide fits this context.",
        'Third, a line starting with "translation: " in which you should add your translation.',
        'Please return only the lines starting with "translation: " with your added translation after the colon.',
        "Do not include the comments in the translations, those are only to add context.",
        # The model has to see a literal backslash followed by "n" here.
        '"\\n" represent escaped newlines in the original string. Please keep the line breaks like this.',
        "",
    )
    return "\n".join(lines)
def escape_char_while_parsing_localizable_strings(string: str) -> str:
    """
    Turn every real line break in `string` into the two characters backslash
    and "n", so that the text fits on a single line of a query.
    """
    pieces = string.split("\n")
    return "\\n".join(pieces)
def unescape_string_while_parsing_response(string: str) -> str:
    """
    Turn every escaped line break (backslash followed by "n") in `string`
    back into a real newline character.
    """
    pieces = string.split("\\n")
    return "\n".join(pieces)
class Translatable:
    """
    Represents a translatable string in the Localizable.xcstrings. This class is
    used to build a query for ChatGPT and to parse the response for this
    specific translatable string.
    """

    def __init__(self, key, info_dict):
        """
        Args:
            key: The string key as found in the "strings" dictionary.
            info_dict: The dictionary stored under that key. It is kept by
                reference, so translations added later are written into the
                very same dictionary object.
        """
        # Escape line breaks so the key occupies exactly one line in a query.
        self.escaped_key = escape_char_while_parsing_localizable_strings(key)
        self.info_dict = info_dict

    def is_translated_to(self, language: str) -> bool:
        """Return True if `info_dict` already has a localization for `language`."""
        if "localizations" not in self.info_dict:
            return False
        return language in self.info_dict["localizations"]

    def get_gpt_query(self) -> str:
        """
        Build the three query lines ("key", "comment", "translation") for this
        string. A placeholder text is used if the string has no comment.
        """
        if "comment" in self.info_dict:
            comment = escape_char_while_parsing_localizable_strings(self.info_dict["comment"])
        else:
            comment = "No comment provided."
        return f"key: {self.escaped_key}\ncomment: {comment}\ntranslation: \n"

    def parse_gpt_response(self, gpt_response: str, for_language: str) -> bool:
        """
        Store the translation contained in one response line.

        Args:
            gpt_response: A single line that is expected to start with
                "translation: ".
            for_language: Language key under which the translation is stored
                in the "localizations" dictionary.

        Returns:
            True if the line was accepted and stored, False if the line does
            not have the expected prefix or the data has an unexpected type.
        """
        prefix = "translation: "
        try:
            if not gpt_response.startswith(prefix):
                return False
            value = unescape_string_while_parsing_response(gpt_response[len(prefix):])
            localizations = self.info_dict.setdefault("localizations", {})
            localizations[for_language] = {
                "stringUnit": {
                    "state": "translated",
                    "value": value
                }
            }
            return True
        except (AttributeError, TypeError):
            # Only malformed input (e.g. a non-string response or a
            # non-dictionary "localizations" entry) is reported as a failed
            # parse. A bare except would also hide KeyboardInterrupt,
            # SystemExit and genuine programming errors.
            return False
@dataclass
class TranslateL10nConfig:
    """Settings of one translation run, filled from the command line arguments."""

    # Language code the strings are translated to.
    target_language: str
    # Path of the Localizable.xcstrings file that is read.
    localizable_path: str
    # Cooldown value handed to the ChatGPT interface.
    openai_api_cooldown: int
    # Path the resulting Localizable.xcstrings content is written to.
    output_path: str
    # Log path handed to the ChatGPT interface.
    log_path: str
    # Whether strings that already have a translation are queried again.
    update_existing: bool
def _parse_args():
    """
    Read the command line and turn it into a TranslateL10nConfig.

    The given path may be the Localizable.xcstrings file itself or a folder
    that is searched recursively for it. The process exits with status 1 if
    the path does not exist, if no file is found, or if the user declines the
    overwrite warning.
    """
    arg_parser = argparse.ArgumentParser(description="Augments a Localizable.xcstrings file with translations for the given language. The Localizable.xcstrings file itself must be generated by Xcode. The translation of strings will take the comments and a description for the app's purpose into account.")
    arg_parser.add_argument("target_language", help="ISO 639-1 Code if the language has one, otherwise use ISO 639-2 Code")
    arg_parser.add_argument("localizable_path", help="Path to a Localizable.xcstrings. If no file is given, the sub folders of the given folder will be searched for this file.")
    arg_parser.add_argument("--output", type=str, help="Optional output folder. The Localizable.xcstrings file will not be overwritten and the modified version will be placed in the given folder.")
    arg_parser.add_argument("--update-existing", action="store_true", help="If this optional flag is set, terms for which a translation already exists will be overwritten with newly queried translations.")
    add_common_args(arg_parser)
    cli = arg_parser.parse_args()

    source_path = cli.localizable_path
    if not os.path.exists(source_path):
        print(f"Localizable.xcstrings does not exist at: {source_path}\nAborting.")
        exit(1)

    if os.path.isdir(source_path):
        search_pattern = os.path.join(source_path, "**/Localizable.xcstrings")
        matches = glob.glob(search_pattern, recursive=True)
        if not matches:
            print("Error: No Localizable.xcstrings found in the current directory and its subdirectories")
            exit(1)
        # Several files may match; the first hit wins.
        source_path = matches[0]

    print("Using Localizable.xcstrings:", source_path)

    # Without --output the input file itself is the write target.
    destination = cli.output if cli.output else source_path
    if cli.output and os.path.isdir(destination):
        destination = os.path.join(destination, "Localizable.xcstrings")

    overwrites_input = not cli.no_confirmation and destination == source_path
    if overwrites_input and not user_approved_overwrite_warning():
        # The user did not approve overwriting the input file.
        exit(1)

    return TranslateL10nConfig(
        target_language=cli.target_language,
        localizable_path=source_path,
        openai_api_cooldown=cli.openai_api_cooldown,
        output_path=destination,
        log_path=cli.log_path,
        update_existing=cli.update_existing,
    )
def build_gpt_translatable_objects(conf: TranslateL10nConfig, strings_dict: Dict[str, any]) -> List[Translatable]:
    """
    Wrap every entry of the "strings" dictionary in a Translatable and return
    those that still need a query: entries without a translation for the
    target language, or all entries if existing translations are to be
    updated.
    """
    wrapped = (Translatable(name, info) for name, info in strings_dict.items())
    return [
        entry
        for entry in wrapped
        if not entry.is_translated_to(conf.target_language) or conf.update_existing
    ]
def _make_response_validator(expected_lines: int):
    """Return a callback that accepts a response with exactly `expected_lines` "translation: " lines."""
    def is_response_valid_callback(response: str) -> bool:
        non_empty_lines = [line for line in response.split("\n") if line]
        if len(non_empty_lines) != expected_lines:
            return False
        return all(line.startswith("translation: ") for line in non_empty_lines)
    return is_response_valid_callback


def get_gpt_response(conf: TranslateL10nConfig, translatable_objs: List[Translatable], source_lang: str, max_query_length: int = 10) -> str:
    """
    Query ChatGPT for the translations of all given strings.

    The strings are sent in batches of `max_query_length`. Each response is
    checked by a callback that requires one "translation: " line per string
    of the batch.

    Args:
        conf: Run configuration (target language, log path, API cooldown).
        translatable_objs: Strings to translate, in query order.
        source_lang: Language the strings are written in.
        max_query_length: Number of strings per query.

    Returns:
        All responses concatenated, each followed by a newline.
    """
    chatgpt_token = get_openapi_token()
    app_context = get_app_context()
    task_desc = get_task_desc(source_lang, conf.target_language)

    # Do not leak the secret into the console or captured logs; only show
    # enough of it to tell which token is in use.
    token_text = str(chatgpt_token)
    token_hint = "..." + token_text[-4:] if len(token_text) > 8 else "<hidden>"
    print("Init ChatGPT with token: ", token_hint)
    cpt = ChatGPT(chatgpt_token, model="gpt-3.5-turbo", log_path=conf.log_path, cooldown_duration_sec=conf.openai_api_cooldown)

    # The system prompt is the same for every batch.
    system_cmd = task_desc
    if app_context:
        system_cmd += "\n" + app_context

    # Maybe because gpt3.5 is used, but with multi line strings, a query length of 30 was too complicated.
    responses = []
    # The range has to run up to len(...) itself. Stopping at len(...) - 1
    # skips the last batch whenever it holds exactly one string, which
    # includes the case of a single string overall.
    for start in range(0, len(translatable_objs), max_query_length):
        query_idx = start // max_query_length + 1
        query_lines = [t.get_gpt_query() for t in translatable_objs[start: start + max_query_length]]
        query_length = len(query_lines)
        query = "\n".join(query_lines)

        print(f"running gpt query number {query_idx} with {query_length} strings")

        response = cpt.complete_query(system_cmd, query, _make_response_validator(query_length))
        responses.append(response + "\n")

    return "".join(responses)
def evaluate_response(full_response: str, translatable_objects: List[Translatable], target_lang: str):
    """
    Parses the response for the translated strings and stores each
    translation in the matching Translatable.

    The n-th non-empty line of the response belongs to the n-th object. The
    process exits with status 1 if a line cannot be parsed or if the number
    of non-empty lines differs from the number of objects.

    Args:
        full_response: Concatenated response text, one translation per line.
        translatable_objects: Objects the translations were requested for, in
            query order.
        target_lang: Language the translations are stored under.
    """
    response_lines = [line for line in full_response.split("\n") if line]
    requested = len(translatable_objects)

    # More lines than objects cannot be mapped to strings; abort with a
    # message instead of failing with an IndexError.
    if len(response_lines) > requested:
        print(f"Something went wrong. {requested} translations were requested but {len(response_lines)} lines were returned.")
        print("Aborting.")
        raise SystemExit(1)

    for translatable, line in zip(translatable_objects, response_lines):
        if not translatable.parse_gpt_response(line, for_language=target_lang):
            print("invalid line")
            print(line)
            raise SystemExit(1)

    if len(response_lines) != requested:
        print(f"Something went wrong. {requested} translations were requested but only {len(response_lines)} were parsed.")
        print("Aborting.")
        raise SystemExit(1)
def main():
    """
    Run the whole workflow: read the Localizable.xcstrings file, query the
    missing translations, store them in the loaded data and write the result
    to the configured output path.
    """
    conf = _parse_args()

    # Read and write explicitly as UTF-8. The output is written with
    # ensure_ascii=False, so relying on the platform's default encoding can
    # fail or corrupt non-ASCII translations.
    with open(conf.localizable_path, "r", encoding="utf-8") as f:
        loc = json.load(f)

    source_lang = loc["sourceLanguage"]
    target_lang = conf.target_language
    print("Source language found: " + source_lang)

    strings_dict = loc["strings"]
    # The Translatable objects hold references into `loc`, so parsed
    # translations end up directly in the data that is written back below.
    translatable_objects = build_gpt_translatable_objects(conf, strings_dict)

    ## send to chatGPT
    full_response = get_gpt_response(conf, translatable_objects, source_lang)

    ## evaluate response
    evaluate_response(full_response, translatable_objects, target_lang)

    ## write back to json
    with open(conf.output_path, "w", encoding="utf-8") as f:
        json.dump(loc, f, indent=2, separators=(', ', ' : '), ensure_ascii=False)
# Run the translation workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()