|
4 | 4 | import urllib.parse
|
5 | 5 | import requests
|
6 | 6 |
|
7 |
| -from scripts.utils.dataset import get_words, get_word2ipa, get_custom_words, get_saved_wordlist |
| 7 | +from scripts.utils.dataset import get_words, get_word2ipa, get_custom_words, get_saved_wordlist, \ |
| 8 | + save_progression_wordlist |
8 | 9 | from scripts.utils.dictionary_database import RemedeDatabase
|
9 | 10 | from scripts.utils.sanitize import sanitize_word
|
10 | 11 | from scripts.utils.scrap import get_conjugaisons, get_synonyms, get_antonyms, get_word_metadata
|
@@ -104,21 +105,28 @@ def safe_get_word_document(word: str, ipa: str):
|
def remedize(word_list: list):
    """Build a document for every word of ``word_list`` and insert it in the database.

    For each word, the document and IPA come from ``custom_words_json`` when
    the word is listed in ``custom_words``; otherwise they are obtained with
    ``get_ipa`` and ``safe_get_word_document``. The metadata returned by
    ``get_word_metadata`` is completed with ``get_word_natures`` when no nature
    was found, then everything is passed to ``database.insert`` and a progress
    line is printed.

    If processing stops early, because of an error or a Ctrl+C, the words
    from the one being processed onward are handed to
    ``save_progression_wordlist`` before the exit signal is raised.

    :param word_list: words to process, in order.
    :raises KeyboardInterrupt: whenever processing stops early; the original
        exception is attached as ``__cause__``.
    """
    total = len(word_list)
    errored = 0
    # Index of the word being processed. Tracked with enumerate() instead of
    # word_list.index(word), which rescans the list for every word and points
    # at the first occurrence when a word appears more than once.
    position = 0
    try:
        for position, word in enumerate(word_list):
            if word in custom_words:
                document = custom_words_json[word]
                ipa = document["phoneme"]
            else:
                ipa = get_ipa(word)
                document = safe_get_word_document(word, ipa)
            if not document:
                # NOTE(review): the word is counted as errored but is still
                # inserted below with its falsy document - confirm intended.
                errored += 1
            elidable, feminine, syllables, min_syllables, max_syllables, nature = get_word_metadata(word, ipa)
            # No Openlexicon data, need to find by ourselves
            if not nature:
                nature = get_word_natures(document)
            database.insert(word, sanitize_word(word), ipa, nature, syllables, min_syllables, max_syllables, elidable, feminine, document)
            print(f"\033[A\033[KMot n°{position + 1}/{total}: \"{word}\"{' ' * (35 - len(word))} | {errored} erreurs")
    except (Exception, KeyboardInterrupt) as error:
        # KeyboardInterrupt derives from BaseException, not Exception, so it
        # must be listed explicitly for a Ctrl+C to save the progression too.
        if isinstance(error, KeyboardInterrupt):
            print("Interrupted by user. Saving progression...")
        else:
            print(f"Program raised error {error}. Saving progression...")
        # The current word is included: its processing did not complete.
        save_progression_wordlist(word_list[position:])
        # Callers catch KeyboardInterrupt as the exit signal; keep that type
        # and chain the real cause so it is not lost.
        raise KeyboardInterrupt from error
| 129 | + |
122 | 130 |
|
123 | 131 |
|
124 | 132 | def getTimeDetails(time_object):
|
@@ -150,7 +158,7 @@ def getTimeDetails(time_object):
|
150 | 158 | try:
|
151 | 159 | remedize(all_words)
|
152 | 160 | except KeyboardInterrupt:
|
153 |
| - print("Saving and exit...") |
| 161 | + print("Received exit signal.") |
154 | 162 |
|
155 | 163 | after = datetime.datetime.now()
|
156 | 164 | time = after - before
|
|
0 commit comments