Skip to content

Commit 0ed7041

Browse files
committed
refactor get_subtitle_sources
'settings.subtitles_language' is now a ','-separated list of languages that the user understands. This eliminates cases in which a translated caption would be preferred over captions in the native language of the video, even though a multilingual user might understand them.
1 parent 96dcefa commit 0ed7041

File tree

1 file changed

+33
-56
lines changed

1 file changed

+33
-56
lines changed

youtube/watch.py

Lines changed: 33 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def video_rank(src):
168168

169169

170170

171-
def make_caption_src(info, lang, auto=False, trans_lang=None):
171+
def make_caption_src(info, lang, score, auto=False, trans_lang=None):
172172
label = lang
173173
if auto:
174174
label += ' (Automatic)'
@@ -179,6 +179,9 @@ def make_caption_src(info, lang, auto=False, trans_lang=None):
179179
'label': label,
180180
'srclang': trans_lang[0:2] if trans_lang else lang[0:2],
181181
'on': False,
182+
'score': score,
183+
'lang': lang,
184+
'auto': auto,
182185
}
183186

184187
def lang_in(lang, sequence):
@@ -195,80 +198,54 @@ def lang_eq(lang1, lang2):
195198
return False
196199
return lang1[0:2] == lang2[0:2]
197200

198-
def equiv_lang_in(lang, sequence):
199-
'''Extracts a language in sequence which is equivalent to lang.
200-
e.g. if lang is en, extracts en-GB from sequence.
201-
Necessary because if only a specific variant like en-GB is available, can't ask Youtube for simply en. Need to get the available variant.'''
202-
lang = lang[0:2]
203-
for l in sequence:
204-
if l[0:2] == lang:
205-
return l
206-
return None
207-
208201
def get_subtitle_sources(info):
209202
'''Returns these sources, ordered from least to most intelligible:
210203
native_video_lang (Automatic)
211204
foreign_langs (Manual)
205+
212206
native_video_lang (Automatic) -> pref_lang
213207
foreign_langs (Manual) -> pref_lang
214208
native_video_lang (Manual) -> pref_lang
209+
215210
pref_lang (Automatic)
216211
pref_lang (Manual)'''
217212
sources = []
218213
if not yt_data_extract.captions_available(info):
219214
return []
220-
pref_lang = settings.subtitles_language
215+
216+
user_langs = settings.subtitles_language.split(',')
221217
native_video_lang = None
222218
if info['automatic_caption_languages']:
223219
native_video_lang = info['automatic_caption_languages'][0]
224220

225-
highest_fidelity_is_manual = False
221+
kManual, kNativeVideoLang, kUserSpeaksIt = 1, 2, 4
222+
def score_lang(lang):
223+
score = 0
224+
if lang_in(lang, user_langs):
225+
score |= kUserSpeaksIt
226+
if lang_eq(lang, native_video_lang):
227+
score |= kNativeVideoLang
228+
return score
226229

227-
# Sources are added in very specific order outlined above
228-
# More intelligible sources are put further down to avoid browser bug when there are too many languages
229-
# (in firefox, it is impossible to select a language near the top of the list because it is cut off)
230+
for lang in info['manual_caption_languages']:
231+
score = score_lang(lang) | kManual
232+
sources.append(make_caption_src(info, lang, score))
230233

231-
# native_video_lang (Automatic)
232-
if native_video_lang and not lang_eq(native_video_lang, pref_lang):
233-
sources.append(make_caption_src(info, native_video_lang, auto=True))
234+
for lang in info['automatic_caption_languages']:
235+
score = score_lang(lang)
236+
if score:
237+
sources.append(make_caption_src(info, lang, score, auto=True))
234238

235-
# foreign_langs (Manual)
236-
for lang in info['manual_caption_languages']:
237-
if not lang_eq(lang, pref_lang):
238-
sources.append(make_caption_src(info, lang))
239-
240-
if (lang_in(pref_lang, info['translation_languages'])
241-
and not lang_in(pref_lang, info['automatic_caption_languages'])
242-
and not lang_in(pref_lang, info['manual_caption_languages'])):
243-
# native_video_lang (Automatic) -> pref_lang
244-
if native_video_lang and not lang_eq(pref_lang, native_video_lang):
245-
sources.append(make_caption_src(info, native_video_lang, auto=True, trans_lang=pref_lang))
246-
247-
# foreign_langs (Manual) -> pref_lang
248-
for lang in info['manual_caption_languages']:
249-
if not lang_eq(lang, native_video_lang) and not lang_eq(lang, pref_lang):
250-
sources.append(make_caption_src(info, lang, trans_lang=pref_lang))
251-
252-
# native_video_lang (Manual) -> pref_lang
253-
if lang_in(native_video_lang, info['manual_caption_languages']):
254-
sources.append(make_caption_src(info, native_video_lang, trans_lang=pref_lang))
255-
256-
# pref_lang (Automatic)
257-
if lang_in(pref_lang, info['automatic_caption_languages']):
258-
sources.append(make_caption_src(info, equiv_lang_in(pref_lang, info['automatic_caption_languages']), auto=True))
259-
260-
# pref_lang (Manual)
261-
if lang_in(pref_lang, info['manual_caption_languages']):
262-
sources.append(make_caption_src(info, equiv_lang_in(pref_lang, info['manual_caption_languages'])))
263-
highest_fidelity_is_manual = True
264-
265-
if sources and sources[-1]['srclang'] == pref_lang:
266-
# set as on by default since it's manual a default-on subtitles mode is in settings
267-
if highest_fidelity_is_manual and settings.subtitles_mode > 0:
268-
sources[-1]['on'] = True
269-
# set as on by default since settings indicate to set it as such even if it's not manual
270-
elif settings.subtitles_mode == 2:
271-
sources[-1]['on'] = True
239+
if not any(s['score'] & kUserSpeaksIt for s in sources):
240+
for user_lang in user_langs:
241+
if not lang in info['translation_languages']:
242+
continue
243+
244+
for s in sources[:]:
245+
lang, score, auto = s['lang'], s['score'] | kUserSpeaksIt, s['auto']
246+
sources.append(make_caption_src(info, lang, score, auto=auto, trans_lang=user_lang))
247+
248+
sources = sorted(sources, key=lambda x: x['score'])
272249

273250
if len(sources) == 0:
274251
assert len(info['automatic_caption_languages']) == 0 and len(info['manual_caption_languages']) == 0

0 commit comments

Comments
 (0)