Skip to content

Commit 0a4843d

Browse files
committed May 9, 2024
Make subtitle rendering based on PyAV
1 parent 0065948 commit 0a4843d

File tree

5 files changed

+141
-213
lines changed

5 files changed

+141
-213
lines changed
 

‎auto_editor/analyze.py

+46-25
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66
from typing import TYPE_CHECKING
77

88
import numpy as np
9+
from numpy import concatenate as npconcat
10+
from numpy import zeros as npzeros
911

1012
from auto_editor import version
1113
from auto_editor.lang.json import Lexer, Parser, dump
@@ -19,7 +21,6 @@
1921
orc,
2022
)
2123
from auto_editor.lib.data_structs import Sym
22-
from auto_editor.render.subtitle import SubtitleParser
2324
from auto_editor.utils.cmdkw import (
2425
Required,
2526
pAttr,
@@ -190,7 +191,7 @@ def none(self) -> NDArray[np.bool_]:
190191
return np.ones(self.media_length, dtype=np.bool_)
191192

192193
def all(self) -> NDArray[np.bool_]:
193-
return np.zeros(self.media_length, dtype=np.bool_)
194+
return npzeros(self.media_length, dtype=np.bool_)
194195

195196
def read_cache(self, tag: str, obj: dict[str, Any]) -> None | np.ndarray:
196197
workfile = os.path.join(
@@ -278,7 +279,7 @@ def get_max_volume(s: np.ndarray) -> float:
278279
)
279280
self.bar.start(audio_ticks, "Analyzing audio volume")
280281

281-
threshold_list = np.zeros((audio_ticks), dtype=np.float64)
282+
threshold_list = npzeros((audio_ticks), dtype=np.float64)
282283

283284
if max_volume == 0: # Prevent dividing by zero
284285
return threshold_list
@@ -313,31 +314,51 @@ def subtitle(
313314
except re.error as e:
314315
self.log.error(e)
315316

316-
sub_file = self.ensure.subtitle(self.src, stream)
317-
parser = SubtitleParser(self.tb)
317+
import av
318+
319+
result = npzeros((30), dtype=np.bool_)
320+
count = 0
321+
subtitle_length = 0
318322

319-
with open(sub_file, encoding="utf-8") as file:
320-
parser.parse(file.read(), "webvtt")
323+
with av.open(self.src.path, "r") as container:
324+
for packet in container.demux(subtitles=stream):
325+
if packet is None or packet.pts is None:
326+
continue
321327

322-
# stackoverflow.com/questions/9662346/python-code-to-remove-html-tags-from-a-string
323-
def cleanhtml(raw_html: str) -> str:
324-
cleanr = re.compile("<.*?>")
325-
return re.sub(cleanr, "", raw_html)
328+
line = ""
329+
if sub := packet.decode():
330+
for val in sub[0].rects:
331+
if isinstance(val, av.subtitles.subtitle.AssSubtitle):
332+
line += val.ass.decode("utf-8", "ignore")
333+
if isinstance(val, av.subtitles.subtitle.TextSubtitle):
334+
line += val.text.decode("utf-8", "ignore")
326335

327-
if not parser.contents:
328-
self.log.error("subtitle has no valid entries")
336+
if packet.duration is not None and packet.time_base is not None:
337+
end = round(
338+
(packet.pts + packet.duration) * packet.time_base * self.tb
339+
)
340+
subtitle_length = max(subtitle_length, end)
329341

330-
result = np.zeros((parser.contents[-1].end), dtype=np.bool_)
342+
if line and re.search(pattern, line):
343+
if packet.duration is None or packet.time_base is None:
344+
self.log.warning("Subtitle has unknown duration")
345+
continue
331346

332-
count = 0
333-
for content in parser.contents:
334-
if max_count is not None and count >= max_count:
335-
break
347+
count += 1
348+
start = round(packet.pts * packet.time_base * self.tb)
349+
350+
if len(result) < end:
351+
new_length = max(end, len(result) * 2)
352+
result = npconcat(
353+
[result, npzeros(new_length, dtype=np.bool_)], axis=0
354+
)
355+
356+
result[start:end] = 1
357+
358+
if max_count is not None and count >= max_count:
359+
break
336360

337-
line = cleanhtml(content.after.strip())
338-
if line and re.search(pattern, line):
339-
result[content.start : content.end] = 1
340-
count += 1
361+
result = result[:subtitle_length]
341362

342363
return result
343364

@@ -377,7 +398,7 @@ def motion(self, s: int, blur: int, width: int) -> NDArray[np.float64]:
377398
)
378399
graph.configure()
379400

380-
threshold_list = np.zeros((1024), dtype=np.float64)
401+
threshold_list = npzeros((1024), dtype=np.float64)
381402

382403
for unframe in container.decode(stream):
383404
graph.push(unframe)
@@ -392,8 +413,8 @@ def motion(self, s: int, blur: int, width: int) -> NDArray[np.float64]:
392413
current_frame = frame.to_ndarray()
393414

394415
if index > len(threshold_list) - 1:
395-
threshold_list = np.concatenate(
396-
(threshold_list, np.zeros((len(threshold_list)), dtype=np.float64)),
416+
threshold_list = npconcat(
417+
(threshold_list, npzeros((len(threshold_list)), dtype=np.float64)),
397418
axis=0,
398419
)
399420

‎auto_editor/edit.py

+7-15
Original file line number | Diff line number | Diff line change
@@ -214,15 +214,6 @@ def edit_media(
214214
ensure = Ensure(ffmpeg, samplerate, temp, log)
215215

216216
if tl is None:
217-
# Extract subtitles in their native format.
218-
if src is not None and len(src.subtitles) > 0 and not args.sn:
219-
cmd = ["-i", f"{src.path}", "-hide_banner"]
220-
for s, sub in enumerate(src.subtitles):
221-
cmd.extend(["-map", f"0:s:{s}"])
222-
for s, sub in enumerate(src.subtitles):
223-
cmd.extend([os.path.join(temp, f"{s}s.{sub.ext}")])
224-
ffmpeg.run(cmd)
225-
226217
tl = make_timeline(sources, ffmpeg, ensure, args, samplerate, bar, temp, log)
227218

228219
if export["export"] == "timeline":
@@ -280,12 +271,8 @@ def make_media(tl: v3, output: str) -> None:
280271

281272
visual_output = []
282273
audio_output = []
283-
sub_output = []
284274
apply_later = False
285275

286-
if ctr.allow_subtitle and not args.sn:
287-
sub_output = make_new_subtitles(tl, ffmpeg, temp, log)
288-
289276
if ctr.allow_audio:
290277
audio_output = make_new_audio(tl, ensure, args, ffmpeg, bar, temp, log)
291278

@@ -304,20 +291,25 @@ def make_media(tl: v3, output: str) -> None:
304291
visual_output.append((False, out_path))
305292

306293
log.conwrite("Writing output file")
294+
295+
making_subs = ctr.allow_subtitle and not args.sn
296+
old_out = os.path.join(temp, f"oldout.{out_ext}")
307297
mux_quality_media(
308298
ffmpeg,
309299
visual_output,
310300
audio_output,
311-
sub_output,
301+
0 if tl.v1 is None else len(tl.v1.source.subtitles),
312302
apply_later,
313303
ctr,
314-
output,
304+
old_out if making_subs else output,
315305
tl.tb,
316306
args,
317307
src,
318308
temp,
319309
log,
320310
)
311+
if making_subs:
312+
make_new_subtitles(tl, old_out, output, log)
321313

322314
if export["export"] == "clip-sequence":
323315
if tl.v1 is None:

‎auto_editor/output.py

+2-23
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ def mux_quality_media(
7676
ffmpeg: FFmpeg,
7777
visual_output: list[tuple[bool, str]],
7878
audio_output: list[str],
79-
sub_output: list[str],
79+
subtitle_streams: int,
8080
apply_v: bool,
8181
ctr: Container,
8282
output_path: str,
@@ -88,7 +88,6 @@ def mux_quality_media(
8888
) -> None:
8989
v_tracks = len(visual_output)
9090
a_tracks = len(audio_output)
91-
s_tracks = 0 if args.sn else len(sub_output)
9291

9392
cmd = ["-hide_banner", "-y", "-i", f"{src.path}"]
9493

@@ -126,10 +125,7 @@ def mux_quality_media(
126125
new_a_file = audio_output[0]
127126
cmd.extend(["-i", new_a_file])
128127

129-
for subfile in sub_output:
130-
cmd.extend(["-i", subfile])
131-
132-
for i in range(v_tracks + s_tracks + a_tracks):
128+
for i in range(v_tracks + a_tracks):
133129
cmd.extend(["-map", f"{i+1}:0"])
134130

135131
cmd.extend(["-map_metadata", "0"])
@@ -163,20 +159,6 @@ def mux_quality_media(
163159
break
164160
if astream.lang is not None:
165161
cmd.extend([f"-metadata:s:a:{i}", f"language={astream.lang}"])
166-
for i, sstream in enumerate(src.subtitles):
167-
if i > s_tracks:
168-
break
169-
if sstream.lang is not None:
170-
cmd.extend([f"-metadata:s:s:{i}", f"language={sstream.lang}"])
171-
172-
if s_tracks > 0:
173-
scodec = src.subtitles[0].codec
174-
if same_container:
175-
cmd.extend(["-c:s", scodec])
176-
elif ctr.scodecs is not None:
177-
if scodec not in ctr.scodecs:
178-
scodec = ctr.scodecs[0]
179-
cmd.extend(["-c:s", scodec])
180162

181163
if a_tracks > 0:
182164
cmd += _ffset("-c:a", args.audio_codec) + _ffset("-b:a", args.audio_bitrate)
@@ -200,9 +182,6 @@ def mux_quality_media(
200182
cmd.extend(args.extras.split(" "))
201183
cmd.extend(["-strict", "-2"]) # Allow experimental codecs.
202184

203-
if s_tracks > 0:
204-
cmd.extend(["-map", "0:t?"]) # Add input attachments to output.
205-
206185
# This was causing a crash for 'example.mp4 multi-track.mov'
207186
# cmd.extend(["-map", "0:d?"])
208187

0 commit comments

Comments
 (0)