Skip to content

Commit 026d979

Browse files
committed
Iterate frames instead of using temp wav
1 parent d55ec6c commit 026d979

File tree

2 files changed

+95
-64
lines changed

2 files changed

+95
-64
lines changed

auto_editor/edit.py

+17-43
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
from auto_editor.utils.chunks import Chunk, Chunks
2323
from auto_editor.utils.cmdkw import ParserError, parse_with_palet, pAttr, pAttrs
2424
from auto_editor.utils.container import Container, container_constructor
25-
from auto_editor.utils.func import parse_bitrate
2625
from auto_editor.utils.log import Log
2726

2827
if TYPE_CHECKING:
@@ -303,50 +302,27 @@ def make_media(tl: v3, output_path: str) -> None:
303302
output_stream, vframes = None, iter([])
304303

305304
# Setup audio
306-
if ctr.default_aud != "none":
307-
audio_paths = make_new_audio(tl, ctr, args, log)
308-
else:
309-
audio_paths = []
310-
311-
if len(audio_paths) > 1 and ctr.max_audios == 1:
312-
log.warning("Dropping extra audio streams (container only allows one)")
313-
audio_paths = audio_paths[0:1]
314-
315-
if audio_paths:
316-
try:
317-
audio_encoder = bv.Codec(args.audio_codec, "w")
318-
except bv.FFmpegError as e:
319-
log.error(e)
320-
if audio_encoder.audio_formats is None:
321-
log.error(f"{args.audio_codec}: No known audio formats avail.")
322-
audio_format = audio_encoder.audio_formats[0]
323-
resampler = AudioResampler(format=audio_format, layout="stereo", rate=tl.sr)
305+
try:
306+
audio_encoder = bv.Codec(args.audio_codec, "w")
307+
except bv.FFmpegError as e:
308+
log.error(e)
309+
if audio_encoder.audio_formats is None:
310+
log.error(f"{args.audio_codec}: No known audio formats avail.")
311+
audio_format = audio_encoder.audio_formats[0]
312+
resampler = AudioResampler(format=audio_format, layout="stereo", rate=tl.sr)
324313

325314
audio_streams: list[bv.AudioStream] = []
326-
audio_inputs = []
327-
audio_gen_frames = []
328-
for i, audio_path in enumerate(audio_paths):
329-
audio_stream = output.add_stream(
330-
args.audio_codec,
331-
format=audio_format,
332-
rate=tl.sr,
333-
time_base=Fraction(1, tl.sr),
334-
)
335-
if not isinstance(audio_stream, bv.AudioStream):
336-
log.error(f"Not a known audio codec: {args.audio_codec}")
337315

338-
if args.audio_bitrate != "auto":
339-
audio_stream.bit_rate = parse_bitrate(args.audio_bitrate, log)
340-
log.debug(f"audio bitrate: {audio_stream.bit_rate}")
341-
else:
342-
log.debug(f"[auto] audio bitrate: {audio_stream.bit_rate}")
343-
if i < len(src.audios) and src.audios[i].lang is not None:
344-
audio_stream.metadata["language"] = src.audios[i].lang # type: ignore
316+
if ctr.default_aud != "none":
317+
audio_streams, audio_gen_frames = make_new_audio(
318+
output, audio_format, tl, ctr, args, log
319+
)
320+
else:
321+
audio_streams, audio_gen_frames = [], [iter([])]
345322

346-
audio_streams.append(audio_stream)
347-
audio_input = bv.open(audio_path)
348-
audio_inputs.append(audio_input)
349-
audio_gen_frames.append(audio_input.decode(audio=0))
323+
# if len(audio_paths) > 1 and ctr.max_audios == 1:
324+
# log.warning("Dropping extra audio streams (container only allows one)")
325+
# audio_paths = audio_paths[0:1]
350326

351327
# Setup subtitles
352328
if ctr.default_sub != "none" and not args.sn:
@@ -510,8 +486,6 @@ def __eq__(self, other):
510486
bar.end()
511487

512488
# Close resources
513-
for audio_input in audio_inputs:
514-
audio_input.close()
515489
for subtitle_input in subtitle_inputs:
516490
subtitle_input.close()
517491
output.close()

auto_editor/render/audio.py

+78-21
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import io
44
import struct
55
import sys
6+
from fractions import Fraction
67
from pathlib import Path
78
from typing import TYPE_CHECKING
89

@@ -19,9 +20,13 @@
1920
from auto_editor.timeline import TlAudio, v3
2021
from auto_editor.utils.cmdkw import ParserError, parse_with_palet, pAttr, pAttrs
2122
from auto_editor.utils.container import Container
23+
from auto_editor.utils.func import parse_bitrate
2224
from auto_editor.utils.log import Log
2325

2426
if TYPE_CHECKING:
27+
from collections.abc import Iterator
28+
from typing import Any
29+
2530
from auto_editor.__main__ import Args
2631

2732
Reader = io.BufferedReader | io.BytesIO
@@ -364,23 +369,85 @@ def ndarray_to_file(audio_data: np.ndarray, rate: int, out: str | Path) -> None:
364369
output.mux(stream.encode(None))
365370

366371

367-
def make_new_audio(tl: v3, ctr: Container, args: Args, log: Log) -> list[str]:
372+
def ndarray_to_iter(
    audio_data: np.ndarray, fmt: bv.AudioFormat, rate: int
) -> Iterator[AudioFrame]:
    """Lazily turn a sample array into audio frames in the `fmt` sample format.

    `audio_data` is sliced along its second axis into pieces of `rate // 4`
    samples (a quarter of a second each). Every piece is wrapped in a stereo
    "s16p" frame whose pts is the index of its first sample, then handed to a
    resampler targeting `fmt` at the same rate. Whatever frames the resampler
    returns are yielded in order.
    """
    quarter_second = rate // 4
    to_target = bv.AudioResampler(rate=rate, format=fmt, layout="stereo")

    for first_sample in range(0, audio_data.shape[1], quarter_second):
        last_sample = first_sample + quarter_second
        piece = audio_data[:, first_sample:last_sample]

        src_frame = AudioFrame.from_ndarray(piece, format="s16p", layout="stereo")
        src_frame.rate = rate
        # pts is counted in samples from the start of `audio_data`.
        src_frame.pts = first_sample

        for out_frame in to_target.resample(src_frame):
            yield out_frame
387+
388+
389+
def _decode_then_close(container: Any) -> Iterator[AudioFrame]:
    """Yield the frames of `container`'s first audio stream, then close it."""
    try:
        yield from container.decode(audio=0)
    finally:
        # Runs when decoding is exhausted or the generator is closed/discarded
        # after it has started, so the opened file is always released.
        container.close()


def make_new_audio(
    output: bv.container.OutputContainer,
    audio_format: bv.AudioFormat,
    tl: v3,
    ctr: Container,
    args: Args,
    log: Log,
) -> tuple[list[bv.AudioStream], list[Iterator[AudioFrame]]]:
    """Add the output audio streams and build one frame iterator per stream.

    `_make_new_audio` renders the timeline's audio and returns one entry per
    output track; each entry is either a path string to a rendered file or an
    already-built frame iterator. For every entry a stream is added to
    `output` using `args.audio_codec`, `audio_format` and the timeline sample
    rate, the bitrate is applied unless `args.audio_bitrate` is "auto", and
    the language tag of the matching source audio track is copied when set.

    Path entries are opened and decoded lazily; the opened container is closed
    by the returned iterator itself once its frames are exhausted, so callers
    have nothing extra to clean up.

    `ctr` is not used here; it is kept so existing callers keep working.

    Returns `(audio_streams, audio_gen_frames)`, two lists of equal length
    where `audio_gen_frames[i]` supplies the frames for `audio_streams[i]`.
    """
    audio_gen_frames: list[Iterator[AudioFrame]] = []
    audio_streams: list[bv.AudioStream] = []
    audio_paths = _make_new_audio(tl, audio_format, args, log)

    src = tl.src
    assert src is not None

    for i, audio_path in enumerate(audio_paths):
        audio_stream = output.add_stream(
            args.audio_codec,
            format=audio_format,
            rate=tl.sr,
            time_base=Fraction(1, tl.sr),
        )
        if not isinstance(audio_stream, bv.AudioStream):
            # NOTE(review): log.error is presumably fatal (does not return) - confirm.
            log.error(f"Not a known audio codec: {args.audio_codec}")

        if args.audio_bitrate != "auto":
            audio_stream.bit_rate = parse_bitrate(args.audio_bitrate, log)
            log.debug(f"audio bitrate: {audio_stream.bit_rate}")
        else:
            log.debug(f"[auto] audio bitrate: {audio_stream.bit_rate}")

        # Only source tracks that exist at this index can donate a language tag.
        if i < len(src.audios) and src.audios[i].lang is not None:
            audio_stream.metadata["language"] = src.audios[i].lang  # type: ignore

        audio_streams.append(audio_stream)

        if isinstance(audio_path, str):
            # Open eagerly so a missing or unreadable file fails here, but let
            # the generator own the handle so it is closed after decoding.
            audio_gen_frames.append(_decode_then_close(bv.open(audio_path)))
        else:
            audio_gen_frames.append(audio_path)

    return audio_streams, audio_gen_frames
433+
434+
435+
def _make_new_audio(tl: v3, fmt: bv.AudioFormat, args: Args, log: Log) -> list[Any]:
368436
sr = tl.sr
369437
tb = tl.tb
370-
output: list[str] = []
438+
output: list[Any] = []
371439
samples: dict[tuple[FileInfo, int], AudioData] = {}
372440

373441
norm = parse_norm(args.audio_normalize, log)
374-
375442
temp = log.temp
376443

377444
if not tl.a[0]:
378445
log.error("Trying to render empty audio timeline")
379446

380447
for i, layer in enumerate(tl.a):
381448
path = Path(temp, f"new{i}.wav")
382-
output.append(f"{path}")
383449
arr: AudioData | None = None
450+
use_iter = False
384451

385452
for c, clip in enumerate(layer):
386453
if (clip.src, clip.stream) not in samples:
@@ -391,19 +458,8 @@ def make_new_audio(tl: v3, ctr: Container, args: Args, log: Log) -> list[str]:
391458

392459
log.conwrite("Creating audio")
393460
if arr is None:
394-
dtype = np.int32
395-
for _samp_arr in samples.values():
396-
dtype = _samp_arr.dtype
397-
break
398-
399461
leng = max(round((layer[-1].start + layer[-1].dur) * sr / tb), sr // tb)
400-
arr = np.memmap(
401-
Path(temp, "asdf.map"),
402-
mode="w+",
403-
dtype=dtype,
404-
shape=(2, leng),
405-
)
406-
del leng
462+
arr = np.zeros(shape=(2, leng), dtype=np.int16)
407463

408464
samp_list = samples[(clip.src, clip.stream)]
409465

@@ -428,19 +484,20 @@ def make_new_audio(tl: v3, ctr: Container, args: Args, log: Log) -> list[str]:
428484

429485
if arr is not None:
430486
if norm is None:
431-
ndarray_to_file(arr, sr, path)
487+
use_iter = True
432488
else:
433489
pre_master = Path(temp, "premaster.wav")
434490
ndarray_to_file(arr, sr, pre_master)
435491
apply_audio_normalization(norm, pre_master, path, log)
436492

437-
try:
438-
Path(temp, "asdf.map").unlink(missing_ok=True)
439-
except PermissionError:
440-
pass
493+
if use_iter and arr is not None:
494+
output.append(ndarray_to_iter(arr, fmt, sr))
495+
else:
496+
output.append(f"{path}")
441497

442498
if args.mix_audio_streams and len(output) > 1:
443499
new_a_file = f"{Path(temp, 'new_audio.wav')}"
444500
mix_audio_files(sr, output, new_a_file)
445501
return [new_a_file]
502+
446503
return output

0 commit comments

Comments
 (0)