Skip to content

Commit 6291dda

Browse files
committed
Don't need to use wavfile_write
1 parent 026d979 commit 6291dda

File tree

2 files changed

+35
-77
lines changed

2 files changed

+35
-77
lines changed

auto_editor/edit.py

+11 -11
Original file line number | Diff line number | Diff line change
@@ -308,21 +308,21 @@ def make_media(tl: v3, output_path: str) -> None:
308308
log.error(e)
309309
if audio_encoder.audio_formats is None:
310310
log.error(f"{args.audio_codec}: No known audio formats avail.")
311-
audio_format = audio_encoder.audio_formats[0]
312-
resampler = AudioResampler(format=audio_format, layout="stereo", rate=tl.sr)
311+
fmt = audio_encoder.audio_formats[0]
312+
resampler = AudioResampler(format=fmt, layout="stereo", rate=tl.sr)
313313

314314
audio_streams: list[bv.AudioStream] = []
315315

316-
if ctr.default_aud != "none":
317-
audio_streams, audio_gen_frames = make_new_audio(
318-
output, audio_format, tl, ctr, args, log
319-
)
320-
else:
321-
audio_streams, audio_gen_frames = [], [iter([])]
316+
if ctr.default_aud == "none":
317+
while len(tl.a) > 0:
318+
tl.a.pop()
319+
elif len(tl.a) > 1 and ctr.max_audios == 1:
320+
log.warning("Dropping extra audio streams (container only allows one)")
321+
322+
while len(tl.a) > 1:
323+
tl.a.pop()
322324

323-
# if len(audio_paths) > 1 and ctr.max_audios == 1:
324-
# log.warning("Dropping extra audio streams (container only allows one)")
325-
# audio_paths = audio_paths[0:1]
325+
audio_streams, audio_gen_frames = make_new_audio(output, fmt, tl, args, log)
326326

327327
# Setup subtitles
328328
if ctr.default_sub != "none" and not args.sn:

auto_editor/render/audio.py

+24 -66
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,7 @@
11
from __future__ import annotations
22

3-
import io
4-
import struct
5-
import sys
63
from fractions import Fraction
4+
from io import BytesIO
75
from pathlib import Path
86
from typing import TYPE_CHECKING
97

@@ -19,7 +17,6 @@
1917
from auto_editor.lib.err import MyError
2018
from auto_editor.timeline import TlAudio, v3
2119
from auto_editor.utils.cmdkw import ParserError, parse_with_palet, pAttr, pAttrs
22-
from auto_editor.utils.container import Container
2320
from auto_editor.utils.func import parse_bitrate
2421
from auto_editor.utils.log import Log
2522

@@ -29,8 +26,6 @@
2926

3027
from auto_editor.__main__ import Args
3128

32-
Reader = io.BufferedReader | io.BytesIO
33-
Writer = io.BufferedWriter | io.BytesIO
3429
AudioData = np.ndarray
3530

3631

@@ -101,59 +96,6 @@ def parse_ebu_bytes(norm: dict, stat: bytes, log: Log) -> tuple[str, str]:
10196
return "loudnorm", filter
10297

10398

104-
def wavfile_write(fid: Writer, sr: int, arr: np.ndarray) -> None:
105-
# arr.shape is (samples, channels).
106-
107-
def _handle_pad_byte(fid: Reader, size: int) -> None:
108-
if size % 2 == 1:
109-
fid.seek(1, 1)
110-
111-
PCM = 0x0001
112-
IEEE_FLOAT = 0x0003
113-
114-
channels = 1 if arr.ndim == 1 else arr.shape[1]
115-
bit_depth = arr.dtype.itemsize * 8
116-
block_align = channels * (bit_depth // 8)
117-
data_size = arr.nbytes
118-
total_size = 44 + data_size # Basic WAV header size + data size
119-
120-
if is_rf64 := total_size > 0xFFFFFFFF:
121-
fid.write(b"RF64\xff\xff\xff\xffWAVE")
122-
ds64_size = 28
123-
ds64_chunk_data = (0).to_bytes(ds64_size, "little") # placeholder values
124-
fid.write(b"ds64" + struct.pack("<I", ds64_size) + ds64_chunk_data)
125-
else:
126-
fid.write(b"RIFF" + struct.pack("<I", total_size - 8) + b"WAVE")
127-
128-
dkind = arr.dtype.kind
129-
format_tag = IEEE_FLOAT if dkind == "f" else PCM
130-
131-
fmt_chunk_data = struct.pack(
132-
"<HHIIHH", format_tag, channels, sr, 0, block_align, bit_depth
133-
)
134-
fid.write(b"fmt " + struct.pack("<I", len(fmt_chunk_data)) + fmt_chunk_data)
135-
136-
# Data chunk
137-
fid.write(b"data")
138-
fid.write(struct.pack("<I", 0xFFFFFFFF if is_rf64 else data_size))
139-
140-
if arr.dtype.byteorder == ">" or (
141-
arr.dtype.byteorder == "=" and sys.byteorder == "big"
142-
):
143-
arr = arr.byteswap()
144-
fid.write(arr.ravel().view("b").data)
145-
146-
if is_rf64:
147-
end_position = fid.tell()
148-
fid.seek(16) # Position at the start of 'ds64' chunk size
149-
150-
file_size = end_position - 20
151-
fid.write(struct.pack("<I", ds64_size))
152-
fid.write(file_size.to_bytes(8, "little") + data_size.to_bytes(8, "little"))
153-
154-
fid.seek(end_position)
155-
156-
15799
def apply_audio_normalization(
158100
norm: dict, pre_master: Path, path: Path, log: Log
159101
) -> None:
@@ -218,17 +160,30 @@ def get_peak_level(frame: AudioFrame) -> float:
218160
def process_audio_clip(
219161
clip: TlAudio, samp_list: AudioData, samp_start: int, samp_end: int, sr: int
220162
) -> np.ndarray:
221-
samp_list = samp_list.T.copy(order="C")
163+
to_s16 = bv.AudioResampler(format="s16", layout="stereo", rate=sr)
164+
input_buffer = BytesIO()
165+
166+
with bv.open(input_buffer, "w", format="wav") as container:
167+
output_stream = container.add_stream(
168+
"pcm_s16le", sample_rate=sr, format="s16", layout="stereo"
169+
)
170+
171+
frame = AudioFrame.from_ndarray(
172+
samp_list[:, samp_start:samp_end], format="s16p", layout="stereo"
173+
)
174+
frame.rate = sr
175+
176+
for reframe in to_s16.resample(frame):
177+
container.mux(output_stream.encode(reframe))
178+
container.mux(output_stream.encode(None))
222179

223-
input_buffer = io.BytesIO()
224-
wavfile_write(input_buffer, sr, samp_list[samp_start:samp_end])
225180
input_buffer.seek(0)
226181

227182
input_file = bv.open(input_buffer, "r")
228183
input_stream = input_file.streams.audio[0]
229184

230185
graph = bv.filter.Graph()
231-
args = [graph.add_abuffer(sample_rate=sr, format="s16", layout="stereo")]
186+
args = [graph.add_abuffer(template=input_stream)]
232187

233188
if clip.speed != 1:
234189
if clip.speed > 10_000:
@@ -390,7 +345,6 @@ def make_new_audio(
390345
output: bv.container.OutputContainer,
391346
audio_format: bv.AudioFormat,
392347
tl: v3,
393-
ctr: Container,
394348
args: Args,
395349
log: Log,
396350
) -> tuple[list[bv.AudioStream], list[Iterator[AudioFrame]]]:
@@ -405,8 +359,9 @@ def make_new_audio(
405359
for i, audio_path in enumerate(audio_paths):
406360
audio_stream = output.add_stream(
407361
args.audio_codec,
408-
format=audio_format,
409362
rate=tl.sr,
363+
format=audio_format,
364+
layout="stereo",
410365
time_base=Fraction(1, tl.sr),
411366
)
412367
if not isinstance(audio_stream, bv.AudioStream):
@@ -484,7 +439,10 @@ def _make_new_audio(tl: v3, fmt: bv.AudioFormat, args: Args, log: Log) -> list[A
484439

485440
if arr is not None:
486441
if norm is None:
487-
use_iter = True
442+
if args.mix_audio_streams:
443+
ndarray_to_file(arr, sr, path)
444+
else:
445+
use_iter = True
488446
else:
489447
pre_master = Path(temp, "premaster.wav")
490448
ndarray_to_file(arr, sr, pre_master)

0 commit comments

Comments
 (0)