Skip to content

Commit 6eb2cb2

Browse files
committed
WIP numpy array for audio
1 parent 7bb5a82 commit 6eb2cb2

File tree

4 files changed

+68
-36
lines changed

4 files changed

+68
-36
lines changed

auto_editor/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "27.0.0"
1+
__version__ = "27.0.1"

auto_editor/edit.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ def make_media(tl: v3, output_path: str) -> None:
305305
# Setup audio
306306
if ctr.default_aud != "none":
307307
ensure = Ensure(bar, samplerate, log)
308-
audio_paths = make_new_audio(tl, ctr, ensure, args, bar, log)
308+
audio_paths = make_new_audio(tl, ctr, ensure, args, log)
309309
else:
310310
audio_paths = []
311311

auto_editor/render/audio.py

+64-34
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,26 @@
66

77
import bv
88
import numpy as np
9+
from bv import AudioFrame
910
from bv.filter.loudnorm import stats
1011

12+
from auto_editor import wavfile
1113
from auto_editor.ffwrapper import FileInfo
1214
from auto_editor.json import load
1315
from auto_editor.lang.palet import env
1416
from auto_editor.lib.contracts import andc, between_c, is_int_or_float
1517
from auto_editor.lib.err import MyError
1618
from auto_editor.output import Ensure
1719
from auto_editor.timeline import TlAudio, v3
18-
from auto_editor.utils.bar import Bar
1920
from auto_editor.utils.cmdkw import ParserError, parse_with_palet, pAttr, pAttrs
2021
from auto_editor.utils.container import Container
2122
from auto_editor.utils.log import Log
22-
from auto_editor.wavfile import AudioData, read, write
2323

2424
if TYPE_CHECKING:
2525
from auto_editor.__main__ import Args
2626

27+
AudioData = np.ndarray
28+
2729
norm_types = {
2830
"ebu": pAttrs(
2931
"ebu",
@@ -106,7 +108,7 @@ def apply_audio_normalization(
106108
else:
107109
assert "t" in norm
108110

109-
def get_peak_level(frame: bv.AudioFrame) -> float:
111+
def get_peak_level(frame: AudioFrame) -> float:
110112
# Calculate peak level in dB
111113
# Should be equivalent to: -af astats=measure_overall=Peak_level:measure_perchannel=0
112114
max_amplitude = np.abs(frame.to_ndarray()).max()
@@ -143,7 +145,7 @@ def get_peak_level(frame: bv.AudioFrame) -> float:
143145
while True:
144146
try:
145147
aframe = graph.pull()
146-
assert isinstance(aframe, bv.AudioFrame)
148+
assert isinstance(aframe, AudioFrame)
147149
output_file.mux(output_stream.encode(aframe))
148150
except (bv.BlockingIOError, bv.EOFError):
149151
break
@@ -154,9 +156,9 @@ def get_peak_level(frame: bv.AudioFrame) -> float:
154156

155157
def process_audio_clip(
156158
clip: TlAudio, samp_list: AudioData, samp_start: int, samp_end: int, sr: int
157-
) -> AudioData:
159+
) -> np.ndarray:
158160
input_buffer = io.BytesIO()
159-
write(input_buffer, sr, samp_list[samp_start:samp_end])
161+
wavfile.write(input_buffer, sr, samp_list[samp_start:samp_end])
160162
input_buffer.seek(0)
161163

162164
input_file = bv.open(input_buffer, "r")
@@ -191,29 +193,18 @@ def process_audio_clip(
191193
args.append(graph.add("abuffersink"))
192194
graph.link_nodes(*args).configure()
193195

196+
all_frames = []
194197
for frame in input_file.decode(input_stream):
195198
graph.push(frame)
196199
while True:
197200
try:
198201
aframe = graph.pull()
199-
assert isinstance(aframe, bv.AudioFrame)
200-
output_file.mux(output_stream.encode(aframe))
202+
assert isinstance(aframe, AudioFrame)
203+
all_frames.append(aframe.to_ndarray())
201204
except (bv.BlockingIOError, bv.EOFError):
202205
break
203206

204-
# Flush the stream
205-
output_file.mux(output_stream.encode(None))
206-
207-
input_file.close()
208-
output_file.close()
209-
210-
output_bytes.seek(0)
211-
has_filesig = output_bytes.read(4)
212-
output_bytes.seek(0)
213-
if not has_filesig: # Can rarely happen when clip is extremely small
214-
return np.empty((0, 2), dtype=np.int16)
215-
216-
return read(output_bytes)[1]
207+
return np.concatenate(all_frames, axis=1)
217208

218209

219210
def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
@@ -278,7 +269,7 @@ def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
278269
# Shape becomes (1, samples) for mono
279270
chunk = np.array([mixed_audio[i : i + chunk_size]])
280271

281-
frame = bv.AudioFrame.from_ndarray(chunk, format="s16", layout="mono")
272+
frame = AudioFrame.from_ndarray(chunk, format="s16", layout="mono")
282273
frame.rate = sr
283274
frame.pts = i # Set presentation timestamp
284275

@@ -288,8 +279,46 @@ def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
288279
output_container.close()
289280

290281

282+
def file_to_ndarray(src: FileInfo, stream: int, rate: int = 48000) -> np.ndarray:
    """Decode one audio stream of ``src`` into a single in-memory array.

    Every decoded frame is resampled to planar signed 16-bit stereo
    (``s16p``) at ``rate`` Hz, and the per-frame arrays are joined along
    axis 1.

    Args:
        src: Media file to read; only ``src.path`` is used here.
        stream: Index of the audio stream to decode.
        rate: Target sample rate in Hz. Defaults to 48000, the value that
            used to be hard-coded.

    Returns:
        The concatenated samples. Presumably shaped ``(2, samples)`` with
        dtype ``int16`` because of the planar stereo target format --
        TODO confirm against ``AudioFrame.to_ndarray``.
    """
    resampler = bv.AudioResampler(format="s16p", layout="stereo", rate=rate)
    chunks: list[np.ndarray] = []

    with bv.open(src.path) as container:
        for frame in container.decode(audio=stream):
            chunks.extend(out.to_ndarray() for out in resampler.resample(frame))

    # Passing None drains whatever the resampler is still buffering;
    # without this the tail of the stream can be dropped.
    chunks.extend(out.to_ndarray() for out in resampler.resample(None))

    if not chunks:
        # np.concatenate raises ValueError on an empty sequence, so a stream
        # that yields no frames gets an empty array of the assumed layout.
        return np.empty((2, 0), dtype=np.int16)

    return np.concatenate(chunks, axis=1)
300+
301+
302+
def ndarray_to_file(
    audio_data: np.ndarray, out: str | Path, rate: int = 48000
) -> None:
    """Encode ``audio_data`` as 16-bit PCM stereo audio and write it to ``out``.

    The whole array is wrapped in one planar ``s16p`` stereo frame, encoded
    with the ``pcm_s16le`` codec, and the encoder is then flushed.

    Args:
        audio_data: Samples to write. Presumably shaped ``(2, samples)``
            with dtype ``int16`` to match the planar stereo frame format --
            TODO confirm against ``AudioFrame.from_ndarray``.
        out: Destination passed to ``bv.open`` in write mode.
        rate: Sample rate in Hz used for both the stream and the frame.
            Defaults to 48000, the value that used to be hard-coded.
    """
    # Named sample_format so the builtin ``format`` is not shadowed.
    sample_format = "s16p"
    layout = "stereo"

    # A transposed view (e.g. ``arr.T``) is not C-contiguous; copy it into
    # contiguous memory before handing it to the frame constructor. This is
    # a no-op for input that is already contiguous.
    samples = np.ascontiguousarray(audio_data)

    with bv.open(out, mode="w") as output:
        stream = output.add_stream("pcm_s16le", rate=rate, layout=layout)

        frame = bv.AudioFrame.from_ndarray(
            samples, format=sample_format, layout=layout
        )
        frame.rate = rate

        output.mux(stream.encode(frame))

        # Flush the stream
        output.mux(stream.encode(None))
318+
319+
291320
def make_new_audio(
292-
tl: v3, ctr: Container, ensure: Ensure, args: Args, bar: Bar, log: Log
321+
tl: v3, ctr: Container, ensure: Ensure, args: Args, log: Log
293322
) -> list[str]:
294323
sr = tl.sr
295324
tb = tl.tb
@@ -304,17 +333,21 @@ def make_new_audio(
304333
log.error("Trying to render empty audio timeline")
305334

306335
for i, layer in enumerate(tl.a):
307-
bar.start(len(layer), "Creating new audio")
308-
309336
path = Path(temp, f"new{i}.wav")
310337
output.append(f"{path}")
311338
arr: AudioData | None = None
312339

313340
for c, clip in enumerate(layer):
314341
if (clip.src, clip.stream) not in samples:
342+
343+
# log.conwrite("Writing audio to memory")
344+
# samples[(clip.src, clip.stream)] = file_to_ndarray(
345+
# clip.src, clip.stream
346+
# ).T.copy(order="C")
347+
315348
audio_path = ensure.audio(clip.src, clip.stream)
316349
with open(audio_path, "rb") as file:
317-
samples[(clip.src, clip.stream)] = read(file)[1]
350+
samples[(clip.src, clip.stream)] = wavfile.read(file)[1]
318351

319352
if arr is None:
320353
leng = max(round((layer[-1].start + layer[-1].dur) * sr / tb), sr // tb)
@@ -338,7 +371,7 @@ def make_new_audio(
338371
samp_end = len(samp_list)
339372

340373
if clip.speed != 1 or clip.volume != 1:
341-
clip_arr = process_audio_clip(clip, samp_list, samp_start, samp_end, sr)
374+
clip_arr = process_audio_clip(clip, samp_list, samp_start, samp_end, sr) #.T.copy(order="C")
342375
else:
343376
clip_arr = samp_list[samp_start:samp_end]
344377

@@ -352,21 +385,18 @@ def make_new_audio(
352385
else:
353386
arr[start : start + car_len] += clip_arr
354387

355-
bar.tick(c)
356-
357388
if arr is not None:
358389
if norm is None:
390+
# ndarray_to_file(arr.T, path)
359391
with open(path, "wb") as fid:
360-
write(fid, sr, arr)
392+
wavfile.write(fid, sr, arr)
361393
else:
362394
pre_master = Path(temp, "premaster.wav")
395+
# ndarray_to_file(arr.T.copy(order="C"), pre_master)
363396
with open(pre_master, "wb") as fid:
364-
write(fid, sr, arr)
365-
397+
wavfile.write(fid, sr, arr)
366398
apply_audio_normalization(norm, pre_master, path, log)
367399

368-
bar.end()
369-
370400
try:
371401
Path(temp, "asdf.map").unlink(missing_ok=True)
372402
except PermissionError:

auto_editor/wavfile.py

+2
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,8 @@ def read(fid: Reader) -> tuple[int, AudioData]:
250250

251251

252252
def write(fid: Writer, sr: int, arr: np.ndarray) -> None:
253+
# arr.shape is (samples, channels).
254+
253255
channels = 1 if arr.ndim == 1 else arr.shape[1]
254256
bit_depth = arr.dtype.itemsize * 8
255257
block_align = channels * (bit_depth // 8)

0 commit comments

Comments
 (0)