6
6
7
7
import bv
8
8
import numpy as np
9
+ from bv import AudioFrame
9
10
from bv .filter .loudnorm import stats
10
11
12
+ from auto_editor import wavfile
11
13
from auto_editor .ffwrapper import FileInfo
12
14
from auto_editor .json import load
13
15
from auto_editor .lang .palet import env
14
16
from auto_editor .lib .contracts import andc , between_c , is_int_or_float
15
17
from auto_editor .lib .err import MyError
16
18
from auto_editor .output import Ensure
17
19
from auto_editor .timeline import TlAudio , v3
18
- from auto_editor .utils .bar import Bar
19
20
from auto_editor .utils .cmdkw import ParserError , parse_with_palet , pAttr , pAttrs
20
21
from auto_editor .utils .container import Container
21
22
from auto_editor .utils .log import Log
22
- from auto_editor .wavfile import AudioData , read , write
23
23
24
24
if TYPE_CHECKING :
25
25
from auto_editor .__main__ import Args
26
26
27
+ AudioData = np .ndarray
28
+
27
29
norm_types = {
28
30
"ebu" : pAttrs (
29
31
"ebu" ,
@@ -106,7 +108,7 @@ def apply_audio_normalization(
106
108
else :
107
109
assert "t" in norm
108
110
109
- def get_peak_level (frame : bv . AudioFrame ) -> float :
111
+ def get_peak_level (frame : AudioFrame ) -> float :
110
112
# Calculate peak level in dB
111
113
# Should be equivalent to: -af astats=measure_overall=Peak_level:measure_perchannel=0
112
114
max_amplitude = np .abs (frame .to_ndarray ()).max ()
@@ -143,7 +145,7 @@ def get_peak_level(frame: bv.AudioFrame) -> float:
143
145
while True :
144
146
try :
145
147
aframe = graph .pull ()
146
- assert isinstance (aframe , bv . AudioFrame )
148
+ assert isinstance (aframe , AudioFrame )
147
149
output_file .mux (output_stream .encode (aframe ))
148
150
except (bv .BlockingIOError , bv .EOFError ):
149
151
break
@@ -154,9 +156,9 @@ def get_peak_level(frame: bv.AudioFrame) -> float:
154
156
155
157
def process_audio_clip (
156
158
clip : TlAudio , samp_list : AudioData , samp_start : int , samp_end : int , sr : int
157
- ) -> AudioData :
159
+ ) -> np . ndarray :
158
160
input_buffer = io .BytesIO ()
159
- write (input_buffer , sr , samp_list [samp_start :samp_end ])
161
+ wavfile . write (input_buffer , sr , samp_list [samp_start :samp_end ])
160
162
input_buffer .seek (0 )
161
163
162
164
input_file = bv .open (input_buffer , "r" )
@@ -191,29 +193,18 @@ def process_audio_clip(
191
193
args .append (graph .add ("abuffersink" ))
192
194
graph .link_nodes (* args ).configure ()
193
195
196
+ all_frames = []
194
197
for frame in input_file .decode (input_stream ):
195
198
graph .push (frame )
196
199
while True :
197
200
try :
198
201
aframe = graph .pull ()
199
- assert isinstance (aframe , bv . AudioFrame )
200
- output_file . mux ( output_stream . encode ( aframe ))
202
+ assert isinstance (aframe , AudioFrame )
203
+ all_frames . append ( aframe . to_ndarray ( ))
201
204
except (bv .BlockingIOError , bv .EOFError ):
202
205
break
203
206
204
- # Flush the stream
205
- output_file .mux (output_stream .encode (None ))
206
-
207
- input_file .close ()
208
- output_file .close ()
209
-
210
- output_bytes .seek (0 )
211
- has_filesig = output_bytes .read (4 )
212
- output_bytes .seek (0 )
213
- if not has_filesig : # Can rarely happen when clip is extremely small
214
- return np .empty ((0 , 2 ), dtype = np .int16 )
215
-
216
- return read (output_bytes )[1 ]
207
+ return np .concatenate (all_frames , axis = 1 )
217
208
218
209
219
210
def mix_audio_files (sr : int , audio_paths : list [str ], output_path : str ) -> None :
@@ -278,7 +269,7 @@ def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
278
269
# Shape becomes (1, samples) for mono
279
270
chunk = np .array ([mixed_audio [i : i + chunk_size ]])
280
271
281
- frame = bv . AudioFrame .from_ndarray (chunk , format = "s16" , layout = "mono" )
272
+ frame = AudioFrame .from_ndarray (chunk , format = "s16" , layout = "mono" )
282
273
frame .rate = sr
283
274
frame .pts = i # Set presentation timestamp
284
275
@@ -288,8 +279,46 @@ def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
288
279
output_container .close ()
289
280
290
281
282
def file_to_ndarray(src: FileInfo, stream: int, rate: int = 48000) -> np.ndarray:
    """Decode audio stream `stream` of `src` into one contiguous ndarray.

    Every decoded frame is resampled to signed 16-bit planar ("s16p")
    stereo at `rate` Hz, then all frames are concatenated along the
    sample axis.

    Parameters:
        src: source media file to read from.
        stream: index of the audio stream to decode.
        rate: target sample rate in Hz (default 48000).

    Returns:
        ndarray of the joined samples — presumably shaped
        (channels, samples) for the planar format; confirm against
        bv.AudioFrame.to_ndarray before relying on the layout.
    """
    all_frames = []

    resampler = bv.AudioResampler(format="s16p", layout="stereo", rate=rate)

    with bv.open(src.path) as container:
        for frame in container.decode(audio=stream):
            # One decoded frame may resample into zero or more output frames.
            for resampled_frame in resampler.resample(frame):
                all_frames.append(resampled_frame.to_ndarray())

    return np.concatenate(all_frames, axis=1)
300
+
301
+
302
def ndarray_to_file(audio_data: np.ndarray, out: str | Path, rate: int = 48000) -> None:
    """Encode `audio_data` to `out` as 16-bit little-endian PCM stereo.

    Parameters:
        audio_data: samples in the layout bv.AudioFrame.from_ndarray
            accepts for the "s16p" format — assumes (channels, samples);
            verify against the caller.
        out: destination path; container format is inferred from it.
        rate: sample rate in Hz (default 48000).
    """
    # Named to avoid shadowing the `format` builtin.
    sample_format = "s16p"
    layout = "stereo"

    with bv.open(out, mode="w") as output:
        out_stream = output.add_stream("pcm_s16le", rate=rate, layout=layout)

        frame = bv.AudioFrame.from_ndarray(audio_data, format=sample_format, layout=layout)
        frame.rate = rate

        output.mux(out_stream.encode(frame))

        # Flush any packets still buffered in the encoder.
        output.mux(out_stream.encode(None))
318
+
319
+
291
320
def make_new_audio (
292
- tl : v3 , ctr : Container , ensure : Ensure , args : Args , bar : Bar , log : Log
321
+ tl : v3 , ctr : Container , ensure : Ensure , args : Args , log : Log
293
322
) -> list [str ]:
294
323
sr = tl .sr
295
324
tb = tl .tb
@@ -304,17 +333,21 @@ def make_new_audio(
304
333
log .error ("Trying to render empty audio timeline" )
305
334
306
335
for i , layer in enumerate (tl .a ):
307
- bar .start (len (layer ), "Creating new audio" )
308
-
309
336
path = Path (temp , f"new{ i } .wav" )
310
337
output .append (f"{ path } " )
311
338
arr : AudioData | None = None
312
339
313
340
for c , clip in enumerate (layer ):
314
341
if (clip .src , clip .stream ) not in samples :
342
+
343
+ # log.conwrite("Writing audio to memeory")
344
+ # samples[(clip.src, clip.stream)] = file_to_ndarray(
345
+ # clip.src, clip.stream
346
+ # ).T.copy(order="C")
347
+
315
348
audio_path = ensure .audio (clip .src , clip .stream )
316
349
with open (audio_path , "rb" ) as file :
317
- samples [(clip .src , clip .stream )] = read (file )[1 ]
350
+ samples [(clip .src , clip .stream )] = wavfile . read (file )[1 ]
318
351
319
352
if arr is None :
320
353
leng = max (round ((layer [- 1 ].start + layer [- 1 ].dur ) * sr / tb ), sr // tb )
@@ -338,7 +371,7 @@ def make_new_audio(
338
371
samp_end = len (samp_list )
339
372
340
373
if clip .speed != 1 or clip .volume != 1 :
341
- clip_arr = process_audio_clip (clip , samp_list , samp_start , samp_end , sr )
374
+ clip_arr = process_audio_clip (clip , samp_list , samp_start , samp_end , sr ) #.T.copy(order="C")
342
375
else :
343
376
clip_arr = samp_list [samp_start :samp_end ]
344
377
@@ -352,21 +385,18 @@ def make_new_audio(
352
385
else :
353
386
arr [start : start + car_len ] += clip_arr
354
387
355
- bar .tick (c )
356
-
357
388
if arr is not None :
358
389
if norm is None :
390
+ # ndarray_to_file(arr.T, path)
359
391
with open (path , "wb" ) as fid :
360
- write (fid , sr , arr )
392
+ wavfile . write (fid , sr , arr )
361
393
else :
362
394
pre_master = Path (temp , "premaster.wav" )
395
+ # ndarray_to_file(arr.T.copy(order="C"), pre_master)
363
396
with open (pre_master , "wb" ) as fid :
364
- write (fid , sr , arr )
365
-
397
+ wavfile .write (fid , sr , arr )
366
398
apply_audio_normalization (norm , pre_master , path , log )
367
399
368
- bar .end ()
369
-
370
400
try :
371
401
Path (temp , "asdf.map" ).unlink (missing_ok = True )
372
402
except PermissionError :
0 commit comments