3
3
import io
4
4
import struct
5
5
import sys
6
+ from fractions import Fraction
6
7
from pathlib import Path
7
8
from typing import TYPE_CHECKING
8
9
19
20
from auto_editor .timeline import TlAudio , v3
20
21
from auto_editor .utils .cmdkw import ParserError , parse_with_palet , pAttr , pAttrs
21
22
from auto_editor .utils .container import Container
23
+ from auto_editor .utils .func import parse_bitrate
22
24
from auto_editor .utils .log import Log
23
25
24
26
if TYPE_CHECKING :
27
+ from collections .abc import Iterator
28
+ from typing import Any
29
+
25
30
from auto_editor .__main__ import Args
26
31
27
32
Reader = io .BufferedReader | io .BytesIO
@@ -364,23 +369,85 @@ def ndarray_to_file(audio_data: np.ndarray, rate: int, out: str | Path) -> None:
364
369
output .mux (stream .encode (None ))
365
370
366
371
367
- def make_new_audio (tl : v3 , ctr : Container , args : Args , log : Log ) -> list [str ]:
372
def ndarray_to_iter(
    audio_data: np.ndarray, fmt: bv.AudioFormat, rate: int
) -> Iterator[AudioFrame]:
    """Turn a planar s16 sample buffer into a stream of resampled AudioFrames.

    The buffer is sliced into quarter-second chunks so only a small piece of
    the clip exists as an AV frame at any time; each chunk is pushed through
    a resampler targeting `fmt` at `rate`.

    NOTE(review): the resampler is never flushed (no final `resample(None)`),
    so any samples it buffers internally may be dropped — confirm against the
    muxing loop downstream.
    """
    step = rate // 4  # samples per chunk: 0.25 s of audio
    resampler = bv.AudioResampler(rate=rate, format=fmt, layout="stereo")

    total = audio_data.shape[1]
    start = 0
    while start < total:
        frame = AudioFrame.from_ndarray(
            audio_data[:, start : start + step], format="s16p", layout="stereo"
        )
        frame.rate = rate
        frame.pts = start  # pts counted in samples at the source rate
        yield from resampler.resample(frame)
        start += step
387
+
388
+
389
def make_new_audio(
    output: bv.container.OutputContainer,
    audio_format: bv.AudioFormat,
    tl: v3,
    ctr: Container,
    args: Args,
    log: Log,
) -> tuple[list[bv.AudioStream], list[Iterator[AudioFrame]]]:
    """Add encoder streams to `output` and pair each with a frame source.

    Renders the timeline's audio layers via `_make_new_audio`, then for each
    rendered layer: creates an output audio stream (applying the requested
    codec, bitrate, and source language metadata) and selects its frame
    source — a decoder over a temp WAV file, or an in-memory frame iterator.

    Returns the streams and the frame sources in matching order.
    """
    audio_paths = _make_new_audio(tl, audio_format, args, log)

    src = tl.src
    assert src is not None

    streams: list[bv.AudioStream] = []
    frame_sources: list[Iterator[AudioFrame]] = []
    # Keep opened input containers referenced so their decoders stay valid.
    open_inputs = []

    for idx, source in enumerate(audio_paths):
        stream = output.add_stream(
            args.audio_codec,
            format=audio_format,
            rate=tl.sr,
            time_base=Fraction(1, tl.sr),
        )
        if not isinstance(stream, bv.AudioStream):
            log.error(f"Not a known audio codec: {args.audio_codec}")

        if args.audio_bitrate != "auto":
            stream.bit_rate = parse_bitrate(args.audio_bitrate, log)
            log.debug(f"audio bitrate: {stream.bit_rate}")
        else:
            log.debug(f"[auto] audio bitrate: {stream.bit_rate}")
        # Carry the source track's language tag over when one exists.
        if idx < len(src.audios) and src.audios[idx].lang is not None:
            stream.metadata["language"] = src.audios[idx].lang  # type: ignore

        streams.append(stream)

        if isinstance(source, str):
            container = bv.open(source)
            open_inputs.append(container)
            frame_sources.append(container.decode(audio=0))
        else:
            frame_sources.append(source)

    return streams, frame_sources
433
+
434
+
435
+ def _make_new_audio (tl : v3 , fmt : bv .AudioFormat , args : Args , log : Log ) -> list [Any ]:
368
436
sr = tl .sr
369
437
tb = tl .tb
370
- output : list [str ] = []
438
+ output : list [Any ] = []
371
439
samples : dict [tuple [FileInfo , int ], AudioData ] = {}
372
440
373
441
norm = parse_norm (args .audio_normalize , log )
374
-
375
442
temp = log .temp
376
443
377
444
if not tl .a [0 ]:
378
445
log .error ("Trying to render empty audio timeline" )
379
446
380
447
for i , layer in enumerate (tl .a ):
381
448
path = Path (temp , f"new{ i } .wav" )
382
- output .append (f"{ path } " )
383
449
arr : AudioData | None = None
450
+ use_iter = False
384
451
385
452
for c , clip in enumerate (layer ):
386
453
if (clip .src , clip .stream ) not in samples :
@@ -391,19 +458,8 @@ def make_new_audio(tl: v3, ctr: Container, args: Args, log: Log) -> list[str]:
391
458
392
459
log .conwrite ("Creating audio" )
393
460
if arr is None :
394
- dtype = np .int32
395
- for _samp_arr in samples .values ():
396
- dtype = _samp_arr .dtype
397
- break
398
-
399
461
leng = max (round ((layer [- 1 ].start + layer [- 1 ].dur ) * sr / tb ), sr // tb )
400
- arr = np .memmap (
401
- Path (temp , "asdf.map" ),
402
- mode = "w+" ,
403
- dtype = dtype ,
404
- shape = (2 , leng ),
405
- )
406
- del leng
462
+ arr = np .zeros (shape = (2 , leng ), dtype = np .int16 )
407
463
408
464
samp_list = samples [(clip .src , clip .stream )]
409
465
@@ -428,19 +484,20 @@ def make_new_audio(tl: v3, ctr: Container, args: Args, log: Log) -> list[str]:
428
484
429
485
if arr is not None :
430
486
if norm is None :
431
- ndarray_to_file ( arr , sr , path )
487
+ use_iter = True
432
488
else :
433
489
pre_master = Path (temp , "premaster.wav" )
434
490
ndarray_to_file (arr , sr , pre_master )
435
491
apply_audio_normalization (norm , pre_master , path , log )
436
492
437
- try :
438
- Path ( temp , "asdf.map" ). unlink ( missing_ok = True )
439
- except PermissionError :
440
- pass
493
+ if use_iter and arr is not None :
494
+ output . append ( ndarray_to_iter ( arr , fmt , sr ) )
495
+ else :
496
+ output . append ( f" { path } " )
441
497
442
498
if args .mix_audio_streams and len (output ) > 1 :
443
499
new_a_file = f"{ Path (temp , 'new_audio.wav' )} "
444
500
mix_audio_files (sr , output , new_a_file )
445
501
return [new_a_file ]
502
+
446
503
return output
0 commit comments