1
1
from __future__ import annotations
2
2
3
- import io
4
- import struct
5
- import sys
6
3
from fractions import Fraction
4
+ from io import BytesIO
7
5
from pathlib import Path
8
6
from typing import TYPE_CHECKING
9
7
19
17
from auto_editor .lib .err import MyError
20
18
from auto_editor .timeline import TlAudio , v3
21
19
from auto_editor .utils .cmdkw import ParserError , parse_with_palet , pAttr , pAttrs
22
- from auto_editor .utils .container import Container
23
20
from auto_editor .utils .func import parse_bitrate
24
21
from auto_editor .utils .log import Log
25
22
29
26
30
27
from auto_editor .__main__ import Args
31
28
32
- Reader = io .BufferedReader | io .BytesIO
33
- Writer = io .BufferedWriter | io .BytesIO
34
29
AudioData = np .ndarray
35
30
36
31
@@ -101,59 +96,6 @@ def parse_ebu_bytes(norm: dict, stat: bytes, log: Log) -> tuple[str, str]:
101
96
return "loudnorm" , filter
102
97
103
98
104
- def wavfile_write (fid : Writer , sr : int , arr : np .ndarray ) -> None :
105
- # arr.shape is (samples, channels).
106
-
107
- def _handle_pad_byte (fid : Reader , size : int ) -> None :
108
- if size % 2 == 1 :
109
- fid .seek (1 , 1 )
110
-
111
- PCM = 0x0001
112
- IEEE_FLOAT = 0x0003
113
-
114
- channels = 1 if arr .ndim == 1 else arr .shape [1 ]
115
- bit_depth = arr .dtype .itemsize * 8
116
- block_align = channels * (bit_depth // 8 )
117
- data_size = arr .nbytes
118
- total_size = 44 + data_size # Basic WAV header size + data size
119
-
120
- if is_rf64 := total_size > 0xFFFFFFFF :
121
- fid .write (b"RF64\xff \xff \xff \xff WAVE" )
122
- ds64_size = 28
123
- ds64_chunk_data = (0 ).to_bytes (ds64_size , "little" ) # placeholder values
124
- fid .write (b"ds64" + struct .pack ("<I" , ds64_size ) + ds64_chunk_data )
125
- else :
126
- fid .write (b"RIFF" + struct .pack ("<I" , total_size - 8 ) + b"WAVE" )
127
-
128
- dkind = arr .dtype .kind
129
- format_tag = IEEE_FLOAT if dkind == "f" else PCM
130
-
131
- fmt_chunk_data = struct .pack (
132
- "<HHIIHH" , format_tag , channels , sr , 0 , block_align , bit_depth
133
- )
134
- fid .write (b"fmt " + struct .pack ("<I" , len (fmt_chunk_data )) + fmt_chunk_data )
135
-
136
- # Data chunk
137
- fid .write (b"data" )
138
- fid .write (struct .pack ("<I" , 0xFFFFFFFF if is_rf64 else data_size ))
139
-
140
- if arr .dtype .byteorder == ">" or (
141
- arr .dtype .byteorder == "=" and sys .byteorder == "big"
142
- ):
143
- arr = arr .byteswap ()
144
- fid .write (arr .ravel ().view ("b" ).data )
145
-
146
- if is_rf64 :
147
- end_position = fid .tell ()
148
- fid .seek (16 ) # Position at the start of 'ds64' chunk size
149
-
150
- file_size = end_position - 20
151
- fid .write (struct .pack ("<I" , ds64_size ))
152
- fid .write (file_size .to_bytes (8 , "little" ) + data_size .to_bytes (8 , "little" ))
153
-
154
- fid .seek (end_position )
155
-
156
-
157
99
def apply_audio_normalization (
158
100
norm : dict , pre_master : Path , path : Path , log : Log
159
101
) -> None :
@@ -218,17 +160,30 @@ def get_peak_level(frame: AudioFrame) -> float:
218
160
def process_audio_clip (
219
161
clip : TlAudio , samp_list : AudioData , samp_start : int , samp_end : int , sr : int
220
162
) -> np .ndarray :
221
- samp_list = samp_list .T .copy (order = "C" )
163
+ to_s16 = bv .AudioResampler (format = "s16" , layout = "stereo" , rate = sr )
164
+ input_buffer = BytesIO ()
165
+
166
+ with bv .open (input_buffer , "w" , format = "wav" ) as container :
167
+ output_stream = container .add_stream (
168
+ "pcm_s16le" , sample_rate = sr , format = "s16" , layout = "stereo"
169
+ )
170
+
171
+ frame = AudioFrame .from_ndarray (
172
+ samp_list [:, samp_start :samp_end ], format = "s16p" , layout = "stereo"
173
+ )
174
+ frame .rate = sr
175
+
176
+ for reframe in to_s16 .resample (frame ):
177
+ container .mux (output_stream .encode (reframe ))
178
+ container .mux (output_stream .encode (None ))
222
179
223
- input_buffer = io .BytesIO ()
224
- wavfile_write (input_buffer , sr , samp_list [samp_start :samp_end ])
225
180
input_buffer .seek (0 )
226
181
227
182
input_file = bv .open (input_buffer , "r" )
228
183
input_stream = input_file .streams .audio [0 ]
229
184
230
185
graph = bv .filter .Graph ()
231
- args = [graph .add_abuffer (sample_rate = sr , format = "s16" , layout = "stereo" )]
186
+ args = [graph .add_abuffer (template = input_stream )]
232
187
233
188
if clip .speed != 1 :
234
189
if clip .speed > 10_000 :
@@ -390,7 +345,6 @@ def make_new_audio(
390
345
output : bv .container .OutputContainer ,
391
346
audio_format : bv .AudioFormat ,
392
347
tl : v3 ,
393
- ctr : Container ,
394
348
args : Args ,
395
349
log : Log ,
396
350
) -> tuple [list [bv .AudioStream ], list [Iterator [AudioFrame ]]]:
@@ -405,8 +359,9 @@ def make_new_audio(
405
359
for i , audio_path in enumerate (audio_paths ):
406
360
audio_stream = output .add_stream (
407
361
args .audio_codec ,
408
- format = audio_format ,
409
362
rate = tl .sr ,
363
+ format = audio_format ,
364
+ layout = "stereo" ,
410
365
time_base = Fraction (1 , tl .sr ),
411
366
)
412
367
if not isinstance (audio_stream , bv .AudioStream ):
@@ -484,7 +439,10 @@ def _make_new_audio(tl: v3, fmt: bv.AudioFormat, args: Args, log: Log) -> list[A
484
439
485
440
if arr is not None :
486
441
if norm is None :
487
- use_iter = True
442
+ if args .mix_audio_streams :
443
+ ndarray_to_file (arr , sr , path )
444
+ else :
445
+ use_iter = True
488
446
else :
489
447
pre_master = Path (temp , "premaster.wav" )
490
448
ndarray_to_file (arr , sr , pre_master )
0 commit comments