WIP numpy array for audio

WyattBlue · WyattBlue · commit 6eb2cb2beaab · 2025-04-13T01:48:48.000-04:00
diff --git a/auto_editor/__init__.py b/auto_editor/__init__.py
@@ -1 +1 @@
-__version__ = "27.0.0"
+__version__ = "27.0.1"
diff --git a/auto_editor/edit.py b/auto_editor/edit.py
@@ -305,7 +305,7 @@ def make_media(tl: v3, output_path: str) -> None:
         # Setup audio
         if ctr.default_aud != "none":
             ensure = Ensure(bar, samplerate, log)
-            audio_paths = make_new_audio(tl, ctr, ensure, args, bar, log)
+            audio_paths = make_new_audio(tl, ctr, ensure, args, log)
         else:
             audio_paths = []
 
diff --git a/auto_editor/render/audio.py b/auto_editor/render/audio.py
@@ -6,24 +6,26 @@
 
 import bv
 import numpy as np
+from bv import AudioFrame
 from bv.filter.loudnorm import stats
 
+from auto_editor import wavfile
 from auto_editor.ffwrapper import FileInfo
 from auto_editor.json import load
 from auto_editor.lang.palet import env
 from auto_editor.lib.contracts import andc, between_c, is_int_or_float
 from auto_editor.lib.err import MyError
 from auto_editor.output import Ensure
 from auto_editor.timeline import TlAudio, v3
-from auto_editor.utils.bar import Bar
 from auto_editor.utils.cmdkw import ParserError, parse_with_palet, pAttr, pAttrs
 from auto_editor.utils.container import Container
 from auto_editor.utils.log import Log
-from auto_editor.wavfile import AudioData, read, write
 
 if TYPE_CHECKING:
     from auto_editor.__main__ import Args
 
+    AudioData = np.ndarray
+
 norm_types = {
     "ebu": pAttrs(
         "ebu",
@@ -106,7 +108,7 @@ def apply_audio_normalization(
     else:
         assert "t" in norm
 
-        def get_peak_level(frame: bv.AudioFrame) -> float:
+        def get_peak_level(frame: AudioFrame) -> float:
             # Calculate peak level in dB
             # Should be equivalent to: -af astats=measure_overall=Peak_level:measure_perchannel=0
             max_amplitude = np.abs(frame.to_ndarray()).max()
@@ -143,7 +145,7 @@ def get_peak_level(frame: bv.AudioFrame) -> float:
             while True:
                 try:
                     aframe = graph.pull()
-                    assert isinstance(aframe, bv.AudioFrame)
+                    assert isinstance(aframe, AudioFrame)
                     output_file.mux(output_stream.encode(aframe))
                 except (bv.BlockingIOError, bv.EOFError):
                     break
@@ -154,9 +156,9 @@ def get_peak_level(frame: bv.AudioFrame) -> float:
 
 def process_audio_clip(
     clip: TlAudio, samp_list: AudioData, samp_start: int, samp_end: int, sr: int
-) -> AudioData:
+) -> np.ndarray:
     input_buffer = io.BytesIO()
-    write(input_buffer, sr, samp_list[samp_start:samp_end])
+    wavfile.write(input_buffer, sr, samp_list[samp_start:samp_end])
     input_buffer.seek(0)
 
     input_file = bv.open(input_buffer, "r")
@@ -191,29 +193,18 @@ def process_audio_clip(
     args.append(graph.add("abuffersink"))
     graph.link_nodes(*args).configure()
 
+    all_frames = []
     for frame in input_file.decode(input_stream):
         graph.push(frame)
         while True:
             try:
                 aframe = graph.pull()
-                assert isinstance(aframe, bv.AudioFrame)
-                output_file.mux(output_stream.encode(aframe))
+                assert isinstance(aframe, AudioFrame)
+                all_frames.append(aframe.to_ndarray())
             except (bv.BlockingIOError, bv.EOFError):
                 break
 
-    # Flush the stream
-    output_file.mux(output_stream.encode(None))
-
-    input_file.close()
-    output_file.close()
-
-    output_bytes.seek(0)
-    has_filesig = output_bytes.read(4)
-    output_bytes.seek(0)
-    if not has_filesig:  # Can rarely happen when clip is extremely small
-        return np.empty((0, 2), dtype=np.int16)
-
-    return read(output_bytes)[1]
+    return np.concatenate(all_frames, axis=1)
 
 
 def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
@@ -278,7 +269,7 @@ def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
         # Shape becomes (1, samples) for mono
         chunk = np.array([mixed_audio[i : i + chunk_size]])
 
-        frame = bv.AudioFrame.from_ndarray(chunk, format="s16", layout="mono")
+        frame = AudioFrame.from_ndarray(chunk, format="s16", layout="mono")
         frame.rate = sr
         frame.pts = i  # Set presentation timestamp
 
@@ -288,8 +279,46 @@ def mix_audio_files(sr: int, audio_paths: list[str], output_path: str) -> None:
     output_container.close()
 
 
+def file_to_ndarray(src: FileInfo, stream: int, rate: int = 48000) -> np.ndarray:
+    """Decode audio stream `stream` of `src` into one planar s16 stereo array.
+
+    Frames are resampled to `rate` Hz and joined along axis 1.  An input with
+    no decodable audio yields an empty (2, 0) int16 array instead of raising.
+    """
+    resampler = bv.AudioResampler(format="s16p", layout="stereo", rate=rate)
+    all_frames: list[np.ndarray] = []
+
+    with bv.open(src.path) as container:
+        for frame in container.decode(audio=stream):
+            all_frames.extend(f.to_ndarray() for f in resampler.resample(frame))
+    # Passing None flushes samples still buffered inside the resampler.
+    all_frames.extend(f.to_ndarray() for f in resampler.resample(None))
+
+    if not all_frames:  # np.concatenate raises ValueError on an empty list
+        return np.empty((2, 0), dtype=np.int16)
+    return np.concatenate(all_frames, axis=1)
+
+
+def ndarray_to_file(
+    audio_data: np.ndarray, out: str | Path, rate: int = 48000
+) -> None:
+    """Encode a planar s16 stereo array as 16-bit PCM and write it to `out`.
+
+    `audio_data` is passed to AudioFrame.from_ndarray with format "s16p";
+    presumably it must be int16 with shape (2, samples) -- TODO confirm.
+    """
+    layout = "stereo"
+    with bv.open(out, mode="w") as output:
+        stream = output.add_stream("pcm_s16le", rate=rate, layout=layout)
+        frame = AudioFrame.from_ndarray(audio_data, format="s16p", layout=layout)
+        frame.rate = rate
+        output.mux(stream.encode(frame))
+        # Encoding None drains any packets the encoder is still holding.
+        output.mux(stream.encode(None))
+
+
 def make_new_audio(
-    tl: v3, ctr: Container, ensure: Ensure, args: Args, bar: Bar, log: Log
+    tl: v3, ctr: Container, ensure: Ensure, args: Args, log: Log
 ) -> list[str]:
     sr = tl.sr
     tb = tl.tb
@@ -304,17 +333,21 @@ def make_new_audio(
         log.error("Trying to render empty audio timeline")
 
     for i, layer in enumerate(tl.a):
-        bar.start(len(layer), "Creating new audio")
-
         path = Path(temp, f"new{i}.wav")
         output.append(f"{path}")
         arr: AudioData | None = None
 
         for c, clip in enumerate(layer):
             if (clip.src, clip.stream) not in samples:
+
+                # log.conwrite("Writing audio to memory")
+                # samples[(clip.src, clip.stream)] = file_to_ndarray(
+                #     clip.src, clip.stream
+                # ).T.copy(order="C")
+
                 audio_path = ensure.audio(clip.src, clip.stream)
                 with open(audio_path, "rb") as file:
-                    samples[(clip.src, clip.stream)] = read(file)[1]
+                    samples[(clip.src, clip.stream)] = wavfile.read(file)[1]
 
             if arr is None:
                 leng = max(round((layer[-1].start + layer[-1].dur) * sr / tb), sr // tb)
@@ -338,7 +371,7 @@ def make_new_audio(
                 samp_end = len(samp_list)
 
             if clip.speed != 1 or clip.volume != 1:
-                clip_arr = process_audio_clip(clip, samp_list, samp_start, samp_end, sr)
+                clip_arr = process_audio_clip(clip, samp_list, samp_start, samp_end, sr)  #.T.copy(order="C")
             else:
                 clip_arr = samp_list[samp_start:samp_end]
 
@@ -352,21 +385,18 @@ def make_new_audio(
             else:
                 arr[start : start + car_len] += clip_arr
 
-            bar.tick(c)
-
         if arr is not None:
             if norm is None:
+                # ndarray_to_file(arr.T, path)
                 with open(path, "wb") as fid:
-                    write(fid, sr, arr)
+                    wavfile.write(fid, sr, arr)
             else:
                 pre_master = Path(temp, "premaster.wav")
+                # ndarray_to_file(arr.T.copy(order="C"), pre_master)
                 with open(pre_master, "wb") as fid:
-                    write(fid, sr, arr)
-
+                    wavfile.write(fid, sr, arr)
                 apply_audio_normalization(norm, pre_master, path, log)
 
-        bar.end()
-
     try:
         Path(temp, "asdf.map").unlink(missing_ok=True)
     except PermissionError:
diff --git a/auto_editor/wavfile.py b/auto_editor/wavfile.py
@@ -250,6 +250,8 @@ def read(fid: Reader) -> tuple[int, AudioData]:
 
 
 def write(fid: Writer, sr: int, arr: np.ndarray) -> None:
+    # arr.shape is (samples, channels).
+
     channels = 1 if arr.ndim == 1 else arr.shape[1]
     bit_depth = arr.dtype.itemsize * 8
     block_align = channels * (bit_depth // 8)

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "27.0.0"`
	`1`	`+__version__ = "27.0.1"`