Use PyAV for extracting audio

WyattBlue · WyattBlue · commit f61990919521 · 2024-07-01T04:47:09.000-04:00
diff --git a/auto_editor/edit.py b/auto_editor/edit.py
@@ -209,7 +209,7 @@ def edit_media(
     else:
         samplerate = args.sample_rate
 
-    ensure = Ensure(ffmpeg, samplerate, temp, log)
+    ensure = Ensure(ffmpeg, bar, samplerate, temp, log)
 
     if tl is None:
         tl = make_timeline(sources, ensure, args, samplerate, bar, temp, log)
diff --git a/auto_editor/output.py b/auto_editor/output.py
@@ -4,15 +4,22 @@
 from dataclasses import dataclass, field
 from fractions import Fraction
 
+import av
+from av.audio.resampler import AudioResampler
+
 from auto_editor.ffwrapper import FFmpeg, FileInfo
+from auto_editor.utils.bar import Bar
 from auto_editor.utils.container import Container
 from auto_editor.utils.log import Log
 from auto_editor.utils.types import Args
 
+av.logging.set_level(av.logging.VERBOSE)
+
 
 @dataclass(slots=True)
 class Ensure:
     _ffmpeg: FFmpeg
+    _bar: Bar
     _sr: int
     temp: str
     log: Log
@@ -31,12 +38,42 @@ def audio(self, src: FileInfo, stream: int) -> str:
         out_path = os.path.join(self.temp, f"{label:x}.wav")
 
         if first_time:
+            sample_rate = self._sr
+            bar = self._bar
             self.log.debug(f"Making external audio: {out_path}")
-            self.log.conwrite("Extracting audio")
 
-            cmd = ["-i", f"{src.path}", "-map", f"0:a:{stream}"]
-            cmd += ["-ac", "2", "-ar", f"{self._sr}", "-rf64", "always", out_path]
-            self._ffmpeg.run(cmd)
+            in_container = av.open(src.path, "r")
+            out_container = av.open(
+                out_path, "w", format="wav", options={"rf64": "always"}
+            )
+            astream = in_container.streams.audio[stream]
+
+            if astream.duration is None or astream.time_base is None:
+                dur = 0
+            else:
+                dur = int(astream.duration * astream.time_base)
+
+            bar.start(dur, "Extracting audio")
+
+            # No layout is passed: PyAV defaults a new audio stream to "stereo", which is what we want.
+            output_astream = out_container.add_stream("pcm_s16le", rate=sample_rate)
+            assert isinstance(output_astream, av.audio.stream.AudioStream)
+
+            resampler = AudioResampler(format="s16", layout="stereo", rate=sample_rate)  # type: ignore
+            for i, frame in enumerate(in_container.decode(astream)):
+                if i % 1500 == 0:
+                    bar.tick(0 if frame.time is None else frame.time)
+
+                for new_frame in resampler.resample(frame):
+                    for packet in output_astream.encode(new_frame):
+                        out_container.mux_one(packet)
+
+            for packet in output_astream.encode():
+                out_container.mux_one(packet)
+
+            out_container.close()
+            in_container.close()
+            bar.end()
 
         return out_path
 
diff --git a/auto_editor/subcommands/levels.py b/auto_editor/subcommands/levels.py
@@ -85,7 +85,7 @@ def main(sys_args: list[str] = sys.argv[1:]) -> None:
     src = sources[0]
 
     tb = src.get_fps() if args.timebase is None else args.timebase
-    ensure = Ensure(ffmpeg, src.get_sr(), temp, log)
+    ensure = Ensure(ffmpeg, bar, src.get_sr(), temp, log)
 
     if ":" in args.edit:
         method, attrs = args.edit.split(":", 1)
diff --git a/auto_editor/subcommands/repl.py b/auto_editor/subcommands/repl.py
@@ -73,8 +73,8 @@ def main(sys_args: list[str] = sys.argv[1:]) -> None:
         sources = [initFileInfo(path, log) for path in args.input]
         src = sources[0]
         tb = src.get_fps() if args.timebase is None else args.timebase
-        ensure = Ensure(ffmpeg, src.get_sr(), temp, log)
         bar = Bar("none")
+        ensure = Ensure(ffmpeg, bar, src.get_sr(), temp, log)
         env["timebase"] = tb
         env["@levels"] = Levels(ensure, src, tb, bar, temp, log)
         env["@filesetup"] = FileSetup(src, ensure, strict, tb, bar, temp, log)