@@ -3,6 +3,7 @@
 import os
 import sys
 from fractions import Fraction
+from heapq import heappop, heappush
 from os.path import splitext
 from subprocess import run
 from typing import Any
@@ -389,40 +390,107 @@ def make_media(tl: v3, output_path: str) -> None:
     title += "\033[0m+".join(encoder_titles) + "\033[0m"
     bar.start(tl.end, title)
 
-    # Process frames
+    MAX_AUDIO_AHEAD = 30  # In timebase, how far audio can be ahead of video.
+    MAX_SUB_AHEAD = 30
+
+    class Priority:
+        __slots__ = ("index", "frame_type", "frame", "stream")
+
+        def __init__(self, value: int | Fraction, frame, stream):
+            self.frame_type: str = stream.type
+            assert self.frame_type in ("audio", "subtitle", "video")
+            if self.frame_type in {"audio", "subtitle"}:
+                self.index: int | float = round(value * frame.time_base * tl.tb)
+            else:
+                self.index = float("inf") if value is None else int(value)
+            self.frame = frame
+            self.stream = stream
+
+        def __lt__(self, other):
+            return self.index < other.index
+
+        def __eq__(self, other):
+            return self.index == other.index
+
+    # Priority queue for ordered frames by time_base.
+    frame_queue: list[Priority] = []
+    latest_audio_index = float("-inf")
+    latest_sub_index = float("-inf")
+    earliest_video_index = None
+
     while True:
-        audio_frames = [next(frames, None) for frames in audio_gen_frames]
+        if earliest_video_index is None:
+            should_get_audio = True
+            should_get_sub = True
+        else:
+            for item in frame_queue:
+                if item.frame_type == "audio":
+                    latest_audio_index = max(latest_audio_index, item.index)
+                elif item.frame_type == "subtitle":
+                    latest_sub_index = max(latest_sub_index, item.index)
+
+            should_get_audio = (
+                latest_audio_index <= earliest_video_index + MAX_AUDIO_AHEAD
+            )
+            should_get_sub = (
+                latest_sub_index <= earliest_video_index + MAX_SUB_AHEAD
+            )
+
         index, video_frame = next(vframes, (0, None))
-        subtitle_frames = [next(packet, None) for packet in sub_gen_frames]
+
+        if video_frame:
+            earliest_video_index = index
+            heappush(frame_queue, Priority(index, video_frame, output_stream))
+
+        if should_get_audio:
+            audio_frames = [next(frames, None) for frames in audio_gen_frames]
+        else:
+            audio_frames = [None]
+        if should_get_sub:
+            subtitle_frames = [next(packet, None) for packet in sub_gen_frames]
+        else:
+            subtitle_frames = [None]
+
+        # Break if no more frames
         if (
             all(frame is None for frame in audio_frames)
             and video_frame is None
             and all(packet is None for packet in subtitle_frames)
         ):
             break
 
-        if video_frame:
+        if should_get_audio:
+            for audio_stream, audio_frame in zip(audio_streams, audio_frames):
+                for reframe in resampler.resample(audio_frame):
+                    assert reframe.pts is not None
+                    heappush(
+                        frame_queue,
+                        Priority(reframe.pts, reframe, audio_stream),
+                    )
+        if should_get_sub:
+            for subtitle_stream, packet in zip(subtitle_streams, subtitle_frames):
+                if packet and packet.pts is not None:
+                    packet.stream = subtitle_stream
+                    heappush(
+                        frame_queue, Priority(packet.pts, packet, subtitle_stream)
+                    )
+
+        while frame_queue and frame_queue[0].index <= index:
+            item = heappop(frame_queue)
+            frame_type = item.frame_type
             try:
-                output.mux(output_stream.encode(video_frame))
+                if frame_type in {"video", "audio"}:
+                    output.mux(item.stream.encode(item.frame))
+                elif frame_type == "subtitle":
+                    output.mux(item.frame)
             except av.error.ExternalError:
                 log.error(
-                    f"Generic error for encoder: {output_stream.name}\n"
-                    "Perhaps video quality settings are too low?"
+                    f"Generic error for encoder: {item.stream.name}\n"
+                    f"at {item.index} time_base\nPerhaps video quality settings are too low?"
                 )
             except av.FFmpegError as e:
                 log.error(e)
 
-        for audio_stream, audio_frame in zip(audio_streams, audio_frames):
-            if audio_frame:
-                for reframe in resampler.resample(audio_frame):
-                    output.mux(audio_stream.encode(reframe))
-
-        for subtitle_stream, packet in zip(subtitle_streams, subtitle_frames):
-            if not packet or packet.dts is None:
-                continue
-            packet.stream = subtitle_stream
-            output.mux(packet)
-
         bar.tick(index)
 
     # Flush streams
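For readers skimming the diff, the core idea is a plain heapq priority queue: every decoded frame is pushed with its timestamp converted into the timeline timebase, and frames are popped back out in presentation order once the current video index catches up, with MAX_AUDIO_AHEAD / MAX_SUB_AHEAD capping how far audio and subtitles may be read ahead of video. The snippet below is a minimal, self-contained sketch of that ordering idea only; it is not the project's actual code. The Priority class here is simplified (in the real code, video frames already arrive indexed in timeline units), and the timebase and pts values are invented for illustration.

from fractions import Fraction
from heapq import heappop, heappush


class Priority:
    """Orders queued frames by their timestamp, converted to timeline units."""

    __slots__ = ("index", "kind")

    def __init__(self, pts: int, time_base: Fraction, tb: Fraction, kind: str):
        # pts * time_base is the time in seconds; multiplying by the timeline
        # timebase (ticks per second) gives an integer timeline index.
        self.index = round(pts * time_base * tb)
        self.kind = kind

    def __lt__(self, other: "Priority") -> bool:
        return self.index < other.index


tb = Fraction(30)  # hypothetical timeline timebase: 30 ticks per second
queue: list[Priority] = []

# Frames arrive from separate generators, out of presentation order...
heappush(queue, Priority(2048, Fraction(1, 44100), tb, "audio"))
heappush(queue, Priority(2, Fraction(1, 30), tb, "video"))
heappush(queue, Priority(0, Fraction(1, 1000), tb, "subtitle"))

# ...and come back out ordered by timeline index, ready to be muxed.
while queue:
    item = heappop(queue)
    print(item.kind, item.index)  # subtitle 0, audio 1, video 2

heapq only requires __lt__, so defining it (plus __eq__, as the diff does) on Priority is enough for one queue to interleave video, audio, and subtitle frames by time.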