Skip to content

Commit

Permalink
Convert subtitles to another format (#1207)
Browse files Browse the repository at this point in the history
* add Subtitle.convert method

* fixes and add tests

* add a --subtitle-format CLI option to force converting subtitles to another format

* add news

* use video FPS if subtitle has no FPS defined
  • Loading branch information
getzze authored Feb 25, 2025
1 parent 4ce6f6d commit 26c5d27
Show file tree
Hide file tree
Showing 5 changed files with 346 additions and 5 deletions.
1 change: 1 addition & 0 deletions changelog.d/536.change.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a --subtitle-format CLI option to force converting subtitles to another format
10 changes: 10 additions & 0 deletions src/subliminal/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,14 @@ def cache(ctx: click.Context, clear_subliminal: bool) -> None:
default='utf-8',
help='Force subtitle file encoding, set to an empty string to preserve the original encoding. Default is utf-8.',
)
@click.option(
'-F',
'--subtitle-format',
type=click.STRING,
metavar='FORMAT',
default='',
help="Force subtitle format, set to an empty string to preserve the original format. Default is ''.",
)
@click.option(
'-s',
'--single',
Expand Down Expand Up @@ -551,6 +559,7 @@ def download(
use_ctime: bool,
directory: str | None,
encoding: str | None,
subtitle_format: str | None,
single: bool,
force: bool,
hearing_impaired: tuple[bool | None, ...],
Expand Down Expand Up @@ -762,6 +771,7 @@ def download(
single=single,
directory=directory,
encoding=encoding,
subtitle_format=subtitle_format,
language_type_suffix=language_type_suffix,
language_format=language_format,
)
Expand Down
8 changes: 8 additions & 0 deletions src/subliminal/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,7 @@ def save_subtitles(
single: bool = False,
directory: str | os.PathLike | None = None,
encoding: str | None = None,
subtitle_format: str | None = None,
extension: str | None = None,
language_type_suffix: bool = False,
language_format: str = 'alpha2',
Expand All @@ -810,6 +811,7 @@ def save_subtitles(
:param bool single: save a single subtitle, default is to save one subtitle per language.
:param str directory: path to directory where to save the subtitles, default is next to the video.
:param str encoding: encoding in which to save the subtitles, default is to keep original encoding.
:param str subtitle_format: format in which to save the subtitles, default is to keep original format.
:param (str | None) extension: the subtitle extension, default is to match to the subtitle format.
:param bool language_type_suffix: add a suffix 'hi' or 'fo' if needed. Default to False.
:param str language_format: format of the language suffix. Default to 'alpha2'.
Expand All @@ -829,6 +831,12 @@ def save_subtitles(
logger.debug('Skipping subtitle %r: language already saved', subtitle)
continue

# convert subtitle to a new format
if subtitle_format:
# Use the video FPS if the FPS of the subtitle is not defined
fps = video.frame_rate if subtitle.fps is None else None
subtitle.convert(subtitle_format, output_encoding=encoding, fps=fps)

# create subtitle path
subtitle_path = subtitle.get_path(
video,
Expand Down
101 changes: 96 additions & 5 deletions src/subliminal/subtitle.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def _decode_content(self) -> str:
# Decode
return self.content.decode(self.encoding, errors='replace')

def reencode(self, encoding: str = 'utf-8') -> bool:
def reencode(self, text: str | None = None, encoding: str = 'utf-8') -> bool:
"""Re-encode the subtitle raw content using the specified encoding.
:param str encoding: the new encoding of the raw content (default to 'utf-8').
Expand All @@ -251,7 +251,8 @@ def reencode(self, encoding: str = 'utf-8') -> bool:
"""
# Compute self._text by calling the property
text = self.text
if text is None:
text = self.text

# Text is empty, maybe because the content was not decoded.
# Reencoding would erase the content, so return.
Expand All @@ -266,10 +267,99 @@ def reencode(self, encoding: str = 'utf-8') -> bool:
return False

# Save the new encoding and new raw content
self.clear_content()
self.encoding = encoding
self._content = new_content
return True

def convert(
self,
output_format: str = 'srt',
output_encoding: str | None = 'utf-8',
fps: float | None = None,
) -> bool:
"""Convert the subtitle to a given format.
:param str output_format: the new subtitle format (default to 'srt').
:param (str | None) output_encoding: specify the encoding, do not change if None (default to None).
:param (float | None) fps: the frame rate used to convert from/to a frame rate based subtitle (default to None).
:return: False if the conversion raised an error.
:rtype: bool
"""
# Compute self._text by calling the property
text = self.text

# Text is empty, maybe because the content was not decoded.
# Reencoding would erase the content, so return.
if not text: # pragma: no cover
return False

# Current encoding is not defined, cannot convert
if self.encoding is None: # pragma: no cover
logger.error('the current encoding is not defined')
return False

# Use the current encoding by default, otherwise normalize the encoding name
output_encoding = self.encoding if output_encoding is None else codecs.lookup(output_encoding).name

# Pick the subtitle fps if it's not specified as an argument
fps = self.fps if fps is None or fps <= 0 else fps

# Try parsing the subtitle
try:
obj = SSAFile.from_string(text, format_=self.subtitle_format, fps=fps)
except UnknownFPSError:
logger.exception('need to specify the FPS to convert this subtitle')
return False
except Exception: # pragma: no cover
logger.exception('not a valid subtitle')
return False

# Check subtitle format
self.subtitle_format = str(obj.format)
convert_format = True
if self.subtitle_format == output_format:
logger.debug('the subtitle is already in the correct format: %s', output_format)
convert_format = False
if self.encoding == output_encoding:
if output_encoding is not None: # pragma: no branch
logger.debug('the subtitle is already in the correct encoding: %s', output_encoding)
return True

if convert_format:
# Try converting
try:
new_text = obj.to_string(format_=output_format, fps=fps)
except Exception: # pragma: no cover
logger.exception('cannot convert subtitle to %s format', output_format)
return False

else:
# Do not convert to a new format
new_text = text

# Validate srt
if output_format == 'srt':
try:
parsed = self.parse_srt(new_text)
except Exception: # pragma: no cover
msg = 'srt parsing failed, converted subtitle is invalid'
logger.exception(msg)
return False
new_text = parsed

# Save the new content
ret = self.reencode(new_text, encoding=output_encoding)

# Conversion success
if ret: # pragma: no branch
self._is_valid = True
self.encoding = output_encoding
self.subtitle_format = output_format

return ret

def is_valid(self, *, auto_fix_srt: bool = False) -> bool:
"""Check if a :attr:`text` is a valid SubRip format.
Expand Down Expand Up @@ -305,7 +395,7 @@ def _check_is_valid(self, *, auto_fix_srt: bool = False) -> bool:
# Valid srt
if self.subtitle_format == 'srt':
try:
parsed = self.parse_srt()
parsed = self.parse_srt(self.text)
except Exception: # pragma: no cover
msg = 'srt parsing failed, subtitle is invalid'
logger.exception(msg)
Expand All @@ -318,9 +408,10 @@ def _check_is_valid(self, *, auto_fix_srt: bool = False) -> bool:
# TODO: check other formats
return True

def parse_srt(self) -> str:
@staticmethod
def parse_srt(text: str) -> str:
"""Text content parsed to a valid srt subtitle."""
return str(srt.compose(srt.parse(self.text)))
return str(srt.compose(srt.parse(text)))

def guess_encoding(self) -> str | None:
"""Guess encoding using the language, falling back on chardet.
Expand Down
Loading

0 comments on commit 26c5d27

Please sign in to comment.