From 6ebdfa88183f864a68155916c30b314cbbed09c8 Mon Sep 17 00:00:00 2001 From: Andy Date: Fri, 31 Oct 2025 14:51:25 +0000 Subject: [PATCH] fix(subtitle): resolve SDH stripping crash with VTT files Fixes #34 --- unshackle/commands/dl.py | 30 ++++++++++++++++-------------- unshackle/core/tracks/subtitle.py | 31 ++++++++++++++++++++++--------- unshackle/unshackle-example.yaml | 27 +++++++++++++++++++-------- 3 files changed, 57 insertions(+), 31 deletions(-) diff --git a/unshackle/commands/dl.py b/unshackle/commands/dl.py index 43e5cd7..426b526 100644 --- a/unshackle/commands/dl.py +++ b/unshackle/commands/dl.py @@ -845,20 +845,22 @@ class dl: # strip SDH subs to non-SDH if no equivalent same-lang non-SDH is available # uses a loose check, e.g, wont strip en-US SDH sub if a non-SDH en-GB is available - for subtitle in title.tracks.subtitles: - if subtitle.sdh and not any( - is_close_match(subtitle.language, [x.language]) - for x in title.tracks.subtitles - if not x.sdh and not x.forced - ): - non_sdh_sub = deepcopy(subtitle) - non_sdh_sub.id += "_stripped" - non_sdh_sub.sdh = False - title.tracks.add(non_sdh_sub) - events.subscribe( - events.Types.TRACK_MULTIPLEX, - lambda track: (track.strip_hearing_impaired()) if track.id == non_sdh_sub.id else None, - ) + # Check if automatic SDH stripping is enabled in config + if config.subtitle.get("strip_sdh", True): + for subtitle in title.tracks.subtitles: + if subtitle.sdh and not any( + is_close_match(subtitle.language, [x.language]) + for x in title.tracks.subtitles + if not x.sdh and not x.forced + ): + non_sdh_sub = deepcopy(subtitle) + non_sdh_sub.id += "_stripped" + non_sdh_sub.sdh = False + title.tracks.add(non_sdh_sub) + events.subscribe( + events.Types.TRACK_MULTIPLEX, + lambda track: (track.strip_hearing_impaired()) if track.id == non_sdh_sub.id else None, + ) with console.status("Sorting tracks by language and bitrate...", spinner="dots"): video_sort_lang = v_lang or lang diff --git a/unshackle/core/tracks/subtitle.py b/unshackle/core/tracks/subtitle.py index e336345..7019142 100644 --- a/unshackle/core/tracks/subtitle.py +++ b/unshackle/core/tracks/subtitle.py @@ -979,20 +979,33 @@ class Subtitle(Track): stdout=subprocess.DEVNULL, ) else: - sub = Subtitles(self.path) + if config.subtitle.get("convert_before_strip", True) and self.codec != Subtitle.Codec.SubRip: + self.path = self.convert(Subtitle.Codec.SubRip) + self.codec = Subtitle.Codec.SubRip + try: - sub.filter(rm_fonts=True, rm_ast=True, rm_music=True, rm_effects=True, rm_names=True, rm_author=True) - except ValueError as e: - if "too many values to unpack" in str(e): - # Retry without name removal if the error is due to multiple colons in time references - # This can happen with lines like "at 10:00 and 2:00" - sub = Subtitles(self.path) + sub = Subtitles(self.path) + try: sub.filter( - rm_fonts=True, rm_ast=True, rm_music=True, rm_effects=True, rm_names=False, rm_author=True + rm_fonts=True, rm_ast=True, rm_music=True, rm_effects=True, rm_names=True, rm_author=True ) + except ValueError as e: + if "too many values to unpack" in str(e): + # Retry without name removal if the error is due to multiple colons in time references + # This can happen with lines like "at 10:00 and 2:00" + sub = Subtitles(self.path) + sub.filter( + rm_fonts=True, rm_ast=True, rm_music=True, rm_effects=True, rm_names=False, rm_author=True + ) + else: + raise + sub.save() + except (IOError, OSError) as e: + if "is not valid subtitle file" in str(e): + self.log.warning(f"Failed to strip SDH from {self.path.name}: {e}") + self.log.warning("Continuing without SDH stripping for this subtitle") else: raise - sub.save() def reverse_rtl(self) -> None: """ diff --git a/unshackle/unshackle-example.yaml b/unshackle/unshackle-example.yaml index a56bb77..a2c2408 100644 --- a/unshackle/unshackle-example.yaml +++ b/unshackle/unshackle-example.yaml @@ -1,3 +1,10 @@ +# API key for The Movie Database (TMDB) +tmdb_api_key: "" + +# Client ID for SIMKL API (optional, improves metadata matching) +# Get your free client ID at: https://simkl.com/settings/developer/ +simkl_client_id: "" + # Group or Username to postfix to the end of all download filenames following a dash tag: user_tag @@ -333,22 +340,26 @@ filenames: chapters: "Chapters_{title}_{random}.txt" subtitle: "Subtitle_{id}_{language}.srt" -# API key for The Movie Database (TMDB) -tmdb_api_key: "" - -# Client ID for SIMKL API (optional, improves metadata matching) -# Get your free client ID at: https://simkl.com/settings/developer/ -simkl_client_id: "" - # conversion_method: -# - auto (default): Smart routing - subby for WebVTT/SAMI, standard for others +# - auto (default): Smart routing - subby for WebVTT/SAMI, pycaption for others # - subby: Always use subby with advanced processing # - pycaption: Use only pycaption library (no SubtitleEdit, no subby) # - subtitleedit: Prefer SubtitleEdit when available, fall back to pycaption # - pysubs2: Use pysubs2 library (supports SRT/SSA/ASS/WebVTT/TTML/SAMI/MicroDVD/MPL2/TMP) subtitle: conversion_method: auto + # sdh_method: Method to use for SDH (hearing impaired) stripping + # - auto (default): Try subby (SRT only), then SubtitleEdit (if available), then subtitle-filter + # - subby: Use subby library (SRT only) + # - subtitleedit: Use SubtitleEdit tool (Windows only, falls back to subtitle-filter) + # - filter-subs: Use subtitle-filter library directly sdh_method: auto + # strip_sdh: Automatically create stripped (non-SDH) versions of SDH subtitles + # Set to false to disable automatic SDH stripping entirely (default: true) + strip_sdh: true + # convert_before_strip: Auto-convert VTT/other formats to SRT before using subtitle-filter + # This ensures compatibility when subtitle-filter is used as fallback (default: true) + convert_before_strip: true # Configuration for pywidevine's serve functionality serve: