feat(subtitle): preserve original formatting when no conversion requested

Add a preserve_formatting config option that stops automatic subtitle processing from stripping formatting tags and styling. When enabled (default: true), WebVTT files skip the pycaption read/write cycle, which preserves tags such as <i> and <b>, positioning, and other formatting.
2025-11-03 23:01:31 +00:00
parent 8b0b3045e3
commit 8a46655d21
2 changed files with 20 additions and 12 deletions
--- a/unshackle/core/tracks/subtitle.py
+++ b/unshackle/core/tracks/subtitle.py
@@ -239,25 +239,29 @@ class Subtitle(Track):
            # Sanitize WebVTT timestamps before parsing
            text = Subtitle.sanitize_webvtt_timestamps(text)
            preserve_formatting = config.subtitle.get("preserve_formatting", True)
-            try:
+            if preserve_formatting:
-                caption_set = pycaption.WebVTTReader().read(text)
+                self.path.write_text(text, encoding="utf8")
-                Subtitle.merge_same_cues(caption_set)
+            else:
                Subtitle.filter_unwanted_cues(caption_set)
                subtitle_text = pycaption.WebVTTWriter().write(caption_set)
                self.path.write_text(subtitle_text, encoding="utf8")
            except pycaption.exceptions.CaptionReadSyntaxError:
                # If first attempt fails, try more aggressive sanitization
                text = Subtitle.sanitize_webvtt(text)
                try:
                    caption_set = pycaption.WebVTTReader().read(text)
                    Subtitle.merge_same_cues(caption_set)
                    Subtitle.filter_unwanted_cues(caption_set)
                    subtitle_text = pycaption.WebVTTWriter().write(caption_set)
                    self.path.write_text(subtitle_text, encoding="utf8")
-                except Exception:
+                except pycaption.exceptions.CaptionReadSyntaxError:
-                    # Keep the sanitized version even if parsing failed
+                    # If first attempt fails, try more aggressive sanitization
-                    self.path.write_text(text, encoding="utf8")
+                    text = Subtitle.sanitize_webvtt(text)
                    try:
                        caption_set = pycaption.WebVTTReader().read(text)
                        Subtitle.merge_same_cues(caption_set)
                        Subtitle.filter_unwanted_cues(caption_set)
                        subtitle_text = pycaption.WebVTTWriter().write(caption_set)
                        self.path.write_text(subtitle_text, encoding="utf8")
                    except Exception:
                        # Keep the sanitized version even if parsing failed
                        self.path.write_text(text, encoding="utf8")
    @staticmethod
    def sanitize_webvtt_timestamps(text: str) -> str:
--- a/unshackle/unshackle-example.yaml
+++ b/unshackle/unshackle-example.yaml
@@ -360,6 +360,10 @@ subtitle:
  # convert_before_strip: Auto-convert VTT/other formats to SRT before using subtitle-filter
  # This ensures compatibility when subtitle-filter is used as fallback (default: true)
  convert_before_strip: true
  # preserve_formatting: Preserve original subtitle formatting (tags, positioning, styling)
  # When true, skips pycaption processing for WebVTT files so that tags like <i> and <b>, as well as positioning, stay intact
  # When sub_format is also left unset, subtitles remain in their original format (default: true)
  preserve_formatting: true
 # Configuration for pywidevine's serve functionality
 serve: