feat(subtitle): preserve original formatting when no conversion requested

Add a preserve_formatting config option to prevent automatic subtitle processing from stripping formatting tags and styling. When enabled (default: true), WebVTT files skip the pycaption read/write cycle so that tags like <i> and <b>, positioning, and other formatting are preserved.
This commit is contained in:
Andy
2025-11-03 23:01:31 +00:00
parent 8b0b3045e3
commit 8a46655d21
2 changed files with 20 additions and 12 deletions

View File

@@ -239,25 +239,29 @@ class Subtitle(Track):
# Sanitize WebVTT timestamps before parsing
text = Subtitle.sanitize_webvtt_timestamps(text)
preserve_formatting = config.subtitle.get("preserve_formatting", True)
try:
caption_set = pycaption.WebVTTReader().read(text)
Subtitle.merge_same_cues(caption_set)
Subtitle.filter_unwanted_cues(caption_set)
subtitle_text = pycaption.WebVTTWriter().write(caption_set)
self.path.write_text(subtitle_text, encoding="utf8")
except pycaption.exceptions.CaptionReadSyntaxError:
# If first attempt fails, try more aggressive sanitization
text = Subtitle.sanitize_webvtt(text)
if preserve_formatting:
self.path.write_text(text, encoding="utf8")
else:
try:
caption_set = pycaption.WebVTTReader().read(text)
Subtitle.merge_same_cues(caption_set)
Subtitle.filter_unwanted_cues(caption_set)
subtitle_text = pycaption.WebVTTWriter().write(caption_set)
self.path.write_text(subtitle_text, encoding="utf8")
except Exception:
# Keep the sanitized version even if parsing failed
self.path.write_text(text, encoding="utf8")
except pycaption.exceptions.CaptionReadSyntaxError:
# If first attempt fails, try more aggressive sanitization
text = Subtitle.sanitize_webvtt(text)
try:
caption_set = pycaption.WebVTTReader().read(text)
Subtitle.merge_same_cues(caption_set)
Subtitle.filter_unwanted_cues(caption_set)
subtitle_text = pycaption.WebVTTWriter().write(caption_set)
self.path.write_text(subtitle_text, encoding="utf8")
except Exception:
# Keep the sanitized version even if parsing failed
self.path.write_text(text, encoding="utf8")
@staticmethod
def sanitize_webvtt_timestamps(text: str) -> str:

View File

@@ -360,6 +360,10 @@ subtitle:
# convert_before_strip: Auto-convert VTT/other formats to SRT before using subtitle-filter
# This ensures compatibility when subtitle-filter is used as fallback (default: true)
convert_before_strip: true
# preserve_formatting: Preserve original subtitle formatting (tags, positioning, styling)
# When true, skips pycaption processing for WebVTT files so that tags like <i> and <b> and cue positioning stay intact
# When sub_format is also left unset, subtitles remain in their original format (default: true)
preserve_formatting: true
# Configuration for pywidevine's serve functionality
serve: