forked from kenzuya/unshackle
feat(subtitle): preserve original formatting when no conversion requested
Add a preserve_formatting config option to prevent automatic subtitle processing that strips formatting tags and styling. When enabled (default: true), WebVTT files skip the pycaption read/write cycle, so tags such as <i> and <b>, cue positioning, and other formatting are preserved.
This commit is contained in:
@@ -239,25 +239,29 @@ class Subtitle(Track):
|
||||
|
||||
# Sanitize WebVTT timestamps before parsing
|
||||
text = Subtitle.sanitize_webvtt_timestamps(text)
|
||||
preserve_formatting = config.subtitle.get("preserve_formatting", True)
|
||||
|
||||
try:
|
||||
caption_set = pycaption.WebVTTReader().read(text)
|
||||
Subtitle.merge_same_cues(caption_set)
|
||||
Subtitle.filter_unwanted_cues(caption_set)
|
||||
subtitle_text = pycaption.WebVTTWriter().write(caption_set)
|
||||
self.path.write_text(subtitle_text, encoding="utf8")
|
||||
except pycaption.exceptions.CaptionReadSyntaxError:
|
||||
# If first attempt fails, try more aggressive sanitization
|
||||
text = Subtitle.sanitize_webvtt(text)
|
||||
if preserve_formatting:
|
||||
self.path.write_text(text, encoding="utf8")
|
||||
else:
|
||||
try:
|
||||
caption_set = pycaption.WebVTTReader().read(text)
|
||||
Subtitle.merge_same_cues(caption_set)
|
||||
Subtitle.filter_unwanted_cues(caption_set)
|
||||
subtitle_text = pycaption.WebVTTWriter().write(caption_set)
|
||||
self.path.write_text(subtitle_text, encoding="utf8")
|
||||
except Exception:
|
||||
# Keep the sanitized version even if parsing failed
|
||||
self.path.write_text(text, encoding="utf8")
|
||||
except pycaption.exceptions.CaptionReadSyntaxError:
|
||||
# If first attempt fails, try more aggressive sanitization
|
||||
text = Subtitle.sanitize_webvtt(text)
|
||||
try:
|
||||
caption_set = pycaption.WebVTTReader().read(text)
|
||||
Subtitle.merge_same_cues(caption_set)
|
||||
Subtitle.filter_unwanted_cues(caption_set)
|
||||
subtitle_text = pycaption.WebVTTWriter().write(caption_set)
|
||||
self.path.write_text(subtitle_text, encoding="utf8")
|
||||
except Exception:
|
||||
# Keep the sanitized version even if parsing failed
|
||||
self.path.write_text(text, encoding="utf8")
|
||||
|
||||
@staticmethod
|
||||
def sanitize_webvtt_timestamps(text: str) -> str:
|
||||
|
||||
@@ -360,6 +360,10 @@ subtitle:
|
||||
# convert_before_strip: Auto-convert VTT/other formats to SRT before using subtitle-filter
|
||||
# This ensures compatibility when subtitle-filter is used as fallback (default: true)
|
||||
convert_before_strip: true
|
||||
# preserve_formatting: Preserve original subtitle formatting (tags, positioning, styling)
|
||||
# When true, skips pycaption processing for WebVTT files so that tags such as <i> and <b>, as well as cue positioning, stay intact
|
||||
# When sub_format is also left unset, subtitles remain in their original format (default: true)
|
||||
preserve_formatting: true
|
||||
|
||||
# Configuration for pywidevine's serve functionality
|
||||
serve:
|
||||
|
||||
Reference in New Issue
Block a user