forked from kenzuya/unshackle
feat(subtitle): preserve original formatting when no conversion requested
Add a preserve_formatting config option that prevents the automatic subtitle processing which strips formatting tags and styling. When enabled (default: true), WebVTT files skip the pycaption read/write cycle and the timestamp-sanitized text is written as-is, preserving tags like <i> and <b>, positioning, and other formatting.
This commit is contained in:
@@ -239,25 +239,29 @@ class Subtitle(Track):
|
|||||||
|
|
||||||
# Sanitize WebVTT timestamps before parsing
|
# Sanitize WebVTT timestamps before parsing
|
||||||
text = Subtitle.sanitize_webvtt_timestamps(text)
|
text = Subtitle.sanitize_webvtt_timestamps(text)
|
||||||
|
preserve_formatting = config.subtitle.get("preserve_formatting", True)
|
||||||
|
|
||||||
try:
|
if preserve_formatting:
|
||||||
caption_set = pycaption.WebVTTReader().read(text)
|
self.path.write_text(text, encoding="utf8")
|
||||||
Subtitle.merge_same_cues(caption_set)
|
else:
|
||||||
Subtitle.filter_unwanted_cues(caption_set)
|
|
||||||
subtitle_text = pycaption.WebVTTWriter().write(caption_set)
|
|
||||||
self.path.write_text(subtitle_text, encoding="utf8")
|
|
||||||
except pycaption.exceptions.CaptionReadSyntaxError:
|
|
||||||
# If first attempt fails, try more aggressive sanitization
|
|
||||||
text = Subtitle.sanitize_webvtt(text)
|
|
||||||
try:
|
try:
|
||||||
caption_set = pycaption.WebVTTReader().read(text)
|
caption_set = pycaption.WebVTTReader().read(text)
|
||||||
Subtitle.merge_same_cues(caption_set)
|
Subtitle.merge_same_cues(caption_set)
|
||||||
Subtitle.filter_unwanted_cues(caption_set)
|
Subtitle.filter_unwanted_cues(caption_set)
|
||||||
subtitle_text = pycaption.WebVTTWriter().write(caption_set)
|
subtitle_text = pycaption.WebVTTWriter().write(caption_set)
|
||||||
self.path.write_text(subtitle_text, encoding="utf8")
|
self.path.write_text(subtitle_text, encoding="utf8")
|
||||||
except Exception:
|
except pycaption.exceptions.CaptionReadSyntaxError:
|
||||||
# Keep the sanitized version even if parsing failed
|
# If first attempt fails, try more aggressive sanitization
|
||||||
self.path.write_text(text, encoding="utf8")
|
text = Subtitle.sanitize_webvtt(text)
|
||||||
|
try:
|
||||||
|
caption_set = pycaption.WebVTTReader().read(text)
|
||||||
|
Subtitle.merge_same_cues(caption_set)
|
||||||
|
Subtitle.filter_unwanted_cues(caption_set)
|
||||||
|
subtitle_text = pycaption.WebVTTWriter().write(caption_set)
|
||||||
|
self.path.write_text(subtitle_text, encoding="utf8")
|
||||||
|
except Exception:
|
||||||
|
# Keep the sanitized version even if parsing failed
|
||||||
|
self.path.write_text(text, encoding="utf8")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def sanitize_webvtt_timestamps(text: str) -> str:
|
def sanitize_webvtt_timestamps(text: str) -> str:
|
||||||
|
|||||||
@@ -360,6 +360,10 @@ subtitle:
|
|||||||
# convert_before_strip: Auto-convert VTT/other formats to SRT before using subtitle-filter
|
# convert_before_strip: Auto-convert VTT/other formats to SRT before using subtitle-filter
|
||||||
# This ensures compatibility when subtitle-filter is used as fallback (default: true)
|
# This ensures compatibility when subtitle-filter is used as fallback (default: true)
|
||||||
convert_before_strip: true
|
convert_before_strip: true
|
||||||
|
# preserve_formatting: Preserve original subtitle formatting (tags, positioning, styling)
|
||||||
|
# When true, WebVTT files skip pycaption processing (including cue merging and unwanted-cue filtering) so that tags like <i> and <b> and cue positioning stay intact
|
||||||
|
# When no sub_format is set either, subtitles remain in their original format (default: true)
|
||||||
|
preserve_formatting: true
|
||||||
|
|
||||||
# Configuration for pywidevine's serve functionality
|
# Configuration for pywidevine's serve functionality
|
||||||
serve:
|
serve:
|
||||||
|
|||||||
Reference in New Issue
Block a user