4 Commits

Author SHA1 Message Date
Andy
c1e2d68e57 docs: add pysubs2 conversion_method to configuration documentation 2025-10-01 04:19:25 +00:00
Andy
acbbe734ab Merge branch 'feature/pysubs2-subtitle conversion' into main 2025-10-01 04:16:06 +00:00
Andy
6d0a210efb fix: (subtitle): Move pysubs2 to not be auto while in "testing" phase. 2025-10-01 04:11:22 +00:00
Sp5rky
26a94b7135 feat: add pysubs2 subtitle conversion with extended format support
- Add pysubs2 as default conversion method for 'auto' mode
- Add support for MicroDVD (.sub), MPL2 (.mpl2), and TMP (.tmp) formats
- Implement convert_with_pysubs2() method with fallback to standard conversion
- Update from_mime() to handle new subtitle format types
- Map all supported formats to pysubs2 format identifiers
- Update docstrings to reflect new capabilities
2025-09-30 14:12:51 -06:00
5 changed files with 81 additions and 4 deletions

View File

@@ -656,11 +656,12 @@ Control subtitle conversion and SDH (hearing-impaired) stripping behavior.
- `subby`: Always use subby with CommonIssuesFixer. - `subby`: Always use subby with CommonIssuesFixer.
- `subtitleedit`: Prefer SubtitleEdit when available; otherwise fallback to standard conversion. - `subtitleedit`: Prefer SubtitleEdit when available; otherwise fallback to standard conversion.
- `pycaption`: Use only the pycaption library (no SubtitleEdit, no subby). - `pycaption`: Use only the pycaption library (no SubtitleEdit, no subby).
- `pysubs2`: Use pysubs2 library (supports SRT, SSA, ASS, WebVTT, TTML, SAMI, MicroDVD, MPL2, TMP formats).
- `sdh_method`: How to strip SDH cues. Default: `auto`. - `sdh_method`: How to strip SDH cues. Default: `auto`.
- `auto`: Try subby for SRT first, then SubtitleEdit, then filter-subs. - `auto`: Try subby for SRT first, then SubtitleEdit, then filter-subs.
- `subby`: Use subbys SDHStripper (SRT only). - `subby`: Use subby's SDHStripper (SRT only).
- `subtitleedit`: Use SubtitleEdits RemoveTextForHI when available. - `subtitleedit`: Use SubtitleEdit's RemoveTextForHI when available.
- `filter-subs`: Use the subtitle-filter library. - `filter-subs`: Use the subtitle-filter library.
Example: Example:

View File

@@ -58,6 +58,7 @@ dependencies = [
"httpx>=0.28.1,<0.29", "httpx>=0.28.1,<0.29",
"cryptography>=45.0.0", "cryptography>=45.0.0",
"subby", "subby",
"pysubs2>=1.7.0,<2",
] ]
[project.urls] [project.urls]

View File

@@ -10,6 +10,7 @@ from pathlib import Path
from typing import Any, Callable, Iterable, Optional, Union from typing import Any, Callable, Iterable, Optional, Union
import pycaption import pycaption
import pysubs2
import requests import requests
from construct import Container from construct import Container
from pycaption import Caption, CaptionList, CaptionNode, WebVTTReader from pycaption import Caption, CaptionList, CaptionNode, WebVTTReader
@@ -33,6 +34,9 @@ class Subtitle(Track):
TimedTextMarkupLang = "TTML" # https://wikipedia.org/wiki/Timed_Text_Markup_Language TimedTextMarkupLang = "TTML" # https://wikipedia.org/wiki/Timed_Text_Markup_Language
WebVTT = "VTT" # https://wikipedia.org/wiki/WebVTT WebVTT = "VTT" # https://wikipedia.org/wiki/WebVTT
SAMI = "SMI" # https://wikipedia.org/wiki/SAMI SAMI = "SMI" # https://wikipedia.org/wiki/SAMI
MicroDVD = "SUB" # https://wikipedia.org/wiki/MicroDVD
MPL2 = "MPL2" # MPL2 subtitle format
TMP = "TMP" # TMP subtitle format
# MPEG-DASH box-encapsulated subtitle formats # MPEG-DASH box-encapsulated subtitle formats
fTTML = "STPP" # https://www.w3.org/TR/2018/REC-ttml-imsc1.0.1-20180424 fTTML = "STPP" # https://www.w3.org/TR/2018/REC-ttml-imsc1.0.1-20180424
fVTT = "WVTT" # https://www.w3.org/TR/webvtt1 fVTT = "WVTT" # https://www.w3.org/TR/webvtt1
@@ -56,6 +60,12 @@ class Subtitle(Track):
return Subtitle.Codec.WebVTT return Subtitle.Codec.WebVTT
elif mime in ("smi", "sami"): elif mime in ("smi", "sami"):
return Subtitle.Codec.SAMI return Subtitle.Codec.SAMI
elif mime in ("sub", "microdvd"):
return Subtitle.Codec.MicroDVD
elif mime == "mpl2":
return Subtitle.Codec.MPL2
elif mime == "tmp":
return Subtitle.Codec.TMP
elif mime == "stpp": elif mime == "stpp":
return Subtitle.Codec.fTTML return Subtitle.Codec.fTTML
elif mime == "wvtt": elif mime == "wvtt":
@@ -391,6 +401,57 @@ class Subtitle(Track):
# Fall back to existing conversion method on any error # Fall back to existing conversion method on any error
return self._convert_standard(codec) return self._convert_standard(codec)
def convert_with_pysubs2(self, codec: Subtitle.Codec) -> Path:
"""
Convert subtitle using pysubs2 library for broad format support.
pysubs2 is a pure-Python library supporting SubRip (SRT), SubStation Alpha
(SSA/ASS), WebVTT, TTML, SAMI, MicroDVD, MPL2, and TMP formats.
"""
if not self.path or not self.path.exists():
raise ValueError("You must download the subtitle track first.")
if self.codec == codec:
return self.path
output_path = self.path.with_suffix(f".{codec.value.lower()}")
original_path = self.path
codec_to_pysubs2_format = {
Subtitle.Codec.SubRip: "srt",
Subtitle.Codec.SubStationAlpha: "ssa",
Subtitle.Codec.SubStationAlphav4: "ass",
Subtitle.Codec.WebVTT: "vtt",
Subtitle.Codec.TimedTextMarkupLang: "ttml",
Subtitle.Codec.SAMI: "sami",
Subtitle.Codec.MicroDVD: "microdvd",
Subtitle.Codec.MPL2: "mpl2",
Subtitle.Codec.TMP: "tmp",
}
pysubs2_output_format = codec_to_pysubs2_format.get(codec)
if pysubs2_output_format is None:
return self._convert_standard(codec)
try:
subs = pysubs2.load(str(self.path), encoding="utf-8")
subs.save(str(output_path), format_=pysubs2_output_format, encoding="utf-8")
if original_path.exists() and original_path != output_path:
original_path.unlink()
self.path = output_path
self.codec = codec
if callable(self.OnConverted):
self.OnConverted(codec)
return output_path
except Exception:
return self._convert_standard(codec)
def convert(self, codec: Subtitle.Codec) -> Path: def convert(self, codec: Subtitle.Codec) -> Path:
""" """
Convert this Subtitle to another Format. Convert this Subtitle to another Format.
@@ -400,6 +461,7 @@ class Subtitle(Track):
- 'subby': Always uses subby with CommonIssuesFixer - 'subby': Always uses subby with CommonIssuesFixer
- 'subtitleedit': Uses SubtitleEdit when available, falls back to pycaption - 'subtitleedit': Uses SubtitleEdit when available, falls back to pycaption
- 'pycaption': Uses only pycaption library - 'pycaption': Uses only pycaption library
- 'pysubs2': Uses pysubs2 library
""" """
# Check configuration for conversion method # Check configuration for conversion method
conversion_method = config.subtitle.get("conversion_method", "auto") conversion_method = config.subtitle.get("conversion_method", "auto")
@@ -407,11 +469,12 @@ class Subtitle(Track):
if conversion_method == "subby": if conversion_method == "subby":
return self.convert_with_subby(codec) return self.convert_with_subby(codec)
elif conversion_method == "subtitleedit": elif conversion_method == "subtitleedit":
return self._convert_standard(codec) # SubtitleEdit is used in standard conversion return self._convert_standard(codec)
elif conversion_method == "pycaption": elif conversion_method == "pycaption":
return self._convert_pycaption_only(codec) return self._convert_pycaption_only(codec)
elif conversion_method == "pysubs2":
return self.convert_with_pysubs2(codec)
elif conversion_method == "auto": elif conversion_method == "auto":
# Use subby for formats it handles better
if self.codec in (Subtitle.Codec.WebVTT, Subtitle.Codec.SAMI): if self.codec in (Subtitle.Codec.WebVTT, Subtitle.Codec.SAMI):
return self.convert_with_subby(codec) return self.convert_with_subby(codec)
else: else:

View File

@@ -253,6 +253,7 @@ tmdb_api_key: ""
# - subby: Always use subby with advanced processing # - subby: Always use subby with advanced processing
# - pycaption: Use only pycaption library (no SubtitleEdit, no subby) # - pycaption: Use only pycaption library (no SubtitleEdit, no subby)
# - subtitleedit: Prefer SubtitleEdit when available, fall back to pycaption # - subtitleedit: Prefer SubtitleEdit when available, fall back to pycaption
# - pysubs2: Use pysubs2 library (supports SRT/SSA/ASS/WebVTT/TTML/SAMI/MicroDVD/MPL2/TMP)
subtitle: subtitle:
conversion_method: auto conversion_method: auto
sdh_method: auto sdh_method: auto

11
uv.lock generated
View File

@@ -1180,6 +1180,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/8d/59/b4572118e098ac8e46e399a1dd0f2d85403ce8bbaad9ec79373ed6badaf9/PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", size = 16725, upload-time = "2019-09-20T02:06:22.938Z" }, { url = "https://files.pythonhosted.org/packages/8d/59/b4572118e098ac8e46e399a1dd0f2d85403ce8bbaad9ec79373ed6badaf9/PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", size = 16725, upload-time = "2019-09-20T02:06:22.938Z" },
] ]
[[package]]
name = "pysubs2"
version = "1.8.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/31/4a/becf78d9d3df56e6c4a9c50b83794e5436b6c5ab6dd8a3f934e94c89338c/pysubs2-1.8.0.tar.gz", hash = "sha256:3397bb58a4a15b1325ba2ae3fd4d7c214e2c0ddb9f33190d6280d783bb433b20", size = 1130048, upload-time = "2024-12-24T12:39:47.769Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/99/09/0fc0719162e5ad723f71d41cf336f18b6b5054d70dc0fe42ace6b4d2bdc9/pysubs2-1.8.0-py3-none-any.whl", hash = "sha256:05716f5039a9ebe32cd4d7673f923cf36204f3a3e99987f823ab83610b7035a0", size = 43516, upload-time = "2024-12-24T12:39:44.469Z" },
]
[[package]] [[package]]
name = "pywidevine" name = "pywidevine"
version = "1.8.0" version = "1.8.0"
@@ -1529,6 +1538,7 @@ dependencies = [
{ name = "pymp4" }, { name = "pymp4" },
{ name = "pymysql" }, { name = "pymysql" },
{ name = "pyplayready" }, { name = "pyplayready" },
{ name = "pysubs2" },
{ name = "pywidevine", extra = ["serve"] }, { name = "pywidevine", extra = ["serve"] },
{ name = "pyyaml" }, { name = "pyyaml" },
{ name = "requests", extra = ["socks"] }, { name = "requests", extra = ["socks"] },
@@ -1578,6 +1588,7 @@ requires-dist = [
{ name = "pymp4", specifier = ">=1.4.0,<2" }, { name = "pymp4", specifier = ">=1.4.0,<2" },
{ name = "pymysql", specifier = ">=1.1.0,<2" }, { name = "pymysql", specifier = ">=1.1.0,<2" },
{ name = "pyplayready", specifier = ">=0.6.3,<0.7" }, { name = "pyplayready", specifier = ">=0.6.3,<0.7" },
{ name = "pysubs2", specifier = ">=1.7.0,<2" },
{ name = "pywidevine", extras = ["serve"], specifier = ">=1.8.0,<2" }, { name = "pywidevine", extras = ["serve"], specifier = ">=1.8.0,<2" },
{ name = "pyyaml", specifier = ">=6.0.1,<7" }, { name = "pyyaml", specifier = ">=6.0.1,<7" },
{ name = "requests", extras = ["socks"], specifier = ">=2.31.0,<3" }, { name = "requests", extras = ["socks"], specifier = ">=2.31.0,<3" },