From 17a91ee4bbbaf79a9e673727aeb0a488cedd8ff7 Mon Sep 17 00:00:00 2001 From: Andy Date: Mon, 5 Jan 2026 09:50:33 +0000 Subject: [PATCH 01/20] feat(debug): add comprehensive debug logging for downloaders and muxing --- unshackle/core/downloaders/aria2c.py | 84 +++++++++++++++- .../core/downloaders/curl_impersonate.py | 48 ++++++++- unshackle/core/downloaders/n_m3u8dl_re.py | 99 ++++++++++++++++++- unshackle/core/downloaders/requests.py | 46 ++++++++- unshackle/core/manifests/dash.py | 20 +++- unshackle/core/manifests/hls.py | 20 +++- unshackle/core/manifests/ism.py | 20 +++- unshackle/core/tracks/tracks.py | 59 ++++++++++- 8 files changed, 382 insertions(+), 14 deletions(-) diff --git a/unshackle/core/downloaders/aria2c.py b/unshackle/core/downloaders/aria2c.py index bc43460..6f5b5d0 100644 --- a/unshackle/core/downloaders/aria2c.py +++ b/unshackle/core/downloaders/aria2c.py @@ -19,7 +19,7 @@ from unshackle.core import binaries from unshackle.core.config import config from unshackle.core.console import console from unshackle.core.constants import DOWNLOAD_CANCELLED -from unshackle.core.utilities import get_extension, get_free_port +from unshackle.core.utilities import get_debug_logger, get_extension, get_free_port def rpc(caller: Callable, secret: str, method: str, params: Optional[list[Any]] = None) -> Any: @@ -58,6 +58,8 @@ def download( proxy: Optional[str] = None, max_workers: Optional[int] = None, ) -> Generator[dict[str, Any], None, None]: + debug_logger = get_debug_logger() + if not urls: raise ValueError("urls must be provided and not empty") elif not isinstance(urls, (str, dict, list)): @@ -91,6 +93,13 @@ def download( urls = [urls] if not binaries.Aria2: + if debug_logger: + debug_logger.log( + level="ERROR", + operation="downloader_aria2c_binary_missing", + message="Aria2c executable not found in PATH or local binaries directory", + context={"searched_names": ["aria2c", "aria2"]}, + ) raise EnvironmentError("Aria2c executable not found...") if proxy and 
not proxy.lower().startswith("http://"): @@ -180,6 +189,28 @@ def download( continue arguments.extend(["--header", f"{header}: {value}"]) + if debug_logger: + first_url = urls[0] if isinstance(urls[0], str) else urls[0].get("url", "") + url_display = first_url[:200] + "..." if len(first_url) > 200 else first_url + debug_logger.log( + level="DEBUG", + operation="downloader_aria2c_start", + message="Starting Aria2c download", + context={ + "binary_path": str(binaries.Aria2), + "url_count": len(urls), + "first_url": url_display, + "output_dir": str(output_dir), + "filename": filename, + "max_concurrent_downloads": max_concurrent_downloads, + "max_connection_per_server": max_connection_per_server, + "split": split, + "file_allocation": file_allocation, + "has_proxy": bool(proxy), + "rpc_port": rpc_port, + }, + ) + yield dict(total=len(urls)) try: @@ -226,6 +257,20 @@ def download( textwrap.wrap(error, width=console.width - 20, initial_indent="") ) console.log(Text.from_ansi("\n[Aria2c]: " + error_pretty)) + if debug_logger: + debug_logger.log( + level="ERROR", + operation="downloader_aria2c_download_error", + message=f"Aria2c download failed: {dl['errorMessage']}", + context={ + "gid": dl["gid"], + "error_code": dl["errorCode"], + "error_message": dl["errorMessage"], + "used_uri": used_uri[:200] + "..." 
if len(used_uri) > 200 else used_uri, + "completed_length": dl.get("completedLength"), + "total_length": dl.get("totalLength"), + }, + ) raise ValueError(error) if number_stopped == len(urls): @@ -237,7 +282,31 @@ def download( p.wait() if p.returncode != 0: + if debug_logger: + debug_logger.log( + level="ERROR", + operation="downloader_aria2c_failed", + message=f"Aria2c exited with code {p.returncode}", + context={ + "returncode": p.returncode, + "url_count": len(urls), + "output_dir": str(output_dir), + }, + ) raise subprocess.CalledProcessError(p.returncode, arguments) + + if debug_logger: + debug_logger.log( + level="DEBUG", + operation="downloader_aria2c_complete", + message="Aria2c download completed successfully", + context={ + "url_count": len(urls), + "output_dir": str(output_dir), + "filename": filename, + }, + ) + except ConnectionResetError: # interrupted while passing URI to download raise KeyboardInterrupt() @@ -251,9 +320,20 @@ def download( DOWNLOAD_CANCELLED.set() # skip pending track downloads yield dict(downloaded="[yellow]CANCELLED") raise - except Exception: + except Exception as e: DOWNLOAD_CANCELLED.set() # skip pending track downloads yield dict(downloaded="[red]FAILED") + if debug_logger and not isinstance(e, (subprocess.CalledProcessError, ValueError)): + debug_logger.log( + level="ERROR", + operation="downloader_aria2c_exception", + message=f"Unexpected error during Aria2c download: {e}", + error=e, + context={ + "url_count": len(urls), + "output_dir": str(output_dir), + }, + ) raise finally: rpc(caller=partial(rpc_session.post, url=rpc_uri), secret=rpc_secret, method="aria2.shutdown") diff --git a/unshackle/core/downloaders/curl_impersonate.py b/unshackle/core/downloaders/curl_impersonate.py index 52dab7a..d278e91 100644 --- a/unshackle/core/downloaders/curl_impersonate.py +++ b/unshackle/core/downloaders/curl_impersonate.py @@ -11,7 +11,7 @@ from rich import filesize from unshackle.core.config import config from unshackle.core.constants 
import DOWNLOAD_CANCELLED -from unshackle.core.utilities import get_extension +from unshackle.core.utilities import get_debug_logger, get_extension MAX_ATTEMPTS = 5 RETRY_WAIT = 2 @@ -189,6 +189,8 @@ def curl_impersonate( if not isinstance(max_workers, (int, type(None))): raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}") + debug_logger = get_debug_logger() + if not isinstance(urls, list): urls = [urls] @@ -209,6 +211,24 @@ def curl_impersonate( if proxy: session.proxies.update({"all": proxy}) + if debug_logger: + first_url = urls[0].get("url", "") if urls else "" + url_display = first_url[:200] + "..." if len(first_url) > 200 else first_url + debug_logger.log( + level="DEBUG", + operation="downloader_curl_impersonate_start", + message="Starting curl_impersonate download", + context={ + "url_count": len(urls), + "first_url": url_display, + "output_dir": str(output_dir), + "filename": filename, + "max_workers": max_workers, + "browser": BROWSER, + "has_proxy": bool(proxy), + }, + ) + yield dict(total=len(urls)) download_sizes = [] @@ -235,11 +255,23 @@ def curl_impersonate( # tell dl that it was cancelled # the pool is already shut down, so exiting loop is fine raise - except Exception: + except Exception as e: DOWNLOAD_CANCELLED.set() # skip pending track downloads yield dict(downloaded="[red]FAILING") pool.shutdown(wait=True, cancel_futures=True) yield dict(downloaded="[red]FAILED") + if debug_logger: + debug_logger.log( + level="ERROR", + operation="downloader_curl_impersonate_failed", + message=f"curl_impersonate download failed: {e}", + error=e, + context={ + "url_count": len(urls), + "output_dir": str(output_dir), + "browser": BROWSER, + }, + ) # tell dl that it failed # the pool is already shut down, so exiting loop is fine raise @@ -260,5 +292,17 @@ def curl_impersonate( last_speed_refresh = now download_sizes.clear() + if debug_logger: + debug_logger.log( + level="DEBUG", + operation="downloader_curl_impersonate_complete", + 
message="curl_impersonate download completed successfully", + context={ + "url_count": len(urls), + "output_dir": str(output_dir), + "filename": filename, + }, + ) + __all__ = ("curl_impersonate",) diff --git a/unshackle/core/downloaders/n_m3u8dl_re.py b/unshackle/core/downloaders/n_m3u8dl_re.py index 7472c59..815c697 100644 --- a/unshackle/core/downloaders/n_m3u8dl_re.py +++ b/unshackle/core/downloaders/n_m3u8dl_re.py @@ -13,6 +13,7 @@ from unshackle.core import binaries from unshackle.core.config import config from unshackle.core.console import console from unshackle.core.constants import DOWNLOAD_CANCELLED +from unshackle.core.utilities import get_debug_logger PERCENT_RE = re.compile(r"(\d+\.\d+%)") SPEED_RE = re.compile(r"(\d+\.\d+(?:MB|KB)ps)") @@ -176,7 +177,6 @@ def build_download_args( "--thread-count": thread_count, "--download-retry-count": retry_count, "--write-meta-json": False, - "--no-log": True, } if proxy: args["--custom-proxy"] = proxy @@ -224,6 +224,8 @@ def download( content_keys: dict[str, Any] | None, skip_merge: bool | None = False, ) -> Generator[dict[str, Any], None, None]: + debug_logger = get_debug_logger() + if not urls: raise ValueError("urls must be provided and not empty") if not isinstance(urls, (str, dict, list)): @@ -275,7 +277,39 @@ def download( skip_merge=skip_merge, ad_keyword=ad_keyword, ) - arguments.extend(get_track_selection_args(track)) + selection_args = get_track_selection_args(track) + arguments.extend(selection_args) + + log_file_path: Path | None = None + if debug_logger: + log_file_path = output_dir / f".n_m3u8dl_re_{filename}.log" + arguments.extend(["--log-file-path", str(log_file_path)]) + + track_url_display = track.url[:200] + "..." 
if len(track.url) > 200 else track.url + debug_logger.log( + level="DEBUG", + operation="downloader_n_m3u8dl_re_start", + message="Starting N_m3u8DL-RE download", + context={ + "binary_path": str(binaries.N_m3u8DL_RE), + "track_id": getattr(track, "id", None), + "track_type": track.__class__.__name__, + "track_url": track_url_display, + "output_dir": str(output_dir), + "filename": filename, + "thread_count": thread_count, + "retry_count": retry_count, + "has_content_keys": bool(content_keys), + "content_key_count": len(content_keys) if content_keys else 0, + "has_proxy": bool(proxy), + "skip_merge": skip_merge, + "has_custom_args": bool(track.downloader_args), + "selection_args": selection_args, + "descriptor": track.descriptor.name if hasattr(track, "descriptor") else None, + }, + ) + else: + arguments.extend(["--no-log", "true"]) yield {"total": 100} yield {"downloaded": "Parsing streams..."} @@ -310,11 +344,45 @@ def download( yield {"completed": progress} if progress < 100 else {"downloaded": "Merging"} process.wait() + if process.returncode != 0: + if debug_logger and log_file_path: + log_contents = "" + if log_file_path.exists(): + try: + log_contents = log_file_path.read_text(encoding="utf-8", errors="replace") + except Exception: + log_contents = "" + + debug_logger.log( + level="ERROR", + operation="downloader_n_m3u8dl_re_failed", + message=f"N_m3u8DL-RE exited with code {process.returncode}", + context={ + "returncode": process.returncode, + "track_id": getattr(track, "id", None), + "track_type": track.__class__.__name__, + "last_line": last_line, + "log_file_contents": log_contents, + }, + ) if error_match := ERROR_RE.search(last_line): raise ValueError(f"[N_m3u8DL-RE]: {error_match.group(1)}") raise subprocess.CalledProcessError(process.returncode, arguments) + if debug_logger: + debug_logger.log( + level="DEBUG", + operation="downloader_n_m3u8dl_re_complete", + message="N_m3u8DL-RE download completed successfully", + context={ + "track_id": 
getattr(track, "id", None), + "track_type": track.__class__.__name__, + "output_dir": str(output_dir), + "filename": filename, + }, + ) + except ConnectionResetError: # interrupted while passing URI to download raise KeyboardInterrupt() @@ -322,10 +390,35 @@ def download( DOWNLOAD_CANCELLED.set() # skip pending track downloads yield {"downloaded": "[yellow]CANCELLED"} raise - except Exception: + except Exception as e: DOWNLOAD_CANCELLED.set() # skip pending track downloads yield {"downloaded": "[red]FAILED"} + if debug_logger and log_file_path and not isinstance(e, (subprocess.CalledProcessError, ValueError)): + log_contents = "" + if log_file_path.exists(): + try: + log_contents = log_file_path.read_text(encoding="utf-8", errors="replace") + except Exception: + log_contents = "" + + debug_logger.log( + level="ERROR", + operation="downloader_n_m3u8dl_re_exception", + message=f"Unexpected error during N_m3u8DL-RE download: {e}", + error=e, + context={ + "track_id": getattr(track, "id", None), + "track_type": track.__class__.__name__, + "log_file_contents": log_contents, + }, + ) raise + finally: + if log_file_path and log_file_path.exists(): + try: + log_file_path.unlink() + except Exception: + pass def n_m3u8dl_re( diff --git a/unshackle/core/downloaders/requests.py b/unshackle/core/downloaders/requests.py index 49c1759..06cab3d 100644 --- a/unshackle/core/downloaders/requests.py +++ b/unshackle/core/downloaders/requests.py @@ -12,7 +12,7 @@ from requests.adapters import HTTPAdapter from rich import filesize from unshackle.core.constants import DOWNLOAD_CANCELLED -from unshackle.core.utilities import get_extension +from unshackle.core.utilities import get_debug_logger, get_extension MAX_ATTEMPTS = 5 RETRY_WAIT = 2 @@ -215,6 +215,8 @@ def requests( if not isinstance(max_workers, (int, type(None))): raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}") + debug_logger = get_debug_logger() + if not isinstance(urls, list): urls = [urls] @@ -241,6 
+243,23 @@ def requests( if proxy: session.proxies.update({"all": proxy}) + if debug_logger: + first_url = urls[0].get("url", "") if urls else "" + url_display = first_url[:200] + "..." if len(first_url) > 200 else first_url + debug_logger.log( + level="DEBUG", + operation="downloader_requests_start", + message="Starting requests download", + context={ + "url_count": len(urls), + "first_url": url_display, + "output_dir": str(output_dir), + "filename": filename, + "max_workers": max_workers, + "has_proxy": bool(proxy), + }, + ) + yield dict(total=len(urls)) try: @@ -256,14 +275,37 @@ def requests( # tell dl that it was cancelled # the pool is already shut down, so exiting loop is fine raise - except Exception: + except Exception as e: DOWNLOAD_CANCELLED.set() # skip pending track downloads yield dict(downloaded="[red]FAILING") pool.shutdown(wait=True, cancel_futures=True) yield dict(downloaded="[red]FAILED") + if debug_logger: + debug_logger.log( + level="ERROR", + operation="downloader_requests_failed", + message=f"Requests download failed: {e}", + error=e, + context={ + "url_count": len(urls), + "output_dir": str(output_dir), + }, + ) # tell dl that it failed # the pool is already shut down, so exiting loop is fine raise + + if debug_logger: + debug_logger.log( + level="DEBUG", + operation="downloader_requests_complete", + message="Requests download completed successfully", + context={ + "url_count": len(urls), + "output_dir": str(output_dir), + "filename": filename, + }, + ) finally: DOWNLOAD_SIZES.clear() diff --git a/unshackle/core/manifests/dash.py b/unshackle/core/manifests/dash.py index 442ac96..68fdded 100644 --- a/unshackle/core/manifests/dash.py +++ b/unshackle/core/manifests/dash.py @@ -28,7 +28,7 @@ from unshackle.core.downloaders import requests as requests_downloader from unshackle.core.drm import DRM_T, PlayReady, Widevine from unshackle.core.events import events from unshackle.core.tracks import Audio, Subtitle, Tracks, Video -from 
unshackle.core.utilities import is_close_match, try_ensure_utf8 +from unshackle.core.utilities import get_debug_logger, is_close_match, try_ensure_utf8 from unshackle.core.utils.xml import load_xml @@ -518,6 +518,24 @@ class DASH: if downloader.__name__ == "n_m3u8dl_re": downloader_args.update({"filename": track.id, "track": track}) + debug_logger = get_debug_logger() + if debug_logger: + debug_logger.log( + level="DEBUG", + operation="manifest_dash_download_start", + message="Starting DASH manifest download", + context={ + "track_id": getattr(track, "id", None), + "track_type": track.__class__.__name__, + "total_segments": len(segments), + "downloader": downloader.__name__, + "has_drm": bool(track.drm), + "drm_types": [drm.__class__.__name__ for drm in (track.drm or [])], + "save_path": str(save_path), + "has_init_data": bool(init_data), + }, + ) + for status_update in downloader(**downloader_args): file_downloaded = status_update.get("file_downloaded") if file_downloaded: diff --git a/unshackle/core/manifests/hls.py b/unshackle/core/manifests/hls.py index 6f49c6a..fb0320b 100644 --- a/unshackle/core/manifests/hls.py +++ b/unshackle/core/manifests/hls.py @@ -32,7 +32,7 @@ from unshackle.core.downloaders import requests as requests_downloader from unshackle.core.drm import DRM_T, ClearKey, PlayReady, Widevine from unshackle.core.events import events from unshackle.core.tracks import Audio, Subtitle, Tracks, Video -from unshackle.core.utilities import get_extension, is_close_match, try_ensure_utf8 +from unshackle.core.utilities import get_debug_logger, get_extension, is_close_match, try_ensure_utf8 class HLS: @@ -350,6 +350,24 @@ class HLS: } ) + debug_logger = get_debug_logger() + if debug_logger: + debug_logger.log( + level="DEBUG", + operation="manifest_hls_download_start", + message="Starting HLS manifest download", + context={ + "track_id": getattr(track, "id", None), + "track_type": track.__class__.__name__, + "total_segments": total_segments, + "downloader": 
downloader.__name__, + "has_drm": bool(session_drm), + "drm_type": session_drm.__class__.__name__ if session_drm else None, + "skip_merge": skip_merge, + "save_path": str(save_path), + }, + ) + for status_update in downloader(**downloader_args): file_downloaded = status_update.get("file_downloaded") if file_downloaded: diff --git a/unshackle/core/manifests/ism.py b/unshackle/core/manifests/ism.py index 346c9e6..8cb6a3b 100644 --- a/unshackle/core/manifests/ism.py +++ b/unshackle/core/manifests/ism.py @@ -21,7 +21,7 @@ from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, from unshackle.core.drm import DRM_T, PlayReady, Widevine from unshackle.core.events import events from unshackle.core.tracks import Audio, Subtitle, Track, Tracks, Video -from unshackle.core.utilities import try_ensure_utf8 +from unshackle.core.utilities import get_debug_logger, try_ensure_utf8 from unshackle.core.utils.xml import load_xml @@ -283,6 +283,24 @@ class ISM: } ) + debug_logger = get_debug_logger() + if debug_logger: + debug_logger.log( + level="DEBUG", + operation="manifest_ism_download_start", + message="Starting ISM manifest download", + context={ + "track_id": getattr(track, "id", None), + "track_type": track.__class__.__name__, + "total_segments": len(segments), + "downloader": downloader.__name__, + "has_drm": bool(session_drm), + "drm_type": session_drm.__class__.__name__ if session_drm else None, + "skip_merge": skip_merge, + "save_path": str(save_path), + }, + ) + for status_update in downloader(**downloader_args): file_downloaded = status_update.get("file_downloaded") if file_downloaded: diff --git a/unshackle/core/tracks/tracks.py b/unshackle/core/tracks/tracks.py index eeacd47..fd9d78d 100644 --- a/unshackle/core/tracks/tracks.py +++ b/unshackle/core/tracks/tracks.py @@ -22,7 +22,7 @@ from unshackle.core.tracks.chapters import Chapter, Chapters from unshackle.core.tracks.subtitle import Subtitle from unshackle.core.tracks.track import Track from 
unshackle.core.tracks.video import Video -from unshackle.core.utilities import is_close_match, sanitize_filename +from unshackle.core.utilities import get_debug_logger, is_close_match, sanitize_filename from unshackle.core.utils.collections import as_list, flatten @@ -507,6 +507,35 @@ class Tracks: if not output_path: raise ValueError("No tracks provided, at least one track must be provided.") + debug_logger = get_debug_logger() + if debug_logger: + debug_logger.log( + level="DEBUG", + operation="mux_start", + message="Starting mkvmerge muxing", + context={ + "title": title, + "output_path": str(output_path), + "video_count": len(self.videos), + "audio_count": len(self.audio), + "subtitle_count": len(self.subtitles), + "attachment_count": len(self.attachments), + "has_chapters": bool(self.chapters), + "video_tracks": [ + {"id": v.id, "codec": getattr(v, "codec", None), "language": str(v.language)} + for v in self.videos + ], + "audio_tracks": [ + {"id": a.id, "codec": getattr(a, "codec", None), "language": str(a.language)} + for a in self.audio + ], + "subtitle_tracks": [ + {"id": s.id, "codec": getattr(s, "codec", None), "language": str(s.language)} + for s in self.subtitles + ], + }, + ) + # let potential failures go to caller, caller should handle try: errors = [] @@ -516,7 +545,33 @@ class Tracks: errors.append(line) if "progress" in line: progress(total=100, completed=int(line.strip()[14:-1])) - return output_path, p.wait(), errors + + returncode = p.wait() + + if debug_logger: + if returncode != 0 or errors: + debug_logger.log( + level="ERROR", + operation="mux_failed", + message=f"mkvmerge exited with code {returncode}", + context={ + "returncode": returncode, + "output_path": str(output_path), + "errors": errors, + }, + ) + else: + debug_logger.log( + level="DEBUG", + operation="mux_complete", + message="mkvmerge muxing completed successfully", + context={ + "output_path": str(output_path), + "output_exists": output_path.exists() if output_path else False, 
+ }, + ) + + return output_path, returncode, errors finally: if chapters_path: chapters_path.unlink() From ede38648db572f67380a55d628a33eedaf834a3a Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 7 Jan 2026 16:22:45 +0000 Subject: [PATCH 02/20] fix(util): improve test command error detection and add natural sorting --- unshackle/commands/util.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/unshackle/commands/util.py b/unshackle/commands/util.py index b9c6b84..612ac2b 100644 --- a/unshackle/commands/util.py +++ b/unshackle/commands/util.py @@ -1,3 +1,4 @@ +import re import subprocess from pathlib import Path @@ -8,6 +9,11 @@ from unshackle.core import binaries from unshackle.core.constants import context_settings +def _natural_sort_key(path: Path) -> list: + """Sort key for natural sorting (S01E01 before S01E10).""" + return [int(part) if part.isdigit() else part.lower() for part in re.split(r"(\d+)", path.name)] + + @click.group(short_help="Various helper scripts and programs.", context_settings=context_settings) def util() -> None: """Various helper scripts and programs.""" @@ -49,7 +55,7 @@ def crop(path: Path, aspect: str, letter: bool, offset: int, preview: bool) -> N raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.') if path.is_dir(): - paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4")) + paths = sorted(list(path.glob("*.mkv")) + list(path.glob("*.mp4")), key=_natural_sort_key) else: paths = [path] for video_path in paths: @@ -140,7 +146,7 @@ def range_(path: Path, full: bool, preview: bool) -> None: raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.') if path.is_dir(): - paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4")) + paths = sorted(list(path.glob("*.mkv")) + list(path.glob("*.mp4")), key=_natural_sort_key) else: paths = [path] for video_path in paths: @@ -225,16 +231,18 @@ def test(path: Path, map_: str) -> None: raise 
click.ClickException('FFmpeg executable "ffmpeg" not found but is required.') if path.is_dir(): - paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4")) + paths = sorted(list(path.glob("*.mkv")) + list(path.glob("*.mp4")), key=_natural_sort_key) else: paths = [path] for video_path in paths: - print("Starting...") + print(f"Testing: {video_path.name}") p = subprocess.Popen( [ binaries.FFMPEG, "-hide_banner", "-benchmark", + "-err_detect", + "+crccheck+bitstream+buffer+careful+compliant+aggressive", "-i", str(video_path), "-map", @@ -255,13 +263,13 @@ def test(path: Path, map_: str) -> None: reached_output = True if not reached_output: continue - if line.startswith("["): # error of some kind + if line.startswith("[") and not line.startswith("[out#"): errors += 1 stream, error = line.split("] ", maxsplit=1) stream = stream.split(" @ ")[0] line = f"{stream} ERROR: {error}" print(line) p.stderr.close() - print(f"Finished with {errors} Errors, Cleaning up...") + print(f"Finished with {errors} error(s)") p.terminate() p.wait() From 7e7bc7aecf7ed7b9c4eb21b0cb15b0cbe1633333 Mon Sep 17 00:00:00 2001 From: Andy Date: Sun, 11 Jan 2026 08:21:02 +0000 Subject: [PATCH 03/20] fix(vaults): batch bulk key operations to avoid query limits --- unshackle/vaults/API.py | 61 ++++++++++++++++++++++++-------------- unshackle/vaults/SQLite.py | 22 ++++++++++++-- 2 files changed, 58 insertions(+), 25 deletions(-) diff --git a/unshackle/vaults/API.py b/unshackle/vaults/API.py index 0cc52fe..d627ecc 100644 --- a/unshackle/vaults/API.py +++ b/unshackle/vaults/API.py @@ -114,32 +114,49 @@ class API(Vault): return added or updated def add_keys(self, service: str, kid_keys: dict[Union[UUID, str], str]) -> int: - data = self.session.post( - url=f"{self.uri}/{service.lower()}", - json={"content_keys": {str(kid).replace("-", ""): key for kid, key in kid_keys.items()}}, - headers={"Accept": "application/json"}, - ).json() + # Normalize keys + normalized_keys = {str(kid).replace("-", ""): key for 
kid, key in kid_keys.items()} + kid_list = list(normalized_keys.keys()) - code = int(data.get("code", 0)) - message = data.get("message") - error = { - 0: None, - 1: Exceptions.AuthRejected, - 2: Exceptions.TooManyRequests, - 3: Exceptions.ServiceTagInvalid, - 4: Exceptions.KeyIdInvalid, - 5: Exceptions.ContentKeyInvalid, - }.get(code, ValueError) + if not kid_list: + return 0 - if error: - raise error(f"{message} ({code})") + # Batch requests to avoid server limits + batch_size = 500 + total_added = 0 - # each kid:key that was new to the vault (optional) - added = int(data.get("added")) - # each key for a kid that was changed/updated (optional) - updated = int(data.get("updated")) + for i in range(0, len(kid_list), batch_size): + batch_kids = kid_list[i : i + batch_size] + batch_keys = {kid: normalized_keys[kid] for kid in batch_kids} - return added + updated + data = self.session.post( + url=f"{self.uri}/{service.lower()}", + json={"content_keys": batch_keys}, + headers={"Accept": "application/json"}, + ).json() + + code = int(data.get("code", 0)) + message = data.get("message") + error = { + 0: None, + 1: Exceptions.AuthRejected, + 2: Exceptions.TooManyRequests, + 3: Exceptions.ServiceTagInvalid, + 4: Exceptions.KeyIdInvalid, + 5: Exceptions.ContentKeyInvalid, + }.get(code, ValueError) + + if error: + raise error(f"{message} ({code})") + + # each kid:key that was new to the vault (optional) + added = int(data.get("added", 0)) + # each key for a kid that was changed/updated (optional) + updated = int(data.get("updated", 0)) + + total_added += added + updated + + return total_added def get_services(self) -> Iterator[str]: data = self.session.post(url=self.uri, headers={"Accept": "application/json"}).json() diff --git a/unshackle/vaults/SQLite.py b/unshackle/vaults/SQLite.py index f1922d7..a3f6447 100644 --- a/unshackle/vaults/SQLite.py +++ b/unshackle/vaults/SQLite.py @@ -119,9 +119,25 @@ class SQLite(Vault): cursor = conn.cursor() try: - placeholders = 
",".join(["?"] * len(kid_keys)) - cursor.execute(f"SELECT kid FROM `{service}` WHERE kid IN ({placeholders})", list(kid_keys.keys())) - existing_kids = {row[0] for row in cursor.fetchall()} + # Query existing KIDs in batches to avoid SQLite variable limit + # Try larger batch first (newer SQLite supports 32766), fall back to 500 if needed + existing_kids: set[str] = set() + kid_list = list(kid_keys.keys()) + batch_size = 32000 + + i = 0 + while i < len(kid_list): + batch = kid_list[i : i + batch_size] + placeholders = ",".join(["?"] * len(batch)) + try: + cursor.execute(f"SELECT kid FROM `{service}` WHERE kid IN ({placeholders})", batch) + existing_kids.update(row[0] for row in cursor.fetchall()) + i += batch_size + except sqlite3.OperationalError as e: + if "too many SQL variables" in str(e) and batch_size > 500: + batch_size = 500 + continue + raise new_keys = {kid: key for kid, key in kid_keys.items() if kid not in existing_kids} From fcd70e5b0f885e20bef497e8bacdb7a96a16fe64 Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 14 Jan 2026 22:25:58 +0000 Subject: [PATCH 04/20] fix(titles): detect HDR10 in hybrid DV filenames correctly Hybrid DV+HDR10 files were named "DV.H.265" instead of "DV.HDR.H.265" because the HDR10 detection only checked hdr_format_full which contains "Dolby Vision / SMPTE ST 2094". The "HDR10" indicator is in hdr_format_commercial, not hdr_format_full. Now checks both fields for HDR10 compatibility indicators. 
--- unshackle/core/titles/episode.py | 5 ++++- unshackle/core/titles/movie.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/unshackle/core/titles/episode.py b/unshackle/core/titles/episode.py index 6592b60..b260ce9 100644 --- a/unshackle/core/titles/episode.py +++ b/unshackle/core/titles/episode.py @@ -185,7 +185,10 @@ class Episode(Title): if hdr_format: if hdr_format_full.startswith("Dolby Vision"): name += " DV" - if any(indicator in hdr_format_full for indicator in ["HDR10", "SMPTE ST 2086"]): + if any( + indicator in (hdr_format_full + " " + hdr_format) + for indicator in ["HDR10", "SMPTE ST 2086"] + ): name += " HDR" else: name += f" {DYNAMIC_RANGE_MAP.get(hdr_format)} " diff --git a/unshackle/core/titles/movie.py b/unshackle/core/titles/movie.py index 1545b18..bda68df 100644 --- a/unshackle/core/titles/movie.py +++ b/unshackle/core/titles/movie.py @@ -136,7 +136,10 @@ class Movie(Title): if hdr_format: if hdr_format_full.startswith("Dolby Vision"): name += " DV" - if any(indicator in hdr_format_full for indicator in ["HDR10", "SMPTE ST 2086"]): + if any( + indicator in (hdr_format_full + " " + hdr_format) + for indicator in ["HDR10", "SMPTE ST 2086"] + ): name += " HDR" else: name += f" {DYNAMIC_RANGE_MAP.get(hdr_format)} " From 6740dd3dfa9059841f687854f3fbbec198971899 Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 14 Jan 2026 23:00:43 +0000 Subject: [PATCH 05/20] Revert "fix(vaults): batch bulk key operations to avoid query limits" This reverts commit 7e7bc7aecf7ed7b9c4eb21b0cb15b0cbe1633333. 
--- unshackle/vaults/API.py | 61 ++++++++++++++------------------------ unshackle/vaults/SQLite.py | 22 ++------------ 2 files changed, 25 insertions(+), 58 deletions(-) diff --git a/unshackle/vaults/API.py b/unshackle/vaults/API.py index d627ecc..0cc52fe 100644 --- a/unshackle/vaults/API.py +++ b/unshackle/vaults/API.py @@ -114,49 +114,32 @@ class API(Vault): return added or updated def add_keys(self, service: str, kid_keys: dict[Union[UUID, str], str]) -> int: - # Normalize keys - normalized_keys = {str(kid).replace("-", ""): key for kid, key in kid_keys.items()} - kid_list = list(normalized_keys.keys()) + data = self.session.post( + url=f"{self.uri}/{service.lower()}", + json={"content_keys": {str(kid).replace("-", ""): key for kid, key in kid_keys.items()}}, + headers={"Accept": "application/json"}, + ).json() - if not kid_list: - return 0 + code = int(data.get("code", 0)) + message = data.get("message") + error = { + 0: None, + 1: Exceptions.AuthRejected, + 2: Exceptions.TooManyRequests, + 3: Exceptions.ServiceTagInvalid, + 4: Exceptions.KeyIdInvalid, + 5: Exceptions.ContentKeyInvalid, + }.get(code, ValueError) - # Batch requests to avoid server limits - batch_size = 500 - total_added = 0 + if error: + raise error(f"{message} ({code})") - for i in range(0, len(kid_list), batch_size): - batch_kids = kid_list[i : i + batch_size] - batch_keys = {kid: normalized_keys[kid] for kid in batch_kids} + # each kid:key that was new to the vault (optional) + added = int(data.get("added")) + # each key for a kid that was changed/updated (optional) + updated = int(data.get("updated")) - data = self.session.post( - url=f"{self.uri}/{service.lower()}", - json={"content_keys": batch_keys}, - headers={"Accept": "application/json"}, - ).json() - - code = int(data.get("code", 0)) - message = data.get("message") - error = { - 0: None, - 1: Exceptions.AuthRejected, - 2: Exceptions.TooManyRequests, - 3: Exceptions.ServiceTagInvalid, - 4: Exceptions.KeyIdInvalid, - 5: 
Exceptions.ContentKeyInvalid, - }.get(code, ValueError) - - if error: - raise error(f"{message} ({code})") - - # each kid:key that was new to the vault (optional) - added = int(data.get("added", 0)) - # each key for a kid that was changed/updated (optional) - updated = int(data.get("updated", 0)) - - total_added += added + updated - - return total_added + return added + updated def get_services(self) -> Iterator[str]: data = self.session.post(url=self.uri, headers={"Accept": "application/json"}).json() diff --git a/unshackle/vaults/SQLite.py b/unshackle/vaults/SQLite.py index a3f6447..f1922d7 100644 --- a/unshackle/vaults/SQLite.py +++ b/unshackle/vaults/SQLite.py @@ -119,25 +119,9 @@ class SQLite(Vault): cursor = conn.cursor() try: - # Query existing KIDs in batches to avoid SQLite variable limit - # Try larger batch first (newer SQLite supports 32766), fall back to 500 if needed - existing_kids: set[str] = set() - kid_list = list(kid_keys.keys()) - batch_size = 32000 - - i = 0 - while i < len(kid_list): - batch = kid_list[i : i + batch_size] - placeholders = ",".join(["?"] * len(batch)) - try: - cursor.execute(f"SELECT kid FROM `{service}` WHERE kid IN ({placeholders})", batch) - existing_kids.update(row[0] for row in cursor.fetchall()) - i += batch_size - except sqlite3.OperationalError as e: - if "too many SQL variables" in str(e) and batch_size > 500: - batch_size = 500 - continue - raise + placeholders = ",".join(["?"] * len(kid_keys)) + cursor.execute(f"SELECT kid FROM `{service}` WHERE kid IN ({placeholders})", list(kid_keys.keys())) + existing_kids = {row[0] for row in cursor.fetchall()} new_keys = {kid: key for kid, key in kid_keys.items() if kid not in existing_kids} From 415544775b6cb437347f70f7fc60abcc76b60a31 Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 14 Jan 2026 23:04:54 +0000 Subject: [PATCH 06/20] fix(vaults): adaptive batch sizing for bulk key operations --- unshackle/vaults/API.py | 83 ++++++++++++++++++++++++++++---------- 
unshackle/vaults/SQLite.py | 22 ++++++++-- 2 files changed, 80 insertions(+), 25 deletions(-) diff --git a/unshackle/vaults/API.py b/unshackle/vaults/API.py index 0cc52fe..dad9607 100644 --- a/unshackle/vaults/API.py +++ b/unshackle/vaults/API.py @@ -114,32 +114,71 @@ class API(Vault): return added or updated def add_keys(self, service: str, kid_keys: dict[Union[UUID, str], str]) -> int: - data = self.session.post( - url=f"{self.uri}/{service.lower()}", - json={"content_keys": {str(kid).replace("-", ""): key for kid, key in kid_keys.items()}}, - headers={"Accept": "application/json"}, - ).json() + # Normalize keys + normalized_keys = {str(kid).replace("-", ""): key for kid, key in kid_keys.items()} + kid_list = list(normalized_keys.keys()) - code = int(data.get("code", 0)) - message = data.get("message") - error = { - 0: None, - 1: Exceptions.AuthRejected, - 2: Exceptions.TooManyRequests, - 3: Exceptions.ServiceTagInvalid, - 4: Exceptions.KeyIdInvalid, - 5: Exceptions.ContentKeyInvalid, - }.get(code, ValueError) + if not kid_list: + return 0 - if error: - raise error(f"{message} ({code})") + # Try batches starting at 500, stepping down by 100 on failure, fallback to 1 + batch_size = 500 + total_added = 0 + i = 0 - # each kid:key that was new to the vault (optional) - added = int(data.get("added")) - # each key for a kid that was changed/updated (optional) - updated = int(data.get("updated")) + while i < len(kid_list): + batch_kids = kid_list[i : i + batch_size] + batch_keys = {kid: normalized_keys[kid] for kid in batch_kids} - return added + updated + try: + response = self.session.post( + url=f"{self.uri}/{service.lower()}", + json={"content_keys": batch_keys}, + headers={"Accept": "application/json"}, + ) + + # Check for HTTP errors that suggest batch is too large + if response.status_code in (413, 414, 400) and batch_size > 1: + if batch_size > 100: + batch_size -= 100 + else: + batch_size = 1 + continue + + data = response.json() + except Exception: + # JSON 
decode error or connection issue - try smaller batch + if batch_size > 1: + if batch_size > 100: + batch_size -= 100 + else: + batch_size = 1 + continue + raise + + code = int(data.get("code", 0)) + message = data.get("message") + error = { + 0: None, + 1: Exceptions.AuthRejected, + 2: Exceptions.TooManyRequests, + 3: Exceptions.ServiceTagInvalid, + 4: Exceptions.KeyIdInvalid, + 5: Exceptions.ContentKeyInvalid, + }.get(code, ValueError) + + if error: + raise error(f"{message} ({code})") + + # each kid:key that was new to the vault (optional) + added = int(data.get("added", 0)) + # each key for a kid that was changed/updated (optional) + updated = int(data.get("updated", 0)) + + total_added += added + updated + i += batch_size + + return total_added def get_services(self) -> Iterator[str]: data = self.session.post(url=self.uri, headers={"Accept": "application/json"}).json() diff --git a/unshackle/vaults/SQLite.py b/unshackle/vaults/SQLite.py index f1922d7..a3f6447 100644 --- a/unshackle/vaults/SQLite.py +++ b/unshackle/vaults/SQLite.py @@ -119,9 +119,25 @@ class SQLite(Vault): cursor = conn.cursor() try: - placeholders = ",".join(["?"] * len(kid_keys)) - cursor.execute(f"SELECT kid FROM `{service}` WHERE kid IN ({placeholders})", list(kid_keys.keys())) - existing_kids = {row[0] for row in cursor.fetchall()} + # Query existing KIDs in batches to avoid SQLite variable limit + # Try larger batch first (newer SQLite supports 32766), fall back to 500 if needed + existing_kids: set[str] = set() + kid_list = list(kid_keys.keys()) + batch_size = 32000 + + i = 0 + while i < len(kid_list): + batch = kid_list[i : i + batch_size] + placeholders = ",".join(["?"] * len(batch)) + try: + cursor.execute(f"SELECT kid FROM `{service}` WHERE kid IN ({placeholders})", batch) + existing_kids.update(row[0] for row in cursor.fetchall()) + i += batch_size + except sqlite3.OperationalError as e: + if "too many SQL variables" in str(e) and batch_size > 500: + batch_size = 500 + continue + 
raise new_keys = {kid: key for kid, key in kid_keys.items() if kid not in existing_kids} From a7b6e9e6801ea6b97c19a167848b3da7e6d8090e Mon Sep 17 00:00:00 2001 From: Andy Date: Thu, 15 Jan 2026 02:49:56 +0000 Subject: [PATCH 07/20] feat(drm): add CDM-aware PlayReady fallback detection Add PlayReady PSSH/KID extraction from track and init data with CDM-aware ordering. When PlayReady CDM is selected, tries PlayReady first then falls back to Widevine. When Widevine CDM is selected (default), tries Widevine first then falls back to PlayReady. --- unshackle/core/manifests/dash.py | 22 +++++++++++++++++----- unshackle/core/tracks/track.py | 23 +++++++++++++++++------ 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/unshackle/core/manifests/dash.py b/unshackle/core/manifests/dash.py index 68fdded..4763f6a 100644 --- a/unshackle/core/manifests/dash.py +++ b/unshackle/core/manifests/dash.py @@ -18,6 +18,7 @@ import requests from curl_cffi.requests import Session as CurlSession from langcodes import Language, tag_is_valid from lxml.etree import Element, ElementTree +from pyplayready.cdm import Cdm as PlayReadyCdm from pyplayready.system.pssh import PSSH as PR_PSSH from pywidevine.cdm import Cdm as WidevineCdm from pywidevine.pssh import PSSH @@ -466,11 +467,22 @@ class DASH: track.data["dash"]["segment_durations"] = segment_durations if not track.drm and isinstance(track, (Video, Audio)): - try: - track.drm = [Widevine.from_init_data(init_data)] - except Widevine.Exceptions.PSSHNotFound: - # it might not have Widevine DRM, or might not have found the PSSH - log.warning("No Widevine PSSH was found for this track, is it DRM free?") + if isinstance(cdm, PlayReadyCdm): + try: + track.drm = [PlayReady.from_init_data(init_data)] + except PlayReady.Exceptions.PSSHNotFound: + try: + track.drm = [Widevine.from_init_data(init_data)] + except Widevine.Exceptions.PSSHNotFound: + log.warning("No PlayReady or Widevine PSSH was found for this track, is it DRM free?") + else: 
+ try: + track.drm = [Widevine.from_init_data(init_data)] + except Widevine.Exceptions.PSSHNotFound: + try: + track.drm = [PlayReady.from_init_data(init_data)] + except PlayReady.Exceptions.PSSHNotFound: + log.warning("No Widevine or PlayReady PSSH was found for this track, is it DRM free?") if track.drm: track_kid = track_kid or track.get_key_id(url=segments[0][0], session=session) diff --git a/unshackle/core/tracks/track.py b/unshackle/core/tracks/track.py index 0b1a38f..3e0d7df 100644 --- a/unshackle/core/tracks/track.py +++ b/unshackle/core/tracks/track.py @@ -295,12 +295,23 @@ class Track: try: if not self.drm and track_type in ("Video", "Audio"): # the service might not have explicitly defined the `drm` property - # try find widevine DRM information from the init data of URL - try: - self.drm = [Widevine.from_track(self, session)] - except Widevine.Exceptions.PSSHNotFound: - # it might not have Widevine DRM, or might not have found the PSSH - log.warning("No Widevine PSSH was found for this track, is it DRM free?") + # try find DRM information from the init data of URL based on CDM type + if isinstance(cdm, PlayReadyCdm): + try: + self.drm = [PlayReady.from_track(self, session)] + except PlayReady.Exceptions.PSSHNotFound: + try: + self.drm = [Widevine.from_track(self, session)] + except Widevine.Exceptions.PSSHNotFound: + log.warning("No PlayReady or Widevine PSSH was found for this track, is it DRM free?") + else: + try: + self.drm = [Widevine.from_track(self, session)] + except Widevine.Exceptions.PSSHNotFound: + try: + self.drm = [PlayReady.from_track(self, session)] + except PlayReady.Exceptions.PSSHNotFound: + log.warning("No Widevine or PlayReady PSSH was found for this track, is it DRM free?") if self.drm: track_kid = self.get_key_id(session=session) From d1d3daf750e9a56d63c35a1c1cf9ea64f0398c29 Mon Sep 17 00:00:00 2001 From: Andy Date: Thu, 15 Jan 2026 03:07:18 +0000 Subject: [PATCH 08/20] chore(release): bump version to 2.2.0 --- CHANGELOG.md | 25 
+++++++++++++++++++++++++ pyproject.toml | 2 +- unshackle/core/__init__.py | 2 +- uv.lock | 2 +- 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9625999..f477a7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,31 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [2.2.0] - 2026-01-15 + +### Added + +- **CDM-Aware PlayReady Fallback Detection**: Intelligent DRM fallback based on selected CDM + - Adds PlayReady PSSH/KID extraction from track and init data with CDM-aware ordering + - When PlayReady CDM is selected, tries PlayReady first then falls back to Widevine + - When Widevine CDM is selected (default), tries Widevine first then falls back to PlayReady +- **Comprehensive Debug Logging**: Enhanced debug logging for downloaders and muxing + - Added detailed debug logging to aria2c, curl_impersonate, n_m3u8dl_re, and requests downloaders + - Enhanced manifest parsers (DASH, HLS, ISM) with debug logging + - Added debug logging to track muxing operations + +### Fixed + +- **Hybrid DV+HDR10 Filename Detection**: Fixed HDR10 detection in hybrid Dolby Vision filenames + - Hybrid DV+HDR10 files were incorrectly named "DV.H.265" instead of "DV.HDR.H.265" + - Now checks both `hdr_format_full` and `hdr_format_commercial` fields for HDR10 indicators +- **Vault Adaptive Batch Sizing**: Improved bulk key operations with adaptive batch sizing + - Prevents query limit issues when retrieving large numbers of keys from vaults + - Dynamically adjusts batch sizes based on vault response characteristics +- **Test Command Improvements**: Enhanced test command error detection and sorting + - Improved error detection in test command output + - Added natural sorting for test results + ## [2.1.0] - 2025-11-27 ### Added diff --git 
a/pyproject.toml b/pyproject.toml index 5c91c9b..68b8a79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "unshackle" -version = "2.1.0" +version = "2.2.0" description = "Modular Movie, TV, and Music Archival Software." authors = [{ name = "unshackle team" }] requires-python = ">=3.10,<3.13" diff --git a/unshackle/core/__init__.py b/unshackle/core/__init__.py index 9aa3f90..8a124bf 100644 --- a/unshackle/core/__init__.py +++ b/unshackle/core/__init__.py @@ -1 +1 @@ -__version__ = "2.1.0" +__version__ = "2.2.0" diff --git a/uv.lock b/uv.lock index f2ad4bb..9ca0001 100644 --- a/uv.lock +++ b/uv.lock @@ -1565,7 +1565,7 @@ wheels = [ [[package]] name = "unshackle" -version = "2.1.0" +version = "2.2.0" source = { editable = "." } dependencies = [ { name = "aiohttp-swagger3" }, From 44acfbdc89f29db8cc9754b6704fc2e6fbb24425 Mon Sep 17 00:00:00 2001 From: Andy Date: Thu, 15 Jan 2026 12:48:18 +0000 Subject: [PATCH 09/20] fix(drm): correct PSSH system ID comparison in PlayReady Remove erroneous `.bytes` accessor from PSSH.SYSTEM_ID comparisons in from_track() and from_init_data() methods. The pyplayready PSSH.SYSTEM_ID is already the correct type for comparison with parsed PSSH box system_ID values. 
--- unshackle/core/drm/playready.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unshackle/core/drm/playready.py b/unshackle/core/drm/playready.py index b1fcea0..7376764 100644 --- a/unshackle/core/drm/playready.py +++ b/unshackle/core/drm/playready.py @@ -168,7 +168,7 @@ class PlayReady: pssh_boxes.extend(list(get_boxes(init_data, b"pssh"))) tenc_boxes.extend(list(get_boxes(init_data, b"tenc"))) - pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID.bytes), None) + pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID), None) if not pssh: raise PlayReady.Exceptions.PSSHNotFound("PSSH was not found in track data.") @@ -197,7 +197,7 @@ class PlayReady: if enc_key_id: kid = UUID(bytes=base64.b64decode(enc_key_id)) - pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID.bytes), None) + pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID), None) if not pssh: raise PlayReady.Exceptions.PSSHNotFound("PSSH was not found in track data.") From b01fc3c8d1d8b903cbe6208c0b021c1b3dc34c98 Mon Sep 17 00:00:00 2001 From: Andy Date: Thu, 15 Jan 2026 12:50:22 +0000 Subject: [PATCH 10/20] fix(dash): handle placeholder KIDs and improve DRM init from segments - Add CENC namespace support for kid/default_KID attributes - Detect and replace placeholder/test KIDs in Widevine PSSH: - All zeros (key rotation default) - Sequential 0x00-0x0f pattern - Shaka Packager test pattern - Change DRM init condition from `not track.drm` to `init_data` to ensure DRM is always re-initialized from init segments Fixes issue where Widevine PSSH contains placeholder KIDs while the real KID is only in ContentProtection default_KID attributes. 
--- unshackle/core/manifests/dash.py | 33 +++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/unshackle/core/manifests/dash.py b/unshackle/core/manifests/dash.py index 4763f6a..576a8db 100644 --- a/unshackle/core/manifests/dash.py +++ b/unshackle/core/manifests/dash.py @@ -466,7 +466,7 @@ class DASH: track.data["dash"]["timescale"] = int(segment_timescale) track.data["dash"]["segment_durations"] = segment_durations - if not track.drm and isinstance(track, (Video, Audio)): + if init_data and isinstance(track, (Video, Audio)): if isinstance(cdm, PlayReadyCdm): try: track.drm = [PlayReady.from_init_data(init_data)] @@ -766,6 +766,11 @@ class DASH: @staticmethod def get_drm(protections: list[Element]) -> list[DRM_T]: drm: list[DRM_T] = [] + PLACEHOLDER_KIDS = { + UUID("00000000-0000-0000-0000-000000000000"), # All zeros (key rotation default) + UUID("00010203-0405-0607-0809-0a0b0c0d0e0f"), # Sequential 0x00-0x0f + UUID("00010203-0405-0607-0809-101112131415"), # Shaka Packager test pattern + } for protection in protections: urn = (protection.get("schemeIdUri") or "").lower() @@ -775,17 +780,27 @@ class DASH: if not pssh_text: continue pssh = PSSH(pssh_text) + kid_attr = protection.get("kid") or protection.get("{urn:mpeg:cenc:2013}kid") + kid = UUID(bytes=base64.b64decode(kid_attr)) if kid_attr else None - kid = protection.get("kid") - if kid: - kid = UUID(bytes=base64.b64decode(kid)) + if not kid: + default_kid_attr = protection.get("default_KID") or protection.get( + "{urn:mpeg:cenc:2013}default_KID" + ) + kid = UUID(default_kid_attr) if default_kid_attr else None - default_kid = protection.get("default_KID") - if default_kid: - kid = UUID(default_kid) + if not kid: + kid = next( + ( + UUID(p.get("default_KID") or p.get("{urn:mpeg:cenc:2013}default_KID")) + for p in protections + if p.get("default_KID") or p.get("{urn:mpeg:cenc:2013}default_KID") + ), + None, + ) - if not pssh.key_ids and not kid: - kid = 
next((UUID(p.get("default_KID")) for p in protections if p.get("default_KID")), None) + if kid and (not pssh.key_ids or all(k.int == 0 or k in PLACEHOLDER_KIDS for k in pssh.key_ids)): + pssh.set_key_ids([kid]) drm.append(Widevine(pssh=pssh, kid=kid)) From a01f335cfc6cc8cf1decda04b82f68aa3dfc925e Mon Sep 17 00:00:00 2001 From: Andy Date: Fri, 16 Jan 2026 13:25:34 +0000 Subject: [PATCH 11/20] fix(dash): handle N_m3u8DL-RE merge and decryption - Add skip_merge flag for N_m3u8DL-RE to prevent duplicate init data - Pass content_keys to N_m3u8DL-RE for internal decryption handling - Use shutil.move() instead of manual merge when skip_merge is True - Skip manual decryption when N_m3u8DL-RE handles it internally Fixes audio corruption ("Box 'OG 2' size is too large") when using N_m3u8DL-RE with DASH manifests that have SegmentBase init data. The init segment was being written twice: once by N_m3u8DL-RE during its internal merge, and again by dash.py during post-processing. --- unshackle/core/manifests/dash.py | 78 +++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/unshackle/core/manifests/dash.py b/unshackle/core/manifests/dash.py index 576a8db..ce0d2a7 100644 --- a/unshackle/core/manifests/dash.py +++ b/unshackle/core/manifests/dash.py @@ -5,6 +5,7 @@ import html import logging import math import re +import shutil import sys from copy import copy from functools import partial @@ -527,8 +528,16 @@ class DASH: max_workers=max_workers, ) + skip_merge = False if downloader.__name__ == "n_m3u8dl_re": - downloader_args.update({"filename": track.id, "track": track}) + skip_merge = True + downloader_args.update( + { + "filename": track.id, + "track": track, + "content_keys": drm.content_keys if drm else None, + } + ) debug_logger = get_debug_logger() if debug_logger: @@ -543,6 +552,7 @@ class DASH: "downloader": downloader.__name__, "has_drm": bool(track.drm), "drm_types": [drm.__class__.__name__ for drm in (track.drm or [])], + 
"skip_merge": skip_merge, "save_path": str(save_path), "has_init_data": bool(init_data), }, @@ -563,42 +573,56 @@ class DASH: control_file.unlink() segments_to_merge = [x for x in sorted(save_dir.iterdir()) if x.is_file()] - with open(save_path, "wb") as f: - if init_data: - f.write(init_data) - if len(segments_to_merge) > 1: - progress(downloaded="Merging", completed=0, total=len(segments_to_merge)) - for segment_file in segments_to_merge: - segment_data = segment_file.read_bytes() - # TODO: fix encoding after decryption? - if ( - not drm - and isinstance(track, Subtitle) - and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML) - ): - segment_data = try_ensure_utf8(segment_data) - segment_data = ( - segment_data.decode("utf8") - .replace("‎", html.unescape("‎")) - .replace("‏", html.unescape("‏")) - .encode("utf8") - ) - f.write(segment_data) - f.flush() - segment_file.unlink() - progress(advance=1) + + if skip_merge: + # N_m3u8DL-RE handles merging and decryption internally + shutil.move(segments_to_merge[0], save_path) + if drm: + track.drm = None + events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=None) + else: + with open(save_path, "wb") as f: + if init_data: + f.write(init_data) + if len(segments_to_merge) > 1: + progress(downloaded="Merging", completed=0, total=len(segments_to_merge)) + for segment_file in segments_to_merge: + segment_data = segment_file.read_bytes() + # TODO: fix encoding after decryption? 
+ if ( + not drm + and isinstance(track, Subtitle) + and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML) + ): + segment_data = try_ensure_utf8(segment_data) + segment_data = ( + segment_data.decode("utf8") + .replace("‎", html.unescape("‎")) + .replace("‏", html.unescape("‏")) + .encode("utf8") + ) + f.write(segment_data) + f.flush() + segment_file.unlink() + progress(advance=1) track.path = save_path events.emit(events.Types.TRACK_DOWNLOADED, track=track) - if drm: + if not skip_merge and drm: progress(downloaded="Decrypting", completed=0, total=100) drm.decrypt(save_path) track.drm = None events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=None) progress(downloaded="Decrypting", advance=100) - save_dir.rmdir() + # Clean up empty segment directory + if save_dir.exists() and save_dir.name.endswith("_segments"): + try: + save_dir.rmdir() + except OSError: + # Directory might not be empty, try removing recursively + shutil.rmtree(save_dir, ignore_errors=True) progress(downloaded="Downloaded") From d0cefa9d58a6a1d2c526ce6016dd0eeca70ff0a0 Mon Sep 17 00:00:00 2001 From: Andy Date: Fri, 16 Jan 2026 13:26:15 +0000 Subject: [PATCH 12/20] fix(drm): include shaka-packager binary in error messages --- unshackle/core/drm/playready.py | 2 +- unshackle/core/drm/widevine.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/unshackle/core/drm/playready.py b/unshackle/core/drm/playready.py index 7376764..9091833 100644 --- a/unshackle/core/drm/playready.py +++ b/unshackle/core/drm/playready.py @@ -415,7 +415,7 @@ class PlayReady: p.wait() if p.returncode != 0 or had_error: - raise subprocess.CalledProcessError(p.returncode, arguments) + raise subprocess.CalledProcessError(p.returncode, [binaries.ShakaPackager, *arguments]) path.unlink() if not stream_skipped: diff --git a/unshackle/core/drm/widevine.py b/unshackle/core/drm/widevine.py index 7fee1c9..f4de4bc 100644 --- a/unshackle/core/drm/widevine.py +++ 
b/unshackle/core/drm/widevine.py @@ -371,7 +371,7 @@ class Widevine: p.wait() if p.returncode != 0 or had_error: - raise subprocess.CalledProcessError(p.returncode, arguments) + raise subprocess.CalledProcessError(p.returncode, [binaries.ShakaPackager, *arguments]) path.unlink() if not stream_skipped: From 18b05340202d2d589fb7abe6657e2496b40eaeef Mon Sep 17 00:00:00 2001 From: Andy Date: Fri, 16 Jan 2026 13:42:11 +0000 Subject: [PATCH 13/20] fix(subs): strip whitespace from ASS font names Use removeprefix instead of removesuffix and add strip() to handle ASS subtitle files that have spaces after commas in Style definitions. Fixes #57 --- unshackle/commands/dl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unshackle/commands/dl.py b/unshackle/commands/dl.py index cfd09ec..71549a3 100644 --- a/unshackle/commands/dl.py +++ b/unshackle/commands/dl.py @@ -1567,7 +1567,7 @@ class dl: if subtitle.codec == Subtitle.Codec.SubStationAlphav4: for line in subtitle.path.read_text("utf8").splitlines(): if line.startswith("Style: "): - font_names.append(line.removesuffix("Style: ").split(",")[1]) + font_names.append(line.removeprefix("Style: ").split(",")[1].strip()) font_count, missing_fonts = self.attach_subtitle_fonts( font_names, title, temp_font_files From 68ad76cbb0458c6e1f1820ec478eee2b62b94176 Mon Sep 17 00:00:00 2001 From: Andy Date: Fri, 16 Jan 2026 13:43:50 +0000 Subject: [PATCH 14/20] feat(config): add unicode_filenames option to preserve native characters Add config option to disable ASCII transliteration in filenames, allowing preservation of Korean, Japanese, Chinese, and other native language characters instead of converting them via unidecode. 
Closes #49 --- unshackle/core/config.py | 1 + unshackle/core/utilities.py | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/unshackle/core/config.py b/unshackle/core/config.py index 6eb7b26..1c50d62 100644 --- a/unshackle/core/config.py +++ b/unshackle/core/config.py @@ -95,6 +95,7 @@ class Config: self.update_check_interval: int = kwargs.get("update_check_interval", 24) self.scene_naming: bool = kwargs.get("scene_naming", True) self.series_year: bool = kwargs.get("series_year", True) + self.unicode_filenames: bool = kwargs.get("unicode_filenames", False) self.title_cache_time: int = kwargs.get("title_cache_time", 1800) # 30 minutes default self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400) # 24 hours default diff --git a/unshackle/core/utilities.py b/unshackle/core/utilities.py index 5aaf6f0..7a78535 100644 --- a/unshackle/core/utilities.py +++ b/unshackle/core/utilities.py @@ -120,9 +120,14 @@ def sanitize_filename(filename: str, spacer: str = ".") -> str: The spacer is safer to be a '.' for older DDL and p2p sharing spaces. This includes web-served content via direct links and such. + + Set `unicode_filenames: true` in config to preserve native language + characters (Korean, Japanese, Chinese, etc.) instead of transliterating + them to ASCII equivalents. 
""" - # replace all non-ASCII characters with ASCII equivalents - filename = unidecode(filename) + # optionally replace non-ASCII characters with ASCII equivalents + if not config.unicode_filenames: + filename = unidecode(filename) # remove or replace further characters as needed filename = "".join(c for c in filename if unicodedata.category(c) != "Mn") # hidden characters From aec33338882b2349b07279da7282b5146f606213 Mon Sep 17 00:00:00 2001 From: Andy Date: Fri, 16 Jan 2026 14:16:47 +0000 Subject: [PATCH 15/20] fix(subs): handle negative TTML values in multi-value attributes The previous regex only matched negative size values when they were the entire quoted attribute (e.g., "-5%"). This failed for multi-value attributes like tts:extent="-5% 7.5%" causing pycaption parse errors. The new pattern matches negative values anywhere in the text and preserves the unit during replacement. Closes #47 --- unshackle/core/tracks/subtitle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unshackle/core/tracks/subtitle.py b/unshackle/core/tracks/subtitle.py index e807bff..2fb5594 100644 --- a/unshackle/core/tracks/subtitle.py +++ b/unshackle/core/tracks/subtitle.py @@ -631,7 +631,7 @@ class Subtitle(Track): text = try_ensure_utf8(data).decode("utf8") text = text.replace("tt:", "") # negative size values aren't allowed in TTML/DFXP spec, replace with 0 - text = re.sub(r'"(-\d+(\.\d+)?(px|em|%|c|pt))"', '"0"', text) + text = re.sub(r'-(\d+(?:\.\d+)?)(px|em|%|c|pt)', r'0\2', text) caption_set = pycaption.DFXPReader().read(text) elif codec == Subtitle.Codec.fVTT: caption_lists: dict[str, pycaption.CaptionList] = defaultdict(pycaption.CaptionList) From 4e11f69a58634780564dca8a101690a74dda894f Mon Sep 17 00:00:00 2001 From: Andy Date: Sat, 17 Jan 2026 13:36:57 +0000 Subject: [PATCH 16/20] fix(drm): filter Widevine PSSH by system ID instead of sorting The previous sorting approach crashed with KeyError when unsupported DRM systems were present in the init segment. 
Now uses direct filtering --- unshackle/core/drm/widevine.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/unshackle/core/drm/widevine.py b/unshackle/core/drm/widevine.py index f4de4bc..6ca4fb5 100644 --- a/unshackle/core/drm/widevine.py +++ b/unshackle/core/drm/widevine.py @@ -100,9 +100,7 @@ class Widevine: pssh_boxes.extend(list(get_boxes(init_data, b"pssh"))) tenc_boxes.extend(list(get_boxes(init_data, b"tenc"))) - pssh_boxes.sort(key=lambda b: {PSSH.SystemId.Widevine: 0, PSSH.SystemId.PlayReady: 1}[b.system_ID]) - - pssh = next(iter(pssh_boxes), None) + pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SystemId.Widevine), None) if not pssh: raise Widevine.Exceptions.PSSHNotFound("PSSH was not found in track data.") @@ -141,9 +139,7 @@ class Widevine: if enc_key_id: kid = UUID(bytes=base64.b64decode(enc_key_id)) - pssh_boxes.sort(key=lambda b: {PSSH.SystemId.Widevine: 0, PSSH.SystemId.PlayReady: 1}[b.system_ID]) - - pssh = next(iter(pssh_boxes), None) + pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SystemId.Widevine), None) if not pssh: raise Widevine.Exceptions.PSSHNotFound("PSSH was not found in track data.") From e99cfddaec9d8f18e8f7c03ad17d285b69a2cad4 Mon Sep 17 00:00:00 2001 From: Andy Date: Sun, 18 Jan 2026 04:44:08 +0000 Subject: [PATCH 17/20] fix(subs): handle WebVTT cue identifiers and overlapping multi-line cues Some services use WebVTT files with: - Cue identifiers (Q0, Q1, etc.) 
before timing lines that pysubs2/pycaption incorrectly parses as subtitle text - Multi-line subtitles split into separate cues with 1ms offset times and different line: positions (e.g., line:77% for top, line:84% for bottom) Added detection and sanitization functions: - has_webvtt_cue_identifiers(): detects cue identifiers before timing - sanitize_webvtt_cue_identifiers(): removes problematic cue identifiers - has_overlapping_webvtt_cues(): detects overlapping cues needing merge - merge_overlapping_webvtt_cues(): merges cues sorted by line position --- unshackle/core/tracks/subtitle.py | 247 +++++++++++++++++++++++++++++- 1 file changed, 246 insertions(+), 1 deletion(-) diff --git a/unshackle/core/tracks/subtitle.py b/unshackle/core/tracks/subtitle.py index 2fb5594..9a6767d 100644 --- a/unshackle/core/tracks/subtitle.py +++ b/unshackle/core/tracks/subtitle.py @@ -91,6 +91,12 @@ class Subtitle(Track): return Subtitle.Codec.TimedTextMarkupLang raise ValueError(f"The Content Profile '{profile}' is not a supported Subtitle Codec") + # WebVTT sanitization patterns (compiled once for performance) + _CUE_ID_PATTERN = re.compile(r"^[A-Za-z]+\d+$") + _TIMING_START_PATTERN = re.compile(r"^\d+:\d+[:\.]") + _TIMING_LINE_PATTERN = re.compile(r"^((?:\d+:)?\d+:\d+[.,]\d+)\s*-->\s*((?:\d+:)?\d+:\d+[.,]\d+)(.*)$") + _LINE_POS_PATTERN = re.compile(r"line:(\d+(?:\.\d+)?%?)") + def __init__( self, *args: Any, @@ -239,6 +245,11 @@ class Subtitle(Track): # Sanitize WebVTT timestamps before parsing text = Subtitle.sanitize_webvtt_timestamps(text) + # Remove cue identifiers that confuse parsers like pysubs2 + text = Subtitle.sanitize_webvtt_cue_identifiers(text) + # Merge overlapping cues with line positioning into single multi-line cues + text = Subtitle.merge_overlapping_webvtt_cues(text) + preserve_formatting = config.subtitle.get("preserve_formatting", True) if preserve_formatting: @@ -277,6 +288,240 @@ class Subtitle(Track): # Replace negative timestamps with 00:00:00.000 return 
re.sub(r"(-\d+:\d+:\d+\.\d+)", "00:00:00.000", text) + @staticmethod + def has_webvtt_cue_identifiers(text: str) -> bool: + """ + Check if WebVTT content has cue identifiers that need removal. + + Parameters: + text: The WebVTT content as string + + Returns: + True if cue identifiers are detected, False otherwise + """ + lines = text.split("\n") + + for i, line in enumerate(lines): + line = line.strip() + if Subtitle._CUE_ID_PATTERN.match(line): + # Look ahead to see if next non-empty line is a timing line + j = i + 1 + while j < len(lines) and not lines[j].strip(): + j += 1 + if j < len(lines) and ("-->" in lines[j] or Subtitle._TIMING_START_PATTERN.match(lines[j].strip())): + return True + return False + + @staticmethod + def sanitize_webvtt_cue_identifiers(text: str) -> str: + """ + Remove WebVTT cue identifiers that can confuse subtitle parsers. + + Some services use cue identifiers like "Q0", "Q1", etc. + that appear on their own line before the timing line. These can be + incorrectly parsed as part of the previous cue's text content by + some parsers (like pysubs2). + + Parameters: + text: The WebVTT content as string + + Returns: + Sanitized WebVTT content with cue identifiers removed + """ + if not Subtitle.has_webvtt_cue_identifiers(text): + return text + + lines = text.split("\n") + sanitized_lines = [] + + i = 0 + while i < len(lines): + line = lines[i].strip() + + # Check if this line is a cue identifier followed by a timing line + if Subtitle._CUE_ID_PATTERN.match(line): + # Look ahead to see if next non-empty line is a timing line + j = i + 1 + while j < len(lines) and not lines[j].strip(): + j += 1 + if j < len(lines) and ("-->" in lines[j] or Subtitle._TIMING_START_PATTERN.match(lines[j].strip())): + # This is a cue identifier, skip it + i += 1 + continue + + sanitized_lines.append(lines[i]) + i += 1 + + return "\n".join(sanitized_lines) + + @staticmethod + def _parse_vtt_time(t: str) -> int: + """Parse WebVTT timestamp to milliseconds. 
Returns 0 for malformed input.""" + try: + t = t.replace(",", ".") + parts = t.split(":") + if len(parts) == 2: + m, s = parts + h = "0" + elif len(parts) >= 3: + h, m, s = parts[:3] + else: + return 0 + sec_parts = s.split(".") + secs = int(sec_parts[0]) + # Handle variable millisecond digits (e.g., .5 = 500ms, .50 = 500ms, .500 = 500ms) + ms = int(sec_parts[1].ljust(3, "0")[:3]) if len(sec_parts) > 1 else 0 + return int(h) * 3600000 + int(m) * 60000 + secs * 1000 + ms + except (ValueError, IndexError): + return 0 + + @staticmethod + def has_overlapping_webvtt_cues(text: str) -> bool: + """ + Check if WebVTT content has overlapping cues that need merging. + + Detects cues with start times within 50ms of each other and the same end time, + which indicates multi-line subtitles split into separate cues. + + Parameters: + text: The WebVTT content as string + + Returns: + True if overlapping cues are detected, False otherwise + """ + timings = [] + for line in text.split("\n"): + match = Subtitle._TIMING_LINE_PATTERN.match(line) + if match: + start_str, end_str = match.group(1), match.group(2) + timings.append((Subtitle._parse_vtt_time(start_str), Subtitle._parse_vtt_time(end_str))) + + # Check for overlapping cues (within 50ms start, same end) + for i in range(len(timings) - 1): + curr_start, curr_end = timings[i] + next_start, next_end = timings[i + 1] + if abs(curr_start - next_start) <= 50 and curr_end == next_end: + return True + + return False + + @staticmethod + def merge_overlapping_webvtt_cues(text: str) -> str: + """ + Merge WebVTT cues that have overlapping/near-identical times but different line positions. + + Some services use separate cues for each line of a multi-line subtitle, with + slightly different start times (1ms apart) and different line: positions. + This merges them into single cues with proper line ordering based on the + line: position (lower percentage = higher on screen = first line). 
+ + Parameters: + text: The WebVTT content as string + + Returns: + WebVTT content with overlapping cues merged + """ + if not Subtitle.has_overlapping_webvtt_cues(text): + return text + + lines = text.split("\n") + cues = [] + header_lines = [] + in_header = True + i = 0 + + while i < len(lines): + line = lines[i] + + if in_header: + if "-->" in line: + in_header = False + else: + header_lines.append(line) + i += 1 + continue + + match = Subtitle._TIMING_LINE_PATTERN.match(line) + if match: + start_str, end_str, settings = match.groups() + line_pos = 100.0 # Default to bottom + line_match = Subtitle._LINE_POS_PATTERN.search(settings) + if line_match: + pos_str = line_match.group(1).rstrip("%") + line_pos = float(pos_str) + + content_lines = [] + i += 1 + while i < len(lines) and lines[i].strip() and "-->" not in lines[i]: + content_lines.append(lines[i]) + i += 1 + + cues.append( + { + "start_ms": Subtitle._parse_vtt_time(start_str), + "end_ms": Subtitle._parse_vtt_time(end_str), + "start_str": start_str, + "end_str": end_str, + "line_pos": line_pos, + "content": "\n".join(content_lines), + "settings": settings, + } + ) + else: + i += 1 + + # Merge overlapping cues (within 50ms of each other with same end time) + merged_cues = [] + i = 0 + while i < len(cues): + current = cues[i] + group = [current] + + j = i + 1 + while j < len(cues): + other = cues[j] + if abs(current["start_ms"] - other["start_ms"]) <= 50 and current["end_ms"] == other["end_ms"]: + group.append(other) + j += 1 + else: + break + + if len(group) > 1: + # Sort by line position (lower % = higher on screen = first) + group.sort(key=lambda x: x["line_pos"]) + # Use the earliest start time from the group + earliest = min(group, key=lambda x: x["start_ms"]) + merged_cues.append( + { + "start_str": earliest["start_str"], + "end_str": group[0]["end_str"], + "content": "\n".join(c["content"] for c in group), + "settings": "", + } + ) + else: + merged_cues.append( + { + "start_str": current["start_str"], + 
"end_str": current["end_str"], + "content": current["content"], + "settings": current["settings"], + } + ) + + i = j if len(group) > 1 else i + 1 + + result_lines = header_lines[:] + if result_lines and result_lines[-1].strip(): + result_lines.append("") + + for cue in merged_cues: + result_lines.append(f"{cue['start_str']} --> {cue['end_str']}{cue['settings']}") + result_lines.append(cue["content"]) + result_lines.append("") + + return "\n".join(result_lines) + @staticmethod def sanitize_webvtt(text: str) -> str: """ @@ -631,7 +876,7 @@ class Subtitle(Track): text = try_ensure_utf8(data).decode("utf8") text = text.replace("tt:", "") # negative size values aren't allowed in TTML/DFXP spec, replace with 0 - text = re.sub(r'-(\d+(?:\.\d+)?)(px|em|%|c|pt)', r'0\2', text) + text = re.sub(r"-(\d+(?:\.\d+)?)(px|em|%|c|pt)", r"0\2", text) caption_set = pycaption.DFXPReader().read(text) elif codec == Subtitle.Codec.fVTT: caption_lists: dict[str, pycaption.CaptionList] = defaultdict(pycaption.CaptionList) From abd8fc2eb95940948ee802d628a7f38fc9687e6a Mon Sep 17 00:00:00 2001 From: Andy Date: Sun, 18 Jan 2026 19:03:14 +0000 Subject: [PATCH 18/20] chore(release): bump version to 2.3.0 --- CHANGELOG.md | 33 +++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- unshackle/core/__init__.py | 2 +- uv.lock | 2 +- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f477a7c..343b09e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,39 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [2.3.0] - 2026-01-18 + +### Added + +- **Unicode Filenames Option**: New `unicode_filenames` config option to preserve native characters + - Allows disabling ASCII transliteration in filenames + - Preserves Korean, Japanese, Chinese, and other native language characters + - Closes #49 + +### Fixed + +- **WebVTT Cue Handling**: Handle WebVTT cue identifiers and overlapping multi-line cues + - Added detection and sanitization for cue identifiers (Q0, Q1, etc.) before timing lines + - Added merging of overlapping cues with different line positions into multi-line subtitles + - Fixes parsing issues with pysubs2/pycaption on certain WebVTT files +- **Widevine PSSH Filtering**: Filter Widevine PSSH by system ID instead of sorting + - Fixes KeyError crash when unsupported DRM systems are present in init segments +- **TTML Negative Values**: Handle negative values in multi-value TTML attributes + - Fixes pycaption parse errors for attributes like `tts:extent="-5% 7.5%"` + - Closes #47 +- **ASS Font Names**: Strip whitespace from ASS font names + - Handles ASS subtitle files with spaces after commas in Style definitions + - Fixes #57 +- **Shaka-Packager Error Messages**: Include shaka-packager binary path in error messages +- **N_m3u8DL-RE Merge and Decryption**: Handle merge and decryption properly + - Prevents audio corruption ("Box 'OG 2' size is too large") with DASH manifests + - Fixes duplicate init segment writing when using N_m3u8DL-RE +- **DASH Placeholder KIDs**: Handle placeholder KIDs and improve DRM init from segments + - Detects and replaces placeholder/test KIDs in Widevine PSSH + - Adds CENC namespace support for kid/default_KID attributes +- **PlayReady PSSH Comparison**: Correct PSSH system ID comparison in PlayReady + - Removes erroneous `.bytes` accessor from PSSH.SYSTEM_ID comparisons + ## [2.2.0] - 2026-01-15 ### Added diff --git a/pyproject.toml b/pyproject.toml index 68b8a79..80d989d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 
@@ build-backend = "hatchling.build" [project] name = "unshackle" -version = "2.2.0" +version = "2.3.0" description = "Modular Movie, TV, and Music Archival Software." authors = [{ name = "unshackle team" }] requires-python = ">=3.10,<3.13" diff --git a/unshackle/core/__init__.py b/unshackle/core/__init__.py index 8a124bf..55e4709 100644 --- a/unshackle/core/__init__.py +++ b/unshackle/core/__init__.py @@ -1 +1 @@ -__version__ = "2.2.0" +__version__ = "2.3.0" diff --git a/uv.lock b/uv.lock index 9ca0001..ee38efe 100644 --- a/uv.lock +++ b/uv.lock @@ -1565,7 +1565,7 @@ wheels = [ [[package]] name = "unshackle" -version = "2.2.0" +version = "2.3.0" source = { editable = "." } dependencies = [ { name = "aiohttp-swagger3" }, From 90a7db2e4637533622f704a692dc39b37c21c116 Mon Sep 17 00:00:00 2001 From: Andy Date: Mon, 19 Jan 2026 00:32:19 +0000 Subject: [PATCH 19/20] fix(subs): update SubtitleEdit CLI syntax and respect conversion_method - Use lowercase format names (subrip, webvtt, advancedsubstationalpha) to match SubtitleEdit 4.x CLI requirements - Change /Convert to /convert for consistency with CLI docs - Convert Path objects to strings explicitly for subprocess calls - Respect conversion_method config in SDH stripping - skip SubtitleEdit when user has set pysubs2/pycaption/subby as their preferred method - Add stderr suppression to SubtitleEdit calls --- unshackle/core/tracks/subtitle.py | 68 +++++++++++++++++++------------ 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/unshackle/core/tracks/subtitle.py b/unshackle/core/tracks/subtitle.py index 9a6767d..b11181c 100644 --- a/unshackle/core/tracks/subtitle.py +++ b/unshackle/core/tracks/subtitle.py @@ -810,13 +810,18 @@ class Subtitle(Track): if binaries.SubtitleEdit and self.codec not in (Subtitle.Codec.fTTML, Subtitle.Codec.fVTT): sub_edit_format = { - Subtitle.Codec.SubStationAlphav4: "AdvancedSubStationAlpha", - Subtitle.Codec.TimedTextMarkupLang: "TimedText1.0", - }.get(codec, codec.name) + 
Subtitle.Codec.SubRip: "subrip", + Subtitle.Codec.SubStationAlpha: "substationalpha", + Subtitle.Codec.SubStationAlphav4: "advancedsubstationalpha", + Subtitle.Codec.TimedTextMarkupLang: "timedtext1.0", + Subtitle.Codec.WebVTT: "webvtt", + Subtitle.Codec.SAMI: "sami", + Subtitle.Codec.MicroDVD: "microdvd", + }.get(codec, codec.name.lower()) sub_edit_args = [ - binaries.SubtitleEdit, - "/Convert", - self.path, + str(binaries.SubtitleEdit), + "/convert", + str(self.path), sub_edit_format, f"/outputfilename:{output_path.name}", "/encoding:utf8", @@ -1207,18 +1212,26 @@ class Subtitle(Track): except Exception: pass # Fall through to other methods - if binaries.SubtitleEdit: - if self.codec == Subtitle.Codec.SubStationAlphav4: - output_format = "AdvancedSubStationAlpha" - elif self.codec == Subtitle.Codec.TimedTextMarkupLang: - output_format = "TimedText1.0" - else: - output_format = self.codec.name + conversion_method = config.subtitle.get("conversion_method", "auto") + use_subtitleedit = sdh_method == "subtitleedit" or ( + sdh_method == "auto" and conversion_method in ("auto", "subtitleedit") + ) + + if binaries.SubtitleEdit and use_subtitleedit: + output_format = { + Subtitle.Codec.SubRip: "subrip", + Subtitle.Codec.SubStationAlpha: "substationalpha", + Subtitle.Codec.SubStationAlphav4: "advancedsubstationalpha", + Subtitle.Codec.TimedTextMarkupLang: "timedtext1.0", + Subtitle.Codec.WebVTT: "webvtt", + Subtitle.Codec.SAMI: "sami", + Subtitle.Codec.MicroDVD: "microdvd", + }.get(self.codec, self.codec.name.lower()) subprocess.run( [ - binaries.SubtitleEdit, - "/Convert", - self.path, + str(binaries.SubtitleEdit), + "/convert", + str(self.path), output_format, "/encoding:utf8", "/overwrite", @@ -1226,6 +1239,7 @@ class Subtitle(Track): ], check=True, stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, ) else: if config.subtitle.get("convert_before_strip", True) and self.codec != Subtitle.Codec.SubRip: @@ -1267,18 +1281,21 @@ class Subtitle(Track): if not 
binaries.SubtitleEdit: raise EnvironmentError("SubtitleEdit executable not found...") - if self.codec == Subtitle.Codec.SubStationAlphav4: - output_format = "AdvancedSubStationAlpha" - elif self.codec == Subtitle.Codec.TimedTextMarkupLang: - output_format = "TimedText1.0" - else: - output_format = self.codec.name + output_format = { + Subtitle.Codec.SubRip: "subrip", + Subtitle.Codec.SubStationAlpha: "substationalpha", + Subtitle.Codec.SubStationAlphav4: "advancedsubstationalpha", + Subtitle.Codec.TimedTextMarkupLang: "timedtext1.0", + Subtitle.Codec.WebVTT: "webvtt", + Subtitle.Codec.SAMI: "sami", + Subtitle.Codec.MicroDVD: "microdvd", + }.get(self.codec, self.codec.name.lower()) subprocess.run( [ - binaries.SubtitleEdit, - "/Convert", - self.path, + str(binaries.SubtitleEdit), + "/convert", + str(self.path), output_format, "/ReverseRtlStartEnd", "/encoding:utf8", @@ -1286,6 +1303,7 @@ class Subtitle(Track): ], check=True, stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, ) From 477fd7f2eb1f2bc75f8b3ae2bea5ae0568eb5ced Mon Sep 17 00:00:00 2001 From: Andy Date: Mon, 19 Jan 2026 20:18:45 +0000 Subject: [PATCH 20/20] fix(n_m3u8dl_re): include language in DASH audio track selection When DASH manifests have multiple audio AdaptationSets with the same representation IDs (e.g., both English and Japanese having id="0"), N_m3u8DL-RE would download the same track twice. Now includes the language alongside the ID in selection args to properly disambiguate tracks across adaptation sets. 
--- unshackle/core/downloaders/n_m3u8dl_re.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/unshackle/core/downloaders/n_m3u8dl_re.py b/unshackle/core/downloaders/n_m3u8dl_re.py index 815c697..5598a63 100644 --- a/unshackle/core/downloaders/n_m3u8dl_re.py +++ b/unshackle/core/downloaders/n_m3u8dl_re.py @@ -67,12 +67,17 @@ def get_track_selection_args(track: Any) -> list[str]: parts = [] if track_type == "Audio": - if track_id := representation.get("id") or adaptation_set.get("audioTrackId"): + track_id = representation.get("id") or adaptation_set.get("audioTrackId") + lang = representation.get("lang") or adaptation_set.get("lang") + + if track_id: parts.append(rf'"id=\b{track_id}\b"') + if lang: + parts.append(f"lang={lang}") else: if codecs := representation.get("codecs"): parts.append(f"codecs={codecs}") - if lang := representation.get("lang") or adaptation_set.get("lang"): + if lang: parts.append(f"lang={lang}") if bw := representation.get("bandwidth"): bitrate = int(bw) // 1000