From c323db94810e40d546cd8f567cf8d1d874473fd1 Mon Sep 17 00:00:00 2001
From: Andy
Date: Thu, 19 Mar 2026 18:13:43 -0600
Subject: [PATCH 1/8] feat(downloader): consolidate into unified requests-based downloader

Replace 4 separate downloaders (requests, curl_impersonate, aria2c,
n_m3u8dl_re) with a single optimized requests downloader with adaptive
chunk sizing and session passthrough for TLS fingerprinting support.

- Adaptive chunk sizing (512KB-4MB) based on content length, up from fixed 1KB
- Buffered writes (1MB buffer) for improved I/O throughput
- Session passthrough: accepts both requests.Session and CurlSession
- Per-call speed tracking with rolling window (fixes cross-track speed bleed)
- Worker count default capped at 16
- Removed all downloader.__name__ special-casing from manifest parsers
- Removed aria2c/curl_impersonate/n_m3u8dl_re downloader modules
- Deprecated downloader config key in unshackle.yaml
---
 unshackle/commands/dl.py                  |   3 -
 unshackle/core/config.py                  |  16 +-
 unshackle/core/downloaders/__init__.py    |   5 +-
 unshackle/core/downloaders/aria2c.py      | 543 -----------------
 .../core/downloaders/curl_impersonate.py  | 308 ----------
 unshackle/core/downloaders/n_m3u8dl_re.py | 548 ------------------
 unshackle/core/downloaders/requests.py    | 179 +++---
 unshackle/core/manifests/dash.py          | 122 +---
 unshackle/core/manifests/hls.py           |  43 +-
 unshackle/core/manifests/ism.py           |  79 +--
 unshackle/core/tracks/track.py            |  24 +-
 11 files changed, 179 insertions(+), 1691 deletions(-)
 delete mode 100644 unshackle/core/downloaders/aria2c.py
 delete mode 100644 unshackle/core/downloaders/curl_impersonate.py
 delete mode 100644 unshackle/core/downloaders/n_m3u8dl_re.py

diff --git a/unshackle/commands/dl.py b/unshackle/commands/dl.py
index c04c0be..603c685 100644
--- a/unshackle/commands/dl.py
+++ b/unshackle/commands/dl.py
@@ -723,9 +723,6 @@ class dl:
                 pass
             merge_dict(config.services.get(self.service), self.service_config)
 
-        if getattr(config, "downloader_map", None):
-            config.downloader = config.downloader_map.get(self.service, config.downloader)
-
         if getattr(config, "decryption_map", None):
             config.decryption = config.decryption_map.get(self.service, config.decryption)
 
diff --git a/unshackle/core/config.py b/unshackle/core/config.py
index 686043e..51e9a9a 100644
--- a/unshackle/core/config.py
+++ b/unshackle/core/config.py
@@ -41,8 +41,6 @@ class Config:
 
     def __init__(self, **kwargs: Any):
         self.dl: dict = kwargs.get("dl") or {}
-        self.aria2c: dict = kwargs.get("aria2c") or {}
-        self.n_m3u8dl_re: dict = kwargs.get("n_m3u8dl_re") or {}
         self.cdm: dict = kwargs.get("cdm") or {}
         self.chapter_fallback_name: str = kwargs.get("chapter_fallback_name") or ""
         self.curl_impersonate: dict = kwargs.get("curl_impersonate") or {}
@@ -60,13 +58,13 @@ class Config:
             else:
                 setattr(self.directories, name, Path(path).expanduser())
 
-        downloader_cfg = kwargs.get("downloader") or "requests"
-        if isinstance(downloader_cfg, dict):
-            self.downloader_map = {k.upper(): v for k, v in downloader_cfg.items()}
-            self.downloader = self.downloader_map.get("DEFAULT", "requests")
-        else:
-            self.downloader_map = {}
-            self.downloader = downloader_cfg
+        downloader_cfg = kwargs.get("downloader")
+        if downloader_cfg and downloader_cfg != "requests":
+            warnings.warn(
+                f"downloader '{downloader_cfg}' is deprecated. The unified requests downloader is now used.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
 
         self.filenames = self._Filenames()
         for name, filename in (kwargs.get("filenames") or {}).items():
diff --git a/unshackle/core/downloaders/__init__.py b/unshackle/core/downloaders/__init__.py
index aa0aecb..66d3b42 100644
--- a/unshackle/core/downloaders/__init__.py
+++ b/unshackle/core/downloaders/__init__.py
@@ -1,6 +1,3 @@
-from .aria2c import aria2c
-from .curl_impersonate import curl_impersonate
-from .n_m3u8dl_re import n_m3u8dl_re
 from .requests import requests
 
-__all__ = ("aria2c", "curl_impersonate", "requests", "n_m3u8dl_re")
+__all__ = ("requests",)
diff --git a/unshackle/core/downloaders/aria2c.py b/unshackle/core/downloaders/aria2c.py
deleted file mode 100644
index 8620b09..0000000
--- a/unshackle/core/downloaders/aria2c.py
+++ /dev/null
@@ -1,543 +0,0 @@
-import logging
-import os
-import subprocess
-import textwrap
-import threading
-import time
-from functools import partial
-from http.cookiejar import CookieJar
-from pathlib import Path
-from typing import Any, Callable, Generator, MutableMapping, Optional, Union
-from urllib.parse import urlparse
-
-import requests
-from Crypto.Random import get_random_bytes
-from requests import Session
-from requests.cookies import cookiejar_from_dict, get_cookie_header
-from rich import filesize
-from rich.text import Text
-
-from unshackle.core import binaries
-from unshackle.core.config import config
-from unshackle.core.console import console
-from unshackle.core.constants import DOWNLOAD_CANCELLED
-from unshackle.core.utilities import get_debug_logger, get_extension, get_free_port
-
-
-def rpc(caller: Callable, secret: str, method: str, params: Optional[list[Any]] = None) -> Any:
-    """Make a call to Aria2's JSON-RPC API."""
-    try:
-        rpc_res = caller(
-            json={
-                "jsonrpc": "2.0",
-                "id": get_random_bytes(16).hex(),
-                "method": method,
-                "params": [f"token:{secret}", *(params or [])],
-            }
-        ).json()
-        if rpc_res.get("code"):
-            # wrap to console width - padding - '[Aria2c]: '
-            error_pretty = "\n ".join(
-                textwrap.wrap(
-                    f"RPC Error: {rpc_res['message']} ({rpc_res['code']})".strip(),
-                    width=console.width - 20,
-                    initial_indent="",
-                )
-            )
-            console.log(Text.from_ansi("\n[Aria2c]: " + error_pretty))
-        return rpc_res["result"]
-    except requests.exceptions.ConnectionError:
-        # absorb, process likely ended as it was calling RPC
-        return
-
-
-class _Aria2Manager:
-    """Singleton manager to run one aria2c process and enqueue downloads via RPC."""
-
-    def __init__(self) -> None:
-        self._logger = logging.getLogger(__name__)
-        self._proc: Optional[subprocess.Popen] = None
-        self._rpc_port: Optional[int] = None
-        self._rpc_secret: Optional[str] = None
-        self._rpc_uri: Optional[str] = None
-        self._session: Session = Session()
-        self._max_workers: Optional[int] = None
-        self._max_concurrent_downloads: int = 0
-        self._max_connection_per_server: int = 1
-        self._split_default: int = 5
-        self._file_allocation: str = "prealloc"
-        self._proxy: Optional[str] = None
-        self._lock: threading.Lock = threading.Lock()
-
-    def _wait_for_rpc_ready(self, timeout_s: float = 8.0, interval_s: float = 0.1) -> None:
-        assert self._proc is not None
-        assert self._rpc_uri is not None
-        assert self._rpc_secret is not None
-
-        deadline = time.monotonic() + timeout_s
-
-        payload = {
-            "jsonrpc": "2.0",
-            "id": get_random_bytes(16).hex(),
-            "method": "aria2.getVersion",
-            "params": [f"token:{self._rpc_secret}"],
-        }
-
-        while time.monotonic() < deadline:
-            if self._proc.poll() is not 
None: - raise RuntimeError( - f"aria2c exited before RPC became ready (exit code {self._proc.returncode})" - ) - try: - res = self._session.post(self._rpc_uri, json=payload, timeout=0.25) - data = res.json() - if isinstance(data, dict) and data.get("result") is not None: - return - except (requests.exceptions.RequestException, ValueError): - # Not ready yet (connection refused / bad response / etc.) - pass - time.sleep(interval_s) - - # Timed out: ensure we don't leave a zombie/stray aria2c process behind. - try: - self._proc.terminate() - self._proc.wait(timeout=2) - except Exception: - try: - self._proc.kill() - self._proc.wait(timeout=2) - except Exception: - pass - raise TimeoutError(f"aria2c RPC did not become ready within {timeout_s:.1f}s") - - def _build_args(self) -> list[str]: - args = [ - "--continue=true", - f"--max-concurrent-downloads={self._max_concurrent_downloads}", - f"--max-connection-per-server={self._max_connection_per_server}", - f"--split={self._split_default}", - "--max-file-not-found=5", - "--max-tries=5", - "--retry-wait=2", - "--allow-overwrite=true", - "--auto-file-renaming=false", - "--console-log-level=warn", - "--download-result=default", - f"--file-allocation={self._file_allocation}", - "--summary-interval=0", - "--enable-rpc=true", - f"--rpc-listen-port={self._rpc_port}", - f"--rpc-secret={self._rpc_secret}", - ] - if self._proxy: - args.extend(["--all-proxy", self._proxy]) - return args - - def ensure_started( - self, - proxy: Optional[str], - max_workers: Optional[int], - ) -> None: - with self._lock: - if not binaries.Aria2: - debug_logger = get_debug_logger() - if debug_logger: - debug_logger.log( - level="ERROR", - operation="downloader_aria2c_binary_missing", - message="Aria2c executable not found in PATH or local binaries directory", - context={"searched_names": ["aria2c", "aria2"]}, - ) - raise EnvironmentError("Aria2c executable not found...") - - effective_proxy = proxy or None - - if not max_workers: - effective_max_workers = min(32, (os.cpu_count() or 1) + 4) - elif not isinstance(max_workers, int): - raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}") - else: - effective_max_workers = max_workers - - if self._proc and self._proc.poll() is None: - if effective_proxy != self._proxy or effective_max_workers != self._max_workers: - self._logger.warning( - "aria2c process is already running; requested proxy=%r, max_workers=%r, " - "but running process will continue with proxy=%r, max_workers=%r", - effective_proxy, - effective_max_workers, - self._proxy, - self._max_workers, - ) - return - - self._rpc_port = get_free_port() - self._rpc_secret = get_random_bytes(16).hex() - self._rpc_uri = f"http://127.0.0.1:{self._rpc_port}/jsonrpc" - - self._max_workers = effective_max_workers - self._max_concurrent_downloads = int( - config.aria2c.get("max_concurrent_downloads", effective_max_workers) - ) - self._max_connection_per_server = int(config.aria2c.get("max_connection_per_server", 1)) - self._split_default = int(config.aria2c.get("split", 5)) - self._file_allocation = config.aria2c.get("file_allocation", "prealloc") - self._proxy = effective_proxy - - args = self._build_args() - self._proc = subprocess.Popen( - [binaries.Aria2, *args], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL - ) - self._wait_for_rpc_ready() - - @property - def rpc_uri(self) -> str: - assert self._rpc_uri - return self._rpc_uri - - @property - def rpc_secret(self) -> str: - assert self._rpc_secret - return self._rpc_secret - - @property - def 
session(self) -> Session: - return self._session - - def add_uris(self, uris: list[str], options: dict[str, Any]) -> str: - """Add a single download with multiple URIs via RPC.""" - gid = rpc( - caller=partial(self._session.post, url=self.rpc_uri), - secret=self.rpc_secret, - method="aria2.addUri", - params=[uris, options], - ) - return gid or "" - - def get_global_stat(self) -> dict[str, Any]: - return rpc( - caller=partial(self.session.post, url=self.rpc_uri), - secret=self.rpc_secret, - method="aria2.getGlobalStat", - ) or {} - - def tell_status(self, gid: str) -> Optional[dict[str, Any]]: - return rpc( - caller=partial(self.session.post, url=self.rpc_uri), - secret=self.rpc_secret, - method="aria2.tellStatus", - params=[gid, ["status", "errorCode", "errorMessage", "files", "completedLength", "totalLength"]], - ) - - def remove(self, gid: str) -> None: - rpc( - caller=partial(self.session.post, url=self.rpc_uri), - secret=self.rpc_secret, - method="aria2.forceRemove", - params=[gid], - ) - - -_manager = _Aria2Manager() - - -def download( - urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]], - output_dir: Path, - filename: str, - headers: Optional[MutableMapping[str, Union[str, bytes]]] = None, - cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None, - proxy: Optional[str] = None, - max_workers: Optional[int] = None, -) -> Generator[dict[str, Any], None, None]: - """Enqueue downloads to the singleton aria2c instance via stdin and track per-call progress via RPC.""" - debug_logger = get_debug_logger() - - if not urls: - raise ValueError("urls must be provided and not empty") - elif not isinstance(urls, (str, dict, list)): - raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}") - - if not output_dir: - raise ValueError("output_dir must be provided") - elif not isinstance(output_dir, Path): - raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}") - - if not filename: - raise ValueError("filename must be provided") - elif not isinstance(filename, str): - raise TypeError(f"Expected filename to be {str}, not {type(filename)}") - - if not isinstance(headers, (MutableMapping, type(None))): - raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}") - - if not isinstance(cookies, (MutableMapping, CookieJar, type(None))): - raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}") - - if not isinstance(proxy, (str, type(None))): - raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}") - - if not max_workers: - max_workers = min(32, (os.cpu_count() or 1) + 4) - elif not isinstance(max_workers, int): - raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}") - - if not isinstance(urls, list): - urls = [urls] - - if cookies and not isinstance(cookies, CookieJar): - cookies = cookiejar_from_dict(cookies) - - _manager.ensure_started(proxy=proxy, max_workers=max_workers) - - if debug_logger: - first_url = urls[0] if isinstance(urls[0], str) else urls[0].get("url", "") - url_display = first_url[:200] + "..." 
if len(first_url) > 200 else first_url - debug_logger.log( - level="DEBUG", - operation="downloader_aria2c_start", - message="Starting Aria2c download", - context={ - "binary_path": str(binaries.Aria2), - "url_count": len(urls), - "first_url": url_display, - "output_dir": str(output_dir), - "filename": filename, - "has_proxy": bool(proxy), - }, - ) - - # Build options for each URI and add via RPC - gids: list[str] = [] - - for i, url in enumerate(urls): - if isinstance(url, str): - url_data = {"url": url} - else: - url_data: dict[str, Any] = url - - url_filename = filename.format(i=i, ext=get_extension(url_data["url"])) - - opts: dict[str, Any] = { - "dir": str(output_dir), - "out": url_filename, - "split": str(1 if len(urls) > 1 else int(config.aria2c.get("split", 5))), - } - - # Cookies as header - if cookies: - mock_request = requests.Request(url=url_data["url"]) - cookie_header = get_cookie_header(cookies, mock_request) - if cookie_header: - opts.setdefault("header", []).append(f"Cookie: {cookie_header}") - - # Global headers - for header, value in (headers or {}).items(): - if header.lower() == "cookie": - raise ValueError("You cannot set Cookies as a header manually, please use the `cookies` param.") - if header.lower() == "accept-encoding": - continue - if header.lower() == "referer": - opts["referer"] = str(value) - continue - if header.lower() == "user-agent": - opts["user-agent"] = str(value) - continue - opts.setdefault("header", []).append(f"{header}: {value}") - - # Per-url extra args - for key, value in url_data.items(): - if key == "url": - continue - if key == "headers": - for header_name, header_value in value.items(): - opts.setdefault("header", []).append(f"{header_name}: {header_value}") - else: - opts[key] = str(value) - - # Add via RPC - gid = _manager.add_uris([url_data["url"]], opts) - if gid: - gids.append(gid) - - yield dict(total=len(gids)) - - completed: set[str] = set() - - try: - while len(completed) < len(gids): - if DOWNLOAD_CANCELLED.is_set(): - # Remove tracked downloads on cancel - for gid in gids: - if gid not in completed: - _manager.remove(gid) - yield dict(downloaded="[yellow]CANCELLED") - raise KeyboardInterrupt() - - stats = _manager.get_global_stat() - dl_speed = int(stats.get("downloadSpeed", -1)) - - # Aggregate progress across all GIDs for this call - total_completed = 0 - total_size = 0 - - # Check each tracked GID - for gid in gids: - if gid in completed: - continue - - status = _manager.tell_status(gid) - if not status: - continue - - completed_length = int(status.get("completedLength", 0)) - total_length = int(status.get("totalLength", 0)) - total_completed += completed_length - total_size += total_length - - state = status.get("status") - if state in ("complete", "error"): - completed.add(gid) - yield dict(completed=len(completed)) - - if state == "error": - used_uri = None - try: - used_uri = next( - uri["uri"] - for file in status.get("files", []) - for uri in file.get("uris", []) - if uri.get("status") == "used" - ) - except Exception: - used_uri = "unknown" - error = f"Download Error (#{gid}): {status.get('errorMessage')} ({status.get('errorCode')}), {used_uri}" - error_pretty = "\n ".join(textwrap.wrap(error, width=console.width - 20, initial_indent="")) - console.log(Text.from_ansi("\n[Aria2c]: " + error_pretty)) - if debug_logger: - debug_logger.log( - level="ERROR", - operation="downloader_aria2c_download_error", - message=f"Aria2c download failed: {status.get('errorMessage')}", - context={ - "gid": gid, - "error_code": 
status.get("errorCode"), - "error_message": status.get("errorMessage"), - "used_uri": used_uri[:200] + "..." if used_uri and len(used_uri) > 200 else used_uri, - "completed_length": status.get("completedLength"), - "total_length": status.get("totalLength"), - }, - ) - raise ValueError(error) - - # Yield aggregate progress for this call's downloads - progress_data = {"advance": 0} - - if len(gids) > 1: - # Multi-file mode (e.g., HLS): Return the count of completed segments - progress_data["completed"] = len(completed) - progress_data["total"] = len(gids) - else: - # Single-file mode: Return the total bytes downloaded - progress_data["completed"] = total_completed - if total_size > 0: - progress_data["total"] = total_size - else: - progress_data["total"] = None - - if dl_speed != -1: - progress_data["downloaded"] = f"{filesize.decimal(dl_speed)}/s" - - yield progress_data - - time.sleep(1) - except KeyboardInterrupt: - DOWNLOAD_CANCELLED.set() - raise - except Exception as e: - DOWNLOAD_CANCELLED.set() - yield dict(downloaded="[red]FAILED") - if debug_logger and not isinstance(e, ValueError): - debug_logger.log( - level="ERROR", - operation="downloader_aria2c_exception", - message=f"Unexpected error during Aria2c download: {e}", - error=e, - context={ - "url_count": len(urls), - "output_dir": str(output_dir), - }, - ) - raise - - -def aria2c( - urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]], - output_dir: Path, - filename: str, - headers: Optional[MutableMapping[str, Union[str, bytes]]] = None, - cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None, - proxy: Optional[str] = None, - max_workers: Optional[int] = None, -) -> Generator[dict[str, Any], None, None]: - """ - Download files using Aria2(c). - https://aria2.github.io - - Yields the following download status updates while chunks are downloading: - - - {total: 100} (100% download total) - - {completed: 1} (1% download progress out of 100%) - - {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s) - - The data is in the same format accepted by rich's progress.update() function. - - Parameters: - urls: Web URL(s) to file(s) to download. You can use a dictionary with the key - "url" for the URI, and other keys for extra arguments to use per-URL. - output_dir: The folder to save the file into. If the save path's directory does - not exist then it will be made automatically. - filename: The filename or filename template to use for each file. The variables - you can use are `i` for the URL index and `ext` for the URL extension. - headers: A mapping of HTTP Header Key/Values to use for all downloads. - cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for all downloads. - proxy: An optional proxy URI to route connections through for all downloads. - max_workers: The maximum amount of threads to use for downloads. Defaults to - min(32,(cpu_count+4)). Use for the --max-concurrent-downloads option. 
- """ - if proxy and not proxy.lower().startswith("http://"): - # Only HTTP proxies are supported by aria2(c) - proxy = urlparse(proxy) - - port = get_free_port() - username, password = get_random_bytes(8).hex(), get_random_bytes(8).hex() - local_proxy = f"http://{username}:{password}@localhost:{port}" - - scheme = {"https": "http+ssl", "socks5h": "socks"}.get(proxy.scheme, proxy.scheme) - - remote_server = f"{scheme}://{proxy.hostname}" - if proxy.port: - remote_server += f":{proxy.port}" - if proxy.username or proxy.password: - remote_server += "#" - if proxy.username: - remote_server += proxy.username - if proxy.password: - remote_server += f":{proxy.password}" - - p = subprocess.Popen( - ["pproxy", "-l", f"http://:{port}#{username}:{password}", "-r", remote_server], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - - try: - yield from download(urls, output_dir, filename, headers, cookies, local_proxy, max_workers) - finally: - p.kill() - p.wait() - return - yield from download(urls, output_dir, filename, headers, cookies, proxy, max_workers) - - -__all__ = ("aria2c",) diff --git a/unshackle/core/downloaders/curl_impersonate.py b/unshackle/core/downloaders/curl_impersonate.py deleted file mode 100644 index d278e91..0000000 --- a/unshackle/core/downloaders/curl_impersonate.py +++ /dev/null @@ -1,308 +0,0 @@ -import math -import time -from concurrent import futures -from concurrent.futures.thread import ThreadPoolExecutor -from http.cookiejar import CookieJar -from pathlib import Path -from typing import Any, Generator, MutableMapping, Optional, Union - -from curl_cffi.requests import Session -from rich import filesize - -from unshackle.core.config import config -from unshackle.core.constants import DOWNLOAD_CANCELLED -from unshackle.core.utilities import get_debug_logger, get_extension - -MAX_ATTEMPTS = 5 -RETRY_WAIT = 2 -CHUNK_SIZE = 1024 -PROGRESS_WINDOW = 5 -BROWSER = config.curl_impersonate.get("browser", "chrome124") - - -def download(url: str, save_path: Path, session: Session, **kwargs: Any) -> Generator[dict[str, Any], None, None]: - """ - Download files using Curl Impersonate. - https://github.com/lwthiker/curl-impersonate - - Yields the following download status updates while chunks are downloading: - - - {total: 123} (there are 123 chunks to download) - - {total: None} (there are an unknown number of chunks to download) - - {advance: 1} (one chunk was downloaded) - - {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s) - - {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size) - - The data is in the same format accepted by rich's progress.update() function. The - `downloaded` key is custom and is not natively accepted by all rich progress bars. - - Parameters: - url: Web URL of a file to download. - save_path: The path to save the file to. If the save path's directory does not - exist then it will be made automatically. - session: The Requests or Curl-Impersonate Session to make HTTP requests with. - Useful to set Header, Cookie, and Proxy data. Connections are saved and - re-used with the session so long as the server keeps the connection alive. - kwargs: Any extra keyword arguments to pass to the session.get() call. Use this - for one-time request changes like a header, cookie, or proxy. For example, - to request Byte-ranges use e.g., `headers={"Range": "bytes=0-128"}`. 
- """ - save_dir = save_path.parent - control_file = save_path.with_name(f"{save_path.name}.!dev") - - save_dir.mkdir(parents=True, exist_ok=True) - - if control_file.exists(): - # consider the file corrupt if the control file exists - save_path.unlink(missing_ok=True) - control_file.unlink() - elif save_path.exists(): - # if it exists, and no control file, then it should be safe - yield dict(file_downloaded=save_path, written=save_path.stat().st_size) - - # TODO: Design a control file format so we know how much of the file is missing - control_file.write_bytes(b"") - - attempts = 1 - try: - while True: - written = 0 - download_sizes = [] - last_speed_refresh = time.time() - - try: - stream = session.get(url, stream=True, **kwargs) - stream.raise_for_status() - - try: - content_length = int(stream.headers.get("Content-Length", "0")) - - # Skip Content-Length validation for compressed responses since - # curl_impersonate automatically decompresses but Content-Length shows compressed size - if stream.headers.get("Content-Encoding", "").lower() in ["gzip", "deflate", "br"]: - content_length = 0 - except ValueError: - content_length = 0 - - if content_length > 0: - yield dict(total=math.ceil(content_length / CHUNK_SIZE)) - else: - # we have no data to calculate total chunks - yield dict(total=None) # indeterminate mode - - with open(save_path, "wb") as f: - for chunk in stream.iter_content(chunk_size=CHUNK_SIZE): - download_size = len(chunk) - f.write(chunk) - written += download_size - - yield dict(advance=1) - - now = time.time() - time_since = now - last_speed_refresh - - download_sizes.append(download_size) - if time_since > PROGRESS_WINDOW or download_size < CHUNK_SIZE: - data_size = sum(download_sizes) - download_speed = math.ceil(data_size / (time_since or 1)) - yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") - last_speed_refresh = now - download_sizes.clear() - - if content_length and written < content_length: - raise IOError(f"Failed to read {content_length} bytes from the track URI.") - - yield dict(file_downloaded=save_path, written=written) - break - except Exception as e: - save_path.unlink(missing_ok=True) - if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS: - raise e - time.sleep(RETRY_WAIT) - attempts += 1 - finally: - control_file.unlink() - - -def curl_impersonate( - urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]], - output_dir: Path, - filename: str, - headers: Optional[MutableMapping[str, Union[str, bytes]]] = None, - cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None, - proxy: Optional[str] = None, - max_workers: Optional[int] = None, -) -> Generator[dict[str, Any], None, None]: - """ - Download files using Curl Impersonate. - https://github.com/lwthiker/curl-impersonate - - Yields the following download status updates while chunks are downloading: - - - {total: 123} (there are 123 chunks to download) - - {total: None} (there are an unknown number of chunks to download) - - {advance: 1} (one chunk was downloaded) - - {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s) - - {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size) - - The data is in the same format accepted by rich's progress.update() function. - However, The `downloaded`, `file_downloaded` and `written` keys are custom and not - natively accepted by rich progress bars. - - Parameters: - urls: Web URL(s) to file(s) to download. 
You can use a dictionary with the key - "url" for the URI, and other keys for extra arguments to use per-URL. - output_dir: The folder to save the file into. If the save path's directory does - not exist then it will be made automatically. - filename: The filename or filename template to use for each file. The variables - you can use are `i` for the URL index and `ext` for the URL extension. - headers: A mapping of HTTP Header Key/Values to use for all downloads. - cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for all downloads. - proxy: An optional proxy URI to route connections through for all downloads. - max_workers: The maximum amount of threads to use for downloads. Defaults to - min(32,(cpu_count+4)). - """ - if not urls: - raise ValueError("urls must be provided and not empty") - elif not isinstance(urls, (str, dict, list)): - raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}") - - if not output_dir: - raise ValueError("output_dir must be provided") - elif not isinstance(output_dir, Path): - raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}") - - if not filename: - raise ValueError("filename must be provided") - elif not isinstance(filename, str): - raise TypeError(f"Expected filename to be {str}, not {type(filename)}") - - if not isinstance(headers, (MutableMapping, type(None))): - raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}") - - if not isinstance(cookies, (MutableMapping, CookieJar, type(None))): - raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}") - - if not isinstance(proxy, (str, type(None))): - raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}") - - if not isinstance(max_workers, (int, type(None))): - raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}") - - debug_logger = get_debug_logger() - - if not isinstance(urls, list): - urls = [urls] - - urls = [ - dict(save_path=save_path, **url) if isinstance(url, dict) else dict(url=url, save_path=save_path) - for i, url in enumerate(urls) - for save_path in [ - output_dir / filename.format(i=i, ext=get_extension(url["url"] if isinstance(url, dict) else url)) - ] - ] - - session = Session(impersonate=BROWSER) - if headers: - headers = {k: v for k, v in headers.items() if k.lower() != "accept-encoding"} - session.headers.update(headers) - if cookies: - session.cookies.update(cookies) - if proxy: - session.proxies.update({"all": proxy}) - - if debug_logger: - first_url = urls[0].get("url", "") if urls else "" - url_display = first_url[:200] + "..." 
if len(first_url) > 200 else first_url - debug_logger.log( - level="DEBUG", - operation="downloader_curl_impersonate_start", - message="Starting curl_impersonate download", - context={ - "url_count": len(urls), - "first_url": url_display, - "output_dir": str(output_dir), - "filename": filename, - "max_workers": max_workers, - "browser": BROWSER, - "has_proxy": bool(proxy), - }, - ) - - yield dict(total=len(urls)) - - download_sizes = [] - last_speed_refresh = time.time() - - with ThreadPoolExecutor(max_workers=max_workers) as pool: - for i, future in enumerate( - futures.as_completed((pool.submit(download, session=session, **url) for url in urls)) - ): - file_path, download_size = None, None - try: - for status_update in future.result(): - if status_update.get("file_downloaded") and status_update.get("written"): - file_path = status_update["file_downloaded"] - download_size = status_update["written"] - elif len(urls) == 1: - # these are per-chunk updates, only useful if it's one big file - yield status_update - except KeyboardInterrupt: - DOWNLOAD_CANCELLED.set() # skip pending track downloads - yield dict(downloaded="[yellow]CANCELLING") - pool.shutdown(wait=True, cancel_futures=True) - yield dict(downloaded="[yellow]CANCELLED") - # tell dl that it was cancelled - # the pool is already shut down, so exiting loop is fine - raise - except Exception as e: - DOWNLOAD_CANCELLED.set() # skip pending track downloads - yield dict(downloaded="[red]FAILING") - pool.shutdown(wait=True, cancel_futures=True) - yield dict(downloaded="[red]FAILED") - if debug_logger: - debug_logger.log( - level="ERROR", - operation="downloader_curl_impersonate_failed", - message=f"curl_impersonate download failed: {e}", - error=e, - context={ - "url_count": len(urls), - "output_dir": str(output_dir), - "browser": BROWSER, - }, - ) - # tell dl that it failed - # the pool is already shut down, so exiting loop is fine - raise - else: - yield dict(file_downloaded=file_path) - yield dict(advance=1) - - now = time.time() - time_since = now - last_speed_refresh - - if download_size: # no size == skipped dl - download_sizes.append(download_size) - - if download_sizes and (time_since > PROGRESS_WINDOW or i == len(urls)): - data_size = sum(download_sizes) - download_speed = math.ceil(data_size / (time_since or 1)) - yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") - last_speed_refresh = now - download_sizes.clear() - - if debug_logger: - debug_logger.log( - level="DEBUG", - operation="downloader_curl_impersonate_complete", - message="curl_impersonate download completed successfully", - context={ - "url_count": len(urls), - "output_dir": str(output_dir), - "filename": filename, - }, - ) - - -__all__ = ("curl_impersonate",) diff --git a/unshackle/core/downloaders/n_m3u8dl_re.py b/unshackle/core/downloaders/n_m3u8dl_re.py deleted file mode 100644 index c88ab3f..0000000 --- a/unshackle/core/downloaders/n_m3u8dl_re.py +++ /dev/null @@ -1,548 +0,0 @@ -import os -import re -import subprocess -import warnings -from http.cookiejar import CookieJar -from pathlib import Path -from typing import Any, Generator, MutableMapping - -import requests -from requests.cookies import cookiejar_from_dict, get_cookie_header - -from unshackle.core import binaries -from unshackle.core.binaries import FFMPEG, Mp4decrypt, ShakaPackager -from unshackle.core.config import config -from unshackle.core.console import console -from unshackle.core.constants import DOWNLOAD_CANCELLED -from unshackle.core.utilities import get_debug_logger - -PERCENT_RE 
= re.compile(r"(\d+\.\d+%)") -SPEED_RE = re.compile(r"(\d+\.\d+(?:MB|KB)ps)") -SIZE_RE = re.compile(r"(\d+\.\d+(?:MB|GB|KB)/\d+\.\d+(?:MB|GB|KB))") -WARN_RE = re.compile(r"(WARN : Response.*|WARN : One or more errors occurred.*)") -ERROR_RE = re.compile(r"(\bERROR\b.*|\bFAILED\b.*|\bException\b.*)") - -DECRYPTION_ENGINE = { - "shaka": "SHAKA_PACKAGER", - "mp4decrypt": "MP4DECRYPT", -} - -# Ignore FutureWarnings -warnings.simplefilter(action="ignore", category=FutureWarning) - - -def get_track_selection_args(track: Any) -> list[str]: - """ - Generates track selection arguments for N_m3u8dl_RE. - - Args: - track: A track object with attributes like descriptor, data, and class name. - - Returns: - A list of strings for track selection. - - Raises: - ValueError: If the manifest type is unsupported or track selection fails. - """ - descriptor = track.descriptor.name - track_type = track.__class__.__name__ - - def _create_args(flag: str, parts: list[str], type_str: str, extra_args: list[str] | None = None) -> list[str]: - if not parts: - raise ValueError(f"[N_m3u8DL-RE]: Unable to select {type_str} track from {descriptor} manifest") - - final_args = [flag, ":".join(parts)] - if extra_args: - final_args.extend(extra_args) - - return final_args - - match descriptor: - case "HLS": - # HLS playlists are direct inputs; no selection arguments needed. - return [] - - case "DASH": - representation = track.data.get("dash", {}).get("representation", {}) - adaptation_set = track.data.get("dash", {}).get("adaptation_set", {}) - parts = [] - - if track_type == "Audio": - track_id = representation.get("id") or adaptation_set.get("audioTrackId") - lang = representation.get("lang") or adaptation_set.get("lang") - - if track_id: - parts.append(rf'"id=\b{track_id}\b"') - if lang: - parts.append(f"lang={lang}") - else: - if codecs := representation.get("codecs"): - parts.append(f"codecs={codecs}") - if lang: - parts.append(f"lang={lang}") - if bw := representation.get("bandwidth"): - bitrate = int(bw) // 1000 - parts.append(f"bwMin={bitrate}:bwMax={bitrate + 5}") - if roles := representation.findall("Role") + adaptation_set.findall("Role"): - if role := next((r.get("value") for r in roles if r.get("value", "").lower() == "main"), None): - parts.append(f"role={role}") - return _create_args("-sa", parts, "audio") - - if track_type == "Video": - if track_id := representation.get("id"): - parts.append(rf'"id=\b{track_id}\b"') - else: - if width := representation.get("width"): - parts.append(f"res={width}*") - if codecs := representation.get("codecs"): - parts.append(f"codecs={codecs}") - if bw := representation.get("bandwidth"): - bitrate = int(bw) // 1000 - parts.append(f"bwMin={bitrate}:bwMax={bitrate + 5}") - return _create_args("-sv", parts, "video") - - if track_type == "Subtitle": - if track_id := representation.get("id"): - parts.append(rf'"id=\b{track_id}\b"') - else: - if lang := representation.get("lang"): - parts.append(f"lang={lang}") - return _create_args("-ss", parts, "subtitle", extra_args=["--auto-subtitle-fix", "false"]) - - case "ISM": - quality_level = track.data.get("ism", {}).get("quality_level", {}) - stream_index = track.data.get("ism", {}).get("stream_index", {}) - parts = [] - - if track_type == "Audio": - if name := stream_index.get("Name") or quality_level.get("Index"): - parts.append(rf'"id=\b{name}\b"') - else: - if codecs := quality_level.get("FourCC"): - parts.append(f"codecs={codecs}") - if lang := stream_index.get("Language"): - parts.append(f"lang={lang}") - if br := 
quality_level.get("Bitrate"): - bitrate = int(br) // 1000 - parts.append(f"bwMin={bitrate}:bwMax={bitrate + 5}") - return _create_args("-sa", parts, "audio") - - if track_type == "Video": - if name := stream_index.get("Name") or quality_level.get("Index"): - parts.append(rf'"id=\b{name}\b"') - else: - if width := quality_level.get("MaxWidth"): - parts.append(f"res={width}*") - if codecs := quality_level.get("FourCC"): - parts.append(f"codecs={codecs}") - if br := quality_level.get("Bitrate"): - bitrate = int(br) // 1000 - parts.append(f"bwMin={bitrate}:bwMax={bitrate + 5}") - return _create_args("-sv", parts, "video") - - # I've yet to encounter a subtitle track in ISM manifests, so this is mostly theoretical. - if track_type == "Subtitle": - if name := stream_index.get("Name") or quality_level.get("Index"): - parts.append(rf'"id=\b{name}\b"') - else: - if lang := stream_index.get("Language"): - parts.append(f"lang={lang}") - return _create_args("-ss", parts, "subtitle", extra_args=["--auto-subtitle-fix", "false"]) - - case "URL": - raise ValueError( - f"[N_m3u8DL-RE]: Direct URL downloads are not supported for {track_type} tracks. " - f"The track should use a different downloader (e.g., 'requests', 'aria2c')." - ) - - raise ValueError(f"[N_m3u8DL-RE]: Unsupported manifest type: {descriptor}") - - -def build_download_args( - track_url: str, - filename: str, - output_dir: Path, - thread_count: int, - retry_count: int, - track_from_file: Path | None, - custom_args: dict[str, Any] | None, - headers: dict[str, Any] | None, - cookies: CookieJar | None, - proxy: str | None, - content_keys: dict[str, str] | None, - ad_keyword: str | None, - skip_merge: bool | None = False, -) -> list[str]: - """Constructs the CLI arguments for N_m3u8DL-RE.""" - - # Default arguments - args = { - "--save-name": filename, - "--save-dir": output_dir, - "--tmp-dir": output_dir, - "--thread-count": thread_count, - "--download-retry-count": retry_count, - } - if FFMPEG: - args["--ffmpeg-binary-path"] = str(FFMPEG) - if proxy: - args["--custom-proxy"] = proxy - if skip_merge: - args["--skip-merge"] = skip_merge - if ad_keyword: - args["--ad-keyword"] = ad_keyword - # Disable segment count validation to work around N_m3u8DL-RE's Math.Ceiling - # bug in duration-based SegmentTemplate calculation (see nilaoda/N_m3u8DL-RE#108) - args["--check-segments-count"] = False - - key_args = [] - if content_keys: - for kid, key in content_keys.items(): - key_args.extend(["--key", f"{kid.hex}:{key.lower()}"]) - - decryption_config = config.decryption.lower() - engine_name = DECRYPTION_ENGINE.get(decryption_config) or "SHAKA_PACKAGER" - args["--decryption-engine"] = engine_name - - binary_path = None - if engine_name == "SHAKA_PACKAGER": - if ShakaPackager: - binary_path = str(ShakaPackager) - elif engine_name == "MP4DECRYPT": - if Mp4decrypt: - binary_path = str(Mp4decrypt) - if binary_path: - args["--decryption-binary-path"] = binary_path - - if custom_args: - args.update(custom_args) - - command = [track_from_file or track_url] - for flag, value in args.items(): - if value is True: - command.append(flag) - elif value is False: - command.extend([flag, "false"]) - elif value is not False and value is not None: - command.extend([flag, str(value)]) - - # Append all content keys (multiple --key flags supported by N_m3u8DL-RE) - command.extend(key_args) - - if headers: - for key, value in headers.items(): - if key.lower() not in ("accept-encoding", "cookie"): - command.extend(["--header", f"{key}: {value}"]) - - if cookies: - req = 
requests.Request(method="GET", url=track_url) - cookie_header = get_cookie_header(cookies, req) - command.extend(["--header", f"Cookie: {cookie_header}"]) - - return command - - -def download( - urls: str | dict[str, Any] | list[str | dict[str, Any]], - track: Any, - output_dir: Path, - filename: str, - headers: MutableMapping[str, str | bytes] | None, - cookies: MutableMapping[str, str] | CookieJar | None, - proxy: str | None, - max_workers: int | None, - content_keys: dict[str, Any] | None, - skip_merge: bool | None = False, -) -> Generator[dict[str, Any], None, None]: - debug_logger = get_debug_logger() - - if not urls: - raise ValueError("urls must be provided and not empty") - if not isinstance(urls, (str, dict, list)): - raise TypeError(f"Expected urls to be str, dict, or list, not {type(urls)}") - if not isinstance(output_dir, Path): - raise TypeError(f"Expected output_dir to be Path, not {type(output_dir)}") - if not isinstance(filename, str) or not filename: - raise ValueError("filename must be a non-empty string") - if not isinstance(headers, (MutableMapping, type(None))): - raise TypeError(f"Expected headers to be a mapping or None, not {type(headers)}") - if not isinstance(cookies, (MutableMapping, CookieJar, type(None))): - raise TypeError(f"Expected cookies to be a mapping, CookieJar, or None, not {type(cookies)}") - if not isinstance(proxy, (str, type(None))): - raise TypeError(f"Expected proxy to be a str or None, not {type(proxy)}") - if not isinstance(max_workers, (int, type(None))): - raise TypeError(f"Expected max_workers to be an int or None, not {type(max_workers)}") - if not isinstance(content_keys, (dict, type(None))): - raise TypeError(f"Expected content_keys to be a dict or None, not {type(content_keys)}") - if not isinstance(skip_merge, (bool, type(None))): - raise TypeError(f"Expected skip_merge to be a bool or None, not {type(skip_merge)}") - - if cookies and not isinstance(cookies, CookieJar): - cookies = cookiejar_from_dict(cookies) - - if not binaries.N_m3u8DL_RE: - raise EnvironmentError("N_m3u8DL-RE executable not found...") - - effective_max_workers = max_workers or min(32, (os.cpu_count() or 1) + 4) - - if proxy and not config.n_m3u8dl_re.get("use_proxy", True): - proxy = None - - thread_count = config.n_m3u8dl_re.get("thread_count", effective_max_workers) - retry_count = config.n_m3u8dl_re.get("retry_count", 10) - ad_keyword = config.n_m3u8dl_re.get("ad_keyword") - - arguments = build_download_args( - track_url=track.url, - track_from_file=track.from_file, - filename=filename, - output_dir=output_dir, - thread_count=thread_count, - retry_count=retry_count, - custom_args=track.downloader_args, - headers=headers, - cookies=cookies, - proxy=proxy, - content_keys=content_keys, - skip_merge=skip_merge, - ad_keyword=ad_keyword, - ) - selection_args = get_track_selection_args(track) - arguments.extend(selection_args) - - log_file_path: Path | None = None - if debug_logger: - log_file_path = output_dir / f".n_m3u8dl_re_{filename}.log" - arguments.extend([ - "--log-file-path", str(log_file_path), - "--log-level", "DEBUG", - ]) - - track_url_display = track.url[:200] + "..." 
if len(track.url) > 200 else track.url - debug_logger.log( - level="DEBUG", - operation="downloader_n_m3u8dl_re_start", - message="Starting N_m3u8DL-RE download", - context={ - "binary_path": str(binaries.N_m3u8DL_RE), - "track_id": getattr(track, "id", None), - "track_type": track.__class__.__name__, - "track_url": track_url_display, - "output_dir": str(output_dir), - "filename": filename, - "thread_count": thread_count, - "retry_count": retry_count, - "has_content_keys": bool(content_keys), - "content_key_count": len(content_keys) if content_keys else 0, - "has_proxy": bool(proxy), - "skip_merge": skip_merge, - "has_custom_args": bool(track.downloader_args), - "selection_args": selection_args, - "descriptor": track.descriptor.name if hasattr(track, "descriptor") else None, - }, - ) - else: - arguments.extend(["--no-log", "true"]) - - yield {"total": 100} - yield {"downloaded": "Parsing streams..."} - - try: - with subprocess.Popen( - [binaries.N_m3u8DL_RE, *arguments], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - encoding="utf-8", - ) as process: - last_line = "" - track_type = track.__class__.__name__ - - for line in process.stdout: - output = line.strip() - if not output: - continue - last_line = output - - if warn_match := WARN_RE.search(output): - console.log(f"{track_type} {warn_match.group(1)}") - continue - - if speed_match := SPEED_RE.search(output): - size = size_match.group(1) if (size_match := SIZE_RE.search(output)) else "" - yield {"downloaded": f"{speed_match.group(1)} {size}"} - - if percent_match := PERCENT_RE.search(output): - progress = int(percent_match.group(1).split(".", 1)[0]) - yield {"completed": progress} if progress < 100 else {"downloaded": "Merging"} - - process.wait() - - if process.returncode != 0: - if debug_logger and log_file_path: - log_contents = "" - if log_file_path.exists(): - try: - log_contents = log_file_path.read_text(encoding="utf-8", errors="replace") - except Exception: - log_contents = "" - - debug_logger.log( - level="ERROR", - operation="downloader_n_m3u8dl_re_failed", - message=f"N_m3u8DL-RE exited with code {process.returncode}", - context={ - "returncode": process.returncode, - "track_id": getattr(track, "id", None), - "track_type": track.__class__.__name__, - "last_line": last_line, - "log_file_contents": log_contents, - }, - ) - if error_match := ERROR_RE.search(last_line): - raise ValueError(f"[N_m3u8DL-RE]: {error_match.group(1)}") - raise subprocess.CalledProcessError(process.returncode, arguments) - - if debug_logger: - output_dir_exists = output_dir.exists() - output_files = [] - if output_dir_exists: - try: - output_files = [f.name for f in output_dir.iterdir() if f.is_file()][:20] - except Exception: - output_files = [""] - - debug_logger.log( - level="DEBUG", - operation="downloader_n_m3u8dl_re_complete", - message="N_m3u8DL-RE download completed successfully", - context={ - "track_id": getattr(track, "id", None), - "track_type": track.__class__.__name__, - "output_dir": str(output_dir), - "output_dir_exists": output_dir_exists, - "output_files_count": len(output_files), - "output_files": output_files, - "filename": filename, - }, - ) - - # Warn if no output was produced - include N_m3u8DL-RE's logs for diagnosis - if not output_dir_exists or not output_files: - # Read N_m3u8DL-RE's log file for debugging - n_m3u8dl_log = "" - if log_file_path and log_file_path.exists(): - try: - n_m3u8dl_log = log_file_path.read_text(encoding="utf-8", errors="replace") - except Exception: - n_m3u8dl_log = "" - - 
debug_logger.log( - level="WARNING", - operation="downloader_n_m3u8dl_re_no_output", - message="N_m3u8DL-RE exited successfully but produced no output files", - context={ - "track_id": getattr(track, "id", None), - "track_type": track.__class__.__name__, - "output_dir": str(output_dir), - "output_dir_exists": output_dir_exists, - "selection_args": selection_args, - "track_url": track.url[:200] + "..." if len(track.url) > 200 else track.url, - "n_m3u8dl_re_log": n_m3u8dl_log, - }, - ) - - except ConnectionResetError: - # interrupted while passing URI to download - raise KeyboardInterrupt() - except KeyboardInterrupt: - DOWNLOAD_CANCELLED.set() # skip pending track downloads - yield {"downloaded": "[yellow]CANCELLED"} - raise - except Exception as e: - DOWNLOAD_CANCELLED.set() # skip pending track downloads - yield {"downloaded": "[red]FAILED"} - if debug_logger and log_file_path and not isinstance(e, (subprocess.CalledProcessError, ValueError)): - log_contents = "" - if log_file_path.exists(): - try: - log_contents = log_file_path.read_text(encoding="utf-8", errors="replace") - except Exception: - log_contents = "" - - debug_logger.log( - level="ERROR", - operation="downloader_n_m3u8dl_re_exception", - message=f"Unexpected error during N_m3u8DL-RE download: {e}", - error=e, - context={ - "track_id": getattr(track, "id", None), - "track_type": track.__class__.__name__, - "log_file_contents": log_contents, - }, - ) - raise - finally: - # Clean up temporary debug files - if log_file_path and log_file_path.exists(): - try: - log_file_path.unlink() - except Exception: - pass - - -def n_m3u8dl_re( - urls: str | list[str] | dict[str, Any] | list[dict[str, Any]], - track: Any, - output_dir: Path, - filename: str, - headers: MutableMapping[str, str | bytes] | None = None, - cookies: MutableMapping[str, str] | CookieJar | None = None, - proxy: str | None = None, - max_workers: int | None = None, - content_keys: dict[str, Any] | None = None, - skip_merge: bool | None = False, -) -> Generator[dict[str, Any], None, None]: - """ - Download files using N_m3u8DL-RE. - https://github.com/nilaoda/N_m3u8DL-RE - - Yields the following download status updates while chunks are downloading: - - - {total: 100} (100% download total) - - {completed: 1} (1% download progress out of 100%) - - {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s) - - The data is in the same format accepted by rich's progress.update() function. - - Parameters: - urls: Web URL(s) to file(s) to download. NOTE: This parameter is ignored for now. - track: The track to download. Used to get track attributes for the selection - process. Note that Track.Descriptor.URL is not supported by N_m3u8DL-RE. - output_dir: The folder to save the file into. If the save path's directory does - not exist then it will be made automatically. - filename: The filename or filename template to use for each file. - headers: A mapping of HTTP Header Key/Values to use for all downloads. - cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for all downloads. - proxy: A proxy to use for all downloads. - max_workers: The maximum amount of threads to use for downloads. Defaults to - min(32,(cpu_count+4)). Can be set in config with --thread-count option. - content_keys: The content keys to use for decryption. - skip_merge: Whether to skip merging the downloaded chunks. 
- """ - - yield from download( - urls=urls, - track=track, - output_dir=output_dir, - filename=filename, - headers=headers, - cookies=cookies, - proxy=proxy, - max_workers=max_workers, - content_keys=content_keys, - skip_merge=skip_merge, - ) - - -__all__ = ("n_m3u8dl_re",) diff --git a/unshackle/core/downloaders/requests.py b/unshackle/core/downloaders/requests.py index 0cb6b4e..fc9c792 100644 --- a/unshackle/core/downloaders/requests.py +++ b/unshackle/core/downloaders/requests.py @@ -16,19 +16,36 @@ from unshackle.core.utilities import get_debug_logger, get_extension MAX_ATTEMPTS = 5 RETRY_WAIT = 2 -CHUNK_SIZE = 1024 -PROGRESS_WINDOW = 5 +PROGRESS_WINDOW = 2 + +# Adaptive chunk sizing — benchmarked optimal range +MIN_CHUNK = 524_288 # 512KB +MAX_CHUNK = 4_194_304 # 4MB +DEFAULT_CHUNK = 524_288 # 512KB +SPEED_ROLLING_WINDOW = 10 # seconds of history to keep for speed calculation + + +def _adaptive_chunk_size(content_length: int) -> int: + """Pick chunk size based on content length. Benchmarked sweet spot: 512KB-4MB.""" + if content_length <= 0: + return DEFAULT_CHUNK + return min(MAX_CHUNK, max(MIN_CHUNK, content_length // 4)) -DOWNLOAD_SIZES = [] -LAST_SPEED_REFRESH = time.time() def download( - url: str, save_path: Path, session: Optional[Session] = None, segmented: bool = False, **kwargs: Any + url: str, + save_path: Path, + session: Optional[Any] = None, + segmented: bool = False, + _speed_tracker: Optional[dict] = None, + **kwargs: Any, ) -> Generator[dict[str, Any], None, None]: """ - Download a file using Python Requests. - https://requests.readthedocs.io + Download a file with optimized I/O. + + Supports both requests.Session and curl_cffi CurlSession for TLS fingerprinting. + Uses adaptive chunk sizing with buffered writes for maximum throughput. Yields the following download status updates while chunks are downloading: @@ -38,74 +55,68 @@ def download( - {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s) - {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size) - The data is in the same format accepted by rich's progress.update() function. The - `downloaded` key is custom and is not natively accepted by all rich progress bars. - Parameters: url: Web URL of a file to download. save_path: The path to save the file to. If the save path's directory does not exist then it will be made automatically. - session: The Requests Session to make HTTP requests with. Useful to set Header, - Cookie, and Proxy data. Connections are saved and re-used with the session - so long as the server keeps the connection alive. + session: A requests.Session or curl_cffi CurlSession to make HTTP requests with. + CurlSession preserves TLS fingerprinting for services that need it. segmented: If downloads are segments or parts of one bigger file. + _speed_tracker: Shared speed tracking state for this download batch (per-call, not global). kwargs: Any extra keyword arguments to pass to the session.get() call. Use this for one-time request changes like a header, cookie, or proxy. For example, to request Byte-ranges use e.g., `headers={"Range": "bytes=0-128"}`. 
""" - global LAST_SPEED_REFRESH - session = session or Session() + # Per-call speed tracking (shared across threads within one requests() call) + if _speed_tracker is None: + _speed_tracker = {"sizes": [], "last_refresh": time.time()} + save_dir = save_path.parent control_file = save_path.with_name(f"{save_path.name}.!dev") save_dir.mkdir(parents=True, exist_ok=True) if control_file.exists(): - # consider the file corrupt if the control file exists save_path.unlink(missing_ok=True) control_file.unlink() elif save_path.exists(): - # if it exists, and no control file, then it should be safe yield dict(file_downloaded=save_path, written=save_path.stat().st_size) - # TODO: This should return, potential recovery bug - # TODO: Design a control file format so we know how much of the file is missing control_file.write_bytes(b"") attempts = 1 try: while True: written = 0 - - # these are for single-url speed calcs only - download_sizes = [] + download_sizes: list[int] = [] last_speed_refresh = time.time() try: stream = session.get(url, stream=True, **kwargs) stream.raise_for_status() - if not segmented: - try: - content_length = int(stream.headers.get("Content-Length", "0")) - - # Skip Content-Length validation for compressed responses since - # requests automatically decompresses but Content-Length shows compressed size - if stream.headers.get("Content-Encoding", "").lower() in ["gzip", "deflate", "br"]: - content_length = 0 - except ValueError: + # Determine content length and adaptive chunk size + try: + content_length = int(stream.headers.get("Content-Length", "0")) + if stream.headers.get("Content-Encoding", "").lower() in ["gzip", "deflate", "br"]: content_length = 0 + except ValueError: + content_length = 0 + chunk_size = _adaptive_chunk_size(content_length) + + if not segmented: if content_length > 0: - yield dict(total=math.ceil(content_length / CHUNK_SIZE)) + yield dict(total=math.ceil(content_length / chunk_size)) else: - # we have no data to calculate total chunks - yield dict(total=None) # indeterminate mode + yield dict(total=None) - with open(save_path, "wb") as f: - for chunk in stream.iter_content(chunk_size=CHUNK_SIZE): + # Buffered iter_content with adaptive chunk size + # Works with both requests.Session and CurlSession + with open(save_path, "wb", buffering=1_048_576) as f: + for chunk in stream.iter_content(chunk_size=chunk_size): download_size = len(chunk) f.write(chunk) written += download_size @@ -115,7 +126,7 @@ def download( now = time.time() time_since = now - last_speed_refresh download_sizes.append(download_size) - if time_since > PROGRESS_WINDOW or download_size < CHUNK_SIZE: + if time_since > PROGRESS_WINDOW or download_size < chunk_size: data_size = sum(download_sizes) download_speed = math.ceil(data_size / (time_since or 1)) yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") @@ -130,15 +141,21 @@ def download( if segmented: yield dict(advance=1) now = time.time() - time_since = now - LAST_SPEED_REFRESH - if written: # no size == skipped dl - DOWNLOAD_SIZES.append(written) - if DOWNLOAD_SIZES and time_since > PROGRESS_WINDOW: - data_size = sum(DOWNLOAD_SIZES) - download_speed = math.ceil(data_size / (time_since or 1)) + sizes = _speed_tracker["sizes"] + if written: + sizes.append((now, written)) + # Prune entries older than the rolling window + cutoff = now - SPEED_ROLLING_WINDOW + while sizes and sizes[0][0] < cutoff: + sizes.pop(0) + time_since = now - _speed_tracker["last_refresh"] + if sizes and time_since > PROGRESS_WINDOW: + window_start = 
sizes[0][0] + window_duration = now - window_start + data_size = sum(size for _, size in sizes) + download_speed = math.ceil(data_size / (window_duration or 1)) yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") - LAST_SPEED_REFRESH = now - DOWNLOAD_SIZES.clear() + _speed_tracker["last_refresh"] = now break except Exception as e: save_path.unlink(missing_ok=True) @@ -158,10 +175,14 @@ def requests( cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None, proxy: Optional[str] = None, max_workers: Optional[int] = None, + session: Optional[Any] = None, ) -> Generator[dict[str, Any], None, None]: """ - Download a file using Python Requests. - https://requests.readthedocs.io + Download files with optimized I/O and adaptive chunk sizing. + + Supports both requests.Session and curl_cffi CurlSession. When a CurlSession is + provided (e.g. from a service's get_session()), TLS fingerprinting is preserved + on all segment downloads. Yields the following download status updates while chunks are downloading: @@ -186,7 +207,10 @@ def requests( cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for all downloads. proxy: An optional proxy URI to route connections through for all downloads. max_workers: The maximum amount of threads to use for downloads. Defaults to - min(32,(cpu_count+4)). + min(12,(cpu_count+4)). + session: An optional requests.Session or curl_cffi CurlSession to use. If provided, + it will be used directly (preserving TLS fingerprinting). If None, a new + requests.Session with HTTPAdapter connection pooling will be created. """ if not urls: raise ValueError("urls must be provided and not empty") @@ -221,7 +245,7 @@ def requests( urls = [urls] if not max_workers: - max_workers = min(32, (os.cpu_count() or 1) + 4) + max_workers = min(16, (os.cpu_count() or 1) + 4) urls = [ dict(save_path=save_path, **url) if isinstance(url, dict) else dict(url=url, save_path=save_path) @@ -231,25 +255,28 @@ def requests( ] ] - session = Session() - session.mount("https://", HTTPAdapter(pool_connections=max_workers, pool_maxsize=max_workers, pool_block=True)) - session.mount("http://", session.adapters["https://"]) - - if headers: - headers = {k: v for k, v in headers.items() if k.lower() != "accept-encoding"} - session.headers.update(headers) - if cookies: - session.cookies.update(cookies) - if proxy: - session.proxies.update({"all": proxy}) + # Use provided session or create a new optimized requests.Session + # When a session is provided (e.g., service's CurlSession), don't mutate it — + # headers/cookies/proxy are already set on it and it may be shared across tracks. + if session is None: + session = Session() + session.mount("https://", HTTPAdapter(pool_connections=max_workers, pool_maxsize=max_workers, pool_block=True)) + session.mount("http://", HTTPAdapter(pool_connections=max_workers, pool_maxsize=max_workers, pool_block=True)) + if headers: + headers = {k: v for k, v in headers.items() if k.lower() != "accept-encoding"} + session.headers.update(headers) + if cookies: + session.cookies.update(cookies) + if proxy: + session.proxies.update({"all": proxy}) if debug_logger: first_url = urls[0].get("url", "") if urls else "" url_display = first_url[:200] + "..." 
if len(first_url) > 200 else first_url debug_logger.log( level="DEBUG", - operation="downloader_requests_start", - message="Starting requests download", + operation="downloader_start", + message="Starting download", context={ "url_count": len(urls), "first_url": url_display, @@ -257,56 +284,54 @@ def requests( "filename": filename, "max_workers": max_workers, "has_proxy": bool(proxy), + "session_type": type(session).__name__, }, ) - # If we're downloading more than one URL, treat them as "segments" for progress purposes. - # For single-URL downloads we want per-chunk progress (and the inner `download()` will yield - # a chunk-based `total`), so don't set a segment total of 1 here. segmented_batch = len(urls) > 1 if segmented_batch: yield dict(total=len(urls)) + # Per-call speed tracker — shared across threads within this call only + speed_tracker: dict[str, Any] = {"sizes": [], "last_refresh": time.time()} + try: with ThreadPoolExecutor(max_workers=max_workers) as pool: for future in as_completed( - pool.submit(download, session=session, segmented=segmented_batch, **url) for url in urls + pool.submit(download, session=session, segmented=segmented_batch, _speed_tracker=speed_tracker, **url) + for url in urls ): try: yield from future.result() except KeyboardInterrupt: - DOWNLOAD_CANCELLED.set() # skip pending track downloads + DOWNLOAD_CANCELLED.set() yield dict(downloaded="[yellow]CANCELLING") pool.shutdown(wait=True, cancel_futures=True) yield dict(downloaded="[yellow]CANCELLED") - # tell dl that it was cancelled - # the pool is already shut down, so exiting loop is fine raise except Exception as e: - DOWNLOAD_CANCELLED.set() # skip pending track downloads + DOWNLOAD_CANCELLED.set() yield dict(downloaded="[red]FAILING") pool.shutdown(wait=True, cancel_futures=True) yield dict(downloaded="[red]FAILED") if debug_logger: debug_logger.log( level="ERROR", - operation="downloader_requests_failed", - message=f"Requests download failed: {e}", + operation="downloader_failed", + message=f"Download failed: {e}", error=e, context={ "url_count": len(urls), "output_dir": str(output_dir), }, ) - # tell dl that it failed - # the pool is already shut down, so exiting loop is fine raise if debug_logger: debug_logger.log( level="DEBUG", - operation="downloader_requests_complete", - message="Requests download completed successfully", + operation="downloader_complete", + message="Download completed successfully", context={ "url_count": len(urls), "output_dir": str(output_dir), @@ -314,7 +339,7 @@ def requests( }, ) finally: - DOWNLOAD_SIZES.clear() + speed_tracker["sizes"].clear() __all__ = ("requests",) diff --git a/unshackle/core/manifests/dash.py b/unshackle/core/manifests/dash.py index 92052fd..fe338c0 100644 --- a/unshackle/core/manifests/dash.py +++ b/unshackle/core/manifests/dash.py @@ -7,7 +7,7 @@ import math import re import shutil import sys -from copy import copy, deepcopy +from copy import copy from functools import partial from pathlib import Path from typing import Any, Callable, Optional, Union @@ -18,7 +18,6 @@ from zlib import crc32 import requests from curl_cffi.requests import Session as CurlSession from langcodes import Language, tag_is_valid -from lxml import etree from lxml.etree import Element, ElementTree from pyplayready.system.pssh import PSSH as PR_PSSH from pywidevine.cdm import Cdm as WidevineCdm @@ -27,7 +26,6 @@ from requests import Session from unshackle.core.cdm.detect import is_playready_cdm from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, 
AnyTrack -from unshackle.core.downloaders import requests as requests_downloader from unshackle.core.drm import DRM_T, PlayReady, Widevine from unshackle.core.events import events from unshackle.core.tracks import Audio, Subtitle, Tracks, Video @@ -543,10 +541,6 @@ class DASH: progress(total=len(segments)) downloader = track.downloader - if downloader.__name__ == "aria2c" and any(bytes_range is not None for url, bytes_range in segments): - # aria2(c) is shit and doesn't support the Range header, fallback to the requests downloader - downloader = requests_downloader - log.warning("Falling back to the requests downloader as aria2(c) doesn't support the Range header") downloader_args = dict( urls=[ @@ -559,39 +553,9 @@ class DASH: cookies=session.cookies, proxy=proxy, max_workers=max_workers, + session=session, ) - skip_merge = False - if downloader.__name__ == "n_m3u8dl_re": - skip_merge = True - - # When periods were filtered out during to_tracks(), n_m3u8dl_re will re-parse - # the raw MPD and download ALL periods (including ads/pre-rolls). Write a filtered - # MPD with the rejected periods removed so n_m3u8dl_re downloads the correct content. - filtered_period_ids = track.data.get("dash", {}).get("filtered_period_ids", []) - if filtered_period_ids: - filtered_manifest = deepcopy(manifest) - for child in list(filtered_manifest): - if not hasattr(child.tag, "find"): - continue - if child.tag == "Period" and child.get("id") in filtered_period_ids: - filtered_manifest.remove(child) - - filtered_mpd_path = save_dir / f".{track.id}_filtered.mpd" - filtered_mpd_path.parent.mkdir(parents=True, exist_ok=True) - etree.ElementTree(filtered_manifest).write( - str(filtered_mpd_path), xml_declaration=True, encoding="utf-8" - ) - track.from_file = filtered_mpd_path - - downloader_args.update( - { - "filename": track.id, - "track": track, - "content_keys": drm.content_keys if drm else None, - } - ) - debug_logger = get_debug_logger() if debug_logger: debug_logger.log( @@ -602,10 +566,8 @@ class DASH: "track_id": getattr(track, "id", None), "track_type": track.__class__.__name__, "total_segments": len(segments), - "downloader": downloader.__name__, "has_drm": bool(track.drm), "drm_types": [drm.__class__.__name__ for drm in (track.drm or [])], - "skip_merge": skip_merge, "save_path": str(save_path), "has_init_data": bool(init_data), }, @@ -621,15 +583,6 @@ class DASH: status_update["downloaded"] = f"DASH {downloaded}" progress(**status_update) - # Clean up filtered MPD temp file before enumerating segments - filtered_mpd_path = save_dir / f".{track.id}_filtered.mpd" - if filtered_mpd_path.exists(): - filtered_mpd_path.unlink() - - # see https://github.com/devine-dl/devine/issues/71 - for control_file in save_dir.glob("*.aria2__temp"): - control_file.unlink() - # Verify output directory exists and contains files if not save_dir.exists(): error_msg = f"Output directory does not exist: {save_dir}" @@ -643,9 +596,8 @@ class DASH: "track_type": track.__class__.__name__, "save_dir": str(save_dir), "save_path": str(save_path), - "downloader": downloader.__name__, - "skip_merge": skip_merge, - }, + "downloader": "requests", + }, ) raise FileNotFoundError(error_msg) @@ -663,9 +615,8 @@ class DASH: "save_dir_exists": save_dir.exists(), "segments_found": len(segments_to_merge), "segment_files": [f.name for f in segments_to_merge[:10]], # Limit to first 10 - "downloader": downloader.__name__, - "skip_merge": skip_merge, - }, + "downloader": "requests", + }, ) if not segments_to_merge: @@ -682,48 +633,39 @@ class DASH: 
"track_type": track.__class__.__name__, "save_dir": str(save_dir), "directory_contents": [str(p) for p in all_contents], - "downloader": downloader.__name__, - "skip_merge": skip_merge, - }, + "downloader": "requests", + }, ) raise FileNotFoundError(error_msg) - if skip_merge: - # N_m3u8DL-RE handles merging and decryption internally - shutil.move(segments_to_merge[0], save_path) - if drm: - track.drm = None - events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=None) - else: - with open(save_path, "wb") as f: - if init_data: - f.write(init_data) - if len(segments_to_merge) > 1: - progress(downloaded="Merging", completed=0, total=len(segments_to_merge)) - for segment_file in segments_to_merge: - segment_data = segment_file.read_bytes() - # TODO: fix encoding after decryption? - if ( - not drm - and isinstance(track, Subtitle) - and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML) - ): - segment_data = try_ensure_utf8(segment_data) - segment_data = ( - segment_data.decode("utf8") - .replace("‎", html.unescape("‎")) - .replace("‏", html.unescape("‏")) - .encode("utf8") - ) - f.write(segment_data) - f.flush() - segment_file.unlink() - progress(advance=1) + with open(save_path, "wb") as f: + if init_data: + f.write(init_data) + if len(segments_to_merge) > 1: + progress(downloaded="Merging", completed=0, total=len(segments_to_merge)) + for segment_file in segments_to_merge: + segment_data = segment_file.read_bytes() + if ( + not drm + and isinstance(track, Subtitle) + and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML) + ): + segment_data = try_ensure_utf8(segment_data) + segment_data = ( + segment_data.decode("utf8") + .replace("‎", html.unescape("‎")) + .replace("‏", html.unescape("‏")) + .encode("utf8") + ) + f.write(segment_data) + f.flush() + segment_file.unlink() + progress(advance=1) track.path = save_path events.emit(events.Types.TRACK_DOWNLOADED, track=track) - if not skip_merge and drm: + if drm: progress(downloaded="Decrypting", completed=0, total=100) drm.decrypt(save_path) track.drm = None diff --git a/unshackle/core/manifests/hls.py b/unshackle/core/manifests/hls.py index 2904c79..7c6ee63 100644 --- a/unshackle/core/manifests/hls.py +++ b/unshackle/core/manifests/hls.py @@ -30,7 +30,6 @@ from requests import Session from unshackle.core import binaries from unshackle.core.cdm.detect import is_playready_cdm, is_widevine_cdm from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack -from unshackle.core.downloaders import requests as requests_downloader from unshackle.core.drm import DRM_T, ClearKey, MonaLisa, PlayReady, Widevine from unshackle.core.events import events from unshackle.core.tracks import Audio, Subtitle, Tracks, Video @@ -391,9 +390,6 @@ class HLS: progress(total=total_segments) downloader = track.downloader - if downloader.__name__ == "aria2c" and any(x.byterange for x in master.segments if x not in unwanted_segments): - downloader = requests_downloader - log.warning("Falling back to the requests downloader as aria2(c) doesn't support the Range header") urls: list[dict[str, Any]] = [] segment_durations: list[int] = [] @@ -422,7 +418,6 @@ class HLS: segment_save_dir = save_dir / "segments" - skip_merge = False downloader_args = dict( urls=urls, output_dir=segment_save_dir, @@ -431,22 +426,9 @@ class HLS: cookies=session.cookies, proxy=proxy, max_workers=max_workers, + session=session, ) - if downloader.__name__ == "n_m3u8dl_re": - skip_merge = True - # session_drm already has correct 
content_keys from initial licensing above - n_m3u8dl_content_keys = session_drm.content_keys if session_drm else None - - downloader_args.update( - { - "output_dir": save_dir, - "filename": track.id, - "track": track, - "content_keys": n_m3u8dl_content_keys, - } - ) - debug_logger = get_debug_logger() if debug_logger: debug_logger.log( @@ -457,10 +439,8 @@ class HLS: "track_id": getattr(track, "id", None), "track_type": track.__class__.__name__, "total_segments": total_segments, - "downloader": downloader.__name__, "has_drm": bool(session_drm), "drm_type": session_drm.__class__.__name__ if session_drm else None, - "skip_merge": skip_merge, "save_path": str(save_path), }, ) @@ -475,17 +455,6 @@ class HLS: status_update["downloaded"] = f"HLS {downloaded}" progress(**status_update) - # see https://github.com/devine-dl/devine/issues/71 - for control_file in segment_save_dir.glob("*.aria2__temp"): - control_file.unlink() - - if skip_merge: - final_save_path = HLS._finalize_n_m3u8dl_re_output(track=track, save_dir=save_dir, save_path=save_path) - progress(downloaded="Downloaded") - track.path = final_save_path - events.emit(events.Types.TRACK_DOWNLOADED, track=track) - return - progress(total=total_segments, completed=0, downloaded="Merging") name_len = len(str(total_segments)) @@ -736,9 +705,8 @@ class HLS: "save_dir_exists": save_dir.exists(), "segments_found": len(segments_to_merge), "segment_files": [f.name for f in segments_to_merge[:10]], # Limit to first 10 - "downloader": downloader.__name__, - "skip_merge": skip_merge, - }, + "downloader": "requests", + }, ) if not segments_to_merge: @@ -755,9 +723,8 @@ class HLS: "save_dir": str(save_dir), "save_dir_exists": save_dir.exists(), "directory_contents": [str(p) for p in all_contents], - "downloader": downloader.__name__, - "skip_merge": skip_merge, - }, + "downloader": "requests", + }, ) raise FileNotFoundError(error_msg) diff --git a/unshackle/core/manifests/ism.py b/unshackle/core/manifests/ism.py index 816e2bd..7f4d4a9 100644 --- a/unshackle/core/manifests/ism.py +++ b/unshackle/core/manifests/ism.py @@ -3,7 +3,6 @@ from __future__ import annotations import base64 import hashlib import html -import shutil import urllib.parse from functools import partial from pathlib import Path @@ -269,7 +268,6 @@ class ISM: progress(total=len(segments)) downloader = track.downloader - skip_merge = False downloader_args = dict( urls=[{"url": url} for url in segments], output_dir=save_dir, @@ -278,18 +276,9 @@ class ISM: cookies=session.cookies, proxy=proxy, max_workers=max_workers, + session=session, ) - if downloader.__name__ == "n_m3u8dl_re": - skip_merge = True - downloader_args.update( - { - "filename": track.id, - "track": track, - "content_keys": session_drm.content_keys if session_drm else None, - } - ) - debug_logger = get_debug_logger() if debug_logger: debug_logger.log( @@ -300,11 +289,10 @@ class ISM: "track_id": getattr(track, "id", None), "track_type": track.__class__.__name__, "total_segments": len(segments), - "downloader": downloader.__name__, + "downloader": "requests", "has_drm": bool(session_drm), "drm_type": session_drm.__class__.__name__ if session_drm else None, - "skip_merge": skip_merge, - "save_path": str(save_path), + "save_path": str(save_path), }, ) @@ -318,9 +306,6 @@ class ISM: status_update["downloaded"] = f"ISM {downloaded}" progress(**status_update) - for control_file in save_dir.glob("*.aria2__temp"): - control_file.unlink() - # Verify output directory exists and contains files if not save_dir.exists(): error_msg = 
f"Output directory does not exist: {save_dir}" @@ -334,9 +319,8 @@ class ISM: "track_type": track.__class__.__name__, "save_dir": str(save_dir), "save_path": str(save_path), - "downloader": downloader.__name__, - "skip_merge": skip_merge, - }, + "downloader": "requests", + }, ) raise FileNotFoundError(error_msg) @@ -354,9 +338,8 @@ class ISM: "save_dir_exists": save_dir.exists(), "segments_found": len(segments_to_merge), "segment_files": [f.name for f in segments_to_merge[:10]], # Limit to first 10 - "downloader": downloader.__name__, - "skip_merge": skip_merge, - }, + "downloader": "requests", + }, ) if not segments_to_merge: @@ -372,39 +355,35 @@ class ISM: "track_type": track.__class__.__name__, "save_dir": str(save_dir), "directory_contents": [str(p) for p in all_contents], - "downloader": downloader.__name__, - "skip_merge": skip_merge, - }, + "downloader": "requests", + }, ) raise FileNotFoundError(error_msg) - if skip_merge: - shutil.move(segments_to_merge[0], save_path) - else: - with open(save_path, "wb") as f: - for segment_file in segments_to_merge: - segment_data = segment_file.read_bytes() - if ( - not session_drm - and isinstance(track, Subtitle) - and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML) - ): - segment_data = try_ensure_utf8(segment_data) - segment_data = ( - segment_data.decode("utf8") - .replace("‎", html.unescape("‎")) - .replace("‏", html.unescape("‏")) - .encode("utf8") - ) - f.write(segment_data) - f.flush() - segment_file.unlink() - progress(advance=1) + with open(save_path, "wb") as f: + for segment_file in segments_to_merge: + segment_data = segment_file.read_bytes() + if ( + not session_drm + and isinstance(track, Subtitle) + and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML) + ): + segment_data = try_ensure_utf8(segment_data) + segment_data = ( + segment_data.decode("utf8") + .replace("‎", html.unescape("‎")) + .replace("‏", html.unescape("‏")) + .encode("utf8") + ) + f.write(segment_data) + f.flush() + segment_file.unlink() + progress(advance=1) track.path = save_path events.emit(events.Types.TRACK_DOWNLOADED, track=track) - if not skip_merge and session_drm: + if session_drm: progress(downloaded="Decrypting", completed=0, total=100) session_drm.decrypt(save_path) track.drm = None diff --git a/unshackle/core/tracks/track.py b/unshackle/core/tracks/track.py index 6031dc2..d73b528 100644 --- a/unshackle/core/tracks/track.py +++ b/unshackle/core/tracks/track.py @@ -21,7 +21,7 @@ from unshackle.core import binaries from unshackle.core.cdm.detect import is_playready_cdm, is_widevine_cdm from unshackle.core.config import config from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY -from unshackle.core.downloaders import aria2c, curl_impersonate, n_m3u8dl_re, requests +from unshackle.core.downloaders import requests from unshackle.core.drm import DRM_T, PlayReady, Widevine from unshackle.core.events import events from unshackle.core.utilities import get_boxes, try_ensure_utf8 @@ -88,12 +88,7 @@ class Track: raise TypeError(f"Expected drm to be an iterable, not {type(drm)}") if downloader is None: - downloader = { - "aria2c": aria2c, - "curl_impersonate": curl_impersonate, - "requests": requests, - "n_m3u8dl_re": n_m3u8dl_re, - }[config.downloader] + downloader = requests self.path: Optional[Path] = None self.url = url @@ -211,23 +206,13 @@ class Track: if track_type == "Subtitle": save_path = save_path.with_suffix(f".{self.codec.extension}") - if self.downloader.__name__ == "n_m3u8dl_re" and ( - 
self.descriptor == self.Descriptor.URL - or track_type in ("Subtitle", "Attachment") - ): - self.downloader = requests - if self.descriptor != self.Descriptor.URL: save_dir = save_path.with_name(save_path.name + "_segments") else: save_dir = save_path.parent def cleanup(): - # track file (e.g., "foo.mp4") save_path.unlink(missing_ok=True) - # aria2c control file (e.g., "foo.mp4.aria2" or "foo.mp4.aria2__temp") - save_path.with_suffix(f"{save_path.suffix}.aria2").unlink(missing_ok=True) - save_path.with_suffix(f"{save_path.suffix}.aria2__temp").unlink(missing_ok=True) if save_dir.exists() and save_dir.name.endswith("_segments"): shutil.rmtree(save_dir) @@ -328,10 +313,6 @@ class Track: if DOWNLOAD_LICENCE_ONLY.is_set(): progress(downloaded="[yellow]SKIPPED") - elif track_type != "Subtitle" and self.downloader.__name__ == "n_m3u8dl_re": - progress(downloaded="[red]FAILED") - error = f"[N_m3u8DL-RE]: {self.descriptor} is currently not supported" - raise ValueError(error) else: for status_update in self.downloader( urls=self.url, @@ -341,6 +322,7 @@ class Track: cookies=session.cookies, proxy=proxy, max_workers=max_workers, + session=session, ): file_downloaded = status_update.get("file_downloaded") if not file_downloaded: From dc197af29e1411ad2a55481cfe3f6ea88dbb98cb Mon Sep 17 00:00:00 2001 From: Andy Date: Fri, 20 Mar 2026 12:47:49 -0600 Subject: [PATCH 2/8] feat(dash): refactor segment extraction and add content period validation --- unshackle/core/manifests/dash.py | 419 +++++++++++++++++++------------ 1 file changed, 255 insertions(+), 164 deletions(-) diff --git a/unshackle/core/manifests/dash.py b/unshackle/core/manifests/dash.py index fe338c0..3ee61fd 100644 --- a/unshackle/core/manifests/dash.py +++ b/unshackle/core/manifests/dash.py @@ -107,13 +107,7 @@ class DASH: if period_id := period.get("id"): filtered_period_ids.append(period_id) continue - if next(iter(period.xpath("SegmentType/@value")), "content") != "content": - if period_id := period.get("id"): - filtered_period_ids.append(period_id) - continue - if "urn:amazon:primevideo:cachingBreadth" in [ - x.get("schemeIdUri") for x in period.findall("SupplementalProperty") - ]: + if not DASH._is_content_period(period, []): if period_id := period.get("id"): filtered_period_ids.append(period_id) continue @@ -242,6 +236,7 @@ class DASH: "period": period, "adaptation_set": adaptation_set, "representation": rep, + "representation_id": rep.get("id"), "filtered_period_ids": filtered_period_ids, } }, @@ -278,9 +273,10 @@ class DASH: log = logging.getLogger("DASH") manifest: ElementTree = track.data["dash"]["manifest"] - period: Element = track.data["dash"]["period"] adaptation_set: Element = track.data["dash"]["adaptation_set"] representation: Element = track.data["dash"]["representation"] + rep_id: Optional[str] = track.data["dash"].get("representation_id") or representation.get("id") + filtered_period_ids: list[str] = track.data["dash"].get("filtered_period_ids", []) # Preserve existing DRM if it was set by the service, especially when service set Widevine # but manifest only contains PlayReady protection (common scenario for some services) @@ -321,174 +317,63 @@ class DASH: if kid not in drm_obj.content_keys: drm_obj.content_keys[kid] = key - manifest_base_url = manifest.findtext("BaseURL") - if not manifest_base_url: - manifest_base_url = track.url - elif not re.match("^https?://", manifest_base_url, re.IGNORECASE): - manifest_base_url = urljoin(track.url, f"./{manifest_base_url}") - period_base_url = urljoin(manifest_base_url, 
period.findtext("BaseURL") or "") - adaptation_set_base_url = urljoin(period_base_url, adaptation_set.findtext("BaseURL") or "") - rep_base_url = urljoin(adaptation_set_base_url, representation.findtext("BaseURL") or "") - - period_duration = period.get("duration") or manifest.get("mediaPresentationDuration") - init_data: Optional[bytes] = None - - segment_template = representation.find("SegmentTemplate") - if segment_template is None: - segment_template = adaptation_set.find("SegmentTemplate") - - segment_list = representation.find("SegmentList") - if segment_list is None: - segment_list = adaptation_set.find("SegmentList") - - segment_base = representation.find("SegmentBase") - if segment_base is None: - segment_base = adaptation_set.find("SegmentBase") - + # Collect segments from all content periods in the manifest + all_periods = manifest.findall("Period") segments: list[tuple[str, Optional[str]]] = [] - segment_timescale: float = 0 segment_durations: list[int] = [] + segment_timescale: float = 0 + init_data: Optional[bytes] = None track_kid: Optional[UUID] = None - if segment_template is not None: - segment_template = copy(segment_template) - start_number = int(segment_template.get("startNumber") or 1) - end_number = int(segment_template.get("endNumber") or 0) or None - segment_timeline = segment_template.find("SegmentTimeline") - segment_timescale = float(segment_template.get("timescale") or 1) + content_periods = [p for p in all_periods if DASH._is_content_period(p, filtered_period_ids)] + period_count = len(content_periods) - for item in ("initialization", "media"): - value = segment_template.get(item) - if not value: + if period_count > 1: + log.info(f"Multi-period manifest detected with {period_count} content periods") + + for period_idx, content_period in enumerate(content_periods): + # Find the matching representation in this period + matched_rep = None + matched_as = None + for as_ in content_period.findall("AdaptationSet"): + if DASH.is_trick_mode(as_): continue - if not re.match("^https?://", value, re.IGNORECASE): - if not rep_base_url: - raise ValueError("Resolved Segment URL is not absolute, and no Base URL is available.") - value = urljoin(rep_base_url, value) - if not urlparse(value).query: - manifest_url_query = urlparse(track.url).query - if manifest_url_query: - value += f"?{manifest_url_query}" - segment_template.set(item, value) + for rep in as_.findall("Representation"): + if rep.get("id") == rep_id: + matched_rep = rep + matched_as = as_ + break + if matched_rep is not None: + break - init_url = segment_template.get("initialization") - if init_url: - res = session.get( - DASH.replace_fields( - init_url, Bandwidth=representation.get("bandwidth"), RepresentationID=representation.get("id") - ) - ) - res.raise_for_status() - init_data = res.content - track_kid = track.get_key_id(init_data) + if matched_rep is None or matched_as is None: + period_id = content_period.get("id", period_idx) + log.warning(f"Representation '{rep_id}' not found in period '{period_id}', skipping") + continue - if segment_timeline is not None: - current_time = 0 - for s in segment_timeline.findall("S"): - if s.get("t"): - current_time = int(s.get("t")) - for _ in range(1 + (int(s.get("r") or 0))): - segment_durations.append(current_time) - current_time += int(s.get("d")) + p_init, p_segments, p_timescale, p_durations, p_kid = DASH._get_period_segments( + period=content_period, + adaptation_set=matched_as, + representation=matched_rep, + manifest=manifest, + track=track, + track_url=track.url, 
+ session=session, + ) - if not end_number: - end_number = len(segment_durations) - # Handle high startNumber in DVR/catch-up manifests where startNumber > segment count - if start_number > end_number: - end_number = start_number + len(segment_durations) - 1 - - for t, n in zip(segment_durations, range(start_number, end_number + 1)): - segments.append( - ( - DASH.replace_fields( - segment_template.get("media"), - Bandwidth=representation.get("bandwidth"), - Number=n, - RepresentationID=representation.get("id"), - Time=t, - ), - None, - ) - ) + if period_idx == 0: + # First period: use its init data and KID for DRM licensing + init_data = p_init + track_kid = p_kid + segment_timescale = p_timescale else: - if not period_duration: - raise ValueError("Duration of the Period was unable to be determined.") - period_duration = DASH.pt_to_sec(period_duration) - segment_duration = float(segment_template.get("duration")) or 1 + if p_kid and track_kid and p_kid != track_kid: + log.debug(f"Period {content_period.get('id', period_idx)} has different KID: {p_kid}") - if not end_number: - segment_count = math.ceil(period_duration / (segment_duration / segment_timescale)) - end_number = start_number + segment_count - 1 + segments.extend(p_segments) + segment_durations.extend(p_durations) - for s in range(start_number, end_number + 1): - segments.append( - ( - DASH.replace_fields( - segment_template.get("media"), - Bandwidth=representation.get("bandwidth"), - Number=s, - RepresentationID=representation.get("id"), - Time=s, - ), - None, - ) - ) - # TODO: Should we floor/ceil/round, or is int() ok? - segment_durations.append(int(segment_duration)) - elif segment_list is not None: - segment_timescale = float(segment_list.get("timescale") or 1) - - init_data = None - initialization = segment_list.find("Initialization") - if initialization is not None: - source_url = initialization.get("sourceURL") - if not source_url: - source_url = rep_base_url - elif not re.match("^https?://", source_url, re.IGNORECASE): - source_url = urljoin(rep_base_url, f"./{source_url}") - - if initialization.get("range"): - init_range_header = {"Range": f"bytes={initialization.get('range')}"} - else: - init_range_header = None - - res = session.get(url=source_url, headers=init_range_header) - res.raise_for_status() - init_data = res.content - track_kid = track.get_key_id(init_data) - - segment_urls = segment_list.findall("SegmentURL") - for segment_url in segment_urls: - media_url = segment_url.get("media") - if not media_url: - media_url = rep_base_url - elif not re.match("^https?://", media_url, re.IGNORECASE): - media_url = urljoin(rep_base_url, f"./{media_url}") - - segments.append((media_url, segment_url.get("mediaRange"))) - segment_durations.append(int(segment_url.get("duration") or 1)) - elif segment_base is not None: - media_range = None - init_data = None - initialization = segment_base.find("Initialization") - if initialization is not None: - if initialization.get("range"): - init_range_header = {"Range": f"bytes={initialization.get('range')}"} - else: - init_range_header = None - - res = session.get(url=rep_base_url, headers=init_range_header) - res.raise_for_status() - init_data = res.content - track_kid = track.get_key_id(init_data) - total_size = res.headers.get("Content-Range", "").split("/")[-1] - if total_size: - media_range = f"{len(init_data)}-{total_size}" - - segments.append((rep_base_url, media_range)) - elif rep_base_url: - segments.append((rep_base_url, None)) - else: + if not segments: log.error("Could not 
find a way to get segments from this MPD manifest.") log.debug(track.url) sys.exit(1) @@ -682,6 +567,212 @@ class DASH: progress(downloaded="Downloaded") + @staticmethod + def _is_content_period(period: Element, filtered_period_ids: list[str]) -> bool: + """Check if a period is a valid content period (not an ad, not filtered, not trick mode).""" + period_id = period.get("id") + if period_id and period_id in filtered_period_ids: + return False + if next(iter(period.xpath("SegmentType/@value")), "content") != "content": + return False + if "urn:amazon:primevideo:cachingBreadth" in [ + x.get("schemeIdUri") for x in period.findall("SupplementalProperty") + ]: + return False + return True + + @staticmethod + def _get_period_segments( + period: Element, + adaptation_set: Element, + representation: Element, + manifest: ElementTree, + track: AnyTrack, + track_url: str, + session: Union[Session, CurlSession], + ) -> tuple[ + Optional[bytes], + list[tuple[str, Optional[str]]], + float, + list[int], + Optional[UUID], + ]: + """ + Extract segments from a single period's representation. + + Returns: + A tuple of (init_data, segments, segment_timescale, segment_durations, track_kid). + """ + manifest_base_url = manifest.findtext("BaseURL") + if not manifest_base_url: + manifest_base_url = track_url + elif not re.match("^https?://", manifest_base_url, re.IGNORECASE): + manifest_base_url = urljoin(track_url, f"./{manifest_base_url}") + period_base_url = urljoin(manifest_base_url, period.findtext("BaseURL") or "") + adaptation_set_base_url = urljoin(period_base_url, adaptation_set.findtext("BaseURL") or "") + rep_base_url = urljoin(adaptation_set_base_url, representation.findtext("BaseURL") or "") + + period_duration = period.get("duration") or manifest.get("mediaPresentationDuration") + init_data: Optional[bytes] = None + + segment_template = representation.find("SegmentTemplate") + if segment_template is None: + segment_template = adaptation_set.find("SegmentTemplate") + + segment_list = representation.find("SegmentList") + if segment_list is None: + segment_list = adaptation_set.find("SegmentList") + + segment_base = representation.find("SegmentBase") + if segment_base is None: + segment_base = adaptation_set.find("SegmentBase") + + segments: list[tuple[str, Optional[str]]] = [] + segment_timescale: float = 0 + segment_durations: list[int] = [] + track_kid: Optional[UUID] = None + + if segment_template is not None: + segment_template = copy(segment_template) + start_number = int(segment_template.get("startNumber") or 1) + end_number = int(segment_template.get("endNumber") or 0) or None + segment_timeline = segment_template.find("SegmentTimeline") + segment_timescale = float(segment_template.get("timescale") or 1) + + for item in ("initialization", "media"): + value = segment_template.get(item) + if not value: + continue + if not re.match("^https?://", value, re.IGNORECASE): + if not rep_base_url: + raise ValueError("Resolved Segment URL is not absolute, and no Base URL is available.") + value = urljoin(rep_base_url, value) + if not urlparse(value).query: + manifest_url_query = urlparse(track_url).query + if manifest_url_query: + value += f"?{manifest_url_query}" + segment_template.set(item, value) + + init_url = segment_template.get("initialization") + if init_url: + res = session.get( + DASH.replace_fields( + init_url, Bandwidth=representation.get("bandwidth"), RepresentationID=representation.get("id") + ) + ) + res.raise_for_status() + init_data = res.content + track_kid = track.get_key_id(init_data) 
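The SegmentTimeline branch below accumulates the $Time$ values from each <S> element's t/d/r attributes; a minimal standalone sketch of that accumulation, using plain dicts in place of lxml elements and made-up tick values:

    # Mirrors the SegmentTimeline expansion: each <S> contributes 1 + r segments,
    # and the running start time becomes the $Time$ fed into the media template.
    timeline = [
        {"t": "0", "d": "5000", "r": "2"},  # 3 segments of 5000 ticks starting at t=0
        {"d": "4000", "r": "1"},            # 2 further segments of 4000 ticks
    ]
    start_times: list[int] = []
    current_time = 0
    for s in timeline:
        if s.get("t"):
            current_time = int(s["t"])
        for _ in range(1 + int(s.get("r") or 0)):
            start_times.append(current_time)
            current_time += int(s["d"])
    assert start_times == [0, 5000, 10000, 15000, 19000]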
+ + if segment_timeline is not None: + current_time = 0 + for s in segment_timeline.findall("S"): + if s.get("t"): + current_time = int(s.get("t")) + for _ in range(1 + (int(s.get("r") or 0))): + segment_durations.append(current_time) + current_time += int(s.get("d")) + + if not end_number: + end_number = len(segment_durations) + # Handle high startNumber in DVR/catch-up manifests where startNumber > segment count + if start_number > end_number: + end_number = start_number + len(segment_durations) - 1 + + for t, n in zip(segment_durations, range(start_number, end_number + 1)): + segments.append( + ( + DASH.replace_fields( + segment_template.get("media"), + Bandwidth=representation.get("bandwidth"), + Number=n, + RepresentationID=representation.get("id"), + Time=t, + ), + None, + ) + ) + else: + if not period_duration: + raise ValueError("Duration of the Period was unable to be determined.") + period_duration = DASH.pt_to_sec(period_duration) + segment_duration = float(segment_template.get("duration")) or 1 + + if not end_number: + segment_count = math.ceil(period_duration / (segment_duration / segment_timescale)) + end_number = start_number + segment_count - 1 + + for s in range(start_number, end_number + 1): + segments.append( + ( + DASH.replace_fields( + segment_template.get("media"), + Bandwidth=representation.get("bandwidth"), + Number=s, + RepresentationID=representation.get("id"), + Time=s, + ), + None, + ) + ) + # TODO: Should we floor/ceil/round, or is int() ok? + segment_durations.append(int(segment_duration)) + elif segment_list is not None: + segment_timescale = float(segment_list.get("timescale") or 1) + + init_data = None + initialization = segment_list.find("Initialization") + if initialization is not None: + source_url = initialization.get("sourceURL") + if not source_url: + source_url = rep_base_url + elif not re.match("^https?://", source_url, re.IGNORECASE): + source_url = urljoin(rep_base_url, f"./{source_url}") + + if initialization.get("range"): + init_range_header = {"Range": f"bytes={initialization.get('range')}"} + else: + init_range_header = None + + res = session.get(url=source_url, headers=init_range_header) + res.raise_for_status() + init_data = res.content + track_kid = track.get_key_id(init_data) + + segment_urls = segment_list.findall("SegmentURL") + for segment_url in segment_urls: + media_url = segment_url.get("media") + if not media_url: + media_url = rep_base_url + elif not re.match("^https?://", media_url, re.IGNORECASE): + media_url = urljoin(rep_base_url, f"./{media_url}") + + segments.append((media_url, segment_url.get("mediaRange"))) + segment_durations.append(int(segment_url.get("duration") or 1)) + elif segment_base is not None: + media_range = None + init_data = None + initialization = segment_base.find("Initialization") + if initialization is not None: + if initialization.get("range"): + init_range_header = {"Range": f"bytes={initialization.get('range')}"} + else: + init_range_header = None + + res = session.get(url=rep_base_url, headers=init_range_header) + res.raise_for_status() + init_data = res.content + track_kid = track.get_key_id(init_data) + total_size = res.headers.get("Content-Range", "").split("/")[-1] + if total_size: + media_range = f"{len(init_data)}-{total_size}" + + segments.append((rep_base_url, media_range)) + elif rep_base_url: + segments.append((rep_base_url, None)) + + return init_data, segments, segment_timescale, segment_durations, track_kid + @staticmethod def _get(item: str, adaptation_set: Element, representation: 
Optional[Element] = None) -> Optional[Any]:
        """Helper to get a requested item from the Representation, otherwise from the AdaptationSet."""

From 006d08041610482e5801da640d47721d4f838c18 Mon Sep 17 00:00:00 2001
From: Andy
Date: Mon, 23 Mar 2026 17:20:26 -0600
Subject: [PATCH 3/8] feat(downloader): optimize download throughput with
 Queue-based threading and raw reads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix critical bug where ThreadPoolExecutor was not actually parallelizing
downloads (generator functions returned instantly, I/O ran on main thread).

Performance improvements:

- Queue-based event dispatch: workers consume generators in threads, push
  events to a thread-safe Queue for truly parallel segment downloads
- Raw socket reads (resp.raw.read) for requests.Session — 30-35% faster than
  iter_content, with iter_content fallback for CurlSession
- File pre-allocation via truncate when Content-Length is known
- Hot loop caching: time.time, f.write, stream.raw.read cached as locals
- HTTPAdapter connection pooling mounted on passed sessions for reuse
---
 unshackle/core/downloaders/requests.py | 188 +++++++++++++++++--------
 1 file changed, 133 insertions(+), 55 deletions(-)

diff --git a/unshackle/core/downloaders/requests.py b/unshackle/core/downloaders/requests.py
index fc9c792..800b26d 100644
--- a/unshackle/core/downloaders/requests.py
+++ b/unshackle/core/downloaders/requests.py
@@ -1,10 +1,10 @@
 import math
 import os
 import time
-from concurrent.futures import as_completed
 from concurrent.futures.thread import ThreadPoolExecutor
 from http.cookiejar import CookieJar
 from pathlib import Path
+from queue import Empty, Queue
 from typing import Any, Generator, MutableMapping, Optional, Union
 
 from requests import Session
@@ -32,6 +32,10 @@ def _adaptive_chunk_size(content_length: int) -> int:
     return min(MAX_CHUNK, max(MIN_CHUNK, content_length // 4))
 
+def _is_requests_session(session: Any) -> bool:
+    """Check if the session is a standard requests.Session (supports resp.raw)."""
+    return isinstance(session, Session)
+
 
 def download(
     url: str,
     save_path: Path,
@@ -45,7 +49,7 @@
     Download a file with optimized I/O.
 
     Supports both requests.Session and curl_cffi CurlSession for TLS fingerprinting.
-    Uses adaptive chunk sizing with buffered writes for maximum throughput.
+    Uses raw socket reads for requests.Session (30-35% faster) and iter_content for CurlSession.
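The Queue-based dispatch this patch introduces boils down to a small pattern that is easier to see in isolation; a self-contained sketch with generic names (stdlib only, without the real code's cancellation and error handling):

    from concurrent.futures import ThreadPoolExecutor
    from queue import Empty, Queue
    from typing import Any, Generator

    def fake_download(n: int) -> Generator[dict[str, Any], None, None]:
        # Stand-in for download(): a generator that yields progress events.
        yield {"advance": 1, "worker": n}

    def run(jobs: list[int]) -> list[dict[str, Any]]:
        events: Queue[dict[str, Any]] = Queue()
        seen: list[dict[str, Any]] = []
        with ThreadPoolExecutor(max_workers=4) as pool:
            # Each worker consumes its generator inside the thread, so the blocking
            # I/O runs in parallel; only small event dicts cross back to this thread.
            futures = [pool.submit(lambda j=j: [events.put(e) for e in fake_download(j)]) for j in jobs]
            while any(not f.done() for f in futures) or not events.empty():
                try:
                    seen.append(events.get(timeout=0.1))
                except Empty:
                    pass
        return seen

    print(len(run(list(range(8)))))  # 8 progress events, produced concurrently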
Yields the following download status updates while chunks are downloading: @@ -69,7 +73,6 @@ def download( """ session = session or Session() - # Per-call speed tracking (shared across threads within one requests() call) if _speed_tracker is None: _speed_tracker = {"sizes": [], "last_refresh": time.time()} @@ -85,13 +88,15 @@ def download( yield dict(file_downloaded=save_path, written=save_path.stat().st_size) control_file.write_bytes(b"") + _time = time.time + use_raw = _is_requests_session(session) attempts = 1 try: while True: written = 0 download_sizes: list[int] = [] - last_speed_refresh = time.time() + last_speed_refresh = _time() try: stream = session.get(url, stream=True, **kwargs) @@ -113,25 +118,61 @@ def download( else: yield dict(total=None) - # Buffered iter_content with adaptive chunk size - # Works with both requests.Session and CurlSession + # Pre-allocate file when size is known (helps filesystem allocate contiguous blocks) with open(save_path, "wb", buffering=1_048_576) as f: - for chunk in stream.iter_content(chunk_size=chunk_size): - download_size = len(chunk) - f.write(chunk) - written += download_size + if content_length > 0: + f.truncate(content_length) + f.seek(0) - if not segmented: - yield dict(advance=1) - now = time.time() - time_since = now - last_speed_refresh - download_sizes.append(download_size) - if time_since > PROGRESS_WINDOW or download_size < chunk_size: - data_size = sum(download_sizes) - download_speed = math.ceil(data_size / (time_since or 1)) - yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") - last_speed_refresh = now - download_sizes.clear() + # Cache f.write for hot loop + _write = f.write + + if use_raw: + # Raw socket read — 30-35% faster than iter_content (benchmarked) + # Safe in worker threads with Queue-based event dispatch + _read = stream.raw.read + while True: + chunk = _read(chunk_size) + if not chunk: + break + _write(chunk) + download_size = len(chunk) + written += download_size + + if not segmented: + yield dict(advance=1) + now = _time() + time_since = now - last_speed_refresh + download_sizes.append(download_size) + if time_since > PROGRESS_WINDOW or download_size < chunk_size: + data_size = sum(download_sizes) + download_speed = math.ceil(data_size / (time_since or 1)) + yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") + last_speed_refresh = now + download_sizes.clear() + stream.close() + else: + # CurlSession: use iter_content (raw not available) + for chunk in stream.iter_content(chunk_size=chunk_size): + _write(chunk) + download_size = len(chunk) + written += download_size + + if not segmented: + yield dict(advance=1) + now = _time() + time_since = now - last_speed_refresh + download_sizes.append(download_size) + if time_since > PROGRESS_WINDOW or download_size < chunk_size: + data_size = sum(download_sizes) + download_speed = math.ceil(data_size / (time_since or 1)) + yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") + last_speed_refresh = now + download_sizes.clear() + + # Truncate to actual written size in case pre-allocation overshot + if content_length > 0 and written != content_length: + f.truncate(written) if not segmented and content_length and written < content_length: raise IOError(f"Failed to read {content_length} bytes from the track URI.") @@ -140,11 +181,10 @@ def download( if segmented: yield dict(advance=1) - now = time.time() + now = _time() sizes = _speed_tracker["sizes"] if written: sizes.append((now, written)) - # Prune entries older than the rolling window cutoff = 
now - SPEED_ROLLING_WINDOW while sizes and sizes[0][0] < cutoff: sizes.pop(0) @@ -256,12 +296,10 @@ def requests( ] # Use provided session or create a new optimized requests.Session - # When a session is provided (e.g., service's CurlSession), don't mutate it — - # headers/cookies/proxy are already set on it and it may be shared across tracks. + # When a session is provided (e.g., service's CurlSession), don't mutate headers/cookies/proxy — + # they're already set and the session may be shared across tracks. if session is None: session = Session() - session.mount("https://", HTTPAdapter(pool_connections=max_workers, pool_maxsize=max_workers, pool_block=True)) - session.mount("http://", HTTPAdapter(pool_connections=max_workers, pool_maxsize=max_workers, pool_block=True)) if headers: headers = {k: v for k, v in headers.items() if k.lower() != "accept-encoding"} session.headers.update(headers) @@ -270,6 +308,13 @@ def requests( if proxy: session.proxies.update({"all": proxy}) + # Mount HTTPAdapter with connection pooling sized to worker count. + # Safe to do on any requests.Session — improves connection reuse for parallel downloads. + if _is_requests_session(session): + adapter = HTTPAdapter(pool_connections=max_workers, pool_maxsize=max_workers, pool_block=True) + session.mount("https://", adapter) + session.mount("http://", adapter) + if debug_logger: first_url = urls[0].get("url", "") if urls else "" url_display = first_url[:200] + "..." if len(first_url) > 200 else first_url @@ -297,35 +342,68 @@ def requests( try: with ThreadPoolExecutor(max_workers=max_workers) as pool: - for future in as_completed( - pool.submit(download, session=session, segmented=segmented_batch, _speed_tracker=speed_tracker, **url) - for url in urls - ): + event_queue: Queue[dict[str, Any]] = Queue() + + def _download_worker(url_item: dict[str, Any]) -> None: + for event in download( + session=session, + segmented=segmented_batch, + _speed_tracker=speed_tracker, + **url_item, + ): + event_queue.put(event) + + futures = [pool.submit(_download_worker, url) for url in urls] + pending = set(futures) + + while pending: + # Drain queued progress updates for responsive UI + while True: + try: + yield event_queue.get_nowait() + except Empty: + break + + done = {future for future in pending if future.done()} + for future in done: + pending.remove(future) + exc = future.exception() + if isinstance(exc, KeyboardInterrupt): + DOWNLOAD_CANCELLED.set() + yield dict(downloaded="[yellow]CANCELLING") + pool.shutdown(wait=True, cancel_futures=True) + yield dict(downloaded="[yellow]CANCELLED") + raise exc + elif exc: + DOWNLOAD_CANCELLED.set() + yield dict(downloaded="[red]FAILING") + pool.shutdown(wait=True, cancel_futures=True) + yield dict(downloaded="[red]FAILED") + if debug_logger: + debug_logger.log( + level="ERROR", + operation="downloader_failed", + message=f"Download failed: {exc}", + error=exc, + context={ + "url_count": len(urls), + "output_dir": str(output_dir), + }, + ) + raise exc + + if pending: + try: + yield event_queue.get(timeout=0.1) + except Empty: + pass + + # Drain any remaining events from workers that just finished + while True: try: - yield from future.result() - except KeyboardInterrupt: - DOWNLOAD_CANCELLED.set() - yield dict(downloaded="[yellow]CANCELLING") - pool.shutdown(wait=True, cancel_futures=True) - yield dict(downloaded="[yellow]CANCELLED") - raise - except Exception as e: - DOWNLOAD_CANCELLED.set() - yield dict(downloaded="[red]FAILING") - pool.shutdown(wait=True, cancel_futures=True) - yield 
dict(downloaded="[red]FAILED") - if debug_logger: - debug_logger.log( - level="ERROR", - operation="downloader_failed", - message=f"Download failed: {e}", - error=e, - context={ - "url_count": len(urls), - "output_dir": str(output_dir), - }, - ) - raise + yield event_queue.get_nowait() + except Empty: + break if debug_logger: debug_logger.log( From 6840944738f56e82bde5164f843e305c0043993d Mon Sep 17 00:00:00 2001 From: Andy Date: Mon, 23 Mar 2026 18:17:12 -0600 Subject: [PATCH 4/8] perf(downloader): optimize hot loop and threading efficiency Replace list.pop(0) with deque.popleft() for O(1) speed tracker eviction, skip urllib3 decode chain with decode_content=False on raw reads, use running total instead of sum() for progress reporting, add explicit stream.close() on CurlSession path, replace busy-poll loop with concurrent.futures.wait(FIRST_COMPLETED), skip ThreadPoolExecutor for single-URL downloads, and DRY up duplicated raw/iter_content progress logic into a unified chunk iterator. --- unshackle/core/downloaders/requests.py | 192 ++++++++++++------------- 1 file changed, 96 insertions(+), 96 deletions(-) diff --git a/unshackle/core/downloaders/requests.py b/unshackle/core/downloaders/requests.py index 800b26d..74732d7 100644 --- a/unshackle/core/downloaders/requests.py +++ b/unshackle/core/downloaders/requests.py @@ -1,6 +1,8 @@ import math import os import time +from collections import deque +from concurrent.futures import FIRST_COMPLETED, wait from concurrent.futures.thread import ThreadPoolExecutor from http.cookiejar import CookieJar from pathlib import Path @@ -74,7 +76,7 @@ def download( session = session or Session() if _speed_tracker is None: - _speed_tracker = {"sizes": [], "last_refresh": time.time()} + _speed_tracker = {"sizes": deque(), "last_refresh": time.time()} save_dir = save_path.parent control_file = save_path.with_name(f"{save_path.name}.!dev") @@ -95,7 +97,6 @@ def download( try: while True: written = 0 - download_sizes: list[int] = [] last_speed_refresh = _time() try: @@ -127,48 +128,44 @@ def download( # Cache f.write for hot loop _write = f.write + # Build chunk iterator — raw reads for requests.Session, iter_content for CurlSession if use_raw: - # Raw socket read — 30-35% faster than iter_content (benchmarked) - # Safe in worker threads with Queue-based event dispatch + stream.raw.decode_content = False _read = stream.raw.read - while True: - chunk = _read(chunk_size) - if not chunk: - break - _write(chunk) - download_size = len(chunk) - written += download_size - if not segmented: - yield dict(advance=1) - now = _time() - time_since = now - last_speed_refresh - download_sizes.append(download_size) - if time_since > PROGRESS_WINDOW or download_size < chunk_size: - data_size = sum(download_sizes) - download_speed = math.ceil(data_size / (time_since or 1)) - yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") - last_speed_refresh = now - download_sizes.clear() - stream.close() + def _chunks() -> Generator[bytes, None, None]: + while True: + chunk = _read(chunk_size) + if not chunk: + break + yield chunk + stream.close() + + chunks = _chunks() else: - # CurlSession: use iter_content (raw not available) - for chunk in stream.iter_content(chunk_size=chunk_size): - _write(chunk) - download_size = len(chunk) - written += download_size + def _chunks_iter() -> Generator[bytes, None, None]: + yield from stream.iter_content(chunk_size=chunk_size) + stream.close() - if not segmented: - yield dict(advance=1) - now = _time() - time_since = now - last_speed_refresh 
- download_sizes.append(download_size) - if time_since > PROGRESS_WINDOW or download_size < chunk_size: - data_size = sum(download_sizes) - download_speed = math.ceil(data_size / (time_since or 1)) - yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") - last_speed_refresh = now - download_sizes.clear() + chunks = _chunks_iter() + + # Unified write + progress loop + _data_accumulated = 0 + for chunk in chunks: + _write(chunk) + download_size = len(chunk) + written += download_size + + if not segmented: + yield dict(advance=1) + now = _time() + time_since = now - last_speed_refresh + _data_accumulated += download_size + if time_since > PROGRESS_WINDOW or download_size < chunk_size: + download_speed = math.ceil(_data_accumulated / (time_since or 1)) + yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") + last_speed_refresh = now + _data_accumulated = 0 # Truncate to actual written size in case pre-allocation overshot if content_length > 0 and written != content_length: @@ -187,7 +184,7 @@ def download( sizes.append((now, written)) cutoff = now - SPEED_ROLLING_WINDOW while sizes and sizes[0][0] < cutoff: - sizes.pop(0) + sizes.popleft() time_since = now - _speed_tracker["last_refresh"] if sizes and time_since > PROGRESS_WINDOW: window_start = sizes[0][0] @@ -338,73 +335,76 @@ def requests( yield dict(total=len(urls)) # Per-call speed tracker — shared across threads within this call only - speed_tracker: dict[str, Any] = {"sizes": [], "last_refresh": time.time()} + speed_tracker: dict[str, Any] = {"sizes": deque(), "last_refresh": time.time()} try: - with ThreadPoolExecutor(max_workers=max_workers) as pool: - event_queue: Queue[dict[str, Any]] = Queue() + # Fast path: single URL — no thread pool overhead + if len(urls) == 1: + yield from download( + session=session, + segmented=segmented_batch, + _speed_tracker=speed_tracker, + **urls[0], + ) + else: + with ThreadPoolExecutor(max_workers=max_workers) as pool: + event_queue: Queue[dict[str, Any]] = Queue() - def _download_worker(url_item: dict[str, Any]) -> None: - for event in download( - session=session, - segmented=segmented_batch, - _speed_tracker=speed_tracker, - **url_item, - ): - event_queue.put(event) + def _download_worker(url_item: dict[str, Any]) -> None: + for event in download( + session=session, + segmented=segmented_batch, + _speed_tracker=speed_tracker, + **url_item, + ): + event_queue.put(event) - futures = [pool.submit(_download_worker, url) for url in urls] - pending = set(futures) + futures = [pool.submit(_download_worker, url) for url in urls] + pending = set(futures) - while pending: - # Drain queued progress updates for responsive UI + while pending: + # Drain queued progress updates for responsive UI + while True: + try: + yield event_queue.get_nowait() + except Empty: + break + + # Wait efficiently for next future completion (OS condition variable) + completed, pending = wait(pending, timeout=0.1, return_when=FIRST_COMPLETED) + for future in completed: + exc = future.exception() + if isinstance(exc, KeyboardInterrupt): + DOWNLOAD_CANCELLED.set() + yield dict(downloaded="[yellow]CANCELLING") + pool.shutdown(wait=True, cancel_futures=True) + yield dict(downloaded="[yellow]CANCELLED") + raise exc + elif exc: + DOWNLOAD_CANCELLED.set() + yield dict(downloaded="[red]FAILING") + pool.shutdown(wait=True, cancel_futures=True) + yield dict(downloaded="[red]FAILED") + if debug_logger: + debug_logger.log( + level="ERROR", + operation="downloader_failed", + message=f"Download failed: {exc}", + error=exc, + 
context={ + "url_count": len(urls), + "output_dir": str(output_dir), + }, + ) + raise exc + + # Drain any remaining events from workers that just finished while True: try: yield event_queue.get_nowait() except Empty: break - done = {future for future in pending if future.done()} - for future in done: - pending.remove(future) - exc = future.exception() - if isinstance(exc, KeyboardInterrupt): - DOWNLOAD_CANCELLED.set() - yield dict(downloaded="[yellow]CANCELLING") - pool.shutdown(wait=True, cancel_futures=True) - yield dict(downloaded="[yellow]CANCELLED") - raise exc - elif exc: - DOWNLOAD_CANCELLED.set() - yield dict(downloaded="[red]FAILING") - pool.shutdown(wait=True, cancel_futures=True) - yield dict(downloaded="[red]FAILED") - if debug_logger: - debug_logger.log( - level="ERROR", - operation="downloader_failed", - message=f"Download failed: {exc}", - error=exc, - context={ - "url_count": len(urls), - "output_dir": str(output_dir), - }, - ) - raise exc - - if pending: - try: - yield event_queue.get(timeout=0.1) - except Empty: - pass - - # Drain any remaining events from workers that just finished - while True: - try: - yield event_queue.get_nowait() - except Empty: - break - if debug_logger: debug_logger.log( level="DEBUG", From 99be88dc08b9ac390a732ee9e4fb51de1413d98e Mon Sep 17 00:00:00 2001 From: Andy Date: Tue, 24 Mar 2026 10:08:17 -0600 Subject: [PATCH 5/8] feat(session): replace curl_cffi with rnet for TLS-fingerprinted HTTP Replace CurlSession (curl_cffi) with RnetSession powered by rnet (Rust/BoringSSL). Benchmarks show 3.5x faster segmented downloads (1.06 GB/s vs 304 MB/s) and 16% faster single-file downloads with near-zero TLS fingerprinting overhead. - Add RnetSession wrapper with requests-compatible API (headers, cookies, proxies, retry logic, prepared requests) - Add RnetResponse wrapper normalizing rnet quirks (status_code as int, text as property, bytes-to-str headers, iter_content re-chunking) - Replace CurlSession isinstance checks across manifests, tracks, DRM - Update downloader with rnet native streaming path and byte-based progress tracking for accurate Rich progress bars - Add speed display column to Rich progress bar (DASH/HLS/URL prefix) - Add rnet dependency, services use exact preset names (e.g. 
OkHttp4_12) --- pyproject.toml | 4 +- unshackle/commands/dl.py | 6 +- unshackle/core/downloaders/requests.py | 272 ++++++---- unshackle/core/drm/clearkey.py | 7 +- unshackle/core/manifests/dash.py | 14 +- unshackle/core/manifests/hls.py | 41 +- unshackle/core/manifests/ism.py | 8 +- unshackle/core/manifests/m3u8.py | 4 +- unshackle/core/session.py | 693 +++++++++++++++++++------ unshackle/core/tracks/track.py | 18 +- uv.lock | 147 ++---- 11 files changed, 790 insertions(+), 424 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 42892af..74fa6c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,6 @@ dependencies = [ "jsonpickle>=3.0.4,<5", "langcodes>=3.4.0,<4", "lxml>=5.2.1,<7", - "pproxy>=2.7.9,<3", "protobuf>=4.25.3,<7", "pycaption>=2.2.6,<3", "pycryptodomex>=3.20.0,<4", @@ -55,9 +54,7 @@ dependencies = [ "Unidecode>=1.3.8,<2", "urllib3>=2.6.3,<3", "chardet>=5.2.0,<6", - "curl-cffi>=0.7.0b4,<0.14", "pyplayready>=0.8.3,<0.9", - "httpx>=0.28.1,<0.29", "cryptography>=45.0.0,<47", "subby", "aiohttp>=3.13.3,<4", @@ -68,6 +65,7 @@ dependencies = [ "language-data>=1.4.0", "wasmtime>=41.0.0", "animeapi-py>=0.6.0", + "rnet>=2.4.2", ] [project.urls] diff --git a/unshackle/commands/dl.py b/unshackle/commands/dl.py index 603c685..369d0be 100644 --- a/unshackle/commands/dl.py +++ b/unshackle/commands/dl.py @@ -2190,6 +2190,8 @@ class dl: BarColumn(), "•", TimeRemainingColumn(compact=True, elapsed_when_finished=True), + "•", + TextColumn("{task.fields[downloaded]}"), console=console, ) @@ -2215,7 +2217,7 @@ class dl: def enqueue_mux_tasks(task_description: str, base_tracks: Tracks) -> None: if merge_audio or not base_tracks.audio: - task_id = progress.add_task(f"{task_description}...", total=None, start=False) + task_id = progress.add_task(f"{task_description}...", total=None, start=False, downloaded="") multiplex_tasks.append((task_id, base_tracks, None)) return @@ -2228,7 +2230,7 @@ class dl: if audio_codec: description = f"{task_description} {audio_codec.name}" - task_id = progress.add_task(f"{description}...", total=None, start=False) + task_id = progress.add_task(f"{description}...", total=None, start=False, downloaded="") task_tracks = clone_tracks_for_audio(base_tracks, codec_audio_tracks) multiplex_tasks.append((task_id, task_tracks, audio_codec)) diff --git a/unshackle/core/downloaders/requests.py b/unshackle/core/downloaders/requests.py index 74732d7..fd23a41 100644 --- a/unshackle/core/downloaders/requests.py +++ b/unshackle/core/downloaders/requests.py @@ -1,7 +1,6 @@ import math import os import time -from collections import deque from concurrent.futures import FIRST_COMPLETED, wait from concurrent.futures.thread import ThreadPoolExecutor from http.cookiejar import CookieJar @@ -39,19 +38,24 @@ def _is_requests_session(session: Any) -> bool: return isinstance(session, Session) +def _is_rnet_session(session: Any) -> bool: + """Check if the session is an RnetSession (uses resp.stream()).""" + from unshackle.core.session import RnetSession + return isinstance(session, RnetSession) + + def download( url: str, save_path: Path, session: Optional[Any] = None, segmented: bool = False, - _speed_tracker: Optional[dict] = None, **kwargs: Any, ) -> Generator[dict[str, Any], None, None]: """ Download a file with optimized I/O. - Supports both requests.Session and curl_cffi CurlSession for TLS fingerprinting. - Uses raw socket reads for requests.Session (30-35% faster) and iter_content for CurlSession. + Supports both requests.Session and RnetSession for TLS fingerprinting. 
+ Uses raw socket reads for requests.Session and native rnet streaming for RnetSession. Yields the following download status updates while chunks are downloading: @@ -65,19 +69,15 @@ def download( url: Web URL of a file to download. save_path: The path to save the file to. If the save path's directory does not exist then it will be made automatically. - session: A requests.Session or curl_cffi CurlSession to make HTTP requests with. - CurlSession preserves TLS fingerprinting for services that need it. + session: A requests.Session or RnetSession to make HTTP requests with. + RnetSession preserves TLS fingerprinting for services that need it. segmented: If downloads are segments or parts of one bigger file. - _speed_tracker: Shared speed tracking state for this download batch (per-call, not global). kwargs: Any extra keyword arguments to pass to the session.get() call. Use this for one-time request changes like a header, cookie, or proxy. For example, to request Byte-ranges use e.g., `headers={"Range": "bytes=0-128"}`. """ session = session or Session() - if _speed_tracker is None: - _speed_tracker = {"sizes": deque(), "last_refresh": time.time()} - save_dir = save_path.parent control_file = save_path.with_name(f"{save_path.name}.!dev") @@ -100,22 +100,26 @@ def download( last_speed_refresh = _time() try: + use_rnet = _is_rnet_session(session) stream = session.get(url, stream=True, **kwargs) stream.raise_for_status() # Determine content length and adaptive chunk size - try: - content_length = int(stream.headers.get("Content-Length", "0")) - if stream.headers.get("Content-Encoding", "").lower() in ["gzip", "deflate", "br"]: + if use_rnet: + content_length = stream.content_length or 0 + else: + try: + content_length = int(stream.headers.get("Content-Length", "0")) + if stream.headers.get("Content-Encoding", "").lower() in ["gzip", "deflate", "br"]: + content_length = 0 + except ValueError: content_length = 0 - except ValueError: - content_length = 0 chunk_size = _adaptive_chunk_size(content_length) if not segmented: if content_length > 0: - yield dict(total=math.ceil(content_length / chunk_size)) + yield dict(total=content_length) else: yield dict(total=None) @@ -128,8 +132,12 @@ def download( # Cache f.write for hot loop _write = f.write - # Build chunk iterator — raw reads for requests.Session, iter_content for CurlSession - if use_raw: + # Build chunk iterator based on session type + if use_rnet: + # rnet: native Rust streaming — 3.5x faster than curl_cffi (benchmarked) + chunks = stream.stream() + elif use_raw: + # requests.Session: raw socket read — 30-35% faster than iter_content stream.raw.decode_content = False _read = stream.raw.read @@ -143,6 +151,7 @@ def download( chunks = _chunks() else: + # Fallback: iter_content def _chunks_iter() -> Generator[bytes, None, None]: yield from stream.iter_content(chunk_size=chunk_size) stream.close() @@ -151,22 +160,31 @@ def download( # Unified write + progress loop _data_accumulated = 0 + _bytes_since_yield = 0 for chunk in chunks: + if DOWNLOAD_CANCELLED.is_set(): + break _write(chunk) download_size = len(chunk) written += download_size if not segmented: - yield dict(advance=1) + _bytes_since_yield += download_size + _data_accumulated += download_size now = _time() time_since = now - last_speed_refresh - _data_accumulated += download_size - if time_since > PROGRESS_WINDOW or download_size < chunk_size: + if time_since > PROGRESS_WINDOW: + yield dict(advance=_bytes_since_yield) + _bytes_since_yield = 0 download_speed = 
math.ceil(_data_accumulated / (time_since or 1)) yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") last_speed_refresh = now _data_accumulated = 0 + # Flush any remaining bytes + if not segmented and _bytes_since_yield > 0: + yield dict(advance=_bytes_since_yield) + # Truncate to actual written size in case pre-allocation overshot if content_length > 0 and written != content_length: f.truncate(written) @@ -178,21 +196,6 @@ def download( if segmented: yield dict(advance=1) - now = _time() - sizes = _speed_tracker["sizes"] - if written: - sizes.append((now, written)) - cutoff = now - SPEED_ROLLING_WINDOW - while sizes and sizes[0][0] < cutoff: - sizes.popleft() - time_since = now - _speed_tracker["last_refresh"] - if sizes and time_since > PROGRESS_WINDOW: - window_start = sizes[0][0] - window_duration = now - window_start - data_size = sum(size for _, size in sizes) - download_speed = math.ceil(data_size / (window_duration or 1)) - yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") - _speed_tracker["last_refresh"] = now break except Exception as e: save_path.unlink(missing_ok=True) @@ -217,7 +220,7 @@ def requests( """ Download files with optimized I/O and adaptive chunk sizing. - Supports both requests.Session and curl_cffi CurlSession. When a CurlSession is + Supports both requests.Session and RnetSession. When a RnetSession is provided (e.g. from a service's get_session()), TLS fingerprinting is preserved on all segment downloads. @@ -245,7 +248,7 @@ def requests( proxy: An optional proxy URI to route connections through for all downloads. max_workers: The maximum amount of threads to use for downloads. Defaults to min(12,(cpu_count+4)). - session: An optional requests.Session or curl_cffi CurlSession to use. If provided, + session: An optional requests.Session or RnetSession to use. If provided, it will be used directly (preserving TLS fingerprinting). If None, a new requests.Session with HTTPAdapter connection pooling will be created. """ @@ -293,7 +296,7 @@ def requests( ] # Use provided session or create a new optimized requests.Session - # When a session is provided (e.g., service's CurlSession), don't mutate headers/cookies/proxy — + # When a session is provided (e.g., service's RnetSession), don't mutate headers/cookies/proxy — # they're already set and the session may be shared across tracks. 
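    # A hedged illustration of the intended call pattern (not part of this hunk;
    # MyService and Service are hypothetical placeholder names):
    #
    #     from unshackle.core.session import session
    #
    #     class MyService(Service):
    #         @staticmethod
    #         def get_session():
    #             return session("OkHttp4_12")  # one RnetSession per service
    #
    # The returned object is handed to this downloader unchanged, so every segment
    # request keeps the service's TLS fingerprint, cookie store, and proxy settings.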
if session is None: session = Session() @@ -331,93 +334,142 @@ def requests( ) segmented_batch = len(urls) > 1 - if segmented_batch: - yield dict(total=len(urls)) - # Per-call speed tracker — shared across threads within this call only - speed_tracker: dict[str, Any] = {"sizes": deque(), "last_refresh": time.time()} - - try: - # Fast path: single URL — no thread pool overhead - if len(urls) == 1: + # Fast path: single URL — no thread pool overhead + if len(urls) == 1: + try: yield from download( session=session, segmented=segmented_batch, - _speed_tracker=speed_tracker, **urls[0], ) - else: - with ThreadPoolExecutor(max_workers=max_workers) as pool: - event_queue: Queue[dict[str, Any]] = Queue() + except KeyboardInterrupt: + DOWNLOAD_CANCELLED.set() + yield dict(downloaded="[yellow]CANCELLED") + raise + else: + # Segmented download with thread pool + # Speed is tracked here on the main thread, not in workers + total_bytes = 0 + start_time = time.time() + last_speed_report = start_time - def _download_worker(url_item: dict[str, Any]) -> None: - for event in download( - session=session, - segmented=segmented_batch, - _speed_tracker=speed_tracker, - **url_item, - ): - event_queue.put(event) + pool = ThreadPoolExecutor(max_workers=max_workers) + event_queue: Queue[dict[str, Any]] = Queue() - futures = [pool.submit(_download_worker, url) for url in urls] - pending = set(futures) + def _download_worker(url_item: dict[str, Any]) -> None: + for event in download( + session=session, + segmented=segmented_batch, + **url_item, + ): + event_queue.put(event) - while pending: - # Drain queued progress updates for responsive UI - while True: - try: - yield event_queue.get_nowait() - except Empty: - break + futures = [pool.submit(_download_worker, url) for url in urls] + pending = set(futures) - # Wait efficiently for next future completion (OS condition variable) - completed, pending = wait(pending, timeout=0.1, return_when=FIRST_COMPLETED) - for future in completed: - exc = future.exception() - if isinstance(exc, KeyboardInterrupt): - DOWNLOAD_CANCELLED.set() - yield dict(downloaded="[yellow]CANCELLING") - pool.shutdown(wait=True, cancel_futures=True) - yield dict(downloaded="[yellow]CANCELLED") - raise exc - elif exc: - DOWNLOAD_CANCELLED.set() - yield dict(downloaded="[red]FAILING") - pool.shutdown(wait=True, cancel_futures=True) - yield dict(downloaded="[red]FAILED") - if debug_logger: - debug_logger.log( - level="ERROR", - operation="downloader_failed", - message=f"Download failed: {exc}", - error=exc, - context={ - "url_count": len(urls), - "output_dir": str(output_dir), - }, - ) - raise exc + pending_advance = 0 - # Drain any remaining events from workers that just finished + try: + while pending: + # Drain queued events — batch advances, track bytes for speed while True: try: - yield event_queue.get_nowait() + event = event_queue.get_nowait() except Empty: break + # Accumulate advance events for batched yield + advance = event.get("advance") + if advance: + pending_advance += advance + continue + # Track bytes from completed segments for speed calculation + written = event.get("written") + if written: + total_bytes += written + # Pass through other events (file_downloaded, total, etc.) 
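+                # (For reference, the dict keys yielded by download() and consumed here:
+                # "total" sets the progress bar size, "advance" adds byte/segment progress,
+                # "downloaded" carries a formatted speed string, "written" reports a finished
+                # file's size, and "file_downloaded" reports the completed output path.)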
+ yield event - if debug_logger: - debug_logger.log( - level="DEBUG", - operation="downloader_complete", - message="Download completed successfully", - context={ - "url_count": len(urls), - "output_dir": str(output_dir), - "filename": filename, - }, - ) - finally: - speed_tracker["sizes"].clear() + # Yield batched advances every drain cycle for responsive progress bar + if pending_advance > 0: + yield dict(advance=pending_advance) + pending_advance = 0 + + # Yield speed every 0.5s (throttled to avoid spamming Rich) + now = time.time() + if now - last_speed_report > 0.5 and total_bytes > 0: + elapsed = now - start_time + if elapsed > 0: + download_speed = math.ceil(total_bytes / elapsed) + yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") + last_speed_report = now + + # Wait efficiently for next future completion (OS condition variable) + completed, pending = wait(pending, timeout=0.1, return_when=FIRST_COMPLETED) + for future in completed: + exc = future.exception() + if isinstance(exc, KeyboardInterrupt): + raise KeyboardInterrupt() + elif exc: + DOWNLOAD_CANCELLED.set() + yield dict(downloaded="[red]FAILING") + pool.shutdown(wait=False, cancel_futures=True) + yield dict(downloaded="[red]FAILED") + if debug_logger: + debug_logger.log( + level="ERROR", + operation="downloader_failed", + message=f"Download failed: {exc}", + error=exc, + context={ + "url_count": len(urls), + "output_dir": str(output_dir), + }, + ) + raise exc + except KeyboardInterrupt: + DOWNLOAD_CANCELLED.set() + yield dict(downloaded="[yellow]CANCELLING") + pool.shutdown(wait=False, cancel_futures=True) + yield dict(downloaded="[yellow]CANCELLED") + raise + finally: + pool.shutdown(wait=False, cancel_futures=True) + + # Drain remaining events + while True: + try: + event = event_queue.get_nowait() + except Empty: + break + advance = event.get("advance") + if advance: + pending_advance += advance + continue + written = event.get("written") + if written: + total_bytes += written + yield event + + # Flush remaining advances and final speed + if pending_advance > 0: + yield dict(advance=pending_advance) + elapsed = time.time() - start_time + if elapsed > 0 and total_bytes > 0: + download_speed = math.ceil(total_bytes / elapsed) + yield dict(downloaded=f"{filesize.decimal(download_speed)}/s") + + if debug_logger: + debug_logger.log( + level="DEBUG", + operation="downloader_complete", + message="Download completed successfully", + context={ + "url_count": len(urls), + "output_dir": str(output_dir), + "filename": filename, + }, + ) __all__ = ("requests",) diff --git a/unshackle/core/drm/clearkey.py b/unshackle/core/drm/clearkey.py index 089fa71..c1c93e4 100644 --- a/unshackle/core/drm/clearkey.py +++ b/unshackle/core/drm/clearkey.py @@ -8,10 +8,11 @@ from urllib.parse import urljoin from Cryptodome.Cipher import AES from Cryptodome.Util.Padding import unpad -from curl_cffi.requests import Session as CurlSession from m3u8.model import Key from requests import Session +from unshackle.core.session import RnetSession + class ClearKey: """AES Clear Key DRM System.""" @@ -70,8 +71,8 @@ class ClearKey: """ if not isinstance(m3u_key, Key): raise ValueError(f"Provided M3U Key is in an unexpected type {m3u_key!r}") - if not isinstance(session, (Session, CurlSession, type(None))): - raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not a {type(session)}") + if not isinstance(session, (Session, RnetSession, type(None))): + raise TypeError(f"Expected session to be a {Session} or {RnetSession}, not a 
{type(session)}") if not m3u_key.method.startswith("AES"): raise ValueError(f"Provided M3U Key is not an AES Clear Key, {m3u_key.method}") diff --git a/unshackle/core/manifests/dash.py b/unshackle/core/manifests/dash.py index 3ee61fd..a09d705 100644 --- a/unshackle/core/manifests/dash.py +++ b/unshackle/core/manifests/dash.py @@ -16,7 +16,6 @@ from uuid import UUID from zlib import crc32 import requests -from curl_cffi.requests import Session as CurlSession from langcodes import Language, tag_is_valid from lxml.etree import Element, ElementTree from pyplayready.system.pssh import PSSH as PR_PSSH @@ -28,6 +27,7 @@ from unshackle.core.cdm.detect import is_playready_cdm from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack from unshackle.core.drm import DRM_T, PlayReady, Widevine from unshackle.core.events import events +from unshackle.core.session import RnetSession from unshackle.core.tracks import Audio, Subtitle, Tracks, Video from unshackle.core.utilities import get_debug_logger, is_close_match, try_ensure_utf8 from unshackle.core.utils.xml import load_xml @@ -49,7 +49,7 @@ class DASH: self.url = url @classmethod - def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> DASH: + def from_url(cls, url: str, session: Optional[Union[Session, RnetSession]] = None, **args: Any) -> DASH: if not url: raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.") if not isinstance(url, str): @@ -57,8 +57,8 @@ class DASH: if not session: session = Session() - elif not isinstance(session, (Session, CurlSession)): - raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}") + elif not isinstance(session, (Session, RnetSession)): + raise TypeError(f"Expected session to be a {Session} or {RnetSession}, not {session!r}") res = session.get(url, **args) if res.url != url: @@ -264,8 +264,8 @@ class DASH: ): if not session: session = Session() - elif not isinstance(session, (Session, CurlSession)): - raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}") + elif not isinstance(session, (Session, RnetSession)): + raise TypeError(f"Expected session to be a {Session} or {RnetSession}, not {session!r}") if proxy: session.proxies.update({"all": proxy}) @@ -589,7 +589,7 @@ class DASH: manifest: ElementTree, track: AnyTrack, track_url: str, - session: Union[Session, CurlSession], + session: Union[Session, RnetSession], ) -> tuple[ Optional[bytes], list[tuple[str, Optional[str]]], diff --git a/unshackle/core/manifests/hls.py b/unshackle/core/manifests/hls.py index 7c6ee63..e5344e7 100644 --- a/unshackle/core/manifests/hls.py +++ b/unshackle/core/manifests/hls.py @@ -17,8 +17,6 @@ from zlib import crc32 import m3u8 import requests -from curl_cffi.requests import Response as CurlResponse -from curl_cffi.requests import Session as CurlSession from langcodes import Language, tag_is_valid from m3u8 import M3U8 from pyplayready.cdm import Cdm as PlayReadyCdm @@ -32,12 +30,13 @@ from unshackle.core.cdm.detect import is_playready_cdm, is_widevine_cdm from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack from unshackle.core.drm import DRM_T, ClearKey, MonaLisa, PlayReady, Widevine from unshackle.core.events import events +from unshackle.core.session import RnetResponse, RnetSession from unshackle.core.tracks import Audio, Subtitle, Tracks, Video from unshackle.core.utilities import get_debug_logger, get_extension, 
is_close_match, try_ensure_utf8 class HLS: - def __init__(self, manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None): + def __init__(self, manifest: M3U8, session: Optional[Union[Session, RnetSession]] = None): if not manifest: raise ValueError("HLS manifest must be provided.") if not isinstance(manifest, M3U8): @@ -49,7 +48,7 @@ class HLS: self.session = session or Session() @classmethod - def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> HLS: + def from_url(cls, url: str, session: Optional[Union[Session, RnetSession]] = None, **args: Any) -> HLS: if not url: raise requests.URLRequired("HLS manifest URL must be provided.") if not isinstance(url, str): @@ -57,22 +56,22 @@ class HLS: if not session: session = Session() - elif not isinstance(session, (Session, CurlSession)): - raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}") + elif not isinstance(session, (Session, RnetSession)): + raise TypeError(f"Expected session to be a {Session} or {RnetSession}, not {session!r}") res = session.get(url, **args) - # Handle requests and curl_cffi response objects + # Handle requests and rnet response objects if isinstance(res, requests.Response): if not res.ok: raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res) content = res.text - elif isinstance(res, CurlResponse): + elif isinstance(res, RnetResponse): if not res.ok: raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res) content = res.text else: - raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(res)}") + raise TypeError(f"Expected response to be a requests.Response or rnet.Response, not {type(res)}") master = m3u8.loads(content, uri=url) @@ -281,7 +280,7 @@ class HLS: save_path: Path, save_dir: Path, progress: partial, - session: Optional[Union[Session, CurlSession]] = None, + session: Optional[Union[Session, RnetSession]] = None, proxy: Optional[str] = None, max_workers: Optional[int] = None, license_widevine: Optional[Callable] = None, @@ -290,8 +289,8 @@ class HLS: ) -> None: if not session: session = Session() - elif not isinstance(session, (Session, CurlSession)): - raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}") + elif not isinstance(session, (Session, RnetSession)): + raise TypeError(f"Expected session to be a {Session} or {RnetSession}, not {session!r}") if proxy: # Handle proxies differently based on session type @@ -305,14 +304,14 @@ class HLS: else: # Get the playlist text and handle both session types response = session.get(track.url) - if isinstance(response, requests.Response) or isinstance(response, CurlResponse): + if isinstance(response, requests.Response) or isinstance(response, RnetResponse): if not response.ok: log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}") sys.exit(1) playlist_text = response.text else: raise TypeError( - f"Expected response to be a requests.Response or curl_cffi.Response, not {type(response)}" + f"Expected response to be a requests.Response or rnet.Response, not {type(response)}" ) master = m3u8.loads(playlist_text, uri=track.url) @@ -613,12 +612,12 @@ class HLS: ) # Check response based on session type - if isinstance(res, requests.Response) or isinstance(res, CurlResponse): + if isinstance(res, requests.Response) or isinstance(res, RnetResponse): res.raise_for_status() init_content = res.content else: raise TypeError( 
- f"Expected response to be requests.Response or curl_cffi.Response, not {type(res)}" + f"Expected response to be requests.Response or rnet.Response, not {type(res)}" ) map_data = (segment.init_section, init_content) @@ -832,7 +831,7 @@ class HLS: @staticmethod def parse_session_data_keys( - manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None + manifest: M3U8, session: Optional[Union[Session, RnetSession]] = None ) -> list[m3u8.model.Key]: """Parse `com.apple.hls.keys` session data and return Key objects.""" keys: list[m3u8.model.Key] = [] @@ -907,7 +906,7 @@ class HLS: def get_track_kid_from_init( master: M3U8, track: AnyTrack, - session: Union[Session, CurlSession], + session: Union[Session, RnetSession], ) -> Optional[UUID]: """ Extract the track's Key ID from its init segment (EXT-X-MAP). @@ -974,7 +973,7 @@ class HLS: @staticmethod def get_drm( key: Union[m3u8.model.SessionKey, m3u8.model.Key], - session: Optional[Union[Session, CurlSession]] = None, + session: Optional[Union[Session, RnetSession]] = None, ) -> DRM_T: """ Convert HLS EXT-X-KEY data to an initialized DRM object. @@ -986,8 +985,8 @@ class HLS: Raises a NotImplementedError if the key system is not supported. """ - if not isinstance(session, (Session, CurlSession, type(None))): - raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {type(session)}") + if not isinstance(session, (Session, RnetSession, type(None))): + raise TypeError(f"Expected session to be a {Session} or {RnetSession}, not {type(session)}") if not session: session = Session() diff --git a/unshackle/core/manifests/ism.py b/unshackle/core/manifests/ism.py index 7f4d4a9..875fd5e 100644 --- a/unshackle/core/manifests/ism.py +++ b/unshackle/core/manifests/ism.py @@ -9,7 +9,6 @@ from pathlib import Path from typing import Any, Callable, Optional, Union import requests -from curl_cffi.requests import Session as CurlSession from langcodes import Language, tag_is_valid from lxml.etree import Element from pyplayready.system.pssh import PSSH as PR_PSSH @@ -19,6 +18,7 @@ from requests import Session from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack from unshackle.core.drm import DRM_T, PlayReady, Widevine from unshackle.core.events import events +from unshackle.core.session import RnetSession from unshackle.core.tracks import Audio, Subtitle, Track, Tracks, Video from unshackle.core.utilities import get_debug_logger, try_ensure_utf8 from unshackle.core.utils.xml import load_xml @@ -34,13 +34,13 @@ class ISM: self.url = url @classmethod - def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **kwargs: Any) -> "ISM": + def from_url(cls, url: str, session: Optional[Union[Session, RnetSession]] = None, **kwargs: Any) -> "ISM": if not url: raise requests.URLRequired("ISM manifest URL must be provided") if not session: session = Session() - elif not isinstance(session, (Session, CurlSession)): - raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}") + elif not isinstance(session, (Session, RnetSession)): + raise TypeError(f"Expected session to be a {Session} or {RnetSession}, not {session!r}") res = session.get(url, **kwargs) if res.url != url: url = res.url diff --git a/unshackle/core/manifests/m3u8.py b/unshackle/core/manifests/m3u8.py index 761d73c..2f39f84 100644 --- a/unshackle/core/manifests/m3u8.py +++ b/unshackle/core/manifests/m3u8.py @@ -5,10 +5,10 @@ from __future__ import annotations from typing import Optional, Union 
import m3u8 -from curl_cffi.requests import Session as CurlSession from requests import Session from unshackle.core.manifests.hls import HLS +from unshackle.core.session import RnetSession from unshackle.core.tracks import Tracks @@ -16,7 +16,7 @@ def parse( master: m3u8.M3U8, language: str, *, - session: Optional[Union[Session, CurlSession]] = None, + session: Optional[Union[Session, RnetSession]] = None, ) -> Tracks: """Parse a variant playlist to ``Tracks`` with basic information, defer DRM loading.""" tracks = HLS(master, session=session).to_tracks(language) diff --git a/unshackle/core/session.py b/unshackle/core/session.py index 974dd95..bf49286 100644 --- a/unshackle/core/session.py +++ b/unshackle/core/session.py @@ -1,96 +1,452 @@ -"""Session utilities for creating HTTP sessions with different backends.""" +"""Session utilities for creating HTTP sessions with TLS fingerprinting via rnet (Rust/BoringSSL).""" from __future__ import annotations +import http import logging import random import time -import warnings +from collections.abc import Iterator, MutableMapping from datetime import datetime, timezone from email.utils import parsedate_to_datetime -from typing import Any -from urllib.parse import urlparse +from http.cookiejar import CookieJar +from typing import Any, Optional +from urllib.parse import urlencode, urlparse, urlunparse -from curl_cffi.requests import Response, Session, exceptions +import rnet +from requests import HTTPError, Request +from requests.structures import CaseInsensitiveDict from unshackle.core.config import config -# Globally suppress curl_cffi HTTPS proxy warnings since some proxy providers -# (like NordVPN) require HTTPS URLs but curl_cffi expects HTTP format -warnings.filterwarnings( - "ignore", message="Make sure you are using https over https proxy.*", category=RuntimeWarning, module="curl_cffi.*" -) +# --------------------------------------------------------------------------- +# Impersonate preset mapping — rnet uses named presets (no custom JA3/Akamai) +# --------------------------------------------------------------------------- -FINGERPRINT_PRESETS = { - "okhttp4": { - "ja3": ( - "771," # TLS 1.2 - "4865-4866-4867-49195-49196-52393-49199-49200-52392-49171-49172-156-157-47-53," # Ciphers - "0-23-65281-10-11-35-16-5-13-51-45-43," # Extensions - "29-23-24," # Named groups (x25519, secp256r1, secp384r1) - "0" # EC point formats - ), - "akamai": "4:16777216|16711681|0|m,p,a,s", - "description": "OkHttp 3.x/4.x (BoringSSL TLS stack)", - }, - "okhttp5": { - "ja3": ( - "771," # TLS 1.2 - "4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53," # Ciphers - "0-23-65281-10-11-35-16-5-13-51-45-43," # Extensions - "29-23-24," # Named groups (x25519, secp256r1, secp384r1) - "0" # EC point formats - ), - "akamai": "4:16777216|16711681|0|m,p,a,s", - "description": "OkHttp 5.x (BoringSSL TLS stack)", - }, - "shield_okhttp": { - "ja3": ( - "771," # TLS 1.2 - "4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53," # Ciphers (OkHttp 4.11) - "0-23-65281-10-11-35-16-5-13-51-45-43-21," # Extensions (incl padding ext 21) - "29-23-24," # Named groups (x25519, secp256r1, secp384r1) - "0" # EC point formats - ), - "akamai": "4:16777216|16711681|0|m,p,a,s", - "description": "NVIDIA SHIELD Android TV OkHttp 4.11 (captured JA3)", - }, +DEFAULT_IMPERSONATE = rnet.Impersonate.Chrome131 + + +def _resolve_impersonate(browser: str) -> rnet.Impersonate: + """Resolve a browser string to an rnet.Impersonate preset. 
+ + Accepts exact rnet preset names (e.g. "Chrome131", "OkHttp4_12", "Edge101"). + See https://github.com/0x676e67/rnet for the full list of available presets. + """ + preset = getattr(rnet.Impersonate, browser, None) + if preset is not None: + return preset + raise ValueError( + f"Unknown impersonate preset: {browser!r}. " + f"Use exact rnet preset names like 'Chrome131', 'OkHttp4_12', 'Edge101'. " + f"See rnet.Impersonate for all available presets." + ) + +# Map string method names to rnet.Method enum +_METHOD_MAP: dict[str, rnet.Method] = { + "GET": rnet.Method.GET, + "POST": rnet.Method.POST, + "PUT": rnet.Method.PUT, + "DELETE": rnet.Method.DELETE, + "HEAD": rnet.Method.HEAD, + "OPTIONS": rnet.Method.OPTIONS, + "PATCH": rnet.Method.PATCH, + "TRACE": rnet.Method.TRACE, } -class MaxRetriesError(exceptions.RequestException): - def __init__(self, message, cause=None): +# --------------------------------------------------------------------------- +# Response headers adapter — bytes → str +# --------------------------------------------------------------------------- + + +class RnetResponseHeaders(MutableMapping): + """Read-only str-based view over rnet's bytes-based HeaderMap.""" + + def __init__(self, header_map: Any) -> None: + self._map = header_map + + def _decode(self, val: Any) -> str: + return val.decode("utf-8", errors="replace") if isinstance(val, (bytes, bytearray)) else str(val) + + def __getitem__(self, key: str) -> str: + val = self._map[key] + return self._decode(val) + + def __setitem__(self, key: str, value: str) -> None: + raise TypeError("Response headers are read-only") + + def __delitem__(self, key: str) -> None: + raise TypeError("Response headers are read-only") + + def __contains__(self, key: object) -> bool: + if not isinstance(key, str): + return False + return self._map.contains_key(key) + + def __iter__(self) -> Iterator[str]: + seen: set[str] = set() + for k, _ in self._map.items(): + dk = self._decode(k) + if dk not in seen: + seen.add(dk) + yield dk + + def __len__(self) -> int: + return self._map.keys_len() + + def get(self, key: str, default: Optional[str] = None) -> Optional[str]: + val = self._map.get(key) + if val is None: + return default + return self._decode(val) + + def items(self) -> list[tuple[str, str]]: + return [(self._decode(k), self._decode(v)) for k, v in self._map.items()] + + +# --------------------------------------------------------------------------- +# Response wrapper — requests-compatible interface +# --------------------------------------------------------------------------- + + +class RnetResponse: + """Wraps rnet.BlockingResponse with a requests-compatible API.""" + + def __init__(self, resp: Any) -> None: + self._resp = resp + self._headers: Optional[RnetResponseHeaders] = None + self._content: Optional[bytes] = None + self._text: Optional[str] = None + self._streamed = False + + @property + def status_code(self) -> int: + return int(str(self._resp.status_code)) + + @property + def ok(self) -> bool: + return self._resp.ok + + @property + def headers(self) -> RnetResponseHeaders: + if self._headers is None: + self._headers = RnetResponseHeaders(self._resp.headers) + return self._headers + + @property + def url(self) -> str: + return str(self._resp.url) + + @property + def content_length(self) -> Optional[int]: + return self._resp.content_length + + @property + def content(self) -> bytes: + if self._content is None: + self._content = self._resp.bytes() + return self._content + + @property + def text(self) -> str: + if self._text is 
None: + encoding = self._resp.encoding or "utf-8" + self._text = self.content.decode(encoding, errors="replace") + return self._text + + @property + def reason(self) -> str: + try: + return http.HTTPStatus(self.status_code).phrase + except ValueError: + return "Unknown" + + @property + def cookies(self) -> Any: + return self._resp.cookies + + def json(self, **kwargs: Any) -> Any: + import json as _json + return _json.loads(self.content) + + def raise_for_status(self) -> None: + if not self.ok: + raise HTTPError( + f"{self.status_code} {self.reason}: {self.url}", + response=self, + ) + + def iter_content(self, chunk_size: Optional[int] = None) -> Iterator[bytes]: + """Re-chunk rnet's variable-size stream into fixed-size pieces.""" + self._streamed = True + if chunk_size is None or chunk_size <= 0: + yield from self._resp.stream() + return + + buf = bytearray() + for chunk in self._resp.stream(): + buf.extend(chunk) + while len(buf) >= chunk_size: + yield bytes(buf[:chunk_size]) + buf = buf[chunk_size:] + if buf: + yield bytes(buf) + + def stream(self) -> Iterator[bytes]: + """Direct pass-through of rnet's native stream iterator.""" + self._streamed = True + yield from self._resp.stream() + + def close(self) -> None: + try: + self._resp.close() + except Exception: + pass + + +# --------------------------------------------------------------------------- +# Session headers adapter — persists via client.update() +# --------------------------------------------------------------------------- + + +class RnetSessionHeaders(CaseInsensitiveDict): + """Dict-like headers that persist to the rnet client via update().""" + + def __init__(self, client: Any) -> None: + self._client = client + super().__init__() + + def _sync(self) -> None: + """Push current headers to the rnet client.""" + if hasattr(self, "_store") and self._store: + self._client.update(headers={k: v for k, v in self.items()}) + + def __setitem__(self, key: str, value: str) -> None: + super().__setitem__(key, value) + self._sync() + + def update(self, __m: Any = None, **kwargs: Any) -> None: + if __m: + if hasattr(__m, "items"): + for k, v in __m.items(): + super().__setitem__(k, v) + else: + for k, v in __m: + super().__setitem__(k, v) + for k, v in kwargs.items(): + super().__setitem__(k, v) + self._sync() + + def pop(self, key: str, *args: Any) -> Any: + result = super().pop(key, *args) + # rnet doesn't support removing individual headers, but we track locally + # and always send the full set on next update + return result + + def __delitem__(self, key: str) -> None: + super().__delitem__(key) + + +# --------------------------------------------------------------------------- +# Session cookies adapter +# --------------------------------------------------------------------------- + + +class RnetCookieAdapter(MutableMapping): + """Cookie adapter that bridges requests-style cookie access to rnet.""" + + def __init__(self, client: Any) -> None: + self._client = client + self._cookies: dict[str, dict[str, str]] = {} # {domain: {name: value}} + self._flat: dict[str, str] = {} # flat name→value for simple access + + def update(self, other: Any = None, **kwargs: Any) -> None: + if other is None: + other = {} + if isinstance(other, CookieJar): + for cookie in other: + domain = cookie.domain or "" + name = cookie.name + value = cookie.value or "" + self._flat[name] = value + self._cookies.setdefault(domain, {})[name] = value + try: + url = f"https://{domain.lstrip('.')}" if domain else "https://localhost" + self._client.set_cookie(url, 
rnet.Cookie(name, value)) + except Exception: + pass + elif isinstance(other, dict): + for name, value in other.items(): + self._flat[name] = value + self._client.set_cookie("https://localhost", rnet.Cookie(name, str(value))) + self._flat.update(other) + elif hasattr(other, "items"): + for name, value in other.items(): + self._flat[name] = str(value) + self._client.set_cookie("https://localhost", rnet.Cookie(name, str(value))) + + for name, value in kwargs.items(): + self._flat[name] = value + self._client.set_cookie("https://localhost", rnet.Cookie(name, value)) + + def get(self, name: str, default: Optional[str] = None, domain: Optional[str] = None, + path: Optional[str] = None) -> Optional[str]: + if domain and domain in self._cookies: + return self._cookies[domain].get(name, default) + return self._flat.get(name, default) + + def set(self, name: str, value: str, domain: str = "localhost") -> None: + self._flat[name] = value + self._cookies.setdefault(domain, {})[name] = value + url = f"https://{domain.lstrip('.')}" + self._client.set_cookie(url, rnet.Cookie(name, value)) + + def __getitem__(self, name: str) -> str: + return self._flat[name] + + def __setitem__(self, name: str, value: str) -> None: + self.set(name, value) + + def __delitem__(self, name: str) -> None: + self._flat.pop(name, None) + for domain_cookies in self._cookies.values(): + domain_cookies.pop(name, None) + + def __contains__(self, name: object) -> bool: + return name in self._flat + + def __iter__(self) -> Iterator: + return iter(self._flat) + + def __len__(self) -> int: + return len(self._flat) + + def __bool__(self) -> bool: + return bool(self._flat) + + def items(self) -> list[tuple[str, str]]: + return list(self._flat.items()) + + def keys(self) -> list[str]: + return list(self._flat.keys()) + + def values(self) -> list[str]: + return list(self._flat.values()) + + +# --------------------------------------------------------------------------- +# Session proxy adapter +# --------------------------------------------------------------------------- + + +class RnetProxyDict(dict): + """Dict-like proxy config that syncs to the rnet client.""" + + def __init__(self, client: Any) -> None: + super().__init__() + self._client = client + + def _sync(self) -> None: + proxy = self.get("all") or self.get("https") or self.get("http") + if proxy: + self._client.update(proxy=proxy) + + def update(self, __m: Any = None, **kwargs: Any) -> None: + super().update(__m or {}, **kwargs) + self._sync() + + def __setitem__(self, key: str, value: str) -> None: + super().__setitem__(key, value) + self._sync() + + +# --------------------------------------------------------------------------- +# Exceptions +# --------------------------------------------------------------------------- + + +class MaxRetriesError(Exception): + def __init__(self, message: str, cause: Optional[Exception] = None) -> None: super().__init__(message) self.__cause__ = cause -class CurlSession(Session): +# --------------------------------------------------------------------------- +# RnetSession — main session class +# --------------------------------------------------------------------------- + + +class RnetSession: + """ + TLS-fingerprinted HTTP session powered by rnet (Rust/BoringSSL). + + Drop-in replacement for CurlSession with requests-compatible API. + Supports browser impersonation (Chrome, Firefox, Edge, Safari, OkHttp), + retry with exponential backoff, cookie persistence, and proxy support. 
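+
+    A minimal usage sketch (illustrative only; example.com stands in for a real host):
+
+        s = RnetSession(impersonate=rnet.Impersonate.Chrome131, max_retries=3)
+        r = s.get("https://example.com/api", params={"page": 1})
+        r.raise_for_status()
+        data = r.json()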
+ """ + def __init__( self, max_retries: int = 5, backoff_factor: float = 0.2, max_backoff: float = 60.0, - status_forcelist: list[int] | None = None, - allowed_methods: set[str] | None = None, - catch_exceptions: tuple[type[Exception], ...] | None = None, + status_forcelist: Optional[list[int]] = None, + allowed_methods: Optional[set[str]] = None, + catch_exceptions: Optional[tuple[type[Exception], ...]] = None, **session_kwargs: Any, - ): - super().__init__(**session_kwargs) - + ) -> None: + # Extract retry config before passing to rnet self.max_retries = max_retries self.backoff_factor = backoff_factor self.max_backoff = max_backoff self.status_forcelist = status_forcelist or [429, 500, 502, 503, 504] self.allowed_methods = allowed_methods or {"GET", "POST", "HEAD", "OPTIONS", "PUT", "DELETE", "TRACE"} self.catch_exceptions = catch_exceptions or ( - exceptions.ConnectionError, - exceptions.ProxyError, - exceptions.SSLError, - exceptions.Timeout, + rnet.ConnectionError, + rnet.TimeoutError, + rnet.RequestError, ) self.log = logging.getLogger(self.__class__.__name__) - def get_sleep_time(self, response: Response | None, attempt: int) -> float | None: + # Extract rnet-compatible kwargs + client_kwargs: dict[str, Any] = {} + for key in ("impersonate", "timeout", "proxy", "verify", "redirect"): + if key in session_kwargs: + client_kwargs[key] = session_kwargs.pop(key) + + # Always enable cookie store + client_kwargs["cookie_store"] = True + + # Handle verify=False + self.verify: bool = client_kwargs.pop("verify", True) + if not self.verify: + client_kwargs["danger_accept_invalid_certs"] = True + + self._client = rnet.BlockingClient(**client_kwargs) + + # Set up attribute adapters + self.headers = RnetSessionHeaders(self._client) + self.cookies = RnetCookieAdapter(self._client) + self.proxies = RnetProxyDict(self._client) + + # Handle initial headers/cookies/proxies from kwargs + if "headers" in session_kwargs: + self.headers.update(session_kwargs.pop("headers")) + if "cookies" in session_kwargs: + self.cookies.update(session_kwargs.pop("cookies")) + if "proxies" in session_kwargs: + self.proxies.update(session_kwargs.pop("proxies")) + + def _build_url(self, url: str, params: Optional[dict] = None) -> str: + """URL-encode params dict into the URL (rnet ignores params kwarg).""" + if not params: + return url + parsed = urlparse(url) + separator = "&" if parsed.query else "" + query = parsed.query + separator + urlencode(params, doseq=True) if parsed.query else urlencode(params, doseq=True) + return urlunparse(parsed._replace(query=query)) + + def get_sleep_time(self, response: Optional[RnetResponse], attempt: int) -> Optional[float]: if response: retry_after = response.headers.get("Retry-After") if retry_after: @@ -108,19 +464,42 @@ class CurlSession(Session): sleep_time = backoff_value + random.uniform(-jitter, jitter) return min(sleep_time, self.max_backoff) - def request(self, method: str, url: str, **kwargs: Any) -> Response: - if method.upper() not in self.allowed_methods: - return super().request(method, url, **kwargs) + def request(self, method: str, url: str, **kwargs: Any) -> RnetResponse: + method_upper = method.upper() if isinstance(method, str) else str(method).upper() - last_exception = None - response = None + # Build URL with params + url = self._build_url(url, kwargs.pop("params", None)) + + # Default allow_redirects=True + kwargs.setdefault("allow_redirects", True) + + # Pass verify setting + if not self.verify: + kwargs.setdefault("verify", False) + + # Remove kwargs rnet 
doesn't understand + kwargs.pop("stream", None) # rnet responses are always lazy + + # Resolve method enum + rnet_method = _METHOD_MAP.get(method_upper) + if rnet_method is None: + raise ValueError(f"Unsupported HTTP method: {method}") + + # Skip retry for non-allowed methods + if method_upper not in self.allowed_methods: + raw_resp = self._client.request(rnet_method, url, **kwargs) + return RnetResponse(raw_resp) + + last_exception: Optional[Exception] = None + response: Optional[RnetResponse] = None for attempt in range(self.max_retries + 1): try: - response = super().request(method, url, **kwargs) + raw_resp = self._client.request(rnet_method, url, **kwargs) + response = RnetResponse(raw_resp) if response.status_code not in self.status_forcelist: return response - last_exception = exceptions.HTTPError(f"Received status code: {response.status_code}") + last_exception = HTTPError(f"Received status code: {response.status_code}") self.log.warning( f"{response.status_code} {response.reason}({urlparse(url).path}). Retrying... " f"({attempt + 1}/{self.max_retries})" @@ -142,120 +521,100 @@ class CurlSession(Session): raise MaxRetriesError(f"Max retries exceeded for {method} {url}", cause=last_exception) + def get(self, url: str, **kwargs: Any) -> RnetResponse: + return self.request("GET", url, **kwargs) + + def post(self, url: str, **kwargs: Any) -> RnetResponse: + return self.request("POST", url, **kwargs) + + def put(self, url: str, **kwargs: Any) -> RnetResponse: + return self.request("PUT", url, **kwargs) + + def delete(self, url: str, **kwargs: Any) -> RnetResponse: + return self.request("DELETE", url, **kwargs) + + def head(self, url: str, **kwargs: Any) -> RnetResponse: + return self.request("HEAD", url, **kwargs) + + def options(self, url: str, **kwargs: Any) -> RnetResponse: + return self.request("OPTIONS", url, **kwargs) + + def patch(self, url: str, **kwargs: Any) -> RnetResponse: + return self.request("PATCH", url, **kwargs) + + def prepare_request(self, req: Request) -> Request: + """Compatibility shim for services using prepared requests.""" + # Merge session headers into request headers + if req.headers: + merged = dict(self.headers) + merged.update(req.headers) + req.headers = merged + else: + req.headers = dict(self.headers) + return req + + def send(self, req: Request, **kwargs: Any) -> RnetResponse: + """Compatibility shim for services using prepared requests.""" + method = req.method or "GET" + url = req.url or "" + + send_kwargs: dict[str, Any] = {} + if req.headers: + send_kwargs["headers"] = dict(req.headers) + if req.body: + send_kwargs["data"] = req.body + if req.json: + send_kwargs["json"] = req.json + + send_kwargs.update(kwargs) + return self.request(method, url, **send_kwargs) + + def mount(self, prefix: str, adapter: Any) -> None: + """No-op — rnet handles TLS and connection pooling natively.""" + pass + + def close(self) -> None: + """No-op — rnet manages its own resources.""" + pass + + +# --------------------------------------------------------------------------- +# session() factory +# --------------------------------------------------------------------------- + def session( - browser: str | None = None, - ja3: str | None = None, - akamai: str | None = None, - extra_fp: dict | None = None, - **kwargs, -) -> CurlSession: + browser: Optional[str] = None, + **kwargs: Any, +) -> RnetSession: """ - Create a curl_cffi session that impersonates a browser or custom TLS/HTTP fingerprint. 
- - This is a full replacement for requests.Session with browser impersonation - and anti-bot capabilities. The session uses curl-impersonate under the hood - to mimic real browser behavior. + Create an rnet session with TLS fingerprinting (browser/app impersonation). Args: - browser: Browser to impersonate (e.g. "chrome124", "firefox", "safari") OR - fingerprint preset name (e.g. "okhttp4"). - Uses the configured default from curl_impersonate.browser if not specified. - Available presets: okhttp4, okhttp5 - See https://github.com/lexiforest/curl_cffi#sessions for browser options. - ja3: Custom JA3 TLS fingerprint string (format: "SSLVersion,Ciphers,Extensions,Curves,PointFormats"). - When provided, curl_cffi will use this exact TLS fingerprint instead of the browser's default. - See https://curl-cffi.readthedocs.io/en/latest/impersonate/customize.html - akamai: Custom Akamai HTTP/2 fingerprint string (format: "SETTINGS|WINDOW_UPDATE|PRIORITY|PSEUDO_HEADERS"). - When provided, curl_cffi will use this exact HTTP/2 fingerprint instead of the browser's default. - See https://curl-cffi.readthedocs.io/en/latest/impersonate/customize.html - extra_fp: Additional fingerprint parameters dict for advanced customization. - See https://curl-cffi.readthedocs.io/en/latest/impersonate/customize.html - **kwargs: Additional arguments passed to CurlSession constructor: - - headers: Additional headers (dict) - - cookies: Cookie jar or dict - - auth: HTTP basic auth tuple (username, password) - - proxies: Proxy configuration dict - - verify: SSL certificate verification (bool, default True) - - timeout: Request timeout in seconds (float or tuple) - - allow_redirects: Follow redirects (bool, default True) - - max_redirects: Maximum redirect count (int) - - cert: Client certificate (str or tuple) - - Extra arguments for retry handler: - - max_retries: Maximum number of retries (int, default 5) - - backoff_factor: Backoff factor (float, default 0.2) - - max_backoff: Maximum backoff time (float, default 60.0) - - status_forcelist: List of status codes to force retry (list, default [429, 500, 502, 503, 504]) - - allowed_methods: List of allowed HTTP methods (set, default {"GET", "POST", "HEAD", "OPTIONS", "PUT", "DELETE", "TRACE"}) - - catch_exceptions: List of exceptions to catch (tuple, default (exceptions.ConnectionError, exceptions.ProxyError, exceptions.SSLError, exceptions.Timeout)) + browser: Exact rnet.Impersonate preset name. Examples: + "Chrome131", "OkHttp4_12", "Edge101", "Firefox135", + "Safari18", "OkHttp5", "Opera118" + Uses the configured default from config if not specified. + See rnet.Impersonate for all available presets. + **kwargs: Additional arguments passed to RnetSession constructor. Returns: - curl_cffi.requests.Session configured with browser impersonation or custom fingerprints, - common headers, and equivalent retry behavior to requests.Session. + RnetSession configured with browser impersonation and retry behavior. 
Examples: - # Standard browser impersonation - from unshackle.core.session import session - - class MyService(Service): - @staticmethod - def get_session(): - return session() # Uses config default browser - - # Use OkHttp 4.x preset for Android TV - class AndroidService(Service): - @staticmethod - def get_session(): - return session("okhttp4") - - # Custom fingerprint (manual) - class CustomService(Service): - @staticmethod - def get_session(): - return session( - ja3="771,4865-4866-4867-49195...", - akamai="1:65536;2:0;4:6291456;6:262144|15663105|0|m,a,s,p", - ) - - # With retry configuration - class MyService(Service): - @staticmethod - def get_session(): - return session( - "okhttp4", - max_retries=5, - status_forcelist=[429, 500], - allowed_methods={"GET", "HEAD", "OPTIONS"}, - ) + session() # Default browser from config + session("OkHttp4_12") # OkHttp 4.12 fingerprint + session("Chrome131") # Chrome 131 + session("Edge101", max_retries=3) # Edge 101 with custom retry """ + if browser is None: + browser = config.curl_impersonate.get("browser", "Chrome131") - if browser and browser in FINGERPRINT_PRESETS: - preset = FINGERPRINT_PRESETS[browser] - if ja3 is None: - ja3 = preset.get("ja3") - if akamai is None: - akamai = preset.get("akamai") - if extra_fp is None: - extra_fp = preset.get("extra_fp") - browser = None + impersonate = _resolve_impersonate(browser) - if browser is None and ja3 is None and akamai is None: - browser = config.curl_impersonate.get("browser", "chrome") + session_kwargs: dict[str, Any] = {"impersonate": impersonate} + session_kwargs.update(kwargs) - session_config = {} - if browser: - session_config["impersonate"] = browser - - if ja3: - session_config["ja3"] = ja3 - if akamai: - session_config["akamai"] = akamai - if extra_fp: - session_config["extra_fp"] = extra_fp - - session_config.update(kwargs) - - session_obj = CurlSession(**session_config) + session_obj = RnetSession(**session_kwargs) session_obj.headers.update(config.headers) return session_obj diff --git a/unshackle/core/tracks/track.py b/unshackle/core/tracks/track.py index d73b528..4272647 100644 --- a/unshackle/core/tracks/track.py +++ b/unshackle/core/tracks/track.py @@ -13,7 +13,6 @@ from typing import Any, Callable, Iterable, Optional, Union from uuid import UUID from zlib import crc32 -from curl_cffi.requests import Session as CurlSession from langcodes import Language from requests import Session @@ -24,6 +23,7 @@ from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY from unshackle.core.downloaders import requests from unshackle.core.drm import DRM_T, PlayReady, Widevine from unshackle.core.events import events +from unshackle.core.session import RnetSession from unshackle.core.utilities import get_boxes, try_ensure_utf8 from unshackle.core.utils.subprocess import ffprobe @@ -326,6 +326,9 @@ class Track: ): file_downloaded = status_update.get("file_downloaded") if not file_downloaded: + downloaded = status_update.get("downloaded") + if downloaded and downloaded.endswith("/s"): + status_update["downloaded"] = f"URL {downloaded}" progress(**status_update) # see https://github.com/devine-dl/devine/issues/71 @@ -584,8 +587,8 @@ class Track: raise TypeError(f"Expected url to be a {str}, not {type(url)}") if not isinstance(byte_range, (str, type(None))): raise TypeError(f"Expected byte_range to be a {str}, not {type(byte_range)}") - if not isinstance(session, (Session, CurlSession, type(None))): - raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not 
{type(session)}") + if not isinstance(session, (Session, RnetSession, type(None))): + raise TypeError(f"Expected session to be a {Session} or {RnetSession}, not {type(session)}") if not url: if self.descriptor != self.Descriptor.URL: @@ -623,10 +626,11 @@ class Track: init_data = res.content else: init_data = None - with session.get(url, stream=True) as s: - for chunk in s.iter_content(content_length): - init_data = chunk - break + s = session.get(url, stream=True) + for chunk in s.iter_content(content_length): + init_data = chunk + break + s.close() if not init_data: raise ValueError(f"Failed to read {content_length} bytes from the track URI.") diff --git a/uv.lock b/uv.lock index ed0b7e5..5f87ebb 100644 --- a/uv.lock +++ b/uv.lock @@ -123,20 +123,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ba/98/6775d71cf7d88d554e8394f5ce5cda90041c99fdf1b2b60af02001e8c790/animeapi_py-3.8.1-py3-none-any.whl", hash = "sha256:c29f6e633d17bb613f459aa6514c0baab7ae325881f8a109eb6e4b3be5c22827", size = 26983, upload-time = "2026-02-25T15:29:16.685Z" }, ] -[[package]] -name = "anyio" -version = "4.12.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, - { name = "idna" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, -] - [[package]] name = "appdirs" version = "1.4.4" @@ -442,27 +428,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/ec/bb273b7208c606890dc36540fe667d06ce840a6f62f9fae7e658fcdc90fb/cssutils-2.11.1-py3-none-any.whl", hash = "sha256:a67bfdfdff4f3867fab43698ec4897c1a828eca5973f4073321b3bccaf1199b1", size = 385747, upload-time = "2024-06-04T15:51:37.499Z" }, ] -[[package]] -name = "curl-cffi" -version = "0.13.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "cffi" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/4e/3d/f39ca1f8fdf14408888e7c25e15eed63eac5f47926e206fb93300d28378c/curl_cffi-0.13.0.tar.gz", hash = "sha256:62ecd90a382bd5023750e3606e0aa7cb1a3a8ba41c14270b8e5e149ebf72c5ca", size = 151303, upload-time = "2025-08-06T13:05:42.988Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/19/d1/acabfd460f1de26cad882e5ef344d9adde1507034528cb6f5698a2e6a2f1/curl_cffi-0.13.0-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:434cadbe8df2f08b2fc2c16dff2779fb40b984af99c06aa700af898e185bb9db", size = 5686337, upload-time = "2025-08-06T13:05:28.985Z" }, - { url = "https://files.pythonhosted.org/packages/2c/1c/cdb4fb2d16a0e9de068e0e5bc02094e105ce58a687ff30b4c6f88e25a057/curl_cffi-0.13.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:59afa877a9ae09efa04646a7d068eeea48915a95d9add0a29854e7781679fcd7", size = 2994613, upload-time = "2025-08-06T13:05:31.027Z" }, - { url = 
"https://files.pythonhosted.org/packages/04/3e/fdf617c1ec18c3038b77065d484d7517bb30f8fb8847224eb1f601a4e8bc/curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d06ed389e45a7ca97b17c275dbedd3d6524560270e675c720e93a2018a766076", size = 7931353, upload-time = "2025-08-06T13:05:32.273Z" }, - { url = "https://files.pythonhosted.org/packages/3d/10/6f30c05d251cf03ddc2b9fd19880f3cab8c193255e733444a2df03b18944/curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4e0de45ab3b7a835c72bd53640c2347415111b43421b5c7a1a0b18deae2e541", size = 7486378, upload-time = "2025-08-06T13:05:33.672Z" }, - { url = "https://files.pythonhosted.org/packages/77/81/5bdb7dd0d669a817397b2e92193559bf66c3807f5848a48ad10cf02bf6c7/curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8eb4083371bbb94e9470d782de235fb5268bf43520de020c9e5e6be8f395443f", size = 8328585, upload-time = "2025-08-06T13:05:35.28Z" }, - { url = "https://files.pythonhosted.org/packages/ce/c1/df5c6b4cfad41c08442e0f727e449f4fb5a05f8aa564d1acac29062e9e8e/curl_cffi-0.13.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:28911b526e8cd4aa0e5e38401bfe6887e8093907272f1f67ca22e6beb2933a51", size = 8739831, upload-time = "2025-08-06T13:05:37.078Z" }, - { url = "https://files.pythonhosted.org/packages/1a/91/6dd1910a212f2e8eafe57877bcf97748eb24849e1511a266687546066b8a/curl_cffi-0.13.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6d433ffcb455ab01dd0d7bde47109083aa38b59863aa183d29c668ae4c96bf8e", size = 8711908, upload-time = "2025-08-06T13:05:38.741Z" }, - { url = "https://files.pythonhosted.org/packages/6d/e4/15a253f9b4bf8d008c31e176c162d2704a7e0c5e24d35942f759df107b68/curl_cffi-0.13.0-cp39-abi3-win_amd64.whl", hash = "sha256:66a6b75ce971de9af64f1b6812e275f60b88880577bac47ef1fa19694fa21cd3", size = 1614510, upload-time = "2025-08-06T13:05:40.451Z" }, - { url = "https://files.pythonhosted.org/packages/f9/0f/9c5275f17ad6ff5be70edb8e0120fdc184a658c9577ca426d4230f654beb/curl_cffi-0.13.0-cp39-abi3-win_arm64.whl", hash = "sha256:d438a3b45244e874794bc4081dc1e356d2bb926dcc7021e5a8fef2e2105ef1d8", size = 1365753, upload-time = "2025-08-06T13:05:41.879Z" }, -] - [[package]] name = "dacite" version = "1.9.2" @@ -490,18 +455,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e8/35/4a113189f7138035a21bd255d30dc7bffc77c942c93b7948d2eac2e22429/ECPy-1.2.5-py3-none-any.whl", hash = "sha256:559c92e42406d9d1a6b2b8fc26e6ad7bc985f33903b72f426a56cb1073a25ce3", size = 43075, upload-time = "2020-10-26T11:56:13.613Z" }, ] -[[package]] -name = "exceptiongroup" -version = "1.3.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, -] - [[package]] name = "fastjsonschema" version = "2.19.1" @@ -622,43 +575,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ab/6e/81d47999aebc1b155f81eca4477a616a70f238a2549848c38983f3c22a82/ftfy-6.3.1-py3-none-any.whl", hash = "sha256:7c70eb532015cd2f9adb53f101fb6c7945988d023a085d127d1573dc49dd0083", size = 44821, upload-time = "2024-10-26T00:50:33.425Z" }, ] -[[package]] -name = "h11" -version = "0.16.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, -] - -[[package]] -name = "httpcore" -version = "1.0.9" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "h11" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, -] - -[[package]] -name = "httpx" -version = "0.28.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anyio" }, - { name = "certifi" }, - { name = "httpcore" }, - { name = "idna" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, -] - [[package]] name = "identify" version = "2.6.16" @@ -1046,14 +962,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731, upload-time = "2025-12-05T13:52:56.823Z" }, ] -[[package]] -name = "pproxy" -version = "2.7.9" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d4/c6/673a10a729061d2594b85aedd7dd2e470db4d54b12d4f95a306353bb2967/pproxy-2.7.9-py3-none-any.whl", hash = "sha256:a073d02616a47c43e1d20a547918c307dbda598c6d53869b165025f3cfe58e80", size = 42842, upload-time = "2024-01-16T11:33:35.286Z" }, -] - [[package]] name = "pre-commit" version = "4.5.1" @@ -1434,6 +1342,53 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/52/d2/d2ffaecbfff0c057b5824a82b57b709b1c5b2966c970e4c5d6e1d8109b21/rlaphoenix.m3u8-3.4.0-py3-none-any.whl", hash = "sha256:cd2c22195c747d52c63189d4bd5f664e1fc5ea202f5a7396b7336581f26a2838", size = 24767, upload-time = "2023-03-09T21:37:38.326Z" }, ] +[[package]] +name = "rnet" +version = "2.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/10/bc/e5e4395e67803405900b98d503a23c1125432a5a73d2c311dd2ebe11b7fc/rnet-2.4.2.tar.gz", hash = "sha256:9fc9ea17a7afea799e10670f0c1da939f500c440760aeefe42209644ffef5bf5", size = 515573, upload-time = "2025-08-02T23:26:27.795Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b9/5e/09b4fcb92611b6c51db2b7abb0a126aa87a76350e1da783ea35e3c9711af/rnet-2.4.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9e5c8e485396dc86cdd39bf036747866f9ccf1c462ed660c65df4fea57b7d8b7", size = 3703136, upload-time = "2025-08-02T23:25:24.945Z" }, + { url = "https://files.pythonhosted.org/packages/60/0e/40b06dec2a172e2136d0c731880f5932b4383da470dc0ccf17f3fdd196da/rnet-2.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b092c70d4943d914272c58bc17e2382054c3180828564f378411cdfebc752f7a", size = 3429794, upload-time = "2025-08-02T23:25:12.382Z" }, + { url = "https://files.pythonhosted.org/packages/68/31/4e51497c8722379c79b054bb6d98e0273f42248de948f7dbc3c4dcde88cb/rnet-2.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f881c1334d8f65b8c3c54eacccc487b21ea778762dc40e20d94ee8f841a2bb9", size = 3661754, upload-time = "2025-08-02T23:24:59.127Z" }, + { url = "https://files.pythonhosted.org/packages/9f/c3/9b43dde7c6b505eae0d0c23133b612b07d9221f0423fac55abbda78d5bdb/rnet-2.4.2-cp310-cp310-manylinux_2_34_aarch64.whl", hash = "sha256:ad5d2af6097493a84f9ef006f709fa4a3d42957f38aa84dd6283f8856e94e773", size = 3609141, upload-time = "2025-08-02T23:24:20.516Z" }, + { url = "https://files.pythonhosted.org/packages/19/37/37e5a0b9eb1c4a782c399443c5d498b24a2d40baa86842afd1588f4b4508/rnet-2.4.2-cp310-cp310-manylinux_2_34_armv7l.whl", hash = "sha256:5cdaf7a141a045cae13961b206406ccc34d8b9f3bac9d5e44bd26f14c33ca657", size = 3424711, upload-time = "2025-08-02T23:24:34.339Z" }, + { url = "https://files.pythonhosted.org/packages/1b/6c/aef21e909707d0bbfd347a843fefbed2fd50255c7a99ff4251fce82e2362/rnet-2.4.2-cp310-cp310-manylinux_2_34_i686.whl", hash = "sha256:df33b9f4e5e2bdc21aba4189628a6827d950718f863904c5ee3f43a40c60089a", size = 3686201, upload-time = "2025-08-02T23:24:46.31Z" }, + { url = "https://files.pythonhosted.org/packages/1a/c5/5ed5ee58cba531681e73099e619c2d36e8453e28764c71682a32c373b30c/rnet-2.4.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a76a4976e065ff2af0fbfa13ea14e2b2f449ba6ea708125029d54738e3c638cf", size = 3957076, upload-time = "2025-08-02T23:25:37.751Z" }, + { url = "https://files.pythonhosted.org/packages/97/88/2ac698c25fe8c7a108d0bf7b76afa0049d9f4c1ae7162542434970936a00/rnet-2.4.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:1030ca77af54bfee5739d4bb34f403329b154cdbab4bcd2feeb20fab22955359", size = 3919451, upload-time = "2025-08-02T23:25:49.469Z" }, + { url = "https://files.pythonhosted.org/packages/3e/1b/129029ba55eeb1daa58ab7e88a06f2a95b8246b207fbe8bbc04f9f23d2cd/rnet-2.4.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fc5dd872523a4b5f21ea7092fd9440a0677f609e3b971c60673b4dbd984745a9", size = 4005497, upload-time = "2025-08-02T23:26:02.816Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/a2/4df4e00e1f3b04c902ab494147140fea308d139c5f7697aedcf949d8f225/rnet-2.4.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:080d04ceaf7be30505d11360b33d5d43668b557a7b86de3c548882d1de19bc4f", size = 4166618, upload-time = "2025-08-02T23:26:15.245Z" }, + { url = "https://files.pythonhosted.org/packages/09/93/cbf1d634d17b220bb7ba52fd38afd98101a010bf5c873af5815eba6e601d/rnet-2.4.2-cp310-cp310-win32.whl", hash = "sha256:9e8f79f055630780e1334255b1167b30b99989e31a87e10295e143240eb519d5", size = 3207306, upload-time = "2025-08-02T23:26:53.616Z" }, + { url = "https://files.pythonhosted.org/packages/d2/36/dd76e90d1fea4688f64cb6263244500fea6b1c8f979bb1651f132515a617/rnet-2.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:f4e891d603c6fe4f28857b161d6ee10975633a5ee1867050962aef3954cf3e1a", size = 3561188, upload-time = "2025-08-02T23:26:41.141Z" }, + { url = "https://files.pythonhosted.org/packages/d9/08/beb3c97573688b23f081d35f6280db9438c3a32ec7dc6ba8479107f8d913/rnet-2.4.2-cp310-cp310-win_arm64.whl", hash = "sha256:372e9a7764f6947a8484774827829e291a8f299b80f93cf9318483c60b1c1921", size = 3202587, upload-time = "2025-08-02T23:26:29.299Z" }, + { url = "https://files.pythonhosted.org/packages/5c/b3/7cbd1daf6cf3a5eb56615128e5a9fb5f3fda6457d511791766c39cc71203/rnet-2.4.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:0106d7b43ea92a02458eea5e5c76ac67ff978f5715293c836164c4a05a7eb890", size = 3703182, upload-time = "2025-08-02T23:25:26.218Z" }, + { url = "https://files.pythonhosted.org/packages/d2/2f/4bd07edd1785445b95e717ad93c5845b18e8d4df578e1c62c11c77a9aea4/rnet-2.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:14074800998098403540b9b624e78f7dd811605ac0f1a6081a12ad5e6e1fd1ac", size = 3429858, upload-time = "2025-08-02T23:25:13.59Z" }, + { url = "https://files.pythonhosted.org/packages/43/4e/d71e2c30526c54ace931f95c5134cb474aaa9f3142e4e11f651bb1ec7b27/rnet-2.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:573c637accaf7f3c7aa6d2241224ed577444c00e8af4e631b243b3cae765c502", size = 3661678, upload-time = "2025-08-02T23:25:00.677Z" }, + { url = "https://files.pythonhosted.org/packages/f1/27/a33ac1b61d29015e832ff960b274929288b6901cca3cf415e1f6a0aec1ed/rnet-2.4.2-cp311-cp311-manylinux_2_34_aarch64.whl", hash = "sha256:48fca3430dc4d90c920c08474a0db0ec3e6465226a08345b10b6cc58c8b0c23e", size = 3609069, upload-time = "2025-08-02T23:24:22.862Z" }, + { url = "https://files.pythonhosted.org/packages/e1/f5/628153c9228e7430650e11c1f40cdb53a1a23592d98c39aafb534217278e/rnet-2.4.2-cp311-cp311-manylinux_2_34_armv7l.whl", hash = "sha256:7bf06f481297304d426cd7c6b36babc3859ae242cde276f038f6f51cff7fd4de", size = 3424456, upload-time = "2025-08-02T23:24:35.592Z" }, + { url = "https://files.pythonhosted.org/packages/5e/c9/c6444cfa9c935ef2b9a273470812b8555cec262dbab7f20325fd67a27c1d/rnet-2.4.2-cp311-cp311-manylinux_2_34_i686.whl", hash = "sha256:ca351af5ccb531d308eeb7ae3dcbfba038a14d4897e22139d76f8cd88eed649f", size = 3686160, upload-time = "2025-08-02T23:24:47.5Z" }, + { url = "https://files.pythonhosted.org/packages/97/6a/d7c48b8400b30c1931a800c79b429692758ef349b1a210bb9f499f199687/rnet-2.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129eab0183ca50fd5f57b24b3b4387a5edca727e4004c1debcb5c23ecba6c128", size = 3957128, upload-time = "2025-08-02T23:25:39Z" }, + { url = "https://files.pythonhosted.org/packages/bd/cf/ddabfa4299dbeefae488a54e95684c0c68c00b5d3cff3b8212d1adf2b206/rnet-2.4.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = 
"sha256:4a0449fae24b95b29f8a3f433f7866fc6c94d9ee37d2a5d94b7154eb436ee448", size = 3919406, upload-time = "2025-08-02T23:25:50.78Z" }, + { url = "https://files.pythonhosted.org/packages/9b/fe/e92e5dacfc97041cbf335c10e0a45b7ac71e0d30c51e0a0dc51d35d1ce0b/rnet-2.4.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:b8a17765351c3f75ce725ee4d946a255c1b3920462252edffd737e81ce996fc7", size = 4005641, upload-time = "2025-08-02T23:26:04.192Z" }, + { url = "https://files.pythonhosted.org/packages/92/a6/156f5801328adc4296f6686e27f69ea22cc0c17d1f108759caa53bcedeb5/rnet-2.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e73fb0e89965ed31b22644e221bdd928ebcb6b3f8ce75da4f083cb92baef844f", size = 4166702, upload-time = "2025-08-02T23:26:16.539Z" }, + { url = "https://files.pythonhosted.org/packages/00/e6/42a36a76238e10b157e1265be38f2fc66eeb4eaa5b9b3dfdcd4b581e2e6f/rnet-2.4.2-cp311-cp311-win32.whl", hash = "sha256:f3296e85f3f8da7165d8b7df5633f8443b1f2597215646e8e090d1affaa3d1b0", size = 3207638, upload-time = "2025-08-02T23:26:54.904Z" }, + { url = "https://files.pythonhosted.org/packages/ba/6e/92d99f03522cddffb4d00dbac4b63daafbd7966a915ec689bb713da45d3e/rnet-2.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:847529308ab9cf59f0d4ac5a9d1fe051894a26aadf3b8f8b20a862302587725f", size = 3561173, upload-time = "2025-08-02T23:26:42.451Z" }, + { url = "https://files.pythonhosted.org/packages/e9/bc/4a4d19425adf6a62459da608988b4de0f43c71d252cf0b15517cdb46649e/rnet-2.4.2-cp311-cp311-win_arm64.whl", hash = "sha256:b2eb935265a0771f9b323f2980455b5478550919d18572d523ac2cb5f328e7f7", size = 3202633, upload-time = "2025-08-02T23:26:30.611Z" }, + { url = "https://files.pythonhosted.org/packages/c9/22/434a9aa0228a4fa2abe48b04d36214f5cbe08af45afdb833ac7cc02cd913/rnet-2.4.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b853a9809588a569011142b9bae142ad982387640edcfd38fba4337b044900ae", size = 3694856, upload-time = "2025-08-02T23:25:27.818Z" }, + { url = "https://files.pythonhosted.org/packages/9b/ca/b49c2dce89381b7697ccb771a6850eea13934ef1eb37a8ef2ba27d925643/rnet-2.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:da547d7be92261ead4bc0ce23e30823c760d638055fd301da18c6521ec245fc8", size = 3420543, upload-time = "2025-08-02T23:25:14.783Z" }, + { url = "https://files.pythonhosted.org/packages/e5/cb/7c5979932069c9f40651d4aca487bfe639a94098eb123d7ec466f7f7730d/rnet-2.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc5bf17b3ed46455fe70a784dff0ccc7beaf54984d554536e644b6d1dacea63e", size = 3658152, upload-time = "2025-08-02T23:25:01.876Z" }, + { url = "https://files.pythonhosted.org/packages/62/2f/83b754b1383a8cf6e696cb547e0ec4d47ba58dc838b16341be6f1af0ede6/rnet-2.4.2-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:3fe4531b0bffc26d10e3baec2f3d0deb59fb8ff157c56b985d9bd2d6060b2715", size = 3602597, upload-time = "2025-08-02T23:24:24.421Z" }, + { url = "https://files.pythonhosted.org/packages/91/4a/3012990ec2f309baf41f70929bac0f166db3a7ce5a6bca1143ba6e9b4610/rnet-2.4.2-cp312-cp312-manylinux_2_34_armv7l.whl", hash = "sha256:c51cc5648efdc97bb17d88aab30f0596924766dc137109865bff72539141a81e", size = 3418020, upload-time = "2025-08-02T23:24:36.835Z" }, + { url = "https://files.pythonhosted.org/packages/a9/5b/6780b490a7d9dfc76c17c68f84b9d5cfb602bdc5db4ca5774930e7b7933e/rnet-2.4.2-cp312-cp312-manylinux_2_34_i686.whl", hash = "sha256:b13ee78075389050ae537d9c6957d8de820d0c7f3c7053dfc3e103e0538890a7", size = 3679644, upload-time = "2025-08-02T23:24:49.07Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/d4/a092274c9513d67f802cc6f3472068f6cbf30652d00d4b5c29617c20479d/rnet-2.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1d2e21ed40fcaead89a9f711354f92d5d2690e621a0a6f37edf6d655d1994d58", size = 3953384, upload-time = "2025-08-02T23:25:40.244Z" }, + { url = "https://files.pythonhosted.org/packages/b6/5f/e4660f38921f41ab2199228d173d6f5d881f391c7b686695dd383fd41693/rnet-2.4.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:8c5672ff8cdb9042a275187badd28279f979e20dcf175da22fce666af7b1b273", size = 3913721, upload-time = "2025-08-02T23:25:52.214Z" }, + { url = "https://files.pythonhosted.org/packages/44/89/1e81dd97c9ab45bfed871b5cb7fec50893f1a6be6bfd2c237cf3b902cf63/rnet-2.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:82037372e94fd7bc999ccac1b971da1a0f15a469979777c11dd225fddb249de1", size = 4001858, upload-time = "2025-08-02T23:26:05.506Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5c/475c7c9bff6e94a7e5d457e8de2b5786a1f1a7488ad48b29cafddbb530bf/rnet-2.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:582f71311cb42db9a396bb95f39577fc4ac5e94d6de17696d2900a05814e5ac6", size = 4162005, upload-time = "2025-08-02T23:26:17.797Z" }, + { url = "https://files.pythonhosted.org/packages/5a/36/c4bdcdcdd9682fcb1fe01a371e8c25bff949bd719fd78021112538951bd3/rnet-2.4.2-cp312-cp312-win32.whl", hash = "sha256:355b849b67b131fbeffb7b5ee9a4057d3b4f576c1c63a59698a49f86c3a0bc80", size = 3200189, upload-time = "2025-08-02T23:26:56.208Z" }, + { url = "https://files.pythonhosted.org/packages/ef/cd/cb6f11f33e0a7d567b980c2b7e19f5f0e827a9ea33c53c2de350ef23f121/rnet-2.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:d47a3fb6339e62b06cabeed8dc4aab28050cadc02a8dcbf56b688fb1ca2c7171", size = 3560606, upload-time = "2025-08-02T23:26:43.797Z" }, + { url = "https://files.pythonhosted.org/packages/9c/70/5ded6c684343fd1a59e5b9ed4ffc7ec783d080bac32ba98c503d363914c0/rnet-2.4.2-cp312-cp312-win_arm64.whl", hash = "sha256:71d3f845b0f44d2353133ac8ee3bea39d00a6766356aa0d1f545b739380d0bea", size = 3199395, upload-time = "2025-08-02T23:26:31.975Z" }, +] + [[package]] name = "ruamel-yaml" version = "0.18.17" @@ -1663,15 +1618,12 @@ dependencies = [ { name = "construct" }, { name = "crccheck" }, { name = "cryptography" }, - { name = "curl-cffi" }, { name = "filelock" }, { name = "fonttools" }, - { name = "httpx" }, { name = "jsonpickle" }, { name = "langcodes" }, { name = "language-data" }, { name = "lxml" }, - { name = "pproxy" }, { name = "protobuf" }, { name = "pycaption" }, { name = "pycountry" }, @@ -1688,6 +1640,7 @@ dependencies = [ { name = "requests", extra = ["socks"] }, { name = "rich" }, { name = "rlaphoenix-m3u8" }, + { name = "rnet" }, { name = "ruamel-yaml" }, { name = "sortedcontainers" }, { name = "subby" }, @@ -1722,15 +1675,12 @@ requires-dist = [ { name = "construct", specifier = ">=2.8.8,<3" }, { name = "crccheck", specifier = ">=1.3.0,<2" }, { name = "cryptography", specifier = ">=45.0.0,<47" }, - { name = "curl-cffi", specifier = ">=0.7.0b4,<0.14" }, { name = "filelock", specifier = ">=3.20.3,<4" }, { name = "fonttools", specifier = ">=4.60.2,<5" }, - { name = "httpx", specifier = ">=0.28.1,<0.29" }, { name = "jsonpickle", specifier = ">=3.0.4,<5" }, { name = "langcodes", specifier = ">=3.4.0,<4" }, { name = "language-data", specifier = ">=1.4.0" }, { name = "lxml", specifier = ">=5.2.1,<7" }, - { name = "pproxy", specifier = ">=2.7.9,<3" }, { name = "protobuf", specifier = ">=4.25.3,<7" }, { name = "pycaption", specifier = 
">=2.2.6,<3" }, { name = "pycountry", specifier = ">=24.6.1" }, @@ -1747,6 +1697,7 @@ requires-dist = [ { name = "requests", extras = ["socks"], specifier = ">=2.32.5,<3" }, { name = "rich", specifier = ">=13.7.1,<15" }, { name = "rlaphoenix-m3u8", specifier = ">=3.4.0,<4" }, + { name = "rnet", specifier = ">=2.4.2" }, { name = "ruamel-yaml", specifier = ">=0.18.6,<0.19" }, { name = "sortedcontainers", specifier = ">=2.4.0,<3" }, { name = "subby", git = "https://github.com/vevv/subby.git?rev=1ea6a52028c5bea8177c8abc91716d74e4d097e1" }, From c930abc6fdeec07ba972081640b8ad64535f6dd8 Mon Sep 17 00:00:00 2001 From: Andy Date: Tue, 24 Mar 2026 17:44:23 -0600 Subject: [PATCH 6/8] fix(subtitle): decompress gzip/zlib responses for subtitle downloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The requests downloader used decode_content=False on raw socket reads, which skipped HTTP content-encoding decompression. Subtitle files served with Content-Encoding: gzip were saved as raw compressed bytes, then mangled by try_ensure_utf8 falling back to CP1252 decoding. Remove decode_content=False from the raw read path — the speed gain comes from raw.read() itself, not from skipping decompression. Also add gzip/zlib magic byte detection in try_ensure_utf8 as a safety net for any edge cases where compressed data reaches encoding detection. --- unshackle/core/downloaders/requests.py | 1 - unshackle/core/utilities.py | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/unshackle/core/downloaders/requests.py b/unshackle/core/downloaders/requests.py index fd23a41..65a61e8 100644 --- a/unshackle/core/downloaders/requests.py +++ b/unshackle/core/downloaders/requests.py @@ -138,7 +138,6 @@ def download( chunks = stream.stream() elif use_raw: # requests.Session: raw socket read — 30-35% faster than iter_content - stream.raw.decode_content = False _read = stream.raw.read def _chunks() -> Generator[bytes, None, None]: diff --git a/unshackle/core/utilities.py b/unshackle/core/utilities.py index b57ca14..727c41c 100644 --- a/unshackle/core/utilities.py +++ b/unshackle/core/utilities.py @@ -1,5 +1,6 @@ import ast import contextlib +import gzip import importlib.util import json import logging @@ -10,6 +11,7 @@ import sys import time import traceback import unicodedata +import zlib from collections import defaultdict from datetime import datetime, timezone from pathlib import Path @@ -478,12 +480,29 @@ def try_ensure_utf8(data: bytes) -> bytes: """ Try to ensure that the given data is encoded in UTF-8. + Automatically decompresses gzip/deflate/zlib data before encoding detection. + This handles cases where HTTP responses are saved with raw Content-Encoding + (e.g., when decode_content=False is used for performance). + Parameters: data: Input data that may or may not yet be UTF-8 or another encoding. Returns the input data encoded in UTF-8 if successful. If unable to detect the encoding of the input data, then the original data is returned as-received. 
""" + # Decompress gzip data (magic bytes: 1f 8b) + if data[:2] == b"\x1f\x8b": + try: + data = gzip.decompress(data) + except Exception: + pass + # Decompress raw deflate/zlib data (common zlib headers: 78 01, 78 5e, 78 9c, 78 da) + elif data[:1] == b"\x78" and len(data) > 1 and data[1:2] in (b"\x01", b"\x5e", b"\x9c", b"\xda"): + try: + data = zlib.decompress(data) + except Exception: + pass + try: data.decode("utf8") return data From b524585d78e81c57dbc68775effe53a600fc2037 Mon Sep 17 00:00:00 2001 From: Andy Date: Thu, 19 Mar 2026 18:43:43 -0600 Subject: [PATCH 7/8] fix(drm): add zero-KID fallback for mp4decrypt and clear HLS track.drm after download mp4decrypt silently copies files unchanged when the tenc box default KID is all zeros, since none of the real KID:KEY pairs match. Add zero-KID fallback entries to both Widevine and PlayReady mp4decrypt methods, matching what Shaka Packager already does. Also clear track.drm after HLS download when decryption was performed, preventing unnecessary double-decryption. DASH and URL descriptors already did this. --- unshackle/core/drm/playready.py | 13 +++++++++++++ unshackle/core/drm/widevine.py | 13 +++++++++++++ unshackle/core/manifests/hls.py | 8 ++++++++ 3 files changed, 34 insertions(+) diff --git a/unshackle/core/drm/playready.py b/unshackle/core/drm/playready.py index f23e1b8..7907100 100644 --- a/unshackle/core/drm/playready.py +++ b/unshackle/core/drm/playready.py @@ -356,6 +356,19 @@ class PlayReady: key_hex = key if isinstance(key, str) else key.hex() key_args.extend(["--key", f"{kid_hex}:{key_hex}"]) + # Some services use a blank/zero default KID in the tenc box, + # but the real KID for the license server. Add zero-KID fallback entries so + # mp4decrypt can match when the file's default KID is all zeros. + zero_kid = "00" * 16 + existing_kids = { + kid.hex if hasattr(kid, "hex") else str(kid).replace("-", "") + for kid in self.content_keys + } + if zero_kid not in existing_kids: + for key in self.content_keys.values(): + key_hex = key if isinstance(key, str) else key.hex() + key_args.extend(["--key", f"{zero_kid}:{key_hex}"]) + cmd = [ str(binaries.Mp4decrypt), "--show-progress", diff --git a/unshackle/core/drm/widevine.py b/unshackle/core/drm/widevine.py index 64b14e7..191bc23 100644 --- a/unshackle/core/drm/widevine.py +++ b/unshackle/core/drm/widevine.py @@ -290,6 +290,19 @@ class Widevine: key_hex = key if isinstance(key, str) else key.hex() key_args.extend(["--key", f"{kid_hex}:{key_hex}"]) + # Some services use a blank/zero default KID in the tenc box, + # but the real KID for the license server. Add zero-KID fallback entries so + # mp4decrypt can match when the file's default KID is all zeros. 
+        zero_kid = "00" * 16
+        existing_kids = {
+            kid.hex if hasattr(kid, "hex") else str(kid).replace("-", "")
+            for kid in self.content_keys
+        }
+        if zero_kid not in existing_kids:
+            for key in self.content_keys.values():
+                key_hex = key if isinstance(key, str) else key.hex()
+                key_args.extend(["--key", f"{zero_kid}:{key_hex}"])
+
         cmd = [
             str(binaries.Mp4decrypt),
             "--show-progress",
diff --git a/unshackle/core/manifests/hls.py b/unshackle/core/manifests/hls.py
index e5344e7..475a1c0 100644
--- a/unshackle/core/manifests/hls.py
+++ b/unshackle/core/manifests/hls.py
@@ -454,6 +454,10 @@ class HLS:
                     status_update["downloaded"] = f"HLS {downloaded}"
                 progress(**status_update)
 
+        # see https://github.com/devine-dl/devine/issues/71
+        for control_file in segment_save_dir.glob("*.aria2__temp"):
+            control_file.unlink()
+
         progress(total=total_segments, completed=0, downloaded="Merging")
 
         name_len = len(str(total_segments))
@@ -753,6 +757,10 @@ class HLS:
             progress(downloaded="Downloaded")
 
         track.path = save_path
+
+        if session_drm:
+            track.drm = None
+
         events.emit(events.Types.TRACK_DOWNLOADED, track=track)
 
     @staticmethod

From 5a3ac81ff9b376d73120d3733a5c8828a64237fc Mon Sep 17 00:00:00 2001
From: Andy
Date: Thu, 26 Mar 2026 16:36:35 -0600
Subject: [PATCH 8/8] feat(session): translate requests 'data' kwarg to rnet
 equivalents for compatibility

---
 unshackle/core/session.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/unshackle/core/session.py b/unshackle/core/session.py
index bf49286..a7a28e9 100644
--- a/unshackle/core/session.py
+++ b/unshackle/core/session.py
@@ -480,6 +480,16 @@ class RnetSession:
         # Remove kwargs rnet doesn't understand
         kwargs.pop("stream", None)  # rnet responses are always lazy
 
+        # Translate requests-compatible 'data' kwarg to rnet equivalents
+        data = kwargs.pop("data", None)
+        if data is not None:
+            if isinstance(data, dict):
+                kwargs["form"] = list(data.items())
+            elif isinstance(data, (str, bytes)):
+                kwargs["body"] = data
+            else:
+                kwargs["body"] = data
+
         # Resolve method enum
         rnet_method = _METHOD_MAP.get(method_upper)
         if rnet_method is None: