diff --git a/pyproject.toml b/pyproject.toml index 03a13ee..50cc70e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,7 @@ dependencies = [ "animeapi-py>=0.6.0", "rnet>=2.4.2", "bandit>=1.9.4", + "defusedxml>=0.7.1", ] [project.urls] diff --git a/unshackle/commands/dl.py b/unshackle/commands/dl.py index 581d8b0..60efd97 100644 --- a/unshackle/commands/dl.py +++ b/unshackle/commands/dl.py @@ -862,11 +862,16 @@ class dl: self.service_config = yaml.safe_load(service_config_path.read_text(encoding="utf8")) self.log.info("Service Config loaded") if self.debug_logger: + # log key names only -- the full config carries service certificates, + # device fingerprints and endpoints that bloat the log and may be sensitive self.debug_logger.log( level="DEBUG", operation="load_service_config", service=self.service, - context={"config_path": str(service_config_path), "config": self.service_config}, + context={ + "config_path": str(service_config_path), + "config_keys": sorted(self.service_config or []), + }, ) except KeyError: pass diff --git a/unshackle/core/api/download_manager.py b/unshackle/core/api/download_manager.py index 54ee507..718f4c5 100644 --- a/unshackle/core/api/download_manager.py +++ b/unshackle/core/api/download_manager.py @@ -14,20 +14,16 @@ from enum import Enum from pathlib import Path from typing import Any, Callable, Dict, List, Optional +from unshackle.core.api.sanitize import sanitize_log +from unshackle.core.utils.redact import REDACTED, URL_USERINFO_RE, redact_text + log = logging.getLogger("download_manager") -def _sanitize_log(value: object) -> str: - """Sanitize a value for safe logging by removing newlines and control characters.""" - return str(value).replace("\n", "").replace("\r", "").replace("\x00", "") - - # Job parameters may carry secrets (a raw "user:pass" credential, a proxy URL with embedded # userinfo). These must never leave the process via the API or logs, so they are masked # wherever parameters are serialized for a response. -_REDACTED = "***" _SENSITIVE_PARAM_KEYS = ("credential", "credentials", "password", "token", "api_key") -_PROXY_USERINFO_RE = re.compile(r"(?<=://)[^/@]+@") def _redact_parameters(parameters: Dict[str, Any]) -> Dict[str, Any]: @@ -37,10 +33,10 @@ def _redact_parameters(parameters: Dict[str, Any]) -> Dict[str, Any]: redacted = dict(parameters) for key in _SENSITIVE_PARAM_KEYS: if redacted.get(key): - redacted[key] = _REDACTED + redacted[key] = REDACTED proxy = redacted.get("proxy") if isinstance(proxy, str) and "@" in proxy: - redacted["proxy"] = _PROXY_USERINFO_RE.sub(f"{_REDACTED}@", proxy) + redacted["proxy"] = URL_USERINFO_RE.sub(f"{REDACTED}@", proxy) return redacted @@ -68,12 +64,7 @@ def _secret_values(parameters: Dict[str, Any]) -> List[str]: def _redact_text(text: Optional[str], parameters: Dict[str, Any]) -> Optional[str]: """Mask proxy userinfo and any known parameter secrets that leaked into a free-text field (error message / details / traceback / worker stderr) before it is returned via the API.""" - if not isinstance(text, str) or not text: - return text - text = _PROXY_USERINFO_RE.sub(f"{_REDACTED}@", text) - for secret in _secret_values(parameters): - text = text.replace(secret, _REDACTED) - return text + return redact_text(text, _secret_values(parameters)) class JobStatus(Enum): @@ -515,7 +506,7 @@ class DownloadQueueManager: self._jobs[job_id] = job self._job_queue.put_nowait(job) - log.info(f"Created download job {job_id} for {_sanitize_log(service)}:{_sanitize_log(title_id)}") + log.info(f"Created download job {job_id} for {sanitize_log(service)}:{sanitize_log(title_id)}") return job def get_job(self, job_id: str) -> Optional[DownloadJob]: @@ -535,27 +526,27 @@ class DownloadQueueManager: if job.status == JobStatus.QUEUED: job.status = JobStatus.CANCELLED job.cancel_event.set() # Signal cancellation - log.info(f"Cancelled queued job {_sanitize_log(job_id)}") + log.info(f"Cancelled queued job {sanitize_log(job_id)}") return True elif job.status == JobStatus.DOWNLOADING: # Set the cancellation event first - this will be checked by the download thread job.cancel_event.set() job.status = JobStatus.CANCELLED - log.info(f"Signaled cancellation for downloading job {_sanitize_log(job_id)}") + log.info(f"Signaled cancellation for downloading job {sanitize_log(job_id)}") # Cancel the active download task task = self._active_downloads.get(job_id) if task: task.cancel() - log.info(f"Cancelled download task for job {_sanitize_log(job_id)}") + log.info(f"Cancelled download task for job {sanitize_log(job_id)}") process = self._download_processes.get(job_id) if process: try: process.terminate() - log.info(f"Terminated worker process for job {_sanitize_log(job_id)}") + log.info(f"Terminated worker process for job {sanitize_log(job_id)}") except ProcessLookupError: - log.debug(f"Worker process for job {_sanitize_log(job_id)} already exited") + log.debug(f"Worker process for job {sanitize_log(job_id)} already exited") return True diff --git a/unshackle/core/api/handlers.py b/unshackle/core/api/handlers.py index 8641e4a..9f3600d 100644 --- a/unshackle/core/api/handlers.py +++ b/unshackle/core/api/handlers.py @@ -7,6 +7,7 @@ from aiohttp import web from unshackle.core.api.errors import APIError, APIErrorCode, handle_api_exception from unshackle.core.api.input_bridge import AuthStatus, InputBridge +from unshackle.core.api.sanitize import sanitize_log from unshackle.core.config import config from unshackle.core.constants import AUDIO_CODEC_MAP, DYNAMIC_RANGE_MAP, VIDEO_CODEC_MAP from unshackle.core.proxies.resolve import initialize_proxy_providers, resolve_proxy @@ -17,11 +18,6 @@ from unshackle.core.tracks import Audio, Subtitle, Video log = logging.getLogger("api") -def sanitize_log(value: object) -> str: - """Sanitize a value for safe logging by removing newlines and control characters.""" - return str(value).replace("\n", "").replace("\r", "").replace("\x00", "") - - DEFAULT_DOWNLOAD_PARAMS = { "profile": None, "quality": [], @@ -126,7 +122,9 @@ def load_full_cdm(service: str, profile: Optional[str], cdm_type: Optional[str] try: return load_cdm(cdm_name, service_name=service) except Exception as exc: # noqa: BLE001 - fall back to stub on load failure - log.warning(f"load_cdm({cdm_name!r}) failed for {service}: {exc}; using lightweight stub") + log.warning( + f"load_cdm({sanitize_log(cdm_name)!r}) failed for {sanitize_log(service)}: {exc}; using lightweight stub" + ) return _resolve_server_cdm(service, profile, cdm_type) @@ -735,7 +733,9 @@ async def list_tracks_handler(data: Dict[str, Any], request: Optional[web.Reques wanted = season_range.parse_tokens(*wanted_param) else: wanted = season_range.parse_tokens(wanted_param) - log.debug(f"Parsed wanted '{wanted_param}' into {len(wanted)} episodes: {wanted[:10]}...") + log.debug( + f"Parsed wanted '{sanitize_log(wanted_param)}' into {len(wanted)} episodes: {wanted[:10]}..." + ) except (Exception, SystemExit) as e: raise APIError( APIErrorCode.INVALID_PARAMETERS, @@ -1910,13 +1910,13 @@ def _resolve_handler_proxy(data: Dict[str, Any], normalized_service: str) -> tup server_region = None if server_region and server_region == client_region.lower(): - log.info(f"Server already in client region '{client_region}', no proxy needed") + log.info(f"Server already in client region '{sanitize_log(client_region)}', no proxy needed") else: try: proxy_param = resolve_proxy(client_region, proxy_providers) - log.info(f"Using server proxy for client region '{client_region}'") + log.info(f"Using server proxy for client region '{sanitize_log(client_region)}'") except ValueError: - log.debug(f"No server proxy available for client region '{client_region}'") + log.debug(f"No server proxy available for client region '{sanitize_log(client_region)}'") return proxy_param, proxy_providers @@ -2299,9 +2299,9 @@ async def session_license_handler( if track_drm_type: actual_drm_type = track_drm_type except SystemExit: - log.warning(f"Service exited while resolving keys for track {tid[:12]}, skipping") + log.warning(f"Service exited while resolving keys for track {sanitize_log(tid[:12])}, skipping") except (Exception, SystemExit) as e: - log.warning(f"Failed to resolve keys for track {tid[:12]}: {e}") + log.warning(f"Failed to resolve keys for track {sanitize_log(tid[:12])}: {e}") response: Dict[str, Any] = {"keys": all_keys} if actual_drm_type: @@ -2347,7 +2347,7 @@ async def session_license_handler( if mode == "server_cdm": keys = _handle_single_server_cdm(service, title, track, pssh_b64, drm_type, request) - log.info(f"Server CDM resolved {len(keys)} key(s) for track {track_id[:12]}") + log.info(f"Server CDM resolved {len(keys)} key(s) for track {sanitize_log(track_id[:12])}") return web.json_response({"keys": keys}) return _handle_proxy_license(service, title, track, challenge_b64, drm_type) @@ -2357,7 +2357,7 @@ async def session_license_handler( except SystemExit: raise APIError(APIErrorCode.SERVICE_ERROR, "Service exited during license request") except (Exception, SystemExit) as e: - log.exception(f"Error proxying license for track {track_id}") + log.exception(f"Error proxying license for track {sanitize_log(track_id)}") debug_mode = request.app.get("debug_api", False) if request else False return handle_api_exception( e, diff --git a/unshackle/core/api/sanitize.py b/unshackle/core/api/sanitize.py new file mode 100644 index 0000000..81e4a0e --- /dev/null +++ b/unshackle/core/api/sanitize.py @@ -0,0 +1,8 @@ +"""Helpers for safely logging user-provided values.""" + +from __future__ import annotations + + +def sanitize_log(value: object) -> str: + """Sanitize a value for safe logging by removing newlines and control characters.""" + return str(value).replace("\n", "").replace("\r", "").replace("\x00", "") diff --git a/unshackle/core/api/session_store.py b/unshackle/core/api/session_store.py index 3ae27b1..5c27bfb 100644 --- a/unshackle/core/api/session_store.py +++ b/unshackle/core/api/session_store.py @@ -15,6 +15,7 @@ from datetime import datetime, timezone from typing import Any, Dict, List, Optional from unshackle.core.api.input_bridge import AuthStatus, InputBridge +from unshackle.core.api.sanitize import sanitize_log from unshackle.core.config import config from unshackle.core.tracks import Track @@ -84,7 +85,7 @@ class SessionStore: service_instance=service_instance, ) self._sessions[session_id] = entry - log.info(f"Created session {session_id} for service {service_tag}") + log.info(f"Created session {sanitize_log(session_id)} for service {sanitize_log(service_tag)}") return entry async def get(self, session_id: str) -> Optional[SessionEntry]: @@ -97,7 +98,9 @@ class SessionStore: if entry.auth_status not in (AuthStatus.AUTHENTICATING, AuthStatus.PENDING_INPUT): elapsed = (datetime.now(timezone.utc) - entry.last_accessed).total_seconds() if elapsed > self._ttl: - log.info(f"Session {session_id} expired (elapsed={elapsed:.0f}s, ttl={self._ttl}s)") + log.info( + f"Session {sanitize_log(session_id)} expired (elapsed={elapsed:.0f}s, ttl={self._ttl}s)" + ) del self._sessions[session_id] return None @@ -112,7 +115,7 @@ class SessionStore: if entry.input_bridge: entry.input_bridge.cancel() self._cleanup_cache_dir(entry.cache_tag) - log.info(f"Deleted session {session_id}") + log.info(f"Deleted session {sanitize_log(session_id)}") return True return False diff --git a/unshackle/core/drm/playready.py b/unshackle/core/drm/playready.py index 09d0756..5328ddf 100644 --- a/unshackle/core/drm/playready.py +++ b/unshackle/core/drm/playready.py @@ -80,7 +80,8 @@ class PlayReady: def _extract_kids_from_pssh_b64(self, pssh_b64: str) -> list[UUID]: """Extract all KIDs from base64-encoded PSSH data.""" try: - import xml.etree.ElementTree as ET + # PSSH XML comes from third-party manifests; defusedxml guards against entity expansion + import defusedxml.ElementTree as ET # Decode the PSSH pssh_bytes = base64.b64decode(pssh_b64) diff --git a/unshackle/core/remote_service.py b/unshackle/core/remote_service.py index b725f65..39b1a52 100644 --- a/unshackle/core/remote_service.py +++ b/unshackle/core/remote_service.py @@ -31,9 +31,18 @@ from unshackle.core.titles.movie import Movie, Movies from unshackle.core.tracks import Audio, Chapter, Chapters, Subtitle, Tracks, Video from unshackle.core.tracks.attachment import Attachment from unshackle.core.tracks.track import Track +from unshackle.core.utils.redact import redact_text, safe_display_url log = logging.getLogger("remote_service") +SENSITIVE_DATA_KEYS = ("credential", "credentials", "password", "token", "api_key") + + +def redact_secrets(text: str, data: Optional[Dict[str, Any]] = None) -> str: + """Mask URL userinfo and any request-payload secrets before the text is logged.""" + secrets = [v for k in SENSITIVE_DATA_KEYS if isinstance(v := (data or {}).get(k), str) and v] + return redact_text(text, secrets) or "" + class RemoteClient: """HTTP client for the unshackle serve API.""" @@ -59,14 +68,15 @@ class RemoteClient: try: resp = getattr(self.session, method)(url, json=data, timeout=120 if method == "post" else 30) except requests.ConnectionError: - log.error(f"Could not connect to remote server at {self.server_url}. Is it running? (unshackle serve)") + server_url = safe_display_url(self.server_url) + log.error(f"Could not connect to remote server at {server_url}. Is it running? (unshackle serve)") raise SystemExit(1) except requests.Timeout: log.error(f"Request to remote server timed out: {endpoint}") raise SystemExit(1) result = resp.json() if resp.status_code >= 400: - error_msg = result.get("message", resp.text) + error_msg = redact_secrets(str(result.get("message", resp.text)), data) error_code = result.get("error_code", "UNKNOWN") log.error(f"Server error [{error_code}]: {error_msg}") raise SystemExit(1) diff --git a/unshackle/core/utilities.py b/unshackle/core/utilities.py index 69ed7c8..55c6b9f 100644 --- a/unshackle/core/utilities.py +++ b/unshackle/core/utilities.py @@ -30,6 +30,7 @@ from unidecode import unidecode from unshackle.core.config import config from unshackle.core.constants import LANGUAGE_EXACT_DISTANCE, LANGUAGE_MAX_DISTANCE +from unshackle.core.utils.redact import redact_text """ Utility functions for the unshackle media archival tool. @@ -836,7 +837,7 @@ class DebugLogger: if operation: entry["operation"] = operation if message: - entry["message"] = message + entry["message"] = redact_text(message) if service: entry["service"] = service if context: @@ -853,8 +854,10 @@ class DebugLogger: if error: entry["error"] = { "type": type(error).__name__, - "message": str(error), - "traceback": traceback.format_exception(type(error), error, error.__traceback__), + "message": redact_text(str(error)), + "traceback": [ + redact_text(line) for line in traceback.format_exception(type(error), error, error.__traceback__) + ], } for key, value in kwargs.items(): @@ -875,7 +878,12 @@ class DebugLogger: if data is None: return None - if isinstance(data, (str, int, float, bool)): + if isinstance(data, str): + # mask URL userinfo and secret-bearing query params that key-based + # redaction can't catch (e.g. a proxy URL inside a non-sensitive field) + return redact_text(data) + + if isinstance(data, (int, float, bool)): return data if isinstance(data, (list, tuple)): diff --git a/unshackle/core/utils/redact.py b/unshackle/core/utils/redact.py new file mode 100644 index 0000000..ebb04da --- /dev/null +++ b/unshackle/core/utils/redact.py @@ -0,0 +1,39 @@ +"""Helpers for masking secrets in text destined for logs or API responses.""" + +from __future__ import annotations + +import re +from typing import Iterable, Optional +from urllib.parse import urlsplit, urlunsplit + +REDACTED = "***" + +# user:pass@ userinfo embedded in any URL (proxy URLs, remote server URLs) +URL_USERINFO_RE = re.compile(r"(?<=://)[^/@]+@") + +# secret-bearing query parameters in URLs that end up in free text +SENSITIVE_QUERY_PARAM_RE = re.compile(r"(?i)\b(password|passwd|pwd|token|api_key|apikey|secret|auth)=([^&#\s\"']+)") + + +def redact_text(text: Optional[str], secrets: Iterable[str] = ()) -> Optional[str]: + """ + Mask URL userinfo, secret-bearing query parameters, and any known secret + strings inside a free-text value before it is logged or serialized. + """ + if not isinstance(text, str) or not text: + return text + text = URL_USERINFO_RE.sub(f"{REDACTED}@", text) + text = SENSITIVE_QUERY_PARAM_RE.sub(rf"\1={REDACTED}", text) + # longest first so substrings don't survive partial replacement + for secret in sorted({s for s in secrets if isinstance(s, str) and s}, key=len, reverse=True): + text = text.replace(secret, REDACTED) + return text + + +def safe_display_url(url: str) -> str: + """Rebuild a URL from its scheme/host/port/path only, so userinfo can never be logged.""" + parts = urlsplit(url) + netloc = parts.hostname or "" + if parts.port: + netloc += f":{parts.port}" + return urlunsplit((parts.scheme, netloc, parts.path, "", "")) diff --git a/uv.lock b/uv.lock index 25680ee..ff9f7bb 100644 --- a/uv.lock +++ b/uv.lock @@ -456,6 +456,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/35/386550fd60316d1e37eccdda609b074113298f23cef5bddb2049823fe666/dacite-1.9.2-py3-none-any.whl", hash = "sha256:053f7c3f5128ca2e9aceb66892b1a3c8936d02c686e707bee96e19deef4bc4a0", size = 16600, upload-time = "2025-02-05T09:27:24.345Z" }, ] +[[package]] +name = "defusedxml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, +] + [[package]] name = "distlib" version = "0.4.0" @@ -1766,6 +1775,7 @@ dependencies = [ { name = "construct" }, { name = "crccheck" }, { name = "cryptography" }, + { name = "defusedxml" }, { name = "filelock" }, { name = "fonttools" }, { name = "jsonpickle" }, @@ -1831,6 +1841,7 @@ requires-dist = [ { name = "construct", specifier = ">=2.8.8,<3" }, { name = "crccheck", specifier = ">=1.3.0,<2" }, { name = "cryptography", specifier = ">=45.0.0,<47" }, + { name = "defusedxml", specifier = ">=0.7.1" }, { name = "filelock", specifier = ">=3.20.3,<4" }, { name = "fonttools", specifier = ">=4.60.2,<5" }, { name = "jsonpickle", specifier = ">=3.0.4,<5" },