From 466bf610cc9bbeb210950497135d208eb27e3554 Mon Sep 17 00:00:00 2001 From: imSp4rky Date: Thu, 11 Jun 2026 12:26:57 -0600 Subject: [PATCH] feat(drm): add native DASH ClearKey (org.w3.clearkey) support unshackle's DASH parser only recognised Widevine and PlayReady ContentProtection, so services using W3C EME ClearKey had to fake a Widevine object and monkey-patch get_content_keys. Add a first-class ClearKeyCENC DRM type so services just implement a license callback. - ClearKeyCENC (core/drm/clearkey_cenc.py): KID-based, no CDM/PSSH; builds the W3C JSON license request (unpadded base64url), parses the JWK Set response (dict/str/bytes), falls back to POSTing the manifest Laurl when the service returns None, decrypts via the same shaka/ mp4decrypt CENC path as Widevine - DASH.get_drm emits ClearKeyCENC for scheme e2719d58-...; KID from own or sibling mp4protection cenc:default_KID, Laurl across dashif/legacy/ bare namespaces - track.download dispatches prepare_drm for ClearKeyCENC; dl.prepare_drm gains a clearkey branch (cache/vault lookup, license-failure tolerated when content_keys pre-populated, vault push, export) - Service.get_clearkey_license base callback (default None -> Laurl); drm_from_dict reconstructs ClearKeyCENC for export/import round-trip - EXAMPLE service + config demo the callback - Tests: tests/core/test_clearkey_cenc.py and an export round-trip case - Docs: DRM_CONFIG.md ClearKey section --- docs/DRM_CONFIG.md | 28 +++ tests/core/test_clearkey_cenc.py | 215 +++++++++++++++++ tests/core/test_export.py | 21 ++ unshackle/commands/dl.py | 146 ++++++++++- unshackle/core/drm/__init__.py | 23 +- unshackle/core/drm/clearkey_cenc.py | 320 +++++++++++++++++++++++++ unshackle/core/manifests/dash.py | 50 +++- unshackle/core/service.py | 21 ++ unshackle/core/tracks/track.py | 9 +- unshackle/services/EXAMPLE/__init__.py | 19 +- unshackle/services/EXAMPLE/config.yaml | 1 + 11 files changed, 836 insertions(+), 17 deletions(-) create mode 100644 
tests/core/test_clearkey_cenc.py create mode 100644 unshackle/core/drm/clearkey_cenc.py diff --git a/docs/DRM_CONFIG.md b/docs/DRM_CONFIG.md index 44ca027..5ee3e9e 100644 --- a/docs/DRM_CONFIG.md +++ b/docs/DRM_CONFIG.md @@ -344,6 +344,34 @@ that use MonaLisa handle ticket/key retrieval and CDM initialization internally. --- +## ClearKey DRM + +Two distinct ClearKey mechanisms are supported; neither needs a CDM device or any DRM config: + +### HLS AES-128 ClearKey + +The key is fetched from (or near) the M3U8 `EXT-X-KEY` URI and segments are decrypted with +pure-Python AES-CBC. Fully automatic — nothing to configure. + +### DASH ClearKey (`org.w3.clearkey`) + +W3C EME ClearKey for DASH CENC content. The DASH parser recognises the clearkey +ContentProtection scheme (`urn:uuid:e2719d58-a985-b3c9-781a-b030af78d30e`), takes the KID from +`cenc:default_KID`, and reads the license server URL from the manifest's `Laurl` element when +present. + +License flow: the W3C JSON license request (`{"kids": [...], "type": "temporary"}`) is POSTed to +the license server, which returns the content key as a JWK Set. Keys land in the same vault and +`--export` paths as Widevine/PlayReady, and decryption uses the same shaka-packager/mp4decrypt +CENC backends (`decryption` config option applies). + +Service integration (simplest first): +1. Manifest carries a `Laurl` element — works with zero service code. +2. Custom endpoint/headers — service overrides `get_clearkey_license`. +3. Bespoke key delivery — service pre-populates the DRM object's keys in `get_tracks`. + +--- + ## key_vaults (list\[dict]) Key Vaults store your obtained Content Encryption Keys (CEKs) and Key IDs per-service. diff --git a/tests/core/test_clearkey_cenc.py b/tests/core/test_clearkey_cenc.py new file mode 100644 index 0000000..6fbb319 --- /dev/null +++ b/tests/core/test_clearkey_cenc.py @@ -0,0 +1,215 @@ +"""Tests for the DASH/W3C EME ClearKey (``org.w3.clearkey``) DRM system. 
+ +Covers the three seams of the native ClearKey flow: +- ``DASH.get_drm`` emitting a ``ClearKeyCENC`` from a clearkey ContentProtection + element (KID from own attrs or sibling mp4protection, Laurl namespace variants) +- ``ClearKeyCENC.get_content_keys`` building the W3C JSON license request and + parsing the JWK Set response (dict/str/bytes, unpadded base64url) +- ``to_dict`` / ``drm_from_dict`` round-trip for the --export/import path +""" + +from __future__ import annotations + +import base64 +import json +from typing import Any, Optional +from uuid import UUID + +import pytest +# lxml.etree: XML parser used to build ContentProtection fixtures for DASH.get_drm +from lxml import etree + +from unshackle.core.drm import drm_from_dict +from unshackle.core.drm.clearkey_cenc import ClearKeyCENC +from unshackle.core.manifests.dash import DASH + +KID = UUID("9eb4050d-e44b-4802-932e-27d75083e266") +KEY = bytes.fromhex("ccd0064c43f7e9fcbaa9b12af3fd1f40") +LAURL = "https://license.example.test/clearkey" + +CLEARKEY_URN = "urn:uuid:e2719d58-a985-b3c9-781a-b030af78d30e" +CENC_NS = "urn:mpeg:cenc:2013" + + +def b64url_nopad(data: bytes) -> str: + return base64.urlsafe_b64encode(data).rstrip(b"=").decode("ascii") + + +def jwk_set() -> dict: + return { + "keys": [{"kty": "oct", "kid": b64url_nopad(KID.bytes), "k": b64url_nopad(KEY)}], + "type": "temporary", + } + + +def protection(xml: str) -> etree._Element: + return etree.fromstring(xml.encode("utf8")) + + +class StubLicence: + """Callable license stub that records the challenge it was given.""" + + def __init__(self, response: Any) -> None: + self.response = response + self.challenge: Optional[bytes] = None + + def __call__(self, *, challenge: bytes) -> Any: + self.challenge = challenge + return self.response + + +class StubResponse: + def __init__(self, content: bytes) -> None: + self.content = content + + def raise_for_status(self) -> None: + return None + + +class StubSession: + """Records the POST that the laurl fallback 
makes.""" + + def __init__(self, content: bytes) -> None: + self.content = content + self.url: Optional[str] = None + self.data: Optional[bytes] = None + + def post(self, url: str, data: Any = None, **_: Any) -> StubResponse: + self.url = url + self.data = data + return StubResponse(self.content) + + +def test_get_drm_parses_clearkey_contentprotection() -> None: + elem = protection( + f'' + f"{LAURL}" + f"" + ) + + drm = DASH.get_drm([elem]) + + assert len(drm) == 1 + assert isinstance(drm[0], ClearKeyCENC) + assert drm[0].kids == [KID] + assert drm[0].laurl == LAURL + + +@pytest.mark.parametrize( + "laurl_xml", + [ + f'{LAURL}', + f'{LAURL}', + f"{LAURL}", + ], + ids=["dashif-cps", "legacy-clearkey-ns", "bare-lowercase"], +) +def test_get_drm_clearkey_laurl_variants(laurl_xml: str) -> None: + elem = protection( + f'' + f"{laurl_xml}" + f"" + ) + + drm = DASH.get_drm([elem]) + + assert len(drm) == 1 + assert drm[0].laurl == LAURL + + +def test_get_drm_clearkey_kid_from_sibling_mp4protection() -> None: + # Canonical DASH-IF shape: default_KID on the mp4protection element only. 
+ clearkey = protection(f'') + mp4protection = protection( + f'' + ) + + drm = DASH.get_drm([mp4protection, clearkey]) + + assert len(drm) == 1 + assert isinstance(drm[0], ClearKeyCENC) + assert drm[0].kids == [KID] + assert drm[0].laurl is None + + +def test_get_drm_clearkey_without_any_kid_is_skipped() -> None: + elem = protection(f'') + assert DASH.get_drm([elem]) == [] + + +@pytest.mark.parametrize( + "shape", + ["dict", "str", "bytes"], +) +def test_get_content_keys_parses_jwk_set(shape: str) -> None: + response: Any = jwk_set() + if shape == "str": + response = json.dumps(response) + elif shape == "bytes": + response = json.dumps(response).encode("utf8") + + drm = ClearKeyCENC(kids=[KID]) + drm.get_content_keys(licence=StubLicence(response)) + + assert drm.content_keys == {KID: KEY.hex()} + + +def test_get_content_keys_challenge_shape() -> None: + licence = StubLicence(jwk_set()) + drm = ClearKeyCENC(kids=[KID]) + drm.get_content_keys(licence=licence) + + assert licence.challenge is not None + request = json.loads(licence.challenge.decode("utf8")) + assert request == {"kids": [b64url_nopad(KID.bytes)], "type": "temporary"} + # W3C EME mandates unpadded base64url key IDs + assert all("=" not in kid for kid in request["kids"]) + + +def test_get_content_keys_laurl_fallback() -> None: + session = StubSession(json.dumps(jwk_set()).encode("utf8")) + drm = ClearKeyCENC(kids=[KID], laurl=LAURL) + drm.get_content_keys(licence=StubLicence(None), session=session) + + assert session.url == LAURL + assert session.data is not None + assert json.loads(session.data.decode("utf8"))["type"] == "temporary" + assert drm.content_keys == {KID: KEY.hex()} + + +def test_get_content_keys_no_response_raises_empty_license() -> None: + drm = ClearKeyCENC(kids=[KID]) + with pytest.raises(ClearKeyCENC.Exceptions.EmptyLicense): + drm.get_content_keys(licence=StubLicence(None)) + + +def test_get_content_keys_missing_kid_raises_cek_not_found() -> None: + other_kid = UUID(int=7) + response = 
{"keys": [{"kty": "oct", "kid": b64url_nopad(other_kid.bytes), "k": b64url_nopad(KEY)}]} + drm = ClearKeyCENC(kids=[KID]) + with pytest.raises(ClearKeyCENC.Exceptions.CEKNotFound): + drm.get_content_keys(licence=StubLicence(response)) + + +def test_get_content_keys_skips_when_already_keyed() -> None: + licence = StubLicence(jwk_set()) + drm = ClearKeyCENC(kids=[KID], content_keys={KID: KEY.hex()}) + drm.get_content_keys(licence=licence) + + assert licence.challenge is None # no license round-trip needed + + +def test_to_dict_roundtrip() -> None: + drm = ClearKeyCENC(kids=[KID], laurl=LAURL, content_keys={KID: KEY.hex()}) + data = drm.to_dict() + assert data["system"] == "ClearKeyCENC" + + data["content_keys"] = {kid.hex: key for kid, key in drm.content_keys.items()} + rebuilt = drm_from_dict(data) + + assert isinstance(rebuilt, ClearKeyCENC) + assert rebuilt.kids == [KID] + assert rebuilt.laurl == LAURL + assert rebuilt.content_keys == {KID: KEY.hex()} diff --git a/tests/core/test_export.py b/tests/core/test_export.py index c6118d1..d393e0a 100644 --- a/tests/core/test_export.py +++ b/tests/core/test_export.py @@ -14,7 +14,9 @@ from types import SimpleNamespace from uuid import UUID from unshackle.commands.dl import dl +from unshackle.core.drm import drm_from_dict from unshackle.core.drm.clearkey import ClearKey +from unshackle.core.drm.clearkey_cenc import ClearKeyCENC from unshackle.core.import_service import ImportService from unshackle.core.titles import Movie from unshackle.core.tracks import Audio, Chapter, Subtitle, Video @@ -162,6 +164,25 @@ def test_drm_free_export_roundtrips_through_import_service(tmp_path: Path) -> No assert [c.name for c in svc.get_chapters(movie)] == [None, "Intro"] +def test_clearkey_cenc_exports_drm_and_keys(tmp_path: Path) -> None: + """A licensed ClearKeyCENC exports its system dict and KID:KEY map, and the + exported DRM dict plus keys rebuild a decrypt-ready instance via drm_from_dict.""" + export = tmp_path / "export.json" + 
title = make_title() + video = title.tracks.videos[0] + drm = ClearKeyCENC(kids=[KID], laurl="https://license.example.test/ck", content_keys={KID: "cc" * 16}) + + make_dl().write_export(export, title, video, drm) + + track = read_export(export)["titles"]["movie-1"]["tracks"]["v1"] + assert track["drm"] == [{"system": "ClearKeyCENC", "kids": [KID.hex], "laurl": "https://license.example.test/ck"}] + assert track["keys"] == {KID.hex: "cc" * 16} + + rebuilt = drm_from_dict({**track["drm"][0], "content_keys": track["keys"]}) + assert isinstance(rebuilt, ClearKeyCENC) + assert rebuilt.content_keys == {KID: "cc" * 16} + + def test_keyless_content_keys_writes_no_keys_entry(tmp_path: Path) -> None: """A DRM object with empty content_keys must not create an empty keys map.""" export = tmp_path / "export.json" diff --git a/unshackle/commands/dl.py b/unshackle/commands/dl.py index be6477a..4b14dc6 100644 --- a/unshackle/commands/dl.py +++ b/unshackle/commands/dl.py @@ -46,7 +46,7 @@ from unshackle.core.config import config from unshackle.core.console import console from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack, context_settings from unshackle.core.credential import Credential -from unshackle.core.drm import DRM_T, MonaLisa, PlayReady, Widevine +from unshackle.core.drm import DRM_T, ClearKeyCENC, MonaLisa, PlayReady, Widevine from unshackle.core.events import events from unshackle.core.proxies import Basic, Gluetun, Hola, NordVPN, SurfsharkVPN, WindscribeVPN from unshackle.core.service import Service @@ -2320,6 +2320,11 @@ class dl: title=title, track=track, ), + clearkey_licence=partial( + service.get_clearkey_license, + title=title, + track=track, + ), cdm_only=cdm_only, vaults_only=vaults_only, export=export_path, @@ -2328,7 +2333,7 @@ class dl: max_workers=workers, progress=tracks_progress_callables[i], ) - # DRM-free and ClearKey tracks never reach prepare_drm, so export here. 
+ # DRM-free and HLS-ClearKey tracks never reach prepare_drm, so export here. # drm=None on purpose: licensed tracks already recorded their DRM/keys # in prepare_drm, and write_export merges via setdefault. if export_path: @@ -3004,6 +3009,7 @@ class dl: title: Title_T, certificate: Callable, licence: Callable, + clearkey_licence: Optional[Callable] = None, track_kid: Optional[UUID] = None, table: Table = None, cdm_only: bool = False, @@ -3456,6 +3462,142 @@ class dl: if export: self.write_export(export, title, track, drm) + elif isinstance(drm, ClearKeyCENC): + with self.DRM_TABLE_LOCK: + cek_tree = Tree(Text.assemble(("ClearKey", "cyan"), overflow="fold")) + pre_existing_tree = next( + (x for x in table.columns[0].cells if isinstance(x, Tree) and x.label == cek_tree.label), None + ) + if pre_existing_tree: + cek_tree = pre_existing_tree + + need_license = False + all_kids = list(drm.kids) + if track_kid and track_kid not in all_kids: + all_kids.append(track_kid) + + for kid in all_kids: + if kid in drm.content_keys: + is_track_kid = ["", "*"][kid == track_kid] + key = drm.content_keys[kid] + label = f"[text2]{kid.hex}:{key}{is_track_kid}" + if not any(f"{kid.hex}:{key}" in x.label for x in cek_tree.children): + cek_tree.add(label) + continue + + is_track_kid = ["", "*"][kid == track_kid] + + cached_key = self.LICENSE_KEY_CACHE.get(kid) + if cached_key: + drm.content_keys[kid] = cached_key + label = f"[text2]{kid.hex}:{cached_key}{is_track_kid} from cache" + if not any(f"{kid.hex}:{cached_key}" in x.label for x in cek_tree.children): + cek_tree.add(label) + log_event( + "license_cache_hit", + level="INFO", + service=self.service, + context={ + "kid": kid.hex, + "content_key": cached_key, + "track": str(track), + "drm_type": "ClearKeyCENC", + }, + ) + continue + + if not cdm_only: + content_key, vault_used = self.vaults.get_key(kid) + if content_key: + drm.content_keys[kid] = content_key + label = f"[text2]{kid.hex}:{content_key}{is_track_kid} from {vault_used}" + 
if not any(f"{kid.hex}:{content_key}" in x.label for x in cek_tree.children): + cek_tree.add(label) + self.vaults.add_key(kid, content_key, excluding=vault_used) + self.LICENSE_KEY_CACHE[kid] = content_key + elif vaults_only: + msg = f"No Vault has a Key for {kid.hex} and --vaults-only was used" + cek_tree.add(f"[logging.level.error]{msg}") + if not pre_existing_tree: + table.add_row(cek_tree) + log_event( + "vault_key_not_found", + level="ERROR", + service=self.service, + message=msg, + context={"kid": kid.hex, "track": str(track), "drm_type": "ClearKeyCENC"}, + ) + raise ClearKeyCENC.Exceptions.CEKNotFound(msg) + else: + need_license = True + + if kid not in drm.content_keys and cdm_only: + need_license = True + + if need_license and all(kid in drm.content_keys for kid in all_kids): + need_license = False + + if need_license and not vaults_only: + from_vaults = drm.content_keys.copy() + + try: + drm.get_content_keys(licence=clearkey_licence or (lambda **_: None)) + except Exception as e: + if drm.content_keys: + self.log.debug(f"License call failed but keys already in content_keys: {e}") + else: + if isinstance( + e, (ClearKeyCENC.Exceptions.EmptyLicense, ClearKeyCENC.Exceptions.CEKNotFound) + ): + msg = str(e) + else: + msg = f"An exception occurred in the Service's license function: {e}" + cek_tree.add(f"[logging.level.error]{msg}") + if not pre_existing_tree: + table.add_row(cek_tree) + if self.debug_logger: + self.debug_logger.log_error( + "get_license_clearkey", + e, + service=self.service, + context={ + "track": str(track), + "exception_type": type(e).__name__, + "drm_type": "ClearKeyCENC", + }, + ) + raise e + + for kid_, key in drm.content_keys.items(): + is_track_kid_marker = ["", "*"][kid_ == track_kid] + label = f"[text2]{kid_.hex}:{key}{is_track_kid_marker}" + if not any(f"{kid_.hex}:{key}" in x.label for x in cek_tree.children): + cek_tree.add(label) + + drm.content_keys.update(from_vaults) + + self.LICENSE_KEY_CACHE.update(drm.content_keys) + + 
successful_caches = self.vaults.add_keys(drm.content_keys) + self.log.info( + f"Cached {len(drm.content_keys)} Key{'' if len(drm.content_keys) == 1 else 's'} to " + f"{successful_caches}/{len(self.vaults)} Vaults" + ) + + if track_kid and track_kid not in drm.content_keys: + msg = f"No Content Key for KID {track_kid.hex} was returned in the License" + cek_tree.add(f"[logging.level.error]{msg}") + if not pre_existing_tree: + table.add_row(cek_tree) + raise ClearKeyCENC.Exceptions.CEKNotFound(msg) + + if cek_tree.children and not pre_existing_tree: + table.add_row() + table.add_row(cek_tree) + + if export: + self.write_export(export, title, track, drm) + elif isinstance(drm, MonaLisa): with self.DRM_TABLE_LOCK: display_id = drm.content_id or drm.pssh diff --git a/unshackle/core/drm/__init__.py b/unshackle/core/drm/__init__.py index bfe126d..d9893e2 100644 --- a/unshackle/core/drm/__init__.py +++ b/unshackle/core/drm/__init__.py @@ -3,31 +3,34 @@ from typing import Any, Union from uuid import UUID from unshackle.core.drm.clearkey import ClearKey +from unshackle.core.drm.clearkey_cenc import ClearKeyCENC from unshackle.core.drm.monalisa import MonaLisa from unshackle.core.drm.playready import PlayReady from unshackle.core.drm.widevine import Widevine -DRM_T = Union[ClearKey, Widevine, PlayReady, MonaLisa] +DRM_T = Union[ClearKey, ClearKeyCENC, Widevine, PlayReady, MonaLisa] -def drm_from_dict(data: dict[str, Any]) -> Union[Widevine, PlayReady]: - """Reconstruct a Widevine/PlayReady DRM instance from its ``to_dict()`` form. +def drm_from_dict(data: dict[str, Any]) -> Union[Widevine, PlayReady, ClearKeyCENC]: + """Reconstruct a Widevine/PlayReady/ClearKeyCENC DRM instance from its ``to_dict()`` form. - Rebuilds the PSSH from the stored base64 and re-injects any saved content keys - so the resulting object can decrypt without contacting a license server. 
+ Rebuilds the PSSH from the stored base64 (KIDs for ClearKey, which has no PSSH) + and re-injects any saved content keys so the resulting object can decrypt without + contacting a license server. """ system = data.get("system") pssh_b64 = data.get("pssh_b64") kids = data.get("kids") or [] content_keys = data.get("content_keys") or {} - if not pssh_b64: + if system == "ClearKeyCENC": + drm: Union[Widevine, PlayReady, ClearKeyCENC] = ClearKeyCENC(kids=kids, laurl=data.get("laurl")) + elif not pssh_b64: raise ValueError("Cannot reconstruct DRM without a stored PSSH.") - - if system == "PlayReady": + elif system == "PlayReady": from pyplayready.system.pssh import PSSH as PlayReadyPSSH - drm: Union[Widevine, PlayReady] = PlayReady(pssh=PlayReadyPSSH(base64.b64decode(pssh_b64)), pssh_b64=pssh_b64) + drm = PlayReady(pssh=PlayReadyPSSH(base64.b64decode(pssh_b64)), pssh_b64=pssh_b64) elif system == "Widevine": from pywidevine.pssh import PSSH as WidevinePSSH @@ -41,4 +44,4 @@ def drm_from_dict(data: dict[str, Any]) -> Union[Widevine, PlayReady]: return drm -__all__ = ("ClearKey", "Widevine", "PlayReady", "MonaLisa", "DRM_T", "drm_from_dict") +__all__ = ("ClearKey", "ClearKeyCENC", "Widevine", "PlayReady", "MonaLisa", "DRM_T", "drm_from_dict") diff --git a/unshackle/core/drm/clearkey_cenc.py b/unshackle/core/drm/clearkey_cenc.py new file mode 100644 index 0000000..5c07434 --- /dev/null +++ b/unshackle/core/drm/clearkey_cenc.py @@ -0,0 +1,320 @@ +from __future__ import annotations + +import base64 +import json +import shutil +import subprocess +import textwrap +import time +from pathlib import Path +from typing import Any, Callable, Iterable, Optional, Union +from uuid import UUID + +from requests import Session +from rich.text import Text + +from unshackle.core import binaries +from unshackle.core.config import config +from unshackle.core.console import console +from unshackle.core.utilities import log_event + + +def b64url_encode_nopad(data: bytes) -> str: + # W3C EME uses 
base64url without padding for key IDs and key values + return base64.urlsafe_b64encode(data).rstrip(b"=").decode("ascii") + + +def b64url_decode(data: str) -> bytes: + return base64.urlsafe_b64decode(data + "=" * (-len(data) % 4)) + + +class ClearKeyCENC: + """W3C EME ClearKey (org.w3.clearkey) DRM System over MPEG-CENC content. + + Distinct from the HLS AES-128 `ClearKey` class: keys here are delivered by a + license server as a JWK Set keyed by KID, and content is standard CENC + (decrypted with shaka-packager/mp4decrypt KID:KEY pairs, same as Widevine). + """ + + urn = "urn:uuid:e2719d58-a985-b3c9-781a-b030af78d30e" + + def __init__( + self, + kids: Iterable[Union[UUID, str, bytes]], + laurl: Optional[str] = None, + content_keys: Optional[dict[UUID, str]] = None, + **kwargs: Any, + ): + kid_list: list[UUID] = [] + for kid in kids or []: + if isinstance(kid, str): + kid = UUID(hex=kid) + elif isinstance(kid, bytes): + kid = UUID(bytes=kid) + if not isinstance(kid, UUID): + raise ValueError(f"Expected kid to be a {UUID}, str, or bytes, not {kid!r}") + kid_list.append(kid) + if not kid_list: + raise ClearKeyCENC.Exceptions.KIDNotFound("No Key ID was provided.") + + self.kids: list[UUID] = kid_list + self.laurl: Optional[str] = laurl + self.content_keys: dict[UUID, str] = dict(content_keys or {}) + self.data: dict = kwargs or {} + + @property + def kid(self) -> Optional[UUID]: + """Get first Key ID, if any.""" + return next(iter(self.kids), None) + + def to_dict(self) -> dict[str, Any]: + """Serialise this DRM instance for export/import (KIDs + license URL). + + Content keys are stored once at the export's track level, not duplicated here. 
+ """ + data: dict[str, Any] = { + "system": "ClearKeyCENC", + "kids": [kid.hex for kid in self.kids], + } + if self.laurl: + data["laurl"] = self.laurl + return data + + def get_license_challenge(self) -> bytes: + """Build the W3C EME ClearKey JSON license request for the unkeyed KIDs.""" + kids = [kid for kid in self.kids if kid not in self.content_keys] or self.kids + request = {"kids": [b64url_encode_nopad(kid.bytes) for kid in kids], "type": "temporary"} + return json.dumps(request).encode("utf8") + + def get_content_keys(self, *, licence: Callable, session: Optional[Session] = None) -> None: + """ + Obtain Content Keys for this DRM Instance from a ClearKey license server. + + The licence param is expected to be a function and will be provided with the + W3C JSON license request as `challenge`. It may return the JWK Set license as + a dict, JSON str, or bytes. If it returns None and the manifest provided a + Laurl, the challenge is POSTed there directly instead. + """ + if all(kid in self.content_keys for kid in self.kids): + return + + challenge = self.get_license_challenge() + + log_event( + "drm_license_request", + level="DEBUG", + message=f"Requesting ClearKey license for {len(self.kids)} KID(s)", + drm_type="ClearKeyCENC", + kids=[kid.hex for kid in self.kids], + challenge_size=len(challenge), + ) + + response = licence(challenge=challenge) + + if response is None and self.laurl: + if not session: + session = Session() + session.headers.update(config.headers) + r = session.post(self.laurl, data=challenge, headers={"Content-Type": "application/json"}) + r.raise_for_status() + response = r.content + + if not response: + raise ClearKeyCENC.Exceptions.EmptyLicense("No ClearKey license was returned and no Laurl is available.") + + if isinstance(response, (bytes, bytearray)): + document = json.loads(bytes(response).decode("utf8")) + elif isinstance(response, str): + document = json.loads(response) + elif isinstance(response, dict): + document = response + else: 
+ raise ValueError(f"Expected the ClearKey license to be bytes, str, or dict, not {response!r}") + + for jwk in document.get("keys") or []: + if jwk.get("kty") not in (None, "oct"): + continue + kid_b64 = jwk.get("kid") + key_b64 = jwk.get("k") + if not kid_b64 or not key_b64: + continue + kid = UUID(bytes=b64url_decode(kid_b64)) + self.content_keys[kid] = b64url_decode(key_b64).hex() + + if not self.content_keys: + raise ClearKeyCENC.Exceptions.EmptyLicense("No Content Keys were within the License") + + for kid in self.kids: + if kid not in self.content_keys: + raise ClearKeyCENC.Exceptions.CEKNotFound(f"No Content Key for KID {kid.hex} within the License") + + log_event( + "drm_content_keys", + level="INFO", + message=f"Recovered {len(self.content_keys)} ClearKey content key(s)", + drm_type="ClearKeyCENC", + key_count=len(self.content_keys), + keys=[{"kid": k.hex, "key": v} for k, v in self.content_keys.items()], + ) + + def decrypt(self, path: Path) -> None: + """ + Decrypt a Track with ClearKey DRM (standard CENC). + Args: + path: Path to the encrypted file to decrypt + Raises: + EnvironmentError if the required decryption executable could not be found. + ValueError if the track has not yet been downloaded. + SubprocessError if the decryption process returned a non-zero exit code. 
+ """ + if not self.content_keys: + raise ValueError("Cannot decrypt a Track without any Content Keys...") + + if not path or not path.exists(): + raise ValueError("Tried to decrypt a file that does not exist.") + + decrypter = str(getattr(config, "decryption", "")).lower() + tool = "mp4decrypt" if decrypter == "mp4decrypt" else "shaka-packager" + + log_event( + "drm_decrypt", + level="DEBUG", + message=f"Decrypting {path.name} with {tool}", + drm_type="ClearKeyCENC", + tool=tool, + file=path.name, + key_count=len(self.content_keys), + ) + + decrypt_start = time.monotonic() + if decrypter == "mp4decrypt": + self.decrypt_with_mp4decrypt(path) + else: + self.decrypt_with_shaka_packager(path) + + log_event( + "drm_decrypt_complete", + level="DEBUG", + message=f"Decrypted {path.name} with {tool}", + drm_type="ClearKeyCENC", + tool=tool, + file=path.name, + duration_ms=round((time.monotonic() - decrypt_start) * 1000, 1), + output_size=path.stat().st_size if path.exists() else 0, + ) + + def decrypt_with_mp4decrypt(self, path: Path) -> None: + """Decrypt using mp4decrypt""" + if not binaries.Mp4decrypt: + raise EnvironmentError("mp4decrypt executable not found but is required.") + + output_path = path.with_stem(f"{path.stem}_decrypted") + + key_args = [] + for kid, key in self.content_keys.items(): + key_args.extend(["--key", f"{kid.hex}:{key}"]) + + cmd = [ + str(binaries.Mp4decrypt), + "--show-progress", + *key_args, + str(path), + str(output_path), + ] + + try: + subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, encoding="utf-8") + except subprocess.CalledProcessError as e: + error_msg = e.stderr if e.stderr else f"mp4decrypt failed with exit code {e.returncode}" + raise subprocess.CalledProcessError(e.returncode, cmd, output=e.stdout, stderr=error_msg) + + if not output_path.exists(): + raise RuntimeError(f"mp4decrypt failed: output file {output_path} was not created") + if output_path.stat().st_size == 0: + raise 
RuntimeError(f"mp4decrypt failed: output file {output_path} is empty") + + path.unlink() + shutil.move(output_path, path) + + def decrypt_with_shaka_packager(self, path: Path) -> None: + """Decrypt using Shaka Packager""" + if not binaries.ShakaPackager: + raise EnvironmentError("Shaka Packager executable not found but is required.") + + output_path = path.with_stem(f"{path.stem}_decrypted") + config.directories.temp.mkdir(parents=True, exist_ok=True) + + try: + arguments = [ + f"input={path},stream=0,output={output_path},output_format=MP4", + "--enable_raw_key_decryption", + "--keys", + ",".join( + "label={}:key_id={}:key={}".format(i, kid.hex, key.lower()) + for i, (kid, key) in enumerate(self.content_keys.items()) + ), + "--temp_dir", + config.directories.temp, + ] + + p = subprocess.Popen( + [binaries.ShakaPackager, *arguments], + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + universal_newlines=True, + ) + + stream_skipped = False + had_error = False + + shaka_log_buffer = "" + for line in iter(p.stderr.readline, ""): + line = line.strip() + if not line: + continue + if "Skip stream" in line: + # file/segment was so small that it didn't have any actual data, ignore + stream_skipped = True + if ":INFO:" in line: + continue + if "I0" in line or "W0" in line: + continue + if ":ERROR:" in line: + had_error = True + if "Insufficient bits in bitstream for given AVC profile" in line: + # this is a warning and is something we don't have to worry about + continue + shaka_log_buffer += f"{line.strip()}\n" + + if shaka_log_buffer: + # wrap to console width - padding - '[ClearKey]: ' + shaka_log_buffer = "\n ".join( + textwrap.wrap(shaka_log_buffer.rstrip(), width=console.width - 22, initial_indent="") + ) + console.log(Text.from_ansi("\n[ClearKey]: " + shaka_log_buffer)) + + p.wait() + + if p.returncode != 0 or had_error: + raise subprocess.CalledProcessError(p.returncode, [binaries.ShakaPackager, *arguments]) + + path.unlink() + if not stream_skipped: + 
shutil.move(output_path, path) + except subprocess.CalledProcessError as e: + if e.returncode == 0xC000013A: # STATUS_CONTROL_C_EXIT + raise KeyboardInterrupt() + raise + + class Exceptions: + class KIDNotFound(Exception): + """KID (Encryption Key ID) was not found.""" + + class CEKNotFound(Exception): + """CEK (Content Encryption Key) for KID was not found in License.""" + + class EmptyLicense(Exception): + """License returned no Content Encryption Keys.""" + + +__all__ = ("ClearKeyCENC",) diff --git a/unshackle/core/manifests/dash.py b/unshackle/core/manifests/dash.py index 8c60bbf..166ad49 100644 --- a/unshackle/core/manifests/dash.py +++ b/unshackle/core/manifests/dash.py @@ -25,7 +25,7 @@ from requests import Session from unshackle.core.cdm.detect import is_playready_cdm from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack -from unshackle.core.drm import DRM_T, PlayReady, Widevine +from unshackle.core.drm import DRM_T, ClearKeyCENC, PlayReady, Widevine from unshackle.core.events import events from unshackle.core.session import RnetSession from unshackle.core.tracks import Audio, Subtitle, Tracks, Video @@ -1047,6 +1047,54 @@ class DASH: drm.append(PlayReady(pssh=pr_pssh, kid=kid, pssh_b64=pr_pssh_b64)) + elif urn == ClearKeyCENC.urn: + # W3C EME ClearKey (org.w3.clearkey) — match the scheme UUID alone, + # value="ClearKey1.0" is spec'd (DASH-IF CCP) but not required in the wild + kid_attr = protection.get("default_KID") or protection.get("{urn:mpeg:cenc:2013}default_KID") + kid = None + if kid_attr: + try: + kid = UUID(kid_attr) + except ValueError: + try: + kid = UUID(bytes=base64.b64decode(kid_attr)) + except Exception: + kid = None + + if not kid: + # DASH-IF puts default_KID on the sibling mp4protection element + kid = next( + ( + UUID(p.get("default_KID") or p.get("{urn:mpeg:cenc:2013}default_KID")) + for p in protections + if p.get("default_KID") or p.get("{urn:mpeg:cenc:2013}default_KID") + ), + None, + ) + + if not 
kid or kid in PLACEHOLDER_KIDS: + continue + + # license URL appears under several namespaces/casings in the wild + laurl = next( + ( + text.strip() + for name in ( + "{https://dashif.org/CPS}Laurl", + "{https://dashif.org/CPS}laurl", + "{http://dashif.org/guidelines/clearKey}Laurl", + "{http://dashif.org/guidelines/clearKey}laurl", + "Laurl", + "laurl", + ) + for text in [protection.findtext(name)] + if text and text.strip() + ), + None, + ) + + drm.append(ClearKeyCENC(kids=[kid], laurl=laurl)) + return drm @staticmethod diff --git a/unshackle/core/service.py b/unshackle/core/service.py index 417984b..109cd77 100644 --- a/unshackle/core/service.py +++ b/unshackle/core/service.py @@ -438,6 +438,27 @@ class Service(metaclass=ABCMeta): # Delegates license handling to the Widevine license method by default if a service-specific PlayReady implementation is not provided. return self.get_widevine_license(challenge=challenge, title=title, track=track) + def get_clearkey_license( + self, *, challenge: bytes, title: Title_T, track: AnyTrack + ) -> Optional[Union[bytes, str, dict]]: + """ + Get a W3C ClearKey License (JWK Set) by sending a License Request (challenge). + + Used for DASH `org.w3.clearkey` content. No CDM is involved: the challenge is + the W3C EME JSON license request, e.g. ``{"kids": ["base64url-kid"], "type": "temporary"}``, + and the license is a JWK Set, e.g. ``{"keys": [{"kty": "oct", "k": "...", "kid": "..."}]}``. + + :param challenge: The JSON license request bytes to POST to the license server. + :param title: The current `Title` from get_titles that is being executed. This is provided in + case it has data needed to be used, e.g. for a HTTP request. + :param track: The current `Track` needing decryption. Provided for same reason as `title`. + :return: The JWK Set license as a dict, JSON str, or raw bytes. Return None (the default) + to let the framework POST the challenge to the manifest-provided Laurl, if any. 
+ Services with no license server can instead pre-populate the DRM object's + `content_keys` in get_tracks. + """ + return None + # Required Abstract functions # The following functions *must* be implemented by the Service. # The functions will be executed in shown order. diff --git a/unshackle/core/tracks/track.py b/unshackle/core/tracks/track.py index 657f30e..2b36418 100644 --- a/unshackle/core/tracks/track.py +++ b/unshackle/core/tracks/track.py @@ -21,7 +21,7 @@ from unshackle.core.cdm.detect import is_playready_cdm, is_widevine_cdm from unshackle.core.config import config from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY from unshackle.core.downloaders import requests -from unshackle.core.drm import DRM_T, PlayReady, Widevine +from unshackle.core.drm import DRM_T, ClearKeyCENC, PlayReady, Widevine from unshackle.core.events import events from unshackle.core.session import RnetSession from unshackle.core.utilities import get_boxes, log_event, try_ensure_utf8 @@ -335,6 +335,13 @@ class Track: progress(downloaded="LICENSING") prepare_drm(drm, track_kid=track_kid) progress(downloaded="[yellow]LICENSED") + elif isinstance(drm, ClearKeyCENC): + # license and grab content keys (no CDM involved) + if not prepare_drm: + raise ValueError("prepare_drm func must be supplied to use ClearKey DRM") + progress(downloaded="LICENSING") + prepare_drm(drm, track_kid=track_kid) + progress(downloaded="[yellow]LICENSED") else: drm = None diff --git a/unshackle/services/EXAMPLE/__init__.py b/unshackle/services/EXAMPLE/__init__.py index ff5894d..af4c2eb 100644 --- a/unshackle/services/EXAMPLE/__init__.py +++ b/unshackle/services/EXAMPLE/__init__.py @@ -64,7 +64,7 @@ class EXAMPLE(Service): get_chapters Chapters() with named + unnamed markers get_widevine_* service cert + license (per-segment PSSH via `track`) get_playready_license PlayReady challenge POST - get_clearkey DRM-free / ClearKey fallback (commented alternate) + get_clearkey_license DASH 
org.w3.clearkey JWK Set POST (Laurl fallback) """ # ALIASES: extra CLI tags that resolve to this service (e.g. `dl EX ...`). @@ -486,7 +486,20 @@ class EXAMPLE(Service): response.raise_for_status() return response.content - # For ClearKey or unencrypted content there is no license callback; instead the - # KID:KEY pair comes from the manifest or a side endpoint and is attached to the + def get_clearkey_license( + self, *, challenge: bytes, title: Title_T, track: AnyTrack + ) -> Optional[Union[bytes, str, dict]]: + # DASH org.w3.clearkey: `challenge` is the W3C JSON license request; return the + # JWK Set response. Omit this method entirely when the manifest carries a Laurl — + # the framework then POSTs the challenge there with no service code at all. + license_url = self.config["endpoints"].get("clearkey_license") + if not license_url: + return None # fall back to the manifest-provided Laurl, if any + response = self.session.post(url=license_url, data=challenge) + response.raise_for_status() + return response.json() + + # For HLS AES-128 ClearKey or unencrypted content there is no license callback; + # the key comes from the manifest or a side endpoint and is attached to the # track's DRM directly. Vaults (`self.cache` is separate) cache KID:KEY so repeat # downloads skip the license round-trip entirely. diff --git a/unshackle/services/EXAMPLE/config.yaml b/unshackle/services/EXAMPLE/config.yaml index 00310a0..c9b824f 100644 --- a/unshackle/services/EXAMPLE/config.yaml +++ b/unshackle/services/EXAMPLE/config.yaml @@ -10,6 +10,7 @@ endpoints: playback: https://api.domain.com/v1/playback/{title_id} # HLS alternate widevine_license: https://api.domain.com/v1/license/widevine playready_license: https://api.domain.com/v1/license/playready + clearkey_license: https://api.domain.com/v1/license/clearkey # DASH org.w3.clearkey (omit to use manifest Laurl) # Base64 Widevine service certificate (enables privacy-mode license requests). certificate: null