mirror of
https://github.com/unshackle-dl/unshackle.git
synced 2026-03-12 01:19:02 +00:00
Initial Commit
This commit is contained in:
5
unshackle/core/manifests/__init__.py
Normal file
5
unshackle/core/manifests/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from .dash import DASH
|
||||
from .hls import HLS
|
||||
from .ism import ISM
|
||||
|
||||
__all__ = ("DASH", "HLS", "ISM")
|
||||
800
unshackle/core/manifests/dash.py
Normal file
800
unshackle/core/manifests/dash.py
Normal file
@@ -0,0 +1,800 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import html
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
import sys
|
||||
from copy import copy
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Optional, Union
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from uuid import UUID
|
||||
from zlib import crc32
|
||||
|
||||
import requests
|
||||
from langcodes import Language, tag_is_valid
|
||||
from lxml.etree import Element, ElementTree
|
||||
from pyplayready.system.pssh import PSSH as PR_PSSH
|
||||
from pywidevine.cdm import Cdm as WidevineCdm
|
||||
from pywidevine.pssh import PSSH
|
||||
from requests import Session
|
||||
|
||||
from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack
|
||||
from unshackle.core.downloaders import requests as requests_downloader
|
||||
from unshackle.core.drm import DRM_T, PlayReady, Widevine
|
||||
from unshackle.core.events import events
|
||||
from unshackle.core.tracks import Audio, Subtitle, Tracks, Video
|
||||
from unshackle.core.utilities import is_close_match, try_ensure_utf8
|
||||
from unshackle.core.utils.xml import load_xml
|
||||
|
||||
|
||||
class DASH:
|
||||
def __init__(self, manifest, url: str):
    """
    Wrap a parsed MPEG-DASH MPD document.

    Parameters:
        manifest: Parsed MPD XML root element (must have tag "MPD").
        url: The URL the manifest was fetched from; required so relative
            BaseURL/segment paths can be resolved later.

    Raises:
        ValueError: If no manifest was provided.
        TypeError: If the document root is not "MPD", or url is not a str.
        requests.URLRequired: If no url was provided.
    """
    if manifest is None:
        raise ValueError("DASH manifest must be provided.")
    if manifest.tag != "MPD":
        raise TypeError(f"Expected 'MPD' document, but received a '{manifest.tag}' document instead.")

    if not url:
        raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.")
    if not isinstance(url, str):
        raise TypeError(f"Expected url to be a {str}, not {url!r}")

    self.manifest = manifest
    self.url = url
|
||||
|
||||
@classmethod
def from_url(cls, url: str, session: Optional[Session] = None, **args: Any) -> DASH:
    """
    Fetch an MPD document over HTTP(S) and parse it.

    Parameters:
        url: URL of the MPD document. If the request was redirected, the
            final URL is used for relative path computations.
        session: Optional requests Session (connection pooling, headers, cookies).
        **args: Extra keyword arguments passed through to `session.get()`.

    Raises:
        requests.URLRequired: If no URL was provided.
        TypeError: If url or session are of the wrong type.
        requests.ConnectionError: If the HTTP request did not succeed.
    """
    if not url:
        raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.")
    if not isinstance(url, str):
        raise TypeError(f"Expected url to be a {str}, not {url!r}")

    if not session:
        session = Session()
    elif not isinstance(session, Session):
        raise TypeError(f"Expected session to be a {Session}, not {session!r}")

    res = session.get(url, **args)

    if not res.ok:
        raise requests.ConnectionError("Failed to request the MPD document.", response=res)

    # use the post-redirect URL so relative segment paths resolve correctly;
    # fix: call cls.from_text (not DASH.from_text) so subclasses construct themselves
    return cls.from_text(res.text, res.url)
|
||||
|
||||
@classmethod
def from_text(cls, text: str, url: str) -> DASH:
    """
    Parse an MPD document from its raw XML text.

    Parameters:
        text: The MPD document body as a string.
        url: The URL the document came from; required so relative
            BaseURL/segment paths can be resolved later.

    Raises:
        ValueError: If no text was provided.
        requests.URLRequired: If no url was provided.
        TypeError: If text or url are not strings.
    """
    if not text:
        raise ValueError("DASH manifest Text must be provided.")
    if not isinstance(text, str):
        raise TypeError(f"Expected text to be a {str}, not {text!r}")

    if not url:
        raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.")
    if not isinstance(url, str):
        raise TypeError(f"Expected url to be a {str}, not {url!r}")

    return cls(load_xml(text), url)
|
||||
|
||||
def to_tracks(
    self, language: Optional[Union[str, Language]] = None, period_filter: Optional[Callable] = None
) -> Tracks:
    """
    Convert an MPEG-DASH document to Video, Audio and Subtitle Track objects.

    Parameters:
        language: The Title's Original Recorded Language. It will also be used as a fallback
            track language value if the manifest does not list language information.
        period_filter: Filter out period's within the manifest.

    All Track URLs will be a list of segment URLs.

    Raises:
        ValueError: If a Representation's format/codec cannot be determined,
            its content type is unknown, or no usable language can be derived.
    """
    tracks = Tracks()

    for period in self.manifest.findall("Period"):
        # caller-supplied filter: a truthy return means "skip this Period"
        if callable(period_filter) and period_filter(period):
            continue
        # skip non-content Periods (e.g. ad breaks marked via SegmentType)
        if next(iter(period.xpath("SegmentType/@value")), "content") != "content":
            continue

        for adaptation_set in period.findall("AdaptationSet"):
            if self.is_trick_mode(adaptation_set):
                # we don't want trick mode streams (they are only used for fast-forward/rewind)
                continue

            for rep in adaptation_set.findall("Representation"):
                # attribute/element lookups prefer the Representation, then the AdaptationSet
                get = partial(self._get, adaptation_set=adaptation_set, representation=rep)
                findall = partial(self._findall, adaptation_set=adaptation_set, representation=rep, both=True)
                segment_base = rep.find("SegmentBase")

                codecs = get("codecs")
                content_type = get("contentType")
                mime_type = get("mimeType")

                # derive the content type from the MIME type when absent
                if not content_type and mime_type:
                    content_type = mime_type.split("/")[0]
                if not content_type and not mime_type:
                    raise ValueError("Unable to determine the format of a Representation, cannot continue...")

                if mime_type == "application/mp4" or content_type == "application":
                    # likely mp4-boxed subtitles
                    # TODO: It may not actually be subtitles
                    try:
                        real_codec = Subtitle.Codec.from_mime(codecs)
                        content_type = "text"
                        mime_type = f"application/mp4; codecs='{real_codec.value.lower()}'"
                    except ValueError:
                        raise ValueError(f"Unsupported content type '{content_type}' with codecs of '{codecs}'")

                if content_type == "text" and mime_type and "/mp4" not in mime_type:
                    # mimeType likely specifies the subtitle codec better than `codecs`
                    codecs = mime_type.split("/")[1]

                if content_type == "video":
                    track_type = Video
                    track_codec = Video.Codec.from_codecs(codecs)
                    track_fps = get("frameRate")
                    # fall back to the SegmentBase timescale when no frameRate is advertised
                    if not track_fps and segment_base is not None:
                        track_fps = segment_base.get("timescale")

                    track_args = dict(
                        range_=self.get_video_range(
                            codecs, findall("SupplementalProperty"), findall("EssentialProperty")
                        ),
                        bitrate=get("bandwidth") or None,
                        width=get("width") or 0,
                        height=get("height") or 0,
                        fps=track_fps or None,
                    )
                elif content_type == "audio":
                    track_type = Audio
                    track_codec = Audio.Codec.from_codecs(codecs)
                    track_args = dict(
                        bitrate=get("bandwidth") or None,
                        channels=next(
                            iter(
                                rep.xpath("AudioChannelConfiguration/@value")
                                or adaptation_set.xpath("AudioChannelConfiguration/@value")
                            ),
                            None,
                        ),
                        joc=self.get_ddp_complexity_index(adaptation_set, rep),
                        descriptive=self.is_descriptive(adaptation_set),
                    )
                elif content_type == "text":
                    track_type = Subtitle
                    track_codec = Subtitle.Codec.from_codecs(codecs or "vtt")
                    track_args = dict(
                        cc=self.is_closed_caption(adaptation_set),
                        sdh=self.is_sdh(adaptation_set),
                        forced=self.is_forced(adaptation_set),
                    )
                elif content_type == "image":
                    # we don't want what's likely thumbnails for the seekbar
                    continue
                else:
                    raise ValueError(f"Unknown Track Type '{content_type}'")

                track_lang = self.get_language(adaptation_set, rep, fallback=language)
                if not track_lang:
                    msg = "Language information could not be derived from a Representation."
                    if language is None:
                        msg += " No fallback language was provided when calling DASH.to_tracks()."
                    elif not tag_is_valid((str(language) or "").strip()) or str(language).startswith("und"):
                        msg += f" The fallback language provided is also invalid: {language}"
                    raise ValueError(msg)

                # for some reason it's incredibly common for services to not provide
                # a good and actually unique track ID, sometimes because of the lang
                # dialect not being represented in the id, or the bitrate, or such.
                # this combines all of them as one and hashes it to keep it small(ish).
                # NOTE(review): get("bitrate") looks like it was meant to be
                # get("bandwidth") (the DASH attribute name) — confirm; as-is it
                # likely always contributes None to the hash input.
                track_id = hex(
                    crc32(
                        "{codec}-{lang}-{bitrate}-{base_url}-{ids}-{track_args}".format(
                            codec=codecs,
                            lang=track_lang,
                            bitrate=get("bitrate"),
                            base_url=(rep.findtext("BaseURL") or "").split("?")[0],
                            ids=[get("audioTrackId"), get("id"), period.get("id")],
                            track_args=track_args,
                        ).encode()
                    )
                )[2:]

                tracks.add(
                    track_type(
                        id_=track_id,
                        url=self.url,
                        codec=track_codec,
                        language=track_lang,
                        is_original_lang=bool(language and is_close_match(track_lang, [language])),
                        descriptor=Video.Descriptor.DASH,
                        data={
                            "dash": {
                                "manifest": self.manifest,
                                "period": period,
                                "adaptation_set": adaptation_set,
                                "representation": rep,
                            }
                        },
                        **track_args,
                    )
                )

        # only get tracks from the first main-content period
        break

    return tracks
|
||||
|
||||
@staticmethod
def download_track(
    track: AnyTrack,
    save_path: Path,
    save_dir: Path,
    progress: partial,
    session: Optional[Session] = None,
    proxy: Optional[str] = None,
    max_workers: Optional[int] = None,
    license_widevine: Optional[Callable] = None,
    *,
    cdm: Optional[object] = None,
):
    """
    Download, merge, and (if protected) license and decrypt a DASH track.

    Resolves the track's segment URLs from its stored MPD elements
    (SegmentTemplate, SegmentList, SegmentBase, or a bare BaseURL),
    downloads every segment to `save_dir`, merges them into `save_path`,
    and decrypts in place when DRM was found and licensed.

    Parameters:
        track: The track to download; must carry the "dash" data set by to_tracks().
        save_path: Final merged output file path.
        save_dir: Working directory for individual segment files (removed at the end).
        progress: Progress-update callable (rich-style keyword updates).
        session: Optional requests Session for segment/init requests.
        proxy: Optional proxy URL applied to the session.
        max_workers: Optional concurrency cap passed to the downloader.
        license_widevine: Callable used to license Widevine/PlayReady DRM;
            required when the track turns out to be protected.
        cdm: CDM object used to choose which of the track's DRM entries to use.
    """
    if not session:
        session = Session()
    elif not isinstance(session, Session):
        raise TypeError(f"Expected session to be a {Session}, not {session!r}")

    if proxy:
        session.proxies.update({"all": proxy})

    log = logging.getLogger("DASH")

    manifest: ElementTree = track.data["dash"]["manifest"]
    period: Element = track.data["dash"]["period"]
    adaptation_set: Element = track.data["dash"]["adaptation_set"]
    representation: Element = track.data["dash"]["representation"]

    # Preserve existing DRM if it was set by the service, especially when service set Widevine
    # but manifest only contains PlayReady protection (common scenario for some services)
    existing_drm = track.drm
    manifest_drm = DASH.get_drm(
        representation.findall("ContentProtection") + adaptation_set.findall("ContentProtection")
    )

    # Only override existing DRM if:
    # 1. No existing DRM was set, OR
    # 2. Existing DRM contains same type as manifest DRM, OR
    # 3. Existing DRM is not Widevine (preserve Widevine when service explicitly set it)
    should_override_drm = (
        not existing_drm
        or (
            existing_drm
            and manifest_drm
            and any(isinstance(existing, type(manifest)) for existing in existing_drm for manifest in manifest_drm)
        )
        or (existing_drm and not any(isinstance(drm, Widevine) for drm in existing_drm))
    )

    if should_override_drm:
        track.drm = manifest_drm
    else:
        track.drm = existing_drm

    # resolve the BaseURL hierarchy: MPD -> Period -> Representation,
    # falling back to the manifest URL for relative (or missing) values
    manifest_base_url = manifest.findtext("BaseURL")
    if not manifest_base_url:
        manifest_base_url = track.url
    elif not re.match("^https?://", manifest_base_url, re.IGNORECASE):
        manifest_base_url = urljoin(track.url, f"./{manifest_base_url}")
    period_base_url = urljoin(manifest_base_url, period.findtext("BaseURL"))
    rep_base_url = urljoin(period_base_url, representation.findtext("BaseURL"))

    period_duration = period.get("duration") or manifest.get("mediaPresentationDuration")
    init_data: Optional[bytes] = None

    # segment addressing elements may live on the Representation or its AdaptationSet
    segment_template = representation.find("SegmentTemplate")
    if segment_template is None:
        segment_template = adaptation_set.find("SegmentTemplate")

    segment_list = representation.find("SegmentList")
    if segment_list is None:
        segment_list = adaptation_set.find("SegmentList")

    segment_base = representation.find("SegmentBase")
    if segment_base is None:
        segment_base = adaptation_set.find("SegmentBase")

    segments: list[tuple[str, Optional[str]]] = []  # (url, optional byte-range)
    segment_timescale: float = 0
    segment_durations: list[int] = []
    track_kid: Optional[UUID] = None

    if segment_template is not None:
        # copy so URL rewrites below don't mutate the shared manifest tree
        segment_template = copy(segment_template)
        start_number = int(segment_template.get("startNumber") or 1)
        end_number = int(segment_template.get("endNumber") or 0) or None
        segment_timeline = segment_template.find("SegmentTimeline")
        segment_timescale = float(segment_template.get("timescale") or 1)

        # make the template URLs absolute and carry over the manifest's query string
        for item in ("initialization", "media"):
            value = segment_template.get(item)
            if not value:
                continue
            if not re.match("^https?://", value, re.IGNORECASE):
                if not rep_base_url:
                    raise ValueError("Resolved Segment URL is not absolute, and no Base URL is available.")
                value = urljoin(rep_base_url, value)
            if not urlparse(value).query:
                manifest_url_query = urlparse(track.url).query
                if manifest_url_query:
                    value += f"?{manifest_url_query}"
            segment_template.set(item, value)

        init_url = segment_template.get("initialization")
        if init_url:
            res = session.get(
                DASH.replace_fields(
                    init_url, Bandwidth=representation.get("bandwidth"), RepresentationID=representation.get("id")
                )
            )
            res.raise_for_status()
            init_data = res.content
            track_kid = track.get_key_id(init_data)

        if segment_timeline is not None:
            # explicit timeline: expand each <S> entry (honouring its repeat count)
            # into per-segment start times used as $Time$ values
            current_time = 0
            for s in segment_timeline.findall("S"):
                if s.get("t"):
                    current_time = int(s.get("t"))
                for _ in range(1 + (int(s.get("r") or 0))):
                    segment_durations.append(current_time)
                    current_time += int(s.get("d"))

            if not end_number:
                end_number = len(segment_durations)

            for t, n in zip(segment_durations, range(start_number, end_number + 1)):
                segments.append(
                    (
                        DASH.replace_fields(
                            segment_template.get("media"),
                            Bandwidth=representation.get("bandwidth"),
                            Number=n,
                            RepresentationID=representation.get("id"),
                            Time=t,
                        ),
                        None,
                    )
                )
        else:
            # no timeline: derive the segment count from the Period duration
            if not period_duration:
                raise ValueError("Duration of the Period was unable to be determined.")
            period_duration = DASH.pt_to_sec(period_duration)
            segment_duration = float(segment_template.get("duration")) or 1

            if not end_number:
                end_number = math.ceil(period_duration / (segment_duration / segment_timescale))

            for s in range(start_number, end_number + 1):
                segments.append(
                    (
                        DASH.replace_fields(
                            segment_template.get("media"),
                            Bandwidth=representation.get("bandwidth"),
                            Number=s,
                            RepresentationID=representation.get("id"),
                            Time=s,
                        ),
                        None,
                    )
                )
            # TODO: Should we floor/ceil/round, or is int() ok?
            segment_durations.append(int(segment_duration))
    elif segment_list is not None:
        segment_timescale = float(segment_list.get("timescale") or 1)

        init_data = None
        initialization = segment_list.find("Initialization")
        if initialization is not None:
            source_url = initialization.get("sourceURL")
            if not source_url:
                source_url = rep_base_url
            elif not re.match("^https?://", source_url, re.IGNORECASE):
                source_url = urljoin(rep_base_url, f"./{source_url}")

            if initialization.get("range"):
                init_range_header = {"Range": f"bytes={initialization.get('range')}"}
            else:
                init_range_header = None

            res = session.get(url=source_url, headers=init_range_header)
            res.raise_for_status()
            init_data = res.content
            track_kid = track.get_key_id(init_data)

        segment_urls = segment_list.findall("SegmentURL")
        for segment_url in segment_urls:
            media_url = segment_url.get("media")
            if not media_url:
                media_url = rep_base_url
            elif not re.match("^https?://", media_url, re.IGNORECASE):
                media_url = urljoin(rep_base_url, f"./{media_url}")

            segments.append((media_url, segment_url.get("mediaRange")))
            segment_durations.append(int(segment_url.get("duration") or 1))
    elif segment_base is not None:
        # single-file representation: download the init range, then the rest
        media_range = None
        init_data = None
        initialization = segment_base.find("Initialization")
        if initialization is not None:
            if initialization.get("range"):
                init_range_header = {"Range": f"bytes={initialization.get('range')}"}
            else:
                init_range_header = None

            res = session.get(url=rep_base_url, headers=init_range_header)
            res.raise_for_status()
            init_data = res.content
            track_kid = track.get_key_id(init_data)
            # Content-Range is "bytes a-b/total" — use the total to request
            # everything after the init data as a single ranged segment
            total_size = res.headers.get("Content-Range", "").split("/")[-1]
            if total_size:
                media_range = f"{len(init_data)}-{total_size}"

        segments.append((rep_base_url, media_range))
    elif rep_base_url:
        segments.append((rep_base_url, None))
    else:
        log.error("Could not find a way to get segments from this MPD manifest.")
        log.debug(track.url)
        sys.exit(1)

    # TODO: Should we floor/ceil/round, or is int() ok?
    track.data["dash"]["timescale"] = int(segment_timescale)
    track.data["dash"]["segment_durations"] = segment_durations

    # no DRM found in ContentProtection — probe the init data as a last resort
    if not track.drm and isinstance(track, (Video, Audio)):
        try:
            track.drm = [Widevine.from_init_data(init_data)]
        except Widevine.Exceptions.PSSHNotFound:
            # it might not have Widevine DRM, or might not have found the PSSH
            log.warning("No Widevine PSSH was found for this track, is it DRM free?")

    if track.drm:
        track_kid = track_kid or track.get_key_id(url=segments[0][0], session=session)
        drm = track.get_drm_for_cdm(cdm)
        if isinstance(drm, (Widevine, PlayReady)):
            # license and grab content keys
            try:
                if not license_widevine:
                    raise ValueError("license_widevine func must be supplied to use DRM")
                progress(downloaded="LICENSING")
                license_widevine(drm, track_kid=track_kid)
                progress(downloaded="[yellow]LICENSED")
            except Exception:  # noqa
                DOWNLOAD_CANCELLED.set()  # skip pending track downloads
                progress(downloaded="[red]FAILED")
                raise
    else:
        drm = None

    if DOWNLOAD_LICENCE_ONLY.is_set():
        progress(downloaded="[yellow]SKIPPED")
        return

    progress(total=len(segments))

    downloader = track.downloader
    if downloader.__name__ == "aria2c" and any(bytes_range is not None for url, bytes_range in segments):
        # aria2(c) is shit and doesn't support the Range header, fallback to the requests downloader
        downloader = requests_downloader
        log.warning("Falling back to the requests downloader as aria2(c) doesn't support the Range header")

    downloader_args = dict(
        urls=[
            {"url": url, "headers": {"Range": f"bytes={bytes_range}"} if bytes_range else {}}
            for url, bytes_range in segments
        ],
        output_dir=save_dir,
        # zero-padded sequential filenames so lexicographic sort == segment order
        filename="{i:0%d}.mp4" % (len(str(len(segments)))),
        headers=session.headers,
        cookies=session.cookies,
        proxy=proxy,
        max_workers=max_workers,
    )

    if downloader.__name__ == "n_m3u8dl_re":
        downloader_args.update({"filename": track.id, "track": track})

    for status_update in downloader(**downloader_args):
        file_downloaded = status_update.get("file_downloaded")
        if file_downloaded:
            events.emit(events.Types.SEGMENT_DOWNLOADED, track=track, segment=file_downloaded)
        else:
            downloaded = status_update.get("downloaded")
            if downloaded and downloaded.endswith("/s"):
                status_update["downloaded"] = f"DASH {downloaded}"
            progress(**status_update)

    # see https://github.com/devine-dl/devine/issues/71
    for control_file in save_dir.glob("*.aria2__temp"):
        control_file.unlink()

    # merge all downloaded segments (init data first) into the final file
    segments_to_merge = [x for x in sorted(save_dir.iterdir()) if x.is_file()]
    with open(save_path, "wb") as f:
        if init_data:
            f.write(init_data)
        if len(segments_to_merge) > 1:
            progress(downloaded="Merging", completed=0, total=len(segments_to_merge))
        for segment_file in segments_to_merge:
            segment_data = segment_file.read_bytes()
            # TODO: fix encoding after decryption?
            if (
                not drm
                and isinstance(track, Subtitle)
                and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
            ):
                # plain-text subtitles: normalize encoding and resolve
                # directional-mark HTML entities to their actual characters
                segment_data = try_ensure_utf8(segment_data)
                segment_data = (
                    segment_data.decode("utf8")
                    .replace("&lrm;", html.unescape("&lrm;"))
                    .replace("&rlm;", html.unescape("&rlm;"))
                    .encode("utf8")
                )
            f.write(segment_data)
            f.flush()
            segment_file.unlink()
            progress(advance=1)

    track.path = save_path
    events.emit(events.Types.TRACK_DOWNLOADED, track=track)

    if drm:
        progress(downloaded="Decrypting", completed=0, total=100)
        drm.decrypt(save_path)
        track.drm = None
        events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=None)
        progress(downloaded="Decrypting", advance=100)

    save_dir.rmdir()

    progress(downloaded="Downloaded")
|
||||
|
||||
@staticmethod
|
||||
def _get(item: str, adaptation_set: Element, representation: Optional[Element] = None) -> Optional[Any]:
|
||||
"""Helper to get a requested item from the Representation, otherwise from the AdaptationSet."""
|
||||
adaptation_set_item = adaptation_set.get(item)
|
||||
if representation is None:
|
||||
return adaptation_set_item
|
||||
|
||||
representation_item = representation.get(item)
|
||||
if representation_item is not None:
|
||||
return representation_item
|
||||
|
||||
return adaptation_set_item
|
||||
|
||||
@staticmethod
|
||||
def _findall(
|
||||
item: str, adaptation_set: Element, representation: Optional[Element] = None, both: bool = False
|
||||
) -> list[Any]:
|
||||
"""
|
||||
Helper to get all requested items from the Representation, otherwise from the AdaptationSet.
|
||||
Optionally, you may pass both=True to keep both values (where available).
|
||||
"""
|
||||
adaptation_set_items = adaptation_set.findall(item)
|
||||
if representation is None:
|
||||
return adaptation_set_items
|
||||
|
||||
representation_items = representation.findall(item)
|
||||
|
||||
if both:
|
||||
return representation_items + adaptation_set_items
|
||||
|
||||
if representation_items:
|
||||
return representation_items
|
||||
|
||||
return adaptation_set_items
|
||||
|
||||
@staticmethod
def get_language(
    adaptation_set: Element,
    representation: Optional[Element] = None,
    fallback: Optional[Union[str, Language]] = None,
) -> Optional[Language]:
    """
    Get Language (if any) from the AdaptationSet or Representation.

    Candidates are checked in priority order: the Representation's `lang`
    attribute, a language embedded in the Representation's `id` (commonly
    formatted like "{rep_id}_{lang}={bitrate}"), the AdaptationSet's `lang`
    attribute, and finally the provided fallback. The first candidate that
    is a valid, non-"und" language tag wins.

    A fallback language may be provided if no language information could be
    retrieved. Returns None when no candidate is usable.
    """
    options: list[Union[str, Language]] = []

    if representation is not None:
        rep_lang = representation.get("lang")
        if rep_lang:
            options.append(rep_lang)
        # derive language from somewhat common id string format
        # the format is typically "{rep_id}_{lang}={bitrate}" or similar
        rep_id = representation.get("id")
        if rep_id:
            m = re.match(r"\w+_(\w+)=\d+", rep_id)
            if m:
                options.append(m.group(1))

    set_lang = adaptation_set.get("lang")
    if set_lang:
        options.append(set_lang)

    if fallback:
        options.append(fallback)

    for option in options:
        # fix: the original `(str(option) or "")` could never be falsy for a
        # non-empty str() result, so a None candidate became the string "None"
        # and was handed to the validator; filter empties/None up front instead.
        tag = str(option).strip()
        if not tag or not tag_is_valid(tag) or tag.startswith("und"):
            continue
        return Language.get(tag)

    return None
|
||||
|
||||
@staticmethod
def get_video_range(
    codecs: str, all_supplemental_props: list[Element], all_essential_props: list[Element]
) -> Video.Range:
    """
    Derive the Video Range (SDR/HDR/DV) from codec strings and CICP descriptors.

    Dolby Vision codec FourCCs short-circuit to DV; otherwise the CICP
    colour primaries, transfer characteristics and matrix coefficients are
    looked up from the Supplemental/Essential properties (0 when absent).
    """
    if codecs.startswith(("dva1", "dvav", "dvhe", "dvh1")):
        return Video.Range.DV

    props = all_supplemental_props + all_essential_props

    def cicp_value(scheme: str) -> int:
        # first matching descriptor wins; 0 means "unspecified" per CICP
        uri = f"urn:mpeg:mpegB:cicp:{scheme}"
        return next((int(p.get("value")) for p in props if p.get("schemeIdUri") == uri), 0)

    return Video.Range.from_cicp(
        primaries=cicp_value("ColourPrimaries"),
        transfer=cicp_value("TransferCharacteristics"),
        matrix=cicp_value("MatrixCoefficients"),
    )
|
||||
|
||||
@staticmethod
|
||||
def is_trick_mode(adaptation_set: Element) -> bool:
|
||||
"""Check if contents of Adaptation Set is a Trick-Mode stream."""
|
||||
essential_props = adaptation_set.findall("EssentialProperty")
|
||||
supplemental_props = adaptation_set.findall("SupplementalProperty")
|
||||
|
||||
return any(
|
||||
prop.get("schemeIdUri") == "http://dashif.org/guidelines/trickmode"
|
||||
for prop in essential_props + supplemental_props
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def is_descriptive(adaptation_set: Element) -> bool:
|
||||
"""Check if contents of Adaptation Set is Descriptive."""
|
||||
return any(
|
||||
(x.get("schemeIdUri"), x.get("value"))
|
||||
in (("urn:mpeg:dash:role:2011", "descriptive"), ("urn:tva:metadata:cs:AudioPurposeCS:2007", "1"))
|
||||
for x in adaptation_set.findall("Accessibility")
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def is_forced(adaptation_set: Element) -> bool:
|
||||
"""Check if contents of Adaptation Set is a Forced Subtitle."""
|
||||
return any(
|
||||
x.get("schemeIdUri") == "urn:mpeg:dash:role:2011"
|
||||
and x.get("value") in ("forced-subtitle", "forced_subtitle")
|
||||
for x in adaptation_set.findall("Role")
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def is_sdh(adaptation_set: Element) -> bool:
|
||||
"""Check if contents of Adaptation Set is for the Hearing Impaired."""
|
||||
return any(
|
||||
(x.get("schemeIdUri"), x.get("value")) == ("urn:tva:metadata:cs:AudioPurposeCS:2007", "2")
|
||||
for x in adaptation_set.findall("Accessibility")
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def is_closed_caption(adaptation_set: Element) -> bool:
|
||||
"""Check if contents of Adaptation Set is a Closed Caption Subtitle."""
|
||||
return any(
|
||||
(x.get("schemeIdUri"), x.get("value")) == ("urn:mpeg:dash:role:2011", "caption")
|
||||
for x in adaptation_set.findall("Role")
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def get_ddp_complexity_index(adaptation_set: Element, representation: Optional[Element]) -> Optional[int]:
|
||||
"""Get the DD+ Complexity Index (if any) from the AdaptationSet or Representation."""
|
||||
return next(
|
||||
(
|
||||
int(x.get("value"))
|
||||
for x in DASH._findall("SupplementalProperty", adaptation_set, representation, both=True)
|
||||
if x.get("schemeIdUri") == "tag:dolby.com,2018:dash:EC3_ExtensionComplexityIndex:2018"
|
||||
),
|
||||
None,
|
||||
)
|
||||
|
||||
@staticmethod
def get_drm(protections: list[Element]) -> list[DRM_T]:
    """
    Build DRM objects from MPD ContentProtection elements.

    Recognises Widevine (by the pywidevine CDM URN) and PlayReady (by its
    system-ID URN or the legacy "urn:microsoft:playready" scheme). Elements
    without embedded PSSH/PRO data are skipped.

    Returns a list of Widevine/PlayReady DRM objects (possibly empty).
    """
    drm: list[DRM_T] = []

    for protection in protections:
        # scheme URIs are case-insensitive; normalise before comparing
        urn = (protection.get("schemeIdUri") or "").lower()

        if urn == WidevineCdm.urn:
            pssh_text = protection.findtext("pssh")
            if not pssh_text:
                continue
            pssh = PSSH(pssh_text)

            # `kid` attribute is base64-encoded raw bytes
            kid = protection.get("kid")
            if kid:
                kid = UUID(bytes=base64.b64decode(kid))

            # `default_KID` (hex UUID string) takes priority when present
            default_kid = protection.get("default_KID")
            if default_kid:
                kid = UUID(default_kid)

            # last resort: borrow a default_KID from any sibling protection element
            if not pssh.key_ids and not kid:
                kid = next((UUID(p.get("default_KID")) for p in protections if p.get("default_KID")), None)

            drm.append(Widevine(pssh=pssh, kid=kid))

        elif urn in ("urn:uuid:9a04f079-9840-4286-ab92-e65be0885f95", "urn:microsoft:playready"):
            # the PlayReady object may appear under several element names/namespaces
            pr_pssh_b64 = (
                protection.findtext("pssh")
                or protection.findtext("pro")
                or protection.findtext("{urn:microsoft:playready}pro")
            )
            if not pr_pssh_b64:
                continue
            pr_pssh = PR_PSSH(pr_pssh_b64)
            kid_b64 = protection.findtext("kid")
            kid = None
            if kid_b64:
                # best-effort decode; a malformed kid should not abort DRM detection
                try:
                    kid = UUID(bytes=base64.b64decode(kid_b64))
                except Exception:
                    kid = None

            drm.append(PlayReady(pssh=pr_pssh, kid=kid, pssh_b64=pr_pssh_b64))

    return drm
|
||||
|
||||
@staticmethod
|
||||
def pt_to_sec(d: Union[str, float]) -> float:
|
||||
if isinstance(d, float):
|
||||
return d
|
||||
has_ymd = d[0:8] == "P0Y0M0DT"
|
||||
if d[0:2] != "PT" and not has_ymd:
|
||||
raise ValueError("Input data is not a valid time string.")
|
||||
if has_ymd:
|
||||
d = d[6:].upper() # skip `P0Y0M0DT`
|
||||
else:
|
||||
d = d[2:].upper() # skip `PT`
|
||||
m = re.findall(r"([\d.]+.)", d)
|
||||
return sum(float(x[0:-1]) * {"H": 60 * 60, "M": 60, "S": 1}[x[-1].upper()] for x in m)
|
||||
|
||||
@staticmethod
|
||||
def replace_fields(url: str, **kwargs: Any) -> str:
|
||||
for field, value in kwargs.items():
|
||||
url = url.replace(f"${field}$", str(value))
|
||||
m = re.search(rf"\${re.escape(field)}%([a-z0-9]+)\$", url, flags=re.I)
|
||||
if m:
|
||||
url = url.replace(m.group(), f"{value:{m.group(1)}}")
|
||||
return url
|
||||
|
||||
|
||||
__all__ = ("DASH",)
|
||||
832
unshackle/core/manifests/hls.py
Normal file
832
unshackle/core/manifests/hls.py
Normal file
@@ -0,0 +1,832 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import html
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Optional, Union
|
||||
from urllib.parse import urljoin
|
||||
from zlib import crc32
|
||||
|
||||
import httpx
|
||||
import m3u8
|
||||
import requests
|
||||
from langcodes import Language, tag_is_valid
|
||||
from m3u8 import M3U8
|
||||
from pyplayready.cdm import Cdm as PlayReadyCdm
|
||||
from pyplayready.system.pssh import PSSH as PR_PSSH
|
||||
from pywidevine.cdm import Cdm as WidevineCdm
|
||||
from pywidevine.pssh import PSSH as WV_PSSH
|
||||
from requests import Session
|
||||
|
||||
from unshackle.core import binaries
|
||||
from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack
|
||||
from unshackle.core.downloaders import requests as requests_downloader
|
||||
from unshackle.core.drm import DRM_T, ClearKey, PlayReady, Widevine
|
||||
from unshackle.core.events import events
|
||||
from unshackle.core.tracks import Audio, Subtitle, Tracks, Video
|
||||
from unshackle.core.utilities import get_extension, is_close_match, try_ensure_utf8
|
||||
|
||||
|
||||
class HLS:
    def __init__(self, manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None):
        """
        Wrap a parsed Variant Playlist M3U(8) document.

        Parameters:
            manifest: A parsed master (variant) playlist.
            session: Optional requests/httpx session used for follow-up requests;
                a fresh requests Session is created when omitted.
        """
        if not manifest:
            raise ValueError("HLS manifest must be provided.")
        if not isinstance(manifest, M3U8):
            raise TypeError(f"Expected manifest to be a {M3U8}, not {manifest!r}")
        if not manifest.is_variant:
            raise ValueError("Expected the M3U(8) manifest to be a Variant Playlist.")
        self.manifest = manifest
        self.session = session or Session()
|
||||
|
||||
@classmethod
|
||||
def from_url(cls, url: str, session: Optional[Union[Session, httpx.Client]] = None, **args: Any) -> HLS:
|
||||
if not url:
|
||||
raise requests.URLRequired("HLS manifest URL must be provided.")
|
||||
if not isinstance(url, str):
|
||||
raise TypeError(f"Expected url to be a {str}, not {url!r}")
|
||||
|
||||
if not session:
|
||||
session = Session()
|
||||
elif not isinstance(session, (Session, httpx.Client)):
|
||||
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")
|
||||
|
||||
res = session.get(url, **args)
|
||||
|
||||
# Handle both requests and httpx response objects
|
||||
if isinstance(res, requests.Response):
|
||||
if not res.ok:
|
||||
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
|
||||
content = res.text
|
||||
elif isinstance(res, httpx.Response):
|
||||
if res.status_code >= 400:
|
||||
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
|
||||
content = res.text
|
||||
else:
|
||||
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(res)}")
|
||||
|
||||
master = m3u8.loads(content, uri=url)
|
||||
|
||||
return cls(master, session)
|
||||
|
||||
@classmethod
|
||||
def from_text(cls, text: str, url: str) -> HLS:
|
||||
if not text:
|
||||
raise ValueError("HLS manifest Text must be provided.")
|
||||
if not isinstance(text, str):
|
||||
raise TypeError(f"Expected text to be a {str}, not {text!r}")
|
||||
|
||||
if not url:
|
||||
raise requests.URLRequired("HLS manifest URL must be provided for relative path computations.")
|
||||
if not isinstance(url, str):
|
||||
raise TypeError(f"Expected url to be a {str}, not {url!r}")
|
||||
|
||||
master = m3u8.loads(text, uri=url)
|
||||
|
||||
return cls(master)
|
||||
|
||||
    def to_tracks(self, language: Union[str, Language]) -> Tracks:
        """
        Convert a Variant Playlist M3U(8) document to Video, Audio and Subtitle Track objects.

        Parameters:
            language: Language you expect the Primary Track to be in.

        All Track objects' URL will be to another M3U(8) document. However, these documents
        will be Invariant Playlists and contain the list of segments URIs among other metadata.

        Raises:
            ValueError: If no valid language can be derived for an EXT-X-MEDIA
                rendition and no usable fallback `language` was given.
        """
        # Prefer explicit session-level keys; otherwise fall back to keys
        # advertised through `com.apple.hls.keys` session data.
        session_keys = list(self.manifest.session_keys or [])
        if not session_keys:
            session_keys = HLS.parse_session_data_keys(self.manifest, self.session)

        session_drm = HLS.get_all_drm(session_keys)

        # EXT-X-MEDIA audio renditions do not state a codec; remember each audio
        # GROUP-ID's codec from the variant playlists that reference the group.
        audio_codecs_by_group_id: dict[str, Audio.Codec] = {}
        tracks = Tracks()

        for playlist in self.manifest.playlists:
            audio_group = playlist.stream_info.audio
            if audio_group:
                audio_codec = Audio.Codec.from_codecs(playlist.stream_info.codecs)
                audio_codecs_by_group_id[audio_group] = audio_codec

            try:
                # TODO: Any better way to figure out the primary track type?
                # If CODECS parses as a video codec the variant is treated as
                # Video, otherwise as an audio-only variant.
                if playlist.stream_info.codecs:
                    Video.Codec.from_codecs(playlist.stream_info.codecs)
            except ValueError:
                primary_track_type = Audio
            else:
                primary_track_type = Video

            tracks.add(
                primary_track_type(
                    # id derived from a CRC32 of the playlist's own text
                    id_=hex(crc32(str(playlist).encode()))[2:],
                    url=urljoin(playlist.base_uri, playlist.uri),
                    codec=(
                        primary_track_type.Codec.from_codecs(playlist.stream_info.codecs)
                        if playlist.stream_info.codecs
                        else None
                    ),
                    language=language,  # HLS manifests do not seem to have language info
                    is_original_lang=True,  # TODO: All we can do is assume Yes
                    bitrate=playlist.stream_info.average_bandwidth or playlist.stream_info.bandwidth,
                    descriptor=Video.Descriptor.HLS,
                    drm=session_drm,
                    data={"hls": {"playlist": playlist}},
                    # video track args
                    **(
                        dict(
                            # Dolby Vision codec IDs imply the DV dynamic range even
                            # when the VIDEO-RANGE attribute is absent
                            range_=Video.Range.DV
                            if any(
                                codec.split(".")[0] in ("dva1", "dvav", "dvhe", "dvh1")
                                for codec in (playlist.stream_info.codecs or "").lower().split(",")
                            )
                            else Video.Range.from_m3u_range_tag(playlist.stream_info.video_range),
                            width=playlist.stream_info.resolution[0] if playlist.stream_info.resolution else None,
                            height=playlist.stream_info.resolution[1] if playlist.stream_info.resolution else None,
                            fps=playlist.stream_info.frame_rate,
                        )
                        if primary_track_type is Video
                        else {}
                    ),
                )
            )

        # EXT-X-MEDIA alternative renditions (audio languages, subtitles, ...)
        for media in self.manifest.media:
            if not media.uri:
                continue

            joc = 0  # Dolby Atmos "Joint Object Coding" object count, if any
            if media.type == "AUDIO":
                track_type = Audio
                codec = audio_codecs_by_group_id.get(media.group_id)
                # e.g. CHANNELS="16/JOC" denotes Atmos over a 5.1 channel bed
                if media.channels and media.channels.endswith("/JOC"):
                    joc = int(media.channels.split("/JOC")[0])
                    media.channels = "5.1"
            else:
                track_type = Subtitle
                codec = Subtitle.Codec.WebVTT  # assuming WebVTT, codec info isn't shown

            # first valid non-"und" tag out of (rendition language, caller fallback)
            track_lang = next(
                (
                    Language.get(option)
                    for x in (media.language, language)
                    for option in [(str(x) or "").strip()]
                    if tag_is_valid(option) and not option.startswith("und")
                ),
                None,
            )
            if not track_lang:
                msg = "Language information could not be derived for a media."
                if language is None:
                    msg += " No fallback language was provided when calling HLS.to_tracks()."
                elif not tag_is_valid((str(language) or "").strip()) or str(language).startswith("und"):
                    msg += f" The fallback language provided is also invalid: {language}"
                raise ValueError(msg)

            tracks.add(
                track_type(
                    id_=hex(crc32(str(media).encode()))[2:],
                    url=urljoin(media.base_uri, media.uri),
                    codec=codec,
                    language=track_lang,  # HLS media may not have language info, fallback if needed
                    is_original_lang=bool(language and is_close_match(track_lang, [language])),
                    descriptor=Audio.Descriptor.HLS,
                    # only audio renditions inherit the session DRM; subtitles are
                    # treated as plain text
                    drm=session_drm if media.type == "AUDIO" else None,
                    data={"hls": {"media": media}},
                    # audio track args
                    **(
                        dict(
                            bitrate=0,  # TODO: M3U doesn't seem to state bitrate?
                            channels=media.channels,
                            joc=joc,
                            descriptive="public.accessibility.describes-video" in (media.characteristics or ""),
                        )
                        if track_type is Audio
                        else dict(
                            forced=media.forced == "YES",
                            sdh="public.accessibility.describes-music-and-sound" in (media.characteristics or ""),
                        )
                        if track_type is Subtitle
                        else {}
                    ),
                )
            )

        return tracks
|
||||
|
||||
    @staticmethod
    def download_track(
        track: AnyTrack,
        save_path: Path,
        save_dir: Path,
        progress: partial,
        session: Optional[Union[Session, httpx.Client]] = None,
        proxy: Optional[str] = None,
        max_workers: Optional[int] = None,
        license_widevine: Optional[Callable] = None,
        *,
        cdm: Optional[object] = None,
    ) -> None:
        """
        Download, license, decrypt and merge an HLS track to `save_path`.

        Parameters:
            track: Track whose `url` points at an invariant M3U(8) playlist.
            save_path: Final merged output file.
            save_dir: Working directory for segment and discontinuity files.
            progress: Progress callback partial (rich-style keyword updates).
            session: Optional requests/httpx session for playlist/init requests.
            proxy: Optional proxy URI, applied to the session and the downloader.
            max_workers: Optional downloader concurrency cap.
            license_widevine: Callback that licenses a Widevine/PlayReady DRM
                object; required whenever the track or a segment key is protected.
            cdm: CDM in use, used to pick the matching DRM object off the track.
        """
        if not session:
            session = Session()
        elif not isinstance(session, (Session, httpx.Client)):
            raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")

        if proxy:
            # Handle proxies differently based on session type
            if isinstance(session, Session):
                session.proxies.update({"all": proxy})
            elif isinstance(session, httpx.Client):
                session.proxies = {"http://": proxy, "https://": proxy}

        log = logging.getLogger("HLS")

        # Get the playlist text and handle both session types
        response = session.get(track.url)
        if isinstance(response, requests.Response):
            if not response.ok:
                log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
                sys.exit(1)
            playlist_text = response.text
        elif isinstance(response, httpx.Response):
            if response.status_code >= 400:
                log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
                sys.exit(1)
            playlist_text = response.text
        else:
            raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(response)}")

        master = m3u8.loads(playlist_text, uri=track.url)

        if not master.segments:
            log.error("Track's HLS playlist has no segments, expecting an invariant M3U8 playlist.")
            sys.exit(1)

        # Track-level DRM: license up-front before any segment is downloaded.
        if track.drm:
            session_drm = track.get_drm_for_cdm(cdm)
            if isinstance(session_drm, (Widevine, PlayReady)):
                # license and grab content keys
                try:
                    if not license_widevine:
                        raise ValueError("license_widevine func must be supplied to use DRM")
                    progress(downloaded="LICENSING")
                    license_widevine(session_drm)
                    progress(downloaded="[yellow]LICENSED")
                except Exception:  # noqa
                    DOWNLOAD_CANCELLED.set()  # skip pending track downloads
                    progress(downloaded="[red]FAILED")
                    raise
        else:
            session_drm = None

        if DOWNLOAD_LICENCE_ONLY.is_set():
            progress(downloaded="[yellow]SKIPPED")
            return

        # Segments the service-specific filter asked to drop (e.g. ad breaks).
        unwanted_segments = [
            segment for segment in master.segments if callable(track.OnSegmentFilter) and track.OnSegmentFilter(segment)
        ]

        total_segments = len(master.segments) - len(unwanted_segments)
        progress(total=total_segments)

        downloader = track.downloader
        if downloader.__name__ == "aria2c" and any(x.byterange for x in master.segments if x not in unwanted_segments):
            downloader = requests_downloader
            log.warning("Falling back to the requests downloader as aria2(c) doesn't support the Range header")

        urls: list[dict[str, Any]] = []
        segment_durations: list[int] = []

        # EXT-X-BYTERANGE values without an explicit offset continue from the
        # previous range, so a running offset is tracked while building URLs.
        range_offset = 0
        for segment in master.segments:
            if segment in unwanted_segments:
                continue

            segment_durations.append(int(segment.duration))

            if segment.byterange:
                byte_range = HLS.calculate_byte_range(segment.byterange, range_offset)
                # NOTE(review): this stores the offset as a *string*; it is only
                # consumed by calculate_byte_range's fallback path — confirm it
                # is coerced to int there.
                range_offset = byte_range.split("-")[0]
            else:
                byte_range = None

            urls.append(
                {
                    "url": urljoin(segment.base_uri, segment.uri),
                    "headers": {"Range": f"bytes={byte_range}"} if byte_range else {},
                }
            )

        track.data["hls"]["segment_durations"] = segment_durations

        segment_save_dir = save_dir / "segments"

        skip_merge = False
        downloader_args = dict(
            urls=urls,
            output_dir=segment_save_dir,
            # zero-padded numeric names keep lexicographic == numeric ordering
            filename="{i:0%d}{ext}" % len(str(len(urls))),
            headers=session.headers,
            cookies=session.cookies,
            proxy=proxy,
            max_workers=max_workers,
        )

        # N_m3u8DL-RE downloads, decrypts and muxes on its own, so the manual
        # merge/decrypt pass below is skipped for it.
        if downloader.__name__ == "n_m3u8dl_re":
            skip_merge = True
            downloader_args.update(
                {
                    "output_dir": save_dir,
                    "filename": track.id,
                    "track": track,
                    "content_keys": session_drm.content_keys if session_drm else None,
                }
            )

        for status_update in downloader(**downloader_args):
            file_downloaded = status_update.get("file_downloaded")
            if file_downloaded:
                events.emit(events.Types.SEGMENT_DOWNLOADED, track=track, segment=file_downloaded)
            else:
                downloaded = status_update.get("downloaded")
                if downloaded and downloaded.endswith("/s"):
                    status_update["downloaded"] = f"HLS {downloaded}"
                progress(**status_update)

        # see https://github.com/devine-dl/devine/issues/71
        for control_file in segment_save_dir.glob("*.aria2__temp"):
            control_file.unlink()

        if not skip_merge:
            progress(total=total_segments, completed=0, downloaded="Merging")

            name_len = len(str(total_segments))
            discon_i = 0  # index of the current discontinuity "chapter" file
            range_offset = 0
            # (init section, its raw bytes) for the active EXT-X-MAP, if any
            map_data: Optional[tuple[m3u8.model.InitializationSection, bytes]] = None
            # (EXT-X-KEY that produced it, DRM object) currently in effect
            if session_drm:
                encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = (None, session_drm)
            else:
                encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = None

            # `i` counts only wanted segments (matching downloaded file names);
            # `real_i` walks every declared segment.
            i = -1
            for real_i, segment in enumerate(master.segments):
                if segment not in unwanted_segments:
                    i += 1

                is_last_segment = (real_i + 1) == len(master.segments)

                def merge(to: Path, via: list[Path], delete: bool = False, include_map_data: bool = False):
                    """
                    Merge all files to a given path, optionally including map data.

                    Parameters:
                        to: The output file with all merged data.
                        via: List of files to merge, in sequence.
                        delete: Delete the file once it's been merged.
                        include_map_data: Whether to include the init map data.
                    """
                    with open(to, "wb") as x:
                        if include_map_data and map_data and map_data[1]:
                            x.write(map_data[1])
                        for file in via:
                            x.write(file.read_bytes())
                            x.flush()
                            if delete:
                                file.unlink()

                def decrypt(include_this_segment: bool) -> Path:
                    """
                    Decrypt all segments that uses the currently set DRM.

                    All segments that will be decrypted with this DRM will be merged together
                    in sequence, prefixed with the init data (if any), and then deleted. Once
                    merged they will be decrypted. The merged and decrypted file names state
                    the range of segments that were used.

                    Parameters:
                        include_this_segment: Whether to include the current segment in the
                            list of segments to merge and decrypt. This should be False if
                            decrypting on EXT-X-KEY changes, or True when decrypting on the
                            last segment.

                    Returns the decrypted path.
                    """
                    drm = encryption_data[1]
                    first_segment_i = next(
                        int(file.stem) for file in sorted(segment_save_dir.iterdir()) if file.stem.isdigit()
                    )
                    last_segment_i = max(0, i - int(not include_this_segment))
                    range_len = (last_segment_i - first_segment_i) + 1

                    segment_range = f"{str(first_segment_i).zfill(name_len)}-{str(last_segment_i).zfill(name_len)}"
                    merged_path = (
                        segment_save_dir / f"{segment_range}{get_extension(master.segments[last_segment_i].uri)}"
                    )
                    decrypted_path = segment_save_dir / f"{merged_path.stem}_decrypted{merged_path.suffix}"

                    files = [
                        file
                        for file in sorted(segment_save_dir.iterdir())
                        if file.stem.isdigit() and first_segment_i <= int(file.stem) <= last_segment_i
                    ]
                    if not files:
                        raise ValueError(f"None of the segment files for {segment_range} exist...")
                    elif len(files) != range_len:
                        raise ValueError(f"Missing {range_len - len(files)} segment files for {segment_range}...")

                    if isinstance(drm, Widevine):
                        # with widevine we can merge all segments and decrypt once
                        merge(to=merged_path, via=files, delete=True, include_map_data=True)
                        drm.decrypt(merged_path)
                        merged_path.rename(decrypted_path)
                    else:
                        # with other drm we must decrypt separately and then merge them
                        # for aes this is because each segment likely has 16-byte padding
                        for file in files:
                            drm.decrypt(file)
                        # NOTE(review): this branch merges into merged_path, yet the
                        # event below and the return value reference decrypted_path,
                        # which is never created here — confirm which path is intended.
                        merge(to=merged_path, via=files, delete=True, include_map_data=True)

                    events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=decrypted_path)

                    return decrypted_path

                def merge_discontinuity(include_this_segment: bool, include_map_data: bool = True):
                    """
                    Merge all segments of the discontinuity.

                    All segment files for this discontinuity must already be downloaded and
                    already decrypted (if it needs to be decrypted).

                    Parameters:
                        include_this_segment: Whether to include the current segment in the
                            list of segments to merge and decrypt. This should be False if
                            decrypting on EXT-X-KEY changes, or True when decrypting on the
                            last segment.
                        include_map_data: Whether to prepend the init map data before the
                            segment files when merging.
                    """
                    last_segment_i = max(0, i - int(not include_this_segment))

                    files = [
                        file
                        for file in sorted(segment_save_dir.iterdir())
                        if int(file.stem.replace("_decrypted", "").split("-")[-1]) <= last_segment_i
                    ]
                    if files:
                        to_dir = segment_save_dir.parent
                        to_path = to_dir / f"{str(discon_i).zfill(name_len)}{files[-1].suffix}"
                        merge(to=to_path, via=files, delete=True, include_map_data=include_map_data)

                if segment not in unwanted_segments:
                    if isinstance(track, Subtitle):
                        # normalize subtitle segments to UTF-8 and un-escape LRM/RLM
                        # entities in plain-text subtitle formats
                        segment_file_ext = get_extension(segment.uri)
                        segment_file_path = segment_save_dir / f"{str(i).zfill(name_len)}{segment_file_ext}"
                        segment_data = try_ensure_utf8(segment_file_path.read_bytes())
                        if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
                            segment_data = (
                                segment_data.decode("utf8")
                                .replace("&lrm;", html.unescape("&lrm;"))
                                .replace("&rlm;", html.unescape("&rlm;"))
                                .encode("utf8")
                            )
                        segment_file_path.write_bytes(segment_data)

                    if segment.discontinuity and i != 0:
                        # flush everything before the discontinuity boundary
                        if encryption_data:
                            decrypt(include_this_segment=False)
                        merge_discontinuity(
                            include_this_segment=False, include_map_data=not encryption_data or not encryption_data[1]
                        )

                        discon_i += 1
                        range_offset = 0  # TODO: Should this be reset or not?
                        map_data = None
                        if encryption_data:
                            # NOTE(review): this rebuild is a no-op; upstream resets the
                            # key element here so the next EXT-X-KEY re-triggers — confirm.
                            encryption_data = (encryption_data[0], encryption_data[1])

                    if segment.init_section and (not map_data or segment.init_section != map_data[0]):
                        if segment.init_section.byterange:
                            init_byte_range = HLS.calculate_byte_range(segment.init_section.byterange, range_offset)
                            range_offset = init_byte_range.split("-")[0]
                            init_range_header = {"Range": f"bytes={init_byte_range}"}
                        else:
                            init_range_header = {}

                        # Handle both session types for init section request
                        res = session.get(
                            url=urljoin(segment.init_section.base_uri, segment.init_section.uri),
                            headers=init_range_header,
                        )

                        # Check response based on session type
                        if isinstance(res, requests.Response):
                            res.raise_for_status()
                            init_content = res.content
                        elif isinstance(res, httpx.Response):
                            if res.status_code >= 400:
                                raise requests.HTTPError(f"HTTP Error: {res.status_code}", response=res)
                            init_content = res.content
                        else:
                            raise TypeError(
                                f"Expected response to be requests.Response or httpx.Response, not {type(res)}"
                            )

                        map_data = (segment.init_section, init_content)

                    segment_keys = getattr(segment, "keys", None)
                    if segment_keys:
                        key = HLS.get_supported_key(segment_keys)
                        # key change: decrypt everything gathered under the old key first
                        if encryption_data and encryption_data[0] != key and i != 0 and segment not in unwanted_segments:
                            decrypt(include_this_segment=False)

                        if key is None:
                            encryption_data = None
                        elif not encryption_data or encryption_data[0] != key:
                            drm = HLS.get_drm(key, session)
                            if isinstance(drm, (Widevine, PlayReady)):
                                try:
                                    if map_data:
                                        track_kid = track.get_key_id(map_data[1])
                                    else:
                                        track_kid = None
                                    progress(downloaded="LICENSING")
                                    license_widevine(drm, track_kid=track_kid)
                                    progress(downloaded="[yellow]LICENSED")
                                except Exception:  # noqa
                                    DOWNLOAD_CANCELLED.set()  # skip pending track downloads
                                    progress(downloaded="[red]FAILED")
                                    raise
                            encryption_data = (key, drm)

                if DOWNLOAD_LICENCE_ONLY.is_set():
                    continue

                if is_last_segment:
                    # required as it won't end with EXT-X-DISCONTINUITY nor a new key
                    if encryption_data:
                        decrypt(include_this_segment=True)
                    merge_discontinuity(
                        include_this_segment=True, include_map_data=not encryption_data or not encryption_data[1]
                    )

                progress(advance=1)

            if DOWNLOAD_LICENCE_ONLY.is_set():
                return

            if segment_save_dir.exists():
                segment_save_dir.rmdir()

        # finally merge all the discontinuity save files together to the final path
        segments_to_merge = [x for x in sorted(save_dir.iterdir()) if x.is_file()]
        if len(segments_to_merge) == 1:
            shutil.move(segments_to_merge[0], save_path)
        else:
            progress(downloaded="Merging")
            if isinstance(track, (Video, Audio)):
                HLS.merge_segments(segments=segments_to_merge, save_path=save_path)
            else:
                with open(save_path, "wb") as f:
                    for discontinuity_file in segments_to_merge:
                        discontinuity_data = discontinuity_file.read_bytes()
                        f.write(discontinuity_data)
                        f.flush()
                        discontinuity_file.unlink()

        save_dir.rmdir()

        progress(downloaded="Downloaded")

        track.path = save_path
        events.emit(events.Types.TRACK_DOWNLOADED, track=track)
||||
|
||||
@staticmethod
|
||||
def merge_segments(segments: list[Path], save_path: Path) -> int:
|
||||
"""
|
||||
Concatenate Segments by first demuxing with FFmpeg.
|
||||
|
||||
Returns the file size of the merged file.
|
||||
"""
|
||||
if not binaries.FFMPEG:
|
||||
raise EnvironmentError("FFmpeg executable was not found but is required to merge HLS segments.")
|
||||
|
||||
demuxer_file = segments[0].parent / "ffmpeg_concat_demuxer.txt"
|
||||
demuxer_file.write_text("\n".join([f"file '{segment}'" for segment in segments]))
|
||||
|
||||
subprocess.check_call(
|
||||
[
|
||||
binaries.FFMPEG,
|
||||
"-hide_banner",
|
||||
"-loglevel",
|
||||
"panic",
|
||||
"-f",
|
||||
"concat",
|
||||
"-safe",
|
||||
"0",
|
||||
"-i",
|
||||
demuxer_file,
|
||||
"-map",
|
||||
"0",
|
||||
"-c",
|
||||
"copy",
|
||||
save_path,
|
||||
]
|
||||
)
|
||||
demuxer_file.unlink()
|
||||
|
||||
for segment in segments:
|
||||
segment.unlink()
|
||||
|
||||
return save_path.stat().st_size
|
||||
|
||||
@staticmethod
|
||||
def parse_session_data_keys(
|
||||
manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None
|
||||
) -> list[m3u8.model.Key]:
|
||||
"""Parse `com.apple.hls.keys` session data and return Key objects."""
|
||||
keys: list[m3u8.model.Key] = []
|
||||
|
||||
for data in getattr(manifest, "session_data", []) or []:
|
||||
if getattr(data, "data_id", None) != "com.apple.hls.keys":
|
||||
continue
|
||||
|
||||
value = getattr(data, "value", None)
|
||||
if not value and data.uri:
|
||||
if not session:
|
||||
session = Session()
|
||||
res = session.get(urljoin(manifest.base_uri or "", data.uri))
|
||||
value = res.text
|
||||
|
||||
if not value:
|
||||
continue
|
||||
|
||||
try:
|
||||
decoded = base64.b64decode(value).decode()
|
||||
except Exception:
|
||||
decoded = value
|
||||
|
||||
try:
|
||||
items = json.loads(decoded)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
for item in items if isinstance(items, list) else []:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
key = m3u8.model.Key(
|
||||
method=item.get("method"),
|
||||
base_uri=manifest.base_uri or "",
|
||||
uri=item.get("uri"),
|
||||
keyformat=item.get("keyformat"),
|
||||
keyformatversions=",".join(item.get("keyformatversion") or item.get("keyformatversions") or []),
|
||||
)
|
||||
if key.method in {"AES-128", "ISO-23001-7"} or (
|
||||
key.keyformat
|
||||
and key.keyformat.lower()
|
||||
in {
|
||||
WidevineCdm.urn,
|
||||
PlayReadyCdm,
|
||||
"com.microsoft.playready",
|
||||
}
|
||||
):
|
||||
keys.append(key)
|
||||
|
||||
return keys
|
||||
|
||||
@staticmethod
|
||||
def get_supported_key(keys: list[Union[m3u8.model.SessionKey, m3u8.model.Key]]) -> Optional[m3u8.Key]:
|
||||
"""
|
||||
Get a support Key System from a list of Key systems.
|
||||
|
||||
Note that the key systems are chosen in an opinionated order.
|
||||
|
||||
Returns None if one of the key systems is method=NONE, which means all segments
|
||||
from hence forth should be treated as plain text until another key system is
|
||||
encountered, unless it's also method=NONE.
|
||||
|
||||
Raises NotImplementedError if none of the key systems are supported.
|
||||
"""
|
||||
if any(key.method == "NONE" for key in keys):
|
||||
return None
|
||||
|
||||
unsupported_systems = []
|
||||
for key in keys:
|
||||
if not key:
|
||||
continue
|
||||
# TODO: Add a way to specify which supported key system to use
|
||||
# TODO: Add support for 'SAMPLE-AES', 'AES-CTR', 'AES-CBC', 'ClearKey'
|
||||
elif key.method == "AES-128":
|
||||
return key
|
||||
elif key.method == "ISO-23001-7":
|
||||
return key
|
||||
elif key.keyformat and key.keyformat.lower() == WidevineCdm.urn:
|
||||
return key
|
||||
elif key.keyformat and (
|
||||
key.keyformat.lower() == PlayReadyCdm or "com.microsoft.playready" in key.keyformat.lower()
|
||||
):
|
||||
return key
|
||||
else:
|
||||
unsupported_systems.append(key.method + (f" ({key.keyformat})" if key.keyformat else ""))
|
||||
else:
|
||||
raise NotImplementedError(f"None of the key systems are supported: {', '.join(unsupported_systems)}")
|
||||
|
||||
@staticmethod
|
||||
def get_drm(
|
||||
key: Union[m3u8.model.SessionKey, m3u8.model.Key], session: Optional[Union[Session, httpx.Client]] = None
|
||||
) -> DRM_T:
|
||||
"""
|
||||
Convert HLS EXT-X-KEY data to an initialized DRM object.
|
||||
|
||||
Parameters:
|
||||
key: m3u8 key system (EXT-X-KEY) object.
|
||||
session: Optional session used to request AES-128 URIs.
|
||||
Useful to set headers, proxies, cookies, and so forth.
|
||||
|
||||
Raises a NotImplementedError if the key system is not supported.
|
||||
"""
|
||||
if not isinstance(session, (Session, httpx.Client, type(None))):
|
||||
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {type(session)}")
|
||||
if not session:
|
||||
session = Session()
|
||||
|
||||
# TODO: Add support for 'SAMPLE-AES', 'AES-CTR', 'AES-CBC', 'ClearKey'
|
||||
if key.method == "AES-128":
|
||||
drm = ClearKey.from_m3u_key(key, session)
|
||||
elif key.method == "ISO-23001-7":
|
||||
drm = Widevine(pssh=WV_PSSH.new(key_ids=[key.uri.split(",")[-1]], system_id=WV_PSSH.SystemId.Widevine))
|
||||
elif key.keyformat and key.keyformat.lower() == WidevineCdm.urn:
|
||||
drm = Widevine(
|
||||
pssh=WV_PSSH(key.uri.split(",")[-1]),
|
||||
**key._extra_params, # noqa
|
||||
)
|
||||
elif key.keyformat and (
|
||||
key.keyformat.lower() == PlayReadyCdm or "com.microsoft.playready" in key.keyformat.lower()
|
||||
):
|
||||
drm = PlayReady(
|
||||
pssh=PR_PSSH(key.uri.split(",")[-1]),
|
||||
pssh_b64=key.uri.split(",")[-1],
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError(f"The key system is not supported: {key}")
|
||||
|
||||
return drm
|
||||
|
||||
@staticmethod
|
||||
def get_all_drm(
|
||||
keys: list[Union[m3u8.model.SessionKey, m3u8.model.Key]], proxy: Optional[str] = None
|
||||
) -> list[DRM_T]:
|
||||
"""
|
||||
Convert HLS EXT-X-KEY data to initialized DRM objects.
|
||||
|
||||
Parameters:
|
||||
keys: m3u8 key system (EXT-X-KEY) objects.
|
||||
proxy: Optional proxy string used for requesting AES-128 URIs.
|
||||
|
||||
Raises a NotImplementedError if none of the key systems are supported.
|
||||
"""
|
||||
unsupported_keys: list[m3u8.Key] = []
|
||||
drm_objects: list[DRM_T] = []
|
||||
|
||||
if any(key.method == "NONE" for key in keys):
|
||||
return []
|
||||
|
||||
for key in keys:
|
||||
try:
|
||||
drm = HLS.get_drm(key, proxy)
|
||||
drm_objects.append(drm)
|
||||
except NotImplementedError:
|
||||
unsupported_keys.append(key)
|
||||
|
||||
if not drm_objects and unsupported_keys:
|
||||
logging.debug(
|
||||
"Ignoring unsupported key systems: %s",
|
||||
", ".join([str(k.keyformat or k.method) for k in unsupported_keys]),
|
||||
)
|
||||
return []
|
||||
|
||||
return drm_objects
|
||||
|
||||
@staticmethod
|
||||
def calculate_byte_range(m3u_range: str, fallback_offset: int = 0) -> str:
|
||||
"""
|
||||
Convert a HLS EXT-X-BYTERANGE value to a more traditional range value.
|
||||
E.g., '1433@0' -> '0-1432', '357392@1433' -> '1433-358824'.
|
||||
"""
|
||||
parts = [int(x) for x in m3u_range.split("@")]
|
||||
if len(parts) != 2:
|
||||
parts.append(fallback_offset)
|
||||
length, offset = parts
|
||||
return f"{offset}-{offset + length - 1}"
|
||||
|
||||
|
||||
# Explicit public API of this module for `import *` consumers.
__all__ = ("HLS",)
|
||||
335
unshackle/core/manifests/ism.py
Normal file
335
unshackle/core/manifests/ism.py
Normal file
@@ -0,0 +1,335 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import html
|
||||
import shutil
|
||||
import urllib.parse
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Optional, Union
|
||||
|
||||
import requests
|
||||
from langcodes import Language, tag_is_valid
|
||||
from lxml.etree import Element
|
||||
from pyplayready.system.pssh import PSSH as PR_PSSH
|
||||
from pywidevine.pssh import PSSH
|
||||
from requests import Session
|
||||
|
||||
from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack
|
||||
from unshackle.core.drm import DRM_T, PlayReady, Widevine
|
||||
from unshackle.core.events import events
|
||||
from unshackle.core.tracks import Audio, Subtitle, Track, Tracks, Video
|
||||
from unshackle.core.utilities import try_ensure_utf8
|
||||
from unshackle.core.utils.xml import load_xml
|
||||
|
||||
|
||||
class ISM:
    def __init__(self, manifest: Element, url: str) -> None:
        """
        Hold a parsed SmoothStreamingMedia document plus the URL it came from.

        Parameters:
            manifest: Root element of a parsed ISM/Smooth Streaming manifest.
            url: The manifest's URL, required to resolve relative segment paths.
        """
        tag = manifest.tag
        if tag != "SmoothStreamingMedia":
            raise TypeError(f"Expected 'SmoothStreamingMedia' document, got '{tag}'")
        if not url:
            raise requests.URLRequired("ISM manifest URL must be provided for relative paths")
        self.manifest = manifest
        self.url = url
|
||||
|
||||
@classmethod
|
||||
def from_url(cls, url: str, session: Optional[Session] = None, **kwargs: Any) -> "ISM":
|
||||
if not url:
|
||||
raise requests.URLRequired("ISM manifest URL must be provided")
|
||||
if not session:
|
||||
session = Session()
|
||||
res = session.get(url, **kwargs)
|
||||
if res.url != url:
|
||||
url = res.url
|
||||
res.raise_for_status()
|
||||
return cls(load_xml(res.content), url)
|
||||
|
||||
@classmethod
|
||||
def from_text(cls, text: str, url: str) -> "ISM":
|
||||
if not text:
|
||||
raise ValueError("ISM manifest text must be provided")
|
||||
if not url:
|
||||
raise requests.URLRequired("ISM manifest URL must be provided for relative paths")
|
||||
return cls(load_xml(text), url)
|
||||
|
||||
@staticmethod
|
||||
def _get_drm(headers: list[Element]) -> list[DRM_T]:
|
||||
drm: list[DRM_T] = []
|
||||
for header in headers:
|
||||
system_id = (header.get("SystemID") or header.get("SystemId") or "").lower()
|
||||
data = "".join(header.itertext()).strip()
|
||||
if not data:
|
||||
continue
|
||||
if system_id == "edef8ba9-79d6-4ace-a3c8-27dcd51d21ed":
|
||||
try:
|
||||
pssh = PSSH(base64.b64decode(data))
|
||||
except Exception:
|
||||
continue
|
||||
kid = next(iter(pssh.key_ids), None)
|
||||
drm.append(Widevine(pssh=pssh, kid=kid))
|
||||
elif system_id == "9a04f079-9840-4286-ab92-e65be0885f95":
|
||||
try:
|
||||
pr_pssh = PR_PSSH(data)
|
||||
except Exception:
|
||||
continue
|
||||
drm.append(PlayReady(pssh=pr_pssh, pssh_b64=data))
|
||||
return drm
|
||||
|
||||
def to_tracks(self, language: Optional[Union[str, Language]] = None) -> Tracks:
|
||||
tracks = Tracks()
|
||||
base_url = self.url
|
||||
duration = int(self.manifest.get("Duration") or 0)
|
||||
drm = self._get_drm(self.manifest.xpath(".//ProtectionHeader"))
|
||||
|
||||
for stream_index in self.manifest.findall("StreamIndex"):
|
||||
content_type = stream_index.get("Type")
|
||||
if not content_type:
|
||||
raise ValueError("No content type value could be found")
|
||||
for ql in stream_index.findall("QualityLevel"):
|
||||
codec = ql.get("FourCC")
|
||||
if codec == "TTML":
|
||||
codec = "STPP"
|
||||
track_lang = None
|
||||
lang = (stream_index.get("Language") or "").strip()
|
||||
if lang and tag_is_valid(lang) and not lang.startswith("und"):
|
||||
track_lang = Language.get(lang)
|
||||
|
||||
track_urls: list[str] = []
|
||||
fragment_time = 0
|
||||
fragments = stream_index.findall("c")
|
||||
# Some manifests omit the first fragment in the <c> list but
|
||||
# still expect a request for start time 0 which contains the
|
||||
# initialization segment. If the first declared fragment is not
|
||||
# at time 0, prepend the missing initialization URL.
|
||||
if fragments:
|
||||
first_time = int(fragments[0].get("t") or 0)
|
||||
if first_time != 0:
|
||||
track_urls.append(
|
||||
urllib.parse.urljoin(
|
||||
base_url,
|
||||
stream_index.get("Url").format_map(
|
||||
{
|
||||
"bitrate": ql.get("Bitrate"),
|
||||
"start time": "0",
|
||||
}
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
for idx, frag in enumerate(fragments):
|
||||
fragment_time = int(frag.get("t", fragment_time))
|
||||
repeat = int(frag.get("r", 1))
|
||||
duration_frag = int(frag.get("d") or 0)
|
||||
if not duration_frag:
|
||||
try:
|
||||
next_time = int(fragments[idx + 1].get("t"))
|
||||
except (IndexError, AttributeError):
|
||||
next_time = duration
|
||||
duration_frag = (next_time - fragment_time) / repeat
|
||||
for _ in range(repeat):
|
||||
track_urls.append(
|
||||
urllib.parse.urljoin(
|
||||
base_url,
|
||||
stream_index.get("Url").format_map(
|
||||
{
|
||||
"bitrate": ql.get("Bitrate"),
|
||||
"start time": str(fragment_time),
|
||||
}
|
||||
),
|
||||
)
|
||||
)
|
||||
fragment_time += duration_frag
|
||||
|
||||
track_id = hashlib.md5(
|
||||
f"{codec}-{track_lang}-{ql.get('Bitrate') or 0}-{ql.get('Index') or 0}".encode()
|
||||
).hexdigest()
|
||||
|
||||
data = {
|
||||
"ism": {
|
||||
"manifest": self.manifest,
|
||||
"stream_index": stream_index,
|
||||
"quality_level": ql,
|
||||
"segments": track_urls,
|
||||
}
|
||||
}
|
||||
|
||||
if content_type == "video":
|
||||
try:
|
||||
vcodec = Video.Codec.from_mime(codec) if codec else None
|
||||
except ValueError:
|
||||
vcodec = None
|
||||
tracks.add(
|
||||
Video(
|
||||
id_=track_id,
|
||||
url=self.url,
|
||||
codec=vcodec,
|
||||
language=track_lang or language,
|
||||
is_original_lang=bool(language and track_lang and str(track_lang) == str(language)),
|
||||
bitrate=ql.get("Bitrate"),
|
||||
width=int(ql.get("MaxWidth") or 0) or int(stream_index.get("MaxWidth") or 0),
|
||||
height=int(ql.get("MaxHeight") or 0) or int(stream_index.get("MaxHeight") or 0),
|
||||
descriptor=Video.Descriptor.ISM,
|
||||
drm=drm,
|
||||
data=data,
|
||||
)
|
||||
)
|
||||
elif content_type == "audio":
|
||||
try:
|
||||
acodec = Audio.Codec.from_mime(codec) if codec else None
|
||||
except ValueError:
|
||||
acodec = None
|
||||
tracks.add(
|
||||
Audio(
|
||||
id_=track_id,
|
||||
url=self.url,
|
||||
codec=acodec,
|
||||
language=track_lang or language,
|
||||
is_original_lang=bool(language and track_lang and str(track_lang) == str(language)),
|
||||
bitrate=ql.get("Bitrate"),
|
||||
channels=ql.get("Channels"),
|
||||
descriptor=Track.Descriptor.ISM,
|
||||
drm=drm,
|
||||
data=data,
|
||||
)
|
||||
)
|
||||
else:
|
||||
try:
|
||||
scodec = Subtitle.Codec.from_mime(codec) if codec else None
|
||||
except ValueError:
|
||||
scodec = None
|
||||
tracks.add(
|
||||
Subtitle(
|
||||
id_=track_id,
|
||||
url=self.url,
|
||||
codec=scodec,
|
||||
language=track_lang or language,
|
||||
is_original_lang=bool(language and track_lang and str(track_lang) == str(language)),
|
||||
descriptor=Track.Descriptor.ISM,
|
||||
drm=drm,
|
||||
data=data,
|
||||
)
|
||||
)
|
||||
return tracks
|
||||
|
||||
@staticmethod
|
||||
def download_track(
|
||||
track: AnyTrack,
|
||||
save_path: Path,
|
||||
save_dir: Path,
|
||||
progress: partial,
|
||||
session: Optional[Session] = None,
|
||||
proxy: Optional[str] = None,
|
||||
max_workers: Optional[int] = None,
|
||||
license_widevine: Optional[Callable] = None,
|
||||
*,
|
||||
cdm: Optional[object] = None,
|
||||
) -> None:
|
||||
if not session:
|
||||
session = Session()
|
||||
elif not isinstance(session, Session):
|
||||
raise TypeError(f"Expected session to be a {Session}, not {session!r}")
|
||||
|
||||
if proxy:
|
||||
session.proxies.update({"all": proxy})
|
||||
|
||||
segments: list[str] = track.data["ism"]["segments"]
|
||||
|
||||
session_drm = None
|
||||
if track.drm:
|
||||
# Mirror HLS.download_track: pick the DRM matching the provided CDM
|
||||
# (or the first available) and license it if supported.
|
||||
session_drm = track.get_drm_for_cdm(cdm)
|
||||
if isinstance(session_drm, (Widevine, PlayReady)):
|
||||
try:
|
||||
if not license_widevine:
|
||||
raise ValueError("license_widevine func must be supplied to use DRM")
|
||||
progress(downloaded="LICENSING")
|
||||
license_widevine(session_drm)
|
||||
progress(downloaded="[yellow]LICENSED")
|
||||
except Exception:
|
||||
DOWNLOAD_CANCELLED.set()
|
||||
progress(downloaded="[red]FAILED")
|
||||
raise
|
||||
|
||||
if DOWNLOAD_LICENCE_ONLY.is_set():
|
||||
progress(downloaded="[yellow]SKIPPED")
|
||||
return
|
||||
|
||||
progress(total=len(segments))
|
||||
|
||||
downloader = track.downloader
|
||||
skip_merge = False
|
||||
downloader_args = dict(
|
||||
urls=[{"url": url} for url in segments],
|
||||
output_dir=save_dir,
|
||||
filename="{i:0%d}.mp4" % len(str(len(segments))),
|
||||
headers=session.headers,
|
||||
cookies=session.cookies,
|
||||
proxy=proxy,
|
||||
max_workers=max_workers,
|
||||
)
|
||||
|
||||
if downloader.__name__ == "n_m3u8dl_re":
|
||||
skip_merge = True
|
||||
downloader_args.update(
|
||||
{
|
||||
"filename": track.id,
|
||||
"track": track,
|
||||
"content_keys": session_drm.content_keys if session_drm else None,
|
||||
}
|
||||
)
|
||||
|
||||
for status_update in downloader(**downloader_args):
|
||||
file_downloaded = status_update.get("file_downloaded")
|
||||
if file_downloaded:
|
||||
events.emit(events.Types.SEGMENT_DOWNLOADED, track=track, segment=file_downloaded)
|
||||
else:
|
||||
downloaded = status_update.get("downloaded")
|
||||
if downloaded and downloaded.endswith("/s"):
|
||||
status_update["downloaded"] = f"ISM {downloaded}"
|
||||
progress(**status_update)
|
||||
|
||||
for control_file in save_dir.glob("*.aria2__temp"):
|
||||
control_file.unlink()
|
||||
|
||||
segments_to_merge = [x for x in sorted(save_dir.iterdir()) if x.is_file()]
|
||||
|
||||
if skip_merge:
|
||||
shutil.move(segments_to_merge[0], save_path)
|
||||
else:
|
||||
with open(save_path, "wb") as f:
|
||||
for segment_file in segments_to_merge:
|
||||
segment_data = segment_file.read_bytes()
|
||||
if (
|
||||
not session_drm
|
||||
and isinstance(track, Subtitle)
|
||||
and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
|
||||
):
|
||||
segment_data = try_ensure_utf8(segment_data)
|
||||
segment_data = (
|
||||
segment_data.decode("utf8")
|
||||
.replace("‎", html.unescape("‎"))
|
||||
.replace("‏", html.unescape("‏"))
|
||||
.encode("utf8")
|
||||
)
|
||||
f.write(segment_data)
|
||||
f.flush()
|
||||
segment_file.unlink()
|
||||
progress(advance=1)
|
||||
|
||||
track.path = save_path
|
||||
events.emit(events.Types.TRACK_DOWNLOADED, track=track)
|
||||
|
||||
if not skip_merge and session_drm:
|
||||
progress(downloaded="Decrypting", completed=0, total=100)
|
||||
session_drm.decrypt(save_path)
|
||||
track.drm = None
|
||||
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=session_drm, segment=None)
|
||||
progress(downloaded="Decrypting", advance=100)
|
||||
|
||||
save_dir.rmdir()
|
||||
progress(downloaded="Downloaded")
|
||||
|
||||
|
||||
# Public API of this module.
__all__ = ("ISM",)
unshackle/core/manifests/m3u8.py — new file (76 lines)
@@ -0,0 +1,76 @@
|
||||
"""Utility functions for parsing M3U8 playlists."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
import httpx
|
||||
import m3u8
|
||||
from pyplayready.cdm import Cdm as PlayReadyCdm
|
||||
from pyplayready.system.pssh import PSSH as PR_PSSH
|
||||
from pywidevine.cdm import Cdm as WidevineCdm
|
||||
from pywidevine.pssh import PSSH as WV_PSSH
|
||||
from requests import Session
|
||||
|
||||
from unshackle.core.drm import PlayReady, Widevine
|
||||
from unshackle.core.manifests.hls import HLS
|
||||
from unshackle.core.tracks import Tracks
|
||||
|
||||
|
||||
def parse(
    master: m3u8.M3U8,
    language: str,
    *,
    session: Optional[Union[Session, httpx.Client]] = None,
) -> Tracks:
    """
    Parse a variant playlist to ``Tracks`` with DRM information.

    DRM discovery is attempted in order of cost:
    1. Whatever ``HLS.to_tracks`` already attached to each track.
    2. The master playlist's session keys (and session-data keys).
    3. The first video's media playlist ``#EXT-X-KEY`` entries.

    Args:
        master: Loaded master (multivariant) playlist.
        language: Assumed original language for the parsed tracks.
        session: Optional HTTP client used for any extra playlist fetches.

    Returns:
        Tracks with Widevine/PlayReady DRM applied to video and audio
        tracks wherever it could be discovered.
    """
    tracks = HLS(master, session=session).to_tracks(language)

    need_wv = not any(isinstance(d, Widevine) for t in tracks for d in (t.drm or []))
    need_pr = not any(isinstance(d, PlayReady) for t in tracks for d in (t.drm or []))

    def _apply(drm_obj: Union[Widevine, PlayReady], drm_cls: type) -> None:
        # Replace any existing DRM of drm_cls on all A/V tracks with drm_obj.
        for t in tracks.videos + tracks.audio:
            t.drm = [d for d in (t.drm or []) if not isinstance(d, drm_cls)] + [drm_obj]

    if (need_wv or need_pr) and tracks.videos:
        if not session:
            session = Session()

        session_keys = list(master.session_keys or [])
        session_keys.extend(HLS.parse_session_data_keys(master, session))

        for drm_obj in HLS.get_all_drm(session_keys):
            if need_wv and isinstance(drm_obj, Widevine):
                _apply(drm_obj, Widevine)
                need_wv = False
            elif need_pr and isinstance(drm_obj, PlayReady):
                _apply(drm_obj, PlayReady)
                need_pr = False
            if not need_wv and not need_pr:
                break

    if (need_wv or need_pr) and tracks.videos:
        # Last resort: scan the first video's media playlist for keys.
        playlist = m3u8.load(tracks.videos[0].url)
        for key in playlist.keys or []:
            if not key or not key.keyformat:
                continue
            fmt = key.keyformat.lower()
            if need_wv and fmt == WidevineCdm.urn:
                pssh_b64 = key.uri.split(",")[-1]
                _apply(Widevine(pssh=WV_PSSH(pssh_b64)), Widevine)
                need_wv = False
            elif need_pr and "com.microsoft.playready" in fmt:
                # BUG FIX: the original also tested `fmt == PlayReadyCdm`,
                # comparing a str against the Cdm *class*, which can never
                # be True; the substring check is the only meaningful test.
                pssh_b64 = key.uri.split(",")[-1]
                _apply(PlayReady(pssh=PR_PSSH(pssh_b64), pssh_b64=pssh_b64), PlayReady)
                need_pr = False
            if not need_wv and not need_pr:
                break

    return tracks
|
||||
|
||||
|
||||
# Public API of this module.
__all__ = ["parse"]
Reference in New Issue
Block a user