Initial Commit

2026-03-11 17:09:00 +00:00 · 2025-07-18 00:46:05 +00:00
commit d37014f53f
94 changed files with 17458 additions and 0 deletions
--- a/unshackle/core/downloaders/n_m3u8dl_re.py
+++ b/unshackle/core/downloaders/n_m3u8dl_re.py
@@ -0,0 +1,299 @@
+import logging
+import os
+import re
+import subprocess
+import warnings
+from http.cookiejar import CookieJar
+from itertools import chain
+from pathlib import Path
+from typing import Any, Generator, MutableMapping, Optional, Union
+
+import requests
+from requests.cookies import cookiejar_from_dict, get_cookie_header
+
+from unshackle.core import binaries
+from unshackle.core.config import config
+from unshackle.core.console import console
+from unshackle.core.constants import DOWNLOAD_CANCELLED
+
+# Ignore FutureWarnings
+warnings.simplefilter(action="ignore", category=FutureWarning)
+
+AUDIO_CODEC_MAP = {"AAC": "mp4a", "AC3": "ac-3", "EC3": "ec-3"}
+VIDEO_CODEC_MAP = {"AVC": "avc", "HEVC": "hvc", "DV": "dvh", "HLG": "hev"}
+
+
+def track_selection(track: object) -> list[str]:
+    """Return the N_m3u8DL-RE stream selection arguments for a track."""
+
+    if "dash" in track.data:
+        adaptation_set = track.data["dash"]["adaptation_set"]
+        representation = track.data["dash"]["representation"]
+
+        track_type = track.__class__.__name__
+        codec = track.codec.name
+        bitrate = track.bitrate // 1000
+        language = track.language
+        width = track.width if track_type == "Video" else None
+        height = track.height if track_type == "Video" else None
+        range = track.range.name if track_type == "Video" else None
+
+    elif "ism" in track.data:
+        stream_index = track.data["ism"]["stream_index"]
+        quality_level = track.data["ism"]["quality_level"]
+
+        track_type = track.__class__.__name__
+        codec = track.codec.name
+        bitrate = track.bitrate // 1000
+        language = track.language
+        width = track.width if track_type == "Video" else None
+        height = track.height if track_type == "Video" else None
+        range = track.range.name if track_type == "Video" else None
+        adaptation_set = stream_index
+        representation = quality_level
+
+    else:
+        return []
+
+    if track_type == "Audio":
+        codecs = AUDIO_CODEC_MAP.get(codec)
+        langs = adaptation_set.findall("lang") + representation.findall("lang")
+        track_ids = list(
+            set(
+                v
+                for x in chain(adaptation_set, representation)
+                for v in (x.get("audioTrackId"), x.get("id"))
+                if v is not None
+            )
+        )
+        roles = adaptation_set.findall("Role") + representation.findall("Role")
+        role = ":role=main" if next((i for i in roles if i.get("value").lower() == "main"), None) else ""
+        bandwidth = f"bwMin={bitrate}:bwMax={bitrate + 5}"
+
+        if langs:
+            track_selection = ["-sa", f"lang={language}:codecs={codecs}:{bandwidth}{role}"]
+        elif len(track_ids) == 1:
+            track_selection = ["-sa", f"id={track_ids[0]}"]
+        else:
+            track_selection = ["-sa", f"for=best{role}"]
+        return track_selection
+
+    if track_type == "Video":
+        # adjust codec based on range
+        codec_adjustments = {("HEVC", "DV"): "DV", ("HEVC", "HLG"): "HLG"}
+        codec = codec_adjustments.get((codec, range), codec)
+        codecs = VIDEO_CODEC_MAP.get(codec)
+
+        bandwidth = f"bwMin={bitrate}:bwMax={bitrate + 5}"
+        if width and height:
+            resolution = f"{width}x{height}"
+        elif width:
+            resolution = f"{width}*"
+        else:
+            resolution = "for=best"
+        if resolution.startswith("for="):
+            track_selection = ["-sv", resolution]
+            track_selection.append(f"codecs={codecs}:{bandwidth}")
+        else:
+            track_selection = ["-sv", f"res={resolution}:codecs={codecs}:{bandwidth}"]
+        return track_selection
+
+
+def download(
+    urls: Union[str, dict[str, Any], list[str], list[dict[str, Any]]],
+    track: object,
+    output_dir: Path,
+    filename: str,
+    headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
+    cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
+    proxy: Optional[str] = None,
+    max_workers: Optional[int] = None,
+    content_keys: Optional[dict[str, Any]] = None,
+) -> Generator[dict[str, Any], None, None]:
+    if not urls:
+        raise ValueError("urls must be provided and not empty")
+    elif not isinstance(urls, (str, dict, list)):
+        raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}")
+
+    if not output_dir:
+        raise ValueError("output_dir must be provided")
+    elif not isinstance(output_dir, Path):
+        raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}")
+
+    if not filename:
+        raise ValueError("filename must be provided")
+    elif not isinstance(filename, str):
+        raise TypeError(f"Expected filename to be {str}, not {type(filename)}")
+
+    if not isinstance(headers, (MutableMapping, type(None))):
+        raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}")
+
+    if not isinstance(cookies, (MutableMapping, CookieJar, type(None))):
+        raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}")
+
+    if not isinstance(proxy, (str, type(None))):
+        raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}")
+
+    if not max_workers:
+        max_workers = min(32, (os.cpu_count() or 1) + 4)
+    elif not isinstance(max_workers, int):
+        raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
+
+    if not isinstance(urls, list):
+        urls = [urls]
+
+    if not binaries.N_m3u8DL_RE:
+        raise EnvironmentError("N_m3u8DL-RE executable not found...")
+
+    if cookies and not isinstance(cookies, CookieJar):
+        cookies = cookiejar_from_dict(cookies)
+
+    track_type = track.__class__.__name__
+    thread_count = str(config.n_m3u8dl_re.get("thread_count", max_workers))
+    ad_keyword = config.n_m3u8dl_re.get("ad_keyword")
+
+    arguments = [
+        track.url,
+        "--save-dir",
+        output_dir,
+        "--tmp-dir",
+        output_dir,
+        "--thread-count",
+        thread_count,
+        "--no-log",
+        "--write-meta-json",
+        "false",
+    ]
+
+    for header, value in (headers or {}).items():
+        if header.lower() in ("accept-encoding", "cookie"):
+            continue
+        arguments.extend(["--header", f"{header}: {value}"])
+
+    if cookies:
+        cookie_header = get_cookie_header(cookies, requests.Request(url=track.url))
+        if cookie_header:
+            arguments.extend(["--header", f"Cookie: {cookie_header}"])
+
+    if proxy:
+        arguments.extend(["--custom-proxy", proxy])
+
+    if content_keys:
+        for kid, key in content_keys.items():
+            keys = f"{kid.hex}:{key.lower()}"
+        arguments.extend(["--key", keys])
+        arguments.extend(["--use-shaka-packager"])
+
+    if ad_keyword:
+        arguments.extend(["--ad-keyword", ad_keyword])
+
+    if track.descriptor.name == "URL":
+        error = f"[N_m3u8DL-RE]: {track.descriptor} is currently not supported"
+        raise ValueError(error)
+    elif track.descriptor.name == "DASH":
+        arguments.extend(track_selection(track))
+
+    # TODO: improve this nonsense
+    percent_re = re.compile(r"(\d+\.\d+%)")
+    speed_re = re.compile(r"(?<!/)(\d+\.\d+MB)(?!.*\/)")
+    warn = re.compile(r"(WARN : Response.*)")
+    error = re.compile(r"(ERROR.*)")
+    size_patterns = [
+        re.compile(r"(\d+\.\d+MB/\d+\.\d+GB)"),
+        re.compile(r"(\d+\.\d+GB/\d+\.\d+GB)"),
+        re.compile(r"(\d+\.\d+MB/\d+\.\d+MB)"),
+    ]
+
+    yield dict(total=100)
+
+    try:
+        with subprocess.Popen(
+            [binaries.N_m3u8DL_RE, *arguments], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+        ) as p:
+            for line in p.stdout:
+                output = line.strip()
+                if output:
+                    percent = percent_re.search(output)
+                    speed = speed_re.search(output)
+                    size = next(
+                        (pattern.search(output).group(1) for pattern in size_patterns if pattern.search(output)), ""
+                    )
+
+                    if speed:
+                        yield dict(downloaded=f"{speed.group(1)}ps {size}")
+                    if percent:
+                        progress = int(percent.group(1).split(".")[0])
+                        yield dict(completed=progress) if progress < 100 else dict(downloaded="Merging")
+
+                    if warn.search(output):
+                        console.log(f"{track_type} " + warn.search(output).group(1))
+
+            p.wait()
+
+        if p.returncode != 0:
+            if error.search(output):
+                raise ValueError(f"[N_m3u8DL-RE]: {error.search(output).group(1)}")
+            raise subprocess.CalledProcessError(p.returncode, arguments)
+
+    except ConnectionResetError:
+        # interrupted while passing URI to download
+        raise KeyboardInterrupt()
+    except KeyboardInterrupt:
+        DOWNLOAD_CANCELLED.set()  # skip pending track downloads
+        yield dict(downloaded="[yellow]CANCELLED")
+        raise
+    except Exception:
+        DOWNLOAD_CANCELLED.set()  # skip pending track downloads
+        yield dict(downloaded="[red]FAILED")
+        raise
+
+
+def n_m3u8dl_re(
+    urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
+    track: object,
+    output_dir: Path,
+    filename: str,
+    headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
+    cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
+    proxy: Optional[str] = None,
+    max_workers: Optional[int] = None,
+    content_keys: Optional[dict[str, Any]] = None,
+) -> Generator[dict[str, Any], None, None]:
+    """
+    Download files using N_m3u8DL-RE.
+    https://github.com/nilaoda/N_m3u8DL-RE
+
+    Yields the following download status updates while chunks are downloading:
+
+    - {total: 100} (100% download total)
+    - {completed: 1} (1% download progress out of 100%)
+    - {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
+
+    The data is in the same format accepted by rich's progress.update() function.
+
+    Parameters:
+        urls: Web URL(s) to file(s) to download. You can use a dictionary with the key
+            "url" for the URI, and other keys for extra arguments to use per-URL.
+        track: The track to download. Used to get track attributes for the selection
+            process. Note that Track.Descriptor.URL is not supported by N_m3u8DL-RE.
+        output_dir: The folder to save the file into. If the save path's directory does
+            not exist then it will be made automatically.
+        filename: The filename or filename template to use for each file. The variables
+            you can use are `i` for the URL index and `ext` for the URL extension.
+        headers: A mapping of HTTP Header Key/Values to use for the download.
+        cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for the download.
+        max_workers: The maximum amount of threads to use for downloads. Defaults to
+            min(32,(cpu_count+4)). Can be set in config with --thread-count option.
+        content_keys: The content keys to use for decryption.
+    """
+    track_type = track.__class__.__name__
+
+    log = logging.getLogger("N_m3u8DL-RE")
+    if proxy and not config.n_m3u8dl_re.get("use_proxy", True):
+        log.warning(f"{track_type}: Ignoring proxy as N_m3u8DL-RE is set to use_proxy=False")
+        proxy = None
+
+    yield from download(urls, track, output_dir, filename, headers, cookies, proxy, max_workers, content_keys)
+
+
+__all__ = ("n_m3u8dl_re",)