diff --git a/tests/tracks/test_subtitle_skip.py b/tests/tracks/test_subtitle_skip.py new file mode 100644 index 0000000..daf4d40 --- /dev/null +++ b/tests/tracks/test_subtitle_skip.py @@ -0,0 +1,193 @@ +"""Tests for ``download_tracks_in_passes`` - the two-pass track download used by +``dl.result()`` when ``--skip-subtitle-errors`` is set. + +The behaviour under test is the cancel-event interaction: a failed track sets the +process-global ``DOWNLOAD_CANCELLED`` event, which makes other in-flight tracks +early-return without raising. Running the fatal video/audio tracks to completion +*before* the skippable subtitles removes that race, so a subtitle failure can no +longer truncate the video/audio that still gets muxed. +""" + +from __future__ import annotations + +import pytest + +from unshackle.commands.dl import SkippedSubtitle, download_tracks_in_passes +from unshackle.core.constants import DOWNLOAD_CANCELLED +from unshackle.core.tracks import Audio, Subtitle, Video + + +def make_video(track_id: str = "v") -> Video: + return Video( + id_=track_id, + url=f"https://example.test/{track_id}.m3u8", + language="en", + codec=Video.Codec.AVC, + range_=Video.Range.SDR, + width=1920, + height=1080, + bitrate=5_000_000, + ) + + +def make_audio(track_id: str = "a") -> Audio: + return Audio( + id_=track_id, + url=f"https://example.test/{track_id}.m3u8", + language="en", + codec=Audio.Codec.AAC, + bitrate=128_000, + ) + + +def make_subtitle(track_id: str, language: str) -> Subtitle: + return Subtitle( + id_=track_id, + url=f"https://example.test/{track_id}.vtt", + language=language, + codec=Subtitle.Codec.WebVTT, + ) + + +class Harness: + """Mimics how ``dl.result`` drives the helper, with a controllable downloader. + + ``run_one`` mirrors ``Track.download``: it early-returns (without recording a + completion) when the cancel event is already set, and a track flagged to fail + sets the event and raises - exactly what the real cancel sites do. + """ + + def __init__(self, fail_ids: set[str]): + self.fail_ids = fail_ids + self.completed: list[str] = [] + self.early_returned: list[str] = [] + self.skipped: list[Subtitle] = [] + + def run_one(self, track, _index): + if DOWNLOAD_CANCELLED.is_set(): + self.early_returned.append(track.id) + return + if track.id in self.fail_ids: + DOWNLOAD_CANCELLED.set() + raise RuntimeError(f"{track.id} failed") + self.completed.append(track.id) + + def on_subtitle_skipped(self, track): + self.skipped.append(track) + + +@pytest.fixture(autouse=True) +def _clear_event(): + DOWNLOAD_CANCELLED.clear() + yield + DOWNLOAD_CANCELLED.clear() + + +def test_failed_subtitle_does_not_truncate_video_or_audio(): + """A subtitle that fails *and sets the cancel event* must not stop the video/audio.""" + video, audio = make_video(), make_audio() + sub = make_subtitle("s-he", "he") + h = Harness(fail_ids={"s-he"}) + + download_tracks_in_passes( + [video, audio, sub], 4, h.run_one, + skip_subtitle_errors=True, on_subtitle_skipped=h.on_subtitle_skipped, + ) + + assert set(h.completed) == {"v", "a"} # both fatal tracks fully downloaded + assert h.early_returned == [] # nothing early-returned because of the subtitle + assert h.skipped == [sub] # the subtitle was recorded as skipped + + +def test_good_subtitle_kept_bad_subtitle_skipped(): + video = make_video() + good, bad = make_subtitle("s-en", "en"), make_subtitle("s-fr", "fr") + h = Harness(fail_ids={"s-fr"}) + + download_tracks_in_passes( + [video, good, bad], 4, h.run_one, + skip_subtitle_errors=True, on_subtitle_skipped=h.on_subtitle_skipped, + ) + + assert "s-en" in h.completed # the available subtitle still downloaded + assert h.skipped == [bad] # only the failing one was skipped + + +def test_subtitle_failure_stays_fatal_without_flag(): + """Default behaviour (flag off) is unchanged: a subtitle failure aborts the title.""" + video = make_video() + sub = make_subtitle("s-he", "he") + h = Harness(fail_ids={"s-he"}) + + with pytest.raises(RuntimeError): + download_tracks_in_passes( + [video, sub], 4, h.run_one, + skip_subtitle_errors=False, on_subtitle_skipped=h.on_subtitle_skipped, + ) + + assert not DOWNLOAD_CANCELLED.is_set() # the finally clears the event even on the fatal path + + +def test_cancel_event_is_reset_between_titles(): + """A cancel left set by a previous title must not skip this title's tracks.""" + DOWNLOAD_CANCELLED.set() + video, audio = make_video(), make_audio() + h = Harness(fail_ids=set()) + + download_tracks_in_passes( + [video, audio], 4, h.run_one, + skip_subtitle_errors=True, on_subtitle_skipped=h.on_subtitle_skipped, + ) + + assert set(h.completed) == {"v", "a"} + assert h.early_returned == [] + + +def test_cancel_event_cleared_after_failed_final_subtitle(): + """A subtitle failing in the last pass leaves the event clear on exit, not set.""" + video = make_video() + sub = make_subtitle("s-he", "he") + h = Harness(fail_ids={"s-he"}) + + download_tracks_in_passes( + [video, sub], 4, h.run_one, + skip_subtitle_errors=True, on_subtitle_skipped=h.on_subtitle_skipped, + ) + + assert not DOWNLOAD_CANCELLED.is_set() # the helper clears it on exit for any later code + + +def test_all_subtitles_skipped_video_audio_kept(): + """Every subtitle failing must not stop the video/audio, and each is recorded.""" + video, audio = make_video(), make_audio() + s1, s2 = make_subtitle("s-en", "en"), make_subtitle("s-he", "he") + h = Harness(fail_ids={"s-en", "s-he"}) + + download_tracks_in_passes( + [video, audio, s1, s2], 4, h.run_one, + skip_subtitle_errors=True, on_subtitle_skipped=h.on_subtitle_skipped, + ) + + assert set(h.completed) == {"v", "a"} # both fatal tracks survived + assert {t.id for t in h.skipped} == {"s-en", "s-he"} # every failing subtitle recorded + + +def test_duplicate_language_subtitles_distinguished_by_id(): + """Forced + SDH share a language; a failure of each must be distinguishable by track id - + the reason ``SkippedSubtitle`` carries ``id`` and not just ``language``.""" + forced, sdh = make_subtitle("en-forced", "en"), make_subtitle("en-sdh", "en") + h = Harness(fail_ids={"en-forced", "en-sdh"}) + + download_tracks_in_passes( + [make_video(), forced, sdh], 4, h.run_one, + skip_subtitle_errors=True, on_subtitle_skipped=h.on_subtitle_skipped, + ) + + assert [t.id for t in h.skipped] == ["en-forced", "en-sdh"] # same language, distinct ids + + +def test_skipped_subtitle_contract(): + """Pin the public ``skipped_subtitles`` entry shape - #113 serializes it into the job, so a + field rename/removal here is a breaking change and must fail a test.""" + assert set(SkippedSubtitle.__annotations__) == {"id", "language", "title"} + assert SkippedSubtitle.__required_keys__ == frozenset({"id", "language", "title"}) diff --git a/unshackle/commands/dl.py b/unshackle/commands/dl.py index 1829678..274ef1a 100644 --- a/unshackle/commands/dl.py +++ b/unshackle/commands/dl.py @@ -11,6 +11,7 @@ import shutil import subprocess import sys import time +from collections.abc import Iterable from concurrent import futures from concurrent.futures import ThreadPoolExecutor from copy import deepcopy @@ -19,7 +20,7 @@ from http.cookiejar import CookieJar, MozillaCookieJar from itertools import product from pathlib import Path from threading import Lock -from typing import Any, Callable, Optional +from typing import Any, Callable, Optional, TypedDict from uuid import UUID import click @@ -42,7 +43,7 @@ from unshackle.core.cdm import DecryptLabsRemoteCDM from unshackle.core.cdm.detect import is_playready_cdm, is_widevine_cdm from unshackle.core.config import config from unshackle.core.console import console -from unshackle.core.constants import DOWNLOAD_LICENCE_ONLY, AnyTrack, context_settings +from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack, context_settings from unshackle.core.credential import Credential from unshackle.core.drm import DRM_T, MonaLisa, PlayReady, Widevine from unshackle.core.events import events @@ -70,6 +71,59 @@ from unshackle.core.utils.subprocess import ffprobe from unshackle.core.vaults import Vaults +class SkippedSubtitle(TypedDict): + """A subtitle skipped under ``--skip-subtitle-errors``. Accumulated as ``dl.skipped_subtitles``, + one entry per track; ``id`` and ``title`` identify which subtitle of which title was unavailable.""" + + id: str + language: str + title: str + + +def download_tracks_in_passes( + tracks: Iterable[AnyTrack], + max_concurrent: int, + run_one: Callable[[AnyTrack, int], None], + *, + skip_subtitle_errors: bool, + on_subtitle_skipped: Callable[[Subtitle], None], +) -> None: + """Download a title's tracks so a skippable subtitle failure can't corrupt the rest. + + A failed track sets the process-global ``DOWNLOAD_CANCELLED`` event, making other in-flight + tracks early-return without raising. With ``skip_subtitle_errors`` set, video/audio download + concurrently first; the subtitles then run one at a time (a concurrent pass would let one + failure's cancel silently drop the others, unrecorded), with the event cleared before each. + Video/audio failures stay fatal. The event is cleared on entry (stale cancel from a prior + title) and in ``finally`` (never leave it set for later code). + + ``run_one(track, index)`` downloads a single track; ``on_subtitle_skipped(track)`` records a + subtitle whose download raised (handled here). + """ + DOWNLOAD_CANCELLED.clear() + indexed = list(enumerate(tracks)) + if skip_subtitle_errors: + primary = [(i, t) for i, t in indexed if not isinstance(t, Subtitle)] + skippable = [(i, t) for i, t in indexed if isinstance(t, Subtitle)] + else: + primary, skippable = indexed, [] + + try: + with ThreadPoolExecutor(max_concurrent) as pool: + future_to_track = {pool.submit(run_one, track, i): track for i, track in primary} + for download in futures.as_completed(future_to_track): + download.result() # a video/audio failure is fatal + + for i, track in skippable: + DOWNLOAD_CANCELLED.clear() + try: + run_one(track, i) + except Exception: + on_subtitle_skipped(track) + finally: + DOWNLOAD_CANCELLED.clear() # never leave a failed track's cancel set for later code + + class dl: @staticmethod def truncate_pssh_for_display(pssh_string: str, drm_type: str) -> str: @@ -474,6 +528,9 @@ class dl: @click.option("-S", "--subs-only", is_flag=True, default=False, help="Only download subtitle tracks.") @click.option("-C", "--chapters-only", is_flag=True, default=False, help="Only download chapter markers.") @click.option("-ns", "--no-subs", is_flag=True, default=False, help="Do not download subtitle tracks.") + @click.option("--skip-subtitle-errors", is_flag=True, default=False, + help="If a subtitle track fails to download, skip it and continue instead of " + "aborting the whole title (video/audio failures stay fatal).") @click.option("-na", "--no-audio", is_flag=True, default=False, help="Do not download audio tracks.") @click.option("-nc", "--no-chapters", is_flag=True, default=False, help="Do not download chapter markers.") @click.option("-nv", "--no-video", is_flag=True, default=False, help="Do not download video tracks.") @@ -609,6 +666,9 @@ class dl: self.log = logging.getLogger("download") self.completed_files: list[Path] = [] + # Subtitles skipped under --skip-subtitle-errors, recorded so an embedding caller can + # report which weren't available without parsing the console output. See SkippedSubtitle. + self.skipped_subtitles: list[SkippedSubtitle] = [] if not config.output_template: raise click.ClickException( @@ -1051,6 +1111,7 @@ class dl: subs_only: bool, chapters_only: bool, no_subs: bool, + skip_subtitle_errors: bool, no_audio: bool, no_chapters: bool, no_video: bool, @@ -2181,41 +2242,64 @@ class dl: try: with Live(Padding(download_table, (1, 5)), console=console, refresh_per_second=5): - with ThreadPoolExecutor(downloads) as pool: - for download in futures.as_completed( - ( - pool.submit( - track.download, - session=track.session or service.session, - no_proxy_download=no_proxy_download, - prepare_drm=partial( - partial(self.prepare_drm, table=download_table), - track=track, - title=title, - certificate=partial( - service.get_widevine_service_certificate, - title=title, - track=track, - ), - licence=partial( - service.get_playready_license - if is_playready_cdm(self.cdm) - else service.get_widevine_license, - title=title, - track=track, - ), - cdm_only=cdm_only, - vaults_only=vaults_only, - export=export_path, - ), - cdm=self.cdm, - max_workers=workers, - progress=tracks_progress_callables[i], - ) - for i, track in enumerate(title.tracks) - ) - ): - download.result() + + def download_track(track: AnyTrack, i: int) -> None: + track.download( + session=track.session or service.session, + no_proxy_download=no_proxy_download, + prepare_drm=partial( + partial(self.prepare_drm, table=download_table), + track=track, + title=title, + certificate=partial( + service.get_widevine_service_certificate, + title=title, + track=track, + ), + licence=partial( + service.get_playready_license + if is_playready_cdm(self.cdm) + else service.get_widevine_license, + title=title, + track=track, + ), + cdm_only=cdm_only, + vaults_only=vaults_only, + export=export_path, + ), + cdm=self.cdm, + max_workers=workers, + progress=tracks_progress_callables[i], + ) + + def on_subtitle_skipped(track: Subtitle) -> None: + lang = str(track.language) + self.log.warning(f"Subtitle {lang} failed to download, skipping it.") + self.skipped_subtitles.append(SkippedSubtitle(id=track.id, language=lang, title=str(title))) + try: + title.tracks.subtitles.remove(track) + except ValueError: + self.log.debug(f"Skipped subtitle {track.id} was already absent from the track list.") + + skipped_before = len(self.skipped_subtitles) + download_tracks_in_passes( + title.tracks, + downloads, + download_track, + skip_subtitle_errors=skip_subtitle_errors, + on_subtitle_skipped=on_subtitle_skipped, + ) + + if ( + len(self.skipped_subtitles) > skipped_before + and not title.tracks.videos + and not title.tracks.audio + and not title.tracks.subtitles + ): + self.log.warning( + f"{title}: all subtitles were skipped and no video or audio was " + "downloaded - nothing was produced for this title." + ) except KeyboardInterrupt: console.print(Padding(":x: Download Cancelled...", (0, 5, 1, 5)))