From 4fdffc94353356729e5e53a07f1229dff2ad6f33 Mon Sep 17 00:00:00 2001 From: Avi Cohen Date: Sat, 6 Jun 2026 09:27:21 +0300 Subject: [PATCH] refactor(dl): type the two-pass download helper + pin skipped_subtitles shape Addresses review feedback on the failed-subtitle handling: - Fully annotate download_tracks_in_passes and the download_track closure (MyPy strict). - Replace list[str] skipped_subtitles with a documented SkippedSubtitle TypedDict (id + language + title) so a client can report which subtitle of which title was skipped; pin the shape with a contract test. - Clear DOWNLOAD_CANCELLED in a finally so no failed track leaves it set for later code. - Document why the subtitle pass must stay sequential (a concurrent pass would silently drop in-flight subtitles via the cancel event). - Warn only when a title skipped a subtitle and produced no video/audio/subtitle (was a loose len(title.tracks) check that ignored chapters/attachments). - Narrow the over-broad remove() except to ValueError with a debug log. - Add tests: final-clear on the fatal path, all-subs-skipped keeps video/audio, and duplicate-language subtitles distinguished by id. --- tests/tracks/test_subtitle_skip.py | 54 +++++++++++++++++- unshackle/commands/dl.py | 89 ++++++++++++++++++++---------- 2 files changed, 113 insertions(+), 30 deletions(-) diff --git a/tests/tracks/test_subtitle_skip.py b/tests/tracks/test_subtitle_skip.py index 699bcd2..daf4d40 100644 --- a/tests/tracks/test_subtitle_skip.py +++ b/tests/tracks/test_subtitle_skip.py @@ -12,7 +12,7 @@ from __future__ import annotations import pytest -from unshackle.commands.dl import download_tracks_in_passes +from unshackle.commands.dl import SkippedSubtitle, download_tracks_in_passes from unshackle.core.constants import DOWNLOAD_CANCELLED from unshackle.core.tracks import Audio, Subtitle, Video @@ -125,6 +125,8 @@ def test_subtitle_failure_stays_fatal_without_flag(): skip_subtitle_errors=False, on_subtitle_skipped=h.on_subtitle_skipped, ) + assert not DOWNLOAD_CANCELLED.is_set() # the finally clears the event even on the fatal path + def test_cancel_event_is_reset_between_titles(): """A cancel left set by a previous title must not skip this title's tracks.""" @@ -139,3 +141,53 @@ def test_cancel_event_is_reset_between_titles(): assert set(h.completed) == {"v", "a"} assert h.early_returned == [] + + +def test_cancel_event_cleared_after_failed_final_subtitle(): + """A subtitle failing in the last pass leaves the event clear on exit, not set.""" + video = make_video() + sub = make_subtitle("s-he", "he") + h = Harness(fail_ids={"s-he"}) + + download_tracks_in_passes( + [video, sub], 4, h.run_one, + skip_subtitle_errors=True, on_subtitle_skipped=h.on_subtitle_skipped, + ) + + assert not DOWNLOAD_CANCELLED.is_set() # the helper clears it on exit for any later code + + +def test_all_subtitles_skipped_video_audio_kept(): + """Every subtitle failing must not stop the video/audio, and each is recorded.""" + video, audio = make_video(), make_audio() + s1, s2 = make_subtitle("s-en", "en"), make_subtitle("s-he", "he") + h = Harness(fail_ids={"s-en", "s-he"}) + + download_tracks_in_passes( + [video, audio, s1, s2], 4, h.run_one, + skip_subtitle_errors=True, on_subtitle_skipped=h.on_subtitle_skipped, + ) + + assert set(h.completed) == {"v", "a"} # both fatal tracks survived + assert {t.id for t in h.skipped} == {"s-en", "s-he"} # every failing subtitle recorded + + +def test_duplicate_language_subtitles_distinguished_by_id(): + """Forced + SDH share a language; a failure of each must be distinguishable by track id - + the reason ``SkippedSubtitle`` carries ``id`` and not just ``language``.""" + forced, sdh = make_subtitle("en-forced", "en"), make_subtitle("en-sdh", "en") + h = Harness(fail_ids={"en-forced", "en-sdh"}) + + download_tracks_in_passes( + [make_video(), forced, sdh], 4, h.run_one, + skip_subtitle_errors=True, on_subtitle_skipped=h.on_subtitle_skipped, + ) + + assert [t.id for t in h.skipped] == ["en-forced", "en-sdh"] # same language, distinct ids + + +def test_skipped_subtitle_contract(): + """Pin the public ``skipped_subtitles`` entry shape - #113 serializes it into the job, so a + field rename/removal here is a breaking change and must fail a test.""" + assert set(SkippedSubtitle.__annotations__) == {"id", "language", "title"} + assert SkippedSubtitle.__required_keys__ == frozenset({"id", "language", "title"}) diff --git a/unshackle/commands/dl.py b/unshackle/commands/dl.py index 210bc90..30a7a8e 100644 --- a/unshackle/commands/dl.py +++ b/unshackle/commands/dl.py @@ -11,6 +11,7 @@ import shutil import subprocess import sys import time +from collections.abc import Iterable from concurrent import futures from concurrent.futures import ThreadPoolExecutor from copy import deepcopy @@ -19,7 +20,7 @@ from http.cookiejar import CookieJar, MozillaCookieJar from itertools import product from pathlib import Path from threading import Lock -from typing import Any, Callable, Optional +from typing import Any, Callable, Optional, TypedDict from uuid import UUID import click @@ -70,19 +71,34 @@ from unshackle.core.utils.subprocess import ffprobe from unshackle.core.vaults import Vaults -def download_tracks_in_passes(tracks, max_concurrent, run_one, *, skip_subtitle_errors, on_subtitle_skipped): - """Download a title's tracks, keeping a skippable subtitle failure from corrupting the rest. +class SkippedSubtitle(TypedDict): + """A subtitle skipped under ``--skip-subtitle-errors``. Accumulated as ``dl.skipped_subtitles``, + one entry per track; ``id`` and ``title`` identify which subtitle of which title was unavailable.""" - A failed track sets the process-global ``DOWNLOAD_CANCELLED`` event, which makes other - in-flight tracks early-return without raising - so catching a subtitle failure after the fact - can leave a half-downloaded video/audio that still gets muxed. When ``skip_subtitle_errors`` - is set the fatal tracks (video/audio) are therefore downloaded concurrently first, and the - skippable subtitles run in a separate sequential pass once nothing else is in flight, with the - event reset before each so one failure can't poison the next. Video/Audio failures stay fatal. - The event is also reset up front so a cancel from a previous title can't carry over. + id: str + language: str + title: str + + +def download_tracks_in_passes( + tracks: Iterable[AnyTrack], + max_concurrent: int, + run_one: Callable[[AnyTrack, int], None], + *, + skip_subtitle_errors: bool, + on_subtitle_skipped: Callable[[Subtitle], None], +) -> None: + """Download a title's tracks so a skippable subtitle failure can't corrupt the rest. + + A failed track sets the process-global ``DOWNLOAD_CANCELLED`` event, making other in-flight + tracks early-return without raising. With ``skip_subtitle_errors`` set, video/audio download + concurrently first; the subtitles then run one at a time (a concurrent pass would let one + failure's cancel silently drop the others, unrecorded), with the event cleared before each. + Video/audio failures stay fatal. The event is cleared on entry (stale cancel from a prior + title) and in ``finally`` (never leave it set for later code). ``run_one(track, index)`` downloads a single track; ``on_subtitle_skipped(track)`` records a - subtitle whose download raised (the exception is handled here). + subtitle whose download raised (handled here). """ DOWNLOAD_CANCELLED.clear() indexed = list(enumerate(tracks)) @@ -92,17 +108,20 @@ def download_tracks_in_passes(tracks, max_concurrent, run_one, *, skip_subtitle_ else: primary, skippable = indexed, [] - with ThreadPoolExecutor(max_concurrent) as pool: - future_to_track = {pool.submit(run_one, track, i): track for i, track in primary} - for download in futures.as_completed(future_to_track): - download.result() # a video/audio failure is fatal + try: + with ThreadPoolExecutor(max_concurrent) as pool: + future_to_track = {pool.submit(run_one, track, i): track for i, track in primary} + for download in futures.as_completed(future_to_track): + download.result() # a video/audio failure is fatal - for i, track in skippable: - DOWNLOAD_CANCELLED.clear() - try: - run_one(track, i) - except Exception: - on_subtitle_skipped(track) + for i, track in skippable: + DOWNLOAD_CANCELLED.clear() + try: + run_one(track, i) + except Exception: + on_subtitle_skipped(track) + finally: + DOWNLOAD_CANCELLED.clear() # never leave a failed track's cancel set for later code class dl: @@ -648,8 +667,8 @@ class dl: self.log = logging.getLogger("download") self.completed_files: list[Path] = [] # Subtitles skipped under --skip-subtitle-errors, recorded so an embedding caller can - # report which weren't available without parsing the console output. - self.skipped_subtitles: list[str] = [] + # report which weren't available without parsing the console output. See SkippedSubtitle. + self.skipped_subtitles: list[SkippedSubtitle] = [] if not config.output_template: raise click.ClickException( @@ -2248,7 +2267,7 @@ class dl: try: with Live(Padding(download_table, (1, 5)), console=console, refresh_per_second=5): - def download_track(track, i): + def download_track(track: AnyTrack, i: int) -> None: track.download( session=track.session or service.session, no_proxy_download=no_proxy_download, @@ -2277,15 +2296,16 @@ class dl: progress=tracks_progress_callables[i], ) - def on_subtitle_skipped(track): - lang = str(getattr(track, "language", "") or "") + def on_subtitle_skipped(track: Subtitle) -> None: + lang = str(track.language) self.log.warning(f"Subtitle {lang} failed to download, skipping it.") - self.skipped_subtitles.append(lang) + self.skipped_subtitles.append(SkippedSubtitle(id=track.id, language=lang, title=str(title))) try: title.tracks.subtitles.remove(track) - except (ValueError, AttributeError): - pass + except ValueError: + self.log.debug(f"Skipped subtitle {track.id} was already absent from the track list.") + skipped_before = len(self.skipped_subtitles) download_tracks_in_passes( title.tracks, downloads, @@ -2294,6 +2314,17 @@ class dl: on_subtitle_skipped=on_subtitle_skipped, ) + if ( + len(self.skipped_subtitles) > skipped_before + and not title.tracks.videos + and not title.tracks.audio + and not title.tracks.subtitles + ): + self.log.warning( + f"{title}: all subtitles were skipped and no video or audio was " + "downloaded - nothing was produced for this title." + ) + except KeyboardInterrupt: console.print(Padding(":x: Download Cancelled...", (0, 5, 1, 5))) if self.debug_logger: