refactor(dl): type the two-pass download helper + pin skipped_subtitles shape

Addresses review feedback on the failed-subtitle handling:
- Fully annotate download_tracks_in_passes and the download_track closure (MyPy strict).
- Replace list[str] skipped_subtitles with a documented SkippedSubtitle TypedDict
  (id + language + title) so a client can report which subtitle of which title was
  skipped; pin the shape with a contract test.
- Clear DOWNLOAD_CANCELLED in a finally so no failed track leaves it set for later code.
- Document why the subtitle pass must stay sequential (a concurrent pass would silently
  drop in-flight subtitles via the cancel event).
- Warn only when a title skipped a subtitle and produced no video/audio/subtitle (was a
  loose len(title.tracks) check that ignored chapters/attachments).
- Narrow the over-broad remove() except to ValueError with a debug log.
- Add tests: final-clear on the fatal path, all-subs-skipped keeps video/audio, and
  duplicate-language subtitles distinguished by id.
This commit is contained in:
Avi Cohen
2026-06-06 09:27:21 +03:00
parent 73d2cbccf8
commit 4fdffc9435
2 changed files with 113 additions and 30 deletions

View File

@@ -12,7 +12,7 @@ from __future__ import annotations
import pytest import pytest
from unshackle.commands.dl import download_tracks_in_passes from unshackle.commands.dl import SkippedSubtitle, download_tracks_in_passes
from unshackle.core.constants import DOWNLOAD_CANCELLED from unshackle.core.constants import DOWNLOAD_CANCELLED
from unshackle.core.tracks import Audio, Subtitle, Video from unshackle.core.tracks import Audio, Subtitle, Video
@@ -125,6 +125,8 @@ def test_subtitle_failure_stays_fatal_without_flag():
skip_subtitle_errors=False, on_subtitle_skipped=h.on_subtitle_skipped, skip_subtitle_errors=False, on_subtitle_skipped=h.on_subtitle_skipped,
) )
assert not DOWNLOAD_CANCELLED.is_set() # the finally clears the event even on the fatal path
def test_cancel_event_is_reset_between_titles(): def test_cancel_event_is_reset_between_titles():
"""A cancel left set by a previous title must not skip this title's tracks.""" """A cancel left set by a previous title must not skip this title's tracks."""
@@ -139,3 +141,53 @@ def test_cancel_event_is_reset_between_titles():
assert set(h.completed) == {"v", "a"} assert set(h.completed) == {"v", "a"}
assert h.early_returned == [] assert h.early_returned == []
def test_cancel_event_cleared_after_failed_final_subtitle():
"""A subtitle failing in the last pass leaves the event clear on exit, not set."""
video = make_video()
sub = make_subtitle("s-he", "he")
h = Harness(fail_ids={"s-he"})
download_tracks_in_passes(
[video, sub], 4, h.run_one,
skip_subtitle_errors=True, on_subtitle_skipped=h.on_subtitle_skipped,
)
assert not DOWNLOAD_CANCELLED.is_set() # the helper clears it on exit for any later code
def test_all_subtitles_skipped_video_audio_kept():
"""Every subtitle failing must not stop the video/audio, and each is recorded."""
video, audio = make_video(), make_audio()
s1, s2 = make_subtitle("s-en", "en"), make_subtitle("s-he", "he")
h = Harness(fail_ids={"s-en", "s-he"})
download_tracks_in_passes(
[video, audio, s1, s2], 4, h.run_one,
skip_subtitle_errors=True, on_subtitle_skipped=h.on_subtitle_skipped,
)
assert set(h.completed) == {"v", "a"} # both fatal tracks survived
assert {t.id for t in h.skipped} == {"s-en", "s-he"} # every failing subtitle recorded
def test_duplicate_language_subtitles_distinguished_by_id():
"""Forced + SDH share a language; a failure of each must be distinguishable by track id -
the reason ``SkippedSubtitle`` carries ``id`` and not just ``language``."""
forced, sdh = make_subtitle("en-forced", "en"), make_subtitle("en-sdh", "en")
h = Harness(fail_ids={"en-forced", "en-sdh"})
download_tracks_in_passes(
[make_video(), forced, sdh], 4, h.run_one,
skip_subtitle_errors=True, on_subtitle_skipped=h.on_subtitle_skipped,
)
assert [t.id for t in h.skipped] == ["en-forced", "en-sdh"] # same language, distinct ids
def test_skipped_subtitle_contract():
"""Pin the public ``skipped_subtitles`` entry shape - #113 serializes it into the job, so a
field rename/removal here is a breaking change and must fail a test."""
assert set(SkippedSubtitle.__annotations__) == {"id", "language", "title"}
assert SkippedSubtitle.__required_keys__ == frozenset({"id", "language", "title"})

View File

@@ -11,6 +11,7 @@ import shutil
import subprocess import subprocess
import sys import sys
import time import time
from collections.abc import Iterable
from concurrent import futures from concurrent import futures
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy from copy import deepcopy
@@ -19,7 +20,7 @@ from http.cookiejar import CookieJar, MozillaCookieJar
from itertools import product from itertools import product
from pathlib import Path from pathlib import Path
from threading import Lock from threading import Lock
from typing import Any, Callable, Optional from typing import Any, Callable, Optional, TypedDict
from uuid import UUID from uuid import UUID
import click import click
@@ -70,19 +71,34 @@ from unshackle.core.utils.subprocess import ffprobe
from unshackle.core.vaults import Vaults from unshackle.core.vaults import Vaults
def download_tracks_in_passes(tracks, max_concurrent, run_one, *, skip_subtitle_errors, on_subtitle_skipped): class SkippedSubtitle(TypedDict):
"""Download a title's tracks, keeping a skippable subtitle failure from corrupting the rest. """A subtitle skipped under ``--skip-subtitle-errors``. Accumulated as ``dl.skipped_subtitles``,
one entry per track; ``id`` and ``title`` identify which subtitle of which title was unavailable."""
A failed track sets the process-global ``DOWNLOAD_CANCELLED`` event, which makes other id: str
in-flight tracks early-return without raising - so catching a subtitle failure after the fact language: str
can leave a half-downloaded video/audio that still gets muxed. When ``skip_subtitle_errors`` title: str
is set the fatal tracks (video/audio) are therefore downloaded concurrently first, and the
skippable subtitles run in a separate sequential pass once nothing else is in flight, with the
event reset before each so one failure can't poison the next. Video/Audio failures stay fatal. def download_tracks_in_passes(
The event is also reset up front so a cancel from a previous title can't carry over. tracks: Iterable[AnyTrack],
max_concurrent: int,
run_one: Callable[[AnyTrack, int], None],
*,
skip_subtitle_errors: bool,
on_subtitle_skipped: Callable[[Subtitle], None],
) -> None:
"""Download a title's tracks so a skippable subtitle failure can't corrupt the rest.
A failed track sets the process-global ``DOWNLOAD_CANCELLED`` event, making other in-flight
tracks early-return without raising. With ``skip_subtitle_errors`` set, video/audio download
concurrently first; the subtitles then run one at a time (a concurrent pass would let one
failure's cancel silently drop the others, unrecorded), with the event cleared before each.
Video/audio failures stay fatal. The event is cleared on entry (stale cancel from a prior
title) and in ``finally`` (never leave it set for later code).
``run_one(track, index)`` downloads a single track; ``on_subtitle_skipped(track)`` records a ``run_one(track, index)`` downloads a single track; ``on_subtitle_skipped(track)`` records a
subtitle whose download raised (the exception is handled here). subtitle whose download raised (handled here).
""" """
DOWNLOAD_CANCELLED.clear() DOWNLOAD_CANCELLED.clear()
indexed = list(enumerate(tracks)) indexed = list(enumerate(tracks))
@@ -92,6 +108,7 @@ def download_tracks_in_passes(tracks, max_concurrent, run_one, *, skip_subtitle_
else: else:
primary, skippable = indexed, [] primary, skippable = indexed, []
try:
with ThreadPoolExecutor(max_concurrent) as pool: with ThreadPoolExecutor(max_concurrent) as pool:
future_to_track = {pool.submit(run_one, track, i): track for i, track in primary} future_to_track = {pool.submit(run_one, track, i): track for i, track in primary}
for download in futures.as_completed(future_to_track): for download in futures.as_completed(future_to_track):
@@ -103,6 +120,8 @@ def download_tracks_in_passes(tracks, max_concurrent, run_one, *, skip_subtitle_
run_one(track, i) run_one(track, i)
except Exception: except Exception:
on_subtitle_skipped(track) on_subtitle_skipped(track)
finally:
DOWNLOAD_CANCELLED.clear() # never leave a failed track's cancel set for later code
class dl: class dl:
@@ -648,8 +667,8 @@ class dl:
self.log = logging.getLogger("download") self.log = logging.getLogger("download")
self.completed_files: list[Path] = [] self.completed_files: list[Path] = []
# Subtitles skipped under --skip-subtitle-errors, recorded so an embedding caller can # Subtitles skipped under --skip-subtitle-errors, recorded so an embedding caller can
# report which weren't available without parsing the console output. # report which weren't available without parsing the console output. See SkippedSubtitle.
self.skipped_subtitles: list[str] = [] self.skipped_subtitles: list[SkippedSubtitle] = []
if not config.output_template: if not config.output_template:
raise click.ClickException( raise click.ClickException(
@@ -2248,7 +2267,7 @@ class dl:
try: try:
with Live(Padding(download_table, (1, 5)), console=console, refresh_per_second=5): with Live(Padding(download_table, (1, 5)), console=console, refresh_per_second=5):
def download_track(track, i): def download_track(track: AnyTrack, i: int) -> None:
track.download( track.download(
session=track.session or service.session, session=track.session or service.session,
no_proxy_download=no_proxy_download, no_proxy_download=no_proxy_download,
@@ -2277,15 +2296,16 @@ class dl:
progress=tracks_progress_callables[i], progress=tracks_progress_callables[i],
) )
def on_subtitle_skipped(track): def on_subtitle_skipped(track: Subtitle) -> None:
lang = str(getattr(track, "language", "") or "") lang = str(track.language)
self.log.warning(f"Subtitle {lang} failed to download, skipping it.") self.log.warning(f"Subtitle {lang} failed to download, skipping it.")
self.skipped_subtitles.append(lang) self.skipped_subtitles.append(SkippedSubtitle(id=track.id, language=lang, title=str(title)))
try: try:
title.tracks.subtitles.remove(track) title.tracks.subtitles.remove(track)
except (ValueError, AttributeError): except ValueError:
pass self.log.debug(f"Skipped subtitle {track.id} was already absent from the track list.")
skipped_before = len(self.skipped_subtitles)
download_tracks_in_passes( download_tracks_in_passes(
title.tracks, title.tracks,
downloads, downloads,
@@ -2294,6 +2314,17 @@ class dl:
on_subtitle_skipped=on_subtitle_skipped, on_subtitle_skipped=on_subtitle_skipped,
) )
if (
len(self.skipped_subtitles) > skipped_before
and not title.tracks.videos
and not title.tracks.audio
and not title.tracks.subtitles
):
self.log.warning(
f"{title}: all subtitles were skipped and no video or audio was "
"downloaded - nothing was produced for this title."
)
except KeyboardInterrupt: except KeyboardInterrupt:
console.print(Padding(":x: Download Cancelled...", (0, 5, 1, 5))) console.print(Padding(":x: Download Cancelled...", (0, 5, 1, 5)))
if self.debug_logger: if self.debug_logger: