feat(dl): gate s_lang/a_lang miss behind --best-available

When a requested subtitle or audio language is missing, the download now warns and continues if --best-available is set, instead of hard-exiting. Without the flag, a missing language still produces an error and exits, matching the prior strict behavior. Missing-language detection for audio is now symmetric with subtitles.

- add find_missing_langs helper in core/utilities for reuse between s_lang and a_lang paths (skips all/best/orig sentinels)
- refactor dl.py s_lang/a_lang checks to share the helper
- add tests/lang_selection covering match primitives, helper output, and tricky langcodes corner cases (zh-Hans/zh-Hant/zh-CN/zh-TW/zh-HK, cmn/yue, fil/tl/tgl)
- clean up unused-var ruff F841 in tests/remote/unit/
This commit is contained in:
imSp4rky
2026-05-22 13:52:35 -06:00
parent b0ae88812c
commit 7654e91ebc
11 changed files with 365 additions and 42 deletions

View File

View File

@@ -0,0 +1,103 @@
from __future__ import annotations
import pytest
from unshackle.core.utilities import find_missing_langs
# Each row is (requested, available, expected_missing) for exact=False.
# Row order is significant: pytest derives the test IDs from it.
_CLOSE_MATCH_CASES = [
    # Plain hits and misses.
    (["en"], ["en"], []),
    (["en", "ja"], ["en", "ja", "fr"], []),
    (["en", "fil"], ["en"], ["fil"]),
    (["en", "ko", "ja"], ["en"], ["ko", "ja"]),
    (["fil"], ["en", "ja"], ["fil"]),
    (["fil", "ko"], ["en"], ["fil", "ko"]),
    # Regional variants of the same language.
    (["es"], ["es-419"], []),
    (["es-419"], ["es"], []),
    (["en"], ["en-US"], []),
    # The all/best/orig sentinels are never reported as missing.
    (["all"], [], []),
    (["best"], [], []),
    (["orig"], [], []),
    (["all", "en"], ["en"], []),
    (["best", "fil"], ["en"], ["fil"]),
    # Empty inputs and None entries in the available list.
    ([], ["en"], []),
    (["en"], [], ["en"]),
    (["en"], [None, "en"], []),
    (["en"], [None], ["en"]),
    # Chinese: script and region variants.
    (["zh"], ["zh-Hans"], []),
    (["zh-CN"], ["zh-Hans"], []),
    (["zh-Hans"], ["zh-CN"], []),
    (["zh-TW"], ["zh-Hant"], []),
    (["zh-Hant"], ["zh-TW"], []),
    (["zh"], ["zh-Hant"], ["zh"]),
    (["zh-Hans"], ["zh-Hant"], ["zh-Hans"]),
    (["zh-CN"], ["zh-TW"], ["zh-CN"]),
    (["zh-HK"], ["zh-Hant"], []),
    # Chinese: cmn / yue codes.
    (["zh"], ["cmn"], []),
    (["cmn"], ["zh"], []),
    (["zh"], ["yue"], ["zh"]),
    (["yue"], ["zh-HK"], ["yue"]),
    # fil / tl / tgl codes.
    (["fil"], ["tl"], []),
    (["tl"], ["fil"], []),
    (["fil"], ["tgl"], []),
    (["tgl"], ["fil"], []),
    (["fil"], ["fil-PH"], []),
    (["tl"], ["fil-PH"], []),
]


@pytest.mark.parametrize(("requested", "available", "expected"), _CLOSE_MATCH_CASES)
def test_close_match(requested, available, expected):
    """find_missing_langs(..., exact=False) returns exactly the expected list."""
    missing = find_missing_langs(requested, available, exact=False)
    assert missing == expected
# Each row is (requested, available, expected_missing) for exact=True.
# Row order is significant: pytest derives the test IDs from it.
_EXACT_MATCH_CASES = [
    # Regional variants.
    (["es"], ["es-419"], ["es"]),
    (["es-419"], ["es"], ["es-419"]),
    (["en"], ["en-US"], []),
    (["en-US"], ["en"], []),
    (["en"], ["en-GB"], ["en"]),
    (["en-US"], ["en-GB"], ["en-US"]),
    (["en-US"], ["en-US"], []),
    (["en-US", "en-GB"], ["en-US"], ["en-GB"]),
    (["en"], ["en"], []),
    # A sentinel mixed with a real language code.
    (["all", "es-419"], ["es"], ["es-419"]),
    # Chinese script, region and cmn variants.
    (["zh"], ["zh-Hans"], []),
    (["zh-CN"], ["zh-Hans"], []),
    (["zh-TW"], ["zh-Hant"], []),
    (["zh"], ["cmn"], []),
    (["zh-HK"], ["zh-Hant"], ["zh-HK"]),
    (["zh-Hans"], ["zh-Hant"], ["zh-Hans"]),
    (["zh-CN"], ["zh-TW"], ["zh-CN"]),
    # fil / tl / tgl codes.
    (["fil"], ["tl"], []),
    (["tl"], ["fil"], []),
    (["fil"], ["tgl"], []),
    (["fil"], ["fil-PH"], []),
    (["tl"], ["fil-PH"], []),
]


@pytest.mark.parametrize(("requested", "available", "expected"), _EXACT_MATCH_CASES)
def test_exact_match(requested, available, expected):
    """find_missing_langs(..., exact=True) returns exactly the expected list."""
    missing = find_missing_langs(requested, available, exact=True)
    assert missing == expected
def test_order_preserved():
    """Missing languages are reported in the order they were requested."""
    missing = find_missing_langs(["ja", "ko", "fr"], ["en"], exact=False)
    assert missing == ["ja", "ko", "fr"]
def test_duplicates_in_request():
    """A missing language requested twice is reported twice."""
    missing = find_missing_langs(["fil", "fil", "en"], ["en"], exact=False)
    assert missing == ["fil", "fil"]
def test_zh_catalogue_simplified_only_misses_traditional():
    """With only zh-Hans available, both zh-Hant and zh-TW requests are missing."""
    available = ["en", "zh-Hans"]
    missing = find_missing_langs(["zh-Hant", "zh-TW"], available, exact=False)
    assert missing == ["zh-Hant", "zh-TW"]
def test_mixed_zh_fil_request():
    """In a mixed request, fil is satisfied by tl and only zh-Hans is missing."""
    requested = ["en", "zh-Hans", "fil"]
    missing = find_missing_langs(requested, ["en", "tl"], exact=False)
    assert missing == ["zh-Hans"]
def test_zh_cn_request_with_tw_catalogue():
    """zh-CN is missing from a zh-TW-only catalogue in both match modes."""
    # Close mode first, then exact mode, mirroring the original assertion order.
    for exact_mode in (False, True):
        missing = find_missing_langs(["zh-CN"], ["zh-TW"], exact=exact_mode)
        assert missing == ["zh-CN"]

View File

@@ -0,0 +1,78 @@
from __future__ import annotations
import pytest
from unshackle.core.utilities import is_close_match, is_exact_match
# Each row is (needle, haystack, expected_result) for is_close_match.
# Row order is significant: pytest derives the test IDs from it.
_IS_CLOSE_MATCH_CASES = [
    # Basic hits and misses, regional variants, and letter case.
    ("en", ["en"], True),
    ("fr", ["en", "de"], False),
    ("es", ["es-419"], True),
    ("es", ["es-ES"], True),
    ("es-419", ["es"], True),
    ("en", ["en-US"], True),
    ("en-US", ["en-GB"], True),
    ("EN", ["en"], True),
    ("en", ["EN"], True),
    ("ja", ["ko"], False),
    ("fil", ["en", "fr", "de"], False),
    # Empty haystack and None entries.
    ("en", [], False),
    ("en", [None, "en"], True),
    ("en", [None], False),
    # Chinese: script and region variants.
    ("zh", ["zh-Hans"], True),
    ("zh-CN", ["zh-Hans"], True),
    ("zh-Hans", ["zh-CN"], True),
    ("zh-TW", ["zh-Hant"], True),
    ("zh-Hant", ["zh-TW"], True),
    ("zh", ["zh-Hant"], False),
    ("zh-Hans", ["zh-Hant"], False),
    ("zh-CN", ["zh-TW"], False),
    ("zh-HK", ["zh-Hant"], True),
    # Chinese: cmn / yue codes.
    ("zh", ["cmn"], True),
    ("cmn", ["zh"], True),
    ("zh", ["yue"], False),
    ("yue", ["zh-HK"], False),
    # fil / tl / tgl codes.
    ("fil", ["tl"], True),
    ("tl", ["fil"], True),
    ("fil", ["tgl"], True),
    ("tgl", ["fil"], True),
    ("fil", ["fil-PH"], True),
    ("tl", ["fil-PH"], True),
]


@pytest.mark.parametrize(("needle", "haystack", "expected"), _IS_CLOSE_MATCH_CASES)
def test_is_close_match(needle, haystack, expected):
    """is_close_match returns the exact bool expected for each row."""
    outcome = is_close_match(needle, haystack)
    assert outcome is expected
# Each row is (needle, haystack, expected_result) for is_exact_match.
# Row order is significant: pytest derives the test IDs from it.
_IS_EXACT_MATCH_CASES = [
    # Regional variants, letter case, and plain misses.
    ("es", ["es-419"], False),
    ("es-419", ["es"], False),
    ("es-419", ["es-419"], True),
    ("en-US", ["en-GB"], False),
    ("en-US", ["en-US"], True),
    ("en", ["en"], True),
    ("EN", ["en"], True),
    ("fr", ["de"], False),
    ("fil", ["en"], False),
    ("en", [], False),
    # Chinese script, region and cmn variants.
    ("zh", ["zh-Hans"], True),
    ("zh-CN", ["zh-Hans"], True),
    ("zh-TW", ["zh-Hant"], True),
    ("zh", ["cmn"], True),
    ("zh-HK", ["zh-Hant"], False),
    ("zh-Hans", ["zh-Hant"], False),
    ("zh-CN", ["zh-TW"], False),
    # fil / tl / tgl codes.
    ("fil", ["tl"], True),
    ("tl", ["fil"], True),
    ("fil", ["tgl"], True),
    ("fil", ["fil-PH"], True),
    ("tl", ["fil-PH"], True),
]


@pytest.mark.parametrize(("needle", "haystack", "expected"), _IS_EXACT_MATCH_CASES)
def test_is_exact_match(needle, haystack, expected):
    """is_exact_match returns the exact bool expected for each row."""
    outcome = is_exact_match(needle, haystack)
    assert outcome is expected

View File

@@ -24,7 +24,6 @@ def _run(coro):
def test_skips_when_client_does_not_accept_gzip() -> None:
payload = b"x" * 4096
body_json = json.dumps({"data": "x" * 4096}).encode()
async def handler(req): # noqa: ARG001

View File

@@ -6,13 +6,8 @@ import json
import pytest
from unshackle.core.api.errors import (
APIError,
APIErrorCode,
build_error_response,
categorize_exception,
handle_api_exception,
)
from unshackle.core.api.errors import (APIError, APIErrorCode, build_error_response, categorize_exception,
handle_api_exception)
pytestmark = pytest.mark.unit

View File

@@ -5,16 +5,9 @@ from __future__ import annotations
import pytest
from langcodes import Language
from unshackle.core.api.handlers import (
sanitize_log,
serialize_audio_track,
serialize_drm,
serialize_subtitle_track,
serialize_title,
serialize_video_track,
validate_download_parameters,
validate_service,
)
from unshackle.core.api.handlers import (sanitize_log, serialize_audio_track, serialize_drm, serialize_subtitle_track,
serialize_title, serialize_video_track, validate_download_parameters,
validate_service)
from unshackle.core.titles.episode import Episode
from unshackle.core.titles.movie import Movie
from unshackle.core.tracks import Audio, Subtitle, Video

View File

@@ -6,16 +6,8 @@ from enum import Enum
import pytest
from unshackle.core.remote_service import (
_build_title,
_build_tracks,
_deserialize_audio,
_deserialize_subtitle,
_deserialize_video,
_enum_get,
_match_track,
_reconstruct_drm,
)
from unshackle.core.remote_service import (_build_title, _build_tracks, _deserialize_audio, _deserialize_subtitle,
_deserialize_video, _enum_get, _match_track, _reconstruct_drm)
from unshackle.core.titles.episode import Episode
from unshackle.core.titles.movie import Movie
from unshackle.core.tracks import Audio, Subtitle, Video

View File

@@ -105,7 +105,7 @@ async def test_get_session_store_returns_singleton() -> None:
async def test_max_sessions_evicts_oldest(store: SessionStore, monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setattr(type(store), "_max_sessions", property(lambda _: 2))
a = await store.create("A", _FakeService(), session_id="a")
await store.create("A", _FakeService(), session_id="a")
await asyncio.sleep(0.01)
b = await store.create("B", _FakeService(), session_id="b")
await asyncio.sleep(0.01)