Files
unshackle/unshackle/core/providers/_base.py
Andy 820db5f179 refactor(providers): extract metadata providers into modular system
- Create `unshackle/core/providers/` package with abstract base class, IMDBApi (free, no key), SIMKL, and TMDB provider implementations
- Add consensus-based ID enrichment: cross-references IMDB IDs with TMDB and SIMKL, drops all data from providers that disagree on tmdb_id (likely resolved to wrong title)
- Cache enriched IDs alongside raw provider data so they survive cache round-trips
- Genericize TitleCacher with `cache_provider()`/`get_cached_provider()` replacing provider-specific methods; respect `--no-cache` flag
- Add `--imdb` CLI flag to dl command for direct IMDB ID lookup
2026-02-25 19:02:18 -07:00

98 lines
3.0 KiB
Python

from __future__ import annotations
import logging
import re
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass, field
from difflib import SequenceMatcher
from typing import Optional, Union
import requests
from requests.adapters import HTTPAdapter, Retry
# Package-level logger; provider instances log under "METADATA.<NAME>" children.
log = logging.getLogger("METADATA")

# Headers applied to every provider HTTP session.
HEADERS = {"User-Agent": "unshackle-tags/1.0"}

# One or more consecutive characters that are not a-z / 0-9 (case-insensitive).
STRIP_RE = re.compile(r"[^a-z0-9]+", flags=re.IGNORECASE)

# A trailing four-digit year beginning with 1 or 2, optionally wrapped in
# parentheses, together with any whitespace in front of it.
YEAR_RE = re.compile(r"\s*\(?[12][0-9]{3}\)?$")
@dataclass
class ExternalIds:
    """Identifiers for one title as known to the different providers.

    Every field defaults to ``None``, i.e. "not set".
    """

    # IMDb identifier (string).
    imdb_id: Optional[str] = None
    # TMDB identifier (integer).
    tmdb_id: Optional[int] = None
    # TMDB media kind: "movie" or "tv".
    tmdb_kind: Optional[str] = None
    # TVDB identifier (integer).
    tvdb_id: Optional[int] = None
@dataclass
class MetadataResult:
    """Provider-independent container for a single metadata lookup result."""

    title: Optional[str] = None
    year: Optional[int] = None
    # Media kind: "movie" or "tv".
    kind: Optional[str] = None
    # Built per instance through default_factory, so results never share one object.
    external_ids: ExternalIds = field(default_factory=ExternalIds)
    # Name of the provider that produced the result, e.g. "tmdb", "simkl", "imdbapi".
    source: str = ""
    # Original API response, kept for caching.
    raw: Optional[dict] = None
class MetadataProvider(metaclass=ABCMeta):
    """Shared interface and HTTP plumbing for metadata providers.

    Concrete providers set ``NAME`` (and ``REQUIRES_KEY`` where needed) and
    implement the abstract methods below. An HTTP session with retry handling
    is built the first time :attr:`session` is read.
    """

    NAME: str = ""
    REQUIRES_KEY: bool = True

    def __init__(self) -> None:
        logger_name = f"METADATA.{self.NAME.upper()}"
        self.log = logging.getLogger(logger_name)
        # Filled in on first access of the ``session`` property.
        self._session: Optional[requests.Session] = None

    @property
    def session(self) -> requests.Session:
        """Return this provider's HTTP session, creating it on first use."""
        if self._session is not None:
            return self._session

        # The new session is stored on the instance before it is configured.
        session = self._session = requests.Session()
        session.headers.update(HEADERS)
        adapter = HTTPAdapter(
            max_retries=Retry(
                total=3,
                backoff_factor=1,
                status_forcelist=[429, 500, 502, 503, 504],
                allowed_methods=["GET", "POST"],
            )
        )
        # The same retrying adapter serves both schemes.
        for prefix in ("https://", "http://"):
            session.mount(prefix, adapter)
        return session

    @abstractmethod
    def is_available(self) -> bool:
        """Tell whether the credentials/keys this provider needs are present."""

    @abstractmethod
    def search(self, title: str, year: Optional[int], kind: str) -> Optional[MetadataResult]:
        """Look a title up by name; return its metadata, or None on failure/no match."""

    @abstractmethod
    def get_by_id(self, provider_id: Union[int, str], kind: str) -> Optional[MetadataResult]:
        """Retrieve metadata using this provider's own native ID."""

    @abstractmethod
    def get_external_ids(self, provider_id: Union[int, str], kind: str) -> ExternalIds:
        """Retrieve the external IDs of a title given this provider's native ID."""
def _clean(s: str) -> str:
    """Remove everything ``STRIP_RE`` matches from ``s`` and lowercase the rest."""
    kept = STRIP_RE.sub("", s)
    return kept.lower()
def _strip_year(s: str) -> str:
    """Remove whatever ``YEAR_RE`` matches from ``s``, then trim outer whitespace."""
    without_year = YEAR_RE.sub("", s)
    return without_year.strip()
def fuzzy_match(a: str, b: str, threshold: float = 0.8) -> bool:
    """Return True if ``a`` and ``b`` are a close match.

    Both strings are normalized with ``_clean`` and compared using
    ``difflib.SequenceMatcher``; the result is True when the similarity
    ratio is at least ``threshold``.

    When normalization leaves *both* strings empty (e.g. titles written
    entirely in a non-Latin script, or consisting only of punctuation), the
    whitespace-trimmed, case-folded originals are compared instead.
    ``SequenceMatcher`` reports a ratio of 1.0 for two empty sequences, so
    without this fallback any two such titles would be treated as a match.

    Args:
        a: First title.
        b: Second title.
        threshold: Minimum similarity ratio (0.0-1.0) that counts as a match.

    Returns:
        True if the similarity ratio is ``>= threshold``, otherwise False.
    """
    left, right = _clean(a), _clean(b)
    if not left and not right:
        # Nothing survived normalization on either side; compare the raw text
        # so unrelated titles are not considered identical.
        left, right = a.strip().casefold(), b.strip().casefold()
    return SequenceMatcher(None, left, right).ratio() >= threshold