feat(cache): add TMDB and Simkl metadata caching to title cache

This commit is contained in:
Andy
2025-11-02 23:33:24 +00:00
parent 27d0ca84a3
commit 001f6a0146
3 changed files with 422 additions and 32 deletions

View File

@@ -51,6 +51,7 @@ from unshackle.core.events import events
from unshackle.core.proxies import Basic, Hola, NordVPN, SurfsharkVPN, WindscribeVPN from unshackle.core.proxies import Basic, Hola, NordVPN, SurfsharkVPN, WindscribeVPN
from unshackle.core.service import Service from unshackle.core.service import Service
from unshackle.core.services import Services from unshackle.core.services import Services
from unshackle.core.title_cacher import get_account_hash
from unshackle.core.titles import Movie, Movies, Series, Song, Title_T from unshackle.core.titles import Movie, Movies, Series, Song, Title_T
from unshackle.core.titles.episode import Episode from unshackle.core.titles.episode import Episode
from unshackle.core.tracks import Audio, Subtitle, Tracks, Video from unshackle.core.tracks import Audio, Subtitle, Tracks, Video
@@ -690,16 +691,49 @@ class dl:
level="INFO", operation="get_titles", service=self.service, context={"titles": titles_info} level="INFO", operation="get_titles", service=self.service, context={"titles": titles_info}
) )
if self.tmdb_year and self.tmdb_id: title_cacher = service.title_cache if hasattr(service, "title_cache") else None
cache_title_id = None
if hasattr(service, "title"):
cache_title_id = service.title
elif hasattr(service, "title_id"):
cache_title_id = service.title_id
cache_region = service.current_region if hasattr(service, "current_region") else None
cache_account_hash = get_account_hash(service.credential) if hasattr(service, "credential") else None
if (self.tmdb_year or self.tmdb_name) and self.tmdb_id:
sample_title = titles[0] if hasattr(titles, "__getitem__") else titles sample_title = titles[0] if hasattr(titles, "__getitem__") else titles
kind = "tv" if isinstance(sample_title, Episode) else "movie" kind = "tv" if isinstance(sample_title, Episode) else "movie"
tmdb_year_val = tags.get_year(self.tmdb_id, kind)
if tmdb_year_val: tmdb_year_val = None
tmdb_name_val = None
if self.tmdb_year:
tmdb_year_val = tags.get_year(
self.tmdb_id, kind, title_cacher, cache_title_id, cache_region, cache_account_hash
)
if self.tmdb_name:
tmdb_name_val = tags.get_title(
self.tmdb_id, kind, title_cacher, cache_title_id, cache_region, cache_account_hash
)
if isinstance(titles, (Series, Movies)): if isinstance(titles, (Series, Movies)):
for t in titles: for t in titles:
if tmdb_year_val:
t.year = tmdb_year_val t.year = tmdb_year_val
if tmdb_name_val:
if isinstance(t, Episode):
t.title = tmdb_name_val
else: else:
t.name = tmdb_name_val
else:
if tmdb_year_val:
titles.year = tmdb_year_val titles.year = tmdb_year_val
if tmdb_name_val:
if isinstance(titles, Episode):
titles.title = tmdb_name_val
else:
titles.name = tmdb_name_val
console.print(Padding(Rule(f"[rule.text]{titles.__class__.__name__}: {titles}"), (1, 2))) console.print(Padding(Rule(f"[rule.text]{titles.__class__.__name__}: {titles}"), (1, 2)))
@@ -729,9 +763,13 @@ class dl:
if isinstance(title, Episode) and not self.tmdb_searched: if isinstance(title, Episode) and not self.tmdb_searched:
kind = "tv" kind = "tv"
if self.tmdb_id: if self.tmdb_id:
tmdb_title = tags.get_title(self.tmdb_id, kind) tmdb_title = tags.get_title(
self.tmdb_id, kind, title_cacher, cache_title_id, cache_region, cache_account_hash
)
else: else:
self.tmdb_id, tmdb_title, self.search_source = tags.search_show_info(title.title, title.year, kind) self.tmdb_id, tmdb_title, self.search_source = tags.search_show_info(
title.title, title.year, kind, title_cacher, cache_title_id, cache_region, cache_account_hash
)
if not (self.tmdb_id and tmdb_title and tags.fuzzy_match(tmdb_title, title.title)): if not (self.tmdb_id and tmdb_title and tags.fuzzy_match(tmdb_title, title.title)):
self.tmdb_id = None self.tmdb_id = None
if list_ or list_titles: if list_ or list_titles:
@@ -747,7 +785,9 @@ class dl:
self.tmdb_searched = True self.tmdb_searched = True
if isinstance(title, Movie) and (list_ or list_titles) and not self.tmdb_id: if isinstance(title, Movie) and (list_ or list_titles) and not self.tmdb_id:
movie_id, movie_title, _ = tags.search_show_info(title.name, title.year, "movie") movie_id, movie_title, _ = tags.search_show_info(
title.name, title.year, "movie", title_cacher, cache_title_id, cache_region, cache_account_hash
)
if movie_id: if movie_id:
console.print( console.print(
Padding( Padding(
@@ -760,11 +800,7 @@ class dl:
if self.tmdb_id and getattr(self, "search_source", None) != "simkl": if self.tmdb_id and getattr(self, "search_source", None) != "simkl":
kind = "tv" if isinstance(title, Episode) else "movie" kind = "tv" if isinstance(title, Episode) else "movie"
tags.external_ids(self.tmdb_id, kind) tags.external_ids(self.tmdb_id, kind, title_cacher, cache_title_id, cache_region, cache_account_hash)
if self.tmdb_year:
tmdb_year_val = tags.get_year(self.tmdb_id, kind)
if tmdb_year_val:
title.year = tmdb_year_val
if slow and i != 0: if slow and i != 0:
delay = random.randint(60, 120) delay = random.randint(60, 120)

View File

@@ -180,6 +180,167 @@ class TitleCacher:
"hit_rate": f"{hit_rate:.1f}%", "hit_rate": f"{hit_rate:.1f}%",
} }
def get_cached_tmdb(
    self, title_id: str, kind: str, region: Optional[str] = None, account_hash: Optional[str] = None
) -> Optional[dict]:
    """
    Look up TMDB metadata stored on the cached entry for a title.

    Args:
        title_id: The title identifier
        kind: "movie" or "tv"
        region: The region/proxy identifier
        account_hash: Hash of account credentials

    Returns:
        Dict with 'detail', 'external_ids' and 'fetched_at' when an unexpired
        entry of the requested kind is present, None otherwise
    """
    if not config.title_cache_enabled:
        return None

    entry = self.cacher.get(self._generate_cache_key(title_id, region, account_hash), version=1)
    if not (entry and entry.data):
        return None

    tmdb_data = getattr(entry.data, "tmdb_data", None)
    if not tmdb_data:
        return None

    # A missing expiry is treated the same as an elapsed one.
    expires_at = tmdb_data.get("expires_at")
    if not expires_at or datetime.now() >= expires_at:
        self.log.debug(f"TMDB cache expired for {title_id}")
        return None

    # Data cached for one kind must not be served for the other.
    if tmdb_data.get("kind") != kind:
        self.log.debug(f"TMDB cache kind mismatch for {title_id}: cached {tmdb_data.get('kind')}, requested {kind}")
        return None

    self.log.debug(f"TMDB cache hit for {title_id}")
    return {field: tmdb_data.get(field) for field in ("detail", "external_ids", "fetched_at")}
def cache_tmdb(
    self,
    title_id: str,
    detail_response: dict,
    external_ids_response: dict,
    kind: str,
    region: Optional[str] = None,
    account_hash: Optional[str] = None,
) -> None:
    """
    Attach TMDB data to the existing cache entry for a title.

    Nothing is stored when title caching is disabled or when no cache entry
    with data exists yet for the title.

    Args:
        title_id: The title identifier
        detail_response: Full TMDB detail API response
        external_ids_response: Full TMDB external_ids API response
        kind: "movie" or "tv"
        region: The region/proxy identifier
        account_hash: Hash of account credentials
    """
    if not config.title_cache_enabled:
        return

    entry = self.cacher.get(self._generate_cache_key(title_id, region, account_hash), version=1)
    if not (entry and entry.data):
        self.log.debug(f"Cannot cache TMDB data: no title cache exists for {title_id}")
        return

    fetched_at = datetime.now()
    entry.data.tmdb_data = {
        "detail": detail_response,
        "external_ids": external_ids_response,
        "kind": kind,
        "fetched_at": fetched_at,
        "expires_at": fetched_at + timedelta(days=7),  # 7-day expiration
    }

    # Write the entry back, reusing the expiration it already carries.
    entry.set(entry.data, expiration=entry.expiration)
    self.log.debug(f"Cached TMDB data for {title_id} (kind={kind})")
def get_cached_simkl(
    self, title_id: str, region: Optional[str] = None, account_hash: Optional[str] = None
) -> Optional[dict]:
    """
    Look up the Simkl response stored on the cached entry for a title.

    Args:
        title_id: The title identifier
        region: The region/proxy identifier
        account_hash: Hash of account credentials

    Returns:
        The stored Simkl response when an unexpired entry is present, None otherwise
    """
    if not config.title_cache_enabled:
        return None

    entry = self.cacher.get(self._generate_cache_key(title_id, region, account_hash), version=1)
    if not (entry and entry.data):
        return None

    stored = getattr(entry.data, "simkl_data", None)
    if not stored:
        return None

    # A missing expiry is treated the same as an elapsed one.
    expires_at = stored.get("expires_at")
    if not expires_at or datetime.now() >= expires_at:
        self.log.debug(f"Simkl cache expired for {title_id}")
        return None

    self.log.debug(f"Simkl cache hit for {title_id}")
    return stored.get("response")
def cache_simkl(
    self,
    title_id: str,
    simkl_response: dict,
    region: Optional[str] = None,
    account_hash: Optional[str] = None,
) -> None:
    """
    Attach a Simkl response to the existing cache entry for a title.

    Nothing is stored when title caching is disabled or when no cache entry
    with data exists yet for the title.

    Args:
        title_id: The title identifier
        simkl_response: Full Simkl API response
        region: The region/proxy identifier
        account_hash: Hash of account credentials
    """
    if not config.title_cache_enabled:
        return

    entry = self.cacher.get(self._generate_cache_key(title_id, region, account_hash), version=1)
    if not (entry and entry.data):
        self.log.debug(f"Cannot cache Simkl data: no title cache exists for {title_id}")
        return

    fetched_at = datetime.now()
    entry.data.simkl_data = {
        "response": simkl_response,
        "fetched_at": fetched_at,
        "expires_at": fetched_at + timedelta(days=7),
    }

    # Write the entry back, reusing the expiration it already carries.
    entry.set(entry.data, expiration=entry.expiration)
    self.log.debug(f"Cached Simkl data for {title_id}")
def get_region_from_proxy(proxy_url: Optional[str]) -> Optional[str]: def get_region_from_proxy(proxy_url: Optional[str]) -> Optional[str]:
""" """

View File

@@ -66,8 +66,37 @@ def fuzzy_match(a: str, b: str, threshold: float = 0.8) -> bool:
return ratio >= threshold return ratio >= threshold
def search_simkl(title: str, year: Optional[int], kind: str) -> Tuple[Optional[dict], Optional[str], Optional[int]]: def search_simkl(
title: str,
year: Optional[int],
kind: str,
title_cacher=None,
cache_title_id: Optional[str] = None,
cache_region: Optional[str] = None,
cache_account_hash: Optional[str] = None,
) -> Tuple[Optional[dict], Optional[str], Optional[int]]:
"""Search Simkl API for show information by filename.""" """Search Simkl API for show information by filename."""
if title_cacher and cache_title_id:
cached_simkl = title_cacher.get_cached_simkl(cache_title_id, cache_region, cache_account_hash)
if cached_simkl:
log.debug("Using cached Simkl data")
if cached_simkl.get("type") == "episode" and "show" in cached_simkl:
show_info = cached_simkl["show"]
show_title = show_info.get("title")
tmdb_id = show_info.get("ids", {}).get("tmdbtv")
if tmdb_id:
tmdb_id = int(tmdb_id)
return cached_simkl, show_title, tmdb_id
elif cached_simkl.get("type") == "movie" and "movie" in cached_simkl:
movie_info = cached_simkl["movie"]
movie_title = movie_info.get("title")
ids = movie_info.get("ids", {})
tmdb_id = ids.get("tmdb") or ids.get("moviedb")
if tmdb_id:
tmdb_id = int(tmdb_id)
return cached_simkl, movie_title, tmdb_id
log.debug("Searching Simkl for %r (%s, %s)", title, kind, year) log.debug("Searching Simkl for %r (%s, %s)", title, kind, year)
client_id = _simkl_client_id() client_id = _simkl_client_id()
@@ -112,19 +141,23 @@ def search_simkl(title: str, year: Optional[int], kind: str) -> Tuple[Optional[d
log.debug("Simkl year mismatch: searched %d, got %d", year, show_year) log.debug("Simkl year mismatch: searched %d, got %d", year, show_year)
return None, None, None return None, None, None
if title_cacher and cache_title_id:
try:
title_cacher.cache_simkl(cache_title_id, data, cache_region, cache_account_hash)
except Exception as exc:
log.debug("Failed to cache Simkl data: %s", exc)
tmdb_id = show_info.get("ids", {}).get("tmdbtv") tmdb_id = show_info.get("ids", {}).get("tmdbtv")
if tmdb_id: if tmdb_id:
tmdb_id = int(tmdb_id) tmdb_id = int(tmdb_id)
log.debug("Simkl -> %s (TMDB ID %s)", show_title, tmdb_id) log.debug("Simkl -> %s (TMDB ID %s)", show_title, tmdb_id)
return data, show_title, tmdb_id return data, show_title, tmdb_id
# Handle movie responses
elif data.get("type") == "movie" and "movie" in data: elif data.get("type") == "movie" and "movie" in data:
movie_info = data["movie"] movie_info = data["movie"]
movie_title = movie_info.get("title") movie_title = movie_info.get("title")
movie_year = movie_info.get("year") movie_year = movie_info.get("year")
# Verify title matches and year if provided
if not fuzzy_match(movie_title, title): if not fuzzy_match(movie_title, title):
log.debug("Simkl title mismatch: searched %r, got %r", title, movie_title) log.debug("Simkl title mismatch: searched %r, got %r", title, movie_title)
return None, None, None return None, None, None
@@ -132,6 +165,12 @@ def search_simkl(title: str, year: Optional[int], kind: str) -> Tuple[Optional[d
log.debug("Simkl year mismatch: searched %d, got %d", year, movie_year) log.debug("Simkl year mismatch: searched %d, got %d", year, movie_year)
return None, None, None return None, None, None
if title_cacher and cache_title_id:
try:
title_cacher.cache_simkl(cache_title_id, data, cache_region, cache_account_hash)
except Exception as exc:
log.debug("Failed to cache Simkl data: %s", exc)
ids = movie_info.get("ids", {}) ids = movie_info.get("ids", {})
tmdb_id = ids.get("tmdb") or ids.get("moviedb") tmdb_id = ids.get("tmdb") or ids.get("moviedb")
if tmdb_id: if tmdb_id:
@@ -145,18 +184,85 @@ def search_simkl(title: str, year: Optional[int], kind: str) -> Tuple[Optional[d
return None, None, None return None, None, None
def search_show_info(
    title: str,
    year: Optional[int],
    kind: str,
    title_cacher=None,
    cache_title_id: Optional[str] = None,
    cache_region: Optional[str] = None,
    cache_account_hash: Optional[str] = None,
) -> Tuple[Optional[int], Optional[str], Optional[str]]:
    """Search for show information, trying Simkl first, then TMDB fallback. Returns (tmdb_id, title, source)."""
    # The same cache arguments are forwarded unchanged to both lookups.
    cache_args = (title_cacher, cache_title_id, cache_region, cache_account_hash)

    simkl_data, simkl_title, simkl_tmdb_id = search_simkl(title, year, kind, *cache_args)
    simkl_usable = simkl_data and simkl_title and fuzzy_match(simkl_title, title)
    if simkl_usable:
        return simkl_tmdb_id, simkl_title, "simkl"

    tmdb_id, tmdb_title = search_tmdb(title, year, kind, *cache_args)
    return tmdb_id, tmdb_title, "tmdb"
def _fetch_tmdb_detail(tmdb_id: int, kind: str) -> Optional[dict]:
    """Fetch full TMDB detail response for caching.

    Returns the decoded JSON body, or None when no API key is available or
    the request fails with a ``requests.RequestException``.
    """
    key = _api_key()
    if not key:
        return None

    url = f"https://api.themoviedb.org/3/{kind}/{tmdb_id}"
    try:
        response = _get_session().get(url, params={"api_key": key}, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as exc:
        log.debug("Failed to fetch TMDB detail: %s", exc)
    return None
def _fetch_tmdb_external_ids(tmdb_id: int, kind: str) -> Optional[dict]:
    """Fetch full TMDB external_ids response for caching.

    Returns the decoded JSON body, or None when no API key is available or
    the request fails with a ``requests.RequestException``.
    """
    key = _api_key()
    if not key:
        return None

    url = f"https://api.themoviedb.org/3/{kind}/{tmdb_id}/external_ids"
    try:
        response = _get_session().get(url, params={"api_key": key}, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as exc:
        log.debug("Failed to fetch TMDB external IDs: %s", exc)
    return None
def search_tmdb(
title: str,
year: Optional[int],
kind: str,
title_cacher=None,
cache_title_id: Optional[str] = None,
cache_region: Optional[str] = None,
cache_account_hash: Optional[str] = None,
) -> Tuple[Optional[int], Optional[str]]:
if title_cacher and cache_title_id:
cached_tmdb = title_cacher.get_cached_tmdb(cache_title_id, kind, cache_region, cache_account_hash)
if cached_tmdb and cached_tmdb.get("detail"):
detail = cached_tmdb["detail"]
tmdb_id = detail.get("id")
tmdb_title = detail.get("title") or detail.get("name")
log.debug("Using cached TMDB data: %r (ID %s)", tmdb_title, tmdb_id)
return tmdb_id, tmdb_title
api_key = _api_key() api_key = _api_key()
if not api_key: if not api_key:
return None, None return None, None
@@ -215,15 +321,41 @@ def search_tmdb(title: str, year: Optional[int], kind: str) -> Tuple[Optional[in
) )
if best_id is not None: if best_id is not None:
if title_cacher and cache_title_id:
try:
detail_response = _fetch_tmdb_detail(best_id, kind)
external_ids_response = _fetch_tmdb_external_ids(best_id, kind)
if detail_response and external_ids_response:
title_cacher.cache_tmdb(
cache_title_id, detail_response, external_ids_response, kind, cache_region, cache_account_hash
)
except Exception as exc:
log.debug("Failed to cache TMDB data: %s", exc)
return best_id, best_title return best_id, best_title
first = results[0] first = results[0]
return first.get("id"), first.get("title") or first.get("name") return first.get("id"), first.get("title") or first.get("name")
def get_title(tmdb_id: int, kind: str) -> Optional[str]: def get_title(
tmdb_id: int,
kind: str,
title_cacher=None,
cache_title_id: Optional[str] = None,
cache_region: Optional[str] = None,
cache_account_hash: Optional[str] = None,
) -> Optional[str]:
"""Fetch the name/title of a TMDB entry by ID.""" """Fetch the name/title of a TMDB entry by ID."""
if title_cacher and cache_title_id:
cached_tmdb = title_cacher.get_cached_tmdb(cache_title_id, kind, cache_region, cache_account_hash)
if cached_tmdb and cached_tmdb.get("detail"):
detail = cached_tmdb["detail"]
tmdb_title = detail.get("title") or detail.get("name")
log.debug("Using cached TMDB title: %r", tmdb_title)
return tmdb_title
api_key = _api_key() api_key = _api_key()
if not api_key: if not api_key:
return None return None
@@ -236,17 +368,44 @@ def get_title(tmdb_id: int, kind: str) -> Optional[str]:
timeout=30, timeout=30,
) )
r.raise_for_status() r.raise_for_status()
js = r.json()
if title_cacher and cache_title_id:
try:
external_ids_response = _fetch_tmdb_external_ids(tmdb_id, kind)
if external_ids_response:
title_cacher.cache_tmdb(
cache_title_id, js, external_ids_response, kind, cache_region, cache_account_hash
)
except Exception as exc:
log.debug("Failed to cache TMDB data: %s", exc)
return js.get("title") or js.get("name")
except requests.RequestException as exc: except requests.RequestException as exc:
log.debug("Failed to fetch TMDB title: %s", exc) log.debug("Failed to fetch TMDB title: %s", exc)
return None return None
js = r.json()
return js.get("title") or js.get("name")
def get_year(
def get_year(tmdb_id: int, kind: str) -> Optional[int]: tmdb_id: int,
kind: str,
title_cacher=None,
cache_title_id: Optional[str] = None,
cache_region: Optional[str] = None,
cache_account_hash: Optional[str] = None,
) -> Optional[int]:
"""Fetch the release year of a TMDB entry by ID.""" """Fetch the release year of a TMDB entry by ID."""
if title_cacher and cache_title_id:
cached_tmdb = title_cacher.get_cached_tmdb(cache_title_id, kind, cache_region, cache_account_hash)
if cached_tmdb and cached_tmdb.get("detail"):
detail = cached_tmdb["detail"]
date = detail.get("release_date") or detail.get("first_air_date")
if date and len(date) >= 4 and date[:4].isdigit():
year = int(date[:4])
log.debug("Using cached TMDB year: %d", year)
return year
api_key = _api_key() api_key = _api_key()
if not api_key: if not api_key:
return None return None
@@ -259,18 +418,41 @@ def get_year(tmdb_id: int, kind: str) -> Optional[int]:
timeout=30, timeout=30,
) )
r.raise_for_status() r.raise_for_status()
except requests.RequestException as exc:
log.debug("Failed to fetch TMDB year: %s", exc)
return None
js = r.json() js = r.json()
if title_cacher and cache_title_id:
try:
external_ids_response = _fetch_tmdb_external_ids(tmdb_id, kind)
if external_ids_response:
title_cacher.cache_tmdb(
cache_title_id, js, external_ids_response, kind, cache_region, cache_account_hash
)
except Exception as exc:
log.debug("Failed to cache TMDB data: %s", exc)
date = js.get("release_date") or js.get("first_air_date") date = js.get("release_date") or js.get("first_air_date")
if date and len(date) >= 4 and date[:4].isdigit(): if date and len(date) >= 4 and date[:4].isdigit():
return int(date[:4]) return int(date[:4])
return None return None
except requests.RequestException as exc:
log.debug("Failed to fetch TMDB year: %s", exc)
return None
def external_ids(tmdb_id: int, kind: str) -> dict: def external_ids(
tmdb_id: int,
kind: str,
title_cacher=None,
cache_title_id: Optional[str] = None,
cache_region: Optional[str] = None,
cache_account_hash: Optional[str] = None,
) -> dict:
if title_cacher and cache_title_id:
cached_tmdb = title_cacher.get_cached_tmdb(cache_title_id, kind, cache_region, cache_account_hash)
if cached_tmdb and cached_tmdb.get("external_ids"):
log.debug("Using cached TMDB external IDs")
return cached_tmdb["external_ids"]
api_key = _api_key() api_key = _api_key()
if not api_key: if not api_key:
return {} return {}
@@ -287,6 +469,17 @@ def external_ids(tmdb_id: int, kind: str) -> dict:
r.raise_for_status() r.raise_for_status()
js = r.json() js = r.json()
log.debug("External IDs response: %s", js) log.debug("External IDs response: %s", js)
if title_cacher and cache_title_id:
try:
detail_response = _fetch_tmdb_detail(tmdb_id, kind)
if detail_response:
title_cacher.cache_tmdb(
cache_title_id, detail_response, js, kind, cache_region, cache_account_hash
)
except Exception as exc:
log.debug("Failed to cache TMDB data: %s", exc)
return js return js
except requests.RequestException as exc: except requests.RequestException as exc:
log.warning("Failed to fetch external IDs for %s %s: %s", kind, tmdb_id, exc) log.warning("Failed to fetch external IDs for %s %s: %s", kind, tmdb_id, exc)