diff --git a/unshackle/commands/dl.py b/unshackle/commands/dl.py index 426b526..db808ec 100644 --- a/unshackle/commands/dl.py +++ b/unshackle/commands/dl.py @@ -51,6 +51,7 @@ from unshackle.core.events import events from unshackle.core.proxies import Basic, Hola, NordVPN, SurfsharkVPN, WindscribeVPN from unshackle.core.service import Service from unshackle.core.services import Services +from unshackle.core.title_cacher import get_account_hash from unshackle.core.titles import Movie, Movies, Series, Song, Title_T from unshackle.core.titles.episode import Episode from unshackle.core.tracks import Audio, Subtitle, Tracks, Video @@ -690,16 +691,49 @@ class dl: level="INFO", operation="get_titles", service=self.service, context={"titles": titles_info} ) - if self.tmdb_year and self.tmdb_id: + title_cacher = service.title_cache if hasattr(service, "title_cache") else None + cache_title_id = None + if hasattr(service, "title"): + cache_title_id = service.title + elif hasattr(service, "title_id"): + cache_title_id = service.title_id + cache_region = service.current_region if hasattr(service, "current_region") else None + cache_account_hash = get_account_hash(service.credential) if hasattr(service, "credential") else None + + if (self.tmdb_year or self.tmdb_name) and self.tmdb_id: sample_title = titles[0] if hasattr(titles, "__getitem__") else titles kind = "tv" if isinstance(sample_title, Episode) else "movie" - tmdb_year_val = tags.get_year(self.tmdb_id, kind) - if tmdb_year_val: - if isinstance(titles, (Series, Movies)): - for t in titles: + + tmdb_year_val = None + tmdb_name_val = None + + if self.tmdb_year: + tmdb_year_val = tags.get_year( + self.tmdb_id, kind, title_cacher, cache_title_id, cache_region, cache_account_hash + ) + + if self.tmdb_name: + tmdb_name_val = tags.get_title( + self.tmdb_id, kind, title_cacher, cache_title_id, cache_region, cache_account_hash + ) + + if isinstance(titles, (Series, Movies)): + for t in titles: + if tmdb_year_val: t.year = tmdb_year_val - else: + if tmdb_name_val: + if isinstance(t, Episode): + t.title = tmdb_name_val + else: + t.name = tmdb_name_val + else: + if tmdb_year_val: titles.year = tmdb_year_val + if tmdb_name_val: + if isinstance(titles, Episode): + titles.title = tmdb_name_val + else: + titles.name = tmdb_name_val console.print(Padding(Rule(f"[rule.text]{titles.__class__.__name__}: {titles}"), (1, 2))) @@ -729,9 +763,13 @@ class dl: if isinstance(title, Episode) and not self.tmdb_searched: kind = "tv" if self.tmdb_id: - tmdb_title = tags.get_title(self.tmdb_id, kind) + tmdb_title = tags.get_title( + self.tmdb_id, kind, title_cacher, cache_title_id, cache_region, cache_account_hash + ) else: - self.tmdb_id, tmdb_title, self.search_source = tags.search_show_info(title.title, title.year, kind) + self.tmdb_id, tmdb_title, self.search_source = tags.search_show_info( + title.title, title.year, kind, title_cacher, cache_title_id, cache_region, cache_account_hash + ) if not (self.tmdb_id and tmdb_title and tags.fuzzy_match(tmdb_title, title.title)): self.tmdb_id = None if list_ or list_titles: @@ -747,7 +785,9 @@ class dl: self.tmdb_searched = True if isinstance(title, Movie) and (list_ or list_titles) and not self.tmdb_id: - movie_id, movie_title, _ = tags.search_show_info(title.name, title.year, "movie") + movie_id, movie_title, _ = tags.search_show_info( + title.name, title.year, "movie", title_cacher, cache_title_id, cache_region, cache_account_hash + ) if movie_id: console.print( Padding( @@ -760,11 +800,7 @@ class dl: if self.tmdb_id and getattr(self, "search_source", None) != "simkl": kind = "tv" if isinstance(title, Episode) else "movie" - tags.external_ids(self.tmdb_id, kind) - if self.tmdb_year: - tmdb_year_val = tags.get_year(self.tmdb_id, kind) - if tmdb_year_val: - title.year = tmdb_year_val + tags.external_ids(self.tmdb_id, kind, title_cacher, cache_title_id, cache_region, cache_account_hash) if slow and i != 0: delay = random.randint(60, 120) diff --git a/unshackle/core/title_cacher.py b/unshackle/core/title_cacher.py index f3346aa..76ca639 100644 --- a/unshackle/core/title_cacher.py +++ b/unshackle/core/title_cacher.py @@ -180,6 +180,167 @@ class TitleCacher: "hit_rate": f"{hit_rate:.1f}%", } + def get_cached_tmdb( + self, title_id: str, kind: str, region: Optional[str] = None, account_hash: Optional[str] = None + ) -> Optional[dict]: + """ + Get cached TMDB data for a title. + + Args: + title_id: The title identifier + kind: "movie" or "tv" + region: The region/proxy identifier + account_hash: Hash of account credentials + + Returns: + Dict with 'detail' and 'external_ids' if cached and valid, None otherwise + """ + if not config.title_cache_enabled: + return None + + cache_key = self._generate_cache_key(title_id, region, account_hash) + cache = self.cacher.get(cache_key, version=1) + + if not cache or not cache.data: + return None + + tmdb_data = getattr(cache.data, "tmdb_data", None) + if not tmdb_data: + return None + + tmdb_expiration = tmdb_data.get("expires_at") + if not tmdb_expiration or datetime.now() >= tmdb_expiration: + self.log.debug(f"TMDB cache expired for {title_id}") + return None + + if tmdb_data.get("kind") != kind: + self.log.debug(f"TMDB cache kind mismatch for {title_id}: cached {tmdb_data.get('kind')}, requested {kind}") + return None + + self.log.debug(f"TMDB cache hit for {title_id}") + return { + "detail": tmdb_data.get("detail"), + "external_ids": tmdb_data.get("external_ids"), + "fetched_at": tmdb_data.get("fetched_at"), + } + + def cache_tmdb( + self, + title_id: str, + detail_response: dict, + external_ids_response: dict, + kind: str, + region: Optional[str] = None, + account_hash: Optional[str] = None, + ) -> None: + """ + Cache TMDB data for a title. + + Args: + title_id: The title identifier + detail_response: Full TMDB detail API response + external_ids_response: Full TMDB external_ids API response + kind: "movie" or "tv" + region: The region/proxy identifier + account_hash: Hash of account credentials + """ + if not config.title_cache_enabled: + return + + cache_key = self._generate_cache_key(title_id, region, account_hash) + cache = self.cacher.get(cache_key, version=1) + + if not cache or not cache.data: + self.log.debug(f"Cannot cache TMDB data: no title cache exists for {title_id}") + return + + now = datetime.now() + tmdb_data = { + "detail": detail_response, + "external_ids": external_ids_response, + "kind": kind, + "fetched_at": now, + "expires_at": now + timedelta(days=7), # 7-day expiration + } + + cache.data.tmdb_data = tmdb_data + + cache.set(cache.data, expiration=cache.expiration) + self.log.debug(f"Cached TMDB data for {title_id} (kind={kind})") + + def get_cached_simkl( + self, title_id: str, region: Optional[str] = None, account_hash: Optional[str] = None + ) -> Optional[dict]: + """ + Get cached Simkl data for a title. + + Args: + title_id: The title identifier + region: The region/proxy identifier + account_hash: Hash of account credentials + + Returns: + Simkl response dict if cached and valid, None otherwise + """ + if not config.title_cache_enabled: + return None + + cache_key = self._generate_cache_key(title_id, region, account_hash) + cache = self.cacher.get(cache_key, version=1) + + if not cache or not cache.data: + return None + + simkl_data = getattr(cache.data, "simkl_data", None) + if not simkl_data: + return None + + simkl_expiration = simkl_data.get("expires_at") + if not simkl_expiration or datetime.now() >= simkl_expiration: + self.log.debug(f"Simkl cache expired for {title_id}") + return None + + self.log.debug(f"Simkl cache hit for {title_id}") + return simkl_data.get("response") + + def cache_simkl( + self, + title_id: str, + simkl_response: dict, + region: Optional[str] = None, + account_hash: Optional[str] = None, + ) -> None: + """ + Cache Simkl data for a title. + + Args: + title_id: The title identifier + simkl_response: Full Simkl API response + region: The region/proxy identifier + account_hash: Hash of account credentials + """ + if not config.title_cache_enabled: + return + + cache_key = self._generate_cache_key(title_id, region, account_hash) + cache = self.cacher.get(cache_key, version=1) + + if not cache or not cache.data: + self.log.debug(f"Cannot cache Simkl data: no title cache exists for {title_id}") + return + + now = datetime.now() + simkl_data = { + "response": simkl_response, + "fetched_at": now, + "expires_at": now + timedelta(days=7), + } + + cache.data.simkl_data = simkl_data + + cache.set(cache.data, expiration=cache.expiration) + self.log.debug(f"Cached Simkl data for {title_id}") + def get_region_from_proxy(proxy_url: Optional[str]) -> Optional[str]: """ diff --git a/unshackle/core/utils/tags.py b/unshackle/core/utils/tags.py index f9570d0..82a8e95 100644 --- a/unshackle/core/utils/tags.py +++ b/unshackle/core/utils/tags.py @@ -66,8 +66,37 @@ def fuzzy_match(a: str, b: str, threshold: float = 0.8) -> bool: return ratio >= threshold -def search_simkl(title: str, year: Optional[int], kind: str) -> Tuple[Optional[dict], Optional[str], Optional[int]]: +def search_simkl( + title: str, + year: Optional[int], + kind: str, + title_cacher=None, + cache_title_id: Optional[str] = None, + cache_region: Optional[str] = None, + cache_account_hash: Optional[str] = None, +) -> Tuple[Optional[dict], Optional[str], Optional[int]]: """Search Simkl API for show information by filename.""" + + if title_cacher and cache_title_id: + cached_simkl = title_cacher.get_cached_simkl(cache_title_id, cache_region, cache_account_hash) + if cached_simkl: + log.debug("Using cached Simkl data") + if cached_simkl.get("type") == "episode" and "show" in cached_simkl: + show_info = cached_simkl["show"] + show_title = show_info.get("title") + tmdb_id = show_info.get("ids", {}).get("tmdbtv") + if tmdb_id: + tmdb_id = int(tmdb_id) + return cached_simkl, show_title, tmdb_id + elif cached_simkl.get("type") == "movie" and "movie" in cached_simkl: + movie_info = cached_simkl["movie"] + movie_title = movie_info.get("title") + ids = movie_info.get("ids", {}) + tmdb_id = ids.get("tmdb") or ids.get("moviedb") + if tmdb_id: + tmdb_id = int(tmdb_id) + return cached_simkl, movie_title, tmdb_id + log.debug("Searching Simkl for %r (%s, %s)", title, kind, year) client_id = _simkl_client_id() @@ -112,19 +141,23 @@ def search_simkl(title: str, year: Optional[int], kind: str) -> Tuple[Optional[d log.debug("Simkl year mismatch: searched %d, got %d", year, show_year) return None, None, None + if title_cacher and cache_title_id: + try: + title_cacher.cache_simkl(cache_title_id, data, cache_region, cache_account_hash) + except Exception as exc: + log.debug("Failed to cache Simkl data: %s", exc) + tmdb_id = show_info.get("ids", {}).get("tmdbtv") if tmdb_id: tmdb_id = int(tmdb_id) log.debug("Simkl -> %s (TMDB ID %s)", show_title, tmdb_id) return data, show_title, tmdb_id - # Handle movie responses elif data.get("type") == "movie" and "movie" in data: movie_info = data["movie"] movie_title = movie_info.get("title") movie_year = movie_info.get("year") - # Verify title matches and year if provided if not fuzzy_match(movie_title, title): log.debug("Simkl title mismatch: searched %r, got %r", title, movie_title) return None, None, None @@ -132,6 +165,12 @@ def search_simkl(title: str, year: Optional[int], kind: str) -> Tuple[Optional[d log.debug("Simkl year mismatch: searched %d, got %d", year, movie_year) return None, None, None + if title_cacher and cache_title_id: + try: + title_cacher.cache_simkl(cache_title_id, data, cache_region, cache_account_hash) + except Exception as exc: + log.debug("Failed to cache Simkl data: %s", exc) + ids = movie_info.get("ids", {}) tmdb_id = ids.get("tmdb") or ids.get("moviedb") if tmdb_id: @@ -145,18 +184,85 @@ def search_simkl(title: str, year: Optional[int], kind: str) -> Tuple[Optional[d return None, None, None -def search_show_info(title: str, year: Optional[int], kind: str) -> Tuple[Optional[int], Optional[str], Optional[str]]: +def search_show_info( + title: str, + year: Optional[int], + kind: str, + title_cacher=None, + cache_title_id: Optional[str] = None, + cache_region: Optional[str] = None, + cache_account_hash: Optional[str] = None, +) -> Tuple[Optional[int], Optional[str], Optional[str]]: """Search for show information, trying Simkl first, then TMDB fallback. Returns (tmdb_id, title, source).""" - simkl_data, simkl_title, simkl_tmdb_id = search_simkl(title, year, kind) + simkl_data, simkl_title, simkl_tmdb_id = search_simkl( + title, year, kind, title_cacher, cache_title_id, cache_region, cache_account_hash + ) if simkl_data and simkl_title and fuzzy_match(simkl_title, title): return simkl_tmdb_id, simkl_title, "simkl" - tmdb_id, tmdb_title = search_tmdb(title, year, kind) + tmdb_id, tmdb_title = search_tmdb(title, year, kind, title_cacher, cache_title_id, cache_region, cache_account_hash) return tmdb_id, tmdb_title, "tmdb" -def search_tmdb(title: str, year: Optional[int], kind: str) -> Tuple[Optional[int], Optional[str]]: +def _fetch_tmdb_detail(tmdb_id: int, kind: str) -> Optional[dict]: + """Fetch full TMDB detail response for caching.""" + api_key = _api_key() + if not api_key: + return None + + try: + session = _get_session() + r = session.get( + f"https://api.themoviedb.org/3/{kind}/{tmdb_id}", + params={"api_key": api_key}, + timeout=30, + ) + r.raise_for_status() + return r.json() + except requests.RequestException as exc: + log.debug("Failed to fetch TMDB detail: %s", exc) + return None + + +def _fetch_tmdb_external_ids(tmdb_id: int, kind: str) -> Optional[dict]: + """Fetch full TMDB external_ids response for caching.""" + api_key = _api_key() + if not api_key: + return None + + try: + session = _get_session() + r = session.get( + f"https://api.themoviedb.org/3/{kind}/{tmdb_id}/external_ids", + params={"api_key": api_key}, + timeout=30, + ) + r.raise_for_status() + return r.json() + except requests.RequestException as exc: + log.debug("Failed to fetch TMDB external IDs: %s", exc) + return None + + +def search_tmdb( + title: str, + year: Optional[int], + kind: str, + title_cacher=None, + cache_title_id: Optional[str] = None, + cache_region: Optional[str] = None, + cache_account_hash: Optional[str] = None, +) -> Tuple[Optional[int], Optional[str]]: + if title_cacher and cache_title_id: + cached_tmdb = title_cacher.get_cached_tmdb(cache_title_id, kind, cache_region, cache_account_hash) + if cached_tmdb and cached_tmdb.get("detail"): + detail = cached_tmdb["detail"] + tmdb_id = detail.get("id") + tmdb_title = detail.get("title") or detail.get("name") + log.debug("Using cached TMDB data: %r (ID %s)", tmdb_title, tmdb_id) + return tmdb_id, tmdb_title + api_key = _api_key() if not api_key: return None, None @@ -215,15 +321,41 @@ def search_tmdb(title: str, year: Optional[int], kind: str) -> Tuple[Optional[in ) if best_id is not None: + if title_cacher and cache_title_id: + try: + detail_response = _fetch_tmdb_detail(best_id, kind) + external_ids_response = _fetch_tmdb_external_ids(best_id, kind) + if detail_response and external_ids_response: + title_cacher.cache_tmdb( + cache_title_id, detail_response, external_ids_response, kind, cache_region, cache_account_hash + ) + except Exception as exc: + log.debug("Failed to cache TMDB data: %s", exc) + return best_id, best_title first = results[0] return first.get("id"), first.get("title") or first.get("name") -def get_title(tmdb_id: int, kind: str) -> Optional[str]: +def get_title( + tmdb_id: int, + kind: str, + title_cacher=None, + cache_title_id: Optional[str] = None, + cache_region: Optional[str] = None, + cache_account_hash: Optional[str] = None, +) -> Optional[str]: """Fetch the name/title of a TMDB entry by ID.""" + if title_cacher and cache_title_id: + cached_tmdb = title_cacher.get_cached_tmdb(cache_title_id, kind, cache_region, cache_account_hash) + if cached_tmdb and cached_tmdb.get("detail"): + detail = cached_tmdb["detail"] + tmdb_title = detail.get("title") or detail.get("name") + log.debug("Using cached TMDB title: %r", tmdb_title) + return tmdb_title + api_key = _api_key() if not api_key: return None @@ -236,17 +368,44 @@ def get_title(tmdb_id: int, kind: str) -> Optional[str]: timeout=30, ) r.raise_for_status() + js = r.json() + + if title_cacher and cache_title_id: + try: + external_ids_response = _fetch_tmdb_external_ids(tmdb_id, kind) + if external_ids_response: + title_cacher.cache_tmdb( + cache_title_id, js, external_ids_response, kind, cache_region, cache_account_hash + ) + except Exception as exc: + log.debug("Failed to cache TMDB data: %s", exc) + + return js.get("title") or js.get("name") except requests.RequestException as exc: log.debug("Failed to fetch TMDB title: %s", exc) return None - js = r.json() - return js.get("title") or js.get("name") - -def get_year(tmdb_id: int, kind: str) -> Optional[int]: +def get_year( + tmdb_id: int, + kind: str, + title_cacher=None, + cache_title_id: Optional[str] = None, + cache_region: Optional[str] = None, + cache_account_hash: Optional[str] = None, +) -> Optional[int]: """Fetch the release year of a TMDB entry by ID.""" + if title_cacher and cache_title_id: + cached_tmdb = title_cacher.get_cached_tmdb(cache_title_id, kind, cache_region, cache_account_hash) + if cached_tmdb and cached_tmdb.get("detail"): + detail = cached_tmdb["detail"] + date = detail.get("release_date") or detail.get("first_air_date") + if date and len(date) >= 4 and date[:4].isdigit(): + year = int(date[:4]) + log.debug("Using cached TMDB year: %d", year) + return year + api_key = _api_key() if not api_key: return None @@ -259,18 +418,41 @@ def get_year(tmdb_id: int, kind: str) -> Optional[int]: timeout=30, ) r.raise_for_status() + js = r.json() + + if title_cacher and cache_title_id: + try: + external_ids_response = _fetch_tmdb_external_ids(tmdb_id, kind) + if external_ids_response: + title_cacher.cache_tmdb( + cache_title_id, js, external_ids_response, kind, cache_region, cache_account_hash + ) + except Exception as exc: + log.debug("Failed to cache TMDB data: %s", exc) + + date = js.get("release_date") or js.get("first_air_date") + if date and len(date) >= 4 and date[:4].isdigit(): + return int(date[:4]) + return None except requests.RequestException as exc: log.debug("Failed to fetch TMDB year: %s", exc) return None - js = r.json() - date = js.get("release_date") or js.get("first_air_date") - if date and len(date) >= 4 and date[:4].isdigit(): - return int(date[:4]) - return None +def external_ids( + tmdb_id: int, + kind: str, + title_cacher=None, + cache_title_id: Optional[str] = None, + cache_region: Optional[str] = None, + cache_account_hash: Optional[str] = None, +) -> dict: + if title_cacher and cache_title_id: + cached_tmdb = title_cacher.get_cached_tmdb(cache_title_id, kind, cache_region, cache_account_hash) + if cached_tmdb and cached_tmdb.get("external_ids"): + log.debug("Using cached TMDB external IDs") + return cached_tmdb["external_ids"] -def external_ids(tmdb_id: int, kind: str) -> dict: api_key = _api_key() if not api_key: return {} @@ -287,6 +469,17 @@ def external_ids(tmdb_id: int, kind: str) -> dict: r.raise_for_status() js = r.json() log.debug("External IDs response: %s", js) + + if title_cacher and cache_title_id: + try: + detail_response = _fetch_tmdb_detail(tmdb_id, kind) + if detail_response: + title_cacher.cache_tmdb( + cache_title_id, detail_response, js, kind, cache_region, cache_account_hash + ) + except Exception as exc: + log.debug("Failed to cache TMDB data: %s", exc) + return js except requests.RequestException as exc: log.warning("Failed to fetch external IDs for %s %s: %s", kind, tmdb_id, exc)