mirror of
https://github.com/unshackle-dl/unshackle.git
synced 2026-03-12 01:19:02 +00:00
Compare commits
4 Commits
460878777d
...
9952758b38
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9952758b38 | ||
|
|
f56e7c1ec8 | ||
|
|
096b7d70f8 | ||
|
|
f0493292af |
45
CHANGELOG.md
45
CHANGELOG.md
@@ -5,6 +5,51 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [1.4.1] - 2025-08-08
|
||||
|
||||
### Added
|
||||
|
||||
- **Title Caching System**: Intelligent title caching to reduce redundant API calls
|
||||
- Configurable title caching with 30-minute default cache duration
|
||||
- 24-hour fallback cache on API failures for improved reliability
|
||||
- Region-aware caching to handle geo-restricted content properly
|
||||
- SHA256 hashing for cache keys to handle complex title IDs
|
||||
- Added `--no-cache` CLI flag to bypass caching when needed
|
||||
- Added `--reset-cache` CLI flag to clear existing cache data
|
||||
- New cache configuration variables in config system
|
||||
- Documented caching options in example configuration file
|
||||
- Significantly improves performance when debugging or modifying CLI parameters
|
||||
- **Enhanced Tagging Configuration**: New options for customizing tag behavior
|
||||
- Added `tag_group_name` config option to control group name inclusion in tags
|
||||
- Added `tag_imdb_tmdb` config option to control IMDB/TMDB details in tags
|
||||
- Added Simkl API endpoint support as fallback when no TMDB API key is provided
|
||||
- Enhanced tag_file function to prioritize provided TMDB ID when `--tmdb` flag is used
|
||||
- Improved TMDB ID handling with better prioritization logic
|
||||
|
||||
### Changed
|
||||
|
||||
- **Language Selection Enhancement**: Improved default language handling
|
||||
- Updated language option default to 'orig' when no `-l` flag is set
|
||||
- Avoids hardcoded 'en' default and respects original content language
|
||||
- **Tagging Logic Improvements**: Simplified and enhanced tagging functionality
|
||||
- Simplified Simkl search logic with soft-fail when no results found
|
||||
- Enhanced tag_file function with better TMDB ID prioritization
|
||||
- Improved error handling in tagging operations
|
||||
|
||||
### Fixed
|
||||
|
||||
- **Subtitle Processing**: Enhanced subtitle filtering for edge cases
|
||||
- Fixed `ValueError` in subtitle filtering when time references contain multiple colons
|
||||
- Improved handling of subtitles containing complex time formatting
|
||||
- Better error handling for malformed subtitle timestamps
|
||||
|
||||
### Removed
|
||||
|
||||
- **Docker Support**: Removed Docker configuration from repository
|
||||
- Removed Dockerfile and .dockerignore files
|
||||
- Cleaned up README.md Docker-related documentation
|
||||
- Focuses on direct installation methods
|
||||
|
||||
## [1.4.0] - 2025-08-05
|
||||
|
||||
### Added
|
||||
|
||||
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "unshackle"
|
||||
version = "1.4.0"
|
||||
version = "1.4.1"
|
||||
description = "Modular Movie, TV, and Music Archival Software."
|
||||
authors = [{ name = "unshackle team" }]
|
||||
requires-python = ">=3.10,<3.13"
|
||||
|
||||
@@ -240,6 +240,8 @@ class dl:
|
||||
help="Max workers/threads to download with per-track. Default depends on the downloader.",
|
||||
)
|
||||
@click.option("--downloads", type=int, default=1, help="Amount of tracks to download concurrently.")
|
||||
@click.option("--no-cache", "no_cache", is_flag=True, default=False, help="Bypass title cache for this download.")
|
||||
@click.option("--reset-cache", "reset_cache", is_flag=True, default=False, help="Clear title cache before fetching.")
|
||||
@click.pass_context
|
||||
def cli(ctx: click.Context, **kwargs: Any) -> dl:
|
||||
return dl(ctx, **kwargs)
|
||||
@@ -461,7 +463,7 @@ class dl:
|
||||
self.log.info("Authenticated with Service")
|
||||
|
||||
with console.status("Fetching Title Metadata...", spinner="dots"):
|
||||
titles = service.get_titles()
|
||||
titles = service.get_titles_cached()
|
||||
if not titles:
|
||||
self.log.error("No titles returned, nothing to download...")
|
||||
sys.exit(1)
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "1.4.0"
|
||||
__version__ = "1.4.1"
|
||||
|
||||
@@ -92,6 +92,10 @@ class Config:
|
||||
self.update_check_interval: int = kwargs.get("update_check_interval", 24)
|
||||
self.scene_naming: bool = kwargs.get("scene_naming", True)
|
||||
|
||||
self.title_cache_time: int = kwargs.get("title_cache_time", 1800) # 30 minutes default
|
||||
self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400) # 24 hours default
|
||||
self.title_cache_enabled: bool = kwargs.get("title_cache_enabled", True)
|
||||
|
||||
@classmethod
|
||||
def from_yaml(cls, path: Path) -> Config:
|
||||
if not path.exists():
|
||||
|
||||
@@ -21,6 +21,7 @@ from unshackle.core.constants import AnyTrack
|
||||
from unshackle.core.credential import Credential
|
||||
from unshackle.core.drm import DRM_T
|
||||
from unshackle.core.search_result import SearchResult
|
||||
from unshackle.core.title_cacher import TitleCacher, get_account_hash, get_region_from_proxy
|
||||
from unshackle.core.titles import Title_T, Titles_T
|
||||
from unshackle.core.tracks import Chapters, Tracks
|
||||
from unshackle.core.utilities import get_ip_info
|
||||
@@ -42,6 +43,12 @@ class Service(metaclass=ABCMeta):
|
||||
|
||||
self.session = self.get_session()
|
||||
self.cache = Cacher(self.__class__.__name__)
|
||||
self.title_cache = TitleCacher(self.__class__.__name__)
|
||||
|
||||
# Store context for cache control flags and credential
|
||||
self.ctx = ctx
|
||||
self.credential = None # Will be set in authenticate()
|
||||
self.current_region = None # Will be set based on proxy/geolocation
|
||||
|
||||
if not ctx.parent or not ctx.parent.params.get("no_proxy"):
|
||||
if ctx.parent:
|
||||
@@ -79,6 +86,15 @@ class Service(metaclass=ABCMeta):
|
||||
).decode()
|
||||
}
|
||||
)
|
||||
# Store region from proxy
|
||||
self.current_region = get_region_from_proxy(proxy)
|
||||
else:
|
||||
# No proxy, try to get current region
|
||||
try:
|
||||
ip_info = get_ip_info(self.session)
|
||||
self.current_region = ip_info.get("country", "").lower() if ip_info else None
|
||||
except Exception:
|
||||
self.current_region = None
|
||||
|
||||
# Optional Abstract functions
|
||||
# The following functions may be implemented by the Service.
|
||||
@@ -123,6 +139,9 @@ class Service(metaclass=ABCMeta):
|
||||
raise TypeError(f"Expected cookies to be a {CookieJar}, not {cookies!r}.")
|
||||
self.session.cookies.update(cookies)
|
||||
|
||||
# Store credential for cache key generation
|
||||
self.credential = credential
|
||||
|
||||
def search(self) -> Generator[SearchResult, None, None]:
|
||||
"""
|
||||
Search by query for titles from the Service.
|
||||
@@ -187,6 +206,52 @@ class Service(metaclass=ABCMeta):
|
||||
This can be useful to store information on each title that will be required like any sub-asset IDs, or such.
|
||||
"""
|
||||
|
||||
def get_titles_cached(self, title_id: str = None) -> Titles_T:
    """
    Fetch titles through the title cache instead of calling get_titles() directly.

    The cache is consulted first; get_titles() is handed to the cache as the
    fetch function, so it is the cache that decides whether to call it and
    whether to fall back to stored data.

    Args:
        title_id: Optional title ID used to build the cache key.
            When omitted, the ID is read from the service instance's
            ``title`` attribute, or failing that its ``title_id`` attribute.

    Returns:
        Titles object (Movies, Series, or Album)
    """
    if title_id is None:
        # Services are not consistent about where they keep the title ID,
        # so probe the known attribute names in priority order.
        for attr_name in ("title", "title_id"):
            if hasattr(self, attr_name):
                title_id = getattr(self, attr_name)
                break
        else:
            # Nothing to key the cache on: skip caching altogether.
            self.log.debug("Cannot determine title_id for caching, bypassing cache")
            return self.get_titles()

    # The cache control flags live on the parent command's parameters;
    # without a parent context both flags stay disabled.
    cli_params = self.ctx.parent.params if self.ctx and self.ctx.parent else {}
    no_cache = cli_params.get("no_cache", False)
    reset_cache = cli_params.get("reset_cache", False)

    # The account hash keeps cache entries of different credentials apart.
    account_hash = get_account_hash(self.credential)

    return self.title_cache.get_cached_titles(
        title_id=str(title_id),
        fetch_function=self.get_titles,
        region=self.current_region,
        account_hash=account_hash,
        no_cache=no_cache,
        reset_cache=reset_cache,
    )
|
||||
|
||||
@abstractmethod
|
||||
def get_tracks(self, title: Title_T) -> Tracks:
|
||||
"""
|
||||
|
||||
240
unshackle/core/title_cacher.py
Normal file
240
unshackle/core/title_cacher.py
Normal file
@@ -0,0 +1,240 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional
|
||||
|
||||
from unshackle.core.cacher import Cacher
|
||||
from unshackle.core.config import config
|
||||
from unshackle.core.titles import Titles_T
|
||||
|
||||
|
||||
class TitleCacher:
    """
    Handles caching of Title objects to reduce redundant API calls.

    This wrapper provides:
    - Region-aware caching to handle geo-restricted content
    - Automatic fallback to cached data when API calls fail
    - Cache lifetime extension during failures
    - Cache hit/miss statistics for debugging
    """

    def __init__(self, service_name: str):
        """
        Create a title cache for one service.

        Args:
            service_name: Name of the service; used for the logger name, the
                underlying Cacher, and the cache directory.
        """
        self.service_name = service_name
        self.log = logging.getLogger(f"{service_name}.TitleCache")
        self.cacher = Cacher(service_name)
        # "fallbacks" is a subset of "misses": a miss is recorded before the
        # fetch is attempted, and a fallback only happens after that fetch fails.
        self.stats = {"hits": 0, "misses": 0, "fallbacks": 0}

    def _generate_cache_key(
        self, title_id: str, region: Optional[str] = None, account_hash: Optional[str] = None
    ) -> str:
        """
        Generate a unique cache key for title data.

        Args:
            title_id: The title identifier
            region: The region/proxy identifier
            account_hash: Hash of account credentials (if applicable)

        Returns:
            A cache key of the form ``titles_<hash>[_<region>][_<account>]``
        """
        # Hash the title_id to handle complex IDs (URLs, dots, special chars)
        # This ensures consistent length and filesystem-safe keys
        title_hash = hashlib.sha256(title_id.encode()).hexdigest()[:16]

        # Start with base key using hash
        key_parts = ["titles", title_hash]

        # Add region if available
        if region:
            key_parts.append(region.lower())

        # Add account hash if available
        if account_hash:
            key_parts.append(account_hash[:8])  # Use first 8 chars of hash

        # Join with underscores
        cache_key = "_".join(key_parts)

        # Log the mapping for debugging
        self.log.debug(f"Cache key mapping: {title_id} -> {cache_key}")

        return cache_key

    def get_cached_titles(
        self,
        title_id: str,
        fetch_function,
        region: Optional[str] = None,
        account_hash: Optional[str] = None,
        no_cache: bool = False,
        reset_cache: bool = False,
    ) -> Optional[Titles_T]:
        """
        Get titles from cache or fetch from API with fallback support.

        Args:
            title_id: The title identifier
            fetch_function: Zero-argument callable that fetches fresh titles
            region: The region/proxy identifier
            account_hash: Hash of account credentials
            no_cache: Bypass cache completely
            reset_cache: Clear this title's cache entry before fetching

        Returns:
            Titles object (Movies, Series, or Album). A falsy result from
            fetch_function is returned as-is and is not cached.

        Raises:
            Exception: Whatever was raised inside the fetch/store step is
                re-raised when no usable cached fallback is available.
        """
        # If caching is globally disabled or no_cache flag is set
        if not config.title_cache_enabled or no_cache:
            self.log.debug("Cache bypassed, fetching fresh titles")
            return fetch_function()

        # Generate cache key
        cache_key = self._generate_cache_key(title_id, region, account_hash)

        # If reset_cache flag is set, clear the cache entry
        if reset_cache:
            self.log.info(f"Clearing cache for {cache_key}")
            # NOTE(review): this path is built by hand and presumably mirrors
            # where Cacher stores its files - confirm against Cacher.
            cache_path = (config.directories.cache / self.service_name / cache_key).with_suffix(".json")
            if cache_path.exists():
                cache_path.unlink()

        # Try to get from cache
        cache = self.cacher.get(cache_key, version=1)

        # Check if we have valid cached data
        if cache and not cache.expired:
            self.stats["hits"] += 1
            self.log.debug(f"Cache hit for {title_id} (hits: {self.stats['hits']}, misses: {self.stats['misses']})")
            return cache.data

        # Cache miss or expired, try to fetch fresh data
        self.stats["misses"] += 1
        self.log.debug(f"Cache miss for {title_id}, fetching fresh data")

        try:
            # Attempt to fetch fresh titles
            titles = fetch_function()

            if titles:
                # Successfully fetched, update cache
                self.log.debug(f"Successfully fetched titles for {title_id}, updating cache")
                cache = self.cacher.get(cache_key, version=1)
                cache.set(titles, expiration=datetime.now() + timedelta(seconds=config.title_cache_time))

            return titles

        except Exception as e:
            # API call failed, check if we have fallback cached data
            if cache and cache.data:
                # We have expired cached data, use it as fallback
                current_time = datetime.now()
                # NOTE(review): the cap is derived from the stored expiration,
                # which the extension below moves forward on every fallback.
                # If Cacher persists that expiration, the retention limit slides
                # with each failure instead of being fixed at creation time -
                # confirm whether that is intended.
                max_retention_time = cache.expiration + timedelta(
                    seconds=config.title_cache_max_retention - config.title_cache_time
                )

                if current_time < max_retention_time:
                    self.stats["fallbacks"] += 1
                    self.log.warning(
                        f"API call failed for {title_id}, using cached data as fallback "
                        f"(fallbacks: {self.stats['fallbacks']})"
                    )
                    self.log.debug(f"Error was: {e}")

                    # Extend cache lifetime
                    extended_expiration = current_time + timedelta(minutes=5)
                    if extended_expiration < max_retention_time:
                        cache.expiration = extended_expiration
                        cache.set(cache.data, expiration=extended_expiration)

                    return cache.data
                else:
                    self.log.error(f"API call failed and cached data for {title_id} exceeded maximum retention time")

            # Re-raise the exception if no fallback available
            raise

    def clear_all_title_cache(self):
        """Clear all title caches for this service."""
        cache_dir = config.directories.cache / self.service_name
        if cache_dir.exists():
            # Only files produced by _generate_cache_key ("titles_" prefix) are removed.
            for cache_file in cache_dir.glob("titles_*.json"):
                cache_file.unlink()
                self.log.info(f"Cleared cache file: {cache_file.name}")

    def get_cache_stats(self) -> dict:
        """
        Get cache statistics.

        Returns:
            Dict with the raw "hits", "misses" and "fallbacks" counters and a
            "hit_rate" string: hits as a percentage of all lookups, one decimal.
        """
        # Each lookup is exactly one hit or one miss. Fallbacks are misses that
        # were later served from stale data, so they must not be added to the
        # denominator a second time.
        total = self.stats["hits"] + self.stats["misses"]
        if total > 0:
            hit_rate = (self.stats["hits"] / total) * 100
        else:
            hit_rate = 0

        return {
            "hits": self.stats["hits"],
            "misses": self.stats["misses"],
            "fallbacks": self.stats["fallbacks"],
            "hit_rate": f"{hit_rate:.1f}%",
        }
|
||||
|
||||
|
||||
def get_region_from_proxy(proxy_url: Optional[str]) -> Optional[str]:
    """
    Extract a two-letter region identifier from a proxy URL.

    Only the host part of the URL is inspected, so letters in the scheme,
    credentials, port or path can never be mistaken for a region.

    Args:
        proxy_url: The proxy URL string, with or without a scheme and
            ``user:pass@`` credentials.

    Returns:
        Lowercase two-letter region identifier, or None when the host does not
        follow one of the recognised naming patterns.
    """
    if not proxy_url:
        return None

    import re

    # Reduce the URL to its bare host: drop scheme, path, credentials and port.
    host = proxy_url.lower().split("://", 1)[-1]
    host = host.split("/", 1)[0]
    host = host.rsplit("@", 1)[-1]
    host = host.split(":", 1)[0]

    # NordVPN style, e.g. "us123.nordvpn.com"
    nord_match = re.search(r"([a-z]{2})\d+\.nordvpn", host)
    if nord_match:
        return nord_match.group(1)

    # Country code at the start of the host, e.g. "gb-proxy.example.com".
    # Anchored on purpose: an unanchored search would accept any two letters
    # that happen to precede a "-" or "_" ("residential-proxy" -> "al").
    cc_match = re.match(r"([a-z]{2})[-_]", host)
    if cc_match:
        return cc_match.group(1)

    # Country code as the first subdomain label, e.g. "de.example.com"
    subdomain_match = re.match(r"([a-z]{2})\.", host)
    if subdomain_match:
        return subdomain_match.group(1)

    return None
|
||||
|
||||
|
||||
def get_account_hash(credential) -> Optional[str]:
    """
    Produce a hash string that identifies the account behind a credential.

    Args:
        credential: Credential object, or a falsy value when there is none

    Returns:
        The credential's own ``sha1`` attribute when it has one, otherwise the
        SHA1 hex digest of its ``username``; None when the credential is falsy
        or offers neither attribute.
    """
    if credential:
        # Prefer the hash the credential already exposes.
        if hasattr(credential, "sha1"):
            return credential.sha1

        # Fall back to hashing the username ourselves.
        if hasattr(credential, "username"):
            digest = hashlib.sha1(credential.username.encode())
            return digest.hexdigest()

    return None
|
||||
@@ -21,6 +21,12 @@ update_checks: true
|
||||
# How often to check for updates, in hours (default: 24)
|
||||
update_check_interval: 24
|
||||
|
||||
# Title caching configuration
|
||||
# Cache title metadata to reduce redundant API calls
|
||||
title_cache_enabled: true # Enable/disable title caching globally (default: true)
|
||||
title_cache_time: 1800 # Cache duration in seconds (default: 1800 = 30 minutes)
|
||||
title_cache_max_retention: 86400 # Maximum cache retention for fallback when API fails (default: 86400 = 24 hours)
|
||||
|
||||
# Muxing configuration
|
||||
muxing:
|
||||
set_title: false
|
||||
|
||||
Reference in New Issue
Block a user