diff --git a/unshackle/core/config.py b/unshackle/core/config.py index 6eb7b26..1c50d62 100644 --- a/unshackle/core/config.py +++ b/unshackle/core/config.py @@ -95,6 +95,7 @@ class Config: self.update_check_interval: int = kwargs.get("update_check_interval", 24) self.scene_naming: bool = kwargs.get("scene_naming", True) self.series_year: bool = kwargs.get("series_year", True) + self.unicode_filenames: bool = kwargs.get("unicode_filenames", False) self.title_cache_time: int = kwargs.get("title_cache_time", 1800) # 30 minutes default self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400) # 24 hours default diff --git a/unshackle/core/utilities.py b/unshackle/core/utilities.py index 5aaf6f0..7a78535 100644 --- a/unshackle/core/utilities.py +++ b/unshackle/core/utilities.py @@ -120,9 +120,14 @@ def sanitize_filename(filename: str, spacer: str = ".") -> str: The spacer is safer to be a '.' for older DDL and p2p sharing spaces. This includes web-served content via direct links and such. + + Set `unicode_filenames: true` in config to preserve native language + characters (Korean, Japanese, Chinese, etc.) instead of transliterating + them to ASCII equivalents. """ - # replace all non-ASCII characters with ASCII equivalents - filename = unidecode(filename) + # optionally replace non-ASCII characters with ASCII equivalents + if not config.unicode_filenames: + filename = unidecode(filename) # remove or replace further characters as needed filename = "".join(c for c in filename if unicodedata.category(c) != "Mn") # hidden characters