From 68ad76cbb0458c6e1f1820ec478eee2b62b94176 Mon Sep 17 00:00:00 2001 From: Andy Date: Fri, 16 Jan 2026 13:43:50 +0000 Subject: [PATCH] feat(config): add unicode_filenames option to preserve native characters Add config option to disable ASCII transliteration in filenames, allowing preservation of Korean, Japanese, Chinese, and other native language characters instead of converting them via unidecode. Closes #49 --- unshackle/core/config.py | 1 + unshackle/core/utilities.py | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/unshackle/core/config.py b/unshackle/core/config.py index 6eb7b26..1c50d62 100644 --- a/unshackle/core/config.py +++ b/unshackle/core/config.py @@ -95,6 +95,7 @@ class Config: self.update_check_interval: int = kwargs.get("update_check_interval", 24) self.scene_naming: bool = kwargs.get("scene_naming", True) self.series_year: bool = kwargs.get("series_year", True) + self.unicode_filenames: bool = kwargs.get("unicode_filenames", False) self.title_cache_time: int = kwargs.get("title_cache_time", 1800) # 30 minutes default self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400) # 24 hours default diff --git a/unshackle/core/utilities.py b/unshackle/core/utilities.py index 5aaf6f0..7a78535 100644 --- a/unshackle/core/utilities.py +++ b/unshackle/core/utilities.py @@ -120,9 +120,14 @@ def sanitize_filename(filename: str, spacer: str = ".") -> str: The spacer is safer to be a '.' for older DDL and p2p sharing spaces. This includes web-served content via direct links and such. + + Set `unicode_filenames: true` in config to preserve native language + characters (Korean, Japanese, Chinese, etc.) instead of transliterating + them to ASCII equivalents. """ - # replace all non-ASCII characters with ASCII equivalents - filename = unidecode(filename) + # optionally replace non-ASCII characters with ASCII equivalents + if not config.unicode_filenames: + filename = unidecode(filename) # remove or replace further characters as needed filename = "".join(c for c in filename if unicodedata.category(c) != "Mn") # hidden characters