feat(config): add unicode_filenames option to preserve native characters

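Add a `unicode_filenames` config option (default: false) that, when enabled, disables ASCII transliteration in filenames. This preserves Korean, Japanese, Chinese, and other native-script characters instead of converting them to ASCII via unidecode. Existing behavior is unchanged unless the option is set to true.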

Closes #49
This commit is contained in:
Andy
2026-01-16 13:43:50 +00:00
parent 18b0534020
commit 68ad76cbb0
2 changed files with 8 additions and 2 deletions

View File

@@ -95,6 +95,7 @@ class Config:
self.update_check_interval: int = kwargs.get("update_check_interval", 24)
self.scene_naming: bool = kwargs.get("scene_naming", True)
self.series_year: bool = kwargs.get("series_year", True)
self.unicode_filenames: bool = kwargs.get("unicode_filenames", False)
self.title_cache_time: int = kwargs.get("title_cache_time", 1800) # 30 minutes default
self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400) # 24 hours default

View File

@@ -120,9 +120,14 @@ def sanitize_filename(filename: str, spacer: str = ".") -> str:
The spacer is safer to be a '.' for older DDL and p2p sharing spaces.
This includes web-served content via direct links and such.
Set `unicode_filenames: true` in config to preserve native language
characters (Korean, Japanese, Chinese, etc.) instead of transliterating
them to ASCII equivalents.
"""
# replace all non-ASCII characters with ASCII equivalents
filename = unidecode(filename)
# optionally replace non-ASCII characters with ASCII equivalents
if not config.unicode_filenames:
filename = unidecode(filename)
# remove or replace further characters as needed
filename = "".join(c for c in filename if unicodedata.category(c) != "Mn") # hidden characters