Mirror of https://github.com/unshackle-dl/unshackle.git (synced 2026-03-09 16:09:01 +00:00)
feat(config): add unicode_filenames option to preserve native characters
Add config option to disable ASCII transliteration in filenames, allowing preservation of Korean, Japanese, Chinese, and other native language characters instead of converting them via unidecode. Closes #49
This commit is contained in:
@@ -95,6 +95,7 @@ class Config:
|
||||
self.update_check_interval: int = kwargs.get("update_check_interval", 24)
|
||||
self.scene_naming: bool = kwargs.get("scene_naming", True)
|
||||
self.series_year: bool = kwargs.get("series_year", True)
|
||||
+self.unicode_filenames: bool = kwargs.get("unicode_filenames", False)
|
||||
|
||||
self.title_cache_time: int = kwargs.get("title_cache_time", 1800) # 30 minutes default
|
||||
self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400) # 24 hours default
|
||||
|
||||
@@ -120,9 +120,14 @@ def sanitize_filename(filename: str, spacer: str = ".") -> str:
|
||||
|
||||
The spacer is safer to be a '.' for older DDL and p2p sharing spaces.
|
||||
This includes web-served content via direct links and such.
|
||||
|
||||
+Set `unicode_filenames: true` in config to preserve native language
+characters (Korean, Japanese, Chinese, etc.) instead of transliterating
+them to ASCII equivalents.
|
||||
"""
|
||||
-# replace all non-ASCII characters with ASCII equivalents
-filename = unidecode(filename)
+# optionally replace non-ASCII characters with ASCII equivalents
+if not config.unicode_filenames:
+    filename = unidecode(filename)
|
||||
|
||||
# remove or replace further characters as needed
|
||||
filename = "".join(c for c in filename if unicodedata.category(c) != "Mn") # hidden characters
|
||||
|
||||
Reference in New Issue
Block a user