mirror of
https://github.com/unshackle-dl/unshackle.git
synced 2026-05-16 21:59:26 +00:00
fix(sanitize): preserve parentheses, strip unidecode bracket artifacts (#105)
Commit 10cca7d re-added the parentheses "(" and ")" to the stripped character set, which broke output_template patterns such as ({year?}). Parentheses were originally stripped because unidecode transliterates 【 and 】 to "[(" and ")]" respectively, leaving artifacts like [(SERIES NAME)] in filenames.
Allow parentheses in filenames so that templates render correctly, and collapse the unidecode "[(" / ")]" sequences immediately after transliteration so that Unicode brackets still come out as [SERIES NAME].
This commit is contained in:
@@ -129,6 +129,8 @@ def sanitize_filename(filename: str, spacer: str = ".") -> str:
|
||||
# optionally replace non-ASCII characters with ASCII equivalents
|
||||
if not config.unicode_filenames:
|
||||
filename = unidecode(filename)
|
||||
filename = re.sub(r"\[\(+", "[", filename)
|
||||
filename = re.sub(r"\)+\]", "]", filename)
|
||||
|
||||
# remove or replace further characters as needed
|
||||
filename = "".join(c for c in filename if unicodedata.category(c) != "Mn") # hidden characters
|
||||
@@ -136,7 +138,7 @@ def sanitize_filename(filename: str, spacer: str = ".") -> str:
|
||||
if spacer == ".":
|
||||
filename = re.sub(r" - ", spacer, filename) # title separators to spacer (avoids .-. pattern)
|
||||
filename = re.sub(r"[:; ]", spacer, filename) # structural chars to (spacer)
|
||||
filename = re.sub(r"[\\*!?¿,'\"" "()<>|$#~]", "", filename) # not filename safe chars
|
||||
filename = re.sub(r"[\\*!?¿,'\"" "<>|$#~]", "", filename) # not filename safe chars
|
||||
filename = re.sub(rf"[{spacer}]{{2,}}", spacer, filename) # remove extra neighbouring (spacer)s
|
||||
|
||||
return filename
|
||||
|
||||
Reference in New Issue
Block a user