fix(sanitize): preserve parentheses, strip unidecode bracket artifacts (#105)

Commit 10cca7d re-added "(" and ")" to the stripped character set, which broke output_template patterns such as ({year?}). Parentheses were originally stripped because unidecode transliterates 【 and 】 to "[(" and ")]" respectively, leaving artifacts like [(SERIES NAME)] in filenames.

Allow parentheses in filenames so templates render correctly. Immediately after transliteration, collapse the "[(" / ")]" sequences produced by unidecode so that Unicode brackets still come out as [SERIES NAME].
This commit is contained in:
imSp4rky
2026-05-05 08:39:38 -06:00
parent 08c0862691
commit 5984eefcbe

View File

@@ -129,6 +129,8 @@ def sanitize_filename(filename: str, spacer: str = ".") -> str:
# optionally replace non-ASCII characters with ASCII equivalents
if not config.unicode_filenames:
filename = unidecode(filename)
filename = re.sub(r"\[\(+", "[", filename)
filename = re.sub(r"\)+\]", "]", filename)
# remove or replace further characters as needed
filename = "".join(c for c in filename if unicodedata.category(c) != "Mn") # hidden characters
@@ -136,7 +138,7 @@ def sanitize_filename(filename: str, spacer: str = ".") -> str:
if spacer == ".":
filename = re.sub(r" - ", spacer, filename) # title separators to spacer (avoids .-. pattern)
filename = re.sub(r"[:; ]", spacer, filename) # structural chars to (spacer)
filename = re.sub(r"[\\*!?¿,'\"" "()<>|$#~]", "", filename) # not filename safe chars
filename = re.sub(r"[\\*!?¿,'\"" "<>|$#~]", "", filename) # not filename safe chars
filename = re.sub(rf"[{spacer}]{{2,}}", spacer, filename) # remove extra neighbouring (spacer)s
return filename