feat(templates): add configurable language tagging rule engine (#58)

This commit is contained in:
Andy
2026-02-26 21:17:46 -07:00
parent 13d7636d86
commit c8883a5404
5 changed files with 185 additions and 2 deletions

View File

@@ -37,7 +37,8 @@ This is **required** in your `unshackle.yaml` — a warning is shown if not conf
Available variables: `{title}`, `{year}`, `{season}`, `{episode}`, `{season_episode}`, `{episode_name}`, Available variables: `{title}`, `{year}`, `{season}`, `{episode}`, `{season_episode}`, `{episode_name}`,
`{quality}`, `{resolution}`, `{source}`, `{audio}`, `{audio_channels}`, `{audio_full}`, `{quality}`, `{resolution}`, `{source}`, `{audio}`, `{audio_channels}`, `{audio_full}`,
`{video}`, `{hdr}`, `{hfr}`, `{atmos}`, `{dual}`, `{multi}`, `{tag}`, `{edition}`, `{repack}` `{video}`, `{hdr}`, `{hfr}`, `{atmos}`, `{dual}`, `{multi}`, `{tag}`, `{edition}`, `{repack}`,
`{lang_tag}`
Add `?` suffix to make a variable conditional (omitted when empty): `{year?}`, `{hdr?}`, `{repack?}` Add `?` suffix to make a variable conditional (omitted when empty): `{year?}`, `{hdr?}`, `{repack?}`
@@ -62,6 +63,72 @@ Example outputs:
--- ---
---
## language_tags (dict)
Automatically adds language-based identifiers (e.g., `DANiSH`, `NORDiC`, `DKsubs`) to output filenames
based on audio and subtitle track languages. Use `{lang_tag?}` in your `output_template` to place the tag.
Rules are evaluated in order; the first matching rule wins. All conditions within a single rule
must match (AND logic). Rules without a `tag` or without any condition are skipped with a warning. If no rules match, `{lang_tag?}` is cleanly removed from the filename.
### Conditions
| Condition | Type | Description |
|-----------|------|-------------|
| `audio` | string | Matches if any selected audio track has this language |
| `subs_contain` | string | Matches if any selected subtitle has this language |
| `subs_contain_all` | list | Matches if subtitles include ALL listed languages |
Language matching is fuzzy: a rule language such as `en` also matches regional variants like `en-US` and `en-GB`.
### Example: Nordic tagging
```yaml
language_tags:
rules:
- audio: da
tag: DANiSH
- audio: sv
tag: SWEDiSH
- audio: nb
tag: NORWEGiAN
- audio: en
subs_contain_all: [da, sv, nb]
tag: NORDiC
- audio: en
subs_contain: da
tag: DKsubs
output_template:
series: '{title}.{year?}.{season_episode}.{lang_tag?}.{quality}.{source}.WEB-DL.{audio_full}.{video}-{tag}'
```
Example outputs:
- Danish audio: `Show.S01E01.DANiSH.1080p.NF.WEB-DL.DDP5.1.H.264-TAG`
- English audio + Danish, Swedish and Norwegian subs: `Show.S01E01.NORDiC.1080p.NF.WEB-DL.DDP5.1.H.264-TAG`
- English audio + Danish subs only: `Show.S01E01.DKsubs.1080p.NF.WEB-DL.DDP5.1.H.264-TAG`
- No matching languages: `Show.S01E01.1080p.NF.WEB-DL.DDP5.1.H.264-TAG`
### Example: Other regional tags
```yaml
language_tags:
rules:
- audio: nl
tag: DUTCH
- audio: de
tag: GERMAN
- audio: fr
subs_contain: en
tag: ENGFR
- audio: fr
tag: FRENCH
```
---
## unicode_filenames (bool) ## unicode_filenames (bool)
Allow Unicode characters in output filenames. When `false`, Unicode characters are transliterated Allow Unicode characters in output filenames. When `false`, Unicode characters are transliterated

View File

@@ -96,6 +96,7 @@ class Config:
self.update_checks: bool = kwargs.get("update_checks", True) self.update_checks: bool = kwargs.get("update_checks", True)
self.update_check_interval: int = kwargs.get("update_check_interval", 24) self.update_check_interval: int = kwargs.get("update_check_interval", 24)
self.language_tags: dict = kwargs.get("language_tags") or {}
self.output_template: dict = kwargs.get("output_template") or {} self.output_template: dict = kwargs.get("output_template") or {}
if kwargs.get("scene_naming") is not None: if kwargs.get("scene_naming") is not None:
@@ -154,6 +155,7 @@ class Config:
"hfr", "hfr",
"edition", "edition",
"repack", "repack",
"lang_tag",
} }
unsafe_chars = r'[<>:"/\\|?*]' unsafe_chars = r'[<>:"/\\|?*]'

View File

@@ -80,6 +80,7 @@ class Title:
"hdr": "", "hdr": "",
"hfr": "", "hfr": "",
"edition": "", "edition": "",
"lang_tag": "",
} }
if self.tracks: if self.tracks:
@@ -161,6 +162,14 @@ class Title:
context["dual"] = "" context["dual"] = ""
context["multi"] = "" context["multi"] = ""
lang_tag_rules = config.language_tags.get("rules") if config.language_tags else None
if lang_tag_rules and self.tracks:
from unshackle.core.utils.language_tags import evaluate_language_tag
audio_langs = [a.language for a in self.tracks.audio]
sub_langs = [s.language for s in self.tracks.subtitles]
context["lang_tag"] = evaluate_language_tag(lang_tag_rules, audio_langs, sub_langs)
return context return context
@abstractmethod @abstractmethod

View File

@@ -0,0 +1,79 @@
"""Language tag rule engine for output filename templates."""
from __future__ import annotations
import logging
from typing import Any, Sequence
from langcodes import Language
from unshackle.core.utilities import is_close_match
log = logging.getLogger(__name__)
def evaluate_language_tag(
    rules: list[dict[str, Any]],
    audio_languages: Sequence[Language],
    subtitle_languages: Sequence[Language],
) -> str:
    """Evaluate language tag rules against selected tracks.

    Rules are evaluated in order; the first matching rule's tag is returned.
    Because the rules come straight from user configuration, malformed input
    is logged and skipped instead of raising:

    * ``rules`` that is not a list/tuple is ignored entirely,
    * an entry that is not a mapping is skipped,
    * an entry without a truthy ``tag`` is skipped.

    Args:
        rules: List of rule dicts from config, each with conditions and a ``tag``.
        audio_languages: Languages of the selected audio tracks.
        subtitle_languages: Languages of the selected subtitle tracks.

    Returns:
        The tag string from the first matching rule, or ``""`` if none match.
    """
    if not isinstance(rules, (list, tuple)):
        # None/empty simply means nothing is configured; any other non-list
        # value is a config mistake (e.g. a single rule written without the
        # leading dash), so say so once rather than failing on ``.get`` below.
        if rules:
            log.warning("Language tag rules must be a list, ignoring: %s", rules)
        return ""

    for rule in rules:
        if not isinstance(rule, dict):
            log.warning("Language tag rule is not a mapping, skipping: %s", rule)
            continue

        tag = rule.get("tag")
        if not tag:
            log.warning("Language tag rule missing 'tag' field, skipping: %s", rule)
            continue

        if _rule_matches(rule, audio_languages, subtitle_languages):
            log.debug("Language tag rule matched: %s -> %s", rule, tag)
            # YAML may hand back a non-string scalar; the template needs text.
            return str(tag)

    return ""
def _rule_matches(
    rule: dict[str, Any],
    audio_languages: Sequence[Language],
    subtitle_languages: Sequence[Language],
) -> bool:
    """Check if all conditions in a rule are satisfied.

    Supported condition keys are ``audio``, ``subs_contain`` and
    ``subs_contain_all``; every condition present must hold (AND logic).
    Language comparison is delegated to ``is_close_match``.

    A rule that carries no effective condition never matches and is reported
    with a warning. An empty ``subs_contain_all`` list counts as "no
    condition": it would otherwise be vacuously true and turn the rule into
    an unintended catch-all.

    Args:
        rule: A single rule mapping from the configuration.
        audio_languages: Languages of the selected audio tracks.
        subtitle_languages: Languages of the selected subtitle tracks.

    Returns:
        ``True`` if the rule has at least one condition and all of its
        conditions are satisfied, otherwise ``False``.
    """
    # Materialise each sequence once; every check below is handed a list.
    audio_pool = list(audio_languages)
    subtitle_pool = list(subtitle_languages)
    has_condition = False

    audio_lang = rule.get("audio")
    if audio_lang is not None:
        has_condition = True
        if not is_close_match(audio_lang, audio_pool):
            return False

    subs_contain = rule.get("subs_contain")
    if subs_contain is not None:
        has_condition = True
        if not is_close_match(subs_contain, subtitle_pool):
            return False

    subs_contain_all = rule.get("subs_contain_all")
    if subs_contain_all is not None:
        # Accept a bare scalar as a one-element list for convenience.
        if not isinstance(subs_contain_all, (list, tuple)):
            subs_contain_all = [subs_contain_all]
        if subs_contain_all:
            has_condition = True
            if not all(is_close_match(lang, subtitle_pool) for lang in subs_contain_all):
                return False

    if not has_condition:
        log.warning("Language tag rule has no conditions, skipping: %s", rule)
        return False

    return True

View File

@@ -22,7 +22,8 @@ set_terminal_bg: false
# If not configured, default scene-style templates are used and a warning is shown. # If not configured, default scene-style templates are used and a warning is shown.
# Available variables: {title}, {year}, {season}, {episode}, {season_episode}, {episode_name}, # Available variables: {title}, {year}, {season}, {episode}, {season_episode}, {episode_name},
# {quality}, {resolution}, {source}, {audio}, {audio_channels}, {audio_full}, # {quality}, {resolution}, {source}, {audio}, {audio_channels}, {audio_full},
# {video}, {hdr}, {hfr}, {atmos}, {dual}, {multi}, {tag}, {edition}, {repack} # {video}, {hdr}, {hfr}, {atmos}, {dual}, {multi}, {tag}, {edition}, {repack},
# {lang_tag}
# Conditional variables (included only if present): Add ? suffix like {year?}, {episode_name?}, {hdr?} # Conditional variables (included only if present): Add ? suffix like {year?}, {episode_name?}, {hdr?}
# Customize the templates below: # Customize the templates below:
# #
@@ -52,6 +53,31 @@ output_template:
# movies: '{title}.{year}.{quality}.{hdr?}.{source}.WEB-DL.{audio_full}.{video}-{tag}' # movies: '{title}.{year}.{quality}.{hdr?}.{source}.WEB-DL.{audio_full}.{video}-{tag}'
# series: '{title}.{year?}.{season_episode}.{episode_name?}.{quality}.{hdr?}.{source}.WEB-DL.{audio_full}.{atmos?}.{video}-{tag}' # series: '{title}.{year?}.{season_episode}.{episode_name?}.{quality}.{hdr?}.{source}.WEB-DL.{audio_full}.{atmos?}.{video}-{tag}'
# Language-based tagging for output filenames
# Automatically adds language identifiers (e.g., DANiSH, NORDiC, DKsubs) based on
# audio and subtitle track languages. Rules are evaluated in order; first match wins.
# Use {lang_tag?} in your output_template to place the tag in the filename.
#
# Conditions (all conditions in a rule must match):
# audio: <lang> - any audio track matches this language
# subs_contain: <lang> - any subtitle matches this language
# subs_contain_all: [lang, ...] - subtitles include ALL listed languages
#
# language_tags:
# rules:
# - audio: da
# tag: DANiSH
# - audio: sv
# tag: SWEDiSH
# - audio: nb
# tag: NORWEGiAN
# - audio: en
# subs_contain_all: [da, sv, nb]
# tag: NORDiC
# - audio: en
# subs_contain: da
# tag: DKsubs
# Check for updates from GitHub repository on startup (default: true) # Check for updates from GitHub repository on startup (default: true)
update_checks: true update_checks: true