feat(dl): add --merge-video to merge video language variants

Group selected videos by (resolution, range, codec) and mux each group into one MKV; only language is collapsed, so ranges/codecs/resolutions stay in separate files. Adds --merge-video flag + muxing.merge_video config (global and per-service), docs, and tests.
This commit is contained in:
imSp4rky
2026-06-17 13:31:21 -06:00
parent 2e1ffebec0
commit ba69bc7d61
5 changed files with 207 additions and 28 deletions

View File

@@ -204,6 +204,23 @@ Enable/disable tagging downloaded files with IMDB/TMDB/TVDB identifiers (when av
Note: The `--split-audio` CLI flag overrides this setting. When `--split-audio` is passed, Note: The `--split-audio` CLI flag overrides this setting. When `--split-audio` is passed,
`merge_audio` is effectively set to `false` for that run. `merge_audio` is effectively set to `false` for that run.
- `merge_video`
Merge video **language variants** into one file. Default: `false`
- `false`: One MKV per video track (the default behaviour).
- `true`: Group the selected video tracks by `(resolution, range, codec)` and merge
each group into one MKV, so only language varies within a file. The player switches
between the language tracks. No re-encode, no concatenation.
Only the language dimension is collapsed. Different **resolutions**, **ranges**
(SDR/HDR10/HDR10+/DV/HYBRID) and **codecs** (H264/H265) always stay in separate files.
For example, `-r HYBRID,DV,HDR10,SDR --merge-video` produces one file per range (never a
single combined file), while a title offering English + French video of the same
resolution/range/codec produces one file containing both video tracks.
Note: The `--merge-video` CLI flag overrides this setting. Can be set per service under
`services.<TAG>.muxing.merge_video`. Change `group_videos_by_variant` in
`unshackle/commands/dl.py` to adjust the grouping.
- `default_language` (dict) - `default_language` (dict)
Override which track is flagged as the default in the muxed MKV, regardless Override which track is flagged as the default in the muxed MKV, regardless
of the title's original language. Useful when you always want your player to of the title's original language. Useful when you always want your player to

View File

@@ -0,0 +1,129 @@
"""Tests for ``--merge-video`` track grouping.
``group_videos_by_variant`` (``unshackle/commands/dl.py``) decides which selected video
tracks share one MKV when merge mode is on. The rule: group by ``(resolution, range,
codec)`` so only language varies within a file; resolutions, ranges and codecs stay
separate. With ``merge=False`` every track is its own group (one file per track).
These lock the pure grouping unit; the surrounding mux loop is Click-command orchestration.
"""
from __future__ import annotations
from unshackle.commands.dl import group_videos_by_variant
from unshackle.core.tracks import Video
def make_video(
    track_id: str,
    *,
    range_: Video.Range,
    height: int,
    codec: Video.Codec,
    language: str = "en",
) -> Video:
    """Build a minimal ``Video`` fixture for the grouping tests.

    Only ``height``, ``range_`` and ``codec`` (plus ``language``) vary between tests;
    the URL is derived from ``track_id``, the width assumes a 16:9 frame, and the
    bitrate is a fixed placeholder.
    """
    playlist_url = f"https://example.test/{track_id}.m3u8"
    sixteen_by_nine_width = int(height * 16 / 9)
    return Video(
        id_=track_id,
        url=playlist_url,
        codec=codec,
        range_=range_,
        language=language,
        height=height,
        width=sixteen_by_nine_width,
        bitrate=1_000_000,
    )
# Short aliases for the codec and range enum members used throughout the tests.
HEVC, AVC = Video.Codec.HEVC, Video.Codec.AVC
SDR, HDR10, DV = Video.Range.SDR, Video.Range.HDR10, Video.Range.DV
def test_merge_collapses_language_only() -> None:
    """Tracks that differ only in language share a single merged group."""
    english = make_video("en", range_=SDR, height=1080, codec=HEVC, language="en")
    french = make_video("fr", range_=SDR, height=1080, codec=HEVC, language="fr")

    merged = group_videos_by_variant([english, french], merge=True)

    assert len(merged) == 1
    assert [track.id for track in merged[0]] == ["en", "fr"]
def test_merge_splits_on_codec() -> None:
    """HEVC and AVC tracks of equal resolution and range are never merged together."""
    hevc_track = make_video("hevc", range_=SDR, height=1080, codec=HEVC)
    avc_track = make_video("avc", range_=SDR, height=1080, codec=AVC)

    merged = group_videos_by_variant([hevc_track, avc_track], merge=True)

    assert len(merged) == 2
    for group in merged:
        assert len(group) == 1
def test_merge_splits_on_range() -> None:
    """SDR and HDR10 tracks of equal resolution and codec land in separate groups."""
    sdr_track = make_video("sdr", range_=SDR, height=1080, codec=HEVC)
    hdr10_track = make_video("hdr10", range_=HDR10, height=1080, codec=HEVC)

    merged = group_videos_by_variant([sdr_track, hdr10_track], merge=True)

    assert len(merged) == 2
def test_merge_splits_on_resolution() -> None:
    """1080p and 2160p tracks of equal range and codec land in separate groups."""
    full_hd = make_video("1080", range_=SDR, height=1080, codec=HEVC)
    ultra_hd = make_video("2160", range_=SDR, height=2160, codec=HEVC)

    merged = group_videos_by_variant([full_hd, ultra_hd], merge=True)

    assert len(merged) == 2
def test_merge_multi_range_yields_one_group_per_range() -> None:
    """Regression guard: a multi-range selection must never collapse into one file.

    The fixture holds three ranges at 2160p (SDR, HDR10, DV) plus a second DV track
    at 1080p. Every track differs from the others in range or height, so merge mode
    must still produce four separate groups.
    """
    selection = [
        make_video("sdr", range_=SDR, height=2160, codec=HEVC),
        make_video("hdr10", range_=HDR10, height=2160, codec=HEVC),
        make_video("dv", range_=DV, height=2160, codec=HEVC),
        # Same range as the previous track, but a different height.
        make_video("dv-hybrid", range_=DV, height=1080, codec=HEVC),
    ]

    merged = group_videos_by_variant(selection, merge=True)

    assert len(merged) == 4
def test_no_merge_yields_one_group_per_track() -> None:
    """With merge=False every track is its own group, even when variants match."""
    selection = [
        make_video("en", range_=SDR, height=1080, codec=HEVC, language="en"),
        make_video("fr", range_=SDR, height=1080, codec=HEVC, language="fr"),
        make_video("avc", range_=SDR, height=1080, codec=AVC),
    ]

    unmerged = group_videos_by_variant(selection, merge=False)

    assert len(unmerged) == 3
    for group in unmerged:
        assert len(group) == 1
def test_merge_preserves_first_seen_order() -> None:
    """Groups are emitted in the order their first track was seen (stable filenames)."""
    hevc_english = make_video("hevc-en", range_=SDR, height=1080, codec=HEVC, language="en")
    avc_english = make_video("avc-en", range_=SDR, height=1080, codec=AVC, language="en")
    hevc_french = make_video("hevc-fr", range_=SDR, height=1080, codec=HEVC, language="fr")

    merged = group_videos_by_variant([hevc_english, avc_english, hevc_french], merge=True)

    # The HEVC variant appears first in the input, so its group leads and collects
    # both languages; the AVC group follows.
    first_ids = [track.id for track in merged[0]]
    second_ids = [track.id for track in merged[1]]
    assert first_ids == ["hevc-en", "hevc-fr"]
    assert second_ids == ["avc-en"]
def test_empty_input_returns_empty() -> None:
    """An empty selection yields no groups in either mode."""
    for merge in (True, False):
        assert group_videos_by_variant([], merge=merge) == []

View File

@@ -48,16 +48,8 @@ from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY,
from unshackle.core.credential import Credential from unshackle.core.credential import Credential
from unshackle.core.drm import DRM_T, ClearKeyCENC, MonaLisa, PlayReady, Widevine from unshackle.core.drm import DRM_T, ClearKeyCENC, MonaLisa, PlayReady, Widevine
from unshackle.core.events import events from unshackle.core.events import events
from unshackle.core.music import ( from unshackle.core.music import (MusicAudioIntegrityError, MusicMetadataResult, MusicPlanner, MusicRenderer,
MusicAudioIntegrityError, file_md5, verify_music_audio, write_music_manifest, write_music_metadata)
MusicMetadataResult,
MusicPlanner,
MusicRenderer,
file_md5,
verify_music_audio,
write_music_manifest,
write_music_metadata,
)
from unshackle.core.proxies import Basic, Gluetun, Hola, NordVPN, SurfsharkVPN, WindscribeVPN from unshackle.core.proxies import Basic, Gluetun, Hola, NordVPN, SurfsharkVPN, WindscribeVPN
from unshackle.core.service import Service from unshackle.core.service import Service
from unshackle.core.services import Services from unshackle.core.services import Services
@@ -100,6 +92,22 @@ def normalize_dl_config(dl_config: dict[str, Any]) -> dict[str, Any]:
return {DL_OPTION_ALIASES.get(key, key): value for key, value in dl_config.items()} return {DL_OPTION_ALIASES.get(key, key): value for key, value in dl_config.items()}
def group_videos_by_variant(videos: list[Video], *, merge: bool) -> list[list[Video]]:
    """Partition video tracks into the groups that will each be muxed into one file.

    Args:
        videos: The selected video tracks, in the order they should be considered.
        merge: When True, tracks with an identical ``(height, range, codec)`` triple
            are placed in the same group, so only language can vary inside a group.
            When False, every track becomes its own single-element group.

    Returns:
        A list of groups. Groups appear in the order their first track was seen, and
        tracks keep their input order within each group. Empty input gives ``[]``.
    """
    if merge:
        # Plain dicts keep insertion order, which gives the first-seen group ordering.
        buckets: dict[tuple[Any, ...], list[Video]] = {}
        for track in videos:
            variant = (track.height, track.range, track.codec)
            if variant in buckets:
                buckets[variant].append(track)
            else:
                buckets[variant] = [track]
        return list(buckets.values())

    return [[track] for track in videos]
def apply_service_dl_overrides(ctx: click.Context, service_dl_config: dict[str, Any], log: logging.Logger) -> None: def apply_service_dl_overrides(ctx: click.Context, service_dl_config: dict[str, Any], log: logging.Logger) -> None:
"""Apply ``services.<TAG>.dl`` config onto ``ctx.params``. Explicit CLI/env values win; """Apply ``services.<TAG>.dl`` config onto ``ctx.params``. Explicit CLI/env values win;
defaults and global ``dl:`` default_map values are replaced.""" defaults and global ``dl:`` default_map values are replaced."""
@@ -459,6 +467,13 @@ class dl:
default=None, default=None,
help="Create separate output files per audio codec instead of merging all audio.", help="Create separate output files per audio codec instead of merging all audio.",
) )
@click.option(
"--merge-video",
"merge_video",
is_flag=True,
default=None,
help="Mux all selected video tracks into a single file instead of one file per track.",
)
@click.option( @click.option(
"--select-titles", "--select-titles",
is_flag=True, is_flag=True,
@@ -1173,6 +1188,7 @@ class dl:
worst: bool, worst: bool,
best_available: bool, best_available: bool,
split_audio: Optional[bool] = None, split_audio: Optional[bool] = None,
merge_video: Optional[bool] = None,
real_video_bitrate: bool = False, real_video_bitrate: bool = False,
real_audio_bitrate: bool = False, real_audio_bitrate: bool = False,
progress_sink: Optional[Callable[[dict[str, Any]], None]] = None, progress_sink: Optional[Callable[[dict[str, Any]], None]] = None,
@@ -3062,6 +3078,9 @@ class dl:
# When we split audio (merge_audio=False), multiple outputs may exist per title, so suffix codec. # When we split audio (merge_audio=False), multiple outputs may exist per title, so suffix codec.
append_audio_codec_suffix = not merge_audio append_audio_codec_suffix = not merge_audio
# Merge video language variants: tracks sharing (height, range, codec) are muxed into one file.
merge_video = merge_video if merge_video is not None else config.muxing.get("merge_video", False)
multiplex_tasks: list[tuple[TaskID, Tracks, Optional[Audio.Codec]]] = [] multiplex_tasks: list[tuple[TaskID, Tracks, Optional[Audio.Codec]]] = []
# Track hybrid-processing outputs explicitly so we can always clean them up, # Track hybrid-processing outputs explicitly so we can always clean them up,
# even if muxing fails early (e.g. SystemExit) before the normal delete loop. # even if muxing fails early (e.g. SystemExit) before the normal delete loop.
@@ -3097,22 +3116,25 @@ class dl:
task_tracks = clone_tracks_for_audio(base_tracks, codec_audio_tracks) task_tracks = clone_tracks_for_audio(base_tracks, codec_audio_tracks)
multiplex_tasks.append((task_id, task_tracks, audio_codec)) multiplex_tasks.append((task_id, task_tracks, audio_codec))
def mux_video_standalone(video_track: Optional[Video]) -> None: def mux_video_group(video_tracks: list[Optional[Video]]) -> None:
if video_track and video_track.dv_compatible_bitstream: for video_track in video_tracks:
apply_dv_fixup(video_track) if video_track and video_track.dv_compatible_bitstream:
apply_dv_fixup(video_track)
task_description = "Multiplexing" task_description = "Multiplexing"
if video_track: # All tracks in a merged group share height/range/codec, so describe from the first.
head = next((v for v in video_tracks if v), None)
if head:
if len(quality) > 1: if len(quality) > 1:
task_description += f" {video_track.height}p" task_description += f" {head.height}p"
if len(range_) > 1: if len(range_) > 1:
task_description += f" {video_track.range.name}" task_description += f" {head.range.name}"
if len(vcodec) > 1: if len(vcodec) > 1:
task_description += f" {video_track.codec.name}" task_description += f" {head.codec.name}"
task_tracks = Tracks(title.tracks) + title.tracks.chapters + title.tracks.attachments task_tracks = Tracks(title.tracks) + title.tracks.chapters + title.tracks.attachments
if video_track: if head:
task_tracks.videos = [video_track] task_tracks.videos = [v for v in video_tracks if v]
enqueue_mux_tasks(task_description, task_tracks) enqueue_mux_tasks(task_description, task_tracks)
@@ -3172,16 +3194,18 @@ class dl:
enqueue_mux_tasks(task_description, task_tracks) enqueue_mux_tasks(task_description, task_tracks)
# Mux every requested range standalone, skipping the ingredient-only DV. # Mux every requested range standalone, skipping the ingredient-only DV.
for video_track in original_videos: # merge_video collapses only language variants (same height/range/codec).
if video_track.hybrid_base_only: standalone_videos = [v for v in original_videos if not v.hybrid_base_only]
continue for group in group_videos_by_variant(standalone_videos, merge=merge_video):
mux_video_standalone(video_track) mux_video_group(group)
console.print() console.print()
else: else:
# Normal mode: process each video track separately # Normal mode: one file per video track, unless merge_video groups
for video_track in title.tracks.videos or [None]: # same-(height, range, codec) language variants into one file.
mux_video_standalone(video_track) groups = group_videos_by_variant(title.tracks.videos, merge=merge_video)
for group in groups or [[None]]:
mux_video_group(group)
if progress_sink: if progress_sink:
progress_sink( progress_sink(

View File

@@ -158,6 +158,14 @@ muxing:
# false: Separate MKV per (quality, audio_codec) combination # false: Separate MKV per (quality, audio_codec) combination
# Example: Title.1080p.AAC.mkv, Title.1080p.EC3.mkv # Example: Title.1080p.AAC.mkv, Title.1080p.EC3.mkv
merge_audio: true merge_audio: true
# merge_video: Merge video language variants into one file
# false (default): One file per video track
# true: Group videos by (resolution, range, codec) and merge each group into one
# MKV - so only language varies within a file. Different resolutions, ranges
# (SDR/HDR10/DV/...) and codecs (H264/H265) still produce separate files.
# Example: -r HYBRID,DV,HDR10,SDR yields one file per range, not one mega-file.
# The --merge-video CLI flag overrides this.
merge_video: false
# default_language: Override which track is flagged as the default in the muxed MKV. # default_language: Override which track is flagged as the default in the muxed MKV.
# audio: BCP-47 tag of the preferred default audio track (e.g. pl, en, pt-BR). # audio: BCP-47 tag of the preferred default audio track (e.g. pl, en, pt-BR).
# Wins over the title's original_language. Falls back to is_original_lang # Wins over the title's original_language. Falls back to is_original_lang
@@ -675,9 +683,10 @@ services:
User-Agent: "Service-specific user agent string" User-Agent: "Service-specific user agent string"
Accept-Language: "en-US,en;q=0.9" Accept-Language: "en-US,en;q=0.9"
# Override muxing options # Override muxing options (merge this service's video language variants into one file per resolution/range/codec)
muxing: muxing:
set_title: true set_title: true
merge_video: true
# Remap service-provided titles before naming/output # Remap service-provided titles before naming/output
# Keyed by the exact title the service returns -> desired output title. # Keyed by the exact title the service returns -> desired output title.

2
uv.lock generated
View File

@@ -1779,7 +1779,7 @@ wheels = [
[[package]] [[package]]
name = "unshackle" name = "unshackle"
version = "5.1.0" version = "5.2.0"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "aiohttp" }, { name = "aiohttp" },