fix(hls): finalize n_m3u8dl_re outputs

- Add a small helper to move N_m3u8DL-RE final outputs into the expected temp path (preserve actual suffix) and keep subtitle codec consistent with the produced file.
- Skip generic HLS segment merging when N_m3u8DL-RE is in use to avoid mixing in sidecar files and reduce Windows file-lock issues.
- Harden segmented WebVTT merging to avoid IndexError when caption segment indexes exceed the provided duration list.
This commit is contained in:
Andy
2026-02-06 16:17:06 -07:00
parent 3eede98376
commit ace89760e7
2 changed files with 244 additions and 190 deletions

View File

@@ -225,6 +225,39 @@ class HLS:
return tracks return tracks
@staticmethod
def _finalize_n_m3u8dl_re_output(*, track: AnyTrack, save_dir: Path, save_path: Path) -> Path:
    """
    Finalize output from N_m3u8DL-RE.

    We call N_m3u8DL-RE with `--save-name track.id`, so the final file should be `{track.id}.*` under `save_dir`.
    This moves that output to `save_path` (preserving the real suffix) and, for subtitles, updates `track.codec`
    to match the produced file extension.

    Parameters:
        track: The downloaded track; for subtitles its `codec` is updated to match the produced file.
        save_dir: Directory N_m3u8DL-RE wrote into; removed (best-effort) once the output is moved out.
        save_path: Intended final location; the produced file's actual suffix replaces `save_path`'s.

    Returns the path the finalized output now lives at.

    Raises:
        FileNotFoundError: If no `{track.id}.*` file exists under `save_dir`.
    """
    matches = [p for p in save_dir.rglob(f"{track.id}.*") if p.is_file()]
    if not matches:
        raise FileNotFoundError(f"No output files produced by N_m3u8DL-RE for save-name={track.id} in: {save_dir}")

    # The largest match is assumed to be the media output; smaller matches are sidecar files.
    primary = max(matches, key=lambda p: p.stat().st_size)
    final_save_path = save_path.with_suffix(primary.suffix) if primary.suffix else save_path
    final_save_path.parent.mkdir(parents=True, exist_ok=True)

    # Compare with resolve() rather than absolute(): absolute() does not normalize symlinks or
    # path case, so on case-insensitive filesystems (Windows) two spellings of the same file
    # could compare unequal — and the unlink below would delete the file we are about to move.
    if primary.resolve() != final_save_path.resolve():
        final_save_path.unlink(missing_ok=True)
        shutil.move(str(primary), str(final_save_path))

    if isinstance(track, Subtitle):
        # Keep the declared subtitle codec consistent with what was actually produced.
        ext = final_save_path.suffix.lower().lstrip(".")
        try:
            track.codec = Subtitle.Codec.from_mime(ext)
        except ValueError:
            # Unknown extension — leave the existing codec untouched (best-effort).
            pass

    # save_dir only holds leftovers now; ignore_errors avoids failing on Windows file locks.
    shutil.rmtree(save_dir, ignore_errors=True)
    return final_save_path
@staticmethod @staticmethod
def download_track( def download_track(
track: AnyTrack, track: AnyTrack,
@@ -420,222 +453,230 @@ class HLS:
for control_file in segment_save_dir.glob("*.aria2__temp"): for control_file in segment_save_dir.glob("*.aria2__temp"):
control_file.unlink() control_file.unlink()
if not skip_merge: if skip_merge:
progress(total=total_segments, completed=0, downloaded="Merging") final_save_path = HLS._finalize_n_m3u8dl_re_output(track=track, save_dir=save_dir, save_path=save_path)
progress(downloaded="Downloaded")
track.path = final_save_path
events.emit(events.Types.TRACK_DOWNLOADED, track=track)
return
name_len = len(str(total_segments)) progress(total=total_segments, completed=0, downloaded="Merging")
discon_i = 0
range_offset = 0
map_data: Optional[tuple[m3u8.model.InitializationSection, bytes]] = None
if session_drm:
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = (initial_drm_key, session_drm)
else:
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = None
i = -1 name_len = len(str(total_segments))
for real_i, segment in enumerate(master.segments): discon_i = 0
if segment not in unwanted_segments: range_offset = 0
i += 1 map_data: Optional[tuple[m3u8.model.InitializationSection, bytes]] = None
if session_drm:
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = (initial_drm_key, session_drm)
else:
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = None
is_last_segment = (real_i + 1) == len(master.segments) i = -1
for real_i, segment in enumerate(master.segments):
if segment not in unwanted_segments:
i += 1
def merge(to: Path, via: list[Path], delete: bool = False, include_map_data: bool = False): is_last_segment = (real_i + 1) == len(master.segments)
"""
Merge all files to a given path, optionally including map data.
Parameters: def merge(to: Path, via: list[Path], delete: bool = False, include_map_data: bool = False):
to: The output file with all merged data. """
via: List of files to merge, in sequence. Merge all files to a given path, optionally including map data.
delete: Delete the file once it's been merged.
include_map_data: Whether to include the init map data.
"""
with open(to, "wb") as x:
if include_map_data and map_data and map_data[1]:
x.write(map_data[1])
for file in via:
x.write(file.read_bytes())
x.flush()
if delete:
file.unlink()
def decrypt(include_this_segment: bool) -> Path: Parameters:
""" to: The output file with all merged data.
Decrypt all segments that uses the currently set DRM. via: List of files to merge, in sequence.
delete: Delete the file once it's been merged.
include_map_data: Whether to include the init map data.
"""
with open(to, "wb") as x:
if include_map_data and map_data and map_data[1]:
x.write(map_data[1])
for file in via:
x.write(file.read_bytes())
x.flush()
if delete:
file.unlink()
All segments that will be decrypted with this DRM will be merged together def decrypt(include_this_segment: bool) -> Path:
in sequence, prefixed with the init data (if any), and then deleted. Once """
merged they will be decrypted. The merged and decrypted file names state Decrypt all segments that uses the currently set DRM.
the range of segments that were used.
Parameters: All segments that will be decrypted with this DRM will be merged together
include_this_segment: Whether to include the current segment in the in sequence, prefixed with the init data (if any), and then deleted. Once
list of segments to merge and decrypt. This should be False if merged they will be decrypted. The merged and decrypted file names state
decrypting on EXT-X-KEY changes, or True when decrypting on the the range of segments that were used.
last segment.
Returns the decrypted path. Parameters:
""" include_this_segment: Whether to include the current segment in the
drm = encryption_data[1] list of segments to merge and decrypt. This should be False if
first_segment_i = next( decrypting on EXT-X-KEY changes, or True when decrypting on the
int(file.stem) for file in sorted(segment_save_dir.iterdir()) if file.stem.isdigit() last segment.
)
last_segment_i = max(0, i - int(not include_this_segment))
range_len = (last_segment_i - first_segment_i) + 1
segment_range = f"{str(first_segment_i).zfill(name_len)}-{str(last_segment_i).zfill(name_len)}" Returns the decrypted path.
merged_path = ( """
segment_save_dir / f"{segment_range}{get_extension(master.segments[last_segment_i].uri)}" drm = encryption_data[1]
) first_segment_i = next(
decrypted_path = segment_save_dir / f"{merged_path.stem}_decrypted{merged_path.suffix}" int(file.stem) for file in sorted(segment_save_dir.iterdir()) if file.stem.isdigit()
)
last_segment_i = max(0, i - int(not include_this_segment))
range_len = (last_segment_i - first_segment_i) + 1
files = [ segment_range = f"{str(first_segment_i).zfill(name_len)}-{str(last_segment_i).zfill(name_len)}"
file merged_path = segment_save_dir / f"{segment_range}{get_extension(master.segments[last_segment_i].uri)}"
for file in sorted(segment_save_dir.iterdir()) decrypted_path = segment_save_dir / f"{merged_path.stem}_decrypted{merged_path.suffix}"
if file.stem.isdigit() and first_segment_i <= int(file.stem) <= last_segment_i
]
if not files:
raise ValueError(f"None of the segment files for {segment_range} exist...")
elif len(files) != range_len:
raise ValueError(f"Missing {range_len - len(files)} segment files for {segment_range}...")
if isinstance(drm, (Widevine, PlayReady)): files = [
# with widevine we can merge all segments and decrypt once file
merge(to=merged_path, via=files, delete=True, include_map_data=True) for file in sorted(segment_save_dir.iterdir())
drm.decrypt(merged_path) if file.stem.isdigit() and first_segment_i <= int(file.stem) <= last_segment_i
merged_path.rename(decrypted_path) ]
else: if not files:
# with other drm we must decrypt separately and then merge them raise ValueError(f"None of the segment files for {segment_range} exist...")
# for aes this is because each segment likely has 16-byte padding elif len(files) != range_len:
for file in files: raise ValueError(f"Missing {range_len - len(files)} segment files for {segment_range}...")
drm.decrypt(file)
merge(to=merged_path, via=files, delete=True, include_map_data=True)
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=decrypted_path) if isinstance(drm, (Widevine, PlayReady)):
# with widevine we can merge all segments and decrypt once
merge(to=merged_path, via=files, delete=True, include_map_data=True)
drm.decrypt(merged_path)
merged_path.rename(decrypted_path)
else:
# with other drm we must decrypt separately and then merge them
# for aes this is because each segment likely has 16-byte padding
for file in files:
drm.decrypt(file)
merge(to=merged_path, via=files, delete=True, include_map_data=True)
return decrypted_path events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=decrypted_path)
def merge_discontinuity(include_this_segment: bool, include_map_data: bool = True): return decrypted_path
"""
Merge all segments of the discontinuity.
All segment files for this discontinuity must already be downloaded and def merge_discontinuity(include_this_segment: bool, include_map_data: bool = True):
already decrypted (if it needs to be decrypted). """
Merge all segments of the discontinuity.
Parameters: All segment files for this discontinuity must already be downloaded and
include_this_segment: Whether to include the current segment in the already decrypted (if it needs to be decrypted).
list of segments to merge and decrypt. This should be False if
decrypting on EXT-X-KEY changes, or True when decrypting on the
last segment.
include_map_data: Whether to prepend the init map data before the
segment files when merging.
"""
last_segment_i = max(0, i - int(not include_this_segment))
files = [ Parameters:
file include_this_segment: Whether to include the current segment in the
for file in sorted(segment_save_dir.iterdir()) list of segments to merge and decrypt. This should be False if
if int(file.stem.replace("_decrypted", "").split("-")[-1]) <= last_segment_i decrypting on EXT-X-KEY changes, or True when decrypting on the
] last segment.
if files: include_map_data: Whether to prepend the init map data before the
to_dir = segment_save_dir.parent segment files when merging.
to_path = to_dir / f"{str(discon_i).zfill(name_len)}{files[-1].suffix}" """
merge(to=to_path, via=files, delete=True, include_map_data=include_map_data) last_segment_i = max(0, i - int(not include_this_segment))
if segment not in unwanted_segments: files = [
if isinstance(track, Subtitle): file
segment_file_ext = get_extension(segment.uri) for file in sorted(segment_save_dir.iterdir())
segment_file_path = segment_save_dir / f"{str(i).zfill(name_len)}{segment_file_ext}" if int(file.stem.replace("_decrypted", "").split("-")[-1]) <= last_segment_i
segment_data = try_ensure_utf8(segment_file_path.read_bytes()) ]
if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML): if files:
segment_data = ( to_dir = segment_save_dir.parent
segment_data.decode("utf8") to_path = to_dir / f"{str(discon_i).zfill(name_len)}{files[-1].suffix}"
.replace("&lrm;", html.unescape("&lrm;")) merge(to=to_path, via=files, delete=True, include_map_data=include_map_data)
.replace("&rlm;", html.unescape("&rlm;"))
.encode("utf8")
)
segment_file_path.write_bytes(segment_data)
if segment.discontinuity and i != 0: if segment not in unwanted_segments:
if encryption_data: if isinstance(track, Subtitle):
decrypt(include_this_segment=False) segment_file_ext = get_extension(segment.uri)
merge_discontinuity( segment_file_path = segment_save_dir / f"{str(i).zfill(name_len)}{segment_file_ext}"
include_this_segment=False, include_map_data=not encryption_data or not encryption_data[1] segment_data = try_ensure_utf8(segment_file_path.read_bytes())
if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
segment_data = (
segment_data.decode("utf8")
.replace("&lrm;", html.unescape("&lrm;"))
.replace("&rlm;", html.unescape("&rlm;"))
.encode("utf8")
) )
segment_file_path.write_bytes(segment_data)
discon_i += 1 if segment.discontinuity and i != 0:
range_offset = 0 # TODO: Should this be reset or not?
map_data = None
if encryption_data:
encryption_data = (encryption_data[0], encryption_data[1])
if segment.init_section and (not map_data or segment.init_section != map_data[0]):
if segment.init_section.byterange:
init_byte_range = HLS.calculate_byte_range(segment.init_section.byterange, range_offset)
range_offset = init_byte_range.split("-")[0]
init_range_header = {"Range": f"bytes={init_byte_range}"}
else:
init_range_header = {}
# Handle both session types for init section request
res = session.get(
url=urljoin(segment.init_section.base_uri, segment.init_section.uri),
headers=init_range_header,
)
# Check response based on session type
if isinstance(res, requests.Response) or isinstance(res, CurlResponse):
res.raise_for_status()
init_content = res.content
else:
raise TypeError(
f"Expected response to be requests.Response or curl_cffi.Response, not {type(res)}"
)
map_data = (segment.init_section, init_content)
segment_keys = getattr(segment, "keys", None)
if segment_keys:
if cdm:
cdm_segment_keys = HLS.filter_keys_for_cdm(segment_keys, cdm)
key = HLS.get_supported_key(cdm_segment_keys) if cdm_segment_keys else HLS.get_supported_key(segment_keys)
else:
key = HLS.get_supported_key(segment_keys)
if encryption_data and encryption_data[0] != key and i != 0 and segment not in unwanted_segments:
decrypt(include_this_segment=False)
if key is None:
encryption_data = None
elif not encryption_data or encryption_data[0] != key:
drm = HLS.get_drm(key, session)
if isinstance(drm, (Widevine, PlayReady)):
try:
if map_data:
track_kid = track.get_key_id(map_data[1])
else:
track_kid = None
if not track_kid:
track_kid = drm.kid
progress(downloaded="LICENSING")
license_widevine(drm, track_kid=track_kid)
progress(downloaded="[yellow]LICENSED")
except Exception: # noqa
DOWNLOAD_CANCELLED.set() # skip pending track downloads
progress(downloaded="[red]FAILED")
raise
encryption_data = (key, drm)
if DOWNLOAD_LICENCE_ONLY.is_set():
continue
if is_last_segment:
# required as it won't end with EXT-X-DISCONTINUITY nor a new key
if encryption_data: if encryption_data:
decrypt(include_this_segment=True) decrypt(include_this_segment=False)
merge_discontinuity( merge_discontinuity(
include_this_segment=True, include_map_data=not encryption_data or not encryption_data[1] include_this_segment=False, include_map_data=not encryption_data or not encryption_data[1]
) )
progress(advance=1) discon_i += 1
range_offset = 0 # TODO: Should this be reset or not?
map_data = None
if encryption_data:
encryption_data = (encryption_data[0], encryption_data[1])
if segment.init_section and (not map_data or segment.init_section != map_data[0]):
if segment.init_section.byterange:
init_byte_range = HLS.calculate_byte_range(segment.init_section.byterange, range_offset)
range_offset = init_byte_range.split("-")[0]
init_range_header = {"Range": f"bytes={init_byte_range}"}
else:
init_range_header = {}
# Handle both session types for init section request
res = session.get(
url=urljoin(segment.init_section.base_uri, segment.init_section.uri),
headers=init_range_header,
)
# Check response based on session type
if isinstance(res, requests.Response) or isinstance(res, CurlResponse):
res.raise_for_status()
init_content = res.content
else:
raise TypeError(
f"Expected response to be requests.Response or curl_cffi.Response, not {type(res)}"
)
map_data = (segment.init_section, init_content)
segment_keys = getattr(segment, "keys", None)
if segment_keys:
if cdm:
cdm_segment_keys = HLS.filter_keys_for_cdm(segment_keys, cdm)
key = (
HLS.get_supported_key(cdm_segment_keys)
if cdm_segment_keys
else HLS.get_supported_key(segment_keys)
)
else:
key = HLS.get_supported_key(segment_keys)
if encryption_data and encryption_data[0] != key and i != 0 and segment not in unwanted_segments:
decrypt(include_this_segment=False)
if key is None:
encryption_data = None
elif not encryption_data or encryption_data[0] != key:
drm = HLS.get_drm(key, session)
if isinstance(drm, (Widevine, PlayReady)):
try:
if map_data:
track_kid = track.get_key_id(map_data[1])
else:
track_kid = None
if not track_kid:
track_kid = drm.kid
progress(downloaded="LICENSING")
license_widevine(drm, track_kid=track_kid)
progress(downloaded="[yellow]LICENSED")
except Exception: # noqa
DOWNLOAD_CANCELLED.set() # skip pending track downloads
progress(downloaded="[red]FAILED")
raise
encryption_data = (key, drm)
if DOWNLOAD_LICENCE_ONLY.is_set():
continue
if is_last_segment:
# required as it won't end with EXT-X-DISCONTINUITY nor a new key
if encryption_data:
decrypt(include_this_segment=True)
merge_discontinuity(
include_this_segment=True, include_map_data=not encryption_data or not encryption_data[1]
)
progress(advance=1)
if DOWNLOAD_LICENCE_ONLY.is_set(): if DOWNLOAD_LICENCE_ONLY.is_set():
return return

View File

@@ -168,6 +168,16 @@ def merge_segmented_webvtt(vtt_raw: str, segment_durations: Optional[list[int]]
duplicate_index: list[int] = [] duplicate_index: list[int] = []
captions = vtt.get_captions(lang) captions = vtt.get_captions(lang)
# Some providers can produce "segment_index" values that are
# outside the provided segment_durations list after normalization/merge.
# This used to crash with IndexError and abort the entire download.
if segment_durations and captions:
max_idx = max(getattr(c, "segment_index", 0) for c in captions)
if max_idx >= len(segment_durations):
# Pad with the last known duration (or 0 if empty) so indexing is safe.
pad_val = segment_durations[-1] if segment_durations else 0
segment_durations = segment_durations + [pad_val] * (max_idx - len(segment_durations) + 1)
if captions[0].segment_index == 0: if captions[0].segment_index == 0:
first_segment_mpegts = captions[0].mpegts first_segment_mpegts = captions[0].mpegts
else: else:
@@ -179,6 +189,9 @@ def merge_segmented_webvtt(vtt_raw: str, segment_durations: Optional[list[int]]
# calculate the timestamp from SegmentTemplate/SegmentList duration. # calculate the timestamp from SegmentTemplate/SegmentList duration.
likely_dash = first_segment_mpegts == 0 and caption.mpegts == 0 likely_dash = first_segment_mpegts == 0 and caption.mpegts == 0
if likely_dash and segment_durations: if likely_dash and segment_durations:
# Defensive: segment_index can still be out of range if captions are malformed.
if caption.segment_index < 0 or caption.segment_index >= len(segment_durations):
continue
duration = segment_durations[caption.segment_index] duration = segment_durations[caption.segment_index]
caption.mpegts = MPEG_TIMESCALE * (duration / timescale) caption.mpegts = MPEG_TIMESCALE * (duration / timescale)