forked from kenzuya/unshackle
fix(hls): finalize n_m3u8dl_re outputs
- Add a small helper to move N_m3u8DL-RE final outputs into the expected temp path (preserve actual suffix) and keep subtitle codec consistent with the produced file. - Skip generic HLS segment merging when N_m3u8DL-RE is in use to avoid mixing in sidecar files and reduce Windows file-lock issues. - Harden segmented WebVTT merging to avoid IndexError when caption segment indexes exceed the provided duration list.
This commit is contained in:
@@ -225,6 +225,39 @@ class HLS:
|
||||
|
||||
return tracks
|
||||
|
||||
@staticmethod
def _finalize_n_m3u8dl_re_output(*, track: AnyTrack, save_dir: Path, save_path: Path) -> Path:
    """
    Move the file produced by N_m3u8DL-RE into its expected location.

    N_m3u8DL-RE is invoked with `--save-name track.id`, so its output lives at
    `{track.id}.*` somewhere under `save_dir`. The largest matching file is
    relocated to `save_path`, keeping whatever suffix the tool actually wrote.
    For subtitle tracks, `track.codec` is refreshed to agree with that suffix.

    Returns the final path of the (possibly moved) output file.
    Raises FileNotFoundError when no matching output exists under `save_dir`.
    """
    candidates = [entry for entry in save_dir.rglob(f"{track.id}.*") if entry.is_file()]
    if not candidates:
        raise FileNotFoundError(f"No output files produced by N_m3u8DL-RE for save-name={track.id} in: {save_dir}")

    # The biggest matching file is taken as the real media output; any
    # sidecar files the tool produced will be smaller.
    largest = max(candidates, key=lambda entry: entry.stat().st_size)

    # Keep the suffix the tool actually chose (it may differ from save_path's).
    destination = save_path if not largest.suffix else save_path.with_suffix(largest.suffix)

    destination.parent.mkdir(parents=True, exist_ok=True)
    if largest.absolute() != destination.absolute():
        destination.unlink(missing_ok=True)
        shutil.move(str(largest), str(destination))

    if isinstance(track, Subtitle):
        extension = destination.suffix.lower().lstrip(".")
        try:
            track.codec = Subtitle.Codec.from_mime(extension)
        except ValueError:
            # Unknown extension: leave the previously-set codec untouched.
            pass

    # Best-effort cleanup of the tool's working directory.
    shutil.rmtree(save_dir, ignore_errors=True)

    return destination
|
||||
|
||||
@staticmethod
|
||||
def download_track(
|
||||
track: AnyTrack,
|
||||
@@ -420,222 +453,230 @@ class HLS:
|
||||
for control_file in segment_save_dir.glob("*.aria2__temp"):
|
||||
control_file.unlink()
|
||||
|
||||
if not skip_merge:
|
||||
progress(total=total_segments, completed=0, downloaded="Merging")
|
||||
if skip_merge:
|
||||
final_save_path = HLS._finalize_n_m3u8dl_re_output(track=track, save_dir=save_dir, save_path=save_path)
|
||||
progress(downloaded="Downloaded")
|
||||
track.path = final_save_path
|
||||
events.emit(events.Types.TRACK_DOWNLOADED, track=track)
|
||||
return
|
||||
|
||||
name_len = len(str(total_segments))
|
||||
discon_i = 0
|
||||
range_offset = 0
|
||||
map_data: Optional[tuple[m3u8.model.InitializationSection, bytes]] = None
|
||||
if session_drm:
|
||||
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = (initial_drm_key, session_drm)
|
||||
else:
|
||||
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = None
|
||||
progress(total=total_segments, completed=0, downloaded="Merging")
|
||||
|
||||
i = -1
|
||||
for real_i, segment in enumerate(master.segments):
|
||||
if segment not in unwanted_segments:
|
||||
i += 1
|
||||
name_len = len(str(total_segments))
|
||||
discon_i = 0
|
||||
range_offset = 0
|
||||
map_data: Optional[tuple[m3u8.model.InitializationSection, bytes]] = None
|
||||
if session_drm:
|
||||
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = (initial_drm_key, session_drm)
|
||||
else:
|
||||
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = None
|
||||
|
||||
is_last_segment = (real_i + 1) == len(master.segments)
|
||||
i = -1
|
||||
for real_i, segment in enumerate(master.segments):
|
||||
if segment not in unwanted_segments:
|
||||
i += 1
|
||||
|
||||
def merge(to: Path, via: list[Path], delete: bool = False, include_map_data: bool = False):
|
||||
"""
|
||||
Merge all files to a given path, optionally including map data.
|
||||
is_last_segment = (real_i + 1) == len(master.segments)
|
||||
|
||||
Parameters:
|
||||
to: The output file with all merged data.
|
||||
via: List of files to merge, in sequence.
|
||||
delete: Delete the file once it's been merged.
|
||||
include_map_data: Whether to include the init map data.
|
||||
"""
|
||||
with open(to, "wb") as x:
|
||||
if include_map_data and map_data and map_data[1]:
|
||||
x.write(map_data[1])
|
||||
for file in via:
|
||||
x.write(file.read_bytes())
|
||||
x.flush()
|
||||
if delete:
|
||||
file.unlink()
|
||||
def merge(to: Path, via: list[Path], delete: bool = False, include_map_data: bool = False):
|
||||
"""
|
||||
Merge all files to a given path, optionally including map data.
|
||||
|
||||
def decrypt(include_this_segment: bool) -> Path:
|
||||
"""
|
||||
Decrypt all segments that uses the currently set DRM.
|
||||
Parameters:
|
||||
to: The output file with all merged data.
|
||||
via: List of files to merge, in sequence.
|
||||
delete: Delete the file once it's been merged.
|
||||
include_map_data: Whether to include the init map data.
|
||||
"""
|
||||
with open(to, "wb") as x:
|
||||
if include_map_data and map_data and map_data[1]:
|
||||
x.write(map_data[1])
|
||||
for file in via:
|
||||
x.write(file.read_bytes())
|
||||
x.flush()
|
||||
if delete:
|
||||
file.unlink()
|
||||
|
||||
All segments that will be decrypted with this DRM will be merged together
|
||||
in sequence, prefixed with the init data (if any), and then deleted. Once
|
||||
merged they will be decrypted. The merged and decrypted file names state
|
||||
the range of segments that were used.
|
||||
def decrypt(include_this_segment: bool) -> Path:
|
||||
"""
|
||||
Decrypt all segments that uses the currently set DRM.
|
||||
|
||||
Parameters:
|
||||
include_this_segment: Whether to include the current segment in the
|
||||
list of segments to merge and decrypt. This should be False if
|
||||
decrypting on EXT-X-KEY changes, or True when decrypting on the
|
||||
last segment.
|
||||
All segments that will be decrypted with this DRM will be merged together
|
||||
in sequence, prefixed with the init data (if any), and then deleted. Once
|
||||
merged they will be decrypted. The merged and decrypted file names state
|
||||
the range of segments that were used.
|
||||
|
||||
Returns the decrypted path.
|
||||
"""
|
||||
drm = encryption_data[1]
|
||||
first_segment_i = next(
|
||||
int(file.stem) for file in sorted(segment_save_dir.iterdir()) if file.stem.isdigit()
|
||||
)
|
||||
last_segment_i = max(0, i - int(not include_this_segment))
|
||||
range_len = (last_segment_i - first_segment_i) + 1
|
||||
Parameters:
|
||||
include_this_segment: Whether to include the current segment in the
|
||||
list of segments to merge and decrypt. This should be False if
|
||||
decrypting on EXT-X-KEY changes, or True when decrypting on the
|
||||
last segment.
|
||||
|
||||
segment_range = f"{str(first_segment_i).zfill(name_len)}-{str(last_segment_i).zfill(name_len)}"
|
||||
merged_path = (
|
||||
segment_save_dir / f"{segment_range}{get_extension(master.segments[last_segment_i].uri)}"
|
||||
)
|
||||
decrypted_path = segment_save_dir / f"{merged_path.stem}_decrypted{merged_path.suffix}"
|
||||
Returns the decrypted path.
|
||||
"""
|
||||
drm = encryption_data[1]
|
||||
first_segment_i = next(
|
||||
int(file.stem) for file in sorted(segment_save_dir.iterdir()) if file.stem.isdigit()
|
||||
)
|
||||
last_segment_i = max(0, i - int(not include_this_segment))
|
||||
range_len = (last_segment_i - first_segment_i) + 1
|
||||
|
||||
files = [
|
||||
file
|
||||
for file in sorted(segment_save_dir.iterdir())
|
||||
if file.stem.isdigit() and first_segment_i <= int(file.stem) <= last_segment_i
|
||||
]
|
||||
if not files:
|
||||
raise ValueError(f"None of the segment files for {segment_range} exist...")
|
||||
elif len(files) != range_len:
|
||||
raise ValueError(f"Missing {range_len - len(files)} segment files for {segment_range}...")
|
||||
segment_range = f"{str(first_segment_i).zfill(name_len)}-{str(last_segment_i).zfill(name_len)}"
|
||||
merged_path = segment_save_dir / f"{segment_range}{get_extension(master.segments[last_segment_i].uri)}"
|
||||
decrypted_path = segment_save_dir / f"{merged_path.stem}_decrypted{merged_path.suffix}"
|
||||
|
||||
if isinstance(drm, (Widevine, PlayReady)):
|
||||
# with widevine we can merge all segments and decrypt once
|
||||
merge(to=merged_path, via=files, delete=True, include_map_data=True)
|
||||
drm.decrypt(merged_path)
|
||||
merged_path.rename(decrypted_path)
|
||||
else:
|
||||
# with other drm we must decrypt separately and then merge them
|
||||
# for aes this is because each segment likely has 16-byte padding
|
||||
for file in files:
|
||||
drm.decrypt(file)
|
||||
merge(to=merged_path, via=files, delete=True, include_map_data=True)
|
||||
files = [
|
||||
file
|
||||
for file in sorted(segment_save_dir.iterdir())
|
||||
if file.stem.isdigit() and first_segment_i <= int(file.stem) <= last_segment_i
|
||||
]
|
||||
if not files:
|
||||
raise ValueError(f"None of the segment files for {segment_range} exist...")
|
||||
elif len(files) != range_len:
|
||||
raise ValueError(f"Missing {range_len - len(files)} segment files for {segment_range}...")
|
||||
|
||||
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=decrypted_path)
|
||||
if isinstance(drm, (Widevine, PlayReady)):
|
||||
# with widevine we can merge all segments and decrypt once
|
||||
merge(to=merged_path, via=files, delete=True, include_map_data=True)
|
||||
drm.decrypt(merged_path)
|
||||
merged_path.rename(decrypted_path)
|
||||
else:
|
||||
# with other drm we must decrypt separately and then merge them
|
||||
# for aes this is because each segment likely has 16-byte padding
|
||||
for file in files:
|
||||
drm.decrypt(file)
|
||||
merge(to=merged_path, via=files, delete=True, include_map_data=True)
|
||||
|
||||
return decrypted_path
|
||||
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=decrypted_path)
|
||||
|
||||
def merge_discontinuity(include_this_segment: bool, include_map_data: bool = True):
|
||||
"""
|
||||
Merge all segments of the discontinuity.
|
||||
return decrypted_path
|
||||
|
||||
All segment files for this discontinuity must already be downloaded and
|
||||
already decrypted (if it needs to be decrypted).
|
||||
def merge_discontinuity(include_this_segment: bool, include_map_data: bool = True):
|
||||
"""
|
||||
Merge all segments of the discontinuity.
|
||||
|
||||
Parameters:
|
||||
include_this_segment: Whether to include the current segment in the
|
||||
list of segments to merge and decrypt. This should be False if
|
||||
decrypting on EXT-X-KEY changes, or True when decrypting on the
|
||||
last segment.
|
||||
include_map_data: Whether to prepend the init map data before the
|
||||
segment files when merging.
|
||||
"""
|
||||
last_segment_i = max(0, i - int(not include_this_segment))
|
||||
All segment files for this discontinuity must already be downloaded and
|
||||
already decrypted (if it needs to be decrypted).
|
||||
|
||||
files = [
|
||||
file
|
||||
for file in sorted(segment_save_dir.iterdir())
|
||||
if int(file.stem.replace("_decrypted", "").split("-")[-1]) <= last_segment_i
|
||||
]
|
||||
if files:
|
||||
to_dir = segment_save_dir.parent
|
||||
to_path = to_dir / f"{str(discon_i).zfill(name_len)}{files[-1].suffix}"
|
||||
merge(to=to_path, via=files, delete=True, include_map_data=include_map_data)
|
||||
Parameters:
|
||||
include_this_segment: Whether to include the current segment in the
|
||||
list of segments to merge and decrypt. This should be False if
|
||||
decrypting on EXT-X-KEY changes, or True when decrypting on the
|
||||
last segment.
|
||||
include_map_data: Whether to prepend the init map data before the
|
||||
segment files when merging.
|
||||
"""
|
||||
last_segment_i = max(0, i - int(not include_this_segment))
|
||||
|
||||
if segment not in unwanted_segments:
|
||||
if isinstance(track, Subtitle):
|
||||
segment_file_ext = get_extension(segment.uri)
|
||||
segment_file_path = segment_save_dir / f"{str(i).zfill(name_len)}{segment_file_ext}"
|
||||
segment_data = try_ensure_utf8(segment_file_path.read_bytes())
|
||||
if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
|
||||
segment_data = (
|
||||
segment_data.decode("utf8")
|
||||
.replace("‎", html.unescape("‎"))
|
||||
.replace("‏", html.unescape("‏"))
|
||||
.encode("utf8")
|
||||
)
|
||||
segment_file_path.write_bytes(segment_data)
|
||||
files = [
|
||||
file
|
||||
for file in sorted(segment_save_dir.iterdir())
|
||||
if int(file.stem.replace("_decrypted", "").split("-")[-1]) <= last_segment_i
|
||||
]
|
||||
if files:
|
||||
to_dir = segment_save_dir.parent
|
||||
to_path = to_dir / f"{str(discon_i).zfill(name_len)}{files[-1].suffix}"
|
||||
merge(to=to_path, via=files, delete=True, include_map_data=include_map_data)
|
||||
|
||||
if segment.discontinuity and i != 0:
|
||||
if encryption_data:
|
||||
decrypt(include_this_segment=False)
|
||||
merge_discontinuity(
|
||||
include_this_segment=False, include_map_data=not encryption_data or not encryption_data[1]
|
||||
if segment not in unwanted_segments:
|
||||
if isinstance(track, Subtitle):
|
||||
segment_file_ext = get_extension(segment.uri)
|
||||
segment_file_path = segment_save_dir / f"{str(i).zfill(name_len)}{segment_file_ext}"
|
||||
segment_data = try_ensure_utf8(segment_file_path.read_bytes())
|
||||
if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
|
||||
segment_data = (
|
||||
segment_data.decode("utf8")
|
||||
.replace("‎", html.unescape("‎"))
|
||||
.replace("‏", html.unescape("‏"))
|
||||
.encode("utf8")
|
||||
)
|
||||
segment_file_path.write_bytes(segment_data)
|
||||
|
||||
discon_i += 1
|
||||
range_offset = 0 # TODO: Should this be reset or not?
|
||||
map_data = None
|
||||
if encryption_data:
|
||||
encryption_data = (encryption_data[0], encryption_data[1])
|
||||
|
||||
if segment.init_section and (not map_data or segment.init_section != map_data[0]):
|
||||
if segment.init_section.byterange:
|
||||
init_byte_range = HLS.calculate_byte_range(segment.init_section.byterange, range_offset)
|
||||
range_offset = init_byte_range.split("-")[0]
|
||||
init_range_header = {"Range": f"bytes={init_byte_range}"}
|
||||
else:
|
||||
init_range_header = {}
|
||||
|
||||
# Handle both session types for init section request
|
||||
res = session.get(
|
||||
url=urljoin(segment.init_section.base_uri, segment.init_section.uri),
|
||||
headers=init_range_header,
|
||||
)
|
||||
|
||||
# Check response based on session type
|
||||
if isinstance(res, requests.Response) or isinstance(res, CurlResponse):
|
||||
res.raise_for_status()
|
||||
init_content = res.content
|
||||
else:
|
||||
raise TypeError(
|
||||
f"Expected response to be requests.Response or curl_cffi.Response, not {type(res)}"
|
||||
)
|
||||
|
||||
map_data = (segment.init_section, init_content)
|
||||
|
||||
segment_keys = getattr(segment, "keys", None)
|
||||
if segment_keys:
|
||||
if cdm:
|
||||
cdm_segment_keys = HLS.filter_keys_for_cdm(segment_keys, cdm)
|
||||
key = HLS.get_supported_key(cdm_segment_keys) if cdm_segment_keys else HLS.get_supported_key(segment_keys)
|
||||
else:
|
||||
key = HLS.get_supported_key(segment_keys)
|
||||
if encryption_data and encryption_data[0] != key and i != 0 and segment not in unwanted_segments:
|
||||
decrypt(include_this_segment=False)
|
||||
|
||||
if key is None:
|
||||
encryption_data = None
|
||||
elif not encryption_data or encryption_data[0] != key:
|
||||
drm = HLS.get_drm(key, session)
|
||||
if isinstance(drm, (Widevine, PlayReady)):
|
||||
try:
|
||||
if map_data:
|
||||
track_kid = track.get_key_id(map_data[1])
|
||||
else:
|
||||
track_kid = None
|
||||
if not track_kid:
|
||||
track_kid = drm.kid
|
||||
progress(downloaded="LICENSING")
|
||||
license_widevine(drm, track_kid=track_kid)
|
||||
progress(downloaded="[yellow]LICENSED")
|
||||
except Exception: # noqa
|
||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||
progress(downloaded="[red]FAILED")
|
||||
raise
|
||||
encryption_data = (key, drm)
|
||||
|
||||
if DOWNLOAD_LICENCE_ONLY.is_set():
|
||||
continue
|
||||
|
||||
if is_last_segment:
|
||||
# required as it won't end with EXT-X-DISCONTINUITY nor a new key
|
||||
if segment.discontinuity and i != 0:
|
||||
if encryption_data:
|
||||
decrypt(include_this_segment=True)
|
||||
decrypt(include_this_segment=False)
|
||||
merge_discontinuity(
|
||||
include_this_segment=True, include_map_data=not encryption_data or not encryption_data[1]
|
||||
include_this_segment=False, include_map_data=not encryption_data or not encryption_data[1]
|
||||
)
|
||||
|
||||
progress(advance=1)
|
||||
discon_i += 1
|
||||
range_offset = 0 # TODO: Should this be reset or not?
|
||||
map_data = None
|
||||
if encryption_data:
|
||||
encryption_data = (encryption_data[0], encryption_data[1])
|
||||
|
||||
if segment.init_section and (not map_data or segment.init_section != map_data[0]):
|
||||
if segment.init_section.byterange:
|
||||
init_byte_range = HLS.calculate_byte_range(segment.init_section.byterange, range_offset)
|
||||
range_offset = init_byte_range.split("-")[0]
|
||||
init_range_header = {"Range": f"bytes={init_byte_range}"}
|
||||
else:
|
||||
init_range_header = {}
|
||||
|
||||
# Handle both session types for init section request
|
||||
res = session.get(
|
||||
url=urljoin(segment.init_section.base_uri, segment.init_section.uri),
|
||||
headers=init_range_header,
|
||||
)
|
||||
|
||||
# Check response based on session type
|
||||
if isinstance(res, requests.Response) or isinstance(res, CurlResponse):
|
||||
res.raise_for_status()
|
||||
init_content = res.content
|
||||
else:
|
||||
raise TypeError(
|
||||
f"Expected response to be requests.Response or curl_cffi.Response, not {type(res)}"
|
||||
)
|
||||
|
||||
map_data = (segment.init_section, init_content)
|
||||
|
||||
segment_keys = getattr(segment, "keys", None)
|
||||
if segment_keys:
|
||||
if cdm:
|
||||
cdm_segment_keys = HLS.filter_keys_for_cdm(segment_keys, cdm)
|
||||
key = (
|
||||
HLS.get_supported_key(cdm_segment_keys)
|
||||
if cdm_segment_keys
|
||||
else HLS.get_supported_key(segment_keys)
|
||||
)
|
||||
else:
|
||||
key = HLS.get_supported_key(segment_keys)
|
||||
if encryption_data and encryption_data[0] != key and i != 0 and segment not in unwanted_segments:
|
||||
decrypt(include_this_segment=False)
|
||||
|
||||
if key is None:
|
||||
encryption_data = None
|
||||
elif not encryption_data or encryption_data[0] != key:
|
||||
drm = HLS.get_drm(key, session)
|
||||
if isinstance(drm, (Widevine, PlayReady)):
|
||||
try:
|
||||
if map_data:
|
||||
track_kid = track.get_key_id(map_data[1])
|
||||
else:
|
||||
track_kid = None
|
||||
if not track_kid:
|
||||
track_kid = drm.kid
|
||||
progress(downloaded="LICENSING")
|
||||
license_widevine(drm, track_kid=track_kid)
|
||||
progress(downloaded="[yellow]LICENSED")
|
||||
except Exception: # noqa
|
||||
DOWNLOAD_CANCELLED.set() # skip pending track downloads
|
||||
progress(downloaded="[red]FAILED")
|
||||
raise
|
||||
encryption_data = (key, drm)
|
||||
|
||||
if DOWNLOAD_LICENCE_ONLY.is_set():
|
||||
continue
|
||||
|
||||
if is_last_segment:
|
||||
# required as it won't end with EXT-X-DISCONTINUITY nor a new key
|
||||
if encryption_data:
|
||||
decrypt(include_this_segment=True)
|
||||
merge_discontinuity(
|
||||
include_this_segment=True, include_map_data=not encryption_data or not encryption_data[1]
|
||||
)
|
||||
|
||||
progress(advance=1)
|
||||
|
||||
if DOWNLOAD_LICENCE_ONLY.is_set():
|
||||
return
|
||||
|
||||
@@ -168,6 +168,16 @@ def merge_segmented_webvtt(vtt_raw: str, segment_durations: Optional[list[int]]
|
||||
duplicate_index: list[int] = []
|
||||
captions = vtt.get_captions(lang)
|
||||
|
||||
# Some providers can produce "segment_index" values that are
|
||||
# outside the provided segment_durations list after normalization/merge.
|
||||
# This used to crash with IndexError and abort the entire download.
|
||||
if segment_durations and captions:
|
||||
max_idx = max(getattr(c, "segment_index", 0) for c in captions)
|
||||
if max_idx >= len(segment_durations):
|
||||
# Pad with the last known duration (or 0 if empty) so indexing is safe.
|
||||
pad_val = segment_durations[-1] if segment_durations else 0
|
||||
segment_durations = segment_durations + [pad_val] * (max_idx - len(segment_durations) + 1)
|
||||
|
||||
if captions[0].segment_index == 0:
|
||||
first_segment_mpegts = captions[0].mpegts
|
||||
else:
|
||||
@@ -179,6 +189,9 @@ def merge_segmented_webvtt(vtt_raw: str, segment_durations: Optional[list[int]]
|
||||
# calculate the timestamp from SegmentTemplate/SegmentList duration.
|
||||
likely_dash = first_segment_mpegts == 0 and caption.mpegts == 0
|
||||
if likely_dash and segment_durations:
|
||||
# Defensive: segment_index can still be out of range if captions are malformed.
|
||||
if caption.segment_index < 0 or caption.segment_index >= len(segment_durations):
|
||||
continue
|
||||
duration = segment_durations[caption.segment_index]
|
||||
caption.mpegts = MPEG_TIMESCALE * (duration / timescale)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user