fix(dash): deduplicate multi-period SegmentBase segments

Multi-period DASH manifests using SegmentBase with shared BaseURLs were downloading the entire file once per period. Deduplicate identical segments across periods so each file is only downloaded once. Also demote multi-period log message from info to debug.
This commit is contained in:
imSp4rky
2026-04-13 16:44:35 +00:00
parent c56a92ed0c
commit 982f821f19

View File

@@ -6,7 +6,6 @@ import logging
import math
import re
import shutil
import struct
import sys
from copy import copy
from functools import partial
@@ -571,49 +570,6 @@ class DASH:
progress(downloaded="Downloaded")
@staticmethod
def _parse_sidx(data: bytes, index_range: str) -> list[str]:
"""Parse a SIDX box to extract per-segment byte ranges."""
# Find the sidx box in the data
offset = 0
while offset < len(data) - 8:
box_size = struct.unpack(">I", data[offset : offset + 4])[0]
if box_size < 8 or data[offset + 4 : offset + 8] != b"sidx":
offset += max(box_size, 8)
continue
pos = offset + 8
version = data[pos]
pos += 4 # version + flags
pos += 4 # reference_ID
pos += 4 # timescale
if version == 0:
first_offset = struct.unpack(">I", data[pos + 4 : pos + 8])[0]
pos += 8
else:
first_offset = struct.unpack(">Q", data[pos + 8 : pos + 16])[0]
pos += 16
pos += 2 # reserved
reference_count = struct.unpack(">H", data[pos : pos + 2])[0]
pos += 2
idx_end = int(index_range.split("-")[1])
current_offset = idx_end + 1 + first_offset
segments = []
for _ in range(reference_count):
ref_size = struct.unpack(">I", data[pos : pos + 4])[0] & 0x7FFFFFFF
pos += 12 # ref_info + subseg_duration + SAP fields
seg_end = current_offset + ref_size - 1
segments.append(f"{current_offset}-{seg_end}")
current_offset = seg_end + 1
return segments
return []
@staticmethod
def _is_content_period(period: Element, filtered_period_ids: list[str]) -> bool:
"""Check if a period is a valid content period (not an ad, not filtered, not trick mode)."""
@@ -814,18 +770,6 @@ class DASH:
if total_size:
media_range = f"{len(init_data)}-{total_size}"
# Parse SIDX box from indexRange to get per-segment byte ranges
index_range = segment_base.get("indexRange")
if index_range:
sidx_res = session.get(url=rep_base_url, headers={"Range": f"bytes={index_range}"})
sidx_res.raise_for_status()
sidx_segments = DASH._parse_sidx(sidx_res.content, index_range)
if sidx_segments:
for seg_range in sidx_segments:
segments.append((rep_base_url, seg_range))
else:
segments.append((rep_base_url, media_range))
else:
segments.append((rep_base_url, media_range))
elif rep_base_url:
segments.append((rep_base_url, None))