mirror of
https://github.com/unshackle-dl/unshackle.git
synced 2026-05-17 06:09:29 +00:00
feat(dl): add download resume support via HTTP Range headers
Partial downloads are now preserved across interruptions and retries. When a control file and partial data exist, the downloader sends a Range header to resume from the last byte. Falls back to full re-download if the server doesn't support Range requests (no 206).
This commit is contained in:
@@ -84,17 +84,21 @@ def download(
|
|||||||
|
|
||||||
save_dir.mkdir(parents=True, exist_ok=True)
|
save_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if control_file.exists():
|
resume_offset = 0
|
||||||
save_path.unlink(missing_ok=True)
|
if control_file.exists() and save_path.exists():
|
||||||
|
resume_offset = save_path.stat().st_size
|
||||||
|
elif control_file.exists():
|
||||||
control_file.unlink()
|
control_file.unlink()
|
||||||
elif save_path.exists():
|
elif save_path.exists():
|
||||||
yield dict(file_downloaded=save_path, written=save_path.stat().st_size)
|
yield dict(file_downloaded=save_path, written=save_path.stat().st_size)
|
||||||
|
return
|
||||||
|
|
||||||
control_file.write_bytes(b"")
|
control_file.write_bytes(b"")
|
||||||
_time = time.time
|
_time = time.time
|
||||||
use_raw = _is_requests_session(session)
|
use_raw = _is_requests_session(session)
|
||||||
|
|
||||||
attempts = 1
|
attempts = 1
|
||||||
|
completed = False
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
written = 0
|
written = 0
|
||||||
@@ -102,10 +106,19 @@ def download(
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
use_rnet = _is_rnet_session(session)
|
use_rnet = _is_rnet_session(session)
|
||||||
stream = session.get(url, stream=True, **kwargs)
|
|
||||||
|
request_kwargs = dict(kwargs)
|
||||||
|
if resume_offset > 0:
|
||||||
|
req_headers = dict(request_kwargs.get("headers", {}) or {})
|
||||||
|
req_headers["Range"] = f"bytes={resume_offset}-"
|
||||||
|
request_kwargs["headers"] = req_headers
|
||||||
|
|
||||||
|
stream = session.get(url, stream=True, **request_kwargs)
|
||||||
stream.raise_for_status()
|
stream.raise_for_status()
|
||||||
|
|
||||||
# Determine content length and adaptive chunk size
|
resumed = resume_offset > 0 and stream.status_code == 206
|
||||||
|
if resume_offset > 0 and not resumed:
|
||||||
|
resume_offset = 0
|
||||||
if use_rnet:
|
if use_rnet:
|
||||||
content_length = stream.content_length or 0
|
content_length = stream.content_length or 0
|
||||||
else:
|
else:
|
||||||
@@ -117,28 +130,27 @@ def download(
|
|||||||
content_length = 0
|
content_length = 0
|
||||||
|
|
||||||
chunk_size = _adaptive_chunk_size(content_length)
|
chunk_size = _adaptive_chunk_size(content_length)
|
||||||
|
total_size = (resume_offset + content_length) if resumed and content_length > 0 else content_length
|
||||||
|
|
||||||
if not segmented:
|
if not segmented:
|
||||||
if content_length > 0:
|
if total_size > 0:
|
||||||
yield dict(total=content_length)
|
yield dict(total=total_size)
|
||||||
else:
|
else:
|
||||||
yield dict(total=None)
|
yield dict(total=None)
|
||||||
|
if resumed and resume_offset > 0:
|
||||||
|
yield dict(advance=resume_offset)
|
||||||
|
|
||||||
# Pre-allocate file when size is known (helps filesystem allocate contiguous blocks)
|
file_mode = "ab" if resumed else "wb"
|
||||||
with open(save_path, "wb", buffering=1_048_576) as f:
|
with open(save_path, file_mode, buffering=1_048_576) as f:
|
||||||
if content_length > 0:
|
if not resumed and content_length > 0:
|
||||||
f.truncate(content_length)
|
f.truncate(content_length)
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
|
|
||||||
# Cache f.write for hot loop
|
|
||||||
_write = f.write
|
_write = f.write
|
||||||
|
|
||||||
# Build chunk iterator based on session type
|
|
||||||
if use_rnet:
|
if use_rnet:
|
||||||
# rnet: native Rust streaming — 3.5x faster than curl_cffi (benchmarked)
|
|
||||||
chunks = stream.stream()
|
chunks = stream.stream()
|
||||||
elif use_raw:
|
elif use_raw:
|
||||||
# requests.Session: raw socket read — 30-35% faster than iter_content
|
|
||||||
_read = stream.raw.read
|
_read = stream.raw.read
|
||||||
|
|
||||||
def _chunks() -> Generator[bytes, None, None]:
|
def _chunks() -> Generator[bytes, None, None]:
|
||||||
@@ -151,14 +163,13 @@ def download(
|
|||||||
|
|
||||||
chunks = _chunks()
|
chunks = _chunks()
|
||||||
else:
|
else:
|
||||||
# Fallback: iter_content
|
|
||||||
def _chunks_iter() -> Generator[bytes, None, None]:
|
def _chunks_iter() -> Generator[bytes, None, None]:
|
||||||
yield from stream.iter_content(chunk_size=chunk_size)
|
yield from stream.iter_content(chunk_size=chunk_size)
|
||||||
stream.close()
|
stream.close()
|
||||||
|
|
||||||
chunks = _chunks_iter()
|
chunks = _chunks_iter()
|
||||||
|
|
||||||
# Unified write + progress loop
|
|
||||||
_data_accumulated = 0
|
_data_accumulated = 0
|
||||||
_bytes_since_yield = 0
|
_bytes_since_yield = 0
|
||||||
for chunk in chunks:
|
for chunk in chunks:
|
||||||
@@ -181,30 +192,31 @@ def download(
|
|||||||
last_speed_refresh = now
|
last_speed_refresh = now
|
||||||
_data_accumulated = 0
|
_data_accumulated = 0
|
||||||
|
|
||||||
# Flush any remaining bytes
|
|
||||||
if not segmented and _bytes_since_yield > 0:
|
if not segmented and _bytes_since_yield > 0:
|
||||||
yield dict(advance=_bytes_since_yield)
|
yield dict(advance=_bytes_since_yield)
|
||||||
|
|
||||||
# Truncate to actual written size in case pre-allocation overshot
|
if not resumed and content_length > 0 and written != content_length:
|
||||||
if content_length > 0 and written != content_length:
|
|
||||||
f.truncate(written)
|
f.truncate(written)
|
||||||
|
|
||||||
if not segmented and content_length and written < content_length:
|
if not segmented and content_length and written < content_length:
|
||||||
raise IOError(f"Failed to read {content_length} bytes from the track URI.")
|
raise IOError(f"Failed to read {content_length} bytes from the track URI.")
|
||||||
|
|
||||||
yield dict(file_downloaded=save_path, written=written)
|
yield dict(file_downloaded=save_path, written=resume_offset + written)
|
||||||
|
|
||||||
if segmented:
|
if segmented:
|
||||||
yield dict(advance=1)
|
yield dict(advance=1)
|
||||||
|
completed = True
|
||||||
break
|
break
|
||||||
except Exception as e:
|
except Exception:
|
||||||
save_path.unlink(missing_ok=True)
|
|
||||||
if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS:
|
if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS:
|
||||||
raise e
|
return
|
||||||
|
if save_path.exists():
|
||||||
|
resume_offset = save_path.stat().st_size
|
||||||
time.sleep(RETRY_WAIT)
|
time.sleep(RETRY_WAIT)
|
||||||
attempts += 1
|
attempts += 1
|
||||||
finally:
|
finally:
|
||||||
control_file.unlink()
|
if completed:
|
||||||
|
control_file.unlink(missing_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def requests(
|
def requests(
|
||||||
|
|||||||
Reference in New Issue
Block a user