perf(downloader): optimize hot loop and threading efficiency

Replace list.pop(0) with deque.popleft() for O(1) speed-tracker eviction, and skip the urllib3 decode chain by setting decode_content=False on raw reads. Use a running total instead of sum() for progress reporting, and add an explicit stream.close() on the CurlSession path. Replace the busy-poll loop with concurrent.futures.wait(FIRST_COMPLETED), skip the ThreadPoolExecutor entirely for single-URL downloads, and DRY up the duplicated raw/iter_content progress logic into a unified chunk iterator.
This commit is contained in:
Andy
2026-03-23 18:17:12 -06:00
parent 006d080416
commit 6840944738

View File

@@ -1,6 +1,8 @@
import math
import os
import time
from collections import deque
from concurrent.futures import FIRST_COMPLETED, wait
from concurrent.futures.thread import ThreadPoolExecutor
from http.cookiejar import CookieJar
from pathlib import Path
@@ -74,7 +76,7 @@ def download(
session = session or Session()
if _speed_tracker is None:
_speed_tracker = {"sizes": [], "last_refresh": time.time()}
_speed_tracker = {"sizes": deque(), "last_refresh": time.time()}
save_dir = save_path.parent
control_file = save_path.with_name(f"{save_path.name}.!dev")
@@ -95,7 +97,6 @@ def download(
try:
while True:
written = 0
download_sizes: list[int] = []
last_speed_refresh = _time()
try:
@@ -127,33 +128,30 @@ def download(
# Cache f.write for hot loop
_write = f.write
# Build chunk iterator — raw reads for requests.Session, iter_content for CurlSession
if use_raw:
# Raw socket read — 30-35% faster than iter_content (benchmarked)
# Safe in worker threads with Queue-based event dispatch
stream.raw.decode_content = False
_read = stream.raw.read
def _chunks() -> Generator[bytes, None, None]:
while True:
chunk = _read(chunk_size)
if not chunk:
break
_write(chunk)
download_size = len(chunk)
written += download_size
if not segmented:
yield dict(advance=1)
now = _time()
time_since = now - last_speed_refresh
download_sizes.append(download_size)
if time_since > PROGRESS_WINDOW or download_size < chunk_size:
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
yield chunk
stream.close()
chunks = _chunks()
else:
# CurlSession: use iter_content (raw not available)
for chunk in stream.iter_content(chunk_size=chunk_size):
def _chunks_iter() -> Generator[bytes, None, None]:
yield from stream.iter_content(chunk_size=chunk_size)
stream.close()
chunks = _chunks_iter()
# Unified write + progress loop
_data_accumulated = 0
for chunk in chunks:
_write(chunk)
download_size = len(chunk)
written += download_size
@@ -162,13 +160,12 @@ def download(
yield dict(advance=1)
now = _time()
time_since = now - last_speed_refresh
download_sizes.append(download_size)
_data_accumulated += download_size
if time_since > PROGRESS_WINDOW or download_size < chunk_size:
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
download_speed = math.ceil(_data_accumulated / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
_data_accumulated = 0
# Truncate to actual written size in case pre-allocation overshot
if content_length > 0 and written != content_length:
@@ -187,7 +184,7 @@ def download(
sizes.append((now, written))
cutoff = now - SPEED_ROLLING_WINDOW
while sizes and sizes[0][0] < cutoff:
sizes.pop(0)
sizes.popleft()
time_since = now - _speed_tracker["last_refresh"]
if sizes and time_since > PROGRESS_WINDOW:
window_start = sizes[0][0]
@@ -338,9 +335,18 @@ def requests(
yield dict(total=len(urls))
# Per-call speed tracker — shared across threads within this call only
speed_tracker: dict[str, Any] = {"sizes": [], "last_refresh": time.time()}
speed_tracker: dict[str, Any] = {"sizes": deque(), "last_refresh": time.time()}
try:
# Fast path: single URL — no thread pool overhead
if len(urls) == 1:
yield from download(
session=session,
segmented=segmented_batch,
_speed_tracker=speed_tracker,
**urls[0],
)
else:
with ThreadPoolExecutor(max_workers=max_workers) as pool:
event_queue: Queue[dict[str, Any]] = Queue()
@@ -364,9 +370,9 @@ def requests(
except Empty:
break
done = {future for future in pending if future.done()}
for future in done:
pending.remove(future)
# Wait efficiently for next future completion (OS condition variable)
completed, pending = wait(pending, timeout=0.1, return_when=FIRST_COMPLETED)
for future in completed:
exc = future.exception()
if isinstance(exc, KeyboardInterrupt):
DOWNLOAD_CANCELLED.set()
@@ -392,12 +398,6 @@ def requests(
)
raise exc
if pending:
try:
yield event_queue.get(timeout=0.1)
except Empty:
pass
# Drain any remaining events from workers that just finished
while True:
try: