perf(downloader): optimize hot loop and threading efficiency

Replace list.pop(0) with deque.popleft() for O(1) speed-tracker eviction, and skip the urllib3 decode chain by setting decode_content=False on raw reads. Use a running total instead of sum() for progress reporting, and add an explicit stream.close() on the CurlSession path. Replace the busy-poll loop with concurrent.futures.wait(FIRST_COMPLETED), skip the ThreadPoolExecutor entirely for single-URL downloads, and DRY up the duplicated raw/iter_content progress logic into a unified chunk iterator.
This commit is contained in:
Andy
2026-03-23 18:17:12 -06:00
parent 006d080416
commit 6840944738

View File

@@ -1,6 +1,8 @@
import math
import os
import time
from collections import deque
from concurrent.futures import FIRST_COMPLETED, wait
from concurrent.futures.thread import ThreadPoolExecutor
from http.cookiejar import CookieJar
from pathlib import Path
@@ -74,7 +76,7 @@ def download(
session = session or Session()
if _speed_tracker is None:
_speed_tracker = {"sizes": [], "last_refresh": time.time()}
_speed_tracker = {"sizes": deque(), "last_refresh": time.time()}
save_dir = save_path.parent
control_file = save_path.with_name(f"{save_path.name}.!dev")
@@ -95,7 +97,6 @@ def download(
try:
while True:
written = 0
download_sizes: list[int] = []
last_speed_refresh = _time()
try:
@@ -127,33 +128,30 @@ def download(
# Cache f.write for hot loop
_write = f.write
# Build chunk iterator — raw reads for requests.Session, iter_content for CurlSession
if use_raw:
# Raw socket read — 30-35% faster than iter_content (benchmarked)
# Safe in worker threads with Queue-based event dispatch
stream.raw.decode_content = False
_read = stream.raw.read
def _chunks() -> Generator[bytes, None, None]:
while True:
chunk = _read(chunk_size)
if not chunk:
break
_write(chunk)
download_size = len(chunk)
written += download_size
if not segmented:
yield dict(advance=1)
now = _time()
time_since = now - last_speed_refresh
download_sizes.append(download_size)
if time_since > PROGRESS_WINDOW or download_size < chunk_size:
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
yield chunk
stream.close()
chunks = _chunks()
else:
# CurlSession: use iter_content (raw not available)
for chunk in stream.iter_content(chunk_size=chunk_size):
def _chunks_iter() -> Generator[bytes, None, None]:
yield from stream.iter_content(chunk_size=chunk_size)
stream.close()
chunks = _chunks_iter()
# Unified write + progress loop
_data_accumulated = 0
for chunk in chunks:
_write(chunk)
download_size = len(chunk)
written += download_size
@@ -162,13 +160,12 @@ def download(
yield dict(advance=1)
now = _time()
time_since = now - last_speed_refresh
download_sizes.append(download_size)
_data_accumulated += download_size
if time_since > PROGRESS_WINDOW or download_size < chunk_size:
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
download_speed = math.ceil(_data_accumulated / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
_data_accumulated = 0
# Truncate to actual written size in case pre-allocation overshot
if content_length > 0 and written != content_length:
@@ -187,7 +184,7 @@ def download(
sizes.append((now, written))
cutoff = now - SPEED_ROLLING_WINDOW
while sizes and sizes[0][0] < cutoff:
sizes.pop(0)
sizes.popleft()
time_since = now - _speed_tracker["last_refresh"]
if sizes and time_since > PROGRESS_WINDOW:
window_start = sizes[0][0]
@@ -338,9 +335,18 @@ def requests(
yield dict(total=len(urls))
# Per-call speed tracker — shared across threads within this call only
speed_tracker: dict[str, Any] = {"sizes": [], "last_refresh": time.time()}
speed_tracker: dict[str, Any] = {"sizes": deque(), "last_refresh": time.time()}
try:
# Fast path: single URL — no thread pool overhead
if len(urls) == 1:
yield from download(
session=session,
segmented=segmented_batch,
_speed_tracker=speed_tracker,
**urls[0],
)
else:
with ThreadPoolExecutor(max_workers=max_workers) as pool:
event_queue: Queue[dict[str, Any]] = Queue()
@@ -364,9 +370,9 @@ def requests(
except Empty:
break
done = {future for future in pending if future.done()}
for future in done:
pending.remove(future)
# Wait efficiently for next future completion (OS condition variable)
completed, pending = wait(pending, timeout=0.1, return_when=FIRST_COMPLETED)
for future in completed:
exc = future.exception()
if isinstance(exc, KeyboardInterrupt):
DOWNLOAD_CANCELLED.set()
@@ -392,12 +398,6 @@ def requests(
)
raise exc
if pending:
try:
yield event_queue.get(timeout=0.1)
except Empty:
pass
# Drain any remaining events from workers that just finished
while True:
try: