diff --git a/unshackle/core/service.py b/unshackle/core/service.py index edc780a..3f965ef 100644 --- a/unshackle/core/service.py +++ b/unshackle/core/service.py @@ -1,4 +1,3 @@ -import base64 import logging from abc import ABCMeta, abstractmethod from collections.abc import Callable, Generator @@ -213,15 +212,10 @@ class Service(metaclass=ABCMeta): if proxy: self.session.proxies.update({"all": proxy}) - proxy_parse = urlparse(proxy) - if proxy_parse.username and proxy_parse.password: - self.session.headers.update( - { - "Proxy-Authorization": base64.b64encode( - f"{proxy_parse.username}:{proxy_parse.password}".encode("utf8") - ).decode() - } - ) + # Don't set Proxy-Authorization manually: both rnet (Proxy.all) and + # requests authenticate from the credentials embedded in the proxy URL. + # A manual header here was malformed (no "Basic " scheme) and broke + # plaintext-http forward-proxy requests with HTTP 407. # Always verify proxy IP - proxies can change exit nodes try: proxy_ip_info = get_ip_info(self.session) diff --git a/unshackle/core/session.py b/unshackle/core/session.py index c51d45b..37d783b 100644 --- a/unshackle/core/session.py +++ b/unshackle/core/session.py @@ -523,15 +523,24 @@ class RnetSession: self.cookies._flush_to_client() return self._client - def _build_url(self, url: str, params: Optional[dict] = None) -> str: - """URL-encode params dict into the URL (rnet ignores params kwarg).""" + def _build_url(self, url: str, params: Optional[Any] = None) -> str: + """Encode params into the URL (rnet ignores the params kwarg). + + Accepts the same shapes as requests: a mapping, a sequence of pairs, or a + pre-built query string/bytes. A string is appended verbatim (already encoded); + urlencode() would raise TypeError on it. + """ if not params: return url + if isinstance(params, bytes): + extra = params.decode("utf-8") + elif isinstance(params, str): + extra = params + else: + extra = urlencode(params, doseq=True) parsed = urlparse(url) separator = "&" if parsed.query else "" - query = ( - parsed.query + separator + urlencode(params, doseq=True) if parsed.query else urlencode(params, doseq=True) - ) + query = parsed.query + separator + extra if parsed.query else extra return urlunparse(parsed._replace(query=query)) def get_sleep_time(self, response: Optional[RnetResponse], attempt: int) -> Optional[float]: