"""HTTP client for I2P (EepGet).

Fetches content over HTTP with proxy support, redirect following,
progress callbacks, and conditional requests.

Ported from net.i2p.util.EepGet.
"""

from __future__ import annotations

import urllib.error
import urllib.parse
import urllib.request
from dataclasses import dataclass, field
from typing import Callable

# Status codes that are treated as "follow the Location header".
_REDIRECT_CODES = frozenset({301, 302, 303, 307, 308})
# Only these schemes may be the target of a redirect; anything else
# (file:, ftp:, data:, ...) would leave HTTP and skip the configured proxy.
_REDIRECT_SCHEMES = frozenset({"http", "https"})
# Number of bytes requested from the response per read() call.
_CHUNK_SIZE = 8192


@dataclass
class EepGetResult:
    """Result of an HTTP fetch."""

    success: bool  # True for a completed download or a 304 Not Modified
    status_code: int  # HTTP status, or 0 when no HTTP response was obtained
    data: bytes = b""  # response body (error body for failed HTTP responses)
    headers: dict[str, str] = field(default_factory=dict)  # response headers
    error: str = ""  # human-readable failure description, empty on success

    @property
    def content_length(self) -> int:
        """Return the number of body bytes actually held in ``data``."""
        return len(self.data)


class _NoRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Redirect handler that refuses to follow redirects automatically.

    Raising ``HTTPError`` hands the 3xx response back to ``EepGet.fetch``,
    which applies its own redirect limit and target validation.
    """

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        raise urllib.error.HTTPError(newurl, code, msg, headers, fp)


class EepGet:
    """HTTP client for I2P.

    Fetches URLs with redirect following, progress callbacks,
    If-Modified-Since support, and optional proxy.
    """

    MAX_REDIRECTS = 5

    def __init__(
        self,
        proxy_host: str | None = None,
        proxy_port: int | None = None,
        connect_timeout: float = 30.0,
        read_timeout: float = 60.0,
        user_agent: str = "I2P",
    ) -> None:
        """Store the client configuration.

        Args:
            proxy_host: Host of the HTTP proxy; the proxy is only used when
                both ``proxy_host`` and ``proxy_port`` are set.
            proxy_port: Port of the HTTP proxy.
            connect_timeout: Timeout in seconds passed to the opener.
            read_timeout: Stored on the instance.
                NOTE(review): ``fetch`` currently passes only
                ``connect_timeout`` to urllib, which takes a single timeout
                value, so ``read_timeout`` is not applied to the request.
            user_agent: Value sent in the ``User-Agent`` header.
        """
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.connect_timeout = connect_timeout
        self.read_timeout = read_timeout
        self.user_agent = user_agent

    def fetch(
        self,
        url: str,
        output_file: str | None = None,
        progress_callback: Callable[[int, int], None] | None = None,
        if_modified_since: str | None = None,
    ) -> EepGetResult:
        """Fetch a URL.

        Failures are reported through the returned result, not raised.

        Args:
            url: The URL to fetch.
            output_file: If set, write response body to this file.
            progress_callback: Called with (bytes_downloaded, total_bytes)
                after every chunk; ``total_bytes`` is 0 when the server sent
                no usable ``Content-Length``.
            if_modified_since: HTTP date string for conditional request.

        Returns:
            An ``EepGetResult``. ``success`` is True for a completed download
            and for ``304 Not Modified``. HTTP error responses carry their
            status code and body; any other failure has ``status_code`` 0.
        """
        try:
            return self._fetch(
                url, output_file, progress_callback, if_modified_since
            )
        except Exception as exc:
            # Boundary of the "never raise" contract: bad URLs, socket
            # errors, callback errors and file errors all end up here.
            return EepGetResult(success=False, status_code=0, error=str(exc))

    def _fetch(
        self,
        url: str,
        output_file: str | None,
        progress_callback: Callable[[int, int], None] | None,
        if_modified_since: str | None,
    ) -> EepGetResult:
        """Run the request/redirect loop; may raise on non-HTTP failures."""
        opener = self._build_opener()
        current_url = url

        # One initial request plus at most MAX_REDIRECTS followed redirects.
        for _ in range(self.MAX_REDIRECTS + 1):
            request = self._build_request(current_url, if_modified_since)
            try:
                with opener.open(
                    request, timeout=self.connect_timeout
                ) as response:
                    status = response.status
                    headers = dict(response.headers.items())
                    body = self._read_body(response, progress_callback)
            except urllib.error.HTTPError as err:
                try:
                    outcome = self._handle_http_error(err, current_url)
                finally:
                    # HTTPError wraps the live response; release its socket.
                    err.close()
                if isinstance(outcome, EepGetResult):
                    return outcome
                current_url = outcome
                continue

            if output_file:
                with open(output_file, "wb") as out:
                    out.write(body)
            return EepGetResult(
                success=True,
                status_code=status,
                data=body,
                headers=headers,
            )

        return EepGetResult(
            success=False,
            status_code=0,
            error="Too many redirects",
        )

    def _build_opener(self) -> urllib.request.OpenerDirector:
        """Build an opener with redirects disabled and the optional proxy."""
        handlers: list = [_NoRedirectHandler]
        if self.proxy_host and self.proxy_port:
            proxy_url = f"http://{self.proxy_host}:{self.proxy_port}"
            handlers.append(
                urllib.request.ProxyHandler(
                    {"http": proxy_url, "https": proxy_url}
                )
            )
        return urllib.request.build_opener(*handlers)

    def _build_request(
        self, url: str, if_modified_since: str | None
    ) -> urllib.request.Request:
        """Create the request for ``url`` with the client's standard headers."""
        request = urllib.request.Request(url)
        request.add_header("User-Agent", self.user_agent)
        request.add_header("Connection", "close")
        if if_modified_since:
            request.add_header("If-Modified-Since", if_modified_since)
        return request

    @staticmethod
    def _read_body(
        response,
        progress_callback: Callable[[int, int], None] | None,
    ) -> bytes:
        """Read the whole response body, reporting progress per chunk."""
        # Header lookup on the message object is case-insensitive; a missing
        # or malformed Content-Length just means "total unknown" (0).
        try:
            total = max(int(response.headers.get("Content-Length")), 0)
        except (TypeError, ValueError):
            total = 0

        buffer = bytearray()
        for chunk in iter(lambda: response.read(_CHUNK_SIZE), b""):
            buffer.extend(chunk)
            if progress_callback is not None:
                progress_callback(len(buffer), total)
        return bytes(buffer)

    @staticmethod
    def _handle_http_error(
        err: urllib.error.HTTPError, current_url: str
    ) -> EepGetResult | str:
        """Turn an HTTP error into a final result or the next URL to fetch.

        Returns the redirect target as a string when the response is a
        redirect that should be followed, otherwise an ``EepGetResult``.
        """
        headers = dict(err.headers.items()) if err.headers else {}

        if err.code in _REDIRECT_CODES:
            location = err.headers.get("Location") if err.headers else None
            if location:
                target = urllib.parse.urljoin(current_url, location)
                scheme = urllib.parse.urlsplit(target).scheme.lower()
                if scheme not in _REDIRECT_SCHEMES:
                    return EepGetResult(
                        success=False,
                        status_code=err.code,
                        headers=headers,
                        error=f"Redirect to unsupported URL scheme: {target}",
                    )
                return target

        # 304 Not Modified
        if err.code == 304:
            return EepGetResult(
                success=True,
                status_code=304,
                data=b"",
                headers=headers,
            )

        return EepGetResult(
            success=False,
            status_code=err.code,
            data=err.read() if hasattr(err, "read") else b"",
            headers=headers,
            error=str(err),
        )