A Python port of the Invisible Internet Project (I2P)
at main 161 lines 5.2 kB view raw
"""HTTP client for I2P (EepGet).

Fetches content over HTTP with proxy support, redirect following,
progress callbacks, and conditional requests.

Ported from net.i2p.util.EepGet.
"""

from __future__ import annotations

import urllib.request
import urllib.error
import urllib.parse
from dataclasses import dataclass, field
from typing import Callable

# HTTP status codes that fetch() treats as "follow the Location header".
_REDIRECT_CODES = frozenset({301, 302, 303, 307, 308})

# Only these schemes are ever requested, for the initial URL and for every
# redirect hop. build_opener() installs file:// and ftp:// handlers by
# default, so without this check a server-supplied Location header could
# make the client open local files.
_ALLOWED_SCHEMES = frozenset({"http", "https"})

# Number of bytes requested from the response per read() call.
_CHUNK_SIZE = 8192


@dataclass
class EepGetResult:
    """Result of an HTTP fetch."""

    # True for a completed fetch or a 304 Not Modified answer.
    success: bool
    # HTTP status code; 0 when no HTTP status was obtained.
    status_code: int
    # Response body (or error body for failed HTTP responses).
    data: bytes = b""
    # Response headers of a successful fetch.
    headers: dict[str, str] = field(default_factory=dict)
    # Human-readable failure description; empty on success.
    error: str = ""

    @property
    def content_length(self) -> int:
        """Return the number of body bytes held in ``data``."""
        return len(self.data)


class _NoRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Redirect handler that surfaces redirects as HTTPError.

    Raising here stops urllib from following the redirect itself, so that
    EepGet.fetch() can validate and count every hop.
    """

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        raise urllib.error.HTTPError(newurl, code, msg, headers, fp)


class EepGet:
    """HTTP client for I2P.

    Fetches URLs with redirect following, progress callbacks,
    If-Modified-Since support, and optional proxy.
    """

    MAX_REDIRECTS = 5

    def __init__(
        self,
        proxy_host: str | None = None,
        proxy_port: int | None = None,
        connect_timeout: float = 30.0,
        read_timeout: float = 60.0,
        user_agent: str = "I2P",
    ) -> None:
        """Store the connection settings used by fetch().

        Args:
            proxy_host: Proxy host name; a proxy is used only when both
                proxy_host and proxy_port are set.
            proxy_port: Proxy port.
            connect_timeout: Timeout in seconds handed to the opener.
            read_timeout: Stored for callers; fetch() does not apply it
                because the urllib opener accepts a single timeout.
            user_agent: Value of the User-Agent request header.
        """
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.connect_timeout = connect_timeout
        self.read_timeout = read_timeout
        self.user_agent = user_agent

    @staticmethod
    def _is_http_url(url: str) -> bool:
        """Return True when *url* parses and its scheme is http or https."""
        try:
            scheme = urllib.parse.urlsplit(url).scheme
        except ValueError:
            return False
        return scheme.lower() in _ALLOWED_SCHEMES

    @staticmethod
    def _resolve_redirect(base_url: str, location: str) -> str | None:
        """Resolve a Location header against *base_url*.

        Returns the absolute target URL, or None when the header is
        malformed or points at a scheme other than http/https.
        """
        try:
            target = urllib.parse.urljoin(base_url, location)
        except ValueError:
            return None
        return target if EepGet._is_http_url(target) else None

    @staticmethod
    def _content_length(raw: str | None) -> int:
        """Parse a Content-Length header value; 0 if missing or invalid."""
        try:
            value = int(raw)
        except (TypeError, ValueError):
            return 0
        return max(value, 0)

    @staticmethod
    def _read_error_body(err: urllib.error.HTTPError) -> bytes:
        """Read the body attached to an HTTP error, or b"" if unreadable."""
        if not hasattr(err, "read"):
            return b""
        try:
            return err.read()
        except (OSError, ValueError):
            return b""

    def _build_opener(self) -> urllib.request.OpenerDirector:
        """Build an opener with auto-redirect disabled and optional proxy."""
        handlers: list = []
        if self.proxy_host and self.proxy_port:
            proxy_url = f"http://{self.proxy_host}:{self.proxy_port}"
            handlers.append(
                urllib.request.ProxyHandler({"http": proxy_url, "https": proxy_url})
            )
        handlers.append(_NoRedirectHandler)
        return urllib.request.build_opener(*handlers)

    def fetch(
        self,
        url: str,
        output_file: str | None = None,
        progress_callback: Callable[[int, int], None] | None = None,
        if_modified_since: str | None = None,
    ) -> EepGetResult:
        """Fetch a URL.

        Args:
            url: The URL to fetch. Only http and https URLs are requested.
            output_file: If set, write response body to this file.
            progress_callback: Called with (bytes_downloaded, total_bytes)
                after every chunk; total_bytes is 0 when the server sent no
                usable Content-Length.
            if_modified_since: HTTP date string for conditional request.

        Returns:
            An EepGetResult. Failures (bad URL, network error, HTTP error
            status, refused or excessive redirects) are reported through
            ``success=False`` and ``error`` rather than raised.
        """
        # The opener does not depend on the URL, so build it once per fetch.
        opener = self._build_opener()
        current_url = url

        # One initial request plus up to MAX_REDIRECTS followed redirects.
        for _ in range(self.MAX_REDIRECTS + 1):
            try:
                if not self._is_http_url(current_url):
                    return EepGetResult(
                        success=False,
                        status_code=0,
                        error=f"Unsupported URL scheme: {current_url}",
                    )

                req = urllib.request.Request(current_url)
                req.add_header("User-Agent", self.user_agent)
                req.add_header("Connection", "close")
                if if_modified_since:
                    req.add_header("If-Modified-Since", if_modified_since)

                # The context manager closes the connection on every path.
                with opener.open(req, timeout=self.connect_timeout) as response:
                    status = response.status
                    resp_headers = dict(response.headers.items())
                    # Look the header up on the message object: unlike the
                    # plain dict above, it is case-insensitive.
                    total = self._content_length(
                        response.headers.get("Content-Length")
                    )

                    data = bytearray()
                    for chunk in iter(lambda: response.read(_CHUNK_SIZE), b""):
                        data.extend(chunk)
                        if progress_callback is not None:
                            progress_callback(len(data), total)

                result_data = bytes(data)

                if output_file:
                    with open(output_file, "wb") as f:
                        f.write(result_data)

                return EepGetResult(
                    success=True,
                    status_code=status,
                    data=result_data,
                    headers=resp_headers,
                )

            except urllib.error.HTTPError as err:
                try:
                    if err.code in _REDIRECT_CODES:
                        location = (
                            err.headers.get("Location")
                            if err.headers is not None
                            else None
                        )
                        if location:
                            target = self._resolve_redirect(current_url, location)
                            if target is None:
                                return EepGetResult(
                                    success=False,
                                    status_code=err.code,
                                    error=(
                                        "Refusing redirect to unsupported URL: "
                                        f"{location}"
                                    ),
                                )
                            current_url = target
                            continue
                    # 304 Not Modified
                    if err.code == 304:
                        return EepGetResult(
                            success=True,
                            status_code=304,
                            data=b"",
                        )
                    return EepGetResult(
                        success=False,
                        status_code=err.code,
                        data=self._read_error_body(err),
                        error=str(err),
                    )
                finally:
                    # The error object wraps the open response; release it.
                    if err.fp is not None:
                        err.close()

            except Exception as err:
                return EepGetResult(
                    success=False,
                    status_code=0,
                    error=str(err),
                )

        return EepGetResult(
            success=False,
            status_code=0,
            error="Too many redirects",
        )