# A Python port of the Invisible Internet Project (I2P)
1"""HTTP client for I2P (EepGet).
2
3Fetches content over HTTP with proxy support, redirect following,
4progress callbacks, and conditional requests.
5
6Ported from net.i2p.util.EepGet.
7"""
8
9from __future__ import annotations
10
11import urllib.request
12import urllib.error
13import urllib.parse
14from dataclasses import dataclass, field
15from typing import Callable
16
17
@dataclass
class EepGetResult:
    """Outcome of a single HTTP fetch performed by :class:`EepGet`."""

    success: bool  # True when the fetch completed (including 304 Not Modified)
    status_code: int  # HTTP status code; 0 when no response was received
    data: bytes = b""  # response body (empty on failure or 304)
    headers: dict[str, str] = field(default_factory=dict)  # response headers
    error: str = ""  # human-readable error description, empty on success

    @property
    def content_length(self) -> int:
        """Size in bytes of the body that was actually received."""
        return len(self.data)
31
32
class EepGet:
    """HTTP client for I2P.

    Fetches URLs with redirect following, progress callbacks,
    If-Modified-Since support, and an optional HTTP proxy.

    Ported from net.i2p.util.EepGet.
    """

    MAX_REDIRECTS = 5

    # Chunk size used when streaming the response body.
    _CHUNK_SIZE = 8192

    class _NoRedirect(urllib.request.HTTPRedirectHandler):
        """Redirect handler that surfaces 3xx responses as HTTPError.

        Auto-redirecting is disabled so :meth:`fetch` can follow
        redirects itself and enforce MAX_REDIRECTS.
        """

        def redirect_request(self, req, fp, code, msg, headers, newurl):
            raise urllib.error.HTTPError(newurl, code, msg, headers, fp)

    def __init__(
        self,
        proxy_host: str | None = None,
        proxy_port: int | None = None,
        connect_timeout: float = 30.0,
        read_timeout: float = 60.0,
        user_agent: str = "I2P",
    ) -> None:
        """Initialize the client.

        Args:
            proxy_host: HTTP proxy hostname, or None for a direct connection.
            proxy_port: HTTP proxy port; both host and port must be set
                for the proxy to be used.
            connect_timeout: Timeout in seconds passed to urllib. NOTE:
                urllib applies a single timeout to all socket operations,
                so this value effectively governs reads as well.
            read_timeout: Kept for API compatibility; not separately
                enforceable with urllib (see connect_timeout).
            user_agent: Value sent in the User-Agent request header.
        """
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.connect_timeout = connect_timeout
        self.read_timeout = read_timeout
        self.user_agent = user_agent

    def _build_opener(self) -> urllib.request.OpenerDirector:
        """Build an opener with the optional proxy and redirects disabled."""
        handlers: list[urllib.request.BaseHandler] = []
        if self.proxy_host and self.proxy_port:
            proxy_url = f"http://{self.proxy_host}:{self.proxy_port}"
            handlers.append(
                urllib.request.ProxyHandler({"http": proxy_url, "https": proxy_url})
            )
        handlers.append(self._NoRedirect())
        return urllib.request.build_opener(*handlers)

    @staticmethod
    def _parse_content_length(headers: dict[str, str]) -> int:
        """Parse Content-Length, tolerating a missing or malformed header."""
        try:
            return max(0, int(headers.get("Content-Length", "0")))
        except ValueError:
            # A malformed header should not abort the fetch; we simply
            # report an unknown total (0) to the progress callback.
            return 0

    def fetch(
        self,
        url: str,
        output_file: str | None = None,
        progress_callback: Callable[[int, int], None] | None = None,
        if_modified_since: str | None = None,
    ) -> EepGetResult:
        """Fetch a URL.

        Args:
            url: The URL to fetch.
            output_file: If set, write response body to this file.
            progress_callback: Called with (bytes_downloaded, total_bytes);
                total_bytes is 0 when Content-Length is absent or malformed.
            if_modified_since: HTTP date string for conditional request.

        Returns:
            An EepGetResult. A 304 Not Modified response counts as success
            with an empty body. Network/protocol failures are reported via
            ``success=False`` rather than raised.
        """
        # Build the opener once; it is reused across redirect hops.
        opener = self._build_opener()
        redirects = 0
        current_url = url

        while redirects <= self.MAX_REDIRECTS:
            try:
                req = urllib.request.Request(current_url)
                req.add_header("User-Agent", self.user_agent)
                req.add_header("Connection", "close")
                if if_modified_since:
                    req.add_header("If-Modified-Since", if_modified_since)

                response = opener.open(req, timeout=self.connect_timeout)
                try:
                    status = response.status
                    resp_headers = dict(response.getheaders())
                    total = self._parse_content_length(resp_headers)

                    # Stream the body in chunks, reporting progress as we go.
                    data = bytearray()
                    while chunk := response.read(self._CHUNK_SIZE):
                        data.extend(chunk)
                        if progress_callback:
                            progress_callback(len(data), total)
                finally:
                    # Always release the connection, even if a read fails.
                    response.close()

                result_data = bytes(data)

                if output_file:
                    with open(output_file, "wb") as f:
                        f.write(result_data)

                return EepGetResult(
                    success=True,
                    status_code=status,
                    data=result_data,
                    headers=resp_headers,
                )

            except urllib.error.HTTPError as e:
                # Redirect raised by _NoRedirect: resolve Location and retry.
                if e.code in (301, 302, 303, 307, 308):
                    location = e.headers.get("Location")
                    if location:
                        current_url = urllib.parse.urljoin(current_url, location)
                        redirects += 1
                        continue
                # 304 Not Modified is a successful conditional-request outcome.
                if e.code == 304:
                    return EepGetResult(
                        success=True,
                        status_code=304,
                        data=b"",
                        headers=dict(e.headers.items()),
                    )
                return EepGetResult(
                    success=False,
                    status_code=e.code,
                    data=e.read() if hasattr(e, "read") else b"",
                    error=str(e),
                )

            except Exception as e:
                # Boundary catch-all: URL errors, timeouts, socket failures.
                return EepGetResult(
                    success=False,
                    status_code=0,
                    error=str(e),
                )

        return EepGetResult(
            success=False,
            status_code=0,
            error="Too many redirects",
        )
161 )