# A Python port of the Invisible Internet Project (I2P)
"""HTTP proxy tunnel — routes .i2p requests through SAM.

Extends the existing parsing from http_proxy.py with full SAM integration,
header filtering, CONNECT tunneling, address helpers, and outproxy support.

Ported from net.i2p.i2ptunnel.I2PTunnelHTTPClient.
"""
8
9from __future__ import annotations
10
11import logging
12import random
13from urllib.parse import urlparse, parse_qs
14
15from i2p_apps.i2ptunnel.config import TunnelDefinition
16from i2p_apps.i2ptunnel.forwarder import bridge, bridge_with_initial_data
17from i2p_apps.i2ptunnel.http_proxy import parse_http_request, extract_i2p_destination
18from i2p_apps.i2ptunnel.tasks import ClientTunnelTask
19
# Module-wide logger, named after this module.
logger = logging.getLogger(__name__)


# Lower-cased names of request headers that are never forwarded (privacy).
_STRIPPED_OUTBOUND = {
    "from",
    "proxy-authorization",
    "proxy-connection",
    "referer",
    "via",
    "x-forwarded-for",
}

# User-Agent value substituted on requests bound for .i2p destinations.
_I2P_USER_AGENT = "MYOB/6.66 (AN/ON)"

# Error-type key -> human-readable reason shown on generated error pages.
_ERROR_MESSAGES = dict(
    dnf="Destination Not Found",
    noproxy="No Outproxy Configured",
    ssl="SSL Not Allowed",
    timeout="Connection Timeout",
    protocol="Protocol Error",
    localhost="Localhost Access Denied",
    baduri="Bad Request URI",
    denied="Access Denied",
)
43
44
class HTTPClientTask(ClientTunnelTask):
    """HTTP proxy that routes .i2p requests through SAM.

    Features:
    - .i2p hostname resolution via the session's ``lookup``
    - Header filtering (strips Referer, Via, X-Forwarded-For, etc.)
    - User-Agent replacement with MYOB/6.66 for .i2p sites
    - CONNECT method for HTTPS tunneling (.i2p hosts only)
    - Address helper support (?i2paddresshelper=)
    - Outproxy support for non-.i2p hosts
    """

    def __init__(self, config: TunnelDefinition, session) -> None:
        """Initialise per-tunnel proxy state.

        Args:
            config: Tunnel definition; its ``proxy_list`` supplies the
                outproxy candidates.
            session: Object providing ``lookup`` and ``connect``; passed to
                the base class (presumably stored there as ``self._session``
                — confirm against ClientTunnelTask).
        """
        super().__init__(config, session)
        # Copied so later mutation of the config's list cannot affect us.
        self._proxy_list = list(config.proxy_list)
        # hostname -> destination learned from ?i2paddresshelper= parameters.
        self._address_cache: dict[str, str] = {}
        # Outproxies whose lookup/connect failed; skipped by _pick_outproxy.
        self._failed_outproxies: set[str] = set()

    # --- Header Filtering ---

    def _filter_outbound_headers(
        self, headers: dict[str, str], is_i2p: bool
    ) -> dict[str, str]:
        """Return a copy of *headers* with privacy-sensitive entries removed.

        Headers named in ``_STRIPPED_OUTBOUND`` are dropped (name comparison
        is case-insensitive). For .i2p requests an existing User-Agent value
        is replaced by ``_I2P_USER_AGENT``; no User-Agent is added when the
        client sent none.
        """
        filtered: dict[str, str] = {}
        for key, value in headers.items():
            lowered = key.lower()
            if lowered in _STRIPPED_OUTBOUND:
                continue
            if lowered == "user-agent" and is_i2p:
                value = _I2P_USER_AGENT
            filtered[key] = value
        return filtered

    # --- Address Helpers ---

    def _extract_address_helper(self, url: str) -> tuple[str, str | None]:
        """Extract the ``?i2paddresshelper=`` value from *url*.

        Returns:
            ``(host, helper)`` where *host* is the URL's hostname ("" when it
            has none) and *helper* is the first ``i2paddresshelper`` query
            value, or ``None`` when the parameter is absent.
        """
        parsed = urlparse(url)
        params = parse_qs(parsed.query)
        helper = params.get("i2paddresshelper", [None])[0]
        return parsed.hostname or "", helper

    def _cache_address_helper(self, hostname: str, dest: str) -> None:
        """Remember *dest* as the destination for *hostname*."""
        self._address_cache[hostname] = dest

    def _get_cached_helper(self, hostname: str) -> str | None:
        """Return the cached destination for *hostname*, or ``None``."""
        return self._address_cache.get(hostname)

    # --- Outproxy ---

    def _has_outproxy(self) -> bool:
        """Return True when at least one outproxy is configured."""
        return bool(self._proxy_list)

    def _pick_outproxy(self) -> str:
        """Pick an outproxy at random, avoiding recently failed ones.

        When every configured outproxy is marked failed, the failure set is
        cleared and all of them become eligible again. Returns "" when no
        outproxy is configured.
        """
        candidates = [
            proxy for proxy in self._proxy_list
            if proxy not in self._failed_outproxies
        ]
        if not candidates:
            # All failed — reset and try again.
            self._failed_outproxies.clear()
            candidates = list(self._proxy_list)
        return random.choice(candidates) if candidates else ""

    # --- Request Classification ---

    @staticmethod
    def _is_i2p_request(host: str) -> bool:
        """Return True when *host* is under the ``.i2p`` pseudo-TLD.

        Host names are case-insensitive, so the comparison is too.
        """
        return host.lower().endswith(".i2p")

    @staticmethod
    def _is_localhost(host: str) -> bool:
        """Return True when *host* names the local machine.

        Matches ``localhost`` and ``*.localhost`` (any case, optional
        trailing dot), every loopback IP address (all of 127.0.0.0/8 and
        ``::1``, with or without URL brackets) and the unspecified addresses
        ``0.0.0.0`` / ``::``.
        """
        # Function-scope import: keeps this block self-contained without
        # touching the file's import section.
        import ipaddress

        name = host.strip().lower().rstrip(".")
        if name.startswith("[") and name.endswith("]"):
            name = name[1:-1]  # IPv6 literal as written inside a URL
        if name == "localhost" or name.endswith(".localhost"):
            return True
        try:
            address = ipaddress.ip_address(name)
        except ValueError:
            return False  # an ordinary host name, not an IP literal
        return address.is_loopback or address.is_unspecified

    # --- Error Pages ---

    def _error_page(self, error_type: str, status_code: int) -> bytes:
        """Build a complete HTTP error response with a small HTML body.

        Args:
            error_type: Key into ``_ERROR_MESSAGES``; unknown keys yield the
                generic reason "Error".
            status_code: HTTP status code for the status line and heading.

        Returns:
            The UTF-8 encoded response (status line, headers, body).
        """
        message = _ERROR_MESSAGES.get(error_type, "Error")
        body = (
            f"<html><head><title>I2P Proxy Error</title></head>"
            f"<body><h1>{status_code} {message}</h1>"
            f"<p>The I2P HTTP proxy was unable to process your request.</p>"
            f"<p>Error: {error_type}</p>"
            f"</body></html>"
        ).encode("utf-8")
        # Content-Length counts bytes, so measure the encoded body rather
        # than the str (they differ as soon as a non-ASCII character appears).
        head = (
            f"HTTP/1.1 {status_code} {message}\r\n"
            f"Content-Type: text/html; charset=UTF-8\r\n"
            f"Content-Length: {len(body)}\r\n"
            f"Connection: close\r\n"
            f"\r\n"
        ).encode("utf-8")
        return head + body

    async def _send_error(self, writer, error_type: str, status_code: int) -> None:
        """Write the error page for *error_type* to *writer* and flush it."""
        writer.write(self._error_page(error_type, status_code))
        await writer.drain()

    # --- Header Parsing Helpers ---

    @staticmethod
    async def _read_headers(reader) -> dict[str, str]:
        """Read header lines from *reader* until a blank line or EOF.

        Lines without a colon are ignored. Names keep the case the client
        sent. A repeated header name overwrites the earlier value.
        """
        headers: dict[str, str] = {}
        while True:
            line = await reader.readline()
            if not line or line in (b"\r\n", b"\n"):
                break
            text = line.decode("utf-8", errors="replace").strip()
            key, sep, value = text.partition(":")
            if sep:
                headers[key.strip()] = value.strip()
        return headers

    @staticmethod
    async def _read_body(reader, headers: dict[str, str]) -> bytes:
        """Read the request body announced by the Content-Length header.

        The header name is matched case-insensitively. A missing,
        non-numeric or non-positive length yields ``b""``. Reading stops
        early if the stream reaches EOF, in which case fewer bytes than
        announced are returned.
        """
        declared = "0"
        for name, value in headers.items():
            if name.lower() == "content-length":
                declared = value
                break
        try:
            remaining = int(declared)
        except ValueError:
            remaining = 0

        # A single read() may return fewer bytes than requested, so keep
        # reading until the announced length has arrived or the stream ends.
        chunks: list[bytes] = []
        while remaining > 0:
            chunk = await reader.read(remaining)
            if not chunk:
                break
            chunks.append(chunk)
            remaining -= len(chunk)
        return b"".join(chunks)

    # --- Resolution ---

    async def _resolve(self, hostname: str) -> str | None:
        """Resolve an I2P hostname to something ``session.connect`` accepts.

        Order: address-helper cache first, then ``.b32.i2p`` names (returned
        unchanged), then the session's ``lookup``.
        """
        cached = self._get_cached_helper(hostname)
        if cached:
            return cached

        if hostname.lower().endswith(".b32.i2p"):
            return hostname

        return await self._session.lookup(hostname)

    # --- Request Rebuilding ---

    @staticmethod
    def _rebuild_request(method: str, path: str, headers: dict[str, str], body: bytes) -> bytes:
        """Serialise a request as ``METHOD path HTTP/1.1`` + headers + body.

        The request line always uses HTTP/1.1; *body* is appended verbatim
        after the blank line that ends the header block.
        """
        lines = [f"{method} {path} HTTP/1.1"]
        lines.extend(f"{key}: {value}" for key, value in headers.items())
        # Two empty entries produce the CRLF CRLF that ends the header block.
        lines.extend(("", ""))
        request = "\r\n".join(lines).encode("utf-8")
        return request + body if body else request

    # --- Main Handler ---

    async def handle_client(self, reader, writer) -> None:
        """Serve one proxied request arriving on *reader* / *writer*.

        Reads the request line and headers, rejects localhost targets, then
        dispatches to the CONNECT, .i2p or outproxy handler. Any exception is
        logged and swallowed here so one bad client cannot propagate an error
        to the caller.

        NOTE(review): *writer* is not closed on the error paths here;
        presumably the caller or the bridge helpers close it — confirm.
        """
        try:
            first_line_bytes = await reader.readline()
            if not first_line_bytes:
                return  # client disconnected before sending anything

            first_line = first_line_bytes.decode("utf-8", errors="replace").strip()
            try:
                request = parse_http_request(first_line)
            except ValueError:
                await self._send_error(writer, "baduri", 400)
                return

            # The whole header block is consumed here, before dispatch.
            headers = await self._read_headers(reader)

            if self._is_localhost(request.host):
                await self._send_error(writer, "localhost", 403)
                return

            if request.is_connect:
                await self._handle_connect(request, reader, writer)
            elif self._is_i2p_request(request.host):
                body = await self._read_body(reader, headers)
                await self._handle_i2p(request, headers, body, reader, writer)
            elif self._has_outproxy():
                body = await self._read_body(reader, headers)
                await self._handle_outproxy(request, headers, body, reader, writer)
            else:
                await self._send_error(writer, "noproxy", 503)

        except Exception:
            logger.exception("Error in HTTP proxy handler")

    async def _handle_i2p(self, request, headers, body, reader, writer) -> None:
        """Route a request for a .i2p host to its destination.

        Caches any ``i2paddresshelper`` found in the request URL, resolves
        the host, opens a stream, then forwards the filtered request followed
        by a bidirectional bridge. Sends a 503 "dnf" page when resolution
        fails and a 504 "timeout" page when the connect fails.
        """
        # Parse only the request-target (second token of the request line).
        # Parsing the whole line would make the query run to the end of the
        # line and append " HTTP/1.1" to the helper value.
        parts = request.raw_first_line.split()
        target = parts[1] if len(parts) > 1 else request.raw_first_line
        _, helper = self._extract_address_helper(target)
        if helper:
            self._cache_address_helper(request.host, helper)

        dest = await self._resolve(request.host)
        if dest is None:
            await self._send_error(writer, "dnf", 503)
            return

        try:
            remote_reader, remote_writer = await self._session.connect(dest)
        except Exception:
            logger.debug("Connect to %s failed", request.host, exc_info=True)
            await self._send_error(writer, "timeout", 504)
            return

        # Filter headers and rebuild the request with a relative path.
        filtered = self._filter_outbound_headers(headers, is_i2p=True)
        # Header names are case-insensitive; add Host only if truly absent.
        if not any(name.lower() == "host" for name in filtered):
            filtered["Host"] = request.host
        path = request.path or "/"
        request_bytes = self._rebuild_request(request.method, path, filtered, body)

        await bridge_with_initial_data(
            reader, writer, remote_reader, remote_writer, request_bytes
        )

    async def _handle_connect(self, request, reader, writer) -> None:
        """Handle the CONNECT method for HTTPS tunneling.

        Only .i2p hosts are tunneled; any other target gets a 403 "ssl" page.

        The header block has already been consumed by ``handle_client``, so
        nothing more is read before replying. Waiting for further lines here
        would stall: a CONNECT client sends nothing until it receives the
        200 response.
        """
        if not self._is_i2p_request(request.host):
            await self._send_error(writer, "ssl", 403)
            return

        dest = await self._resolve(request.host)
        if dest is None:
            await self._send_error(writer, "dnf", 503)
            return

        try:
            remote_reader, remote_writer = await self._session.connect(dest)
        except Exception:
            logger.debug("CONNECT to %s failed", request.host, exc_info=True)
            await self._send_error(writer, "timeout", 504)
            return

        writer.write(b"HTTP/1.1 200 Connection Established\r\n\r\n")
        await writer.drain()
        await bridge(reader, writer, remote_reader, remote_writer)

    async def _handle_outproxy(self, request, headers, body, reader, writer) -> None:
        """Forward a non-.i2p request through a configured outproxy.

        An outproxy whose lookup or connect fails is added to the failed set
        before the error page is sent, so the next request tries another one.
        """
        outproxy = self._pick_outproxy()
        if not outproxy:
            await self._send_error(writer, "noproxy", 503)
            return

        dest = await self._resolve(outproxy)
        if dest is None:
            self._failed_outproxies.add(outproxy)
            await self._send_error(writer, "dnf", 503)
            return

        try:
            remote_reader, remote_writer = await self._session.connect(dest)
        except Exception:
            logger.debug("Connect to outproxy %s failed", outproxy, exc_info=True)
            self._failed_outproxies.add(outproxy)
            await self._send_error(writer, "timeout", 504)
            return

        # Forward the original request (with absolute URL) to the outproxy.
        filtered = self._filter_outbound_headers(headers, is_i2p=False)
        request_bytes = self._rebuild_request(
            request.method, request.raw_first_line.split()[1], filtered, body
        )

        await bridge_with_initial_data(
            reader, writer, remote_reader, remote_writer, request_bytes
        )