we (web engine): Experimental web browser project to understand the limits of Claude
1//! WHATWG URL parser.
2//!
3//! Implements the URL Standard (<https://url.spec.whatwg.org/>):
4//! - URL record type with scheme, username, password, host, port, path, query, fragment
5//! - State-machine parser following the spec
6//! - Host parsing: domains, IPv4 addresses, IPv6 addresses
7//! - Percent-encoding and decoding (UTF-8)
8//! - Special scheme handling (http, https, ftp, ws, wss, file)
9//! - Relative URL resolution via base URL
10//! - URL serialization
11//! - Origin derivation
12
13use core::fmt;
14
15// ---------------------------------------------------------------------------
16// Error types
17// ---------------------------------------------------------------------------
18
/// Reasons a URL string can be rejected by this module's parser.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum UrlError {
    /// The input was empty, or nothing was left after trimming.
    EmptyInput,
    /// The URL syntax is invalid.
    InvalidUrl,
    /// The scheme is invalid.
    InvalidScheme,
    /// The authority component is invalid.
    InvalidAuthority,
    /// The host is invalid.
    InvalidHost,
    /// The port is not a valid port number.
    InvalidPort,
    /// The host looked like an IPv4 address but is not a valid one.
    InvalidIpv4,
    /// The bracketed host is not a valid IPv6 address.
    InvalidIpv6,
    /// A percent-encoded sequence is invalid.
    InvalidPercentEncoding,
    /// A relative URL was given but there is no base to resolve it against.
    RelativeWithoutBase,
    /// The input has no scheme.
    MissingScheme,
}
44
45impl fmt::Display for UrlError {
46 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47 match self {
48 Self::EmptyInput => write!(f, "empty input"),
49 Self::InvalidUrl => write!(f, "invalid URL"),
50 Self::InvalidScheme => write!(f, "invalid scheme"),
51 Self::InvalidAuthority => write!(f, "invalid authority"),
52 Self::InvalidHost => write!(f, "invalid host"),
53 Self::InvalidPort => write!(f, "invalid port number"),
54 Self::InvalidIpv4 => write!(f, "invalid IPv4 address"),
55 Self::InvalidIpv6 => write!(f, "invalid IPv6 address"),
56 Self::InvalidPercentEncoding => write!(f, "invalid percent-encoding"),
57 Self::RelativeWithoutBase => write!(f, "relative URL without a base"),
58 Self::MissingScheme => write!(f, "missing scheme"),
59 }
60 }
61}
62
/// Result alias used by this module: the error type is always [`UrlError`].
pub type Result<T> = core::result::Result<T, UrlError>;
64
65// ---------------------------------------------------------------------------
66// Host
67// ---------------------------------------------------------------------------
68
/// The host component of a parsed URL.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Host {
    /// A host name stored as text. The host parser lowercases it for special
    /// schemes and percent-encodes it as-is for other schemes.
    Domain(String),
    /// An IPv4 address packed into 32 bits, first octet in the top byte.
    Ipv4(u32),
    /// An IPv6 address as eight 16-bit pieces, in textual order.
    Ipv6([u16; 8]),
}
79
80impl Host {
81 /// Serialize the host to a string.
82 pub fn serialize(&self) -> String {
83 match self {
84 Host::Domain(d) => d.clone(),
85 Host::Ipv4(addr) => serialize_ipv4(*addr),
86 Host::Ipv6(pieces) => format!("[{}]", serialize_ipv6(pieces)),
87 }
88 }
89}
90
91impl fmt::Display for Host {
92 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93 write!(f, "{}", self.serialize())
94 }
95}
96
97// ---------------------------------------------------------------------------
98// Origin
99// ---------------------------------------------------------------------------
100
101/// A URL origin (scheme, host, port).
102#[derive(Debug, Clone, PartialEq, Eq)]
103pub enum Origin {
104 /// A tuple origin (scheme, host, port).
105 Tuple(String, Host, Option<u16>),
106 /// An opaque origin (unique, not equal to anything).
107 Opaque,
108}
109
110// ---------------------------------------------------------------------------
111// URL record
112// ---------------------------------------------------------------------------
113
114/// A parsed URL record per the WHATWG URL Standard.
115#[derive(Debug, Clone, PartialEq, Eq)]
116pub struct Url {
117 /// The scheme (e.g., "http", "https", "file").
118 pub scheme: String,
119 /// The username (percent-encoded).
120 username: String,
121 /// The password (percent-encoded).
122 password: String,
123 /// The host.
124 pub host: Option<Host>,
125 /// The port (None = default or absent).
126 pub port: Option<u16>,
127 /// Path segments. For non-opaque paths, these are the segments.
128 /// For opaque paths (cannot-be-a-base URL), this is a single element.
129 path: Vec<String>,
130 /// Whether this URL has an opaque path (cannot-be-a-base URL).
131 opaque_path: bool,
132 /// The query string (without leading '?').
133 pub query: Option<String>,
134 /// The fragment (without leading '#').
135 pub fragment: Option<String>,
136}
137
138impl Url {
139 /// Parse a URL string.
140 pub fn parse(input: &str) -> Result<Self> {
141 parse_url(input, None)
142 }
143
144 /// Parse a URL string with a base URL for resolving relative references.
145 pub fn parse_with_base(input: &str, base: &Url) -> Result<Self> {
146 parse_url(input, Some(base))
147 }
148
149 /// Get the scheme.
150 pub fn scheme(&self) -> &str {
151 &self.scheme
152 }
153
154 /// Get the username (percent-encoded).
155 pub fn username(&self) -> &str {
156 &self.username
157 }
158
159 /// Get the password (percent-encoded).
160 pub fn password(&self) -> &str {
161 &self.password
162 }
163
164 /// Get the host.
165 pub fn host(&self) -> Option<&Host> {
166 self.host.as_ref()
167 }
168
169 /// Get the host as a string.
170 pub fn host_str(&self) -> Option<String> {
171 self.host.as_ref().map(|h| h.serialize())
172 }
173
174 /// Get the port.
175 pub fn port(&self) -> Option<u16> {
176 self.port
177 }
178
179 /// Get the port or the default port for the scheme.
180 pub fn port_or_default(&self) -> Option<u16> {
181 self.port.or_else(|| default_port(&self.scheme))
182 }
183
184 /// Get the path as a string.
185 pub fn path(&self) -> String {
186 if self.opaque_path {
187 self.path.first().cloned().unwrap_or_default()
188 } else {
189 let mut s = String::new();
190 for seg in &self.path {
191 s.push('/');
192 s.push_str(seg);
193 }
194 if s.is_empty() {
195 s.push('/');
196 }
197 s
198 }
199 }
200
201 /// Get the path segments.
202 pub fn path_segments(&self) -> &[String] {
203 &self.path
204 }
205
206 /// Get the query string.
207 pub fn query(&self) -> Option<&str> {
208 self.query.as_deref()
209 }
210
211 /// Get the fragment.
212 pub fn fragment(&self) -> Option<&str> {
213 self.fragment.as_deref()
214 }
215
216 /// Whether this URL has an opaque path (cannot-be-a-base).
217 pub fn cannot_be_a_base(&self) -> bool {
218 self.opaque_path
219 }
220
221 /// Whether this URL includes credentials.
222 pub fn has_credentials(&self) -> bool {
223 !self.username.is_empty() || !self.password.is_empty()
224 }
225
226 /// Derive the origin of this URL.
227 pub fn origin(&self) -> Origin {
228 match self.scheme.as_str() {
229 "http" | "https" | "ws" | "wss" | "ftp" => {
230 if let Some(host) = &self.host {
231 Origin::Tuple(self.scheme.clone(), host.clone(), self.port)
232 } else {
233 Origin::Opaque
234 }
235 }
236 _ => Origin::Opaque,
237 }
238 }
239
240 /// Serialize this URL to a string (the href).
241 pub fn serialize(&self) -> String {
242 let mut output = String::new();
243 output.push_str(&self.scheme);
244 output.push(':');
245
246 if self.host.is_some() {
247 output.push_str("//");
248 if self.has_credentials() {
249 output.push_str(&self.username);
250 if !self.password.is_empty() {
251 output.push(':');
252 output.push_str(&self.password);
253 }
254 output.push('@');
255 }
256 if let Some(ref host) = self.host {
257 output.push_str(&host.serialize());
258 }
259 if let Some(port) = self.port {
260 output.push(':');
261 output.push_str(&port.to_string());
262 }
263 } else if !self.opaque_path && self.scheme == "file" {
264 output.push_str("//");
265 }
266
267 output.push_str(&self.path());
268
269 if let Some(ref query) = self.query {
270 output.push('?');
271 output.push_str(query);
272 }
273 if let Some(ref fragment) = self.fragment {
274 output.push('#');
275 output.push_str(fragment);
276 }
277
278 output
279 }
280}
281
282impl fmt::Display for Url {
283 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
284 write!(f, "{}", self.serialize())
285 }
286}
287
288// ---------------------------------------------------------------------------
289// Special schemes
290// ---------------------------------------------------------------------------
291
/// Returns `true` when `scheme` is one of the URL Standard's special schemes.
///
/// The comparison is exact, so callers must pass an already-lowercased scheme.
fn is_special_scheme(scheme: &str) -> bool {
    const SPECIAL_SCHEMES: [&str; 6] = ["http", "https", "ftp", "ws", "wss", "file"];
    SPECIAL_SCHEMES.contains(&scheme)
}
296
/// Looks up the default port of a special scheme.
///
/// Returns `None` for `file` and for every scheme not in the table.
fn default_port(scheme: &str) -> Option<u16> {
    const DEFAULT_PORTS: [(&str, u16); 5] = [
        ("http", 80),
        ("ws", 80),
        ("https", 443),
        ("wss", 443),
        ("ftp", 21),
    ];
    DEFAULT_PORTS
        .iter()
        .find(|(name, _)| *name == scheme)
        .map(|&(_, port)| port)
}
306
307// ---------------------------------------------------------------------------
308// Percent encoding / decoding
309// ---------------------------------------------------------------------------
310
/// The C0 control percent-encode set: everything outside U+0020..=U+007E.
fn is_c0_control(c: char) -> bool {
    !(' '..='~').contains(&c)
}
315
316/// The fragment percent-encode set.
317fn is_fragment_encode(c: char) -> bool {
318 is_c0_control(c) || c == ' ' || c == '"' || c == '<' || c == '>' || c == '`'
319}
320
321/// The query percent-encode set.
322fn is_query_encode(c: char) -> bool {
323 is_c0_control(c) || c == ' ' || c == '"' || c == '#' || c == '<' || c == '>'
324}
325
326/// The special query percent-encode set.
327fn is_special_query_encode(c: char) -> bool {
328 is_query_encode(c) || c == '\''
329}
330
331/// The path percent-encode set.
332fn is_path_encode(c: char) -> bool {
333 is_query_encode(c) || c == '?' || c == '`' || c == '{' || c == '}'
334}
335
336/// The userinfo percent-encode set.
337fn is_userinfo_encode(c: char) -> bool {
338 is_path_encode(c)
339 || c == '/'
340 || c == ':'
341 || c == ';'
342 || c == '='
343 || c == '@'
344 || c == '['
345 || c == '\\'
346 || c == ']'
347 || c == '^'
348 || c == '|'
349}
350
351/// Percent-encode a string using the given encode set predicate.
352fn percent_encode(input: &str, should_encode: fn(char) -> bool) -> String {
353 let mut out = String::with_capacity(input.len());
354 for c in input.chars() {
355 if should_encode(c) {
356 for b in c.to_string().as_bytes() {
357 out.push('%');
358 out.push(to_hex_upper(b >> 4));
359 out.push(to_hex_upper(b & 0x0F));
360 }
361 } else {
362 out.push(c);
363 }
364 }
365 out
366}
367
/// Maps a nibble (0..=15) to its uppercase hexadecimal digit.
///
/// Callers in this file only pass values below 16.
fn to_hex_upper(n: u8) -> char {
    let code = if n < 10 { b'0' + n } else { b'A' + n - 10 };
    char::from(code)
}
375
376/// Percent-decode a byte string.
377pub fn percent_decode(input: &str) -> Vec<u8> {
378 let bytes = input.as_bytes();
379 let mut out = Vec::with_capacity(bytes.len());
380 let mut i = 0;
381 while i < bytes.len() {
382 if bytes[i] == b'%' && i + 2 < bytes.len() {
383 if let (Some(hi), Some(lo)) = (hex_val(bytes[i + 1]), hex_val(bytes[i + 2])) {
384 out.push(hi << 4 | lo);
385 i += 3;
386 continue;
387 }
388 }
389 out.push(bytes[i]);
390 i += 1;
391 }
392 out
393}
394
395/// Percent-decode to a UTF-8 string (lossy).
396pub fn percent_decode_string(input: &str) -> String {
397 String::from_utf8_lossy(&percent_decode(input)).into_owned()
398}
399
/// Returns the value of an ASCII hexadecimal digit, or `None` for any other byte.
fn hex_val(b: u8) -> Option<u8> {
    // `to_digit(16)` yields 0..=15, so narrowing to `u8` cannot lose information.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
408
409// ---------------------------------------------------------------------------
410// IPv4 parsing
411// ---------------------------------------------------------------------------
412
413fn parse_ipv4(input: &str) -> Result<u32> {
414 let parts: Vec<&str> = input.split('.').collect();
415 if parts.len() < 2 || parts.len() > 4 {
416 return Err(UrlError::InvalidIpv4);
417 }
418 let mut numbers: Vec<u64> = Vec::with_capacity(parts.len());
419 for part in &parts {
420 if part.is_empty() {
421 return Err(UrlError::InvalidIpv4);
422 }
423 let n = parse_ipv4_number(part)?;
424 numbers.push(n);
425 }
426 let last = numbers.len() - 1;
427 for (i, &n) in numbers.iter().enumerate() {
428 if i < last && n > 255 {
429 return Err(UrlError::InvalidIpv4);
430 }
431 }
432 if numbers[last] >= 256u64.pow((4 - last) as u32) {
433 return Err(UrlError::InvalidIpv4);
434 }
435
436 let mut ipv4 = numbers[last] as u32;
437 for (i, &n) in numbers.iter().enumerate().take(last) {
438 ipv4 += (n as u32) << (8 * (3 - i));
439 }
440 Ok(ipv4)
441}
442
443fn parse_ipv4_number(input: &str) -> Result<u64> {
444 if input.is_empty() {
445 return Err(UrlError::InvalidIpv4);
446 }
447 let (s, radix) = if input.starts_with("0x") || input.starts_with("0X") {
448 (&input[2..], 16)
449 } else if input.len() > 1 && input.starts_with('0') {
450 (&input[1..], 8)
451 } else {
452 (input, 10)
453 };
454 if s.is_empty() {
455 return Ok(0);
456 }
457 u64::from_str_radix(s, radix).map_err(|_| UrlError::InvalidIpv4)
458}
459
/// Formats a 32-bit IPv4 address in dotted-decimal form, most significant byte first.
fn serialize_ipv4(addr: u32) -> String {
    let [a, b, c, d] = addr.to_be_bytes();
    format!("{a}.{b}.{c}.{d}")
}
469
470// ---------------------------------------------------------------------------
471// IPv6 parsing
472// ---------------------------------------------------------------------------
473
474fn parse_ipv6(input: &str) -> Result<[u16; 8]> {
475 let mut pieces = [0u16; 8];
476 let mut piece_index: usize = 0;
477 let mut compress: Option<usize> = None;
478 let chars: Vec<char> = input.chars().collect();
479 let len = chars.len();
480 let mut pointer = 0;
481
482 if pointer < len && chars[pointer] == ':' {
483 if pointer + 1 >= len || chars[pointer + 1] != ':' {
484 return Err(UrlError::InvalidIpv6);
485 }
486 pointer += 2;
487 piece_index += 1;
488 compress = Some(piece_index);
489 }
490
491 while pointer < len {
492 if piece_index >= 8 {
493 return Err(UrlError::InvalidIpv6);
494 }
495
496 if chars[pointer] == ':' {
497 if compress.is_some() {
498 return Err(UrlError::InvalidIpv6);
499 }
500 pointer += 1;
501 piece_index += 1;
502 compress = Some(piece_index);
503 continue;
504 }
505
506 let mut value: u16 = 0;
507 let mut length = 0;
508 while length < 4 && pointer < len && chars[pointer].is_ascii_hexdigit() {
509 value = value * 0x10 + hex_val(chars[pointer] as u8).unwrap() as u16;
510 pointer += 1;
511 length += 1;
512 }
513
514 if pointer < len && chars[pointer] == '.' {
515 // IPv4-mapped IPv6.
516 if length == 0 {
517 return Err(UrlError::InvalidIpv6);
518 }
519 pointer -= length;
520 if piece_index > 6 {
521 return Err(UrlError::InvalidIpv6);
522 }
523 let mut numbers_seen = 0;
524 while pointer < len {
525 let mut ipv4_piece: Option<u16> = None;
526 if numbers_seen > 0 {
527 if chars[pointer] == '.' && numbers_seen < 4 {
528 pointer += 1;
529 } else {
530 return Err(UrlError::InvalidIpv6);
531 }
532 }
533 if pointer >= len || !chars[pointer].is_ascii_digit() {
534 return Err(UrlError::InvalidIpv6);
535 }
536 while pointer < len && chars[pointer].is_ascii_digit() {
537 let number = (chars[pointer] as u8 - b'0') as u16;
538 match ipv4_piece {
539 None => ipv4_piece = Some(number),
540 Some(0) => return Err(UrlError::InvalidIpv6), // leading zero
541 Some(v) => ipv4_piece = Some(v * 10 + number),
542 }
543 if ipv4_piece.unwrap_or(0) > 255 {
544 return Err(UrlError::InvalidIpv6);
545 }
546 pointer += 1;
547 }
548 pieces[piece_index] =
549 pieces[piece_index] * 0x100 + ipv4_piece.ok_or(UrlError::InvalidIpv6)?;
550 numbers_seen += 1;
551 if numbers_seen == 2 || numbers_seen == 4 {
552 piece_index += 1;
553 }
554 }
555 if numbers_seen != 4 {
556 return Err(UrlError::InvalidIpv6);
557 }
558 break;
559 }
560
561 if pointer < len && chars[pointer] == ':' {
562 pointer += 1;
563 if pointer >= len {
564 // Trailing single colon after a piece — only valid with compress.
565 }
566 } else if pointer < len {
567 return Err(UrlError::InvalidIpv6);
568 }
569
570 if piece_index >= 8 {
571 return Err(UrlError::InvalidIpv6);
572 }
573 pieces[piece_index] = value;
574 piece_index += 1;
575 }
576
577 if let Some(comp) = compress {
578 let mut swaps = piece_index - comp;
579 piece_index = 7;
580 while piece_index != 0 && swaps > 0 {
581 let swap_index = comp + swaps - 1;
582 pieces.swap(piece_index, swap_index);
583 piece_index -= 1;
584 swaps -= 1;
585 }
586 } else if piece_index != 8 {
587 return Err(UrlError::InvalidIpv6);
588 }
589
590 Ok(pieces)
591}
592
/// Formats eight IPv6 pieces as lowercase hex groups without brackets.
///
/// The first longest run of at least two zero pieces is collapsed to `::`;
/// all other pieces are written without leading zeros.
fn serialize_ipv6(pieces: &[u16; 8]) -> String {
    // (start, length) of the zero run to compress, if any.
    let mut best: Option<(usize, usize)> = None;
    let mut idx = 0;
    while idx < pieces.len() {
        if pieces[idx] != 0 {
            idx += 1;
            continue;
        }
        let start = idx;
        while idx < pieces.len() && pieces[idx] == 0 {
            idx += 1;
        }
        let run = idx - start;
        // Strictly longer only, so the earliest of equal runs wins.
        if run >= 2 && run > best.map_or(0, |(_, len)| len) {
            best = Some((start, run));
        }
    }

    let mut text = String::new();
    let mut idx = 0;
    while idx < pieces.len() {
        match best {
            Some((start, run)) if start == idx => {
                text.push_str("::");
                idx += run;
            }
            _ => {
                // No separator at the very start or right after "::".
                if !(text.is_empty() || text.ends_with(':')) {
                    text.push(':');
                }
                text.push_str(&format!("{:x}", pieces[idx]));
                idx += 1;
            }
        }
    }
    text
}
638
639// ---------------------------------------------------------------------------
640// Host parsing
641// ---------------------------------------------------------------------------
642
643fn parse_host(input: &str, is_special: bool) -> Result<Host> {
644 if input.is_empty() {
645 if is_special {
646 return Err(UrlError::InvalidHost);
647 }
648 return Ok(Host::Domain(String::new()));
649 }
650
651 // IPv6
652 if input.starts_with('[') {
653 if !input.ends_with(']') {
654 return Err(UrlError::InvalidIpv6);
655 }
656 let inner = &input[1..input.len() - 1];
657 let pieces = parse_ipv6(inner)?;
658 return Ok(Host::Ipv6(pieces));
659 }
660
661 if !is_special {
662 let encoded = percent_encode(input, is_c0_control);
663 return Ok(Host::Domain(encoded));
664 }
665
666 // Domain — percent-decode then lowercase.
667 let decoded = percent_decode_string(input);
668 let lowered = decoded.to_ascii_lowercase();
669
670 // Check if it's an IPv4 address.
671 if ends_with_number(&lowered) {
672 match parse_ipv4(&lowered) {
673 Ok(addr) => return Ok(Host::Ipv4(addr)),
674 Err(_) => return Err(UrlError::InvalidHost),
675 }
676 }
677
678 // Validate domain characters.
679 for c in lowered.chars() {
680 if c == '\0'
681 || c == '\t'
682 || c == '\n'
683 || c == '\r'
684 || c == ' '
685 || c == '#'
686 || c == '/'
687 || c == ':'
688 || c == '<'
689 || c == '>'
690 || c == '?'
691 || c == '@'
692 || c == '['
693 || c == '\\'
694 || c == ']'
695 || c == '^'
696 || c == '|'
697 {
698 return Err(UrlError::InvalidHost);
699 }
700 }
701
702 Ok(Host::Domain(lowered))
703}
704
/// Reports whether the last dot-separated label of `input` looks numeric.
///
/// A label counts as numeric when it is all ASCII digits, or `0x`/`0X`
/// followed by zero or more hex digits. An empty last label (including a
/// trailing dot) is not numeric.
fn ends_with_number(input: &str) -> bool {
    let tail = input.rsplit('.').next().unwrap_or("");
    if tail.is_empty() {
        return false;
    }
    match tail.strip_prefix("0x").or_else(|| tail.strip_prefix("0X")) {
        Some(hex) => hex.bytes().all(|b| b.is_ascii_hexdigit()),
        None => tail.bytes().all(|b| b.is_ascii_digit()),
    }
}
719
720// ---------------------------------------------------------------------------
721// Shorten path helper
722// ---------------------------------------------------------------------------
723
724fn shorten_path(scheme: &str, path: &mut Vec<String>) {
725 if scheme == "file" && path.len() == 1 {
726 if let Some(first) = path.first() {
727 if is_normalized_windows_drive_letter(first) {
728 return;
729 }
730 }
731 }
732 path.pop();
733}
734
/// Returns `true` when `s` is exactly an ASCII letter followed by `:`.
fn is_normalized_windows_drive_letter(s: &str) -> bool {
    matches!(s.as_bytes(), [letter, b':'] if letter.is_ascii_alphabetic())
}
739
/// Returns `true` when `s` begins with a Windows drive letter.
///
/// That is an ASCII letter, then `:` or `|`, then either the end of the
/// string or one of `/`, `\`, `?`, `#`.
fn starts_with_windows_drive_letter(s: &str) -> bool {
    match s.as_bytes() {
        [letter, b':' | b'|', rest @ ..] if letter.is_ascii_alphabetic() => rest
            .first()
            .map_or(true, |&next| matches!(next, b'/' | b'\\' | b'?' | b'#')),
        _ => false,
    }
}
757
758// ---------------------------------------------------------------------------
759// URL parser
760// ---------------------------------------------------------------------------
761
762fn parse_url(input: &str, base: Option<&Url>) -> Result<Url> {
763 // Strip leading/trailing C0 controls and spaces.
764 let input = input.trim_matches(|c: char| c <= '\u{0020}');
765
766 if input.is_empty() {
767 if let Some(base) = base {
768 return parse_relative("", base);
769 }
770 return Err(UrlError::EmptyInput);
771 }
772
773 // Remove tab and newline characters.
774 let input: String = input
775 .chars()
776 .filter(|&c| c != '\t' && c != '\n' && c != '\r')
777 .collect();
778
779 let chars: Vec<char> = input.chars().collect();
780 let len = chars.len();
781
782 let mut pointer = 0;
783
784 // Try to parse a scheme.
785 let mut scheme = String::new();
786 let mut has_scheme = false;
787
788 if pointer < len && chars[pointer].is_ascii_alphabetic() {
789 let mut temp = String::new();
790 temp.push(chars[pointer].to_ascii_lowercase());
791 let mut p = pointer + 1;
792 while p < len
793 && (chars[p].is_ascii_alphanumeric()
794 || chars[p] == '+'
795 || chars[p] == '-'
796 || chars[p] == '.')
797 {
798 temp.push(chars[p].to_ascii_lowercase());
799 p += 1;
800 }
801 if p < len && chars[p] == ':' {
802 scheme = temp;
803 has_scheme = true;
804 pointer = p + 1; // skip the ':'
805 }
806 }
807
808 if !has_scheme {
809 if let Some(base) = base {
810 return parse_relative(&input, base);
811 }
812 return Err(UrlError::MissingScheme);
813 }
814
815 let is_special = is_special_scheme(&scheme);
816
817 let mut url = Url {
818 scheme: scheme.clone(),
819 username: String::new(),
820 password: String::new(),
821 host: None,
822 port: None,
823 path: Vec::new(),
824 opaque_path: false,
825 query: None,
826 fragment: None,
827 };
828
829 let remaining: String = chars[pointer..].iter().collect();
830
831 if scheme == "file" {
832 return parse_file_url(&remaining, base, url);
833 }
834
835 if let Some(after_slashes) = remaining.strip_prefix("//") {
836 parse_authority_and_path(&mut url, after_slashes, is_special)?;
837 } else if is_special {
838 if let Some(base) = base {
839 if base.scheme == url.scheme {
840 return parse_relative_special(&remaining, base, url);
841 }
842 }
843 if let Some(after_slash) = remaining.strip_prefix('/') {
844 parse_authority_and_path(&mut url, after_slash, is_special)?;
845 } else {
846 parse_authority_and_path(&mut url, &remaining, is_special)?;
847 }
848 } else {
849 parse_opaque_or_path(&mut url, &remaining)?;
850 }
851
852 Ok(url)
853}
854
855fn parse_authority_and_path(url: &mut Url, input: &str, is_special: bool) -> Result<()> {
856 let authority_end = input
857 .find(|c: char| c == '/' || c == '?' || c == '#' || (is_special && c == '\\'))
858 .unwrap_or(input.len());
859
860 let authority = &input[..authority_end];
861 let rest = &input[authority_end..];
862
863 let (userinfo_part, hostport) = if let Some(at_pos) = authority.rfind('@') {
864 (&authority[..at_pos], &authority[at_pos + 1..])
865 } else {
866 ("", authority)
867 };
868
869 if !userinfo_part.is_empty() {
870 if let Some(colon_pos) = userinfo_part.find(':') {
871 url.username = percent_encode(&userinfo_part[..colon_pos], is_userinfo_encode);
872 url.password = percent_encode(&userinfo_part[colon_pos + 1..], is_userinfo_encode);
873 } else {
874 url.username = percent_encode(userinfo_part, is_userinfo_encode);
875 }
876 }
877
878 let (host_str, port_str) = split_host_port(hostport);
879
880 url.host = Some(parse_host(host_str, is_special)?);
881
882 if let Some(port_s) = port_str {
883 if !port_s.is_empty() {
884 let port: u16 = port_s.parse().map_err(|_| UrlError::InvalidPort)?;
885 if default_port(&url.scheme) != Some(port) {
886 url.port = Some(port);
887 }
888 }
889 }
890
891 parse_path_query_fragment(url, rest, is_special)
892}
893
/// Splits `host[:port]` into the host text and the optional port text.
///
/// * Bracketed hosts (`[::1]:80`) are split after the closing bracket. If
///   anything other than `:port` follows the bracket, or the bracket is
///   never closed, the whole input is returned as the host so that host
///   parsing rejects it instead of silently dropping the extra text.
/// * Otherwise the split happens at the last `:` when what follows is empty
///   or all ASCII digits; if not, the whole input is the host.
///
/// The returned port text excludes the colon and may be empty.
fn split_host_port(input: &str) -> (&str, Option<&str>) {
    if input.starts_with('[') {
        if let Some(bracket_end) = input.find(']') {
            let (host, after) = input.split_at(bracket_end + 1);
            if after.is_empty() {
                return (host, None);
            }
            if let Some(port_str) = after.strip_prefix(':') {
                return (host, Some(port_str));
            }
            // Trailing junk such as "[::1]x" falls through to be rejected.
        }
        return (input, None);
    }

    if let Some(colon_pos) = input.rfind(':') {
        let port_part = &input[colon_pos + 1..];
        if port_part.bytes().all(|b| b.is_ascii_digit()) {
            return (&input[..colon_pos], Some(port_part));
        }
    }
    (input, None)
}
915
916fn parse_path_query_fragment(url: &mut Url, input: &str, is_special: bool) -> Result<()> {
917 let mut remaining = input;
918
919 let path_end = remaining.find(['?', '#']).unwrap_or(remaining.len());
920 let path_str = &remaining[..path_end];
921 remaining = &remaining[path_end..];
922
923 parse_path_into(url, path_str, is_special);
924
925 if let Some(after_q) = remaining.strip_prefix('?') {
926 remaining = after_q;
927 let query_end = remaining.find('#').unwrap_or(remaining.len());
928 let query_str = &remaining[..query_end];
929 remaining = &remaining[query_end..];
930
931 let encode_fn = if is_special {
932 is_special_query_encode
933 } else {
934 is_query_encode
935 };
936 url.query = Some(percent_encode(query_str, encode_fn));
937 }
938
939 if let Some(after_hash) = remaining.strip_prefix('#') {
940 url.fragment = Some(percent_encode(after_hash, is_fragment_encode));
941 }
942
943 Ok(())
944}
945
946fn parse_path_into(url: &mut Url, path: &str, is_special: bool) {
947 if path.is_empty() {
948 if is_special {
949 url.path = vec![String::new()];
950 }
951 return;
952 }
953
954 let segments: Vec<&str> = if is_special {
955 path.split(['/', '\\']).collect()
956 } else {
957 path.split('/').collect()
958 };
959
960 for (i, seg) in segments.iter().enumerate() {
961 if i == 0 && seg.is_empty() {
962 continue;
963 }
964
965 let decoded = *seg;
966 if decoded == "." || decoded.eq_ignore_ascii_case("%2e") {
967 if i == segments.len() - 1 {
968 url.path.push(String::new());
969 }
970 } else if decoded == ".."
971 || decoded.eq_ignore_ascii_case(".%2e")
972 || decoded.eq_ignore_ascii_case("%2e.")
973 || decoded.eq_ignore_ascii_case("%2e%2e")
974 {
975 shorten_path(&url.scheme, &mut url.path);
976 if i == segments.len() - 1 {
977 url.path.push(String::new());
978 }
979 } else {
980 url.path.push(percent_encode(decoded, is_path_encode));
981 }
982 }
983}
984
985fn parse_opaque_or_path(url: &mut Url, input: &str) -> Result<()> {
986 let mut remaining = input;
987
988 let path_end = remaining.find(['?', '#']).unwrap_or(remaining.len());
989 let path_str = &remaining[..path_end];
990 remaining = &remaining[path_end..];
991
992 if path_str.starts_with('/') {
993 url.opaque_path = false;
994 parse_path_into(url, path_str, false);
995 } else {
996 url.opaque_path = true;
997 url.path = vec![percent_encode(path_str, is_c0_control)];
998 }
999
1000 if let Some(after_q) = remaining.strip_prefix('?') {
1001 remaining = after_q;
1002 let query_end = remaining.find('#').unwrap_or(remaining.len());
1003 let query_str = &remaining[..query_end];
1004 remaining = &remaining[query_end..];
1005 url.query = Some(percent_encode(query_str, is_query_encode));
1006 }
1007
1008 if let Some(after_hash) = remaining.strip_prefix('#') {
1009 url.fragment = Some(percent_encode(after_hash, is_fragment_encode));
1010 }
1011
1012 Ok(())
1013}
1014
1015// ---------------------------------------------------------------------------
1016// Relative URL parsing
1017// ---------------------------------------------------------------------------
1018
1019fn parse_relative(input: &str, base: &Url) -> Result<Url> {
1020 let mut url = Url {
1021 scheme: base.scheme.clone(),
1022 username: base.username.clone(),
1023 password: base.password.clone(),
1024 host: base.host.clone(),
1025 port: base.port,
1026 path: base.path.clone(),
1027 opaque_path: base.opaque_path,
1028 query: base.query.clone(),
1029 fragment: None,
1030 };
1031
1032 let is_special = is_special_scheme(&url.scheme);
1033
1034 if input.is_empty() {
1035 return Ok(url);
1036 }
1037
1038 let chars: Vec<char> = input.chars().collect();
1039
1040 if chars[0] == '/' || (is_special && chars[0] == '\\') {
1041 if input.starts_with("//") || (is_special && input.starts_with("\\/")) {
1042 let after_slashes = &input[2..];
1043 url.username = String::new();
1044 url.password = String::new();
1045 url.path = Vec::new();
1046 url.query = None;
1047 parse_authority_and_path(&mut url, after_slashes, is_special)?;
1048 return Ok(url);
1049 }
1050 url.path = Vec::new();
1051 url.query = None;
1052 parse_path_query_fragment(&mut url, input, is_special)?;
1053 return Ok(url);
1054 }
1055
1056 if let Some(after_q) = input.strip_prefix('?') {
1057 url.query = None;
1058 url.fragment = None;
1059 let query_end = after_q.find('#').unwrap_or(after_q.len());
1060 let query_str = &after_q[..query_end];
1061 let after = &after_q[query_end..];
1062
1063 let encode_fn = if is_special {
1064 is_special_query_encode
1065 } else {
1066 is_query_encode
1067 };
1068 url.query = Some(percent_encode(query_str, encode_fn));
1069
1070 if let Some(frag) = after.strip_prefix('#') {
1071 url.fragment = Some(percent_encode(frag, is_fragment_encode));
1072 }
1073 return Ok(url);
1074 }
1075
1076 if let Some(frag) = input.strip_prefix('#') {
1077 url.fragment = Some(percent_encode(frag, is_fragment_encode));
1078 return Ok(url);
1079 }
1080
1081 // Path-relative.
1082 if !url.opaque_path {
1083 shorten_path(&url.scheme, &mut url.path);
1084 }
1085 url.query = None;
1086 url.fragment = None;
1087
1088 parse_path_query_fragment(&mut url, &format!("/{input}"), is_special)?;
1089 Ok(url)
1090}
1091
1092fn parse_relative_special(remaining: &str, base: &Url, mut url: Url) -> Result<Url> {
1093 url.username = base.username.clone();
1094 url.password = base.password.clone();
1095 url.host = base.host.clone();
1096 url.port = base.port;
1097 url.path = base.path.clone();
1098 url.query = base.query.clone();
1099
1100 let is_special = true;
1101
1102 if remaining.is_empty() {
1103 return Ok(url);
1104 }
1105
1106 if remaining.starts_with('/') || remaining.starts_with('\\') {
1107 url.path = Vec::new();
1108 url.query = None;
1109 parse_path_query_fragment(&mut url, remaining, is_special)?;
1110 return Ok(url);
1111 }
1112
1113 if let Some(rest) = remaining.strip_prefix('?') {
1114 url.query = None;
1115 url.fragment = None;
1116 let query_end = rest.find('#').unwrap_or(rest.len());
1117 url.query = Some(percent_encode(&rest[..query_end], is_special_query_encode));
1118 if query_end < rest.len() {
1119 url.fragment = Some(percent_encode(&rest[query_end + 1..], is_fragment_encode));
1120 }
1121 return Ok(url);
1122 }
1123
1124 if let Some(frag) = remaining.strip_prefix('#') {
1125 url.fragment = Some(percent_encode(frag, is_fragment_encode));
1126 return Ok(url);
1127 }
1128
1129 shorten_path(&url.scheme, &mut url.path);
1130 url.query = None;
1131 parse_path_query_fragment(&mut url, &format!("/{remaining}"), is_special)?;
1132 Ok(url)
1133}
1134
1135// ---------------------------------------------------------------------------
1136// File URL parsing
1137// ---------------------------------------------------------------------------
1138
1139fn parse_file_url(input: &str, base: Option<&Url>, mut url: Url) -> Result<Url> {
1140 url.host = Some(Host::Domain(String::new()));
1141
1142 let remaining = if let Some(after) = input.strip_prefix("//") {
1143 after
1144 } else if let Some(after) = input.strip_prefix('/') {
1145 after
1146 } else if let Some(base) = base {
1147 if base.scheme == "file" {
1148 url.host = base.host.clone();
1149 url.path = base.path.clone();
1150
1151 if let Some(rest) = input.strip_prefix('?') {
1152 url.query = None;
1153 url.fragment = None;
1154 let query_end = rest.find('#').unwrap_or(rest.len());
1155 url.query = Some(percent_encode(&rest[..query_end], is_query_encode));
1156 if query_end < rest.len() {
1157 url.fragment = Some(percent_encode(&rest[query_end + 1..], is_fragment_encode));
1158 }
1159 return Ok(url);
1160 }
1161
1162 if let Some(frag) = input.strip_prefix('#') {
1163 url.fragment = Some(percent_encode(frag, is_fragment_encode));
1164 return Ok(url);
1165 }
1166
1167 shorten_path(&url.scheme, &mut url.path);
1168 url.query = None;
1169 parse_path_query_fragment(&mut url, &format!("/{input}"), false)?;
1170 return Ok(url);
1171 } else {
1172 input
1173 }
1174 } else {
1175 input
1176 };
1177
1178 let path_start = remaining
1179 .find(['/', '\\', '?', '#'])
1180 .unwrap_or(remaining.len());
1181
1182 let potential_host = &remaining[..path_start];
1183 let rest = &remaining[path_start..];
1184
1185 if starts_with_windows_drive_letter(remaining) {
1186 url.host = Some(Host::Domain(String::new()));
1187 parse_path_query_fragment(&mut url, &format!("/{remaining}"), false)?;
1188 return Ok(url);
1189 }
1190
1191 if !potential_host.is_empty() {
1192 let host = parse_host(potential_host, false)?;
1193 if host != Host::Domain(String::new()) {
1194 url.host = Some(host);
1195 }
1196 }
1197
1198 parse_path_query_fragment(&mut url, rest, false)?;
1199
1200 // Normalize Windows drive letters in path.
1201 if let Some(first) = url.path.first_mut() {
1202 if first.len() == 2 {
1203 let bytes = first.as_bytes();
1204 if bytes[0].is_ascii_alphabetic() && bytes[1] == b'|' {
1205 let mut normalized = String::new();
1206 normalized.push(bytes[0] as char);
1207 normalized.push(':');
1208 *first = normalized;
1209 }
1210 }
1211 }
1212
1213 Ok(url)
1214}
1215
1216// ---------------------------------------------------------------------------
1217// Tests
1218// ---------------------------------------------------------------------------
1219
#[cfg(test)]
mod tests {
    // Unit tests for the URL parser. They exercise the public `Url` API
    // (`parse`, `parse_with_base`, accessors, `serialize`, `origin`) as well as
    // the module-private helpers `parse_ipv4`, `parse_ipv6`, `serialize_ipv6`,
    // `percent_encode` and `percent_decode_string`, which are reachable here
    // through `use super::*`.
    use super::*;

    // -------------------------------------------------------------------
    // Basic absolute URL parsing
    // -------------------------------------------------------------------

    /// A bare `http://host` URL: no port, query or fragment, and the path is `/`.
    #[test]
    fn parse_simple_http() {
        let url = Url::parse("http://example.com").unwrap();
        assert_eq!(url.scheme(), "http");
        assert_eq!(url.host_str(), Some("example.com".into()));
        assert_eq!(url.port(), None);
        assert_eq!(url.path(), "/");
        assert_eq!(url.query(), None);
        assert_eq!(url.fragment(), None);
    }

    /// A multi-segment path is reported verbatim.
    #[test]
    fn parse_https_with_path() {
        let url = Url::parse("https://example.com/foo/bar").unwrap();
        assert_eq!(url.scheme(), "https");
        assert_eq!(url.host_str(), Some("example.com".into()));
        assert_eq!(url.path(), "/foo/bar");
    }

    /// Every component (credentials, port, path, query, fragment) is extracted.
    #[test]
    fn parse_full_url() {
        let url =
            Url::parse("https://user:pass@example.com:8080/path/to/page?q=1&r=2#frag").unwrap();
        assert_eq!(url.scheme(), "https");
        assert_eq!(url.username(), "user");
        assert_eq!(url.password(), "pass");
        assert_eq!(url.host_str(), Some("example.com".into()));
        assert_eq!(url.port(), Some(8080));
        assert_eq!(url.path(), "/path/to/page");
        assert_eq!(url.query(), Some("q=1&r=2"));
        assert_eq!(url.fragment(), Some("frag"));
    }

    /// An explicit default port is expected to be dropped from `port()` while
    /// `port_or_default()` still reports it.
    #[test]
    fn parse_default_port_omitted() {
        let url = Url::parse("http://example.com:80/").unwrap();
        assert_eq!(url.port(), None);
        assert_eq!(url.port_or_default(), Some(80));
    }

    /// A non-default port is kept.
    #[test]
    fn parse_non_default_port() {
        let url = Url::parse("http://example.com:8080/").unwrap();
        assert_eq!(url.port(), Some(8080));
    }

    /// Port 443 is expected to be treated as the default for `https`.
    #[test]
    fn parse_https_default_port() {
        let url = Url::parse("https://example.com:443/").unwrap();
        assert_eq!(url.port(), None);
    }

    /// Port 21 is expected to be treated as the default for `ftp`.
    #[test]
    fn parse_ftp_default_port() {
        let url = Url::parse("ftp://files.example.com:21/readme.txt").unwrap();
        assert_eq!(url.port(), None);
        assert_eq!(url.port_or_default(), Some(21));
    }

    // -------------------------------------------------------------------
    // Scheme handling
    // -------------------------------------------------------------------

    /// An upper-case scheme is expected to be reported in lower case.
    #[test]
    fn scheme_is_lowercased() {
        let url = Url::parse("HTTP://EXAMPLE.COM").unwrap();
        assert_eq!(url.scheme(), "http");
    }

    /// A non-special scheme with an authority still yields host and path.
    #[test]
    fn non_special_scheme() {
        let url = Url::parse("custom://host/path").unwrap();
        assert_eq!(url.scheme(), "custom");
        assert_eq!(url.host_str(), Some("host".into()));
        assert_eq!(url.path(), "/path");
    }

    /// `data:` URLs are expected to be flagged as cannot-be-a-base.
    #[test]
    fn data_uri() {
        let url = Url::parse("data:text/html,<h1>Hello</h1>").unwrap();
        assert_eq!(url.scheme(), "data");
        assert!(url.cannot_be_a_base());
    }

    /// `javascript:` URLs are expected to be flagged as cannot-be-a-base.
    #[test]
    fn javascript_uri() {
        let url = Url::parse("javascript:alert(1)").unwrap();
        assert_eq!(url.scheme(), "javascript");
        assert!(url.cannot_be_a_base());
    }

    /// `mailto:` URLs are expected to be flagged as cannot-be-a-base.
    #[test]
    fn mailto_uri() {
        let url = Url::parse("mailto:user@example.com").unwrap();
        assert_eq!(url.scheme(), "mailto");
        assert!(url.cannot_be_a_base());
    }

    // -------------------------------------------------------------------
    // Host parsing
    // -------------------------------------------------------------------

    /// An upper-case domain is expected to be reported in lower case.
    #[test]
    fn host_is_lowercased() {
        let url = Url::parse("http://EXAMPLE.COM/").unwrap();
        assert_eq!(url.host_str(), Some("example.com".into()));
    }

    /// A dotted-decimal host becomes `Host::Ipv4` holding the address as one
    /// 32-bit number and serializes back to dotted-decimal.
    #[test]
    fn ipv4_host() {
        let url = Url::parse("http://127.0.0.1/").unwrap();
        assert_eq!(url.host(), Some(&Host::Ipv4(0x7F000001)));
        assert_eq!(url.host_str(), Some("127.0.0.1".into()));
    }

    /// The all-zero IPv4 address parses to `Host::Ipv4(0)`.
    #[test]
    fn ipv4_host_all_zeros() {
        let url = Url::parse("http://0.0.0.0/").unwrap();
        assert_eq!(url.host(), Some(&Host::Ipv4(0)));
    }

    /// A bracketed, compressed IPv6 literal becomes `Host::Ipv6` with eight 16-bit pieces.
    #[test]
    fn ipv6_host() {
        let url = Url::parse("http://[::1]/").unwrap();
        assert_eq!(url.host(), Some(&Host::Ipv6([0, 0, 0, 0, 0, 0, 0, 1])));
    }

    /// An uncompressed eight-piece IPv6 literal is parsed piece by piece.
    #[test]
    fn ipv6_full() {
        let url = Url::parse("http://[2001:db8:85a3:0:0:8a2e:370:7334]/").unwrap();
        assert_eq!(
            url.host(),
            Some(&Host::Ipv6([
                0x2001, 0x0db8, 0x85a3, 0, 0, 0x8a2e, 0x0370, 0x7334
            ]))
        );
    }

    /// `host_str()` keeps the brackets and the `::` compression for IPv6 hosts.
    #[test]
    fn ipv6_serialization_compressed() {
        let url = Url::parse("http://[2001:db8::1]/").unwrap();
        assert_eq!(url.host_str(), Some("[2001:db8::1]".into()));
    }

    /// The unspecified address `::` parses to eight zero pieces and serializes as `[::]`.
    #[test]
    fn ipv6_all_zeros() {
        let url = Url::parse("http://[::]/").unwrap();
        assert_eq!(url.host(), Some(&Host::Ipv6([0; 8])));
        assert_eq!(url.host_str(), Some("[::]".into()));
    }

    /// `parse_ipv6` is called directly, without the surrounding brackets.
    #[test]
    fn ipv6_loopback() {
        let pieces = parse_ipv6("::1").unwrap();
        assert_eq!(pieces, [0, 0, 0, 0, 0, 0, 0, 1]);
    }

    /// An embedded dotted-decimal IPv4 tail fills the last two 16-bit pieces.
    #[test]
    fn ipv6_with_ipv4() {
        let pieces = parse_ipv6("::ffff:192.168.1.1").unwrap();
        assert_eq!(pieces, [0, 0, 0, 0, 0, 0xffff, 0xc0a8, 0x0101]);
    }

    // -------------------------------------------------------------------
    // IPv4 parsing
    // -------------------------------------------------------------------

    /// Four decimal parts combine into one big-endian 32-bit value.
    #[test]
    fn ipv4_basic() {
        assert_eq!(parse_ipv4("192.168.1.1").unwrap(), 0xC0A80101);
    }

    /// Parts with a `0x` prefix are read as hexadecimal.
    #[test]
    fn ipv4_hex() {
        assert_eq!(parse_ipv4("0xC0.0xA8.0x01.0x01").unwrap(), 0xC0A80101);
    }

    /// Parts with a leading `0` are read as octal.
    #[test]
    fn ipv4_octal() {
        assert_eq!(parse_ipv4("0300.0250.01.01").unwrap(), 0xC0A80101);
    }

    /// Pins that `parse_ipv4` rejects a single bare number.
    ///
    /// NOTE(review): the WHATWG IPv4 parser accepts a one-part address
    /// (`3232235777` is `192.168.1.1`); confirm this rejection is intentional.
    #[test]
    fn ipv4_single_number() {
        assert!(parse_ipv4("3232235777").is_err());
    }

    #[test]
    fn ipv4_two_parts() {
        // Two parts: first is top 8 bits, second is bottom 24 bits.
        // 192.168.1.1 => 168*65536 + 1*256 + 1 = 11010305
        assert_eq!(parse_ipv4("192.11010305").unwrap(), 0xC0A80101);
    }

    /// A part above 255 in a four-part address is an error.
    #[test]
    fn ipv4_reject_overflow() {
        assert!(parse_ipv4("256.0.0.0").is_err());
    }

    /// An empty part between two dots is an error.
    #[test]
    fn ipv4_reject_empty_part() {
        assert!(parse_ipv4("1..1.1").is_err());
    }

    // -------------------------------------------------------------------
    // Percent encoding/decoding
    // -------------------------------------------------------------------

    /// A fully percent-encoded ASCII string decodes to its plain text.
    #[test]
    fn percent_decode_basic() {
        assert_eq!(percent_decode_string("%48%65%6C%6C%6F"), "Hello");
    }

    /// Literal characters and escapes can be mixed.
    #[test]
    fn percent_decode_mixed() {
        assert_eq!(percent_decode_string("Hello%20World"), "Hello World");
    }

    /// Input without escapes is returned unchanged.
    #[test]
    fn percent_decode_passthrough() {
        assert_eq!(percent_decode_string("no-encoding"), "no-encoding");
    }

    /// A `%` not followed by two hex digits is left as-is rather than rejected.
    #[test]
    fn percent_decode_partial() {
        assert_eq!(percent_decode_string("100%"), "100%");
        assert_eq!(percent_decode_string("%2"), "%2");
    }

    /// `@` is escaped under the userinfo encode set.
    #[test]
    fn percent_encode_userinfo() {
        let encoded = percent_encode("user@host", is_userinfo_encode);
        assert_eq!(encoded, "user%40host");
    }

    /// A space is escaped under the path encode set.
    #[test]
    fn percent_encode_path() {
        let encoded = percent_encode("hello world", is_path_encode);
        assert_eq!(encoded, "hello%20world");
    }

    // -------------------------------------------------------------------
    // Path parsing and dot segments
    // -------------------------------------------------------------------

    /// `..` removes the preceding segment.
    #[test]
    fn path_dot_removal() {
        let url = Url::parse("http://example.com/a/b/../c").unwrap();
        assert_eq!(url.path(), "/a/c");
    }

    /// `.` is dropped without affecting neighbouring segments.
    #[test]
    fn path_dot_current() {
        let url = Url::parse("http://example.com/a/./b").unwrap();
        assert_eq!(url.path(), "/a/b");
    }

    /// Consecutive `..` segments each remove one preceding segment.
    #[test]
    fn path_multiple_dots() {
        let url = Url::parse("http://example.com/a/b/c/../../d").unwrap();
        assert_eq!(url.path(), "/a/d");
    }

    /// A trailing slash is preserved.
    #[test]
    fn path_trailing_slash() {
        let url = Url::parse("http://example.com/a/b/").unwrap();
        assert_eq!(url.path(), "/a/b/");
    }

    /// A special-scheme URL without a path reports `/`.
    #[test]
    fn path_empty() {
        let url = Url::parse("http://example.com").unwrap();
        assert_eq!(url.path(), "/");
    }

    /// `..` at the root cannot climb above it.
    #[test]
    fn path_double_dot_at_root() {
        let url = Url::parse("http://example.com/../a").unwrap();
        assert_eq!(url.path(), "/a");
    }

    // -------------------------------------------------------------------
    // Relative URL resolution
    // -------------------------------------------------------------------

    /// A bare segment replaces the last segment of the base path.
    #[test]
    fn relative_path() {
        let base = Url::parse("http://example.com/a/b/c").unwrap();
        let url = Url::parse_with_base("d", &base).unwrap();
        assert_eq!(url.path(), "/a/b/d");
        assert_eq!(url.host_str(), Some("example.com".into()));
    }

    /// `../` climbs one directory from the base's directory.
    #[test]
    fn relative_path_with_dots() {
        let base = Url::parse("http://example.com/a/b/c").unwrap();
        let url = Url::parse_with_base("../d", &base).unwrap();
        assert_eq!(url.path(), "/a/d");
    }

    /// A reference starting with `/` replaces the whole base path but keeps the host.
    #[test]
    fn relative_absolute_path() {
        let base = Url::parse("http://example.com/a/b/c").unwrap();
        let url = Url::parse_with_base("/d/e", &base).unwrap();
        assert_eq!(url.path(), "/d/e");
        assert_eq!(url.host_str(), Some("example.com".into()));
    }

    /// A `?…` reference keeps the base path and replaces the query.
    #[test]
    fn relative_query_only() {
        let base = Url::parse("http://example.com/a/b?old=1").unwrap();
        let url = Url::parse_with_base("?new=2", &base).unwrap();
        assert_eq!(url.path(), "/a/b");
        assert_eq!(url.query(), Some("new=2"));
    }

    /// A `#…` reference keeps the base path and replaces the fragment.
    #[test]
    fn relative_fragment_only() {
        let base = Url::parse("http://example.com/a/b#old").unwrap();
        let url = Url::parse_with_base("#new", &base).unwrap();
        assert_eq!(url.path(), "/a/b");
        assert_eq!(url.fragment(), Some("new"));
    }

    /// A scheme-relative `//host/path` reference keeps only the base's scheme.
    #[test]
    fn relative_authority_override() {
        let base = Url::parse("http://example.com/a/b").unwrap();
        let url = Url::parse_with_base("//other.com/c", &base).unwrap();
        assert_eq!(url.scheme(), "http");
        assert_eq!(url.host_str(), Some("other.com".into()));
        assert_eq!(url.path(), "/c");
    }

    /// An input with its own scheme is parsed independently of the base.
    #[test]
    fn absolute_url_ignores_base() {
        let base = Url::parse("http://example.com/a").unwrap();
        let url = Url::parse_with_base("https://other.com/b", &base).unwrap();
        assert_eq!(url.scheme(), "https");
        assert_eq!(url.host_str(), Some("other.com".into()));
        assert_eq!(url.path(), "/b");
    }

    /// An empty reference yields the base's path and query but not its fragment.
    #[test]
    fn relative_empty_string() {
        let base = Url::parse("http://example.com/a/b?q=1#f").unwrap();
        let url = Url::parse_with_base("", &base).unwrap();
        assert_eq!(url.path(), "/a/b");
        assert_eq!(url.query(), Some("q=1"));
        assert_eq!(url.fragment(), None);
    }

    // -------------------------------------------------------------------
    // Serialization
    // -------------------------------------------------------------------

    /// Scheme, host and path serialize back to the input.
    #[test]
    fn serialize_simple() {
        let url = Url::parse("http://example.com/path").unwrap();
        assert_eq!(url.serialize(), "http://example.com/path");
    }

    /// Credentials are emitted as `user:pass@` before the host.
    #[test]
    fn serialize_with_credentials() {
        let url = Url::parse("http://user:pass@example.com/").unwrap();
        assert_eq!(url.serialize(), "http://user:pass@example.com/");
    }

    /// A non-default port is emitted after the host.
    #[test]
    fn serialize_with_port() {
        let url = Url::parse("http://example.com:8080/").unwrap();
        assert_eq!(url.serialize(), "http://example.com:8080/");
    }

    /// Query and fragment are emitted with their `?` and `#` delimiters.
    #[test]
    fn serialize_with_query_fragment() {
        let url = Url::parse("http://example.com/path?q=1#frag").unwrap();
        assert_eq!(url.serialize(), "http://example.com/path?q=1#frag");
    }

    /// A cannot-be-a-base URL serializes without `//`.
    #[test]
    fn serialize_data_uri() {
        let url = Url::parse("data:text/html,hello").unwrap();
        assert_eq!(url.serialize(), "data:text/html,hello");
    }

    /// Parsing then serializing an already-normalized URL is the identity.
    #[test]
    fn roundtrip_full_url() {
        let input = "https://user:pass@example.com:8080/a/b?q=1#frag";
        let url = Url::parse(input).unwrap();
        assert_eq!(url.serialize(), input);
    }

    /// An IPv4 host round-trips in dotted-decimal form.
    #[test]
    fn roundtrip_ipv4() {
        let url = Url::parse("http://192.168.1.1/path").unwrap();
        assert_eq!(url.serialize(), "http://192.168.1.1/path");
    }

    /// An IPv6 host round-trips with brackets and compression.
    #[test]
    fn roundtrip_ipv6() {
        let url = Url::parse("http://[::1]/path").unwrap();
        assert_eq!(url.serialize(), "http://[::1]/path");
    }

    // -------------------------------------------------------------------
    // Origin
    // -------------------------------------------------------------------

    /// An `http` URL has a tuple origin of (scheme, host, port).
    #[test]
    fn origin_http() {
        let url = Url::parse("http://example.com:8080/path").unwrap();
        match url.origin() {
            Origin::Tuple(scheme, host, port) => {
                assert_eq!(scheme, "http");
                assert_eq!(host, Host::Domain("example.com".into()));
                assert_eq!(port, Some(8080));
            }
            _ => panic!("expected tuple origin"),
        }
    }

    /// With no explicit port, the origin's port component is `None`.
    #[test]
    fn origin_https_default_port() {
        let url = Url::parse("https://example.com/").unwrap();
        match url.origin() {
            Origin::Tuple(scheme, host, port) => {
                assert_eq!(scheme, "https");
                assert_eq!(host, Host::Domain("example.com".into()));
                assert_eq!(port, None);
            }
            _ => panic!("expected tuple origin"),
        }
    }

    /// A `data:` URL has an opaque origin.
    #[test]
    fn origin_data_is_opaque() {
        let url = Url::parse("data:text/html,hello").unwrap();
        assert_eq!(url.origin(), Origin::Opaque);
    }

    // -------------------------------------------------------------------
    // File URLs
    // -------------------------------------------------------------------

    /// `file:///…` has an empty (but present) host and an absolute path.
    #[test]
    fn file_url_unix() {
        let url = Url::parse("file:///home/user/file.txt").unwrap();
        assert_eq!(url.scheme(), "file");
        assert_eq!(url.host_str(), Some("".into()));
        assert_eq!(url.path(), "/home/user/file.txt");
    }

    /// A Windows drive letter is kept as the first path segment.
    #[test]
    fn file_url_windows_drive() {
        let url = Url::parse("file:///C:/Windows/system32").unwrap();
        assert_eq!(url.scheme(), "file");
        assert_eq!(url.path(), "/C:/Windows/system32");
    }

    /// `file://server/…` keeps `server` as the host.
    #[test]
    fn file_url_with_host() {
        let url = Url::parse("file://server/share/file.txt").unwrap();
        assert_eq!(url.scheme(), "file");
        assert_eq!(url.host_str(), Some("server".into()));
        assert_eq!(url.path(), "/share/file.txt");
    }

    // -------------------------------------------------------------------
    // Edge cases
    // -------------------------------------------------------------------

    /// Empty input is reported as `UrlError::EmptyInput`.
    #[test]
    fn empty_input_fails() {
        assert_eq!(Url::parse(""), Err(UrlError::EmptyInput));
    }

    /// Whitespace-only input is also reported as `UrlError::EmptyInput`.
    #[test]
    fn whitespace_only_fails() {
        assert_eq!(Url::parse(" "), Err(UrlError::EmptyInput));
    }

    /// Without a base, input lacking a scheme is an error (the variant is not pinned).
    #[test]
    fn missing_scheme_fails() {
        assert!(Url::parse("example.com").is_err());
    }

    /// Surrounding whitespace is ignored.
    #[test]
    fn leading_whitespace_stripped() {
        let url = Url::parse(" http://example.com ").unwrap();
        assert_eq!(url.host_str(), Some("example.com".into()));
    }

    /// Tab and newline characters inside the input are removed before parsing.
    #[test]
    fn tab_newline_stripped() {
        let url = Url::parse("http://exa\tmple\n.com/").unwrap();
        assert_eq!(url.host_str(), Some("example.com".into()));
    }

    /// A space inside the query is percent-encoded.
    #[test]
    fn query_with_special_chars() {
        let url = Url::parse("http://example.com/?key=val ue&foo=bar").unwrap();
        assert!(url.query().unwrap().contains("key=val%20ue"));
    }

    /// A space inside the fragment is percent-encoded.
    #[test]
    fn fragment_with_special_chars() {
        let url = Url::parse("http://example.com/#sec tion").unwrap();
        assert!(url.fragment().unwrap().contains("sec%20tion"));
    }

    /// A username without a password still counts as credentials; the password is empty.
    #[test]
    fn username_only() {
        let url = Url::parse("http://user@example.com/").unwrap();
        assert_eq!(url.username(), "user");
        assert_eq!(url.password(), "");
        assert!(url.has_credentials());
    }

    /// A URL without userinfo has no credentials.
    #[test]
    fn no_credentials() {
        let url = Url::parse("http://example.com/").unwrap();
        assert!(!url.has_credentials());
    }

    /// A port above 65535 is an error (the variant is not pinned).
    #[test]
    fn port_overflow_fails() {
        assert!(Url::parse("http://example.com:99999/").is_err());
    }

    /// `ws` shares port 80 as its default.
    #[test]
    fn ws_scheme() {
        let url = Url::parse("ws://example.com/chat").unwrap();
        assert_eq!(url.scheme(), "ws");
        assert_eq!(url.port_or_default(), Some(80));
    }

    /// `wss` shares port 443 as its default.
    #[test]
    fn wss_scheme() {
        let url = Url::parse("wss://example.com/chat").unwrap();
        assert_eq!(url.scheme(), "wss");
        assert_eq!(url.port_or_default(), Some(443));
    }

    /// A `data:` URL cannot be used as a base.
    #[test]
    fn cannot_be_a_base() {
        let url = Url::parse("data:text/html,hello").unwrap();
        assert!(url.cannot_be_a_base());
    }

    /// An `http` URL can be used as a base.
    #[test]
    fn http_can_be_a_base() {
        let url = Url::parse("http://example.com/").unwrap();
        assert!(!url.cannot_be_a_base());
    }

    // -------------------------------------------------------------------
    // Display/ToString
    // -------------------------------------------------------------------

    /// The `Display` output equals `serialize()`.
    #[test]
    fn display_matches_serialize() {
        let url = Url::parse("https://example.com:8443/path?q=1#f").unwrap();
        assert_eq!(format!("{url}"), url.serialize());
    }

    // -------------------------------------------------------------------
    // Multiple path segments
    // -------------------------------------------------------------------

    /// `path_segments()` exposes the path as individual segments without slashes.
    #[test]
    fn path_segments() {
        let url = Url::parse("http://example.com/a/b/c").unwrap();
        assert_eq!(url.path_segments(), &["a", "b", "c"]);
    }

    /// A trailing slash shows up as a final empty segment.
    #[test]
    fn path_segments_trailing_slash() {
        let url = Url::parse("http://example.com/a/b/").unwrap();
        assert_eq!(url.path_segments(), &["a", "b", ""]);
    }

    // -------------------------------------------------------------------
    // Host type
    // -------------------------------------------------------------------

    /// A domain host serializes to the domain text itself.
    #[test]
    fn host_serialize_domain() {
        let h = Host::Domain("example.com".into());
        assert_eq!(h.serialize(), "example.com");
    }

    /// An IPv4 host serializes to dotted-decimal.
    #[test]
    fn host_serialize_ipv4() {
        let h = Host::Ipv4(0x7F000001);
        assert_eq!(h.serialize(), "127.0.0.1");
    }

    /// An IPv6 host serializes with surrounding brackets.
    #[test]
    fn host_serialize_ipv6() {
        let h = Host::Ipv6([0, 0, 0, 0, 0, 0, 0, 1]);
        assert_eq!(h.serialize(), "[::1]");
    }

    // -------------------------------------------------------------------
    // IPv6 serialization
    // -------------------------------------------------------------------

    /// Without zero pieces there is no compression; leading zeros in each piece are dropped.
    #[test]
    fn ipv6_serialize_full() {
        let pieces = [
            0x2001, 0x0db8, 0x85a3, 0x0001, 0x0002, 0x8a2e, 0x0370, 0x7334,
        ];
        assert_eq!(serialize_ipv6(&pieces), "2001:db8:85a3:1:2:8a2e:370:7334");
    }

    /// A run of zero pieces is compressed to `::`.
    #[test]
    fn ipv6_serialize_compress() {
        let pieces = [0x2001, 0x0db8, 0, 0, 0, 0, 0, 1];
        assert_eq!(serialize_ipv6(&pieces), "2001:db8::1");
    }

    /// Eight zero pieces serialize to `::` (`serialize_ipv6` itself adds no brackets).
    #[test]
    fn ipv6_serialize_all_zeros() {
        let pieces = [0u16; 8];
        assert_eq!(serialize_ipv6(&pieces), "::");
    }

    /// Isolated single zero pieces are not compressed.
    #[test]
    fn ipv6_serialize_no_compress_single_zero() {
        let pieces = [1, 0, 2, 0, 3, 0, 4, 0];
        assert_eq!(serialize_ipv6(&pieces), "1:0:2:0:3:0:4:0");
    }

    // -------------------------------------------------------------------
    // Percent encoding edge cases
    // -------------------------------------------------------------------

    /// `-`, `_`, `.` and `~` pass through the path encode set unescaped.
    #[test]
    fn percent_encode_preserves_unreserved() {
        let encoded = percent_encode("hello-world_test.page~1", is_path_encode);
        assert_eq!(encoded, "hello-world_test.page~1");
    }

    /// A non-ASCII character is escaped as its UTF-8 bytes, one `%XX` per byte.
    #[test]
    fn percent_encode_multibyte_utf8() {
        let encoded = percent_encode("café", is_path_encode);
        assert_eq!(encoded, "caf%C3%A9");
    }
}