we (web engine): Experimental web browser project to understand the limits of Claude
at e2e-page-loading 1878 lines 57 kB view raw
1//! WHATWG URL parser. 2//! 3//! Implements the URL Standard (<https://url.spec.whatwg.org/>): 4//! - URL record type with scheme, username, password, host, port, path, query, fragment 5//! - State-machine parser following the spec 6//! - Host parsing: domains, IPv4 addresses, IPv6 addresses 7//! - Percent-encoding and decoding (UTF-8) 8//! - Special scheme handling (http, https, ftp, ws, wss, file) 9//! - Relative URL resolution via base URL 10//! - URL serialization 11//! - Origin derivation 12 13pub mod data_url; 14 15use core::fmt; 16 17// --------------------------------------------------------------------------- 18// Error types 19// --------------------------------------------------------------------------- 20 21#[derive(Debug, Clone, PartialEq, Eq)] 22pub enum UrlError { 23 /// Input is empty or contains only whitespace. 24 EmptyInput, 25 /// Invalid URL syntax. 26 InvalidUrl, 27 /// Invalid scheme. 28 InvalidScheme, 29 /// Invalid authority. 30 InvalidAuthority, 31 /// Invalid host. 32 InvalidHost, 33 /// Invalid port number. 34 InvalidPort, 35 /// Invalid IPv4 address. 36 InvalidIpv4, 37 /// Invalid IPv6 address. 38 InvalidIpv6, 39 /// Invalid percent-encoding. 40 InvalidPercentEncoding, 41 /// Relative URL without a base. 42 RelativeWithoutBase, 43 /// Missing scheme. 
44 MissingScheme, 45} 46 47impl fmt::Display for UrlError { 48 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 49 match self { 50 Self::EmptyInput => write!(f, "empty input"), 51 Self::InvalidUrl => write!(f, "invalid URL"), 52 Self::InvalidScheme => write!(f, "invalid scheme"), 53 Self::InvalidAuthority => write!(f, "invalid authority"), 54 Self::InvalidHost => write!(f, "invalid host"), 55 Self::InvalidPort => write!(f, "invalid port number"), 56 Self::InvalidIpv4 => write!(f, "invalid IPv4 address"), 57 Self::InvalidIpv6 => write!(f, "invalid IPv6 address"), 58 Self::InvalidPercentEncoding => write!(f, "invalid percent-encoding"), 59 Self::RelativeWithoutBase => write!(f, "relative URL without a base"), 60 Self::MissingScheme => write!(f, "missing scheme"), 61 } 62 } 63} 64 65pub type Result<T> = core::result::Result<T, UrlError>; 66 67// --------------------------------------------------------------------------- 68// Host 69// --------------------------------------------------------------------------- 70 71/// A parsed URL host. 72#[derive(Debug, Clone, PartialEq, Eq)] 73pub enum Host { 74 /// A domain name (already lowercased). 75 Domain(String), 76 /// An IPv4 address. 77 Ipv4(u32), 78 /// An IPv6 address (128 bits as eight 16-bit pieces). 79 Ipv6([u16; 8]), 80} 81 82impl Host { 83 /// Serialize the host to a string. 84 pub fn serialize(&self) -> String { 85 match self { 86 Host::Domain(d) => d.clone(), 87 Host::Ipv4(addr) => serialize_ipv4(*addr), 88 Host::Ipv6(pieces) => format!("[{}]", serialize_ipv6(pieces)), 89 } 90 } 91} 92 93impl fmt::Display for Host { 94 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 95 write!(f, "{}", self.serialize()) 96 } 97} 98 99// --------------------------------------------------------------------------- 100// Origin 101// --------------------------------------------------------------------------- 102 103/// A URL origin (scheme, host, port). 
104#[derive(Debug, Clone, PartialEq, Eq)] 105pub enum Origin { 106 /// A tuple origin (scheme, host, port). 107 Tuple(String, Host, Option<u16>), 108 /// An opaque origin (unique, not equal to anything). 109 Opaque, 110} 111 112// --------------------------------------------------------------------------- 113// URL record 114// --------------------------------------------------------------------------- 115 116/// A parsed URL record per the WHATWG URL Standard. 117#[derive(Debug, Clone, PartialEq, Eq)] 118pub struct Url { 119 /// The scheme (e.g., "http", "https", "file"). 120 pub scheme: String, 121 /// The username (percent-encoded). 122 username: String, 123 /// The password (percent-encoded). 124 password: String, 125 /// The host. 126 pub host: Option<Host>, 127 /// The port (None = default or absent). 128 pub port: Option<u16>, 129 /// Path segments. For non-opaque paths, these are the segments. 130 /// For opaque paths (cannot-be-a-base URL), this is a single element. 131 path: Vec<String>, 132 /// Whether this URL has an opaque path (cannot-be-a-base URL). 133 opaque_path: bool, 134 /// The query string (without leading '?'). 135 pub query: Option<String>, 136 /// The fragment (without leading '#'). 137 pub fragment: Option<String>, 138} 139 140impl Url { 141 /// Parse a URL string. 142 pub fn parse(input: &str) -> Result<Self> { 143 parse_url(input, None) 144 } 145 146 /// Parse a URL string with a base URL for resolving relative references. 147 pub fn parse_with_base(input: &str, base: &Url) -> Result<Self> { 148 parse_url(input, Some(base)) 149 } 150 151 /// Get the scheme. 152 pub fn scheme(&self) -> &str { 153 &self.scheme 154 } 155 156 /// Get the username (percent-encoded). 157 pub fn username(&self) -> &str { 158 &self.username 159 } 160 161 /// Get the password (percent-encoded). 162 pub fn password(&self) -> &str { 163 &self.password 164 } 165 166 /// Get the host. 
167 pub fn host(&self) -> Option<&Host> { 168 self.host.as_ref() 169 } 170 171 /// Get the host as a string. 172 pub fn host_str(&self) -> Option<String> { 173 self.host.as_ref().map(|h| h.serialize()) 174 } 175 176 /// Get the port. 177 pub fn port(&self) -> Option<u16> { 178 self.port 179 } 180 181 /// Get the port or the default port for the scheme. 182 pub fn port_or_default(&self) -> Option<u16> { 183 self.port.or_else(|| default_port(&self.scheme)) 184 } 185 186 /// Get the path as a string. 187 pub fn path(&self) -> String { 188 if self.opaque_path { 189 self.path.first().cloned().unwrap_or_default() 190 } else { 191 let mut s = String::new(); 192 for seg in &self.path { 193 s.push('/'); 194 s.push_str(seg); 195 } 196 if s.is_empty() { 197 s.push('/'); 198 } 199 s 200 } 201 } 202 203 /// Get the path segments. 204 pub fn path_segments(&self) -> &[String] { 205 &self.path 206 } 207 208 /// Get the query string. 209 pub fn query(&self) -> Option<&str> { 210 self.query.as_deref() 211 } 212 213 /// Get the fragment. 214 pub fn fragment(&self) -> Option<&str> { 215 self.fragment.as_deref() 216 } 217 218 /// Whether this URL has an opaque path (cannot-be-a-base). 219 pub fn cannot_be_a_base(&self) -> bool { 220 self.opaque_path 221 } 222 223 /// Whether this URL includes credentials. 224 pub fn has_credentials(&self) -> bool { 225 !self.username.is_empty() || !self.password.is_empty() 226 } 227 228 /// Derive the origin of this URL. 229 pub fn origin(&self) -> Origin { 230 match self.scheme.as_str() { 231 "http" | "https" | "ws" | "wss" | "ftp" => { 232 if let Some(host) = &self.host { 233 Origin::Tuple(self.scheme.clone(), host.clone(), self.port) 234 } else { 235 Origin::Opaque 236 } 237 } 238 _ => Origin::Opaque, 239 } 240 } 241 242 /// Serialize this URL to a string (the href). 
243 pub fn serialize(&self) -> String { 244 let mut output = String::new(); 245 output.push_str(&self.scheme); 246 output.push(':'); 247 248 if self.host.is_some() { 249 output.push_str("//"); 250 if self.has_credentials() { 251 output.push_str(&self.username); 252 if !self.password.is_empty() { 253 output.push(':'); 254 output.push_str(&self.password); 255 } 256 output.push('@'); 257 } 258 if let Some(ref host) = self.host { 259 output.push_str(&host.serialize()); 260 } 261 if let Some(port) = self.port { 262 output.push(':'); 263 output.push_str(&port.to_string()); 264 } 265 } else if !self.opaque_path && self.scheme == "file" { 266 output.push_str("//"); 267 } 268 269 output.push_str(&self.path()); 270 271 if let Some(ref query) = self.query { 272 output.push('?'); 273 output.push_str(query); 274 } 275 if let Some(ref fragment) = self.fragment { 276 output.push('#'); 277 output.push_str(fragment); 278 } 279 280 output 281 } 282} 283 284impl fmt::Display for Url { 285 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 286 write!(f, "{}", self.serialize()) 287 } 288} 289 290// --------------------------------------------------------------------------- 291// Special schemes 292// --------------------------------------------------------------------------- 293 294/// Whether a scheme is "special" per the URL standard. 295fn is_special_scheme(scheme: &str) -> bool { 296 matches!(scheme, "http" | "https" | "ftp" | "ws" | "wss" | "file") 297} 298 299/// Default port for a special scheme. 300fn default_port(scheme: &str) -> Option<u16> { 301 match scheme { 302 "http" | "ws" => Some(80), 303 "https" | "wss" => Some(443), 304 "ftp" => Some(21), 305 _ => None, 306 } 307} 308 309// --------------------------------------------------------------------------- 310// Percent encoding / decoding 311// --------------------------------------------------------------------------- 312 313/// The C0 control percent-encode set. 
314fn is_c0_control(c: char) -> bool { 315 c <= '\u{001F}' || c > '\u{007E}' 316} 317 318/// The fragment percent-encode set. 319fn is_fragment_encode(c: char) -> bool { 320 is_c0_control(c) || c == ' ' || c == '"' || c == '<' || c == '>' || c == '`' 321} 322 323/// The query percent-encode set. 324fn is_query_encode(c: char) -> bool { 325 is_c0_control(c) || c == ' ' || c == '"' || c == '#' || c == '<' || c == '>' 326} 327 328/// The special query percent-encode set. 329fn is_special_query_encode(c: char) -> bool { 330 is_query_encode(c) || c == '\'' 331} 332 333/// The path percent-encode set. 334fn is_path_encode(c: char) -> bool { 335 is_query_encode(c) || c == '?' || c == '`' || c == '{' || c == '}' 336} 337 338/// The userinfo percent-encode set. 339fn is_userinfo_encode(c: char) -> bool { 340 is_path_encode(c) 341 || c == '/' 342 || c == ':' 343 || c == ';' 344 || c == '=' 345 || c == '@' 346 || c == '[' 347 || c == '\\' 348 || c == ']' 349 || c == '^' 350 || c == '|' 351} 352 353/// Percent-encode a string using the given encode set predicate. 354fn percent_encode(input: &str, should_encode: fn(char) -> bool) -> String { 355 let mut out = String::with_capacity(input.len()); 356 for c in input.chars() { 357 if should_encode(c) { 358 for b in c.to_string().as_bytes() { 359 out.push('%'); 360 out.push(to_hex_upper(b >> 4)); 361 out.push(to_hex_upper(b & 0x0F)); 362 } 363 } else { 364 out.push(c); 365 } 366 } 367 out 368} 369 370fn to_hex_upper(n: u8) -> char { 371 if n < 10 { 372 (b'0' + n) as char 373 } else { 374 (b'A' + n - 10) as char 375 } 376} 377 378/// Percent-decode a byte string. 
379pub fn percent_decode(input: &str) -> Vec<u8> { 380 let bytes = input.as_bytes(); 381 let mut out = Vec::with_capacity(bytes.len()); 382 let mut i = 0; 383 while i < bytes.len() { 384 if bytes[i] == b'%' && i + 2 < bytes.len() { 385 if let (Some(hi), Some(lo)) = (hex_val(bytes[i + 1]), hex_val(bytes[i + 2])) { 386 out.push(hi << 4 | lo); 387 i += 3; 388 continue; 389 } 390 } 391 out.push(bytes[i]); 392 i += 1; 393 } 394 out 395} 396 397/// Percent-decode to a UTF-8 string (lossy). 398pub fn percent_decode_string(input: &str) -> String { 399 String::from_utf8_lossy(&percent_decode(input)).into_owned() 400} 401 402fn hex_val(b: u8) -> Option<u8> { 403 match b { 404 b'0'..=b'9' => Some(b - b'0'), 405 b'a'..=b'f' => Some(b - b'a' + 10), 406 b'A'..=b'F' => Some(b - b'A' + 10), 407 _ => None, 408 } 409} 410 411// --------------------------------------------------------------------------- 412// IPv4 parsing 413// --------------------------------------------------------------------------- 414 415fn parse_ipv4(input: &str) -> Result<u32> { 416 let parts: Vec<&str> = input.split('.').collect(); 417 if parts.len() < 2 || parts.len() > 4 { 418 return Err(UrlError::InvalidIpv4); 419 } 420 let mut numbers: Vec<u64> = Vec::with_capacity(parts.len()); 421 for part in &parts { 422 if part.is_empty() { 423 return Err(UrlError::InvalidIpv4); 424 } 425 let n = parse_ipv4_number(part)?; 426 numbers.push(n); 427 } 428 let last = numbers.len() - 1; 429 for (i, &n) in numbers.iter().enumerate() { 430 if i < last && n > 255 { 431 return Err(UrlError::InvalidIpv4); 432 } 433 } 434 if numbers[last] >= 256u64.pow((4 - last) as u32) { 435 return Err(UrlError::InvalidIpv4); 436 } 437 438 let mut ipv4 = numbers[last] as u32; 439 for (i, &n) in numbers.iter().enumerate().take(last) { 440 ipv4 += (n as u32) << (8 * (3 - i)); 441 } 442 Ok(ipv4) 443} 444 445fn parse_ipv4_number(input: &str) -> Result<u64> { 446 if input.is_empty() { 447 return Err(UrlError::InvalidIpv4); 448 } 449 let (s, radix) 
= if input.starts_with("0x") || input.starts_with("0X") { 450 (&input[2..], 16) 451 } else if input.len() > 1 && input.starts_with('0') { 452 (&input[1..], 8) 453 } else { 454 (input, 10) 455 }; 456 if s.is_empty() { 457 return Ok(0); 458 } 459 u64::from_str_radix(s, radix).map_err(|_| UrlError::InvalidIpv4) 460} 461 462fn serialize_ipv4(addr: u32) -> String { 463 format!( 464 "{}.{}.{}.{}", 465 (addr >> 24) & 0xFF, 466 (addr >> 16) & 0xFF, 467 (addr >> 8) & 0xFF, 468 addr & 0xFF 469 ) 470} 471 472// --------------------------------------------------------------------------- 473// IPv6 parsing 474// --------------------------------------------------------------------------- 475 476fn parse_ipv6(input: &str) -> Result<[u16; 8]> { 477 let mut pieces = [0u16; 8]; 478 let mut piece_index: usize = 0; 479 let mut compress: Option<usize> = None; 480 let chars: Vec<char> = input.chars().collect(); 481 let len = chars.len(); 482 let mut pointer = 0; 483 484 if pointer < len && chars[pointer] == ':' { 485 if pointer + 1 >= len || chars[pointer + 1] != ':' { 486 return Err(UrlError::InvalidIpv6); 487 } 488 pointer += 2; 489 piece_index += 1; 490 compress = Some(piece_index); 491 } 492 493 while pointer < len { 494 if piece_index >= 8 { 495 return Err(UrlError::InvalidIpv6); 496 } 497 498 if chars[pointer] == ':' { 499 if compress.is_some() { 500 return Err(UrlError::InvalidIpv6); 501 } 502 pointer += 1; 503 piece_index += 1; 504 compress = Some(piece_index); 505 continue; 506 } 507 508 let mut value: u16 = 0; 509 let mut length = 0; 510 while length < 4 && pointer < len && chars[pointer].is_ascii_hexdigit() { 511 value = value * 0x10 + hex_val(chars[pointer] as u8).unwrap() as u16; 512 pointer += 1; 513 length += 1; 514 } 515 516 if pointer < len && chars[pointer] == '.' { 517 // IPv4-mapped IPv6. 
518 if length == 0 { 519 return Err(UrlError::InvalidIpv6); 520 } 521 pointer -= length; 522 if piece_index > 6 { 523 return Err(UrlError::InvalidIpv6); 524 } 525 let mut numbers_seen = 0; 526 while pointer < len { 527 let mut ipv4_piece: Option<u16> = None; 528 if numbers_seen > 0 { 529 if chars[pointer] == '.' && numbers_seen < 4 { 530 pointer += 1; 531 } else { 532 return Err(UrlError::InvalidIpv6); 533 } 534 } 535 if pointer >= len || !chars[pointer].is_ascii_digit() { 536 return Err(UrlError::InvalidIpv6); 537 } 538 while pointer < len && chars[pointer].is_ascii_digit() { 539 let number = (chars[pointer] as u8 - b'0') as u16; 540 match ipv4_piece { 541 None => ipv4_piece = Some(number), 542 Some(0) => return Err(UrlError::InvalidIpv6), // leading zero 543 Some(v) => ipv4_piece = Some(v * 10 + number), 544 } 545 if ipv4_piece.unwrap_or(0) > 255 { 546 return Err(UrlError::InvalidIpv6); 547 } 548 pointer += 1; 549 } 550 pieces[piece_index] = 551 pieces[piece_index] * 0x100 + ipv4_piece.ok_or(UrlError::InvalidIpv6)?; 552 numbers_seen += 1; 553 if numbers_seen == 2 || numbers_seen == 4 { 554 piece_index += 1; 555 } 556 } 557 if numbers_seen != 4 { 558 return Err(UrlError::InvalidIpv6); 559 } 560 break; 561 } 562 563 if pointer < len && chars[pointer] == ':' { 564 pointer += 1; 565 if pointer >= len { 566 // Trailing single colon after a piece — only valid with compress. 
567 } 568 } else if pointer < len { 569 return Err(UrlError::InvalidIpv6); 570 } 571 572 if piece_index >= 8 { 573 return Err(UrlError::InvalidIpv6); 574 } 575 pieces[piece_index] = value; 576 piece_index += 1; 577 } 578 579 if let Some(comp) = compress { 580 let mut swaps = piece_index - comp; 581 piece_index = 7; 582 while piece_index != 0 && swaps > 0 { 583 let swap_index = comp + swaps - 1; 584 pieces.swap(piece_index, swap_index); 585 piece_index -= 1; 586 swaps -= 1; 587 } 588 } else if piece_index != 8 { 589 return Err(UrlError::InvalidIpv6); 590 } 591 592 Ok(pieces) 593} 594 595fn serialize_ipv6(pieces: &[u16; 8]) -> String { 596 // Find the longest run of consecutive zeros for :: compression. 597 let mut best_start = None; 598 let mut best_len = 0usize; 599 let mut cur_start = None; 600 let mut cur_len = 0usize; 601 602 for (i, &p) in pieces.iter().enumerate() { 603 if p == 0 { 604 if cur_start.is_none() { 605 cur_start = Some(i); 606 cur_len = 1; 607 } else { 608 cur_len += 1; 609 } 610 } else { 611 if cur_len > best_len && cur_len >= 2 { 612 best_start = cur_start; 613 best_len = cur_len; 614 } 615 cur_start = None; 616 cur_len = 0; 617 } 618 } 619 if cur_len > best_len && cur_len >= 2 { 620 best_start = cur_start; 621 best_len = cur_len; 622 } 623 624 let mut out = String::new(); 625 let mut i = 0; 626 while i < 8 { 627 if Some(i) == best_start { 628 out.push_str("::"); 629 i += best_len; 630 continue; 631 } 632 if !out.is_empty() && !out.ends_with(':') { 633 out.push(':'); 634 } 635 out.push_str(&format!("{:x}", pieces[i])); 636 i += 1; 637 } 638 out 639} 640 641// --------------------------------------------------------------------------- 642// Host parsing 643// --------------------------------------------------------------------------- 644 645fn parse_host(input: &str, is_special: bool) -> Result<Host> { 646 if input.is_empty() { 647 if is_special { 648 return Err(UrlError::InvalidHost); 649 } 650 return Ok(Host::Domain(String::new())); 651 } 652 
653 // IPv6 654 if input.starts_with('[') { 655 if !input.ends_with(']') { 656 return Err(UrlError::InvalidIpv6); 657 } 658 let inner = &input[1..input.len() - 1]; 659 let pieces = parse_ipv6(inner)?; 660 return Ok(Host::Ipv6(pieces)); 661 } 662 663 if !is_special { 664 let encoded = percent_encode(input, is_c0_control); 665 return Ok(Host::Domain(encoded)); 666 } 667 668 // Domain — percent-decode then lowercase. 669 let decoded = percent_decode_string(input); 670 let lowered = decoded.to_ascii_lowercase(); 671 672 // Check if it's an IPv4 address. 673 if ends_with_number(&lowered) { 674 match parse_ipv4(&lowered) { 675 Ok(addr) => return Ok(Host::Ipv4(addr)), 676 Err(_) => return Err(UrlError::InvalidHost), 677 } 678 } 679 680 // Validate domain characters. 681 for c in lowered.chars() { 682 if c == '\0' 683 || c == '\t' 684 || c == '\n' 685 || c == '\r' 686 || c == ' ' 687 || c == '#' 688 || c == '/' 689 || c == ':' 690 || c == '<' 691 || c == '>' 692 || c == '?' 693 || c == '@' 694 || c == '[' 695 || c == '\\' 696 || c == ']' 697 || c == '^' 698 || c == '|' 699 { 700 return Err(UrlError::InvalidHost); 701 } 702 } 703 704 Ok(Host::Domain(lowered)) 705} 706 707/// Check if a domain string ends with a number (suggesting IPv4). 
708fn ends_with_number(input: &str) -> bool { 709 let last_part = match input.rsplit('.').next() { 710 Some(p) => p, 711 None => return false, 712 }; 713 if last_part.is_empty() { 714 return false; 715 } 716 if last_part.starts_with("0x") || last_part.starts_with("0X") { 717 return last_part[2..].chars().all(|c| c.is_ascii_hexdigit()); 718 } 719 last_part.chars().all(|c| c.is_ascii_digit()) 720} 721 722// --------------------------------------------------------------------------- 723// Shorten path helper 724// --------------------------------------------------------------------------- 725 726fn shorten_path(scheme: &str, path: &mut Vec<String>) { 727 if scheme == "file" && path.len() == 1 { 728 if let Some(first) = path.first() { 729 if is_normalized_windows_drive_letter(first) { 730 return; 731 } 732 } 733 } 734 path.pop(); 735} 736 737fn is_normalized_windows_drive_letter(s: &str) -> bool { 738 let bytes = s.as_bytes(); 739 bytes.len() == 2 && bytes[0].is_ascii_alphabetic() && bytes[1] == b':' 740} 741 742fn starts_with_windows_drive_letter(s: &str) -> bool { 743 let bytes = s.as_bytes(); 744 if bytes.len() < 2 { 745 return false; 746 } 747 if !bytes[0].is_ascii_alphabetic() { 748 return false; 749 } 750 if bytes[1] != b':' && bytes[1] != b'|' { 751 return false; 752 } 753 if bytes.len() >= 3 { 754 matches!(bytes[2], b'/' | b'\\' | b'?' | b'#') 755 } else { 756 true 757 } 758} 759 760// --------------------------------------------------------------------------- 761// URL parser 762// --------------------------------------------------------------------------- 763 764fn parse_url(input: &str, base: Option<&Url>) -> Result<Url> { 765 // Strip leading/trailing C0 controls and spaces. 766 let input = input.trim_matches(|c: char| c <= '\u{0020}'); 767 768 if input.is_empty() { 769 if let Some(base) = base { 770 return parse_relative("", base); 771 } 772 return Err(UrlError::EmptyInput); 773 } 774 775 // Remove tab and newline characters. 
776 let input: String = input 777 .chars() 778 .filter(|&c| c != '\t' && c != '\n' && c != '\r') 779 .collect(); 780 781 let chars: Vec<char> = input.chars().collect(); 782 let len = chars.len(); 783 784 let mut pointer = 0; 785 786 // Try to parse a scheme. 787 let mut scheme = String::new(); 788 let mut has_scheme = false; 789 790 if pointer < len && chars[pointer].is_ascii_alphabetic() { 791 let mut temp = String::new(); 792 temp.push(chars[pointer].to_ascii_lowercase()); 793 let mut p = pointer + 1; 794 while p < len 795 && (chars[p].is_ascii_alphanumeric() 796 || chars[p] == '+' 797 || chars[p] == '-' 798 || chars[p] == '.') 799 { 800 temp.push(chars[p].to_ascii_lowercase()); 801 p += 1; 802 } 803 if p < len && chars[p] == ':' { 804 scheme = temp; 805 has_scheme = true; 806 pointer = p + 1; // skip the ':' 807 } 808 } 809 810 if !has_scheme { 811 if let Some(base) = base { 812 return parse_relative(&input, base); 813 } 814 return Err(UrlError::MissingScheme); 815 } 816 817 let is_special = is_special_scheme(&scheme); 818 819 let mut url = Url { 820 scheme: scheme.clone(), 821 username: String::new(), 822 password: String::new(), 823 host: None, 824 port: None, 825 path: Vec::new(), 826 opaque_path: false, 827 query: None, 828 fragment: None, 829 }; 830 831 let remaining: String = chars[pointer..].iter().collect(); 832 833 if scheme == "file" { 834 return parse_file_url(&remaining, base, url); 835 } 836 837 if let Some(after_slashes) = remaining.strip_prefix("//") { 838 parse_authority_and_path(&mut url, after_slashes, is_special)?; 839 } else if is_special { 840 if let Some(base) = base { 841 if base.scheme == url.scheme { 842 return parse_relative_special(&remaining, base, url); 843 } 844 } 845 if let Some(after_slash) = remaining.strip_prefix('/') { 846 parse_authority_and_path(&mut url, after_slash, is_special)?; 847 } else { 848 parse_authority_and_path(&mut url, &remaining, is_special)?; 849 } 850 } else { 851 parse_opaque_or_path(&mut url, &remaining)?; 
852 } 853 854 Ok(url) 855} 856 857fn parse_authority_and_path(url: &mut Url, input: &str, is_special: bool) -> Result<()> { 858 let authority_end = input 859 .find(|c: char| c == '/' || c == '?' || c == '#' || (is_special && c == '\\')) 860 .unwrap_or(input.len()); 861 862 let authority = &input[..authority_end]; 863 let rest = &input[authority_end..]; 864 865 let (userinfo_part, hostport) = if let Some(at_pos) = authority.rfind('@') { 866 (&authority[..at_pos], &authority[at_pos + 1..]) 867 } else { 868 ("", authority) 869 }; 870 871 if !userinfo_part.is_empty() { 872 if let Some(colon_pos) = userinfo_part.find(':') { 873 url.username = percent_encode(&userinfo_part[..colon_pos], is_userinfo_encode); 874 url.password = percent_encode(&userinfo_part[colon_pos + 1..], is_userinfo_encode); 875 } else { 876 url.username = percent_encode(userinfo_part, is_userinfo_encode); 877 } 878 } 879 880 let (host_str, port_str) = split_host_port(hostport); 881 882 url.host = Some(parse_host(host_str, is_special)?); 883 884 if let Some(port_s) = port_str { 885 if !port_s.is_empty() { 886 let port: u16 = port_s.parse().map_err(|_| UrlError::InvalidPort)?; 887 if default_port(&url.scheme) != Some(port) { 888 url.port = Some(port); 889 } 890 } 891 } 892 893 parse_path_query_fragment(url, rest, is_special) 894} 895 896fn split_host_port(input: &str) -> (&str, Option<&str>) { 897 if input.starts_with('[') { 898 if let Some(bracket_end) = input.find(']') { 899 let host = &input[..bracket_end + 1]; 900 let after = &input[bracket_end + 1..]; 901 if let Some(port_str) = after.strip_prefix(':') { 902 return (host, Some(port_str)); 903 } 904 return (host, None); 905 } 906 return (input, None); 907 } 908 909 if let Some(colon_pos) = input.rfind(':') { 910 let port_part = &input[colon_pos + 1..]; 911 if port_part.is_empty() || port_part.chars().all(|c| c.is_ascii_digit()) { 912 return (&input[..colon_pos], Some(port_part)); 913 } 914 } 915 (input, None) 916} 917 918fn 
parse_path_query_fragment(url: &mut Url, input: &str, is_special: bool) -> Result<()> { 919 let mut remaining = input; 920 921 let path_end = remaining.find(['?', '#']).unwrap_or(remaining.len()); 922 let path_str = &remaining[..path_end]; 923 remaining = &remaining[path_end..]; 924 925 parse_path_into(url, path_str, is_special); 926 927 if let Some(after_q) = remaining.strip_prefix('?') { 928 remaining = after_q; 929 let query_end = remaining.find('#').unwrap_or(remaining.len()); 930 let query_str = &remaining[..query_end]; 931 remaining = &remaining[query_end..]; 932 933 let encode_fn = if is_special { 934 is_special_query_encode 935 } else { 936 is_query_encode 937 }; 938 url.query = Some(percent_encode(query_str, encode_fn)); 939 } 940 941 if let Some(after_hash) = remaining.strip_prefix('#') { 942 url.fragment = Some(percent_encode(after_hash, is_fragment_encode)); 943 } 944 945 Ok(()) 946} 947 948fn parse_path_into(url: &mut Url, path: &str, is_special: bool) { 949 if path.is_empty() { 950 if is_special { 951 url.path = vec![String::new()]; 952 } 953 return; 954 } 955 956 let segments: Vec<&str> = if is_special { 957 path.split(['/', '\\']).collect() 958 } else { 959 path.split('/').collect() 960 }; 961 962 for (i, seg) in segments.iter().enumerate() { 963 if i == 0 && seg.is_empty() { 964 continue; 965 } 966 967 let decoded = *seg; 968 if decoded == "." || decoded.eq_ignore_ascii_case("%2e") { 969 if i == segments.len() - 1 { 970 url.path.push(String::new()); 971 } 972 } else if decoded == ".." 
973 || decoded.eq_ignore_ascii_case(".%2e") 974 || decoded.eq_ignore_ascii_case("%2e.") 975 || decoded.eq_ignore_ascii_case("%2e%2e") 976 { 977 shorten_path(&url.scheme, &mut url.path); 978 if i == segments.len() - 1 { 979 url.path.push(String::new()); 980 } 981 } else { 982 url.path.push(percent_encode(decoded, is_path_encode)); 983 } 984 } 985} 986 987fn parse_opaque_or_path(url: &mut Url, input: &str) -> Result<()> { 988 let mut remaining = input; 989 990 let path_end = remaining.find(['?', '#']).unwrap_or(remaining.len()); 991 let path_str = &remaining[..path_end]; 992 remaining = &remaining[path_end..]; 993 994 if path_str.starts_with('/') { 995 url.opaque_path = false; 996 parse_path_into(url, path_str, false); 997 } else { 998 url.opaque_path = true; 999 url.path = vec![percent_encode(path_str, is_c0_control)]; 1000 } 1001 1002 if let Some(after_q) = remaining.strip_prefix('?') { 1003 remaining = after_q; 1004 let query_end = remaining.find('#').unwrap_or(remaining.len()); 1005 let query_str = &remaining[..query_end]; 1006 remaining = &remaining[query_end..]; 1007 url.query = Some(percent_encode(query_str, is_query_encode)); 1008 } 1009 1010 if let Some(after_hash) = remaining.strip_prefix('#') { 1011 url.fragment = Some(percent_encode(after_hash, is_fragment_encode)); 1012 } 1013 1014 Ok(()) 1015} 1016 1017// --------------------------------------------------------------------------- 1018// Relative URL parsing 1019// --------------------------------------------------------------------------- 1020 1021fn parse_relative(input: &str, base: &Url) -> Result<Url> { 1022 let mut url = Url { 1023 scheme: base.scheme.clone(), 1024 username: base.username.clone(), 1025 password: base.password.clone(), 1026 host: base.host.clone(), 1027 port: base.port, 1028 path: base.path.clone(), 1029 opaque_path: base.opaque_path, 1030 query: base.query.clone(), 1031 fragment: None, 1032 }; 1033 1034 let is_special = is_special_scheme(&url.scheme); 1035 1036 if input.is_empty() { 
1037 return Ok(url); 1038 } 1039 1040 let chars: Vec<char> = input.chars().collect(); 1041 1042 if chars[0] == '/' || (is_special && chars[0] == '\\') { 1043 if input.starts_with("//") || (is_special && input.starts_with("\\/")) { 1044 let after_slashes = &input[2..]; 1045 url.username = String::new(); 1046 url.password = String::new(); 1047 url.path = Vec::new(); 1048 url.query = None; 1049 parse_authority_and_path(&mut url, after_slashes, is_special)?; 1050 return Ok(url); 1051 } 1052 url.path = Vec::new(); 1053 url.query = None; 1054 parse_path_query_fragment(&mut url, input, is_special)?; 1055 return Ok(url); 1056 } 1057 1058 if let Some(after_q) = input.strip_prefix('?') { 1059 url.query = None; 1060 url.fragment = None; 1061 let query_end = after_q.find('#').unwrap_or(after_q.len()); 1062 let query_str = &after_q[..query_end]; 1063 let after = &after_q[query_end..]; 1064 1065 let encode_fn = if is_special { 1066 is_special_query_encode 1067 } else { 1068 is_query_encode 1069 }; 1070 url.query = Some(percent_encode(query_str, encode_fn)); 1071 1072 if let Some(frag) = after.strip_prefix('#') { 1073 url.fragment = Some(percent_encode(frag, is_fragment_encode)); 1074 } 1075 return Ok(url); 1076 } 1077 1078 if let Some(frag) = input.strip_prefix('#') { 1079 url.fragment = Some(percent_encode(frag, is_fragment_encode)); 1080 return Ok(url); 1081 } 1082 1083 // Path-relative. 
1084 if !url.opaque_path { 1085 shorten_path(&url.scheme, &mut url.path); 1086 } 1087 url.query = None; 1088 url.fragment = None; 1089 1090 parse_path_query_fragment(&mut url, &format!("/{input}"), is_special)?; 1091 Ok(url) 1092} 1093 1094fn parse_relative_special(remaining: &str, base: &Url, mut url: Url) -> Result<Url> { 1095 url.username = base.username.clone(); 1096 url.password = base.password.clone(); 1097 url.host = base.host.clone(); 1098 url.port = base.port; 1099 url.path = base.path.clone(); 1100 url.query = base.query.clone(); 1101 1102 let is_special = true; 1103 1104 if remaining.is_empty() { 1105 return Ok(url); 1106 } 1107 1108 if remaining.starts_with('/') || remaining.starts_with('\\') { 1109 url.path = Vec::new(); 1110 url.query = None; 1111 parse_path_query_fragment(&mut url, remaining, is_special)?; 1112 return Ok(url); 1113 } 1114 1115 if let Some(rest) = remaining.strip_prefix('?') { 1116 url.query = None; 1117 url.fragment = None; 1118 let query_end = rest.find('#').unwrap_or(rest.len()); 1119 url.query = Some(percent_encode(&rest[..query_end], is_special_query_encode)); 1120 if query_end < rest.len() { 1121 url.fragment = Some(percent_encode(&rest[query_end + 1..], is_fragment_encode)); 1122 } 1123 return Ok(url); 1124 } 1125 1126 if let Some(frag) = remaining.strip_prefix('#') { 1127 url.fragment = Some(percent_encode(frag, is_fragment_encode)); 1128 return Ok(url); 1129 } 1130 1131 shorten_path(&url.scheme, &mut url.path); 1132 url.query = None; 1133 parse_path_query_fragment(&mut url, &format!("/{remaining}"), is_special)?; 1134 Ok(url) 1135} 1136 1137// --------------------------------------------------------------------------- 1138// File URL parsing 1139// --------------------------------------------------------------------------- 1140 1141fn parse_file_url(input: &str, base: Option<&Url>, mut url: Url) -> Result<Url> { 1142 url.host = Some(Host::Domain(String::new())); 1143 1144 let remaining = if let Some(after) = 
input.strip_prefix("//") { 1145 after 1146 } else if let Some(after) = input.strip_prefix('/') { 1147 after 1148 } else if let Some(base) = base { 1149 if base.scheme == "file" { 1150 url.host = base.host.clone(); 1151 url.path = base.path.clone(); 1152 1153 if let Some(rest) = input.strip_prefix('?') { 1154 url.query = None; 1155 url.fragment = None; 1156 let query_end = rest.find('#').unwrap_or(rest.len()); 1157 url.query = Some(percent_encode(&rest[..query_end], is_query_encode)); 1158 if query_end < rest.len() { 1159 url.fragment = Some(percent_encode(&rest[query_end + 1..], is_fragment_encode)); 1160 } 1161 return Ok(url); 1162 } 1163 1164 if let Some(frag) = input.strip_prefix('#') { 1165 url.fragment = Some(percent_encode(frag, is_fragment_encode)); 1166 return Ok(url); 1167 } 1168 1169 shorten_path(&url.scheme, &mut url.path); 1170 url.query = None; 1171 parse_path_query_fragment(&mut url, &format!("/{input}"), false)?; 1172 return Ok(url); 1173 } else { 1174 input 1175 } 1176 } else { 1177 input 1178 }; 1179 1180 let path_start = remaining 1181 .find(['/', '\\', '?', '#']) 1182 .unwrap_or(remaining.len()); 1183 1184 let potential_host = &remaining[..path_start]; 1185 let rest = &remaining[path_start..]; 1186 1187 if starts_with_windows_drive_letter(remaining) { 1188 url.host = Some(Host::Domain(String::new())); 1189 parse_path_query_fragment(&mut url, &format!("/{remaining}"), false)?; 1190 return Ok(url); 1191 } 1192 1193 if !potential_host.is_empty() { 1194 let host = parse_host(potential_host, false)?; 1195 if host != Host::Domain(String::new()) { 1196 url.host = Some(host); 1197 } 1198 } 1199 1200 parse_path_query_fragment(&mut url, rest, false)?; 1201 1202 // Normalize Windows drive letters in path. 
#[cfg(test)]
mod tests {
    use super::*;

    // -------------------------------------------------------------------
    // Shared helpers
    // -------------------------------------------------------------------

    /// Parse `input` as an absolute URL, panicking if it is rejected.
    fn must_parse(input: &str) -> Url {
        Url::parse(input).unwrap()
    }

    /// Parse `base` as an absolute URL, then resolve `reference` against it.
    fn must_resolve(reference: &str, base: &str) -> Url {
        let base = must_parse(base);
        Url::parse_with_base(reference, &base).unwrap()
    }

    /// Assert that the serialized host of `url` equals `expected`.
    fn assert_host(url: &Url, expected: &str) {
        assert_eq!(url.host_str(), Some(expected.into()));
    }

    /// Assert that parsing `input` yields the path `expected`.
    fn assert_path(input: &str, expected: &str) {
        assert_eq!(must_parse(input).path(), expected);
    }

    /// Assert that parsing and re-serializing `input` gives `input` back.
    fn assert_roundtrip(input: &str) {
        assert_eq!(must_parse(input).serialize(), input);
    }

    // -------------------------------------------------------------------
    // Basic absolute URL parsing
    // -------------------------------------------------------------------

    #[test]
    fn parse_simple_http() {
        let url = must_parse("http://example.com");
        assert_eq!(url.scheme(), "http");
        assert_host(&url, "example.com");
        assert_eq!(url.port(), None);
        assert_eq!(url.path(), "/");
        assert_eq!(url.query(), None);
        assert_eq!(url.fragment(), None);
    }

    #[test]
    fn parse_https_with_path() {
        let url = must_parse("https://example.com/foo/bar");
        assert_eq!(url.scheme(), "https");
        assert_host(&url, "example.com");
        assert_eq!(url.path(), "/foo/bar");
    }

    #[test]
    fn parse_full_url() {
        let url = must_parse("https://user:pass@example.com:8080/path/to/page?q=1&r=2#frag");
        assert_eq!(url.scheme(), "https");
        assert_eq!(url.username(), "user");
        assert_eq!(url.password(), "pass");
        assert_host(&url, "example.com");
        assert_eq!(url.port(), Some(8080));
        assert_eq!(url.path(), "/path/to/page");
        assert_eq!(url.query(), Some("q=1&r=2"));
        assert_eq!(url.fragment(), Some("frag"));
    }

    #[test]
    fn parse_default_port_omitted() {
        let url = must_parse("http://example.com:80/");
        assert_eq!(url.port(), None);
        assert_eq!(url.port_or_default(), Some(80));
    }

    #[test]
    fn parse_non_default_port() {
        assert_eq!(must_parse("http://example.com:8080/").port(), Some(8080));
    }

    #[test]
    fn parse_https_default_port() {
        assert_eq!(must_parse("https://example.com:443/").port(), None);
    }

    #[test]
    fn parse_ftp_default_port() {
        let url = must_parse("ftp://files.example.com:21/readme.txt");
        assert_eq!(url.port(), None);
        assert_eq!(url.port_or_default(), Some(21));
    }

    // -------------------------------------------------------------------
    // Scheme handling
    // -------------------------------------------------------------------

    #[test]
    fn scheme_is_lowercased() {
        assert_eq!(must_parse("HTTP://EXAMPLE.COM").scheme(), "http");
    }

    #[test]
    fn non_special_scheme() {
        let url = must_parse("custom://host/path");
        assert_eq!(url.scheme(), "custom");
        assert_host(&url, "host");
        assert_eq!(url.path(), "/path");
    }

    #[test]
    fn data_uri() {
        let url = must_parse("data:text/html,<h1>Hello</h1>");
        assert_eq!(url.scheme(), "data");
        assert!(url.cannot_be_a_base());
    }

    #[test]
    fn javascript_uri() {
        let url = must_parse("javascript:alert(1)");
        assert_eq!(url.scheme(), "javascript");
        assert!(url.cannot_be_a_base());
    }

    #[test]
    fn mailto_uri() {
        let url = must_parse("mailto:user@example.com");
        assert_eq!(url.scheme(), "mailto");
        assert!(url.cannot_be_a_base());
    }

    // -------------------------------------------------------------------
    // Host parsing
    // -------------------------------------------------------------------

    #[test]
    fn host_is_lowercased() {
        assert_host(&must_parse("http://EXAMPLE.COM/"), "example.com");
    }

    #[test]
    fn ipv4_host() {
        let url = must_parse("http://127.0.0.1/");
        assert_eq!(url.host(), Some(&Host::Ipv4(0x7F00_0001)));
        assert_host(&url, "127.0.0.1");
    }

    #[test]
    fn ipv4_host_all_zeros() {
        let url = must_parse("http://0.0.0.0/");
        assert_eq!(url.host(), Some(&Host::Ipv4(0)));
    }

    #[test]
    fn ipv6_host() {
        let url = must_parse("http://[::1]/");
        assert_eq!(url.host(), Some(&Host::Ipv6([0, 0, 0, 0, 0, 0, 0, 1])));
    }

    #[test]
    fn ipv6_full() {
        let url = must_parse("http://[2001:db8:85a3:0:0:8a2e:370:7334]/");
        let expected = Host::Ipv6([0x2001, 0x0db8, 0x85a3, 0, 0, 0x8a2e, 0x0370, 0x7334]);
        assert_eq!(url.host(), Some(&expected));
    }

    #[test]
    fn ipv6_serialization_compressed() {
        assert_host(&must_parse("http://[2001:db8::1]/"), "[2001:db8::1]");
    }

    #[test]
    fn ipv6_all_zeros() {
        let url = must_parse("http://[::]/");
        assert_eq!(url.host(), Some(&Host::Ipv6([0; 8])));
        assert_host(&url, "[::]");
    }

    #[test]
    fn ipv6_loopback() {
        assert_eq!(parse_ipv6("::1").unwrap(), [0, 0, 0, 0, 0, 0, 0, 1]);
    }

    #[test]
    fn ipv6_with_ipv4() {
        // The trailing dotted quad fills the last two 16-bit pieces.
        assert_eq!(
            parse_ipv6("::ffff:192.168.1.1").unwrap(),
            [0, 0, 0, 0, 0, 0xffff, 0xc0a8, 0x0101]
        );
    }

    // -------------------------------------------------------------------
    // IPv4 parsing
    // -------------------------------------------------------------------

    #[test]
    fn ipv4_basic() {
        assert_eq!(parse_ipv4("192.168.1.1").unwrap(), 0xC0A8_0101);
    }

    #[test]
    fn ipv4_hex() {
        assert_eq!(parse_ipv4("0xC0.0xA8.0x01.0x01").unwrap(), 0xC0A8_0101);
    }

    #[test]
    fn ipv4_octal() {
        assert_eq!(parse_ipv4("0300.0250.01.01").unwrap(), 0xC0A8_0101);
    }

    #[test]
    fn ipv4_single_number() {
        // NOTE(review): this pins the current behaviour (a lone number is
        // rejected). The URL Standard's IPv4 parser appears to accept a
        // single-part address — confirm this strictness is intended.
        assert!(parse_ipv4("3232235777").is_err());
    }

    #[test]
    fn ipv4_two_parts() {
        // With two parts the first supplies the top 8 bits and the second
        // the remaining 24: 168 * 65536 + 1 * 256 + 1 = 11010305.
        assert_eq!(parse_ipv4("192.11010305").unwrap(), 0xC0A8_0101);
    }

    #[test]
    fn ipv4_reject_overflow() {
        assert!(parse_ipv4("256.0.0.0").is_err());
    }

    #[test]
    fn ipv4_reject_empty_part() {
        assert!(parse_ipv4("1..1.1").is_err());
    }

    // -------------------------------------------------------------------
    // Percent encoding/decoding
    // -------------------------------------------------------------------

    #[test]
    fn percent_decode_basic() {
        assert_eq!(percent_decode_string("%48%65%6C%6C%6F"), "Hello");
    }

    #[test]
    fn percent_decode_mixed() {
        assert_eq!(percent_decode_string("Hello%20World"), "Hello World");
    }

    #[test]
    fn percent_decode_passthrough() {
        assert_eq!(percent_decode_string("no-encoding"), "no-encoding");
    }

    #[test]
    fn percent_decode_partial() {
        // Truncated escapes are passed through unchanged.
        for raw in ["100%", "%2"] {
            assert_eq!(percent_decode_string(raw), raw);
        }
    }

    #[test]
    fn percent_encode_userinfo() {
        assert_eq!(percent_encode("user@host", is_userinfo_encode), "user%40host");
    }

    #[test]
    fn percent_encode_path() {
        assert_eq!(percent_encode("hello world", is_path_encode), "hello%20world");
    }

    // -------------------------------------------------------------------
    // Path parsing and dot segments
    // -------------------------------------------------------------------

    #[test]
    fn path_dot_removal() {
        assert_path("http://example.com/a/b/../c", "/a/c");
    }

    #[test]
    fn path_dot_current() {
        assert_path("http://example.com/a/./b", "/a/b");
    }

    #[test]
    fn path_multiple_dots() {
        assert_path("http://example.com/a/b/c/../../d", "/a/d");
    }

    #[test]
    fn path_trailing_slash() {
        assert_path("http://example.com/a/b/", "/a/b/");
    }

    #[test]
    fn path_empty() {
        assert_path("http://example.com", "/");
    }

    #[test]
    fn path_double_dot_at_root() {
        assert_path("http://example.com/../a", "/a");
    }

    // -------------------------------------------------------------------
    // Relative URL resolution
    // -------------------------------------------------------------------

    #[test]
    fn relative_path() {
        let url = must_resolve("d", "http://example.com/a/b/c");
        assert_eq!(url.path(), "/a/b/d");
        assert_host(&url, "example.com");
    }

    #[test]
    fn relative_path_with_dots() {
        let url = must_resolve("../d", "http://example.com/a/b/c");
        assert_eq!(url.path(), "/a/d");
    }

    #[test]
    fn relative_absolute_path() {
        let url = must_resolve("/d/e", "http://example.com/a/b/c");
        assert_eq!(url.path(), "/d/e");
        assert_host(&url, "example.com");
    }

    #[test]
    fn relative_query_only() {
        let url = must_resolve("?new=2", "http://example.com/a/b?old=1");
        assert_eq!(url.path(), "/a/b");
        assert_eq!(url.query(), Some("new=2"));
    }

    #[test]
    fn relative_fragment_only() {
        let url = must_resolve("#new", "http://example.com/a/b#old");
        assert_eq!(url.path(), "/a/b");
        assert_eq!(url.fragment(), Some("new"));
    }

    #[test]
    fn relative_authority_override() {
        let url = must_resolve("//other.com/c", "http://example.com/a/b");
        assert_eq!(url.scheme(), "http");
        assert_host(&url, "other.com");
        assert_eq!(url.path(), "/c");
    }

    #[test]
    fn absolute_url_ignores_base() {
        let url = must_resolve("https://other.com/b", "http://example.com/a");
        assert_eq!(url.scheme(), "https");
        assert_host(&url, "other.com");
        assert_eq!(url.path(), "/b");
    }

    #[test]
    fn relative_empty_string() {
        // An empty reference keeps the base path and query but not its fragment.
        let url = must_resolve("", "http://example.com/a/b?q=1#f");
        assert_eq!(url.path(), "/a/b");
        assert_eq!(url.query(), Some("q=1"));
        assert_eq!(url.fragment(), None);
    }

    // -------------------------------------------------------------------
    // Serialization
    // -------------------------------------------------------------------

    #[test]
    fn serialize_simple() {
        assert_roundtrip("http://example.com/path");
    }

    #[test]
    fn serialize_with_credentials() {
        assert_roundtrip("http://user:pass@example.com/");
    }

    #[test]
    fn serialize_with_port() {
        assert_roundtrip("http://example.com:8080/");
    }

    #[test]
    fn serialize_with_query_fragment() {
        assert_roundtrip("http://example.com/path?q=1#frag");
    }

    #[test]
    fn serialize_data_uri() {
        assert_roundtrip("data:text/html,hello");
    }

    #[test]
    fn roundtrip_full_url() {
        assert_roundtrip("https://user:pass@example.com:8080/a/b?q=1#frag");
    }

    #[test]
    fn roundtrip_ipv4() {
        assert_roundtrip("http://192.168.1.1/path");
    }

    #[test]
    fn roundtrip_ipv6() {
        assert_roundtrip("http://[::1]/path");
    }

    // -------------------------------------------------------------------
    // Origin
    // -------------------------------------------------------------------

    #[test]
    fn origin_http() {
        let url = must_parse("http://example.com:8080/path");
        if let Origin::Tuple(scheme, host, port) = url.origin() {
            assert_eq!(scheme, "http");
            assert_eq!(host, Host::Domain("example.com".into()));
            assert_eq!(port, Some(8080));
        } else {
            panic!("expected tuple origin");
        }
    }

    #[test]
    fn origin_https_default_port() {
        let url = must_parse("https://example.com/");
        if let Origin::Tuple(scheme, host, port) = url.origin() {
            assert_eq!(scheme, "https");
            assert_eq!(host, Host::Domain("example.com".into()));
            assert_eq!(port, None);
        } else {
            panic!("expected tuple origin");
        }
    }

    #[test]
    fn origin_data_is_opaque() {
        assert_eq!(must_parse("data:text/html,hello").origin(), Origin::Opaque);
    }

    // -------------------------------------------------------------------
    // File URLs
    // -------------------------------------------------------------------

    #[test]
    fn file_url_unix() {
        let url = must_parse("file:///home/user/file.txt");
        assert_eq!(url.scheme(), "file");
        assert_host(&url, "");
        assert_eq!(url.path(), "/home/user/file.txt");
    }

    #[test]
    fn file_url_windows_drive() {
        let url = must_parse("file:///C:/Windows/system32");
        assert_eq!(url.scheme(), "file");
        assert_eq!(url.path(), "/C:/Windows/system32");
    }

    #[test]
    fn file_url_with_host() {
        let url = must_parse("file://server/share/file.txt");
        assert_eq!(url.scheme(), "file");
        assert_host(&url, "server");
        assert_eq!(url.path(), "/share/file.txt");
    }

    // -------------------------------------------------------------------
    // Edge cases
    // -------------------------------------------------------------------

    #[test]
    fn empty_input_fails() {
        assert_eq!(Url::parse(""), Err(UrlError::EmptyInput));
    }

    #[test]
    fn whitespace_only_fails() {
        assert_eq!(Url::parse(" "), Err(UrlError::EmptyInput));
    }

    #[test]
    fn missing_scheme_fails() {
        assert!(Url::parse("example.com").is_err());
    }

    #[test]
    fn leading_whitespace_stripped() {
        assert_host(&must_parse(" http://example.com "), "example.com");
    }

    #[test]
    fn tab_newline_stripped() {
        assert_host(&must_parse("http://exa\tmple\n.com/"), "example.com");
    }

    #[test]
    fn query_with_special_chars() {
        let url = must_parse("http://example.com/?key=val ue&foo=bar");
        let query = url.query().unwrap();
        assert!(query.contains("key=val%20ue"));
    }

    #[test]
    fn fragment_with_special_chars() {
        let url = must_parse("http://example.com/#sec tion");
        let fragment = url.fragment().unwrap();
        assert!(fragment.contains("sec%20tion"));
    }

    #[test]
    fn username_only() {
        let url = must_parse("http://user@example.com/");
        assert_eq!(url.username(), "user");
        assert_eq!(url.password(), "");
        assert!(url.has_credentials());
    }

    #[test]
    fn no_credentials() {
        assert!(!must_parse("http://example.com/").has_credentials());
    }

    #[test]
    fn port_overflow_fails() {
        assert!(Url::parse("http://example.com:99999/").is_err());
    }

    #[test]
    fn ws_scheme() {
        let url = must_parse("ws://example.com/chat");
        assert_eq!(url.scheme(), "ws");
        assert_eq!(url.port_or_default(), Some(80));
    }

    #[test]
    fn wss_scheme() {
        let url = must_parse("wss://example.com/chat");
        assert_eq!(url.scheme(), "wss");
        assert_eq!(url.port_or_default(), Some(443));
    }

    #[test]
    fn cannot_be_a_base() {
        assert!(must_parse("data:text/html,hello").cannot_be_a_base());
    }

    #[test]
    fn http_can_be_a_base() {
        assert!(!must_parse("http://example.com/").cannot_be_a_base());
    }

    // -------------------------------------------------------------------
    // Display/ToString
    // -------------------------------------------------------------------

    #[test]
    fn display_matches_serialize() {
        let url = must_parse("https://example.com:8443/path?q=1#f");
        assert_eq!(format!("{}", url), url.serialize());
    }

    // -------------------------------------------------------------------
    // Multiple path segments
    // -------------------------------------------------------------------

    #[test]
    fn path_segments() {
        let url = must_parse("http://example.com/a/b/c");
        assert_eq!(url.path_segments(), &["a", "b", "c"]);
    }

    #[test]
    fn path_segments_trailing_slash() {
        // A trailing slash shows up as a final empty segment.
        let url = must_parse("http://example.com/a/b/");
        assert_eq!(url.path_segments(), &["a", "b", ""]);
    }

    // -------------------------------------------------------------------
    // Host type
    // -------------------------------------------------------------------

    #[test]
    fn host_serialize_domain() {
        assert_eq!(Host::Domain("example.com".into()).serialize(), "example.com");
    }

    #[test]
    fn host_serialize_ipv4() {
        assert_eq!(Host::Ipv4(0x7F00_0001).serialize(), "127.0.0.1");
    }

    #[test]
    fn host_serialize_ipv6() {
        assert_eq!(Host::Ipv6([0, 0, 0, 0, 0, 0, 0, 1]).serialize(), "[::1]");
    }

    // -------------------------------------------------------------------
    // IPv6 serialization
    // -------------------------------------------------------------------

    #[test]
    fn ipv6_serialize_full() {
        let pieces = [
            0x2001, 0x0db8, 0x85a3, 0x0001, 0x0002, 0x8a2e, 0x0370, 0x7334,
        ];
        assert_eq!(serialize_ipv6(&pieces), "2001:db8:85a3:1:2:8a2e:370:7334");
    }

    #[test]
    fn ipv6_serialize_compress() {
        assert_eq!(serialize_ipv6(&[0x2001, 0x0db8, 0, 0, 0, 0, 0, 1]), "2001:db8::1");
    }

    #[test]
    fn ipv6_serialize_all_zeros() {
        assert_eq!(serialize_ipv6(&[0u16; 8]), "::");
    }

    #[test]
    fn ipv6_serialize_no_compress_single_zero() {
        // Isolated zero pieces are written out; no `::` appears.
        assert_eq!(serialize_ipv6(&[1, 0, 2, 0, 3, 0, 4, 0]), "1:0:2:0:3:0:4:0");
    }

    // -------------------------------------------------------------------
    // Percent encoding edge cases
    // -------------------------------------------------------------------

    #[test]
    fn percent_encode_preserves_unreserved() {
        let input = "hello-world_test.page~1";
        assert_eq!(percent_encode(input, is_path_encode), input);
    }

    #[test]
    fn percent_encode_multibyte_utf8() {
        // Each UTF-8 byte of a non-ASCII character is escaped separately.
        assert_eq!(percent_encode("café", is_path_encode), "caf%C3%A9");
    }
}