we (web engine): Experimental web browser project to understand the limits of Claude
at gif-decoder 1876 lines 57 kB view raw
1//! WHATWG URL parser. 2//! 3//! Implements the URL Standard (<https://url.spec.whatwg.org/>): 4//! - URL record type with scheme, username, password, host, port, path, query, fragment 5//! - State-machine parser following the spec 6//! - Host parsing: domains, IPv4 addresses, IPv6 addresses 7//! - Percent-encoding and decoding (UTF-8) 8//! - Special scheme handling (http, https, ftp, ws, wss, file) 9//! - Relative URL resolution via base URL 10//! - URL serialization 11//! - Origin derivation 12 13use core::fmt; 14 15// --------------------------------------------------------------------------- 16// Error types 17// --------------------------------------------------------------------------- 18 19#[derive(Debug, Clone, PartialEq, Eq)] 20pub enum UrlError { 21 /// Input is empty or contains only whitespace. 22 EmptyInput, 23 /// Invalid URL syntax. 24 InvalidUrl, 25 /// Invalid scheme. 26 InvalidScheme, 27 /// Invalid authority. 28 InvalidAuthority, 29 /// Invalid host. 30 InvalidHost, 31 /// Invalid port number. 32 InvalidPort, 33 /// Invalid IPv4 address. 34 InvalidIpv4, 35 /// Invalid IPv6 address. 36 InvalidIpv6, 37 /// Invalid percent-encoding. 38 InvalidPercentEncoding, 39 /// Relative URL without a base. 40 RelativeWithoutBase, 41 /// Missing scheme. 
42 MissingScheme, 43} 44 45impl fmt::Display for UrlError { 46 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 47 match self { 48 Self::EmptyInput => write!(f, "empty input"), 49 Self::InvalidUrl => write!(f, "invalid URL"), 50 Self::InvalidScheme => write!(f, "invalid scheme"), 51 Self::InvalidAuthority => write!(f, "invalid authority"), 52 Self::InvalidHost => write!(f, "invalid host"), 53 Self::InvalidPort => write!(f, "invalid port number"), 54 Self::InvalidIpv4 => write!(f, "invalid IPv4 address"), 55 Self::InvalidIpv6 => write!(f, "invalid IPv6 address"), 56 Self::InvalidPercentEncoding => write!(f, "invalid percent-encoding"), 57 Self::RelativeWithoutBase => write!(f, "relative URL without a base"), 58 Self::MissingScheme => write!(f, "missing scheme"), 59 } 60 } 61} 62 63pub type Result<T> = core::result::Result<T, UrlError>; 64 65// --------------------------------------------------------------------------- 66// Host 67// --------------------------------------------------------------------------- 68 69/// A parsed URL host. 70#[derive(Debug, Clone, PartialEq, Eq)] 71pub enum Host { 72 /// A domain name (already lowercased). 73 Domain(String), 74 /// An IPv4 address. 75 Ipv4(u32), 76 /// An IPv6 address (128 bits as eight 16-bit pieces). 77 Ipv6([u16; 8]), 78} 79 80impl Host { 81 /// Serialize the host to a string. 82 pub fn serialize(&self) -> String { 83 match self { 84 Host::Domain(d) => d.clone(), 85 Host::Ipv4(addr) => serialize_ipv4(*addr), 86 Host::Ipv6(pieces) => format!("[{}]", serialize_ipv6(pieces)), 87 } 88 } 89} 90 91impl fmt::Display for Host { 92 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 93 write!(f, "{}", self.serialize()) 94 } 95} 96 97// --------------------------------------------------------------------------- 98// Origin 99// --------------------------------------------------------------------------- 100 101/// A URL origin (scheme, host, port). 
102#[derive(Debug, Clone, PartialEq, Eq)] 103pub enum Origin { 104 /// A tuple origin (scheme, host, port). 105 Tuple(String, Host, Option<u16>), 106 /// An opaque origin (unique, not equal to anything). 107 Opaque, 108} 109 110// --------------------------------------------------------------------------- 111// URL record 112// --------------------------------------------------------------------------- 113 114/// A parsed URL record per the WHATWG URL Standard. 115#[derive(Debug, Clone, PartialEq, Eq)] 116pub struct Url { 117 /// The scheme (e.g., "http", "https", "file"). 118 pub scheme: String, 119 /// The username (percent-encoded). 120 username: String, 121 /// The password (percent-encoded). 122 password: String, 123 /// The host. 124 pub host: Option<Host>, 125 /// The port (None = default or absent). 126 pub port: Option<u16>, 127 /// Path segments. For non-opaque paths, these are the segments. 128 /// For opaque paths (cannot-be-a-base URL), this is a single element. 129 path: Vec<String>, 130 /// Whether this URL has an opaque path (cannot-be-a-base URL). 131 opaque_path: bool, 132 /// The query string (without leading '?'). 133 pub query: Option<String>, 134 /// The fragment (without leading '#'). 135 pub fragment: Option<String>, 136} 137 138impl Url { 139 /// Parse a URL string. 140 pub fn parse(input: &str) -> Result<Self> { 141 parse_url(input, None) 142 } 143 144 /// Parse a URL string with a base URL for resolving relative references. 145 pub fn parse_with_base(input: &str, base: &Url) -> Result<Self> { 146 parse_url(input, Some(base)) 147 } 148 149 /// Get the scheme. 150 pub fn scheme(&self) -> &str { 151 &self.scheme 152 } 153 154 /// Get the username (percent-encoded). 155 pub fn username(&self) -> &str { 156 &self.username 157 } 158 159 /// Get the password (percent-encoded). 160 pub fn password(&self) -> &str { 161 &self.password 162 } 163 164 /// Get the host. 
165 pub fn host(&self) -> Option<&Host> { 166 self.host.as_ref() 167 } 168 169 /// Get the host as a string. 170 pub fn host_str(&self) -> Option<String> { 171 self.host.as_ref().map(|h| h.serialize()) 172 } 173 174 /// Get the port. 175 pub fn port(&self) -> Option<u16> { 176 self.port 177 } 178 179 /// Get the port or the default port for the scheme. 180 pub fn port_or_default(&self) -> Option<u16> { 181 self.port.or_else(|| default_port(&self.scheme)) 182 } 183 184 /// Get the path as a string. 185 pub fn path(&self) -> String { 186 if self.opaque_path { 187 self.path.first().cloned().unwrap_or_default() 188 } else { 189 let mut s = String::new(); 190 for seg in &self.path { 191 s.push('/'); 192 s.push_str(seg); 193 } 194 if s.is_empty() { 195 s.push('/'); 196 } 197 s 198 } 199 } 200 201 /// Get the path segments. 202 pub fn path_segments(&self) -> &[String] { 203 &self.path 204 } 205 206 /// Get the query string. 207 pub fn query(&self) -> Option<&str> { 208 self.query.as_deref() 209 } 210 211 /// Get the fragment. 212 pub fn fragment(&self) -> Option<&str> { 213 self.fragment.as_deref() 214 } 215 216 /// Whether this URL has an opaque path (cannot-be-a-base). 217 pub fn cannot_be_a_base(&self) -> bool { 218 self.opaque_path 219 } 220 221 /// Whether this URL includes credentials. 222 pub fn has_credentials(&self) -> bool { 223 !self.username.is_empty() || !self.password.is_empty() 224 } 225 226 /// Derive the origin of this URL. 227 pub fn origin(&self) -> Origin { 228 match self.scheme.as_str() { 229 "http" | "https" | "ws" | "wss" | "ftp" => { 230 if let Some(host) = &self.host { 231 Origin::Tuple(self.scheme.clone(), host.clone(), self.port) 232 } else { 233 Origin::Opaque 234 } 235 } 236 _ => Origin::Opaque, 237 } 238 } 239 240 /// Serialize this URL to a string (the href). 
241 pub fn serialize(&self) -> String { 242 let mut output = String::new(); 243 output.push_str(&self.scheme); 244 output.push(':'); 245 246 if self.host.is_some() { 247 output.push_str("//"); 248 if self.has_credentials() { 249 output.push_str(&self.username); 250 if !self.password.is_empty() { 251 output.push(':'); 252 output.push_str(&self.password); 253 } 254 output.push('@'); 255 } 256 if let Some(ref host) = self.host { 257 output.push_str(&host.serialize()); 258 } 259 if let Some(port) = self.port { 260 output.push(':'); 261 output.push_str(&port.to_string()); 262 } 263 } else if !self.opaque_path && self.scheme == "file" { 264 output.push_str("//"); 265 } 266 267 output.push_str(&self.path()); 268 269 if let Some(ref query) = self.query { 270 output.push('?'); 271 output.push_str(query); 272 } 273 if let Some(ref fragment) = self.fragment { 274 output.push('#'); 275 output.push_str(fragment); 276 } 277 278 output 279 } 280} 281 282impl fmt::Display for Url { 283 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 284 write!(f, "{}", self.serialize()) 285 } 286} 287 288// --------------------------------------------------------------------------- 289// Special schemes 290// --------------------------------------------------------------------------- 291 292/// Whether a scheme is "special" per the URL standard. 293fn is_special_scheme(scheme: &str) -> bool { 294 matches!(scheme, "http" | "https" | "ftp" | "ws" | "wss" | "file") 295} 296 297/// Default port for a special scheme. 298fn default_port(scheme: &str) -> Option<u16> { 299 match scheme { 300 "http" | "ws" => Some(80), 301 "https" | "wss" => Some(443), 302 "ftp" => Some(21), 303 _ => None, 304 } 305} 306 307// --------------------------------------------------------------------------- 308// Percent encoding / decoding 309// --------------------------------------------------------------------------- 310 311/// The C0 control percent-encode set. 
312fn is_c0_control(c: char) -> bool { 313 c <= '\u{001F}' || c > '\u{007E}' 314} 315 316/// The fragment percent-encode set. 317fn is_fragment_encode(c: char) -> bool { 318 is_c0_control(c) || c == ' ' || c == '"' || c == '<' || c == '>' || c == '`' 319} 320 321/// The query percent-encode set. 322fn is_query_encode(c: char) -> bool { 323 is_c0_control(c) || c == ' ' || c == '"' || c == '#' || c == '<' || c == '>' 324} 325 326/// The special query percent-encode set. 327fn is_special_query_encode(c: char) -> bool { 328 is_query_encode(c) || c == '\'' 329} 330 331/// The path percent-encode set. 332fn is_path_encode(c: char) -> bool { 333 is_query_encode(c) || c == '?' || c == '`' || c == '{' || c == '}' 334} 335 336/// The userinfo percent-encode set. 337fn is_userinfo_encode(c: char) -> bool { 338 is_path_encode(c) 339 || c == '/' 340 || c == ':' 341 || c == ';' 342 || c == '=' 343 || c == '@' 344 || c == '[' 345 || c == '\\' 346 || c == ']' 347 || c == '^' 348 || c == '|' 349} 350 351/// Percent-encode a string using the given encode set predicate. 352fn percent_encode(input: &str, should_encode: fn(char) -> bool) -> String { 353 let mut out = String::with_capacity(input.len()); 354 for c in input.chars() { 355 if should_encode(c) { 356 for b in c.to_string().as_bytes() { 357 out.push('%'); 358 out.push(to_hex_upper(b >> 4)); 359 out.push(to_hex_upper(b & 0x0F)); 360 } 361 } else { 362 out.push(c); 363 } 364 } 365 out 366} 367 368fn to_hex_upper(n: u8) -> char { 369 if n < 10 { 370 (b'0' + n) as char 371 } else { 372 (b'A' + n - 10) as char 373 } 374} 375 376/// Percent-decode a byte string. 
377pub fn percent_decode(input: &str) -> Vec<u8> { 378 let bytes = input.as_bytes(); 379 let mut out = Vec::with_capacity(bytes.len()); 380 let mut i = 0; 381 while i < bytes.len() { 382 if bytes[i] == b'%' && i + 2 < bytes.len() { 383 if let (Some(hi), Some(lo)) = (hex_val(bytes[i + 1]), hex_val(bytes[i + 2])) { 384 out.push(hi << 4 | lo); 385 i += 3; 386 continue; 387 } 388 } 389 out.push(bytes[i]); 390 i += 1; 391 } 392 out 393} 394 395/// Percent-decode to a UTF-8 string (lossy). 396pub fn percent_decode_string(input: &str) -> String { 397 String::from_utf8_lossy(&percent_decode(input)).into_owned() 398} 399 400fn hex_val(b: u8) -> Option<u8> { 401 match b { 402 b'0'..=b'9' => Some(b - b'0'), 403 b'a'..=b'f' => Some(b - b'a' + 10), 404 b'A'..=b'F' => Some(b - b'A' + 10), 405 _ => None, 406 } 407} 408 409// --------------------------------------------------------------------------- 410// IPv4 parsing 411// --------------------------------------------------------------------------- 412 413fn parse_ipv4(input: &str) -> Result<u32> { 414 let parts: Vec<&str> = input.split('.').collect(); 415 if parts.len() < 2 || parts.len() > 4 { 416 return Err(UrlError::InvalidIpv4); 417 } 418 let mut numbers: Vec<u64> = Vec::with_capacity(parts.len()); 419 for part in &parts { 420 if part.is_empty() { 421 return Err(UrlError::InvalidIpv4); 422 } 423 let n = parse_ipv4_number(part)?; 424 numbers.push(n); 425 } 426 let last = numbers.len() - 1; 427 for (i, &n) in numbers.iter().enumerate() { 428 if i < last && n > 255 { 429 return Err(UrlError::InvalidIpv4); 430 } 431 } 432 if numbers[last] >= 256u64.pow((4 - last) as u32) { 433 return Err(UrlError::InvalidIpv4); 434 } 435 436 let mut ipv4 = numbers[last] as u32; 437 for (i, &n) in numbers.iter().enumerate().take(last) { 438 ipv4 += (n as u32) << (8 * (3 - i)); 439 } 440 Ok(ipv4) 441} 442 443fn parse_ipv4_number(input: &str) -> Result<u64> { 444 if input.is_empty() { 445 return Err(UrlError::InvalidIpv4); 446 } 447 let (s, radix) 
= if input.starts_with("0x") || input.starts_with("0X") { 448 (&input[2..], 16) 449 } else if input.len() > 1 && input.starts_with('0') { 450 (&input[1..], 8) 451 } else { 452 (input, 10) 453 }; 454 if s.is_empty() { 455 return Ok(0); 456 } 457 u64::from_str_radix(s, radix).map_err(|_| UrlError::InvalidIpv4) 458} 459 460fn serialize_ipv4(addr: u32) -> String { 461 format!( 462 "{}.{}.{}.{}", 463 (addr >> 24) & 0xFF, 464 (addr >> 16) & 0xFF, 465 (addr >> 8) & 0xFF, 466 addr & 0xFF 467 ) 468} 469 470// --------------------------------------------------------------------------- 471// IPv6 parsing 472// --------------------------------------------------------------------------- 473 474fn parse_ipv6(input: &str) -> Result<[u16; 8]> { 475 let mut pieces = [0u16; 8]; 476 let mut piece_index: usize = 0; 477 let mut compress: Option<usize> = None; 478 let chars: Vec<char> = input.chars().collect(); 479 let len = chars.len(); 480 let mut pointer = 0; 481 482 if pointer < len && chars[pointer] == ':' { 483 if pointer + 1 >= len || chars[pointer + 1] != ':' { 484 return Err(UrlError::InvalidIpv6); 485 } 486 pointer += 2; 487 piece_index += 1; 488 compress = Some(piece_index); 489 } 490 491 while pointer < len { 492 if piece_index >= 8 { 493 return Err(UrlError::InvalidIpv6); 494 } 495 496 if chars[pointer] == ':' { 497 if compress.is_some() { 498 return Err(UrlError::InvalidIpv6); 499 } 500 pointer += 1; 501 piece_index += 1; 502 compress = Some(piece_index); 503 continue; 504 } 505 506 let mut value: u16 = 0; 507 let mut length = 0; 508 while length < 4 && pointer < len && chars[pointer].is_ascii_hexdigit() { 509 value = value * 0x10 + hex_val(chars[pointer] as u8).unwrap() as u16; 510 pointer += 1; 511 length += 1; 512 } 513 514 if pointer < len && chars[pointer] == '.' { 515 // IPv4-mapped IPv6. 
516 if length == 0 { 517 return Err(UrlError::InvalidIpv6); 518 } 519 pointer -= length; 520 if piece_index > 6 { 521 return Err(UrlError::InvalidIpv6); 522 } 523 let mut numbers_seen = 0; 524 while pointer < len { 525 let mut ipv4_piece: Option<u16> = None; 526 if numbers_seen > 0 { 527 if chars[pointer] == '.' && numbers_seen < 4 { 528 pointer += 1; 529 } else { 530 return Err(UrlError::InvalidIpv6); 531 } 532 } 533 if pointer >= len || !chars[pointer].is_ascii_digit() { 534 return Err(UrlError::InvalidIpv6); 535 } 536 while pointer < len && chars[pointer].is_ascii_digit() { 537 let number = (chars[pointer] as u8 - b'0') as u16; 538 match ipv4_piece { 539 None => ipv4_piece = Some(number), 540 Some(0) => return Err(UrlError::InvalidIpv6), // leading zero 541 Some(v) => ipv4_piece = Some(v * 10 + number), 542 } 543 if ipv4_piece.unwrap_or(0) > 255 { 544 return Err(UrlError::InvalidIpv6); 545 } 546 pointer += 1; 547 } 548 pieces[piece_index] = 549 pieces[piece_index] * 0x100 + ipv4_piece.ok_or(UrlError::InvalidIpv6)?; 550 numbers_seen += 1; 551 if numbers_seen == 2 || numbers_seen == 4 { 552 piece_index += 1; 553 } 554 } 555 if numbers_seen != 4 { 556 return Err(UrlError::InvalidIpv6); 557 } 558 break; 559 } 560 561 if pointer < len && chars[pointer] == ':' { 562 pointer += 1; 563 if pointer >= len { 564 // Trailing single colon after a piece — only valid with compress. 
565 } 566 } else if pointer < len { 567 return Err(UrlError::InvalidIpv6); 568 } 569 570 if piece_index >= 8 { 571 return Err(UrlError::InvalidIpv6); 572 } 573 pieces[piece_index] = value; 574 piece_index += 1; 575 } 576 577 if let Some(comp) = compress { 578 let mut swaps = piece_index - comp; 579 piece_index = 7; 580 while piece_index != 0 && swaps > 0 { 581 let swap_index = comp + swaps - 1; 582 pieces.swap(piece_index, swap_index); 583 piece_index -= 1; 584 swaps -= 1; 585 } 586 } else if piece_index != 8 { 587 return Err(UrlError::InvalidIpv6); 588 } 589 590 Ok(pieces) 591} 592 593fn serialize_ipv6(pieces: &[u16; 8]) -> String { 594 // Find the longest run of consecutive zeros for :: compression. 595 let mut best_start = None; 596 let mut best_len = 0usize; 597 let mut cur_start = None; 598 let mut cur_len = 0usize; 599 600 for (i, &p) in pieces.iter().enumerate() { 601 if p == 0 { 602 if cur_start.is_none() { 603 cur_start = Some(i); 604 cur_len = 1; 605 } else { 606 cur_len += 1; 607 } 608 } else { 609 if cur_len > best_len && cur_len >= 2 { 610 best_start = cur_start; 611 best_len = cur_len; 612 } 613 cur_start = None; 614 cur_len = 0; 615 } 616 } 617 if cur_len > best_len && cur_len >= 2 { 618 best_start = cur_start; 619 best_len = cur_len; 620 } 621 622 let mut out = String::new(); 623 let mut i = 0; 624 while i < 8 { 625 if Some(i) == best_start { 626 out.push_str("::"); 627 i += best_len; 628 continue; 629 } 630 if !out.is_empty() && !out.ends_with(':') { 631 out.push(':'); 632 } 633 out.push_str(&format!("{:x}", pieces[i])); 634 i += 1; 635 } 636 out 637} 638 639// --------------------------------------------------------------------------- 640// Host parsing 641// --------------------------------------------------------------------------- 642 643fn parse_host(input: &str, is_special: bool) -> Result<Host> { 644 if input.is_empty() { 645 if is_special { 646 return Err(UrlError::InvalidHost); 647 } 648 return Ok(Host::Domain(String::new())); 649 } 650 
651 // IPv6 652 if input.starts_with('[') { 653 if !input.ends_with(']') { 654 return Err(UrlError::InvalidIpv6); 655 } 656 let inner = &input[1..input.len() - 1]; 657 let pieces = parse_ipv6(inner)?; 658 return Ok(Host::Ipv6(pieces)); 659 } 660 661 if !is_special { 662 let encoded = percent_encode(input, is_c0_control); 663 return Ok(Host::Domain(encoded)); 664 } 665 666 // Domain — percent-decode then lowercase. 667 let decoded = percent_decode_string(input); 668 let lowered = decoded.to_ascii_lowercase(); 669 670 // Check if it's an IPv4 address. 671 if ends_with_number(&lowered) { 672 match parse_ipv4(&lowered) { 673 Ok(addr) => return Ok(Host::Ipv4(addr)), 674 Err(_) => return Err(UrlError::InvalidHost), 675 } 676 } 677 678 // Validate domain characters. 679 for c in lowered.chars() { 680 if c == '\0' 681 || c == '\t' 682 || c == '\n' 683 || c == '\r' 684 || c == ' ' 685 || c == '#' 686 || c == '/' 687 || c == ':' 688 || c == '<' 689 || c == '>' 690 || c == '?' 691 || c == '@' 692 || c == '[' 693 || c == '\\' 694 || c == ']' 695 || c == '^' 696 || c == '|' 697 { 698 return Err(UrlError::InvalidHost); 699 } 700 } 701 702 Ok(Host::Domain(lowered)) 703} 704 705/// Check if a domain string ends with a number (suggesting IPv4). 
706fn ends_with_number(input: &str) -> bool { 707 let last_part = match input.rsplit('.').next() { 708 Some(p) => p, 709 None => return false, 710 }; 711 if last_part.is_empty() { 712 return false; 713 } 714 if last_part.starts_with("0x") || last_part.starts_with("0X") { 715 return last_part[2..].chars().all(|c| c.is_ascii_hexdigit()); 716 } 717 last_part.chars().all(|c| c.is_ascii_digit()) 718} 719 720// --------------------------------------------------------------------------- 721// Shorten path helper 722// --------------------------------------------------------------------------- 723 724fn shorten_path(scheme: &str, path: &mut Vec<String>) { 725 if scheme == "file" && path.len() == 1 { 726 if let Some(first) = path.first() { 727 if is_normalized_windows_drive_letter(first) { 728 return; 729 } 730 } 731 } 732 path.pop(); 733} 734 735fn is_normalized_windows_drive_letter(s: &str) -> bool { 736 let bytes = s.as_bytes(); 737 bytes.len() == 2 && bytes[0].is_ascii_alphabetic() && bytes[1] == b':' 738} 739 740fn starts_with_windows_drive_letter(s: &str) -> bool { 741 let bytes = s.as_bytes(); 742 if bytes.len() < 2 { 743 return false; 744 } 745 if !bytes[0].is_ascii_alphabetic() { 746 return false; 747 } 748 if bytes[1] != b':' && bytes[1] != b'|' { 749 return false; 750 } 751 if bytes.len() >= 3 { 752 matches!(bytes[2], b'/' | b'\\' | b'?' | b'#') 753 } else { 754 true 755 } 756} 757 758// --------------------------------------------------------------------------- 759// URL parser 760// --------------------------------------------------------------------------- 761 762fn parse_url(input: &str, base: Option<&Url>) -> Result<Url> { 763 // Strip leading/trailing C0 controls and spaces. 764 let input = input.trim_matches(|c: char| c <= '\u{0020}'); 765 766 if input.is_empty() { 767 if let Some(base) = base { 768 return parse_relative("", base); 769 } 770 return Err(UrlError::EmptyInput); 771 } 772 773 // Remove tab and newline characters. 
774 let input: String = input 775 .chars() 776 .filter(|&c| c != '\t' && c != '\n' && c != '\r') 777 .collect(); 778 779 let chars: Vec<char> = input.chars().collect(); 780 let len = chars.len(); 781 782 let mut pointer = 0; 783 784 // Try to parse a scheme. 785 let mut scheme = String::new(); 786 let mut has_scheme = false; 787 788 if pointer < len && chars[pointer].is_ascii_alphabetic() { 789 let mut temp = String::new(); 790 temp.push(chars[pointer].to_ascii_lowercase()); 791 let mut p = pointer + 1; 792 while p < len 793 && (chars[p].is_ascii_alphanumeric() 794 || chars[p] == '+' 795 || chars[p] == '-' 796 || chars[p] == '.') 797 { 798 temp.push(chars[p].to_ascii_lowercase()); 799 p += 1; 800 } 801 if p < len && chars[p] == ':' { 802 scheme = temp; 803 has_scheme = true; 804 pointer = p + 1; // skip the ':' 805 } 806 } 807 808 if !has_scheme { 809 if let Some(base) = base { 810 return parse_relative(&input, base); 811 } 812 return Err(UrlError::MissingScheme); 813 } 814 815 let is_special = is_special_scheme(&scheme); 816 817 let mut url = Url { 818 scheme: scheme.clone(), 819 username: String::new(), 820 password: String::new(), 821 host: None, 822 port: None, 823 path: Vec::new(), 824 opaque_path: false, 825 query: None, 826 fragment: None, 827 }; 828 829 let remaining: String = chars[pointer..].iter().collect(); 830 831 if scheme == "file" { 832 return parse_file_url(&remaining, base, url); 833 } 834 835 if let Some(after_slashes) = remaining.strip_prefix("//") { 836 parse_authority_and_path(&mut url, after_slashes, is_special)?; 837 } else if is_special { 838 if let Some(base) = base { 839 if base.scheme == url.scheme { 840 return parse_relative_special(&remaining, base, url); 841 } 842 } 843 if let Some(after_slash) = remaining.strip_prefix('/') { 844 parse_authority_and_path(&mut url, after_slash, is_special)?; 845 } else { 846 parse_authority_and_path(&mut url, &remaining, is_special)?; 847 } 848 } else { 849 parse_opaque_or_path(&mut url, &remaining)?; 
850 } 851 852 Ok(url) 853} 854 855fn parse_authority_and_path(url: &mut Url, input: &str, is_special: bool) -> Result<()> { 856 let authority_end = input 857 .find(|c: char| c == '/' || c == '?' || c == '#' || (is_special && c == '\\')) 858 .unwrap_or(input.len()); 859 860 let authority = &input[..authority_end]; 861 let rest = &input[authority_end..]; 862 863 let (userinfo_part, hostport) = if let Some(at_pos) = authority.rfind('@') { 864 (&authority[..at_pos], &authority[at_pos + 1..]) 865 } else { 866 ("", authority) 867 }; 868 869 if !userinfo_part.is_empty() { 870 if let Some(colon_pos) = userinfo_part.find(':') { 871 url.username = percent_encode(&userinfo_part[..colon_pos], is_userinfo_encode); 872 url.password = percent_encode(&userinfo_part[colon_pos + 1..], is_userinfo_encode); 873 } else { 874 url.username = percent_encode(userinfo_part, is_userinfo_encode); 875 } 876 } 877 878 let (host_str, port_str) = split_host_port(hostport); 879 880 url.host = Some(parse_host(host_str, is_special)?); 881 882 if let Some(port_s) = port_str { 883 if !port_s.is_empty() { 884 let port: u16 = port_s.parse().map_err(|_| UrlError::InvalidPort)?; 885 if default_port(&url.scheme) != Some(port) { 886 url.port = Some(port); 887 } 888 } 889 } 890 891 parse_path_query_fragment(url, rest, is_special) 892} 893 894fn split_host_port(input: &str) -> (&str, Option<&str>) { 895 if input.starts_with('[') { 896 if let Some(bracket_end) = input.find(']') { 897 let host = &input[..bracket_end + 1]; 898 let after = &input[bracket_end + 1..]; 899 if let Some(port_str) = after.strip_prefix(':') { 900 return (host, Some(port_str)); 901 } 902 return (host, None); 903 } 904 return (input, None); 905 } 906 907 if let Some(colon_pos) = input.rfind(':') { 908 let port_part = &input[colon_pos + 1..]; 909 if port_part.is_empty() || port_part.chars().all(|c| c.is_ascii_digit()) { 910 return (&input[..colon_pos], Some(port_part)); 911 } 912 } 913 (input, None) 914} 915 916fn 
parse_path_query_fragment(url: &mut Url, input: &str, is_special: bool) -> Result<()> { 917 let mut remaining = input; 918 919 let path_end = remaining.find(['?', '#']).unwrap_or(remaining.len()); 920 let path_str = &remaining[..path_end]; 921 remaining = &remaining[path_end..]; 922 923 parse_path_into(url, path_str, is_special); 924 925 if let Some(after_q) = remaining.strip_prefix('?') { 926 remaining = after_q; 927 let query_end = remaining.find('#').unwrap_or(remaining.len()); 928 let query_str = &remaining[..query_end]; 929 remaining = &remaining[query_end..]; 930 931 let encode_fn = if is_special { 932 is_special_query_encode 933 } else { 934 is_query_encode 935 }; 936 url.query = Some(percent_encode(query_str, encode_fn)); 937 } 938 939 if let Some(after_hash) = remaining.strip_prefix('#') { 940 url.fragment = Some(percent_encode(after_hash, is_fragment_encode)); 941 } 942 943 Ok(()) 944} 945 946fn parse_path_into(url: &mut Url, path: &str, is_special: bool) { 947 if path.is_empty() { 948 if is_special { 949 url.path = vec![String::new()]; 950 } 951 return; 952 } 953 954 let segments: Vec<&str> = if is_special { 955 path.split(['/', '\\']).collect() 956 } else { 957 path.split('/').collect() 958 }; 959 960 for (i, seg) in segments.iter().enumerate() { 961 if i == 0 && seg.is_empty() { 962 continue; 963 } 964 965 let decoded = *seg; 966 if decoded == "." || decoded.eq_ignore_ascii_case("%2e") { 967 if i == segments.len() - 1 { 968 url.path.push(String::new()); 969 } 970 } else if decoded == ".." 
971 || decoded.eq_ignore_ascii_case(".%2e") 972 || decoded.eq_ignore_ascii_case("%2e.") 973 || decoded.eq_ignore_ascii_case("%2e%2e") 974 { 975 shorten_path(&url.scheme, &mut url.path); 976 if i == segments.len() - 1 { 977 url.path.push(String::new()); 978 } 979 } else { 980 url.path.push(percent_encode(decoded, is_path_encode)); 981 } 982 } 983} 984 985fn parse_opaque_or_path(url: &mut Url, input: &str) -> Result<()> { 986 let mut remaining = input; 987 988 let path_end = remaining.find(['?', '#']).unwrap_or(remaining.len()); 989 let path_str = &remaining[..path_end]; 990 remaining = &remaining[path_end..]; 991 992 if path_str.starts_with('/') { 993 url.opaque_path = false; 994 parse_path_into(url, path_str, false); 995 } else { 996 url.opaque_path = true; 997 url.path = vec![percent_encode(path_str, is_c0_control)]; 998 } 999 1000 if let Some(after_q) = remaining.strip_prefix('?') { 1001 remaining = after_q; 1002 let query_end = remaining.find('#').unwrap_or(remaining.len()); 1003 let query_str = &remaining[..query_end]; 1004 remaining = &remaining[query_end..]; 1005 url.query = Some(percent_encode(query_str, is_query_encode)); 1006 } 1007 1008 if let Some(after_hash) = remaining.strip_prefix('#') { 1009 url.fragment = Some(percent_encode(after_hash, is_fragment_encode)); 1010 } 1011 1012 Ok(()) 1013} 1014 1015// --------------------------------------------------------------------------- 1016// Relative URL parsing 1017// --------------------------------------------------------------------------- 1018 1019fn parse_relative(input: &str, base: &Url) -> Result<Url> { 1020 let mut url = Url { 1021 scheme: base.scheme.clone(), 1022 username: base.username.clone(), 1023 password: base.password.clone(), 1024 host: base.host.clone(), 1025 port: base.port, 1026 path: base.path.clone(), 1027 opaque_path: base.opaque_path, 1028 query: base.query.clone(), 1029 fragment: None, 1030 }; 1031 1032 let is_special = is_special_scheme(&url.scheme); 1033 1034 if input.is_empty() { 
1035 return Ok(url); 1036 } 1037 1038 let chars: Vec<char> = input.chars().collect(); 1039 1040 if chars[0] == '/' || (is_special && chars[0] == '\\') { 1041 if input.starts_with("//") || (is_special && input.starts_with("\\/")) { 1042 let after_slashes = &input[2..]; 1043 url.username = String::new(); 1044 url.password = String::new(); 1045 url.path = Vec::new(); 1046 url.query = None; 1047 parse_authority_and_path(&mut url, after_slashes, is_special)?; 1048 return Ok(url); 1049 } 1050 url.path = Vec::new(); 1051 url.query = None; 1052 parse_path_query_fragment(&mut url, input, is_special)?; 1053 return Ok(url); 1054 } 1055 1056 if let Some(after_q) = input.strip_prefix('?') { 1057 url.query = None; 1058 url.fragment = None; 1059 let query_end = after_q.find('#').unwrap_or(after_q.len()); 1060 let query_str = &after_q[..query_end]; 1061 let after = &after_q[query_end..]; 1062 1063 let encode_fn = if is_special { 1064 is_special_query_encode 1065 } else { 1066 is_query_encode 1067 }; 1068 url.query = Some(percent_encode(query_str, encode_fn)); 1069 1070 if let Some(frag) = after.strip_prefix('#') { 1071 url.fragment = Some(percent_encode(frag, is_fragment_encode)); 1072 } 1073 return Ok(url); 1074 } 1075 1076 if let Some(frag) = input.strip_prefix('#') { 1077 url.fragment = Some(percent_encode(frag, is_fragment_encode)); 1078 return Ok(url); 1079 } 1080 1081 // Path-relative. 
1082 if !url.opaque_path { 1083 shorten_path(&url.scheme, &mut url.path); 1084 } 1085 url.query = None; 1086 url.fragment = None; 1087 1088 parse_path_query_fragment(&mut url, &format!("/{input}"), is_special)?; 1089 Ok(url) 1090} 1091 1092fn parse_relative_special(remaining: &str, base: &Url, mut url: Url) -> Result<Url> { 1093 url.username = base.username.clone(); 1094 url.password = base.password.clone(); 1095 url.host = base.host.clone(); 1096 url.port = base.port; 1097 url.path = base.path.clone(); 1098 url.query = base.query.clone(); 1099 1100 let is_special = true; 1101 1102 if remaining.is_empty() { 1103 return Ok(url); 1104 } 1105 1106 if remaining.starts_with('/') || remaining.starts_with('\\') { 1107 url.path = Vec::new(); 1108 url.query = None; 1109 parse_path_query_fragment(&mut url, remaining, is_special)?; 1110 return Ok(url); 1111 } 1112 1113 if let Some(rest) = remaining.strip_prefix('?') { 1114 url.query = None; 1115 url.fragment = None; 1116 let query_end = rest.find('#').unwrap_or(rest.len()); 1117 url.query = Some(percent_encode(&rest[..query_end], is_special_query_encode)); 1118 if query_end < rest.len() { 1119 url.fragment = Some(percent_encode(&rest[query_end + 1..], is_fragment_encode)); 1120 } 1121 return Ok(url); 1122 } 1123 1124 if let Some(frag) = remaining.strip_prefix('#') { 1125 url.fragment = Some(percent_encode(frag, is_fragment_encode)); 1126 return Ok(url); 1127 } 1128 1129 shorten_path(&url.scheme, &mut url.path); 1130 url.query = None; 1131 parse_path_query_fragment(&mut url, &format!("/{remaining}"), is_special)?; 1132 Ok(url) 1133} 1134 1135// --------------------------------------------------------------------------- 1136// File URL parsing 1137// --------------------------------------------------------------------------- 1138 1139fn parse_file_url(input: &str, base: Option<&Url>, mut url: Url) -> Result<Url> { 1140 url.host = Some(Host::Domain(String::new())); 1141 1142 let remaining = if let Some(after) = 
input.strip_prefix("//") { 1143 after 1144 } else if let Some(after) = input.strip_prefix('/') { 1145 after 1146 } else if let Some(base) = base { 1147 if base.scheme == "file" { 1148 url.host = base.host.clone(); 1149 url.path = base.path.clone(); 1150 1151 if let Some(rest) = input.strip_prefix('?') { 1152 url.query = None; 1153 url.fragment = None; 1154 let query_end = rest.find('#').unwrap_or(rest.len()); 1155 url.query = Some(percent_encode(&rest[..query_end], is_query_encode)); 1156 if query_end < rest.len() { 1157 url.fragment = Some(percent_encode(&rest[query_end + 1..], is_fragment_encode)); 1158 } 1159 return Ok(url); 1160 } 1161 1162 if let Some(frag) = input.strip_prefix('#') { 1163 url.fragment = Some(percent_encode(frag, is_fragment_encode)); 1164 return Ok(url); 1165 } 1166 1167 shorten_path(&url.scheme, &mut url.path); 1168 url.query = None; 1169 parse_path_query_fragment(&mut url, &format!("/{input}"), false)?; 1170 return Ok(url); 1171 } else { 1172 input 1173 } 1174 } else { 1175 input 1176 }; 1177 1178 let path_start = remaining 1179 .find(['/', '\\', '?', '#']) 1180 .unwrap_or(remaining.len()); 1181 1182 let potential_host = &remaining[..path_start]; 1183 let rest = &remaining[path_start..]; 1184 1185 if starts_with_windows_drive_letter(remaining) { 1186 url.host = Some(Host::Domain(String::new())); 1187 parse_path_query_fragment(&mut url, &format!("/{remaining}"), false)?; 1188 return Ok(url); 1189 } 1190 1191 if !potential_host.is_empty() { 1192 let host = parse_host(potential_host, false)?; 1193 if host != Host::Domain(String::new()) { 1194 url.host = Some(host); 1195 } 1196 } 1197 1198 parse_path_query_fragment(&mut url, rest, false)?; 1199 1200 // Normalize Windows drive letters in path. 
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

// Unit tests for the URL parser. They exercise the public `Url` API
// (`parse`, `parse_with_base`, accessors, `serialize`, `origin`) as well as
// the internal helpers `parse_ipv4`, `parse_ipv6`, `serialize_ipv6`,
// `percent_encode` and `percent_decode_string`.
#[cfg(test)]
mod tests {
    use super::*;

    // -------------------------------------------------------------------
    // Basic absolute URL parsing
    // -------------------------------------------------------------------

    // A bare origin gets the path "/" and no port, query or fragment.
    #[test]
    fn parse_simple_http() {
        let url = Url::parse("http://example.com").unwrap();
        assert_eq!(url.scheme(), "http");
        assert_eq!(url.host_str(), Some("example.com".into()));
        assert_eq!(url.port(), None);
        assert_eq!(url.path(), "/");
        assert_eq!(url.query(), None);
        assert_eq!(url.fragment(), None);
    }

    #[test]
    fn parse_https_with_path() {
        let url = Url::parse("https://example.com/foo/bar").unwrap();
        assert_eq!(url.scheme(), "https");
        assert_eq!(url.host_str(), Some("example.com".into()));
        assert_eq!(url.path(), "/foo/bar");
    }

    // Every component present at once.
    #[test]
    fn parse_full_url() {
        let url =
            Url::parse("https://user:pass@example.com:8080/path/to/page?q=1&r=2#frag").unwrap();
        assert_eq!(url.scheme(), "https");
        assert_eq!(url.username(), "user");
        assert_eq!(url.password(), "pass");
        assert_eq!(url.host_str(), Some("example.com".into()));
        assert_eq!(url.port(), Some(8080));
        assert_eq!(url.path(), "/path/to/page");
        assert_eq!(url.query(), Some("q=1&r=2"));
        assert_eq!(url.fragment(), Some("frag"));
    }

    // An explicit default port is dropped from `port()` but still reported
    // by `port_or_default()`.
    #[test]
    fn parse_default_port_omitted() {
        let url = Url::parse("http://example.com:80/").unwrap();
        assert_eq!(url.port(), None);
        assert_eq!(url.port_or_default(), Some(80));
    }

    #[test]
    fn parse_non_default_port() {
        let url = Url::parse("http://example.com:8080/").unwrap();
        assert_eq!(url.port(), Some(8080));
    }

    #[test]
    fn parse_https_default_port() {
        let url = Url::parse("https://example.com:443/").unwrap();
        assert_eq!(url.port(), None);
    }

    #[test]
    fn parse_ftp_default_port() {
        let url = Url::parse("ftp://files.example.com:21/readme.txt").unwrap();
        assert_eq!(url.port(), None);
        assert_eq!(url.port_or_default(), Some(21));
    }

    // -------------------------------------------------------------------
    // Scheme handling
    // -------------------------------------------------------------------

    #[test]
    fn scheme_is_lowercased() {
        let url = Url::parse("HTTP://EXAMPLE.COM").unwrap();
        assert_eq!(url.scheme(), "http");
    }

    // A non-special scheme followed by "//" still gets a host and a path.
    #[test]
    fn non_special_scheme() {
        let url = Url::parse("custom://host/path").unwrap();
        assert_eq!(url.scheme(), "custom");
        assert_eq!(url.host_str(), Some("host".into()));
        assert_eq!(url.path(), "/path");
    }

    // The next three schemes have no "//" after the colon; each is expected
    // to report `cannot_be_a_base()`.
    #[test]
    fn data_uri() {
        let url = Url::parse("data:text/html,<h1>Hello</h1>").unwrap();
        assert_eq!(url.scheme(), "data");
        assert!(url.cannot_be_a_base());
    }

    #[test]
    fn javascript_uri() {
        let url = Url::parse("javascript:alert(1)").unwrap();
        assert_eq!(url.scheme(), "javascript");
        assert!(url.cannot_be_a_base());
    }

    #[test]
    fn mailto_uri() {
        let url = Url::parse("mailto:user@example.com").unwrap();
        assert_eq!(url.scheme(), "mailto");
        assert!(url.cannot_be_a_base());
    }

    // -------------------------------------------------------------------
    // Host parsing
    // -------------------------------------------------------------------

    #[test]
    fn host_is_lowercased() {
        let url = Url::parse("http://EXAMPLE.COM/").unwrap();
        assert_eq!(url.host_str(), Some("example.com".into()));
    }

    // IPv4 hosts are stored as a single big-endian u32.
    #[test]
    fn ipv4_host() {
        let url = Url::parse("http://127.0.0.1/").unwrap();
        assert_eq!(url.host(), Some(&Host::Ipv4(0x7F000001)));
        assert_eq!(url.host_str(), Some("127.0.0.1".into()));
    }

    #[test]
    fn ipv4_host_all_zeros() {
        let url = Url::parse("http://0.0.0.0/").unwrap();
        assert_eq!(url.host(), Some(&Host::Ipv4(0)));
    }

    // IPv6 hosts are stored as eight 16-bit pieces.
    #[test]
    fn ipv6_host() {
        let url = Url::parse("http://[::1]/").unwrap();
        assert_eq!(url.host(), Some(&Host::Ipv6([0, 0, 0, 0, 0, 0, 0, 1])));
    }

    #[test]
    fn ipv6_full() {
        let url = Url::parse("http://[2001:db8:85a3:0:0:8a2e:370:7334]/").unwrap();
        assert_eq!(
            url.host(),
            Some(&Host::Ipv6([
                0x2001, 0x0db8, 0x85a3, 0, 0, 0x8a2e, 0x0370, 0x7334
            ]))
        );
    }

    // `host_str()` includes the surrounding brackets for IPv6.
    #[test]
    fn ipv6_serialization_compressed() {
        let url = Url::parse("http://[2001:db8::1]/").unwrap();
        assert_eq!(url.host_str(), Some("[2001:db8::1]".into()));
    }

    #[test]
    fn ipv6_all_zeros() {
        let url = Url::parse("http://[::]/").unwrap();
        assert_eq!(url.host(), Some(&Host::Ipv6([0; 8])));
        assert_eq!(url.host_str(), Some("[::]".into()));
    }

    #[test]
    fn ipv6_loopback() {
        let pieces = parse_ipv6("::1").unwrap();
        assert_eq!(pieces, [0, 0, 0, 0, 0, 0, 0, 1]);
    }

    // An embedded dotted-quad fills the last two pieces.
    #[test]
    fn ipv6_with_ipv4() {
        let pieces = parse_ipv6("::ffff:192.168.1.1").unwrap();
        assert_eq!(pieces, [0, 0, 0, 0, 0, 0xffff, 0xc0a8, 0x0101]);
    }

    // -------------------------------------------------------------------
    // IPv4 parsing
    // -------------------------------------------------------------------

    #[test]
    fn ipv4_basic() {
        assert_eq!(parse_ipv4("192.168.1.1").unwrap(), 0xC0A80101);
    }

    #[test]
    fn ipv4_hex() {
        assert_eq!(parse_ipv4("0xC0.0xA8.0x01.0x01").unwrap(), 0xC0A80101);
    }

    #[test]
    fn ipv4_octal() {
        assert_eq!(parse_ipv4("0300.0250.01.01").unwrap(), 0xC0A80101);
    }

    // NOTE(review): the WHATWG IPv4 parser accepts a single number and lets it
    // fill all four bytes (3232235777 == 192.168.1.1). This test pins
    // rejection instead — confirm the deviation is intentional.
    #[test]
    fn ipv4_single_number() {
        assert!(parse_ipv4("3232235777").is_err());
    }

    #[test]
    fn ipv4_two_parts() {
        // Two parts: first is top 8 bits, second is bottom 24 bits.
        // 192.168.1.1 => 168*65536 + 1*256 + 1 = 11010305
        assert_eq!(parse_ipv4("192.11010305").unwrap(), 0xC0A80101);
    }

    #[test]
    fn ipv4_reject_overflow() {
        assert!(parse_ipv4("256.0.0.0").is_err());
    }

    #[test]
    fn ipv4_reject_empty_part() {
        assert!(parse_ipv4("1..1.1").is_err());
    }

    // -------------------------------------------------------------------
    // Percent encoding/decoding
    // -------------------------------------------------------------------

    #[test]
    fn percent_decode_basic() {
        assert_eq!(percent_decode_string("%48%65%6C%6C%6F"), "Hello");
    }

    #[test]
    fn percent_decode_mixed() {
        assert_eq!(percent_decode_string("Hello%20World"), "Hello World");
    }

    #[test]
    fn percent_decode_passthrough() {
        assert_eq!(percent_decode_string("no-encoding"), "no-encoding");
    }

    // Truncated escapes are passed through unchanged rather than rejected.
    #[test]
    fn percent_decode_partial() {
        assert_eq!(percent_decode_string("100%"), "100%");
        assert_eq!(percent_decode_string("%2"), "%2");
    }

    #[test]
    fn percent_encode_userinfo() {
        let encoded = percent_encode("user@host", is_userinfo_encode);
        assert_eq!(encoded, "user%40host");
    }

    #[test]
    fn percent_encode_path() {
        let encoded = percent_encode("hello world", is_path_encode);
        assert_eq!(encoded, "hello%20world");
    }

    // -------------------------------------------------------------------
    // Path parsing and dot segments
    // -------------------------------------------------------------------

    #[test]
    fn path_dot_removal() {
        let url = Url::parse("http://example.com/a/b/../c").unwrap();
        assert_eq!(url.path(), "/a/c");
    }

    #[test]
    fn path_dot_current() {
        let url = Url::parse("http://example.com/a/./b").unwrap();
        assert_eq!(url.path(), "/a/b");
    }

    #[test]
    fn path_multiple_dots() {
        let url = Url::parse("http://example.com/a/b/c/../../d").unwrap();
        assert_eq!(url.path(), "/a/d");
    }

    #[test]
    fn path_trailing_slash() {
        let url = Url::parse("http://example.com/a/b/").unwrap();
        assert_eq!(url.path(), "/a/b/");
    }

    #[test]
    fn path_empty() {
        let url = Url::parse("http://example.com").unwrap();
        assert_eq!(url.path(), "/");
    }

    // ".." at the root has nothing to pop and is simply dropped.
    #[test]
    fn path_double_dot_at_root() {
        let url = Url::parse("http://example.com/../a").unwrap();
        assert_eq!(url.path(), "/a");
    }

    // -------------------------------------------------------------------
    // Relative URL resolution
    // -------------------------------------------------------------------

    // A bare segment replaces the last segment of the base path.
    #[test]
    fn relative_path() {
        let base = Url::parse("http://example.com/a/b/c").unwrap();
        let url = Url::parse_with_base("d", &base).unwrap();
        assert_eq!(url.path(), "/a/b/d");
        assert_eq!(url.host_str(), Some("example.com".into()));
    }

    #[test]
    fn relative_path_with_dots() {
        let base = Url::parse("http://example.com/a/b/c").unwrap();
        let url = Url::parse_with_base("../d", &base).unwrap();
        assert_eq!(url.path(), "/a/d");
    }

    // A leading "/" replaces the whole base path but keeps the host.
    #[test]
    fn relative_absolute_path() {
        let base = Url::parse("http://example.com/a/b/c").unwrap();
        let url = Url::parse_with_base("/d/e", &base).unwrap();
        assert_eq!(url.path(), "/d/e");
        assert_eq!(url.host_str(), Some("example.com".into()));
    }

    #[test]
    fn relative_query_only() {
        let base = Url::parse("http://example.com/a/b?old=1").unwrap();
        let url = Url::parse_with_base("?new=2", &base).unwrap();
        assert_eq!(url.path(), "/a/b");
        assert_eq!(url.query(), Some("new=2"));
    }

    #[test]
    fn relative_fragment_only() {
        let base = Url::parse("http://example.com/a/b#old").unwrap();
        let url = Url::parse_with_base("#new", &base).unwrap();
        assert_eq!(url.path(), "/a/b");
        assert_eq!(url.fragment(), Some("new"));
    }

    // A scheme-relative reference keeps only the base's scheme.
    #[test]
    fn relative_authority_override() {
        let base = Url::parse("http://example.com/a/b").unwrap();
        let url = Url::parse_with_base("//other.com/c", &base).unwrap();
        assert_eq!(url.scheme(), "http");
        assert_eq!(url.host_str(), Some("other.com".into()));
        assert_eq!(url.path(), "/c");
    }

    #[test]
    fn absolute_url_ignores_base() {
        let base = Url::parse("http://example.com/a").unwrap();
        let url = Url::parse_with_base("https://other.com/b", &base).unwrap();
        assert_eq!(url.scheme(), "https");
        assert_eq!(url.host_str(), Some("other.com".into()));
        assert_eq!(url.path(), "/b");
    }

    // An empty reference keeps the base's path and query but not its fragment.
    #[test]
    fn relative_empty_string() {
        let base = Url::parse("http://example.com/a/b?q=1#f").unwrap();
        let url = Url::parse_with_base("", &base).unwrap();
        assert_eq!(url.path(), "/a/b");
        assert_eq!(url.query(), Some("q=1"));
        assert_eq!(url.fragment(), None);
    }

    // -------------------------------------------------------------------
    // Serialization
    // -------------------------------------------------------------------

    #[test]
    fn serialize_simple() {
        let url = Url::parse("http://example.com/path").unwrap();
        assert_eq!(url.serialize(), "http://example.com/path");
    }

    #[test]
    fn serialize_with_credentials() {
        let url = Url::parse("http://user:pass@example.com/").unwrap();
        assert_eq!(url.serialize(), "http://user:pass@example.com/");
    }

    #[test]
    fn serialize_with_port() {
        let url = Url::parse("http://example.com:8080/").unwrap();
        assert_eq!(url.serialize(), "http://example.com:8080/");
    }

    #[test]
    fn serialize_with_query_fragment() {
        let url = Url::parse("http://example.com/path?q=1#frag").unwrap();
        assert_eq!(url.serialize(), "http://example.com/path?q=1#frag");
    }

    #[test]
    fn serialize_data_uri() {
        let url = Url::parse("data:text/html,hello").unwrap();
        assert_eq!(url.serialize(), "data:text/html,hello");
    }

    // Already-normalized inputs must serialize back to themselves.
    #[test]
    fn roundtrip_full_url() {
        let input = "https://user:pass@example.com:8080/a/b?q=1#frag";
        let url = Url::parse(input).unwrap();
        assert_eq!(url.serialize(), input);
    }

    #[test]
    fn roundtrip_ipv4() {
        let url = Url::parse("http://192.168.1.1/path").unwrap();
        assert_eq!(url.serialize(), "http://192.168.1.1/path");
    }

    #[test]
    fn roundtrip_ipv6() {
        let url = Url::parse("http://[::1]/path").unwrap();
        assert_eq!(url.serialize(), "http://[::1]/path");
    }

    // -------------------------------------------------------------------
    // Origin
    // -------------------------------------------------------------------

    #[test]
    fn origin_http() {
        let url = Url::parse("http://example.com:8080/path").unwrap();
        match url.origin() {
            Origin::Tuple(scheme, host, port) => {
                assert_eq!(scheme, "http");
                assert_eq!(host, Host::Domain("example.com".into()));
                assert_eq!(port, Some(8080));
            }
            _ => panic!("expected tuple origin"),
        }
    }

    // With no explicit port the origin tuple carries `None`, not the
    // scheme's default port.
    #[test]
    fn origin_https_default_port() {
        let url = Url::parse("https://example.com/").unwrap();
        match url.origin() {
            Origin::Tuple(scheme, host, port) => {
                assert_eq!(scheme, "https");
                assert_eq!(host, Host::Domain("example.com".into()));
                assert_eq!(port, None);
            }
            _ => panic!("expected tuple origin"),
        }
    }

    #[test]
    fn origin_data_is_opaque() {
        let url = Url::parse("data:text/html,hello").unwrap();
        assert_eq!(url.origin(), Origin::Opaque);
    }

    // -------------------------------------------------------------------
    // File URLs
    // -------------------------------------------------------------------

    // "file:///…" has an empty (but present) host.
    #[test]
    fn file_url_unix() {
        let url = Url::parse("file:///home/user/file.txt").unwrap();
        assert_eq!(url.scheme(), "file");
        assert_eq!(url.host_str(), Some("".into()));
        assert_eq!(url.path(), "/home/user/file.txt");
    }

    #[test]
    fn file_url_windows_drive() {
        let url = Url::parse("file:///C:/Windows/system32").unwrap();
        assert_eq!(url.scheme(), "file");
        assert_eq!(url.path(), "/C:/Windows/system32");
    }

    #[test]
    fn file_url_with_host() {
        let url = Url::parse("file://server/share/file.txt").unwrap();
        assert_eq!(url.scheme(), "file");
        assert_eq!(url.host_str(), Some("server".into()));
        assert_eq!(url.path(), "/share/file.txt");
    }

    // -------------------------------------------------------------------
    // Edge cases
    // -------------------------------------------------------------------

    #[test]
    fn empty_input_fails() {
        assert_eq!(Url::parse(""), Err(UrlError::EmptyInput));
    }

    #[test]
    fn whitespace_only_fails() {
        assert_eq!(Url::parse(" "), Err(UrlError::EmptyInput));
    }

    // Without a scheme and without a base there is nothing to resolve against.
    #[test]
    fn missing_scheme_fails() {
        assert!(Url::parse("example.com").is_err());
    }

    #[test]
    fn leading_whitespace_stripped() {
        let url = Url::parse(" http://example.com ").unwrap();
        assert_eq!(url.host_str(), Some("example.com".into()));
    }

    // Tabs and newlines are removed from anywhere inside the input.
    #[test]
    fn tab_newline_stripped() {
        let url = Url::parse("http://exa\tmple\n.com/").unwrap();
        assert_eq!(url.host_str(), Some("example.com".into()));
    }

    #[test]
    fn query_with_special_chars() {
        let url = Url::parse("http://example.com/?key=val ue&foo=bar").unwrap();
        assert!(url.query().unwrap().contains("key=val%20ue"));
    }

    #[test]
    fn fragment_with_special_chars() {
        let url = Url::parse("http://example.com/#sec tion").unwrap();
        assert!(url.fragment().unwrap().contains("sec%20tion"));
    }

    // A username without a password still counts as credentials.
    #[test]
    fn username_only() {
        let url = Url::parse("http://user@example.com/").unwrap();
        assert_eq!(url.username(), "user");
        assert_eq!(url.password(), "");
        assert!(url.has_credentials());
    }

    #[test]
    fn no_credentials() {
        let url = Url::parse("http://example.com/").unwrap();
        assert!(!url.has_credentials());
    }

    // 99999 does not fit in a 16-bit port.
    #[test]
    fn port_overflow_fails() {
        assert!(Url::parse("http://example.com:99999/").is_err());
    }

    #[test]
    fn ws_scheme() {
        let url = Url::parse("ws://example.com/chat").unwrap();
        assert_eq!(url.scheme(), "ws");
        assert_eq!(url.port_or_default(), Some(80));
    }

    #[test]
    fn wss_scheme() {
        let url = Url::parse("wss://example.com/chat").unwrap();
        assert_eq!(url.scheme(), "wss");
        assert_eq!(url.port_or_default(), Some(443));
    }

    #[test]
    fn cannot_be_a_base() {
        let url = Url::parse("data:text/html,hello").unwrap();
        assert!(url.cannot_be_a_base());
    }

    #[test]
    fn http_can_be_a_base() {
        let url = Url::parse("http://example.com/").unwrap();
        assert!(!url.cannot_be_a_base());
    }

    // -------------------------------------------------------------------
    // Display/ToString
    // -------------------------------------------------------------------

    #[test]
    fn display_matches_serialize() {
        let url = Url::parse("https://example.com:8443/path?q=1#f").unwrap();
        assert_eq!(format!("{url}"), url.serialize());
    }

    // -------------------------------------------------------------------
    // Multiple path segments
    // -------------------------------------------------------------------

    #[test]
    fn path_segments() {
        let url = Url::parse("http://example.com/a/b/c").unwrap();
        assert_eq!(url.path_segments(), &["a", "b", "c"]);
    }

    // A trailing slash shows up as a final empty segment.
    #[test]
    fn path_segments_trailing_slash() {
        let url = Url::parse("http://example.com/a/b/").unwrap();
        assert_eq!(url.path_segments(), &["a", "b", ""]);
    }

    // -------------------------------------------------------------------
    // Host type
    // -------------------------------------------------------------------

    #[test]
    fn host_serialize_domain() {
        let h = Host::Domain("example.com".into());
        assert_eq!(h.serialize(), "example.com");
    }

    #[test]
    fn host_serialize_ipv4() {
        let h = Host::Ipv4(0x7F000001);
        assert_eq!(h.serialize(), "127.0.0.1");
    }

    #[test]
    fn host_serialize_ipv6() {
        let h = Host::Ipv6([0, 0, 0, 0, 0, 0, 0, 1]);
        assert_eq!(h.serialize(), "[::1]");
    }

    // -------------------------------------------------------------------
    // IPv6 serialization
    // -------------------------------------------------------------------

    // Leading zeros inside each piece are dropped.
    #[test]
    fn ipv6_serialize_full() {
        let pieces = [
            0x2001, 0x0db8, 0x85a3, 0x0001, 0x0002, 0x8a2e, 0x0370, 0x7334,
        ];
        assert_eq!(serialize_ipv6(&pieces), "2001:db8:85a3:1:2:8a2e:370:7334");
    }

    #[test]
    fn ipv6_serialize_compress() {
        let pieces = [0x2001, 0x0db8, 0, 0, 0, 0, 0, 1];
        assert_eq!(serialize_ipv6(&pieces), "2001:db8::1");
    }

    #[test]
    fn ipv6_serialize_all_zeros() {
        let pieces = [0u16; 8];
        assert_eq!(serialize_ipv6(&pieces), "::");
    }

    // Isolated zero pieces are not compressed to "::".
    #[test]
    fn ipv6_serialize_no_compress_single_zero() {
        let pieces = [1, 0, 2, 0, 3, 0, 4, 0];
        assert_eq!(serialize_ipv6(&pieces), "1:0:2:0:3:0:4:0");
    }

    // -------------------------------------------------------------------
    // Percent encoding edge cases
    // -------------------------------------------------------------------

    #[test]
    fn percent_encode_preserves_unreserved() {
        let encoded = percent_encode("hello-world_test.page~1", is_path_encode);
        assert_eq!(encoded, "hello-world_test.page~1");
    }

    // Non-ASCII text is encoded byte-by-byte from its UTF-8 form.
    #[test]
    fn percent_encode_multibyte_utf8() {
        let encoded = percent_encode("café", is_path_encode);
        assert_eq!(encoded, "caf%C3%A9");
    }
}