we (web engine): Experimental web browser project to understand the limits of Claude
1//! WHATWG URL parser.
2//!
3//! Implements the URL Standard (<https://url.spec.whatwg.org/>):
4//! - URL record type with scheme, username, password, host, port, path, query, fragment
5//! - State-machine parser following the spec
6//! - Host parsing: domains, IPv4 addresses, IPv6 addresses
7//! - Percent-encoding and decoding (UTF-8)
8//! - Special scheme handling (http, https, ftp, ws, wss, file)
9//! - Relative URL resolution via base URL
10//! - URL serialization
11//! - Origin derivation
12
13pub mod data_url;
14
15use core::fmt;
16
17// ---------------------------------------------------------------------------
18// Error types
19// ---------------------------------------------------------------------------
20
/// Reasons a URL, or one of its components, can be rejected by the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UrlError {
    /// The input was empty (or whitespace only).
    EmptyInput,
    /// The URL syntax as a whole was invalid.
    InvalidUrl,
    /// The scheme was invalid.
    InvalidScheme,
    /// The authority component was invalid.
    InvalidAuthority,
    /// The host was invalid.
    InvalidHost,
    /// The port was not a valid port number.
    InvalidPort,
    /// The host looked like an IPv4 address but was not a valid one.
    InvalidIpv4,
    /// The bracketed host was not a valid IPv6 address.
    InvalidIpv6,
    /// A percent-encoded sequence was invalid.
    InvalidPercentEncoding,
    /// A relative reference was given but no usable base URL was available.
    RelativeWithoutBase,
    /// The input had no scheme.
    MissingScheme,
}
46
47impl fmt::Display for UrlError {
48 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
49 match self {
50 Self::EmptyInput => write!(f, "empty input"),
51 Self::InvalidUrl => write!(f, "invalid URL"),
52 Self::InvalidScheme => write!(f, "invalid scheme"),
53 Self::InvalidAuthority => write!(f, "invalid authority"),
54 Self::InvalidHost => write!(f, "invalid host"),
55 Self::InvalidPort => write!(f, "invalid port number"),
56 Self::InvalidIpv4 => write!(f, "invalid IPv4 address"),
57 Self::InvalidIpv6 => write!(f, "invalid IPv6 address"),
58 Self::InvalidPercentEncoding => write!(f, "invalid percent-encoding"),
59 Self::RelativeWithoutBase => write!(f, "relative URL without a base"),
60 Self::MissingScheme => write!(f, "missing scheme"),
61 }
62 }
63}
64
65pub type Result<T> = core::result::Result<T, UrlError>;
66
67// ---------------------------------------------------------------------------
68// Host
69// ---------------------------------------------------------------------------
70
/// The host component of a parsed URL.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Host {
    /// A domain name (stored lowercased) or, for non-special schemes, an
    /// opaque percent-encoded host string.
    Domain(String),
    /// An IPv4 address packed into 32 bits, most significant octet first.
    Ipv4(u32),
    /// An IPv6 address as eight 16-bit pieces, in textual order.
    Ipv6([u16; 8]),
}
81
82impl Host {
83 /// Serialize the host to a string.
84 pub fn serialize(&self) -> String {
85 match self {
86 Host::Domain(d) => d.clone(),
87 Host::Ipv4(addr) => serialize_ipv4(*addr),
88 Host::Ipv6(pieces) => format!("[{}]", serialize_ipv6(pieces)),
89 }
90 }
91}
92
93impl fmt::Display for Host {
94 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
95 write!(f, "{}", self.serialize())
96 }
97}
98
99// ---------------------------------------------------------------------------
100// Origin
101// ---------------------------------------------------------------------------
102
103/// A URL origin (scheme, host, port).
104#[derive(Debug, Clone, PartialEq, Eq)]
105pub enum Origin {
106 /// A tuple origin (scheme, host, port).
107 Tuple(String, Host, Option<u16>),
108 /// An opaque origin (unique, not equal to anything).
109 Opaque,
110}
111
112// ---------------------------------------------------------------------------
113// URL record
114// ---------------------------------------------------------------------------
115
116/// A parsed URL record per the WHATWG URL Standard.
117#[derive(Debug, Clone, PartialEq, Eq)]
118pub struct Url {
119 /// The scheme (e.g., "http", "https", "file").
120 pub scheme: String,
121 /// The username (percent-encoded).
122 username: String,
123 /// The password (percent-encoded).
124 password: String,
125 /// The host.
126 pub host: Option<Host>,
127 /// The port (None = default or absent).
128 pub port: Option<u16>,
129 /// Path segments. For non-opaque paths, these are the segments.
130 /// For opaque paths (cannot-be-a-base URL), this is a single element.
131 path: Vec<String>,
132 /// Whether this URL has an opaque path (cannot-be-a-base URL).
133 opaque_path: bool,
134 /// The query string (without leading '?').
135 pub query: Option<String>,
136 /// The fragment (without leading '#').
137 pub fragment: Option<String>,
138}
139
140impl Url {
141 /// Parse a URL string.
142 pub fn parse(input: &str) -> Result<Self> {
143 parse_url(input, None)
144 }
145
146 /// Parse a URL string with a base URL for resolving relative references.
147 pub fn parse_with_base(input: &str, base: &Url) -> Result<Self> {
148 parse_url(input, Some(base))
149 }
150
151 /// Get the scheme.
152 pub fn scheme(&self) -> &str {
153 &self.scheme
154 }
155
156 /// Get the username (percent-encoded).
157 pub fn username(&self) -> &str {
158 &self.username
159 }
160
161 /// Get the password (percent-encoded).
162 pub fn password(&self) -> &str {
163 &self.password
164 }
165
166 /// Get the host.
167 pub fn host(&self) -> Option<&Host> {
168 self.host.as_ref()
169 }
170
171 /// Get the host as a string.
172 pub fn host_str(&self) -> Option<String> {
173 self.host.as_ref().map(|h| h.serialize())
174 }
175
176 /// Get the port.
177 pub fn port(&self) -> Option<u16> {
178 self.port
179 }
180
181 /// Get the port or the default port for the scheme.
182 pub fn port_or_default(&self) -> Option<u16> {
183 self.port.or_else(|| default_port(&self.scheme))
184 }
185
186 /// Get the path as a string.
187 pub fn path(&self) -> String {
188 if self.opaque_path {
189 self.path.first().cloned().unwrap_or_default()
190 } else {
191 let mut s = String::new();
192 for seg in &self.path {
193 s.push('/');
194 s.push_str(seg);
195 }
196 if s.is_empty() {
197 s.push('/');
198 }
199 s
200 }
201 }
202
203 /// Get the path segments.
204 pub fn path_segments(&self) -> &[String] {
205 &self.path
206 }
207
208 /// Get the query string.
209 pub fn query(&self) -> Option<&str> {
210 self.query.as_deref()
211 }
212
213 /// Get the fragment.
214 pub fn fragment(&self) -> Option<&str> {
215 self.fragment.as_deref()
216 }
217
218 /// Whether this URL has an opaque path (cannot-be-a-base).
219 pub fn cannot_be_a_base(&self) -> bool {
220 self.opaque_path
221 }
222
223 /// Whether this URL includes credentials.
224 pub fn has_credentials(&self) -> bool {
225 !self.username.is_empty() || !self.password.is_empty()
226 }
227
228 /// Derive the origin of this URL.
229 pub fn origin(&self) -> Origin {
230 match self.scheme.as_str() {
231 "http" | "https" | "ws" | "wss" | "ftp" => {
232 if let Some(host) = &self.host {
233 Origin::Tuple(self.scheme.clone(), host.clone(), self.port)
234 } else {
235 Origin::Opaque
236 }
237 }
238 _ => Origin::Opaque,
239 }
240 }
241
242 /// Serialize this URL to a string (the href).
243 pub fn serialize(&self) -> String {
244 let mut output = String::new();
245 output.push_str(&self.scheme);
246 output.push(':');
247
248 if self.host.is_some() {
249 output.push_str("//");
250 if self.has_credentials() {
251 output.push_str(&self.username);
252 if !self.password.is_empty() {
253 output.push(':');
254 output.push_str(&self.password);
255 }
256 output.push('@');
257 }
258 if let Some(ref host) = self.host {
259 output.push_str(&host.serialize());
260 }
261 if let Some(port) = self.port {
262 output.push(':');
263 output.push_str(&port.to_string());
264 }
265 } else if !self.opaque_path && self.scheme == "file" {
266 output.push_str("//");
267 }
268
269 output.push_str(&self.path());
270
271 if let Some(ref query) = self.query {
272 output.push('?');
273 output.push_str(query);
274 }
275 if let Some(ref fragment) = self.fragment {
276 output.push('#');
277 output.push_str(fragment);
278 }
279
280 output
281 }
282}
283
284impl fmt::Display for Url {
285 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
286 write!(f, "{}", self.serialize())
287 }
288}
289
290// ---------------------------------------------------------------------------
291// Special schemes
292// ---------------------------------------------------------------------------
293
/// Returns `true` for the "special" schemes: `http`, `https`, `ftp`, `ws`,
/// `wss` and `file`. The comparison is case-sensitive.
fn is_special_scheme(scheme: &str) -> bool {
    const SPECIAL_SCHEMES: [&str; 6] = ["http", "https", "ftp", "ws", "wss", "file"];
    SPECIAL_SCHEMES.contains(&scheme)
}
298
/// Returns the default port of `scheme`, or `None` when the scheme has none
/// (including `file` and every non-special scheme).
fn default_port(scheme: &str) -> Option<u16> {
    const DEFAULT_PORTS: [(&str, u16); 5] = [
        ("http", 80),
        ("ws", 80),
        ("https", 443),
        ("wss", 443),
        ("ftp", 21),
    ];
    DEFAULT_PORTS
        .iter()
        .find(|(name, _)| *name == scheme)
        .map(|&(_, port)| port)
}
308
309// ---------------------------------------------------------------------------
310// Percent encoding / decoding
311// ---------------------------------------------------------------------------
312
/// The C0 control percent-encode set: everything outside the printable ASCII
/// range U+0020..=U+007E (C0 controls, DEL and all non-ASCII code points).
fn is_c0_control(c: char) -> bool {
    !matches!(c, '\u{0020}'..='\u{007E}')
}
317
318/// The fragment percent-encode set.
319fn is_fragment_encode(c: char) -> bool {
320 is_c0_control(c) || c == ' ' || c == '"' || c == '<' || c == '>' || c == '`'
321}
322
323/// The query percent-encode set.
324fn is_query_encode(c: char) -> bool {
325 is_c0_control(c) || c == ' ' || c == '"' || c == '#' || c == '<' || c == '>'
326}
327
328/// The special query percent-encode set.
329fn is_special_query_encode(c: char) -> bool {
330 is_query_encode(c) || c == '\''
331}
332
333/// The path percent-encode set.
334fn is_path_encode(c: char) -> bool {
335 is_query_encode(c) || c == '?' || c == '`' || c == '{' || c == '}'
336}
337
338/// The userinfo percent-encode set.
339fn is_userinfo_encode(c: char) -> bool {
340 is_path_encode(c)
341 || c == '/'
342 || c == ':'
343 || c == ';'
344 || c == '='
345 || c == '@'
346 || c == '['
347 || c == '\\'
348 || c == ']'
349 || c == '^'
350 || c == '|'
351}
352
353/// Percent-encode a string using the given encode set predicate.
354fn percent_encode(input: &str, should_encode: fn(char) -> bool) -> String {
355 let mut out = String::with_capacity(input.len());
356 for c in input.chars() {
357 if should_encode(c) {
358 for b in c.to_string().as_bytes() {
359 out.push('%');
360 out.push(to_hex_upper(b >> 4));
361 out.push(to_hex_upper(b & 0x0F));
362 }
363 } else {
364 out.push(c);
365 }
366 }
367 out
368}
369
/// Converts a nibble (0..=15) to its uppercase hexadecimal digit.
fn to_hex_upper(n: u8) -> char {
    let code = match n {
        0..=9 => b'0' + n,
        _ => b'A' + n - 10,
    };
    char::from(code)
}
377
378/// Percent-decode a byte string.
379pub fn percent_decode(input: &str) -> Vec<u8> {
380 let bytes = input.as_bytes();
381 let mut out = Vec::with_capacity(bytes.len());
382 let mut i = 0;
383 while i < bytes.len() {
384 if bytes[i] == b'%' && i + 2 < bytes.len() {
385 if let (Some(hi), Some(lo)) = (hex_val(bytes[i + 1]), hex_val(bytes[i + 2])) {
386 out.push(hi << 4 | lo);
387 i += 3;
388 continue;
389 }
390 }
391 out.push(bytes[i]);
392 i += 1;
393 }
394 out
395}
396
397/// Percent-decode to a UTF-8 string (lossy).
398pub fn percent_decode_string(input: &str) -> String {
399 String::from_utf8_lossy(&percent_decode(input)).into_owned()
400}
401
/// Returns the value (0..=15) of an ASCII hexadecimal digit, or `None` when
/// `b` is not one.
fn hex_val(b: u8) -> Option<u8> {
    // `to_digit(16)` only recognises ASCII `0-9`, `a-f` and `A-F`; bytes
    // >= 0x80 map to non-ASCII chars and therefore yield `None`.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
410
411// ---------------------------------------------------------------------------
412// IPv4 parsing
413// ---------------------------------------------------------------------------
414
415fn parse_ipv4(input: &str) -> Result<u32> {
416 let parts: Vec<&str> = input.split('.').collect();
417 if parts.len() < 2 || parts.len() > 4 {
418 return Err(UrlError::InvalidIpv4);
419 }
420 let mut numbers: Vec<u64> = Vec::with_capacity(parts.len());
421 for part in &parts {
422 if part.is_empty() {
423 return Err(UrlError::InvalidIpv4);
424 }
425 let n = parse_ipv4_number(part)?;
426 numbers.push(n);
427 }
428 let last = numbers.len() - 1;
429 for (i, &n) in numbers.iter().enumerate() {
430 if i < last && n > 255 {
431 return Err(UrlError::InvalidIpv4);
432 }
433 }
434 if numbers[last] >= 256u64.pow((4 - last) as u32) {
435 return Err(UrlError::InvalidIpv4);
436 }
437
438 let mut ipv4 = numbers[last] as u32;
439 for (i, &n) in numbers.iter().enumerate().take(last) {
440 ipv4 += (n as u32) << (8 * (3 - i));
441 }
442 Ok(ipv4)
443}
444
445fn parse_ipv4_number(input: &str) -> Result<u64> {
446 if input.is_empty() {
447 return Err(UrlError::InvalidIpv4);
448 }
449 let (s, radix) = if input.starts_with("0x") || input.starts_with("0X") {
450 (&input[2..], 16)
451 } else if input.len() > 1 && input.starts_with('0') {
452 (&input[1..], 8)
453 } else {
454 (input, 10)
455 };
456 if s.is_empty() {
457 return Ok(0);
458 }
459 u64::from_str_radix(s, radix).map_err(|_| UrlError::InvalidIpv4)
460}
461
/// Formats a 32-bit IPv4 address in dotted-decimal notation, most
/// significant octet first.
fn serialize_ipv4(addr: u32) -> String {
    let [a, b, c, d] = addr.to_be_bytes();
    format!("{}.{}.{}.{}", a, b, c, d)
}
471
472// ---------------------------------------------------------------------------
473// IPv6 parsing
474// ---------------------------------------------------------------------------
475
476fn parse_ipv6(input: &str) -> Result<[u16; 8]> {
477 let mut pieces = [0u16; 8];
478 let mut piece_index: usize = 0;
479 let mut compress: Option<usize> = None;
480 let chars: Vec<char> = input.chars().collect();
481 let len = chars.len();
482 let mut pointer = 0;
483
484 if pointer < len && chars[pointer] == ':' {
485 if pointer + 1 >= len || chars[pointer + 1] != ':' {
486 return Err(UrlError::InvalidIpv6);
487 }
488 pointer += 2;
489 piece_index += 1;
490 compress = Some(piece_index);
491 }
492
493 while pointer < len {
494 if piece_index >= 8 {
495 return Err(UrlError::InvalidIpv6);
496 }
497
498 if chars[pointer] == ':' {
499 if compress.is_some() {
500 return Err(UrlError::InvalidIpv6);
501 }
502 pointer += 1;
503 piece_index += 1;
504 compress = Some(piece_index);
505 continue;
506 }
507
508 let mut value: u16 = 0;
509 let mut length = 0;
510 while length < 4 && pointer < len && chars[pointer].is_ascii_hexdigit() {
511 value = value * 0x10 + hex_val(chars[pointer] as u8).unwrap() as u16;
512 pointer += 1;
513 length += 1;
514 }
515
516 if pointer < len && chars[pointer] == '.' {
517 // IPv4-mapped IPv6.
518 if length == 0 {
519 return Err(UrlError::InvalidIpv6);
520 }
521 pointer -= length;
522 if piece_index > 6 {
523 return Err(UrlError::InvalidIpv6);
524 }
525 let mut numbers_seen = 0;
526 while pointer < len {
527 let mut ipv4_piece: Option<u16> = None;
528 if numbers_seen > 0 {
529 if chars[pointer] == '.' && numbers_seen < 4 {
530 pointer += 1;
531 } else {
532 return Err(UrlError::InvalidIpv6);
533 }
534 }
535 if pointer >= len || !chars[pointer].is_ascii_digit() {
536 return Err(UrlError::InvalidIpv6);
537 }
538 while pointer < len && chars[pointer].is_ascii_digit() {
539 let number = (chars[pointer] as u8 - b'0') as u16;
540 match ipv4_piece {
541 None => ipv4_piece = Some(number),
542 Some(0) => return Err(UrlError::InvalidIpv6), // leading zero
543 Some(v) => ipv4_piece = Some(v * 10 + number),
544 }
545 if ipv4_piece.unwrap_or(0) > 255 {
546 return Err(UrlError::InvalidIpv6);
547 }
548 pointer += 1;
549 }
550 pieces[piece_index] =
551 pieces[piece_index] * 0x100 + ipv4_piece.ok_or(UrlError::InvalidIpv6)?;
552 numbers_seen += 1;
553 if numbers_seen == 2 || numbers_seen == 4 {
554 piece_index += 1;
555 }
556 }
557 if numbers_seen != 4 {
558 return Err(UrlError::InvalidIpv6);
559 }
560 break;
561 }
562
563 if pointer < len && chars[pointer] == ':' {
564 pointer += 1;
565 if pointer >= len {
566 // Trailing single colon after a piece — only valid with compress.
567 }
568 } else if pointer < len {
569 return Err(UrlError::InvalidIpv6);
570 }
571
572 if piece_index >= 8 {
573 return Err(UrlError::InvalidIpv6);
574 }
575 pieces[piece_index] = value;
576 piece_index += 1;
577 }
578
579 if let Some(comp) = compress {
580 let mut swaps = piece_index - comp;
581 piece_index = 7;
582 while piece_index != 0 && swaps > 0 {
583 let swap_index = comp + swaps - 1;
584 pieces.swap(piece_index, swap_index);
585 piece_index -= 1;
586 swaps -= 1;
587 }
588 } else if piece_index != 8 {
589 return Err(UrlError::InvalidIpv6);
590 }
591
592 Ok(pieces)
593}
594
/// Formats eight IPv6 pieces as lowercase hex groups, replacing the first
/// longest run of two or more zero pieces with `::`. Brackets are not added.
fn serialize_ipv6(pieces: &[u16; 8]) -> String {
    // Locate the run to compress as (start, length).
    let mut compress: Option<(usize, usize)> = None;
    let mut idx = 0;
    while idx < pieces.len() {
        if pieces[idx] != 0 {
            idx += 1;
            continue;
        }
        let run_start = idx;
        while idx < pieces.len() && pieces[idx] == 0 {
            idx += 1;
        }
        let run_len = idx - run_start;
        // Strictly greater keeps the earliest run among equally long ones.
        let longer = compress.map_or(true, |(_, best)| run_len > best);
        if run_len >= 2 && longer {
            compress = Some((run_start, run_len));
        }
    }

    let mut text = String::new();
    let mut idx = 0;
    while idx < pieces.len() {
        match compress {
            Some((start, run_len)) if start == idx => {
                text.push_str("::");
                idx += run_len;
            }
            _ => {
                // No separator at the very start or right after "::".
                if !(text.is_empty() || text.ends_with(':')) {
                    text.push(':');
                }
                text.push_str(&format!("{:x}", pieces[idx]));
                idx += 1;
            }
        }
    }
    text
}
640
641// ---------------------------------------------------------------------------
642// Host parsing
643// ---------------------------------------------------------------------------
644
645fn parse_host(input: &str, is_special: bool) -> Result<Host> {
646 if input.is_empty() {
647 if is_special {
648 return Err(UrlError::InvalidHost);
649 }
650 return Ok(Host::Domain(String::new()));
651 }
652
653 // IPv6
654 if input.starts_with('[') {
655 if !input.ends_with(']') {
656 return Err(UrlError::InvalidIpv6);
657 }
658 let inner = &input[1..input.len() - 1];
659 let pieces = parse_ipv6(inner)?;
660 return Ok(Host::Ipv6(pieces));
661 }
662
663 if !is_special {
664 let encoded = percent_encode(input, is_c0_control);
665 return Ok(Host::Domain(encoded));
666 }
667
668 // Domain — percent-decode then lowercase.
669 let decoded = percent_decode_string(input);
670 let lowered = decoded.to_ascii_lowercase();
671
672 // Check if it's an IPv4 address.
673 if ends_with_number(&lowered) {
674 match parse_ipv4(&lowered) {
675 Ok(addr) => return Ok(Host::Ipv4(addr)),
676 Err(_) => return Err(UrlError::InvalidHost),
677 }
678 }
679
680 // Validate domain characters.
681 for c in lowered.chars() {
682 if c == '\0'
683 || c == '\t'
684 || c == '\n'
685 || c == '\r'
686 || c == ' '
687 || c == '#'
688 || c == '/'
689 || c == ':'
690 || c == '<'
691 || c == '>'
692 || c == '?'
693 || c == '@'
694 || c == '['
695 || c == '\\'
696 || c == ']'
697 || c == '^'
698 || c == '|'
699 {
700 return Err(UrlError::InvalidHost);
701 }
702 }
703
704 Ok(Host::Domain(lowered))
705}
706
/// Reports whether the last dot-separated label of `input` looks numeric,
/// which signals that the host should be parsed as IPv4.
///
/// The label counts as numeric when it is non-empty and consists solely of
/// ASCII digits, or is `0x`/`0X` followed by zero or more hex digits. An
/// empty last label (trailing dot) yields `false`.
fn ends_with_number(input: &str) -> bool {
    let tail = input.rsplit('.').next().unwrap_or("");
    if tail.is_empty() {
        return false;
    }
    let hex_digits = tail.strip_prefix("0x").or_else(|| tail.strip_prefix("0X"));
    match hex_digits {
        Some(hex) => hex.bytes().all(|b| b.is_ascii_hexdigit()),
        None => tail.bytes().all(|b| b.is_ascii_digit()),
    }
}
721
722// ---------------------------------------------------------------------------
723// Shorten path helper
724// ---------------------------------------------------------------------------
725
726fn shorten_path(scheme: &str, path: &mut Vec<String>) {
727 if scheme == "file" && path.len() == 1 {
728 if let Some(first) = path.first() {
729 if is_normalized_windows_drive_letter(first) {
730 return;
731 }
732 }
733 }
734 path.pop();
735}
736
/// Returns `true` when `s` is exactly an ASCII letter followed by `:`.
fn is_normalized_windows_drive_letter(s: &str) -> bool {
    matches!(s.as_bytes(), [letter, b':'] if letter.is_ascii_alphabetic())
}
741
/// Returns `true` when `s` begins with a Windows drive letter: an ASCII
/// letter, then `:` or `|`, then either the end of the string or one of `/`,
/// `\`, `?`, `#`.
fn starts_with_windows_drive_letter(s: &str) -> bool {
    match s.as_bytes() {
        [letter, separator, rest @ ..] => {
            letter.is_ascii_alphabetic()
                && matches!(*separator, b':' | b'|')
                && rest
                    .first()
                    .map_or(true, |next| matches!(*next, b'/' | b'\\' | b'?' | b'#'))
        }
        _ => false,
    }
}
759
760// ---------------------------------------------------------------------------
761// URL parser
762// ---------------------------------------------------------------------------
763
764fn parse_url(input: &str, base: Option<&Url>) -> Result<Url> {
765 // Strip leading/trailing C0 controls and spaces.
766 let input = input.trim_matches(|c: char| c <= '\u{0020}');
767
768 if input.is_empty() {
769 if let Some(base) = base {
770 return parse_relative("", base);
771 }
772 return Err(UrlError::EmptyInput);
773 }
774
775 // Remove tab and newline characters.
776 let input: String = input
777 .chars()
778 .filter(|&c| c != '\t' && c != '\n' && c != '\r')
779 .collect();
780
781 let chars: Vec<char> = input.chars().collect();
782 let len = chars.len();
783
784 let mut pointer = 0;
785
786 // Try to parse a scheme.
787 let mut scheme = String::new();
788 let mut has_scheme = false;
789
790 if pointer < len && chars[pointer].is_ascii_alphabetic() {
791 let mut temp = String::new();
792 temp.push(chars[pointer].to_ascii_lowercase());
793 let mut p = pointer + 1;
794 while p < len
795 && (chars[p].is_ascii_alphanumeric()
796 || chars[p] == '+'
797 || chars[p] == '-'
798 || chars[p] == '.')
799 {
800 temp.push(chars[p].to_ascii_lowercase());
801 p += 1;
802 }
803 if p < len && chars[p] == ':' {
804 scheme = temp;
805 has_scheme = true;
806 pointer = p + 1; // skip the ':'
807 }
808 }
809
810 if !has_scheme {
811 if let Some(base) = base {
812 return parse_relative(&input, base);
813 }
814 return Err(UrlError::MissingScheme);
815 }
816
817 let is_special = is_special_scheme(&scheme);
818
819 let mut url = Url {
820 scheme: scheme.clone(),
821 username: String::new(),
822 password: String::new(),
823 host: None,
824 port: None,
825 path: Vec::new(),
826 opaque_path: false,
827 query: None,
828 fragment: None,
829 };
830
831 let remaining: String = chars[pointer..].iter().collect();
832
833 if scheme == "file" {
834 return parse_file_url(&remaining, base, url);
835 }
836
837 if let Some(after_slashes) = remaining.strip_prefix("//") {
838 parse_authority_and_path(&mut url, after_slashes, is_special)?;
839 } else if is_special {
840 if let Some(base) = base {
841 if base.scheme == url.scheme {
842 return parse_relative_special(&remaining, base, url);
843 }
844 }
845 if let Some(after_slash) = remaining.strip_prefix('/') {
846 parse_authority_and_path(&mut url, after_slash, is_special)?;
847 } else {
848 parse_authority_and_path(&mut url, &remaining, is_special)?;
849 }
850 } else {
851 parse_opaque_or_path(&mut url, &remaining)?;
852 }
853
854 Ok(url)
855}
856
857fn parse_authority_and_path(url: &mut Url, input: &str, is_special: bool) -> Result<()> {
858 let authority_end = input
859 .find(|c: char| c == '/' || c == '?' || c == '#' || (is_special && c == '\\'))
860 .unwrap_or(input.len());
861
862 let authority = &input[..authority_end];
863 let rest = &input[authority_end..];
864
865 let (userinfo_part, hostport) = if let Some(at_pos) = authority.rfind('@') {
866 (&authority[..at_pos], &authority[at_pos + 1..])
867 } else {
868 ("", authority)
869 };
870
871 if !userinfo_part.is_empty() {
872 if let Some(colon_pos) = userinfo_part.find(':') {
873 url.username = percent_encode(&userinfo_part[..colon_pos], is_userinfo_encode);
874 url.password = percent_encode(&userinfo_part[colon_pos + 1..], is_userinfo_encode);
875 } else {
876 url.username = percent_encode(userinfo_part, is_userinfo_encode);
877 }
878 }
879
880 let (host_str, port_str) = split_host_port(hostport);
881
882 url.host = Some(parse_host(host_str, is_special)?);
883
884 if let Some(port_s) = port_str {
885 if !port_s.is_empty() {
886 let port: u16 = port_s.parse().map_err(|_| UrlError::InvalidPort)?;
887 if default_port(&url.scheme) != Some(port) {
888 url.port = Some(port);
889 }
890 }
891 }
892
893 parse_path_query_fragment(url, rest, is_special)
894}
895
/// Splits `host[:port]` into the host text and the optional port text.
///
/// * For a bracketed (IPv6) host the split happens right after `]`. If
///   anything other than `:port` follows the bracket, the whole input is
///   returned as the host so host parsing rejects it rather than the junk
///   being silently dropped.
/// * Otherwise the split happens at the last `:` provided everything after it
///   is ASCII digits (possibly nothing, giving `Some("")`).
/// * In every other case the whole input is the host and the port is `None`.
fn split_host_port(input: &str) -> (&str, Option<&str>) {
    if input.starts_with('[') {
        return match input.find(']') {
            Some(close) => {
                let (host, after) = input.split_at(close + 1);
                match after.strip_prefix(':') {
                    Some(port) => (host, Some(port)),
                    None if after.is_empty() => (host, None),
                    None => (input, None),
                }
            }
            None => (input, None),
        };
    }

    match input.rsplit_once(':') {
        Some((host, port)) if port.bytes().all(|b| b.is_ascii_digit()) => (host, Some(port)),
        _ => (input, None),
    }
}
917
918fn parse_path_query_fragment(url: &mut Url, input: &str, is_special: bool) -> Result<()> {
919 let mut remaining = input;
920
921 let path_end = remaining.find(['?', '#']).unwrap_or(remaining.len());
922 let path_str = &remaining[..path_end];
923 remaining = &remaining[path_end..];
924
925 parse_path_into(url, path_str, is_special);
926
927 if let Some(after_q) = remaining.strip_prefix('?') {
928 remaining = after_q;
929 let query_end = remaining.find('#').unwrap_or(remaining.len());
930 let query_str = &remaining[..query_end];
931 remaining = &remaining[query_end..];
932
933 let encode_fn = if is_special {
934 is_special_query_encode
935 } else {
936 is_query_encode
937 };
938 url.query = Some(percent_encode(query_str, encode_fn));
939 }
940
941 if let Some(after_hash) = remaining.strip_prefix('#') {
942 url.fragment = Some(percent_encode(after_hash, is_fragment_encode));
943 }
944
945 Ok(())
946}
947
948fn parse_path_into(url: &mut Url, path: &str, is_special: bool) {
949 if path.is_empty() {
950 if is_special {
951 url.path = vec![String::new()];
952 }
953 return;
954 }
955
956 let segments: Vec<&str> = if is_special {
957 path.split(['/', '\\']).collect()
958 } else {
959 path.split('/').collect()
960 };
961
962 for (i, seg) in segments.iter().enumerate() {
963 if i == 0 && seg.is_empty() {
964 continue;
965 }
966
967 let decoded = *seg;
968 if decoded == "." || decoded.eq_ignore_ascii_case("%2e") {
969 if i == segments.len() - 1 {
970 url.path.push(String::new());
971 }
972 } else if decoded == ".."
973 || decoded.eq_ignore_ascii_case(".%2e")
974 || decoded.eq_ignore_ascii_case("%2e.")
975 || decoded.eq_ignore_ascii_case("%2e%2e")
976 {
977 shorten_path(&url.scheme, &mut url.path);
978 if i == segments.len() - 1 {
979 url.path.push(String::new());
980 }
981 } else {
982 url.path.push(percent_encode(decoded, is_path_encode));
983 }
984 }
985}
986
987fn parse_opaque_or_path(url: &mut Url, input: &str) -> Result<()> {
988 let mut remaining = input;
989
990 let path_end = remaining.find(['?', '#']).unwrap_or(remaining.len());
991 let path_str = &remaining[..path_end];
992 remaining = &remaining[path_end..];
993
994 if path_str.starts_with('/') {
995 url.opaque_path = false;
996 parse_path_into(url, path_str, false);
997 } else {
998 url.opaque_path = true;
999 url.path = vec![percent_encode(path_str, is_c0_control)];
1000 }
1001
1002 if let Some(after_q) = remaining.strip_prefix('?') {
1003 remaining = after_q;
1004 let query_end = remaining.find('#').unwrap_or(remaining.len());
1005 let query_str = &remaining[..query_end];
1006 remaining = &remaining[query_end..];
1007 url.query = Some(percent_encode(query_str, is_query_encode));
1008 }
1009
1010 if let Some(after_hash) = remaining.strip_prefix('#') {
1011 url.fragment = Some(percent_encode(after_hash, is_fragment_encode));
1012 }
1013
1014 Ok(())
1015}
1016
1017// ---------------------------------------------------------------------------
1018// Relative URL parsing
1019// ---------------------------------------------------------------------------
1020
1021fn parse_relative(input: &str, base: &Url) -> Result<Url> {
1022 let mut url = Url {
1023 scheme: base.scheme.clone(),
1024 username: base.username.clone(),
1025 password: base.password.clone(),
1026 host: base.host.clone(),
1027 port: base.port,
1028 path: base.path.clone(),
1029 opaque_path: base.opaque_path,
1030 query: base.query.clone(),
1031 fragment: None,
1032 };
1033
1034 let is_special = is_special_scheme(&url.scheme);
1035
1036 if input.is_empty() {
1037 return Ok(url);
1038 }
1039
1040 let chars: Vec<char> = input.chars().collect();
1041
1042 if chars[0] == '/' || (is_special && chars[0] == '\\') {
1043 if input.starts_with("//") || (is_special && input.starts_with("\\/")) {
1044 let after_slashes = &input[2..];
1045 url.username = String::new();
1046 url.password = String::new();
1047 url.path = Vec::new();
1048 url.query = None;
1049 parse_authority_and_path(&mut url, after_slashes, is_special)?;
1050 return Ok(url);
1051 }
1052 url.path = Vec::new();
1053 url.query = None;
1054 parse_path_query_fragment(&mut url, input, is_special)?;
1055 return Ok(url);
1056 }
1057
1058 if let Some(after_q) = input.strip_prefix('?') {
1059 url.query = None;
1060 url.fragment = None;
1061 let query_end = after_q.find('#').unwrap_or(after_q.len());
1062 let query_str = &after_q[..query_end];
1063 let after = &after_q[query_end..];
1064
1065 let encode_fn = if is_special {
1066 is_special_query_encode
1067 } else {
1068 is_query_encode
1069 };
1070 url.query = Some(percent_encode(query_str, encode_fn));
1071
1072 if let Some(frag) = after.strip_prefix('#') {
1073 url.fragment = Some(percent_encode(frag, is_fragment_encode));
1074 }
1075 return Ok(url);
1076 }
1077
1078 if let Some(frag) = input.strip_prefix('#') {
1079 url.fragment = Some(percent_encode(frag, is_fragment_encode));
1080 return Ok(url);
1081 }
1082
1083 // Path-relative.
1084 if !url.opaque_path {
1085 shorten_path(&url.scheme, &mut url.path);
1086 }
1087 url.query = None;
1088 url.fragment = None;
1089
1090 parse_path_query_fragment(&mut url, &format!("/{input}"), is_special)?;
1091 Ok(url)
1092}
1093
1094fn parse_relative_special(remaining: &str, base: &Url, mut url: Url) -> Result<Url> {
1095 url.username = base.username.clone();
1096 url.password = base.password.clone();
1097 url.host = base.host.clone();
1098 url.port = base.port;
1099 url.path = base.path.clone();
1100 url.query = base.query.clone();
1101
1102 let is_special = true;
1103
1104 if remaining.is_empty() {
1105 return Ok(url);
1106 }
1107
1108 if remaining.starts_with('/') || remaining.starts_with('\\') {
1109 url.path = Vec::new();
1110 url.query = None;
1111 parse_path_query_fragment(&mut url, remaining, is_special)?;
1112 return Ok(url);
1113 }
1114
1115 if let Some(rest) = remaining.strip_prefix('?') {
1116 url.query = None;
1117 url.fragment = None;
1118 let query_end = rest.find('#').unwrap_or(rest.len());
1119 url.query = Some(percent_encode(&rest[..query_end], is_special_query_encode));
1120 if query_end < rest.len() {
1121 url.fragment = Some(percent_encode(&rest[query_end + 1..], is_fragment_encode));
1122 }
1123 return Ok(url);
1124 }
1125
1126 if let Some(frag) = remaining.strip_prefix('#') {
1127 url.fragment = Some(percent_encode(frag, is_fragment_encode));
1128 return Ok(url);
1129 }
1130
1131 shorten_path(&url.scheme, &mut url.path);
1132 url.query = None;
1133 parse_path_query_fragment(&mut url, &format!("/{remaining}"), is_special)?;
1134 Ok(url)
1135}
1136
1137// ---------------------------------------------------------------------------
1138// File URL parsing
1139// ---------------------------------------------------------------------------
1140
1141fn parse_file_url(input: &str, base: Option<&Url>, mut url: Url) -> Result<Url> {
1142 url.host = Some(Host::Domain(String::new()));
1143
1144 let remaining = if let Some(after) = input.strip_prefix("//") {
1145 after
1146 } else if let Some(after) = input.strip_prefix('/') {
1147 after
1148 } else if let Some(base) = base {
1149 if base.scheme == "file" {
1150 url.host = base.host.clone();
1151 url.path = base.path.clone();
1152
1153 if let Some(rest) = input.strip_prefix('?') {
1154 url.query = None;
1155 url.fragment = None;
1156 let query_end = rest.find('#').unwrap_or(rest.len());
1157 url.query = Some(percent_encode(&rest[..query_end], is_query_encode));
1158 if query_end < rest.len() {
1159 url.fragment = Some(percent_encode(&rest[query_end + 1..], is_fragment_encode));
1160 }
1161 return Ok(url);
1162 }
1163
1164 if let Some(frag) = input.strip_prefix('#') {
1165 url.fragment = Some(percent_encode(frag, is_fragment_encode));
1166 return Ok(url);
1167 }
1168
1169 shorten_path(&url.scheme, &mut url.path);
1170 url.query = None;
1171 parse_path_query_fragment(&mut url, &format!("/{input}"), false)?;
1172 return Ok(url);
1173 } else {
1174 input
1175 }
1176 } else {
1177 input
1178 };
1179
1180 let path_start = remaining
1181 .find(['/', '\\', '?', '#'])
1182 .unwrap_or(remaining.len());
1183
1184 let potential_host = &remaining[..path_start];
1185 let rest = &remaining[path_start..];
1186
1187 if starts_with_windows_drive_letter(remaining) {
1188 url.host = Some(Host::Domain(String::new()));
1189 parse_path_query_fragment(&mut url, &format!("/{remaining}"), false)?;
1190 return Ok(url);
1191 }
1192
1193 if !potential_host.is_empty() {
1194 let host = parse_host(potential_host, false)?;
1195 if host != Host::Domain(String::new()) {
1196 url.host = Some(host);
1197 }
1198 }
1199
1200 parse_path_query_fragment(&mut url, rest, false)?;
1201
1202 // Normalize Windows drive letters in path.
1203 if let Some(first) = url.path.first_mut() {
1204 if first.len() == 2 {
1205 let bytes = first.as_bytes();
1206 if bytes[0].is_ascii_alphabetic() && bytes[1] == b'|' {
1207 let mut normalized = String::new();
1208 normalized.push(bytes[0] as char);
1209 normalized.push(':');
1210 *first = normalized;
1211 }
1212 }
1213 }
1214
1215 Ok(url)
1216}
1217
1218// ---------------------------------------------------------------------------
1219// Tests
1220// ---------------------------------------------------------------------------
1221
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` as an absolute URL; a failure panics at the call site.
    #[track_caller]
    fn parsed(input: &str) -> Url {
        Url::parse(input).unwrap()
    }

    /// Parses `base` as an absolute URL and resolves `reference` against it;
    /// a failure in either step panics at the call site.
    #[track_caller]
    fn resolved(reference: &str, base: &str) -> Url {
        let base = Url::parse(base).unwrap();
        Url::parse_with_base(reference, &base).unwrap()
    }

    // -------------------------------------------------------------------
    // Absolute URLs: components and default ports
    // -------------------------------------------------------------------

    #[test]
    fn parse_simple_http() {
        let u = parsed("http://example.com");
        assert_eq!(u.scheme(), "http");
        assert_eq!(u.host_str(), Some("example.com".into()));
        assert_eq!(u.port(), None);
        assert_eq!(u.path(), "/");
        assert_eq!(u.query(), None);
        assert_eq!(u.fragment(), None);
    }

    #[test]
    fn parse_https_with_path() {
        let u = parsed("https://example.com/foo/bar");
        assert_eq!(u.scheme(), "https");
        assert_eq!(u.host_str(), Some("example.com".into()));
        assert_eq!(u.path(), "/foo/bar");
    }

    #[test]
    fn parse_full_url() {
        let u = parsed("https://user:pass@example.com:8080/path/to/page?q=1&r=2#frag");
        assert_eq!(u.scheme(), "https");
        assert_eq!(u.username(), "user");
        assert_eq!(u.password(), "pass");
        assert_eq!(u.host_str(), Some("example.com".into()));
        assert_eq!(u.port(), Some(8080));
        assert_eq!(u.path(), "/path/to/page");
        assert_eq!(u.query(), Some("q=1&r=2"));
        assert_eq!(u.fragment(), Some("frag"));
    }

    #[test]
    fn parse_default_port_omitted() {
        let u = parsed("http://example.com:80/");
        assert_eq!(u.port(), None);
        assert_eq!(u.port_or_default(), Some(80));
    }

    #[test]
    fn parse_non_default_port() {
        assert_eq!(parsed("http://example.com:8080/").port(), Some(8080));
    }

    #[test]
    fn parse_https_default_port() {
        assert_eq!(parsed("https://example.com:443/").port(), None);
    }

    #[test]
    fn parse_ftp_default_port() {
        let u = parsed("ftp://files.example.com:21/readme.txt");
        assert_eq!(u.port(), None);
        assert_eq!(u.port_or_default(), Some(21));
    }

    // -------------------------------------------------------------------
    // Schemes
    // -------------------------------------------------------------------

    #[test]
    fn scheme_is_lowercased() {
        assert_eq!(parsed("HTTP://EXAMPLE.COM").scheme(), "http");
    }

    #[test]
    fn non_special_scheme() {
        let u = parsed("custom://host/path");
        assert_eq!(u.scheme(), "custom");
        assert_eq!(u.host_str(), Some("host".into()));
        assert_eq!(u.path(), "/path");
    }

    #[test]
    fn data_uri() {
        let u = parsed("data:text/html,<h1>Hello</h1>");
        assert_eq!(u.scheme(), "data");
        assert!(u.cannot_be_a_base());
    }

    #[test]
    fn javascript_uri() {
        let u = parsed("javascript:alert(1)");
        assert_eq!(u.scheme(), "javascript");
        assert!(u.cannot_be_a_base());
    }

    #[test]
    fn mailto_uri() {
        let u = parsed("mailto:user@example.com");
        assert_eq!(u.scheme(), "mailto");
        assert!(u.cannot_be_a_base());
    }

    // -------------------------------------------------------------------
    // Hosts as seen through a parsed URL
    // -------------------------------------------------------------------

    #[test]
    fn host_is_lowercased() {
        assert_eq!(
            parsed("http://EXAMPLE.COM/").host_str(),
            Some("example.com".into())
        );
    }

    #[test]
    fn ipv4_host() {
        let u = parsed("http://127.0.0.1/");
        assert_eq!(u.host(), Some(&Host::Ipv4(0x7F000001)));
        assert_eq!(u.host_str(), Some("127.0.0.1".into()));
    }

    #[test]
    fn ipv4_host_all_zeros() {
        assert_eq!(parsed("http://0.0.0.0/").host(), Some(&Host::Ipv4(0)));
    }

    #[test]
    fn ipv6_host() {
        let expected = Host::Ipv6([0, 0, 0, 0, 0, 0, 0, 1]);
        assert_eq!(parsed("http://[::1]/").host(), Some(&expected));
    }

    #[test]
    fn ipv6_full() {
        let expected = Host::Ipv6([0x2001, 0x0db8, 0x85a3, 0, 0, 0x8a2e, 0x0370, 0x7334]);
        let u = parsed("http://[2001:db8:85a3:0:0:8a2e:370:7334]/");
        assert_eq!(u.host(), Some(&expected));
    }

    #[test]
    fn ipv6_serialization_compressed() {
        assert_eq!(
            parsed("http://[2001:db8::1]/").host_str(),
            Some("[2001:db8::1]".into())
        );
    }

    #[test]
    fn ipv6_all_zeros() {
        let u = parsed("http://[::]/");
        assert_eq!(u.host(), Some(&Host::Ipv6([0; 8])));
        assert_eq!(u.host_str(), Some("[::]".into()));
    }

    #[test]
    fn ipv6_loopback() {
        assert_eq!(parse_ipv6("::1").unwrap(), [0, 0, 0, 0, 0, 0, 0, 1]);
    }

    #[test]
    fn ipv6_with_ipv4() {
        // The trailing dotted quad fills the last two 16-bit pieces.
        assert_eq!(
            parse_ipv6("::ffff:192.168.1.1").unwrap(),
            [0, 0, 0, 0, 0, 0xffff, 0xc0a8, 0x0101]
        );
    }

    // -------------------------------------------------------------------
    // The IPv4 parser on its own
    // -------------------------------------------------------------------

    #[test]
    fn ipv4_basic() {
        let address = parse_ipv4("192.168.1.1").unwrap();
        assert_eq!(address, 0xC0A80101);
    }

    #[test]
    fn ipv4_hex() {
        let address = parse_ipv4("0xC0.0xA8.0x01.0x01").unwrap();
        assert_eq!(address, 0xC0A80101);
    }

    #[test]
    fn ipv4_octal() {
        let address = parse_ipv4("0300.0250.01.01").unwrap();
        assert_eq!(address, 0xC0A80101);
    }

    #[test]
    fn ipv4_single_number() {
        // NOTE(review): the URL Standard's IPv4 parser accepts a lone number
        // (this one would be 192.168.1.1); this test pins rejection instead —
        // confirm that the deviation is intentional.
        let outcome = parse_ipv4("3232235777");
        assert!(outcome.is_err());
    }

    #[test]
    fn ipv4_two_parts() {
        // With two parts the first supplies the top 8 bits and the second the
        // remaining 24: 168 * 65536 + 1 * 256 + 1 = 11010305.
        let address = parse_ipv4("192.11010305").unwrap();
        assert_eq!(address, 0xC0A80101);
    }

    #[test]
    fn ipv4_reject_overflow() {
        let outcome = parse_ipv4("256.0.0.0");
        assert!(outcome.is_err());
    }

    #[test]
    fn ipv4_reject_empty_part() {
        let outcome = parse_ipv4("1..1.1");
        assert!(outcome.is_err());
    }

    // -------------------------------------------------------------------
    // Percent-decoding and percent-encoding
    // -------------------------------------------------------------------

    #[test]
    fn percent_decode_basic() {
        let decoded = percent_decode_string("%48%65%6C%6C%6F");
        assert_eq!(decoded, "Hello");
    }

    #[test]
    fn percent_decode_mixed() {
        let decoded = percent_decode_string("Hello%20World");
        assert_eq!(decoded, "Hello World");
    }

    #[test]
    fn percent_decode_passthrough() {
        let decoded = percent_decode_string("no-encoding");
        assert_eq!(decoded, "no-encoding");
    }

    #[test]
    fn percent_decode_partial() {
        // Incomplete escapes are left exactly as written.
        let lone_percent = percent_decode_string("100%");
        assert_eq!(lone_percent, "100%");
        let one_digit = percent_decode_string("%2");
        assert_eq!(one_digit, "%2");
    }

    #[test]
    fn percent_encode_userinfo() {
        assert_eq!(percent_encode("user@host", is_userinfo_encode), "user%40host");
    }

    #[test]
    fn percent_encode_path() {
        assert_eq!(percent_encode("hello world", is_path_encode), "hello%20world");
    }

    // -------------------------------------------------------------------
    // Paths and dot-segment removal
    // -------------------------------------------------------------------

    #[test]
    fn path_dot_removal() {
        assert_eq!(parsed("http://example.com/a/b/../c").path(), "/a/c");
    }

    #[test]
    fn path_dot_current() {
        assert_eq!(parsed("http://example.com/a/./b").path(), "/a/b");
    }

    #[test]
    fn path_multiple_dots() {
        assert_eq!(parsed("http://example.com/a/b/c/../../d").path(), "/a/d");
    }

    #[test]
    fn path_trailing_slash() {
        assert_eq!(parsed("http://example.com/a/b/").path(), "/a/b/");
    }

    #[test]
    fn path_empty() {
        assert_eq!(parsed("http://example.com").path(), "/");
    }

    #[test]
    fn path_double_dot_at_root() {
        assert_eq!(parsed("http://example.com/../a").path(), "/a");
    }

    // -------------------------------------------------------------------
    // Resolving references against a base URL
    // -------------------------------------------------------------------

    #[test]
    fn relative_path() {
        let u = resolved("d", "http://example.com/a/b/c");
        assert_eq!(u.path(), "/a/b/d");
        assert_eq!(u.host_str(), Some("example.com".into()));
    }

    #[test]
    fn relative_path_with_dots() {
        let u = resolved("../d", "http://example.com/a/b/c");
        assert_eq!(u.path(), "/a/d");
    }

    #[test]
    fn relative_absolute_path() {
        let u = resolved("/d/e", "http://example.com/a/b/c");
        assert_eq!(u.path(), "/d/e");
        assert_eq!(u.host_str(), Some("example.com".into()));
    }

    #[test]
    fn relative_query_only() {
        let u = resolved("?new=2", "http://example.com/a/b?old=1");
        assert_eq!(u.path(), "/a/b");
        assert_eq!(u.query(), Some("new=2"));
    }

    #[test]
    fn relative_fragment_only() {
        let u = resolved("#new", "http://example.com/a/b#old");
        assert_eq!(u.path(), "/a/b");
        assert_eq!(u.fragment(), Some("new"));
    }

    #[test]
    fn relative_authority_override() {
        let u = resolved("//other.com/c", "http://example.com/a/b");
        assert_eq!(u.scheme(), "http");
        assert_eq!(u.host_str(), Some("other.com".into()));
        assert_eq!(u.path(), "/c");
    }

    #[test]
    fn absolute_url_ignores_base() {
        let u = resolved("https://other.com/b", "http://example.com/a");
        assert_eq!(u.scheme(), "https");
        assert_eq!(u.host_str(), Some("other.com".into()));
        assert_eq!(u.path(), "/b");
    }

    #[test]
    fn relative_empty_string() {
        // An empty reference keeps the base path and query, not its fragment.
        let u = resolved("", "http://example.com/a/b?q=1#f");
        assert_eq!(u.path(), "/a/b");
        assert_eq!(u.query(), Some("q=1"));
        assert_eq!(u.fragment(), None);
    }

    // -------------------------------------------------------------------
    // Serialization and round-trips
    // -------------------------------------------------------------------

    #[test]
    fn serialize_simple() {
        let text = parsed("http://example.com/path").serialize();
        assert_eq!(text, "http://example.com/path");
    }

    #[test]
    fn serialize_with_credentials() {
        let text = parsed("http://user:pass@example.com/").serialize();
        assert_eq!(text, "http://user:pass@example.com/");
    }

    #[test]
    fn serialize_with_port() {
        let text = parsed("http://example.com:8080/").serialize();
        assert_eq!(text, "http://example.com:8080/");
    }

    #[test]
    fn serialize_with_query_fragment() {
        let text = parsed("http://example.com/path?q=1#frag").serialize();
        assert_eq!(text, "http://example.com/path?q=1#frag");
    }

    #[test]
    fn serialize_data_uri() {
        let text = parsed("data:text/html,hello").serialize();
        assert_eq!(text, "data:text/html,hello");
    }

    #[test]
    fn roundtrip_full_url() {
        let input = "https://user:pass@example.com:8080/a/b?q=1#frag";
        assert_eq!(parsed(input).serialize(), input);
    }

    #[test]
    fn roundtrip_ipv4() {
        let text = parsed("http://192.168.1.1/path").serialize();
        assert_eq!(text, "http://192.168.1.1/path");
    }

    #[test]
    fn roundtrip_ipv6() {
        let text = parsed("http://[::1]/path").serialize();
        assert_eq!(text, "http://[::1]/path");
    }

    // -------------------------------------------------------------------
    // Origins
    // -------------------------------------------------------------------

    #[test]
    fn origin_http() {
        let u = parsed("http://example.com:8080/path");
        if let Origin::Tuple(scheme, host, port) = u.origin() {
            assert_eq!(scheme, "http");
            assert_eq!(host, Host::Domain("example.com".into()));
            assert_eq!(port, Some(8080));
        } else {
            panic!("expected tuple origin");
        }
    }

    #[test]
    fn origin_https_default_port() {
        let u = parsed("https://example.com/");
        if let Origin::Tuple(scheme, host, port) = u.origin() {
            assert_eq!(scheme, "https");
            assert_eq!(host, Host::Domain("example.com".into()));
            assert_eq!(port, None);
        } else {
            panic!("expected tuple origin");
        }
    }

    #[test]
    fn origin_data_is_opaque() {
        let u = parsed("data:text/html,hello");
        assert_eq!(u.origin(), Origin::Opaque);
    }

    // -------------------------------------------------------------------
    // `file:` URLs
    // -------------------------------------------------------------------

    #[test]
    fn file_url_unix() {
        let u = parsed("file:///home/user/file.txt");
        assert_eq!(u.scheme(), "file");
        assert_eq!(u.host_str(), Some("".into()));
        assert_eq!(u.path(), "/home/user/file.txt");
    }

    #[test]
    fn file_url_windows_drive() {
        let u = parsed("file:///C:/Windows/system32");
        assert_eq!(u.scheme(), "file");
        assert_eq!(u.path(), "/C:/Windows/system32");
    }

    #[test]
    fn file_url_with_host() {
        let u = parsed("file://server/share/file.txt");
        assert_eq!(u.scheme(), "file");
        assert_eq!(u.host_str(), Some("server".into()));
        assert_eq!(u.path(), "/share/file.txt");
    }

    // -------------------------------------------------------------------
    // Rejected input, whitespace handling and other edge cases
    // -------------------------------------------------------------------

    #[test]
    fn empty_input_fails() {
        assert_eq!(Url::parse(""), Err(UrlError::EmptyInput));
    }

    #[test]
    fn whitespace_only_fails() {
        assert_eq!(Url::parse(" "), Err(UrlError::EmptyInput));
    }

    #[test]
    fn missing_scheme_fails() {
        let outcome = Url::parse("example.com");
        assert!(outcome.is_err());
    }

    #[test]
    fn leading_whitespace_stripped() {
        let u = parsed(" http://example.com ");
        assert_eq!(u.host_str(), Some("example.com".into()));
    }

    #[test]
    fn tab_newline_stripped() {
        // Tabs and newlines inside the input are removed before parsing.
        let u = parsed("http://exa\tmple\n.com/");
        assert_eq!(u.host_str(), Some("example.com".into()));
    }

    #[test]
    fn query_with_special_chars() {
        let u = parsed("http://example.com/?key=val ue&foo=bar");
        let query = u.query().unwrap();
        assert!(query.contains("key=val%20ue"));
    }

    #[test]
    fn fragment_with_special_chars() {
        let u = parsed("http://example.com/#sec tion");
        let fragment = u.fragment().unwrap();
        assert!(fragment.contains("sec%20tion"));
    }

    #[test]
    fn username_only() {
        let u = parsed("http://user@example.com/");
        assert_eq!(u.username(), "user");
        assert_eq!(u.password(), "");
        assert!(u.has_credentials());
    }

    #[test]
    fn no_credentials() {
        let u = parsed("http://example.com/");
        assert!(!u.has_credentials());
    }

    #[test]
    fn port_overflow_fails() {
        let outcome = Url::parse("http://example.com:99999/");
        assert!(outcome.is_err());
    }

    #[test]
    fn ws_scheme() {
        let u = parsed("ws://example.com/chat");
        assert_eq!(u.scheme(), "ws");
        assert_eq!(u.port_or_default(), Some(80));
    }

    #[test]
    fn wss_scheme() {
        let u = parsed("wss://example.com/chat");
        assert_eq!(u.scheme(), "wss");
        assert_eq!(u.port_or_default(), Some(443));
    }

    #[test]
    fn cannot_be_a_base() {
        let u = parsed("data:text/html,hello");
        assert!(u.cannot_be_a_base());
    }

    #[test]
    fn http_can_be_a_base() {
        let u = parsed("http://example.com/");
        assert!(!u.cannot_be_a_base());
    }

    // -------------------------------------------------------------------
    // `Display` agrees with `serialize`
    // -------------------------------------------------------------------

    #[test]
    fn display_matches_serialize() {
        let u = parsed("https://example.com:8443/path?q=1#f");
        let displayed = format!("{u}");
        assert_eq!(displayed, u.serialize());
    }

    // -------------------------------------------------------------------
    // Path segment access
    // -------------------------------------------------------------------

    #[test]
    fn path_segments() {
        let u = parsed("http://example.com/a/b/c");
        assert_eq!(u.path_segments(), &["a", "b", "c"]);
    }

    #[test]
    fn path_segments_trailing_slash() {
        // A trailing slash shows up as a final empty segment.
        let u = parsed("http://example.com/a/b/");
        assert_eq!(u.path_segments(), &["a", "b", ""]);
    }

    // -------------------------------------------------------------------
    // `Host::serialize`
    // -------------------------------------------------------------------

    #[test]
    fn host_serialize_domain() {
        let host = Host::Domain("example.com".into());
        assert_eq!(host.serialize(), "example.com");
    }

    #[test]
    fn host_serialize_ipv4() {
        let host = Host::Ipv4(0x7F000001);
        assert_eq!(host.serialize(), "127.0.0.1");
    }

    #[test]
    fn host_serialize_ipv6() {
        let host = Host::Ipv6([0, 0, 0, 0, 0, 0, 0, 1]);
        assert_eq!(host.serialize(), "[::1]");
    }

    // -------------------------------------------------------------------
    // `serialize_ipv6`
    // -------------------------------------------------------------------

    #[test]
    fn ipv6_serialize_full() {
        let text = serialize_ipv6(&[
            0x2001, 0x0db8, 0x85a3, 0x0001, 0x0002, 0x8a2e, 0x0370, 0x7334,
        ]);
        assert_eq!(text, "2001:db8:85a3:1:2:8a2e:370:7334");
    }

    #[test]
    fn ipv6_serialize_compress() {
        let text = serialize_ipv6(&[0x2001, 0x0db8, 0, 0, 0, 0, 0, 1]);
        assert_eq!(text, "2001:db8::1");
    }

    #[test]
    fn ipv6_serialize_all_zeros() {
        let text = serialize_ipv6(&[0u16; 8]);
        assert_eq!(text, "::");
    }

    #[test]
    fn ipv6_serialize_no_compress_single_zero() {
        // Isolated zero pieces are written out rather than collapsed to `::`.
        let text = serialize_ipv6(&[1, 0, 2, 0, 3, 0, 4, 0]);
        assert_eq!(text, "1:0:2:0:3:0:4:0");
    }

    // -------------------------------------------------------------------
    // More percent-encoding cases
    // -------------------------------------------------------------------

    #[test]
    fn percent_encode_preserves_unreserved() {
        let input = "hello-world_test.page~1";
        assert_eq!(percent_encode(input, is_path_encode), "hello-world_test.page~1");
    }

    #[test]
    fn percent_encode_multibyte_utf8() {
        // Each byte of the UTF-8 encoding of a non-ASCII character is escaped.
        assert_eq!(percent_encode("café", is_path_encode), "caf%C3%A9");
    }
}