//! WHATWG URL parser.
//!
//! Implements the URL Standard (<https://url.spec.whatwg.org/>):
//! - URL record type with scheme, username, password, host, port, path, query, fragment
//! - State-machine parser following the spec
//! - Host parsing: domains, IPv4 addresses, IPv6 addresses
//! - Percent-encoding and decoding (UTF-8)
//! - Special scheme handling (http, https, ftp, ws, wss, file)
//! - Relative URL resolution via base URL
//! - URL serialization
//! - Origin derivation
use core::fmt;
// ---------------------------------------------------------------------------
// Error types
// ---------------------------------------------------------------------------
/// Errors reported by the URL parser and its host/port sub-parsers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UrlError {
/// Input is empty once leading/trailing C0 controls and spaces are trimmed,
/// and no base URL was supplied.
EmptyInput,
/// Invalid URL syntax.
InvalidUrl,
/// Invalid scheme.
InvalidScheme,
/// Invalid authority.
InvalidAuthority,
/// Invalid host (empty host on a special scheme, forbidden character in a
/// domain, or a host that ends in a number but is not a valid IPv4 address).
InvalidHost,
/// Invalid port number (the port text does not parse as a `u16`).
InvalidPort,
/// Invalid IPv4 address.
InvalidIpv4,
/// Invalid IPv6 address (also reported for a `[` host with no closing `]`).
InvalidIpv6,
/// Invalid percent-encoding.
InvalidPercentEncoding,
/// Relative URL without a base.
RelativeWithoutBase,
/// Missing scheme: the input has no valid `scheme:` prefix and no base URL
/// was supplied to resolve it against.
MissingScheme,
}
impl fmt::Display for UrlError {
    /// Writes the fixed, lowercase description that belongs to the variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::EmptyInput => "empty input",
            Self::InvalidUrl => "invalid URL",
            Self::InvalidScheme => "invalid scheme",
            Self::InvalidAuthority => "invalid authority",
            Self::InvalidHost => "invalid host",
            Self::InvalidPort => "invalid port number",
            Self::InvalidIpv4 => "invalid IPv4 address",
            Self::InvalidIpv6 => "invalid IPv6 address",
            Self::InvalidPercentEncoding => "invalid percent-encoding",
            Self::RelativeWithoutBase => "relative URL without a base",
            Self::MissingScheme => "missing scheme",
        };
        f.write_str(message)
    }
}
pub type Result = core::result::Result;
// ---------------------------------------------------------------------------
// Host
// ---------------------------------------------------------------------------
/// A parsed URL host.
///
/// Produced by `parse_host`; `Host::serialize` turns it back into URL text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Host {
/// A domain name or opaque host string. When parsed as a special-scheme
/// host the value has been percent-decoded and ASCII-lowercased; otherwise
/// it is kept percent-encoded with its case untouched. May be empty.
Domain(String),
/// An IPv4 address packed into 32 bits, first octet in the most
/// significant byte.
Ipv4(u32),
/// An IPv6 address (128 bits as eight 16-bit pieces).
Ipv6([u16; 8]),
}
impl Host {
    /// Render the host as it appears inside a URL.
    ///
    /// Domains are returned verbatim, IPv4 addresses in dotted-decimal form,
    /// and IPv6 addresses in compressed form wrapped in square brackets.
    pub fn serialize(&self) -> String {
        match *self {
            Host::Ipv4(addr) => serialize_ipv4(addr),
            Host::Ipv6(ref pieces) => {
                let mut bracketed = String::from("[");
                bracketed.push_str(&serialize_ipv6(pieces));
                bracketed.push(']');
                bracketed
            }
            Host::Domain(ref name) => name.to_owned(),
        }
    }
}
impl fmt::Display for Host {
    /// Displays the host exactly as [`Host::serialize`] renders it.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.serialize();
        f.write_str(&text)
    }
}
// ---------------------------------------------------------------------------
// Origin
// ---------------------------------------------------------------------------
/// A URL origin (scheme, host, port).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Origin {
/// A tuple origin (scheme, host, port).
Tuple(String, Host, Option),
/// An opaque origin (unique, not equal to anything).
Opaque,
}
// ---------------------------------------------------------------------------
// URL record
// ---------------------------------------------------------------------------
/// A parsed URL record per the WHATWG URL Standard.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Url {
/// The scheme (e.g., "http", "https", "file").
pub scheme: String,
/// The username (percent-encoded).
username: String,
/// The password (percent-encoded).
password: String,
/// The host.
pub host: Option,
/// The port (None = default or absent).
pub port: Option,
/// Path segments. For non-opaque paths, these are the segments.
/// For opaque paths (cannot-be-a-base URL), this is a single element.
path: Vec,
/// Whether this URL has an opaque path (cannot-be-a-base URL).
opaque_path: bool,
/// The query string (without leading '?').
pub query: Option,
/// The fragment (without leading '#').
pub fragment: Option,
}
impl Url {
/// Parse a URL string.
pub fn parse(input: &str) -> Result {
parse_url(input, None)
}
/// Parse a URL string with a base URL for resolving relative references.
pub fn parse_with_base(input: &str, base: &Url) -> Result {
parse_url(input, Some(base))
}
/// Get the scheme.
pub fn scheme(&self) -> &str {
&self.scheme
}
/// Get the username (percent-encoded).
pub fn username(&self) -> &str {
&self.username
}
/// Get the password (percent-encoded).
pub fn password(&self) -> &str {
&self.password
}
/// Get the host.
pub fn host(&self) -> Option<&Host> {
self.host.as_ref()
}
/// Get the host as a string.
pub fn host_str(&self) -> Option {
self.host.as_ref().map(|h| h.serialize())
}
/// Get the port.
pub fn port(&self) -> Option {
self.port
}
/// Get the port or the default port for the scheme.
pub fn port_or_default(&self) -> Option {
self.port.or_else(|| default_port(&self.scheme))
}
/// Get the path as a string.
pub fn path(&self) -> String {
if self.opaque_path {
self.path.first().cloned().unwrap_or_default()
} else {
let mut s = String::new();
for seg in &self.path {
s.push('/');
s.push_str(seg);
}
if s.is_empty() {
s.push('/');
}
s
}
}
/// Get the path segments.
pub fn path_segments(&self) -> &[String] {
&self.path
}
/// Get the query string.
pub fn query(&self) -> Option<&str> {
self.query.as_deref()
}
/// Get the fragment.
pub fn fragment(&self) -> Option<&str> {
self.fragment.as_deref()
}
/// Whether this URL has an opaque path (cannot-be-a-base).
pub fn cannot_be_a_base(&self) -> bool {
self.opaque_path
}
/// Whether this URL includes credentials.
pub fn has_credentials(&self) -> bool {
!self.username.is_empty() || !self.password.is_empty()
}
/// Derive the origin of this URL.
pub fn origin(&self) -> Origin {
match self.scheme.as_str() {
"http" | "https" | "ws" | "wss" | "ftp" => {
if let Some(host) = &self.host {
Origin::Tuple(self.scheme.clone(), host.clone(), self.port)
} else {
Origin::Opaque
}
}
_ => Origin::Opaque,
}
}
/// Serialize this URL to a string (the href).
pub fn serialize(&self) -> String {
let mut output = String::new();
output.push_str(&self.scheme);
output.push(':');
if self.host.is_some() {
output.push_str("//");
if self.has_credentials() {
output.push_str(&self.username);
if !self.password.is_empty() {
output.push(':');
output.push_str(&self.password);
}
output.push('@');
}
if let Some(ref host) = self.host {
output.push_str(&host.serialize());
}
if let Some(port) = self.port {
output.push(':');
output.push_str(&port.to_string());
}
} else if !self.opaque_path && self.scheme == "file" {
output.push_str("//");
}
output.push_str(&self.path());
if let Some(ref query) = self.query {
output.push('?');
output.push_str(query);
}
if let Some(ref fragment) = self.fragment {
output.push('#');
output.push_str(fragment);
}
output
}
}
impl fmt::Display for Url {
    /// Displays the URL as its serialized href.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let href = self.serialize();
        f.write_str(&href)
    }
}
// ---------------------------------------------------------------------------
// Special schemes
// ---------------------------------------------------------------------------
/// Returns `true` when `scheme` is one of the URL standard's special
/// schemes. The comparison is exact, so callers pass a lowercased scheme.
fn is_special_scheme(scheme: &str) -> bool {
    const SPECIAL_SCHEMES: [&str; 6] = ["http", "https", "ftp", "ws", "wss", "file"];
    SPECIAL_SCHEMES.contains(&scheme)
}
/// Default port for a special scheme.
///
/// Returns `None` for `file` and for every non-special scheme.
fn default_port(scheme: &str) -> Option<u16> {
    match scheme {
        "http" | "ws" => Some(80),
        "https" | "wss" => Some(443),
        "ftp" => Some(21),
        _ => None,
    }
}
// ---------------------------------------------------------------------------
// Percent encoding / decoding
// ---------------------------------------------------------------------------
/// The C0 control percent-encode set: everything outside the printable
/// ASCII range U+0020..=U+007E.
fn is_c0_control(c: char) -> bool {
    !matches!(c, '\u{0020}'..='\u{007E}')
}
/// The fragment percent-encode set: the C0 control set plus space, `"`,
/// `<`, `>` and the backtick.
fn is_fragment_encode(c: char) -> bool {
    matches!(c, ' ' | '"' | '<' | '>' | '`') || is_c0_control(c)
}
/// The query percent-encode set: the C0 control set plus space, `"`, `#`,
/// `<` and `>`.
fn is_query_encode(c: char) -> bool {
    matches!(c, ' ' | '"' | '#' | '<' | '>') || is_c0_control(c)
}
/// The special query percent-encode set: the query set plus the apostrophe.
fn is_special_query_encode(c: char) -> bool {
    c == '\'' || is_query_encode(c)
}
/// The path percent-encode set: the query set plus `?`, the backtick, `{`
/// and `}`.
fn is_path_encode(c: char) -> bool {
    matches!(c, '?' | '`' | '{' | '}') || is_query_encode(c)
}
/// The userinfo percent-encode set: the path set plus `/`, `:`, `;`, `=`,
/// `@`, `[`, `\`, `]`, `^` and `|`.
fn is_userinfo_encode(c: char) -> bool {
    matches!(
        c,
        '/' | ':' | ';' | '=' | '@' | '[' | '\\' | ']' | '^' | '|'
    ) || is_path_encode(c)
}
/// Percent-encode `input`: every character accepted by `should_encode` is
/// replaced by the `%XX` (uppercase hex) form of each of its UTF-8 bytes;
/// all other characters, including existing `%`, are copied through.
fn percent_encode(input: &str, should_encode: fn(char) -> bool) -> String {
    let mut encoded = String::with_capacity(input.len());
    let mut utf8 = [0u8; 4];
    for ch in input.chars() {
        if !should_encode(ch) {
            encoded.push(ch);
            continue;
        }
        for &byte in ch.encode_utf8(&mut utf8).as_bytes() {
            encoded.push('%');
            encoded.push(to_hex_upper(byte >> 4));
            encoded.push(to_hex_upper(byte & 0x0F));
        }
    }
    encoded
}
/// Map a nibble (0..=15) to its uppercase hexadecimal digit.
fn to_hex_upper(n: u8) -> char {
    match n {
        0..=9 => char::from(b'0' + n),
        _ => char::from(b'A' + n - 10),
    }
}
/// Percent-decode a byte string.
pub fn percent_decode(input: &str) -> Vec {
let bytes = input.as_bytes();
let mut out = Vec::with_capacity(bytes.len());
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'%' && i + 2 < bytes.len() {
if let (Some(hi), Some(lo)) = (hex_val(bytes[i + 1]), hex_val(bytes[i + 2])) {
out.push(hi << 4 | lo);
i += 3;
continue;
}
}
out.push(bytes[i]);
i += 1;
}
out
}
/// Percent-decode `input` and interpret the bytes as UTF-8, substituting
/// U+FFFD for any invalid sequences.
pub fn percent_decode_string(input: &str) -> String {
    let raw = percent_decode(input);
    String::from_utf8_lossy(&raw).into_owned()
}
/// Value of an ASCII hexadecimal digit (either case), or `None` when `b` is
/// not a hex digit.
fn hex_val(b: u8) -> Option<u8> {
    match b {
        b'0'..=b'9' => Some(b - b'0'),
        b'a'..=b'f' => Some(b - b'a' + 10),
        b'A'..=b'F' => Some(b - b'A' + 10),
        _ => None,
    }
}
// ---------------------------------------------------------------------------
// IPv4 parsing
// ---------------------------------------------------------------------------
fn parse_ipv4(input: &str) -> Result {
let parts: Vec<&str> = input.split('.').collect();
if parts.len() < 2 || parts.len() > 4 {
return Err(UrlError::InvalidIpv4);
}
let mut numbers: Vec = Vec::with_capacity(parts.len());
for part in &parts {
if part.is_empty() {
return Err(UrlError::InvalidIpv4);
}
let n = parse_ipv4_number(part)?;
numbers.push(n);
}
let last = numbers.len() - 1;
for (i, &n) in numbers.iter().enumerate() {
if i < last && n > 255 {
return Err(UrlError::InvalidIpv4);
}
}
if numbers[last] >= 256u64.pow((4 - last) as u32) {
return Err(UrlError::InvalidIpv4);
}
let mut ipv4 = numbers[last] as u32;
for (i, &n) in numbers.iter().enumerate().take(last) {
ipv4 += (n as u32) << (8 * (3 - i));
}
Ok(ipv4)
}
fn parse_ipv4_number(input: &str) -> Result {
if input.is_empty() {
return Err(UrlError::InvalidIpv4);
}
let (s, radix) = if input.starts_with("0x") || input.starts_with("0X") {
(&input[2..], 16)
} else if input.len() > 1 && input.starts_with('0') {
(&input[1..], 8)
} else {
(input, 10)
};
if s.is_empty() {
return Ok(0);
}
u64::from_str_radix(s, radix).map_err(|_| UrlError::InvalidIpv4)
}
/// Format a 32-bit IPv4 address as dotted decimal, most significant byte
/// first.
fn serialize_ipv4(addr: u32) -> String {
    let [a, b, c, d] = addr.to_be_bytes();
    format!("{a}.{b}.{c}.{d}")
}
// ---------------------------------------------------------------------------
// IPv6 parsing
// ---------------------------------------------------------------------------
/// Parse the text between the brackets of an IPv6 host into eight 16-bit
/// pieces.
///
/// Supports a single `::` compression and a trailing dotted-decimal IPv4
/// suffix (e.g. `::ffff:192.168.1.1`).
///
/// # Errors
/// Returns [`UrlError::InvalidIpv6`] for any malformed address, including a
/// second `::`, too many or too few pieces, a piece followed by a stray
/// character, or a trailing single `:`.
fn parse_ipv6(input: &str) -> Result<[u16; 8]> {
    let mut pieces = [0u16; 8];
    let mut piece_index: usize = 0;
    let mut compress: Option<usize> = None;
    let chars: Vec<char> = input.chars().collect();
    let len = chars.len();
    let mut pointer = 0;

    // A leading ':' is only valid as the first half of "::".
    if pointer < len && chars[pointer] == ':' {
        if pointer + 1 >= len || chars[pointer + 1] != ':' {
            return Err(UrlError::InvalidIpv6);
        }
        pointer += 2;
        piece_index += 1;
        compress = Some(piece_index);
    }

    while pointer < len {
        if piece_index >= 8 {
            return Err(UrlError::InvalidIpv6);
        }
        if chars[pointer] == ':' {
            // Second half of a "::"; only one compression is allowed.
            if compress.is_some() {
                return Err(UrlError::InvalidIpv6);
            }
            pointer += 1;
            piece_index += 1;
            compress = Some(piece_index);
            continue;
        }

        // Up to four hex digits form one piece.
        let mut value: u16 = 0;
        let mut length: usize = 0;
        while length < 4 && pointer < len {
            let digit = match chars[pointer].to_digit(16) {
                Some(d) => d as u16,
                None => break,
            };
            value = value * 0x10 + digit;
            pointer += 1;
            length += 1;
        }

        if pointer < len && chars[pointer] == '.' {
            // Embedded IPv4 suffix: rewind and re-read the digits as decimal.
            if length == 0 {
                return Err(UrlError::InvalidIpv6);
            }
            pointer -= length;
            // The suffix needs two pieces.
            if piece_index > 6 {
                return Err(UrlError::InvalidIpv6);
            }
            let mut numbers_seen = 0;
            while pointer < len {
                let mut ipv4_piece: Option<u16> = None;
                if numbers_seen > 0 {
                    if chars[pointer] == '.' && numbers_seen < 4 {
                        pointer += 1;
                    } else {
                        return Err(UrlError::InvalidIpv6);
                    }
                }
                if pointer >= len || !chars[pointer].is_ascii_digit() {
                    return Err(UrlError::InvalidIpv6);
                }
                while pointer < len && chars[pointer].is_ascii_digit() {
                    let number = (chars[pointer] as u8 - b'0') as u16;
                    match ipv4_piece {
                        None => ipv4_piece = Some(number),
                        Some(0) => return Err(UrlError::InvalidIpv6), // leading zero
                        Some(v) => ipv4_piece = Some(v * 10 + number),
                    }
                    if ipv4_piece.unwrap_or(0) > 255 {
                        return Err(UrlError::InvalidIpv6);
                    }
                    pointer += 1;
                }
                pieces[piece_index] =
                    pieces[piece_index] * 0x100 + ipv4_piece.ok_or(UrlError::InvalidIpv6)?;
                numbers_seen += 1;
                // Two decimal numbers fill one 16-bit piece.
                if numbers_seen == 2 || numbers_seen == 4 {
                    piece_index += 1;
                }
            }
            if numbers_seen != 4 {
                return Err(UrlError::InvalidIpv6);
            }
            break;
        }

        if pointer < len {
            // A piece must be followed by ':' and then more input; a trailing
            // single colon is never valid.
            if chars[pointer] != ':' {
                return Err(UrlError::InvalidIpv6);
            }
            pointer += 1;
            if pointer >= len {
                return Err(UrlError::InvalidIpv6);
            }
        }
        pieces[piece_index] = value;
        piece_index += 1;
    }

    if let Some(comp) = compress {
        // Move the pieces parsed after "::" to the end of the address,
        // leaving the zero-filled gap in between.
        let mut swaps = piece_index - comp;
        piece_index = 7;
        while piece_index != 0 && swaps > 0 {
            pieces.swap(piece_index, comp + swaps - 1);
            piece_index -= 1;
            swaps -= 1;
        }
    } else if piece_index != 8 {
        return Err(UrlError::InvalidIpv6);
    }
    Ok(pieces)
}
/// Format eight IPv6 pieces as lowercase hex without brackets, replacing
/// the first longest run of two or more zero pieces with `::`.
fn serialize_ipv6(pieces: &[u16; 8]) -> String {
    // (start, length) of the zero run to compress; the earliest run wins ties.
    let mut longest: Option<(usize, usize)> = None;
    let mut idx = 0;
    while idx < pieces.len() {
        if pieces[idx] != 0 {
            idx += 1;
            continue;
        }
        let start = idx;
        while idx < pieces.len() && pieces[idx] == 0 {
            idx += 1;
        }
        let run = idx - start;
        if run >= 2 && longest.map_or(true, |(_, best)| run > best) {
            longest = Some((start, run));
        }
    }

    let mut text = String::new();
    let mut idx = 0;
    while idx < pieces.len() {
        match longest {
            Some((start, run)) if start == idx => {
                text.push_str("::");
                idx += run;
            }
            _ => {
                // No separator at the very start or right after "::".
                if !text.is_empty() && !text.ends_with(':') {
                    text.push(':');
                }
                text.push_str(&format!("{:x}", pieces[idx]));
                idx += 1;
            }
        }
    }
    text
}
// ---------------------------------------------------------------------------
// Host parsing
// ---------------------------------------------------------------------------
fn parse_host(input: &str, is_special: bool) -> Result {
if input.is_empty() {
if is_special {
return Err(UrlError::InvalidHost);
}
return Ok(Host::Domain(String::new()));
}
// IPv6
if input.starts_with('[') {
if !input.ends_with(']') {
return Err(UrlError::InvalidIpv6);
}
let inner = &input[1..input.len() - 1];
let pieces = parse_ipv6(inner)?;
return Ok(Host::Ipv6(pieces));
}
if !is_special {
let encoded = percent_encode(input, is_c0_control);
return Ok(Host::Domain(encoded));
}
// Domain — percent-decode then lowercase.
let decoded = percent_decode_string(input);
let lowered = decoded.to_ascii_lowercase();
// Check if it's an IPv4 address.
if ends_with_number(&lowered) {
match parse_ipv4(&lowered) {
Ok(addr) => return Ok(Host::Ipv4(addr)),
Err(_) => return Err(UrlError::InvalidHost),
}
}
// Validate domain characters.
for c in lowered.chars() {
if c == '\0'
|| c == '\t'
|| c == '\n'
|| c == '\r'
|| c == ' '
|| c == '#'
|| c == '/'
|| c == ':'
|| c == '<'
|| c == '>'
|| c == '?'
|| c == '@'
|| c == '['
|| c == '\\'
|| c == ']'
|| c == '^'
|| c == '|'
{
return Err(UrlError::InvalidHost);
}
}
Ok(Host::Domain(lowered))
}
/// Reports whether the text after the last `.` looks like an IPv4 number:
/// either all ASCII digits, or `0x`/`0X` followed only by hex digits.
/// An empty last label (including a trailing dot) is not a number.
fn ends_with_number(input: &str) -> bool {
    let tail = input.rsplit('.').next().unwrap_or("");
    if tail.is_empty() {
        return false;
    }
    match tail.strip_prefix("0x").or_else(|| tail.strip_prefix("0X")) {
        Some(hex) => hex.chars().all(|c| c.is_ascii_hexdigit()),
        None => tail.chars().all(|c| c.is_ascii_digit()),
    }
}
// ---------------------------------------------------------------------------
// Shorten path helper
// ---------------------------------------------------------------------------
fn shorten_path(scheme: &str, path: &mut Vec) {
if scheme == "file" && path.len() == 1 {
if let Some(first) = path.first() {
if is_normalized_windows_drive_letter(first) {
return;
}
}
}
path.pop();
}
/// True when `s` is exactly an ASCII letter followed by `:`.
fn is_normalized_windows_drive_letter(s: &str) -> bool {
    match s.as_bytes() {
        [letter, b':'] => letter.is_ascii_alphabetic(),
        _ => false,
    }
}
/// True when `s` begins with an ASCII letter followed by `:` or `|`, and
/// that pair is either the whole string or is followed by `/`, `\`, `?`
/// or `#`.
fn starts_with_windows_drive_letter(s: &str) -> bool {
    match s.as_bytes() {
        [letter, separator, rest @ ..] => {
            letter.is_ascii_alphabetic()
                && matches!(*separator, b':' | b'|')
                && rest
                    .first()
                    .map_or(true, |&next| matches!(next, b'/' | b'\\' | b'?' | b'#'))
        }
        _ => false,
    }
}
// ---------------------------------------------------------------------------
// URL parser
// ---------------------------------------------------------------------------
fn parse_url(input: &str, base: Option<&Url>) -> Result {
// Strip leading/trailing C0 controls and spaces.
let input = input.trim_matches(|c: char| c <= '\u{0020}');
if input.is_empty() {
if let Some(base) = base {
return parse_relative("", base);
}
return Err(UrlError::EmptyInput);
}
// Remove tab and newline characters.
let input: String = input
.chars()
.filter(|&c| c != '\t' && c != '\n' && c != '\r')
.collect();
let chars: Vec = input.chars().collect();
let len = chars.len();
let mut pointer = 0;
// Try to parse a scheme.
let mut scheme = String::new();
let mut has_scheme = false;
if pointer < len && chars[pointer].is_ascii_alphabetic() {
let mut temp = String::new();
temp.push(chars[pointer].to_ascii_lowercase());
let mut p = pointer + 1;
while p < len
&& (chars[p].is_ascii_alphanumeric()
|| chars[p] == '+'
|| chars[p] == '-'
|| chars[p] == '.')
{
temp.push(chars[p].to_ascii_lowercase());
p += 1;
}
if p < len && chars[p] == ':' {
scheme = temp;
has_scheme = true;
pointer = p + 1; // skip the ':'
}
}
if !has_scheme {
if let Some(base) = base {
return parse_relative(&input, base);
}
return Err(UrlError::MissingScheme);
}
let is_special = is_special_scheme(&scheme);
let mut url = Url {
scheme: scheme.clone(),
username: String::new(),
password: String::new(),
host: None,
port: None,
path: Vec::new(),
opaque_path: false,
query: None,
fragment: None,
};
let remaining: String = chars[pointer..].iter().collect();
if scheme == "file" {
return parse_file_url(&remaining, base, url);
}
if let Some(after_slashes) = remaining.strip_prefix("//") {
parse_authority_and_path(&mut url, after_slashes, is_special)?;
} else if is_special {
if let Some(base) = base {
if base.scheme == url.scheme {
return parse_relative_special(&remaining, base, url);
}
}
if let Some(after_slash) = remaining.strip_prefix('/') {
parse_authority_and_path(&mut url, after_slash, is_special)?;
} else {
parse_authority_and_path(&mut url, &remaining, is_special)?;
}
} else {
parse_opaque_or_path(&mut url, &remaining)?;
}
Ok(url)
}
/// Parse `[userinfo@]host[:port]` from the start of `input` into `url`, then
/// hand the remainder to `parse_path_query_fragment`.
///
/// The authority ends at the first `/`, `?` or `#` (and `\` for special
/// schemes). Userinfo is everything before the last `@`; it is split into
/// username and password at its first `:`. Any port already stored in `url`
/// is discarded, and a port equal to the scheme default is not stored.
///
/// # Errors
/// Returns the host parser's error for an invalid host and
/// [`UrlError::InvalidPort`] when the port is not a decimal `u16`.
fn parse_authority_and_path(url: &mut Url, input: &str, is_special: bool) -> Result<()> {
    let authority_end = input
        .find(|c: char| matches!(c, '/' | '?' | '#') || (is_special && c == '\\'))
        .unwrap_or(input.len());
    let (authority, rest) = input.split_at(authority_end);

    let (userinfo, hostport) = match authority.rfind('@') {
        Some(at) => (&authority[..at], &authority[at + 1..]),
        None => ("", authority),
    };
    if !userinfo.is_empty() {
        match userinfo.split_once(':') {
            Some((user, pass)) => {
                url.username = percent_encode(user, is_userinfo_encode);
                url.password = percent_encode(pass, is_userinfo_encode);
            }
            None => url.username = percent_encode(userinfo, is_userinfo_encode),
        }
    }

    let (host_str, port_str) = split_host_port(hostport);
    url.host = Some(parse_host(host_str, is_special)?);

    // The new authority replaces any port inherited from a base URL.
    url.port = None;
    if let Some(port_text) = port_str.filter(|p| !p.is_empty()) {
        // `str::parse::<u16>` accepts a leading '+'; a port is digits only.
        if !port_text.bytes().all(|b| b.is_ascii_digit()) {
            return Err(UrlError::InvalidPort);
        }
        let port: u16 = port_text.parse().map_err(|_| UrlError::InvalidPort)?;
        if default_port(&url.scheme) != Some(port) {
            url.port = Some(port);
        }
    }

    parse_path_query_fragment(url, rest, is_special)
}
/// Split `host[:port]` into its host and optional port text.
///
/// * A bracketed IPv6 host is split after the closing `]`. If anything other
///   than `:port` follows the bracket, the whole input is returned as the
///   host so that host validation rejects it instead of the trailing text
///   being dropped silently.
/// * Otherwise the split happens at the last `:` provided the text after it
///   is empty or all ASCII digits; if not, the whole input is the host.
///
/// The port text is returned unparsed and may be empty.
fn split_host_port(input: &str) -> (&str, Option<&str>) {
    if input.starts_with('[') {
        let close = match input.find(']') {
            Some(index) => index,
            None => return (input, None),
        };
        let (host, after) = input.split_at(close + 1);
        if after.is_empty() {
            return (host, None);
        }
        return match after.strip_prefix(':') {
            Some(port) => (host, Some(port)),
            None => (input, None),
        };
    }

    if let Some(colon) = input.rfind(':') {
        let port = &input[colon + 1..];
        if port.chars().all(|c| c.is_ascii_digit()) {
            return (&input[..colon], Some(port));
        }
    }
    (input, None)
}
/// Split `input` into path, `?query` and `#fragment`, storing each part in
/// `url`. The path goes through `parse_path_into`; the query is
/// percent-encoded with the special or plain query set depending on
/// `is_special`; the fragment uses the fragment set. Always returns `Ok`.
fn parse_path_query_fragment(url: &mut Url, input: &str, is_special: bool) -> Result<()> {
    let path_len = input.find(['?', '#']).unwrap_or(input.len());
    let (path_part, tail) = input.split_at(path_len);
    parse_path_into(url, path_part, is_special);

    let tail = match tail.strip_prefix('?') {
        Some(after_mark) => {
            let query_len = after_mark.find('#').unwrap_or(after_mark.len());
            let (query_part, after_query) = after_mark.split_at(query_len);
            let in_query_set: fn(char) -> bool = if is_special {
                is_special_query_encode
            } else {
                is_query_encode
            };
            url.query = Some(percent_encode(query_part, in_query_set));
            after_query
        }
        None => tail,
    };

    if let Some(fragment) = tail.strip_prefix('#') {
        url.fragment = Some(percent_encode(fragment, is_fragment_encode));
    }
    Ok(())
}
/// Append the segments of `path` to `url.path`, resolving dot segments.
///
/// * An empty `path` yields a single empty segment for special schemes and
///   leaves the path untouched otherwise.
/// * Segments are separated by `/` (and `\` for special schemes); the empty
///   segment before a leading separator is skipped.
/// * `.` (or `%2e`) is dropped and `..` (in any `%2e` spelling) removes the
///   previous segment; when either is the final segment an empty segment is
///   appended so the path keeps its trailing slash.
/// * Every other segment is percent-encoded with the path set.
fn parse_path_into(url: &mut Url, path: &str, is_special: bool) {
    if path.is_empty() {
        if is_special {
            url.path = vec![String::new()];
        }
        return;
    }

    let segments: Vec<&str> = path
        .split(|c: char| c == '/' || (is_special && c == '\\'))
        .collect();
    let last_index = segments.len() - 1;

    for (index, segment) in segments.into_iter().enumerate() {
        if index == 0 && segment.is_empty() {
            continue;
        }

        let is_single_dot = segment == "." || segment.eq_ignore_ascii_case("%2e");
        let is_double_dot = segment == ".."
            || segment.eq_ignore_ascii_case(".%2e")
            || segment.eq_ignore_ascii_case("%2e.")
            || segment.eq_ignore_ascii_case("%2e%2e");

        if !is_single_dot && !is_double_dot {
            url.path.push(percent_encode(segment, is_path_encode));
            continue;
        }
        if is_double_dot {
            shorten_path(&url.scheme, &mut url.path);
        }
        if index == last_index {
            url.path.push(String::new());
        }
    }
}
/// Parse what follows the scheme of a non-special URL that has no `//`
/// authority.
///
/// A part starting with `/` is a hierarchical path; anything else becomes a
/// single opaque path element encoded with the C0 control set. The query and
/// fragment are encoded with the plain query and fragment sets. Always
/// returns `Ok`.
fn parse_opaque_or_path(url: &mut Url, input: &str) -> Result<()> {
    let path_len = input.find(['?', '#']).unwrap_or(input.len());
    let (path_part, mut tail) = input.split_at(path_len);

    url.opaque_path = !path_part.starts_with('/');
    if url.opaque_path {
        url.path = vec![percent_encode(path_part, is_c0_control)];
    } else {
        parse_path_into(url, path_part, false);
    }

    if let Some(after_mark) = tail.strip_prefix('?') {
        let query_len = after_mark.find('#').unwrap_or(after_mark.len());
        let (query_part, after_query) = after_mark.split_at(query_len);
        url.query = Some(percent_encode(query_part, is_query_encode));
        tail = after_query;
    }
    if let Some(fragment) = tail.strip_prefix('#') {
        url.fragment = Some(percent_encode(fragment, is_fragment_encode));
    }
    Ok(())
}
// ---------------------------------------------------------------------------
// Relative URL parsing
// ---------------------------------------------------------------------------
fn parse_relative(input: &str, base: &Url) -> Result {
let mut url = Url {
scheme: base.scheme.clone(),
username: base.username.clone(),
password: base.password.clone(),
host: base.host.clone(),
port: base.port,
path: base.path.clone(),
opaque_path: base.opaque_path,
query: base.query.clone(),
fragment: None,
};
let is_special = is_special_scheme(&url.scheme);
if input.is_empty() {
return Ok(url);
}
let chars: Vec = input.chars().collect();
if chars[0] == '/' || (is_special && chars[0] == '\\') {
if input.starts_with("//") || (is_special && input.starts_with("\\/")) {
let after_slashes = &input[2..];
url.username = String::new();
url.password = String::new();
url.path = Vec::new();
url.query = None;
parse_authority_and_path(&mut url, after_slashes, is_special)?;
return Ok(url);
}
url.path = Vec::new();
url.query = None;
parse_path_query_fragment(&mut url, input, is_special)?;
return Ok(url);
}
if let Some(after_q) = input.strip_prefix('?') {
url.query = None;
url.fragment = None;
let query_end = after_q.find('#').unwrap_or(after_q.len());
let query_str = &after_q[..query_end];
let after = &after_q[query_end..];
let encode_fn = if is_special {
is_special_query_encode
} else {
is_query_encode
};
url.query = Some(percent_encode(query_str, encode_fn));
if let Some(frag) = after.strip_prefix('#') {
url.fragment = Some(percent_encode(frag, is_fragment_encode));
}
return Ok(url);
}
if let Some(frag) = input.strip_prefix('#') {
url.fragment = Some(percent_encode(frag, is_fragment_encode));
return Ok(url);
}
// Path-relative.
if !url.opaque_path {
shorten_path(&url.scheme, &mut url.path);
}
url.query = None;
url.fragment = None;
parse_path_query_fragment(&mut url, &format!("/{input}"), is_special)?;
Ok(url)
}
fn parse_relative_special(remaining: &str, base: &Url, mut url: Url) -> Result {
url.username = base.username.clone();
url.password = base.password.clone();
url.host = base.host.clone();
url.port = base.port;
url.path = base.path.clone();
url.query = base.query.clone();
let is_special = true;
if remaining.is_empty() {
return Ok(url);
}
if remaining.starts_with('/') || remaining.starts_with('\\') {
url.path = Vec::new();
url.query = None;
parse_path_query_fragment(&mut url, remaining, is_special)?;
return Ok(url);
}
if let Some(rest) = remaining.strip_prefix('?') {
url.query = None;
url.fragment = None;
let query_end = rest.find('#').unwrap_or(rest.len());
url.query = Some(percent_encode(&rest[..query_end], is_special_query_encode));
if query_end < rest.len() {
url.fragment = Some(percent_encode(&rest[query_end + 1..], is_fragment_encode));
}
return Ok(url);
}
if let Some(frag) = remaining.strip_prefix('#') {
url.fragment = Some(percent_encode(frag, is_fragment_encode));
return Ok(url);
}
shorten_path(&url.scheme, &mut url.path);
url.query = None;
parse_path_query_fragment(&mut url, &format!("/{remaining}"), is_special)?;
Ok(url)
}
// ---------------------------------------------------------------------------
// File URL parsing
// ---------------------------------------------------------------------------
fn parse_file_url(input: &str, base: Option<&Url>, mut url: Url) -> Result {
url.host = Some(Host::Domain(String::new()));
let remaining = if let Some(after) = input.strip_prefix("//") {
after
} else if let Some(after) = input.strip_prefix('/') {
after
} else if let Some(base) = base {
if base.scheme == "file" {
url.host = base.host.clone();
url.path = base.path.clone();
if let Some(rest) = input.strip_prefix('?') {
url.query = None;
url.fragment = None;
let query_end = rest.find('#').unwrap_or(rest.len());
url.query = Some(percent_encode(&rest[..query_end], is_query_encode));
if query_end < rest.len() {
url.fragment = Some(percent_encode(&rest[query_end + 1..], is_fragment_encode));
}
return Ok(url);
}
if let Some(frag) = input.strip_prefix('#') {
url.fragment = Some(percent_encode(frag, is_fragment_encode));
return Ok(url);
}
shorten_path(&url.scheme, &mut url.path);
url.query = None;
parse_path_query_fragment(&mut url, &format!("/{input}"), false)?;
return Ok(url);
} else {
input
}
} else {
input
};
let path_start = remaining
.find(['/', '\\', '?', '#'])
.unwrap_or(remaining.len());
let potential_host = &remaining[..path_start];
let rest = &remaining[path_start..];
if starts_with_windows_drive_letter(remaining) {
url.host = Some(Host::Domain(String::new()));
parse_path_query_fragment(&mut url, &format!("/{remaining}"), false)?;
return Ok(url);
}
if !potential_host.is_empty() {
let host = parse_host(potential_host, false)?;
if host != Host::Domain(String::new()) {
url.host = Some(host);
}
}
parse_path_query_fragment(&mut url, rest, false)?;
// Normalize Windows drive letters in path.
if let Some(first) = url.path.first_mut() {
if first.len() == 2 {
let bytes = first.as_bytes();
if bytes[0].is_ascii_alphabetic() && bytes[1] == b'|' {
let mut normalized = String::new();
normalized.push(bytes[0] as char);
normalized.push(':');
*first = normalized;
}
}
}
Ok(url)
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
// -------------------------------------------------------------------
// Basic absolute URL parsing
// -------------------------------------------------------------------
#[test]
fn parse_simple_http() {
let url = Url::parse("http://example.com").unwrap();
assert_eq!(url.scheme(), "http");
assert_eq!(url.host_str(), Some("example.com".into()));
assert_eq!(url.port(), None);
assert_eq!(url.path(), "/");
assert_eq!(url.query(), None);
assert_eq!(url.fragment(), None);
}
#[test]
fn parse_https_with_path() {
let url = Url::parse("https://example.com/foo/bar").unwrap();
assert_eq!(url.scheme(), "https");
assert_eq!(url.host_str(), Some("example.com".into()));
assert_eq!(url.path(), "/foo/bar");
}
#[test]
fn parse_full_url() {
let url =
Url::parse("https://user:pass@example.com:8080/path/to/page?q=1&r=2#frag").unwrap();
assert_eq!(url.scheme(), "https");
assert_eq!(url.username(), "user");
assert_eq!(url.password(), "pass");
assert_eq!(url.host_str(), Some("example.com".into()));
assert_eq!(url.port(), Some(8080));
assert_eq!(url.path(), "/path/to/page");
assert_eq!(url.query(), Some("q=1&r=2"));
assert_eq!(url.fragment(), Some("frag"));
}
#[test]
fn parse_default_port_omitted() {
let url = Url::parse("http://example.com:80/").unwrap();
assert_eq!(url.port(), None);
assert_eq!(url.port_or_default(), Some(80));
}
#[test]
fn parse_non_default_port() {
let url = Url::parse("http://example.com:8080/").unwrap();
assert_eq!(url.port(), Some(8080));
}
#[test]
fn parse_https_default_port() {
let url = Url::parse("https://example.com:443/").unwrap();
assert_eq!(url.port(), None);
}
#[test]
fn parse_ftp_default_port() {
let url = Url::parse("ftp://files.example.com:21/readme.txt").unwrap();
assert_eq!(url.port(), None);
assert_eq!(url.port_or_default(), Some(21));
}
// -------------------------------------------------------------------
// Scheme handling
// -------------------------------------------------------------------
#[test]
fn scheme_is_lowercased() {
let url = Url::parse("HTTP://EXAMPLE.COM").unwrap();
assert_eq!(url.scheme(), "http");
}
#[test]
fn non_special_scheme() {
let url = Url::parse("custom://host/path").unwrap();
assert_eq!(url.scheme(), "custom");
assert_eq!(url.host_str(), Some("host".into()));
assert_eq!(url.path(), "/path");
}
#[test]
fn data_uri() {
    // Markup in the payload must not prevent the URL from parsing as opaque.
    let url = Url::parse("data:text/html,<h1>Hello</h1>").unwrap();
    assert_eq!(url.scheme(), "data");
    assert!(url.cannot_be_a_base());
}
#[test]
fn javascript_uri() {
let url = Url::parse("javascript:alert(1)").unwrap();
assert_eq!(url.scheme(), "javascript");
assert!(url.cannot_be_a_base());
}
#[test]
fn mailto_uri() {
let url = Url::parse("mailto:user@example.com").unwrap();
assert_eq!(url.scheme(), "mailto");
assert!(url.cannot_be_a_base());
}
// -------------------------------------------------------------------
// Host parsing
// -------------------------------------------------------------------
/// An upper-case domain in the input is reported in lower case.
#[test]
fn host_is_lowercased() {
    let parsed = Url::parse("http://EXAMPLE.COM/").unwrap();
    let host = parsed.host_str();
    assert_eq!(host, Some("example.com".into()));
}
/// A dotted-decimal host is parsed into `Host::Ipv4` and serialized back to
/// the same dotted form.
#[test]
fn ipv4_host() {
    let parsed = Url::parse("http://127.0.0.1/").unwrap();
    let loopback = Host::Ipv4(0x7F000001);
    assert_eq!(parsed.host(), Some(&loopback));
    let text = parsed.host_str();
    assert_eq!(text, Some("127.0.0.1".into()));
}
/// The host `0.0.0.0` is parsed into `Host::Ipv4(0)`.
#[test]
fn ipv4_host_all_zeros() {
    let parsed = Url::parse("http://0.0.0.0/").unwrap();
    let unspecified = Host::Ipv4(0);
    assert_eq!(parsed.host(), Some(&unspecified));
}
/// The bracketed host `[::1]` is parsed into `Host::Ipv6` with only the last
/// piece set to 1.
#[test]
fn ipv6_host() {
    let parsed = Url::parse("http://[::1]/").unwrap();
    let loopback = Host::Ipv6([0, 0, 0, 0, 0, 0, 0, 1]);
    assert_eq!(parsed.host(), Some(&loopback));
}
/// An uncompressed eight-piece IPv6 host is parsed piece by piece.
#[test]
fn ipv6_full() {
    let parsed = Url::parse("http://[2001:db8:85a3:0:0:8a2e:370:7334]/").unwrap();
    let expected = Host::Ipv6([0x2001, 0x0db8, 0x85a3, 0, 0, 0x8a2e, 0x0370, 0x7334]);
    assert_eq!(parsed.host(), Some(&expected));
}
/// A compressed IPv6 host is reported by `host_str` in bracketed,
/// compressed form.
#[test]
fn ipv6_serialization_compressed() {
    let parsed = Url::parse("http://[2001:db8::1]/").unwrap();
    let text = parsed.host_str();
    assert_eq!(text, Some("[2001:db8::1]".into()));
}
/// The host `[::]` parses to eight zero pieces and is reported by `host_str`
/// as `[::]`.
#[test]
fn ipv6_all_zeros() {
    let parsed = Url::parse("http://[::]/").unwrap();
    let unspecified = Host::Ipv6([0; 8]);
    assert_eq!(parsed.host(), Some(&unspecified));
    let text = parsed.host_str();
    assert_eq!(text, Some("[::]".into()));
}
/// `parse_ipv6("::1")` yields seven zero pieces followed by 1.
#[test]
fn ipv6_loopback() {
    assert_eq!(parse_ipv6("::1").unwrap(), [0, 0, 0, 0, 0, 0, 0, 1]);
}
/// A trailing dotted-decimal IPv4 part fills the last two IPv6 pieces.
#[test]
fn ipv6_with_ipv4() {
    // 192.168 -> 0xc0a8, 1.1 -> 0x0101
    assert_eq!(
        parse_ipv6("::ffff:192.168.1.1").unwrap(),
        [0, 0, 0, 0, 0, 0xffff, 0xc0a8, 0x0101]
    );
}
// -------------------------------------------------------------------
// IPv4 parsing
// -------------------------------------------------------------------
/// Four decimal parts are packed into a single 32-bit value.
#[test]
fn ipv4_basic() {
    let addr = parse_ipv4("192.168.1.1").unwrap();
    assert_eq!(addr, 0xC0A80101);
}
/// Parts written with a `0x` prefix are read as hexadecimal.
#[test]
fn ipv4_hex() {
    let addr = parse_ipv4("0xC0.0xA8.0x01.0x01").unwrap();
    assert_eq!(addr, 0xC0A80101);
}
/// Parts written with a leading `0` are read as octal (0300 = 192, 0250 = 168).
#[test]
fn ipv4_octal() {
    let addr = parse_ipv4("0300.0250.01.01").unwrap();
    assert_eq!(addr, 0xC0A80101);
}
/// A single-part IPv4 input is interpreted as the whole 32-bit address.
#[test]
fn ipv4_single_number() {
    // 3232235777 == 0xC0A80101 (192.168.1.1). The WHATWG IPv4 parser accepts
    // a one-part address, so this must succeed rather than error.
    assert_eq!(parse_ipv4("3232235777").unwrap(), 0xC0A80101);
}
/// With two parts, the first supplies the top 8 bits and the second the
/// remaining 24 bits.
#[test]
fn ipv4_two_parts() {
    // Low 24 bits of 192.168.1.1: 168 * 65536 + 1 * 256 + 1 = 11010305.
    let addr = parse_ipv4("192.11010305").unwrap();
    assert_eq!(addr, 0xC0A80101);
}
/// A part of 256 in a four-part address is rejected.
#[test]
fn ipv4_reject_overflow() {
    let outcome = parse_ipv4("256.0.0.0");
    assert!(outcome.is_err());
}
/// An empty part between two dots is rejected.
#[test]
fn ipv4_reject_empty_part() {
    let outcome = parse_ipv4("1..1.1");
    assert!(outcome.is_err());
}
// -------------------------------------------------------------------
// Percent encoding/decoding
// -------------------------------------------------------------------
/// A fully percent-encoded ASCII string decodes to its plain text.
#[test]
fn percent_decode_basic() {
    let decoded = percent_decode_string("%48%65%6C%6C%6F");
    assert_eq!(decoded, "Hello");
}
/// Literal characters pass through while `%20` decodes to a space.
#[test]
fn percent_decode_mixed() {
    let decoded = percent_decode_string("Hello%20World");
    assert_eq!(decoded, "Hello World");
}
/// Input without any percent sequences is returned unchanged.
#[test]
fn percent_decode_passthrough() {
    let decoded = percent_decode_string("no-encoding");
    assert_eq!(decoded, "no-encoding");
}
/// Incomplete percent sequences (`%` alone, `%` plus one digit) are left
/// as-is.
#[test]
fn percent_decode_partial() {
    let lone_percent = percent_decode_string("100%");
    assert_eq!(lone_percent, "100%");
    let one_digit = percent_decode_string("%2");
    assert_eq!(one_digit, "%2");
}
/// With the userinfo predicate, `@` is encoded as `%40`.
#[test]
fn percent_encode_userinfo() {
    assert_eq!(percent_encode("user@host", is_userinfo_encode), "user%40host");
}
/// With the path predicate, a space is encoded as `%20`.
#[test]
fn percent_encode_path() {
    assert_eq!(percent_encode("hello world", is_path_encode), "hello%20world");
}
// -------------------------------------------------------------------
// Path parsing and dot segments
// -------------------------------------------------------------------
/// A `..` segment removes the segment before it.
#[test]
fn path_dot_removal() {
    let parsed = Url::parse("http://example.com/a/b/../c").unwrap();
    let path = parsed.path();
    assert_eq!(path, "/a/c");
}
/// A `.` segment is dropped from the path.
#[test]
fn path_dot_current() {
    let parsed = Url::parse("http://example.com/a/./b").unwrap();
    let path = parsed.path();
    assert_eq!(path, "/a/b");
}
/// Two consecutive `..` segments remove the two segments before them.
#[test]
fn path_multiple_dots() {
    let parsed = Url::parse("http://example.com/a/b/c/../../d").unwrap();
    let path = parsed.path();
    assert_eq!(path, "/a/d");
}
/// A trailing slash in the input path is kept.
#[test]
fn path_trailing_slash() {
    let parsed = Url::parse("http://example.com/a/b/").unwrap();
    let path = parsed.path();
    assert_eq!(path, "/a/b/");
}
/// An http URL with no path component reports `/` as its path.
#[test]
fn path_empty() {
    let parsed = Url::parse("http://example.com").unwrap();
    let path = parsed.path();
    assert_eq!(path, "/");
}
/// A leading `..` with nothing to remove is simply discarded.
#[test]
fn path_double_dot_at_root() {
    let parsed = Url::parse("http://example.com/../a").unwrap();
    let path = parsed.path();
    assert_eq!(path, "/a");
}
// -------------------------------------------------------------------
// Relative URL resolution
// -------------------------------------------------------------------
/// A bare relative segment replaces the last segment of the base path and
/// keeps the base host.
#[test]
fn relative_path() {
    let base_url = Url::parse("http://example.com/a/b/c").unwrap();
    let resolved = Url::parse_with_base("d", &base_url).unwrap();
    let path = resolved.path();
    assert_eq!(path, "/a/b/d");
    let host = resolved.host_str();
    assert_eq!(host, Some("example.com".into()));
}
/// A relative reference starting with `..` climbs one directory above the
/// base path's directory.
#[test]
fn relative_path_with_dots() {
    let base_url = Url::parse("http://example.com/a/b/c").unwrap();
    let resolved = Url::parse_with_base("../d", &base_url).unwrap();
    let path = resolved.path();
    assert_eq!(path, "/a/d");
}
/// A reference beginning with `/` replaces the whole base path and keeps the
/// base host.
#[test]
fn relative_absolute_path() {
    let base_url = Url::parse("http://example.com/a/b/c").unwrap();
    let resolved = Url::parse_with_base("/d/e", &base_url).unwrap();
    let path = resolved.path();
    assert_eq!(path, "/d/e");
    let host = resolved.host_str();
    assert_eq!(host, Some("example.com".into()));
}
/// A query-only reference keeps the base path and replaces the base query.
#[test]
fn relative_query_only() {
    let base_url = Url::parse("http://example.com/a/b?old=1").unwrap();
    let resolved = Url::parse_with_base("?new=2", &base_url).unwrap();
    let path = resolved.path();
    assert_eq!(path, "/a/b");
    let query = resolved.query();
    assert_eq!(query, Some("new=2"));
}
/// A fragment-only reference keeps the base path and replaces the base
/// fragment.
#[test]
fn relative_fragment_only() {
    let base_url = Url::parse("http://example.com/a/b#old").unwrap();
    let resolved = Url::parse_with_base("#new", &base_url).unwrap();
    let path = resolved.path();
    assert_eq!(path, "/a/b");
    let fragment = resolved.fragment();
    assert_eq!(fragment, Some("new"));
}
/// A reference beginning with `//` keeps the base scheme but takes host and
/// path from the reference.
#[test]
fn relative_authority_override() {
    let base_url = Url::parse("http://example.com/a/b").unwrap();
    let resolved = Url::parse_with_base("//other.com/c", &base_url).unwrap();
    let scheme = resolved.scheme();
    assert_eq!(scheme, "http");
    let host = resolved.host_str();
    assert_eq!(host, Some("other.com".into()));
    let path = resolved.path();
    assert_eq!(path, "/c");
}
/// An absolute input takes its scheme, host and path from itself, not from
/// the base.
#[test]
fn absolute_url_ignores_base() {
    let base_url = Url::parse("http://example.com/a").unwrap();
    let resolved = Url::parse_with_base("https://other.com/b", &base_url).unwrap();
    let scheme = resolved.scheme();
    assert_eq!(scheme, "https");
    let host = resolved.host_str();
    assert_eq!(host, Some("other.com".into()));
    let path = resolved.path();
    assert_eq!(path, "/b");
}
/// An empty reference keeps the base path and query but not the base
/// fragment.
#[test]
fn relative_empty_string() {
    let base_url = Url::parse("http://example.com/a/b?q=1#f").unwrap();
    let resolved = Url::parse_with_base("", &base_url).unwrap();
    let path = resolved.path();
    assert_eq!(path, "/a/b");
    let query = resolved.query();
    assert_eq!(query, Some("q=1"));
    let fragment = resolved.fragment();
    assert_eq!(fragment, None);
}
// -------------------------------------------------------------------
// Serialization
// -------------------------------------------------------------------
/// A plain http URL serializes back to the same text.
#[test]
fn serialize_simple() {
    let parsed = Url::parse("http://example.com/path").unwrap();
    let text = parsed.serialize();
    assert_eq!(text, "http://example.com/path");
}
/// Username and password are included in the serialized URL.
#[test]
fn serialize_with_credentials() {
    let parsed = Url::parse("http://user:pass@example.com/").unwrap();
    let text = parsed.serialize();
    assert_eq!(text, "http://user:pass@example.com/");
}
/// Port 8080 is included in the serialized URL.
#[test]
fn serialize_with_port() {
    let parsed = Url::parse("http://example.com:8080/").unwrap();
    let text = parsed.serialize();
    assert_eq!(text, "http://example.com:8080/");
}
/// Query and fragment are included in the serialized URL.
#[test]
fn serialize_with_query_fragment() {
    let parsed = Url::parse("http://example.com/path?q=1#frag").unwrap();
    let text = parsed.serialize();
    assert_eq!(text, "http://example.com/path?q=1#frag");
}
/// A `data:` URL serializes back to the same text.
#[test]
fn serialize_data_uri() {
    let parsed = Url::parse("data:text/html,hello").unwrap();
    let text = parsed.serialize();
    assert_eq!(text, "data:text/html,hello");
}
/// A URL using every component survives a parse/serialize round trip
/// unchanged.
#[test]
fn roundtrip_full_url() {
    let original = "https://user:pass@example.com:8080/a/b?q=1#frag";
    let text = Url::parse(original).unwrap().serialize();
    assert_eq!(text, original);
}
/// A URL with a dotted-decimal IPv4 host round-trips unchanged.
#[test]
fn roundtrip_ipv4() {
    let parsed = Url::parse("http://192.168.1.1/path").unwrap();
    let text = parsed.serialize();
    assert_eq!(text, "http://192.168.1.1/path");
}
/// A URL with a bracketed IPv6 host round-trips unchanged.
#[test]
fn roundtrip_ipv6() {
    let parsed = Url::parse("http://[::1]/path").unwrap();
    let text = parsed.serialize();
    assert_eq!(text, "http://[::1]/path");
}
// -------------------------------------------------------------------
// Origin
// -------------------------------------------------------------------
/// The origin of an http URL with port 8080 is a tuple of scheme, domain
/// host and that port.
#[test]
fn origin_http() {
    let parsed = Url::parse("http://example.com:8080/path").unwrap();
    if let Origin::Tuple(scheme, host, port) = parsed.origin() {
        assert_eq!(scheme, "http");
        assert_eq!(host, Host::Domain("example.com".into()));
        assert_eq!(port, Some(8080));
    } else {
        panic!("expected tuple origin");
    }
}
/// The origin of an https URL without an explicit port is a tuple whose port
/// is `None`.
#[test]
fn origin_https_default_port() {
    let parsed = Url::parse("https://example.com/").unwrap();
    if let Origin::Tuple(scheme, host, port) = parsed.origin() {
        assert_eq!(scheme, "https");
        assert_eq!(host, Host::Domain("example.com".into()));
        assert_eq!(port, None);
    } else {
        panic!("expected tuple origin");
    }
}
/// The origin of a `data:` URL is `Origin::Opaque`.
#[test]
fn origin_data_is_opaque() {
    let parsed = Url::parse("data:text/html,hello").unwrap();
    let origin = parsed.origin();
    assert_eq!(origin, Origin::Opaque);
}
// -------------------------------------------------------------------
// File URLs
// -------------------------------------------------------------------
/// A `file:///` URL has an empty host string and the full absolute path.
#[test]
fn file_url_unix() {
    let parsed = Url::parse("file:///home/user/file.txt").unwrap();
    let scheme = parsed.scheme();
    assert_eq!(scheme, "file");
    let host = parsed.host_str();
    assert_eq!(host, Some("".into()));
    let path = parsed.path();
    assert_eq!(path, "/home/user/file.txt");
}
/// A drive letter after `file:///` is kept as the first path segment.
#[test]
fn file_url_windows_drive() {
    let parsed = Url::parse("file:///C:/Windows/system32").unwrap();
    let scheme = parsed.scheme();
    assert_eq!(scheme, "file");
    let path = parsed.path();
    assert_eq!(path, "/C:/Windows/system32");
}
/// A `file://` URL with a host name reports that host and the remaining
/// path.
#[test]
fn file_url_with_host() {
    let parsed = Url::parse("file://server/share/file.txt").unwrap();
    let scheme = parsed.scheme();
    assert_eq!(scheme, "file");
    let host = parsed.host_str();
    assert_eq!(host, Some("server".into()));
    let path = parsed.path();
    assert_eq!(path, "/share/file.txt");
}
// -------------------------------------------------------------------
// Edge cases
// -------------------------------------------------------------------
/// Parsing the empty string fails with `UrlError::EmptyInput`.
#[test]
fn empty_input_fails() {
    let outcome = Url::parse("");
    assert_eq!(outcome, Err(UrlError::EmptyInput));
}
/// Parsing a whitespace-only string fails with `UrlError::EmptyInput`.
#[test]
fn whitespace_only_fails() {
    let outcome = Url::parse(" ");
    assert_eq!(outcome, Err(UrlError::EmptyInput));
}
/// Input without a scheme (and without a base) is rejected.
#[test]
fn missing_scheme_fails() {
    let outcome = Url::parse("example.com");
    assert!(outcome.is_err());
}
/// Spaces surrounding the input do not prevent parsing or leak into the host.
#[test]
fn leading_whitespace_stripped() {
    let parsed = Url::parse(" http://example.com ").unwrap();
    let host = parsed.host_str();
    assert_eq!(host, Some("example.com".into()));
}
/// A tab and a newline embedded in the host are removed before parsing.
#[test]
fn tab_newline_stripped() {
    let parsed = Url::parse("http://exa\tmple\n.com/").unwrap();
    let host = parsed.host_str();
    assert_eq!(host, Some("example.com".into()));
}
/// A space inside the query is percent-encoded as `%20`.
#[test]
fn query_with_special_chars() {
    let parsed = Url::parse("http://example.com/?key=val ue&foo=bar").unwrap();
    let query = parsed.query().unwrap();
    assert!(query.contains("key=val%20ue"));
}
/// A space inside the fragment is percent-encoded as `%20`.
#[test]
fn fragment_with_special_chars() {
    let parsed = Url::parse("http://example.com/#sec tion").unwrap();
    let fragment = parsed.fragment().unwrap();
    assert!(fragment.contains("sec%20tion"));
}
/// A URL with a username and no password has an empty password and still
/// counts as having credentials.
#[test]
fn username_only() {
    let parsed = Url::parse("http://user@example.com/").unwrap();
    let user = parsed.username();
    assert_eq!(user, "user");
    let secret = parsed.password();
    assert_eq!(secret, "");
    assert!(parsed.has_credentials());
}
/// A URL without userinfo reports no credentials.
#[test]
fn no_credentials() {
    let parsed = Url::parse("http://example.com/").unwrap();
    let has_creds = parsed.has_credentials();
    assert!(!has_creds);
}
/// A port of 99999 is rejected.
#[test]
fn port_overflow_fails() {
    let outcome = Url::parse("http://example.com:99999/");
    assert!(outcome.is_err());
}
/// A `ws:` URL without an explicit port reports 80 from `port_or_default`.
#[test]
fn ws_scheme() {
    let parsed = Url::parse("ws://example.com/chat").unwrap();
    let scheme = parsed.scheme();
    assert_eq!(scheme, "ws");
    let effective = parsed.port_or_default();
    assert_eq!(effective, Some(80));
}
/// A `wss:` URL without an explicit port reports 443 from `port_or_default`.
#[test]
fn wss_scheme() {
    let parsed = Url::parse("wss://example.com/chat").unwrap();
    let scheme = parsed.scheme();
    assert_eq!(scheme, "wss");
    let effective = parsed.port_or_default();
    assert_eq!(effective, Some(443));
}
/// A `data:` URL is flagged as cannot-be-a-base.
#[test]
fn cannot_be_a_base() {
    let parsed = Url::parse("data:text/html,hello").unwrap();
    let opaque = parsed.cannot_be_a_base();
    assert!(opaque);
}
/// An http URL is not flagged as cannot-be-a-base.
#[test]
fn http_can_be_a_base() {
    let parsed = Url::parse("http://example.com/").unwrap();
    let opaque = parsed.cannot_be_a_base();
    assert!(!opaque);
}
// -------------------------------------------------------------------
// Display/ToString
// -------------------------------------------------------------------
/// Formatting a URL with `{}` produces the same text as `serialize()`.
#[test]
fn display_matches_serialize() {
    let parsed = Url::parse("https://example.com:8443/path?q=1#f").unwrap();
    let displayed = format!("{parsed}");
    assert_eq!(displayed, parsed.serialize());
}
// -------------------------------------------------------------------
// Multiple path segments
// -------------------------------------------------------------------
/// `path_segments` returns each slash-separated segment of the path in order.
#[test]
fn path_segments() {
    let parsed = Url::parse("http://example.com/a/b/c").unwrap();
    let segments = parsed.path_segments();
    assert_eq!(segments, &["a", "b", "c"]);
}
/// A trailing slash shows up as a final empty segment.
#[test]
fn path_segments_trailing_slash() {
    let parsed = Url::parse("http://example.com/a/b/").unwrap();
    let segments = parsed.path_segments();
    assert_eq!(segments, &["a", "b", ""]);
}
// -------------------------------------------------------------------
// Host type
// -------------------------------------------------------------------
/// A domain host serializes to the domain text itself.
#[test]
fn host_serialize_domain() {
    let text = Host::Domain("example.com".into()).serialize();
    assert_eq!(text, "example.com");
}
/// An IPv4 host serializes to dotted-decimal form.
#[test]
fn host_serialize_ipv4() {
    let text = Host::Ipv4(0x7F000001).serialize();
    assert_eq!(text, "127.0.0.1");
}
/// An IPv6 host serializes to bracketed, compressed form.
#[test]
fn host_serialize_ipv6() {
    let text = Host::Ipv6([0, 0, 0, 0, 0, 0, 0, 1]).serialize();
    assert_eq!(text, "[::1]");
}
// -------------------------------------------------------------------
// IPv6 serialization
// -------------------------------------------------------------------
/// With no zero pieces, all eight pieces are written without leading zeros
/// and without `::`.
#[test]
fn ipv6_serialize_full() {
    let text = serialize_ipv6(&[
        0x2001, 0x0db8, 0x85a3, 0x0001, 0x0002, 0x8a2e, 0x0370, 0x7334,
    ]);
    assert_eq!(text, "2001:db8:85a3:1:2:8a2e:370:7334");
}
/// A run of zero pieces in the middle is written as `::`.
#[test]
fn ipv6_serialize_compress() {
    let text = serialize_ipv6(&[0x2001, 0x0db8, 0, 0, 0, 0, 0, 1]);
    assert_eq!(text, "2001:db8::1");
}
/// Eight zero pieces serialize to `::`.
#[test]
fn ipv6_serialize_all_zeros() {
    let text = serialize_ipv6(&[0u16; 8]);
    assert_eq!(text, "::");
}
/// Isolated single zero pieces are written as `0`, not compressed to `::`.
#[test]
fn ipv6_serialize_no_compress_single_zero() {
    let text = serialize_ipv6(&[1, 0, 2, 0, 3, 0, 4, 0]);
    assert_eq!(text, "1:0:2:0:3:0:4:0");
}
// -------------------------------------------------------------------
// Percent encoding edge cases
// -------------------------------------------------------------------
/// Letters, digits and `-`, `_`, `.`, `~` are left untouched by the path
/// predicate.
#[test]
fn percent_encode_preserves_unreserved() {
    assert_eq!(
        percent_encode("hello-world_test.page~1", is_path_encode),
        "hello-world_test.page~1"
    );
}
/// A non-ASCII character is encoded as one `%XX` escape per UTF-8 byte.
#[test]
fn percent_encode_multibyte_utf8() {
    // "é" is the two UTF-8 bytes 0xC3 0xA9.
    assert_eq!(percent_encode("café", is_path_encode), "caf%C3%A9");
}
}