we (web engine): Experimental web browser project to understand the limits of Claude
at texture-validation 626 lines 20 kB view raw
1//! Image resource loading: fetch and decode images referenced by `<img>` elements. 2//! 3//! After HTML parsing, this module scans the DOM for `<img src="...">` elements, 4//! fetches image data via the `ResourceLoader`, detects the image format from 5//! magic bytes, and decodes using the appropriate `image` crate decoder. 6 7use std::collections::HashMap; 8 9use we_dom::{Document, NodeData, NodeId}; 10use we_image::gif::decode_gif; 11use we_image::jpeg::decode_jpeg; 12use we_image::pixel::{Image, ImageError}; 13use we_image::png::decode_png; 14use we_url::Url; 15 16use crate::loader::{LoadError, Resource, ResourceLoader}; 17 18/// Detected image format from magic bytes. 19#[derive(Debug, Clone, Copy, PartialEq, Eq)] 20pub enum ImageFormat { 21 Png, 22 Jpeg, 23 Gif, 24 Unknown, 25} 26 27/// Errors that can occur during image loading. 28#[derive(Debug)] 29pub enum ImgLoadError { 30 /// A resource failed to load. 31 Load(LoadError), 32 /// The fetched resource could not be decoded as an image. 33 Decode(ImageError), 34 /// Unknown or unsupported image format. 35 UnknownFormat { url: String }, 36} 37 38impl std::fmt::Display for ImgLoadError { 39 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 40 match self { 41 Self::Load(e) => write!(f, "image load error: {e}"), 42 Self::Decode(e) => write!(f, "image decode error: {e}"), 43 Self::UnknownFormat { url } => write!(f, "unknown image format at {url}"), 44 } 45 } 46} 47 48impl From<LoadError> for ImgLoadError { 49 fn from(e: LoadError) -> Self { 50 Self::Load(e) 51 } 52} 53 54impl From<ImageError> for ImgLoadError { 55 fn from(e: ImageError) -> Self { 56 Self::Decode(e) 57 } 58} 59 60/// A successfully or unsuccessfully loaded image resource. 61pub struct ImageResource { 62 /// The decoded RGBA8 image, if loading and decoding succeeded. 63 pub image: Option<Image>, 64 /// Display width in CSS pixels (from `width` attribute or intrinsic). 65 pub display_width: f32, 66 /// Display height in CSS pixels (from `height` attribute or intrinsic). 67 pub display_height: f32, 68 /// Alt text from the `alt` attribute. 69 pub alt: String, 70} 71 72/// Map from DOM node IDs to their loaded image resources. 73pub type ImageStore = HashMap<NodeId, ImageResource>; 74 75/// Detect image format from the first bytes of data. 76pub fn detect_format(data: &[u8]) -> ImageFormat { 77 // PNG: 89 50 4E 47 0D 0A 1A 0A 78 if data.len() >= 8 && data[..8] == [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A] { 79 return ImageFormat::Png; 80 } 81 // JPEG: FF D8 82 if data.len() >= 2 && data[0] == 0xFF && data[1] == 0xD8 { 83 return ImageFormat::Jpeg; 84 } 85 // GIF: GIF87a or GIF89a 86 if data.len() >= 6 && &data[..3] == b"GIF" { 87 return ImageFormat::Gif; 88 } 89 ImageFormat::Unknown 90} 91 92/// Collect and load all images referenced by `<img>` elements in the DOM. 93/// 94/// Scans the DOM in document order for `<img>` elements with a `src` attribute, 95/// fetches each image via the `ResourceLoader`, detects the format, and decodes. 96/// Failed loads produce an `ImageResource` with `image: None` (graceful degradation). 97pub fn collect_images(doc: &Document, loader: &mut ResourceLoader, base_url: &Url) -> ImageStore { 98 let mut store = ImageStore::new(); 99 let mut img_nodes = Vec::new(); 100 collect_img_nodes(doc, doc.root(), &mut img_nodes); 101 102 for node in img_nodes { 103 let src = match doc.get_attribute(node, "src") { 104 Some(s) if !s.is_empty() => s.to_string(), 105 _ => { 106 // No src — record with alt text only. 107 let alt = doc.get_attribute(node, "alt").unwrap_or("").to_string(); 108 store.insert( 109 node, 110 ImageResource { 111 image: None, 112 display_width: 0.0, 113 display_height: 0.0, 114 alt, 115 }, 116 ); 117 continue; 118 } 119 }; 120 121 let alt = doc.get_attribute(node, "alt").unwrap_or("").to_string(); 122 let attr_width = parse_dimension_attr(doc.get_attribute(node, "width")); 123 let attr_height = parse_dimension_attr(doc.get_attribute(node, "height")); 124 125 match fetch_and_decode(loader, &src, base_url) { 126 Ok(image) => { 127 let intrinsic_w = image.width as f32; 128 let intrinsic_h = image.height as f32; 129 let (dw, dh) = 130 resolve_dimensions(attr_width, attr_height, intrinsic_w, intrinsic_h); 131 store.insert( 132 node, 133 ImageResource { 134 image: Some(image), 135 display_width: dw, 136 display_height: dh, 137 alt, 138 }, 139 ); 140 } 141 Err(_) => { 142 // Graceful degradation: store alt text, no image. 143 let (dw, dh) = match (attr_width, attr_height) { 144 (Some(w), Some(h)) => (w, h), 145 (Some(w), None) => (w, 0.0), 146 (None, Some(h)) => (0.0, h), 147 (None, None) => (0.0, 0.0), 148 }; 149 store.insert( 150 node, 151 ImageResource { 152 image: None, 153 display_width: dw, 154 display_height: dh, 155 alt, 156 }, 157 ); 158 } 159 } 160 } 161 162 store 163} 164 165/// Walk the DOM in document order and collect `<img>` element nodes. 166fn collect_img_nodes(doc: &Document, node: NodeId, result: &mut Vec<NodeId>) { 167 if let NodeData::Element { tag_name, .. } = doc.node_data(node) { 168 if tag_name.eq_ignore_ascii_case("img") { 169 result.push(node); 170 } 171 } 172 for child in doc.children(node) { 173 collect_img_nodes(doc, child, result); 174 } 175} 176 177/// Parse a dimension attribute value (e.g., `width="200"`) to f32. 178fn parse_dimension_attr(value: Option<&str>) -> Option<f32> { 179 value.and_then(|v| { 180 let v = v.trim(); 181 // Strip trailing "px" if present. 182 let v = v.strip_suffix("px").unwrap_or(v); 183 v.parse::<f32>().ok().filter(|&n| n > 0.0) 184 }) 185} 186 187/// Resolve display dimensions from attribute values and intrinsic image size. 188/// 189/// If both attributes are set, use them directly. 190/// If only one is set, scale the other proportionally. 191/// If neither is set, use intrinsic dimensions. 192fn resolve_dimensions( 193 attr_w: Option<f32>, 194 attr_h: Option<f32>, 195 intrinsic_w: f32, 196 intrinsic_h: f32, 197) -> (f32, f32) { 198 match (attr_w, attr_h) { 199 (Some(w), Some(h)) => (w, h), 200 (Some(w), None) => { 201 if intrinsic_w > 0.0 { 202 (w, w * intrinsic_h / intrinsic_w) 203 } else { 204 (w, intrinsic_h) 205 } 206 } 207 (None, Some(h)) => { 208 if intrinsic_h > 0.0 { 209 (h * intrinsic_w / intrinsic_h, h) 210 } else { 211 (intrinsic_w, h) 212 } 213 } 214 (None, None) => (intrinsic_w, intrinsic_h), 215 } 216} 217 218/// Fetch image data from a URL and decode it. 219fn fetch_and_decode( 220 loader: &mut ResourceLoader, 221 src: &str, 222 base_url: &Url, 223) -> Result<Image, ImgLoadError> { 224 let resource = loader.fetch_url(src, Some(base_url))?; 225 226 let (data, url_str) = match resource { 227 Resource::Image { data, url, .. } => (data, url.to_string()), 228 Resource::Other { data, url, .. } => (data, url.to_string()), 229 Resource::Html { text, .. } => (text.into_bytes(), src.to_string()), 230 Resource::Css { text, .. } | Resource::Script { text, .. } => { 231 (text.into_bytes(), src.to_string()) 232 } 233 }; 234 235 decode_image_data(&data, &url_str) 236} 237 238/// Decode raw bytes into an Image, detecting format from magic bytes. 239fn decode_image_data(data: &[u8], url: &str) -> Result<Image, ImgLoadError> { 240 match detect_format(data) { 241 ImageFormat::Png => Ok(decode_png(data)?), 242 ImageFormat::Jpeg => Ok(decode_jpeg(data)?), 243 ImageFormat::Gif => Ok(decode_gif(data)?), 244 ImageFormat::Unknown => Err(ImgLoadError::UnknownFormat { 245 url: url.to_string(), 246 }), 247 } 248} 249 250// --------------------------------------------------------------------------- 251// Tests 252// --------------------------------------------------------------------------- 253 254#[cfg(test)] 255mod tests { 256 use super::*; 257 258 // ----------------------------------------------------------------------- 259 // detect_format 260 // ----------------------------------------------------------------------- 261 262 #[test] 263 fn detect_png() { 264 let data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00]; 265 assert_eq!(detect_format(&data), ImageFormat::Png); 266 } 267 268 #[test] 269 fn detect_jpeg() { 270 let data = [0xFF, 0xD8, 0xFF, 0xE0]; 271 assert_eq!(detect_format(&data), ImageFormat::Jpeg); 272 } 273 274 #[test] 275 fn detect_gif87a() { 276 assert_eq!(detect_format(b"GIF87a..."), ImageFormat::Gif); 277 } 278 279 #[test] 280 fn detect_gif89a() { 281 assert_eq!(detect_format(b"GIF89a..."), ImageFormat::Gif); 282 } 283 284 #[test] 285 fn detect_unknown_empty() { 286 assert_eq!(detect_format(&[]), ImageFormat::Unknown); 287 } 288 289 #[test] 290 fn detect_unknown_random() { 291 assert_eq!(detect_format(&[0x00, 0x01, 0x02]), ImageFormat::Unknown); 292 } 293 294 #[test] 295 fn detect_short_data() { 296 assert_eq!(detect_format(&[0xFF]), ImageFormat::Unknown); 297 } 298 299 // ----------------------------------------------------------------------- 300 // parse_dimension_attr 301 // ----------------------------------------------------------------------- 302 303 #[test] 304 fn parse_dimension_integer() { 305 assert_eq!(parse_dimension_attr(Some("200")), Some(200.0)); 306 } 307 308 #[test] 309 fn parse_dimension_with_px_suffix() { 310 assert_eq!(parse_dimension_attr(Some("100px")), Some(100.0)); 311 } 312 313 #[test] 314 fn parse_dimension_float() { 315 assert_eq!(parse_dimension_attr(Some("50.5")), Some(50.5)); 316 } 317 318 #[test] 319 fn parse_dimension_whitespace() { 320 assert_eq!(parse_dimension_attr(Some(" 300 ")), Some(300.0)); 321 } 322 323 #[test] 324 fn parse_dimension_zero() { 325 assert_eq!(parse_dimension_attr(Some("0")), None); 326 } 327 328 #[test] 329 fn parse_dimension_negative() { 330 assert_eq!(parse_dimension_attr(Some("-10")), None); 331 } 332 333 #[test] 334 fn parse_dimension_invalid() { 335 assert_eq!(parse_dimension_attr(Some("abc")), None); 336 } 337 338 #[test] 339 fn parse_dimension_none() { 340 assert_eq!(parse_dimension_attr(None), None); 341 } 342 343 #[test] 344 fn parse_dimension_empty() { 345 assert_eq!(parse_dimension_attr(Some("")), None); 346 } 347 348 // ----------------------------------------------------------------------- 349 // resolve_dimensions 350 // ----------------------------------------------------------------------- 351 352 #[test] 353 fn resolve_both_attrs() { 354 let (w, h) = resolve_dimensions(Some(400.0), Some(300.0), 800.0, 600.0); 355 assert_eq!(w, 400.0); 356 assert_eq!(h, 300.0); 357 } 358 359 #[test] 360 fn resolve_width_only_proportional() { 361 let (w, h) = resolve_dimensions(Some(400.0), None, 800.0, 600.0); 362 assert_eq!(w, 400.0); 363 assert_eq!(h, 300.0); // 400 * 600/800 364 } 365 366 #[test] 367 fn resolve_height_only_proportional() { 368 let (w, h) = resolve_dimensions(None, Some(300.0), 800.0, 600.0); 369 assert_eq!(w, 400.0); // 300 * 800/600 370 assert_eq!(h, 300.0); 371 } 372 373 #[test] 374 fn resolve_neither_uses_intrinsic() { 375 let (w, h) = resolve_dimensions(None, None, 1024.0, 768.0); 376 assert_eq!(w, 1024.0); 377 assert_eq!(h, 768.0); 378 } 379 380 #[test] 381 fn resolve_width_only_zero_intrinsic() { 382 let (w, h) = resolve_dimensions(Some(400.0), None, 0.0, 600.0); 383 assert_eq!(w, 400.0); 384 assert_eq!(h, 600.0); // can't scale, use intrinsic height 385 } 386 387 #[test] 388 fn resolve_height_only_zero_intrinsic() { 389 let (w, h) = resolve_dimensions(None, Some(300.0), 800.0, 0.0); 390 assert_eq!(w, 800.0); // can't scale, use intrinsic width 391 assert_eq!(h, 300.0); 392 } 393 394 // ----------------------------------------------------------------------- 395 // collect_img_nodes 396 // ----------------------------------------------------------------------- 397 398 #[test] 399 fn collects_img_elements() { 400 let mut doc = Document::new(); 401 let root = doc.root(); 402 403 let html = doc.create_element("html"); 404 doc.append_child(root, html); 405 406 let body = doc.create_element("body"); 407 doc.append_child(html, body); 408 409 let img = doc.create_element("img"); 410 doc.set_attribute(img, "src", "photo.png"); 411 doc.append_child(body, img); 412 413 let mut nodes = Vec::new(); 414 collect_img_nodes(&doc, doc.root(), &mut nodes); 415 assert_eq!(nodes.len(), 1); 416 assert_eq!(doc.tag_name(nodes[0]), Some("img")); 417 } 418 419 #[test] 420 fn collects_multiple_imgs() { 421 let mut doc = Document::new(); 422 let root = doc.root(); 423 424 let body = doc.create_element("body"); 425 doc.append_child(root, body); 426 427 let img1 = doc.create_element("img"); 428 doc.set_attribute(img1, "src", "a.png"); 429 doc.append_child(body, img1); 430 431 let img2 = doc.create_element("img"); 432 doc.set_attribute(img2, "src", "b.jpg"); 433 doc.append_child(body, img2); 434 435 let mut nodes = Vec::new(); 436 collect_img_nodes(&doc, doc.root(), &mut nodes); 437 assert_eq!(nodes.len(), 2); 438 } 439 440 #[test] 441 fn ignores_non_img_elements() { 442 let mut doc = Document::new(); 443 let root = doc.root(); 444 445 let p = doc.create_element("p"); 446 doc.append_child(root, p); 447 448 let div = doc.create_element("div"); 449 doc.append_child(root, div); 450 451 let mut nodes = Vec::new(); 452 collect_img_nodes(&doc, doc.root(), &mut nodes); 453 assert!(nodes.is_empty()); 454 } 455 456 // ----------------------------------------------------------------------- 457 // collect_images — integration 458 // ----------------------------------------------------------------------- 459 460 #[test] 461 fn collect_images_no_src() { 462 let mut doc = Document::new(); 463 let root = doc.root(); 464 465 let img = doc.create_element("img"); 466 doc.set_attribute(img, "alt", "missing"); 467 doc.append_child(root, img); 468 469 let mut loader = ResourceLoader::new(); 470 let base = Url::parse("http://example.com/").unwrap(); 471 472 let store = collect_images(&doc, &mut loader, &base); 473 assert_eq!(store.len(), 1); 474 let res = store.get(&img).unwrap(); 475 assert!(res.image.is_none()); 476 assert_eq!(res.alt, "missing"); 477 } 478 479 #[test] 480 fn collect_images_empty_src() { 481 let mut doc = Document::new(); 482 let root = doc.root(); 483 484 let img = doc.create_element("img"); 485 doc.set_attribute(img, "src", ""); 486 doc.set_attribute(img, "alt", "empty src"); 487 doc.append_child(root, img); 488 489 let mut loader = ResourceLoader::new(); 490 let base = Url::parse("http://example.com/").unwrap(); 491 492 let store = collect_images(&doc, &mut loader, &base); 493 let res = store.get(&img).unwrap(); 494 assert!(res.image.is_none()); 495 assert_eq!(res.alt, "empty src"); 496 } 497 498 #[test] 499 fn collect_images_failed_fetch_graceful() { 500 let mut doc = Document::new(); 501 let root = doc.root(); 502 503 let img = doc.create_element("img"); 504 doc.set_attribute(img, "src", "http://nonexistent.test/photo.png"); 505 doc.set_attribute(img, "alt", "Photo"); 506 doc.set_attribute(img, "width", "200"); 507 doc.set_attribute(img, "height", "150"); 508 doc.append_child(root, img); 509 510 let mut loader = ResourceLoader::new(); 511 let base = Url::parse("http://example.com/").unwrap(); 512 513 let store = collect_images(&doc, &mut loader, &base); 514 let res = store.get(&img).unwrap(); 515 assert!(res.image.is_none()); 516 assert_eq!(res.alt, "Photo"); 517 assert_eq!(res.display_width, 200.0); 518 assert_eq!(res.display_height, 150.0); 519 } 520 521 #[test] 522 fn collect_images_no_alt() { 523 let mut doc = Document::new(); 524 let root = doc.root(); 525 526 let img = doc.create_element("img"); 527 doc.set_attribute(img, "src", "http://nonexistent.test/x.png"); 528 doc.append_child(root, img); 529 530 let mut loader = ResourceLoader::new(); 531 let base = Url::parse("http://example.com/").unwrap(); 532 533 let store = collect_images(&doc, &mut loader, &base); 534 let res = store.get(&img).unwrap(); 535 assert_eq!(res.alt, ""); 536 } 537 538 // ----------------------------------------------------------------------- 539 // ImgLoadError display 540 // ----------------------------------------------------------------------- 541 542 #[test] 543 fn error_display_unknown_format() { 544 let e = ImgLoadError::UnknownFormat { 545 url: "test.bin".to_string(), 546 }; 547 assert_eq!(e.to_string(), "unknown image format at test.bin"); 548 } 549 550 #[test] 551 fn error_display_load() { 552 let e = ImgLoadError::Load(LoadError::InvalidUrl("bad".to_string())); 553 assert!(e.to_string().contains("image load error")); 554 } 555 556 #[test] 557 fn error_display_decode() { 558 let e = ImgLoadError::Decode(ImageError::Decode("corrupt".to_string())); 559 assert!(e.to_string().contains("image decode error")); 560 } 561 562 // ----------------------------------------------------------------------- 563 // decode_image_data — unit tests with minimal valid images 564 // ----------------------------------------------------------------------- 565 566 #[test] 567 fn decode_unknown_format_error() { 568 let result = decode_image_data(&[0x00, 0x01, 0x02], "mystery.bin"); 569 assert!(result.is_err()); 570 assert!(matches!(result, Err(ImgLoadError::UnknownFormat { .. }))); 571 } 572 573 #[test] 574 fn decode_truncated_png_error() { 575 // Valid PNG header but no actual image data. 576 let data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]; 577 let result = decode_image_data(&data, "broken.png"); 578 assert!(result.is_err()); 579 } 580 581 #[test] 582 fn decode_truncated_jpeg_error() { 583 let data = [0xFF, 0xD8, 0xFF, 0xE0]; 584 let result = decode_image_data(&data, "broken.jpg"); 585 assert!(result.is_err()); 586 } 587 588 #[test] 589 fn decode_truncated_gif_error() { 590 let result = decode_image_data(b"GIF89a", "broken.gif"); 591 assert!(result.is_err()); 592 } 593 594 // ----------------------------------------------------------------------- 595 // decode_image_data — valid minimal images 596 // ----------------------------------------------------------------------- 597 598 #[test] 599 fn decode_valid_png() { 600 // Minimal 1x1 red PNG (RGB, bit depth 8). 601 let data: &[u8] = &[ 602 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature 603 0x00, 0x00, 0x00, 0x0D, // IHDR length 604 0x49, 0x48, 0x44, 0x52, // IHDR 605 0x00, 0x00, 0x00, 0x01, // width: 1 606 0x00, 0x00, 0x00, 0x01, // height: 1 607 0x08, 0x02, // bit depth: 8, color type: RGB 608 0x00, 0x00, 0x00, // compression, filter, interlace 609 0x90, 0x77, 0x53, 0xDE, // CRC 610 0x00, 0x00, 0x00, 0x0C, // IDAT length 611 0x49, 0x44, 0x41, 0x54, // IDAT 612 0x78, 0x9C, 0x63, 0xF8, 0xCF, 0xC0, 0x00, 0x00, // zlib data 613 0x03, 0x01, 0x01, 0x00, // Adler32 614 0xC9, 0xFE, 0x92, 0xEF, // CRC 615 0x00, 0x00, 0x00, 0x00, // IEND length 616 0x49, 0x45, 0x4E, 0x44, // IEND 617 0xAE, 0x42, 0x60, 0x82, // CRC 618 ]; 619 let result = decode_image_data(data, "red.png"); 620 assert!(result.is_ok(), "PNG decode failed: {:?}", result.err()); 621 let img = result.unwrap(); 622 assert_eq!(img.width, 1); 623 assert_eq!(img.height, 1); 624 assert_eq!(img.data.len(), 4); // 1x1 RGBA8 625 } 626}