// we (web engine): an experimental web browser project to understand the limits of Claude.
// Snapshot at `float-layout` (624 lines, 20 kB).
1//! Image resource loading: fetch and decode images referenced by `<img>` elements. 2//! 3//! After HTML parsing, this module scans the DOM for `<img src="...">` elements, 4//! fetches image data via the `ResourceLoader`, detects the image format from 5//! magic bytes, and decodes using the appropriate `image` crate decoder. 6 7use std::collections::HashMap; 8 9use we_dom::{Document, NodeData, NodeId}; 10use we_image::gif::decode_gif; 11use we_image::jpeg::decode_jpeg; 12use we_image::pixel::{Image, ImageError}; 13use we_image::png::decode_png; 14use we_url::Url; 15 16use crate::loader::{LoadError, Resource, ResourceLoader}; 17 18/// Detected image format from magic bytes. 19#[derive(Debug, Clone, Copy, PartialEq, Eq)] 20pub enum ImageFormat { 21 Png, 22 Jpeg, 23 Gif, 24 Unknown, 25} 26 27/// Errors that can occur during image loading. 28#[derive(Debug)] 29pub enum ImgLoadError { 30 /// A resource failed to load. 31 Load(LoadError), 32 /// The fetched resource could not be decoded as an image. 33 Decode(ImageError), 34 /// Unknown or unsupported image format. 35 UnknownFormat { url: String }, 36} 37 38impl std::fmt::Display for ImgLoadError { 39 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 40 match self { 41 Self::Load(e) => write!(f, "image load error: {e}"), 42 Self::Decode(e) => write!(f, "image decode error: {e}"), 43 Self::UnknownFormat { url } => write!(f, "unknown image format at {url}"), 44 } 45 } 46} 47 48impl From<LoadError> for ImgLoadError { 49 fn from(e: LoadError) -> Self { 50 Self::Load(e) 51 } 52} 53 54impl From<ImageError> for ImgLoadError { 55 fn from(e: ImageError) -> Self { 56 Self::Decode(e) 57 } 58} 59 60/// A successfully or unsuccessfully loaded image resource. 61pub struct ImageResource { 62 /// The decoded RGBA8 image, if loading and decoding succeeded. 63 pub image: Option<Image>, 64 /// Display width in CSS pixels (from `width` attribute or intrinsic). 
65 pub display_width: f32, 66 /// Display height in CSS pixels (from `height` attribute or intrinsic). 67 pub display_height: f32, 68 /// Alt text from the `alt` attribute. 69 pub alt: String, 70} 71 72/// Map from DOM node IDs to their loaded image resources. 73pub type ImageStore = HashMap<NodeId, ImageResource>; 74 75/// Detect image format from the first bytes of data. 76pub fn detect_format(data: &[u8]) -> ImageFormat { 77 // PNG: 89 50 4E 47 0D 0A 1A 0A 78 if data.len() >= 8 && data[..8] == [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A] { 79 return ImageFormat::Png; 80 } 81 // JPEG: FF D8 82 if data.len() >= 2 && data[0] == 0xFF && data[1] == 0xD8 { 83 return ImageFormat::Jpeg; 84 } 85 // GIF: GIF87a or GIF89a 86 if data.len() >= 6 && &data[..3] == b"GIF" { 87 return ImageFormat::Gif; 88 } 89 ImageFormat::Unknown 90} 91 92/// Collect and load all images referenced by `<img>` elements in the DOM. 93/// 94/// Scans the DOM in document order for `<img>` elements with a `src` attribute, 95/// fetches each image via the `ResourceLoader`, detects the format, and decodes. 96/// Failed loads produce an `ImageResource` with `image: None` (graceful degradation). 97pub fn collect_images(doc: &Document, loader: &mut ResourceLoader, base_url: &Url) -> ImageStore { 98 let mut store = ImageStore::new(); 99 let mut img_nodes = Vec::new(); 100 collect_img_nodes(doc, doc.root(), &mut img_nodes); 101 102 for node in img_nodes { 103 let src = match doc.get_attribute(node, "src") { 104 Some(s) if !s.is_empty() => s.to_string(), 105 _ => { 106 // No src — record with alt text only. 
107 let alt = doc.get_attribute(node, "alt").unwrap_or("").to_string(); 108 store.insert( 109 node, 110 ImageResource { 111 image: None, 112 display_width: 0.0, 113 display_height: 0.0, 114 alt, 115 }, 116 ); 117 continue; 118 } 119 }; 120 121 let alt = doc.get_attribute(node, "alt").unwrap_or("").to_string(); 122 let attr_width = parse_dimension_attr(doc.get_attribute(node, "width")); 123 let attr_height = parse_dimension_attr(doc.get_attribute(node, "height")); 124 125 match fetch_and_decode(loader, &src, base_url) { 126 Ok(image) => { 127 let intrinsic_w = image.width as f32; 128 let intrinsic_h = image.height as f32; 129 let (dw, dh) = 130 resolve_dimensions(attr_width, attr_height, intrinsic_w, intrinsic_h); 131 store.insert( 132 node, 133 ImageResource { 134 image: Some(image), 135 display_width: dw, 136 display_height: dh, 137 alt, 138 }, 139 ); 140 } 141 Err(_) => { 142 // Graceful degradation: store alt text, no image. 143 let (dw, dh) = match (attr_width, attr_height) { 144 (Some(w), Some(h)) => (w, h), 145 (Some(w), None) => (w, 0.0), 146 (None, Some(h)) => (0.0, h), 147 (None, None) => (0.0, 0.0), 148 }; 149 store.insert( 150 node, 151 ImageResource { 152 image: None, 153 display_width: dw, 154 display_height: dh, 155 alt, 156 }, 157 ); 158 } 159 } 160 } 161 162 store 163} 164 165/// Walk the DOM in document order and collect `<img>` element nodes. 166fn collect_img_nodes(doc: &Document, node: NodeId, result: &mut Vec<NodeId>) { 167 if let NodeData::Element { tag_name, .. } = doc.node_data(node) { 168 if tag_name.eq_ignore_ascii_case("img") { 169 result.push(node); 170 } 171 } 172 for child in doc.children(node) { 173 collect_img_nodes(doc, child, result); 174 } 175} 176 177/// Parse a dimension attribute value (e.g., `width="200"`) to f32. 178fn parse_dimension_attr(value: Option<&str>) -> Option<f32> { 179 value.and_then(|v| { 180 let v = v.trim(); 181 // Strip trailing "px" if present. 
182 let v = v.strip_suffix("px").unwrap_or(v); 183 v.parse::<f32>().ok().filter(|&n| n > 0.0) 184 }) 185} 186 187/// Resolve display dimensions from attribute values and intrinsic image size. 188/// 189/// If both attributes are set, use them directly. 190/// If only one is set, scale the other proportionally. 191/// If neither is set, use intrinsic dimensions. 192fn resolve_dimensions( 193 attr_w: Option<f32>, 194 attr_h: Option<f32>, 195 intrinsic_w: f32, 196 intrinsic_h: f32, 197) -> (f32, f32) { 198 match (attr_w, attr_h) { 199 (Some(w), Some(h)) => (w, h), 200 (Some(w), None) => { 201 if intrinsic_w > 0.0 { 202 (w, w * intrinsic_h / intrinsic_w) 203 } else { 204 (w, intrinsic_h) 205 } 206 } 207 (None, Some(h)) => { 208 if intrinsic_h > 0.0 { 209 (h * intrinsic_w / intrinsic_h, h) 210 } else { 211 (intrinsic_w, h) 212 } 213 } 214 (None, None) => (intrinsic_w, intrinsic_h), 215 } 216} 217 218/// Fetch image data from a URL and decode it. 219fn fetch_and_decode( 220 loader: &mut ResourceLoader, 221 src: &str, 222 base_url: &Url, 223) -> Result<Image, ImgLoadError> { 224 let resource = loader.fetch_url(src, Some(base_url))?; 225 226 let (data, url_str) = match resource { 227 Resource::Image { data, url, .. } => (data, url.to_string()), 228 Resource::Other { data, url, .. } => (data, url.to_string()), 229 Resource::Html { text, .. } => (text.into_bytes(), src.to_string()), 230 Resource::Css { text, .. } => (text.into_bytes(), src.to_string()), 231 }; 232 233 decode_image_data(&data, &url_str) 234} 235 236/// Decode raw bytes into an Image, detecting format from magic bytes. 
237fn decode_image_data(data: &[u8], url: &str) -> Result<Image, ImgLoadError> { 238 match detect_format(data) { 239 ImageFormat::Png => Ok(decode_png(data)?), 240 ImageFormat::Jpeg => Ok(decode_jpeg(data)?), 241 ImageFormat::Gif => Ok(decode_gif(data)?), 242 ImageFormat::Unknown => Err(ImgLoadError::UnknownFormat { 243 url: url.to_string(), 244 }), 245 } 246} 247 248// --------------------------------------------------------------------------- 249// Tests 250// --------------------------------------------------------------------------- 251 252#[cfg(test)] 253mod tests { 254 use super::*; 255 256 // ----------------------------------------------------------------------- 257 // detect_format 258 // ----------------------------------------------------------------------- 259 260 #[test] 261 fn detect_png() { 262 let data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00]; 263 assert_eq!(detect_format(&data), ImageFormat::Png); 264 } 265 266 #[test] 267 fn detect_jpeg() { 268 let data = [0xFF, 0xD8, 0xFF, 0xE0]; 269 assert_eq!(detect_format(&data), ImageFormat::Jpeg); 270 } 271 272 #[test] 273 fn detect_gif87a() { 274 assert_eq!(detect_format(b"GIF87a..."), ImageFormat::Gif); 275 } 276 277 #[test] 278 fn detect_gif89a() { 279 assert_eq!(detect_format(b"GIF89a..."), ImageFormat::Gif); 280 } 281 282 #[test] 283 fn detect_unknown_empty() { 284 assert_eq!(detect_format(&[]), ImageFormat::Unknown); 285 } 286 287 #[test] 288 fn detect_unknown_random() { 289 assert_eq!(detect_format(&[0x00, 0x01, 0x02]), ImageFormat::Unknown); 290 } 291 292 #[test] 293 fn detect_short_data() { 294 assert_eq!(detect_format(&[0xFF]), ImageFormat::Unknown); 295 } 296 297 // ----------------------------------------------------------------------- 298 // parse_dimension_attr 299 // ----------------------------------------------------------------------- 300 301 #[test] 302 fn parse_dimension_integer() { 303 assert_eq!(parse_dimension_attr(Some("200")), Some(200.0)); 304 } 305 306 #[test] 307 
fn parse_dimension_with_px_suffix() { 308 assert_eq!(parse_dimension_attr(Some("100px")), Some(100.0)); 309 } 310 311 #[test] 312 fn parse_dimension_float() { 313 assert_eq!(parse_dimension_attr(Some("50.5")), Some(50.5)); 314 } 315 316 #[test] 317 fn parse_dimension_whitespace() { 318 assert_eq!(parse_dimension_attr(Some(" 300 ")), Some(300.0)); 319 } 320 321 #[test] 322 fn parse_dimension_zero() { 323 assert_eq!(parse_dimension_attr(Some("0")), None); 324 } 325 326 #[test] 327 fn parse_dimension_negative() { 328 assert_eq!(parse_dimension_attr(Some("-10")), None); 329 } 330 331 #[test] 332 fn parse_dimension_invalid() { 333 assert_eq!(parse_dimension_attr(Some("abc")), None); 334 } 335 336 #[test] 337 fn parse_dimension_none() { 338 assert_eq!(parse_dimension_attr(None), None); 339 } 340 341 #[test] 342 fn parse_dimension_empty() { 343 assert_eq!(parse_dimension_attr(Some("")), None); 344 } 345 346 // ----------------------------------------------------------------------- 347 // resolve_dimensions 348 // ----------------------------------------------------------------------- 349 350 #[test] 351 fn resolve_both_attrs() { 352 let (w, h) = resolve_dimensions(Some(400.0), Some(300.0), 800.0, 600.0); 353 assert_eq!(w, 400.0); 354 assert_eq!(h, 300.0); 355 } 356 357 #[test] 358 fn resolve_width_only_proportional() { 359 let (w, h) = resolve_dimensions(Some(400.0), None, 800.0, 600.0); 360 assert_eq!(w, 400.0); 361 assert_eq!(h, 300.0); // 400 * 600/800 362 } 363 364 #[test] 365 fn resolve_height_only_proportional() { 366 let (w, h) = resolve_dimensions(None, Some(300.0), 800.0, 600.0); 367 assert_eq!(w, 400.0); // 300 * 800/600 368 assert_eq!(h, 300.0); 369 } 370 371 #[test] 372 fn resolve_neither_uses_intrinsic() { 373 let (w, h) = resolve_dimensions(None, None, 1024.0, 768.0); 374 assert_eq!(w, 1024.0); 375 assert_eq!(h, 768.0); 376 } 377 378 #[test] 379 fn resolve_width_only_zero_intrinsic() { 380 let (w, h) = resolve_dimensions(Some(400.0), None, 0.0, 600.0); 381 
assert_eq!(w, 400.0); 382 assert_eq!(h, 600.0); // can't scale, use intrinsic height 383 } 384 385 #[test] 386 fn resolve_height_only_zero_intrinsic() { 387 let (w, h) = resolve_dimensions(None, Some(300.0), 800.0, 0.0); 388 assert_eq!(w, 800.0); // can't scale, use intrinsic width 389 assert_eq!(h, 300.0); 390 } 391 392 // ----------------------------------------------------------------------- 393 // collect_img_nodes 394 // ----------------------------------------------------------------------- 395 396 #[test] 397 fn collects_img_elements() { 398 let mut doc = Document::new(); 399 let root = doc.root(); 400 401 let html = doc.create_element("html"); 402 doc.append_child(root, html); 403 404 let body = doc.create_element("body"); 405 doc.append_child(html, body); 406 407 let img = doc.create_element("img"); 408 doc.set_attribute(img, "src", "photo.png"); 409 doc.append_child(body, img); 410 411 let mut nodes = Vec::new(); 412 collect_img_nodes(&doc, doc.root(), &mut nodes); 413 assert_eq!(nodes.len(), 1); 414 assert_eq!(doc.tag_name(nodes[0]), Some("img")); 415 } 416 417 #[test] 418 fn collects_multiple_imgs() { 419 let mut doc = Document::new(); 420 let root = doc.root(); 421 422 let body = doc.create_element("body"); 423 doc.append_child(root, body); 424 425 let img1 = doc.create_element("img"); 426 doc.set_attribute(img1, "src", "a.png"); 427 doc.append_child(body, img1); 428 429 let img2 = doc.create_element("img"); 430 doc.set_attribute(img2, "src", "b.jpg"); 431 doc.append_child(body, img2); 432 433 let mut nodes = Vec::new(); 434 collect_img_nodes(&doc, doc.root(), &mut nodes); 435 assert_eq!(nodes.len(), 2); 436 } 437 438 #[test] 439 fn ignores_non_img_elements() { 440 let mut doc = Document::new(); 441 let root = doc.root(); 442 443 let p = doc.create_element("p"); 444 doc.append_child(root, p); 445 446 let div = doc.create_element("div"); 447 doc.append_child(root, div); 448 449 let mut nodes = Vec::new(); 450 collect_img_nodes(&doc, doc.root(), &mut 
nodes); 451 assert!(nodes.is_empty()); 452 } 453 454 // ----------------------------------------------------------------------- 455 // collect_images — integration 456 // ----------------------------------------------------------------------- 457 458 #[test] 459 fn collect_images_no_src() { 460 let mut doc = Document::new(); 461 let root = doc.root(); 462 463 let img = doc.create_element("img"); 464 doc.set_attribute(img, "alt", "missing"); 465 doc.append_child(root, img); 466 467 let mut loader = ResourceLoader::new(); 468 let base = Url::parse("http://example.com/").unwrap(); 469 470 let store = collect_images(&doc, &mut loader, &base); 471 assert_eq!(store.len(), 1); 472 let res = store.get(&img).unwrap(); 473 assert!(res.image.is_none()); 474 assert_eq!(res.alt, "missing"); 475 } 476 477 #[test] 478 fn collect_images_empty_src() { 479 let mut doc = Document::new(); 480 let root = doc.root(); 481 482 let img = doc.create_element("img"); 483 doc.set_attribute(img, "src", ""); 484 doc.set_attribute(img, "alt", "empty src"); 485 doc.append_child(root, img); 486 487 let mut loader = ResourceLoader::new(); 488 let base = Url::parse("http://example.com/").unwrap(); 489 490 let store = collect_images(&doc, &mut loader, &base); 491 let res = store.get(&img).unwrap(); 492 assert!(res.image.is_none()); 493 assert_eq!(res.alt, "empty src"); 494 } 495 496 #[test] 497 fn collect_images_failed_fetch_graceful() { 498 let mut doc = Document::new(); 499 let root = doc.root(); 500 501 let img = doc.create_element("img"); 502 doc.set_attribute(img, "src", "http://nonexistent.test/photo.png"); 503 doc.set_attribute(img, "alt", "Photo"); 504 doc.set_attribute(img, "width", "200"); 505 doc.set_attribute(img, "height", "150"); 506 doc.append_child(root, img); 507 508 let mut loader = ResourceLoader::new(); 509 let base = Url::parse("http://example.com/").unwrap(); 510 511 let store = collect_images(&doc, &mut loader, &base); 512 let res = store.get(&img).unwrap(); 513 
assert!(res.image.is_none()); 514 assert_eq!(res.alt, "Photo"); 515 assert_eq!(res.display_width, 200.0); 516 assert_eq!(res.display_height, 150.0); 517 } 518 519 #[test] 520 fn collect_images_no_alt() { 521 let mut doc = Document::new(); 522 let root = doc.root(); 523 524 let img = doc.create_element("img"); 525 doc.set_attribute(img, "src", "http://nonexistent.test/x.png"); 526 doc.append_child(root, img); 527 528 let mut loader = ResourceLoader::new(); 529 let base = Url::parse("http://example.com/").unwrap(); 530 531 let store = collect_images(&doc, &mut loader, &base); 532 let res = store.get(&img).unwrap(); 533 assert_eq!(res.alt, ""); 534 } 535 536 // ----------------------------------------------------------------------- 537 // ImgLoadError display 538 // ----------------------------------------------------------------------- 539 540 #[test] 541 fn error_display_unknown_format() { 542 let e = ImgLoadError::UnknownFormat { 543 url: "test.bin".to_string(), 544 }; 545 assert_eq!(e.to_string(), "unknown image format at test.bin"); 546 } 547 548 #[test] 549 fn error_display_load() { 550 let e = ImgLoadError::Load(LoadError::InvalidUrl("bad".to_string())); 551 assert!(e.to_string().contains("image load error")); 552 } 553 554 #[test] 555 fn error_display_decode() { 556 let e = ImgLoadError::Decode(ImageError::Decode("corrupt".to_string())); 557 assert!(e.to_string().contains("image decode error")); 558 } 559 560 // ----------------------------------------------------------------------- 561 // decode_image_data — unit tests with minimal valid images 562 // ----------------------------------------------------------------------- 563 564 #[test] 565 fn decode_unknown_format_error() { 566 let result = decode_image_data(&[0x00, 0x01, 0x02], "mystery.bin"); 567 assert!(result.is_err()); 568 assert!(matches!(result, Err(ImgLoadError::UnknownFormat { .. }))); 569 } 570 571 #[test] 572 fn decode_truncated_png_error() { 573 // Valid PNG header but no actual image data. 
574 let data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]; 575 let result = decode_image_data(&data, "broken.png"); 576 assert!(result.is_err()); 577 } 578 579 #[test] 580 fn decode_truncated_jpeg_error() { 581 let data = [0xFF, 0xD8, 0xFF, 0xE0]; 582 let result = decode_image_data(&data, "broken.jpg"); 583 assert!(result.is_err()); 584 } 585 586 #[test] 587 fn decode_truncated_gif_error() { 588 let result = decode_image_data(b"GIF89a", "broken.gif"); 589 assert!(result.is_err()); 590 } 591 592 // ----------------------------------------------------------------------- 593 // decode_image_data — valid minimal images 594 // ----------------------------------------------------------------------- 595 596 #[test] 597 fn decode_valid_png() { 598 // Minimal 1x1 red PNG (RGB, bit depth 8). 599 let data: &[u8] = &[ 600 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature 601 0x00, 0x00, 0x00, 0x0D, // IHDR length 602 0x49, 0x48, 0x44, 0x52, // IHDR 603 0x00, 0x00, 0x00, 0x01, // width: 1 604 0x00, 0x00, 0x00, 0x01, // height: 1 605 0x08, 0x02, // bit depth: 8, color type: RGB 606 0x00, 0x00, 0x00, // compression, filter, interlace 607 0x90, 0x77, 0x53, 0xDE, // CRC 608 0x00, 0x00, 0x00, 0x0C, // IDAT length 609 0x49, 0x44, 0x41, 0x54, // IDAT 610 0x78, 0x9C, 0x63, 0xF8, 0xCF, 0xC0, 0x00, 0x00, // zlib data 611 0x03, 0x01, 0x01, 0x00, // Adler32 612 0xC9, 0xFE, 0x92, 0xEF, // CRC 613 0x00, 0x00, 0x00, 0x00, // IEND length 614 0x49, 0x45, 0x4E, 0x44, // IEND 615 0xAE, 0x42, 0x60, 0x82, // CRC 616 ]; 617 let result = decode_image_data(data, "red.png"); 618 assert!(result.is_ok(), "PNG decode failed: {:?}", result.err()); 619 let img = result.unwrap(); 620 assert_eq!(img.width, 1); 621 assert_eq!(img.height, 1); 622 assert_eq!(img.data.len(), 4); // 1x1 RGBA8 623 } 624}