//! Image resource loading: fetch and decode images referenced by `<img>` elements.
//!
//! After HTML parsing, this module scans the DOM for `<img>` elements,
//! fetches image data via the `ResourceLoader`, detects the image format from
//! magic bytes, and decodes using the appropriate `image` crate decoder.

use std::collections::HashMap;

use we_dom::{Document, NodeData, NodeId};
use we_image::gif::decode_gif;
use we_image::jpeg::decode_jpeg;
use we_image::pixel::{Image, ImageError};
use we_image::png::decode_png;
use we_url::Url;

use crate::loader::{LoadError, Resource, ResourceLoader};

/// Detected image format from magic bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImageFormat {
    /// Data begins with the PNG signature.
    Png,
    /// Data begins with the JPEG start-of-image marker.
    Jpeg,
    /// Data begins with a GIF header.
    Gif,
    /// No known signature matched.
    Unknown,
}

/// Errors that can occur during image loading.
#[derive(Debug)]
pub enum ImgLoadError {
    /// A resource failed to load.
    Load(LoadError),
    /// The fetched resource could not be decoded as an image.
    Decode(ImageError),
    /// Unknown or unsupported image format.
    UnknownFormat {
        /// URL of the resource whose format could not be identified.
        url: String,
    },
}

impl std::fmt::Display for ImgLoadError {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Load(e) => write!(f, "image load error: {e}"),
            Self::Decode(e) => write!(f, "image decode error: {e}"),
            Self::UnknownFormat { url } => write!(f, "unknown image format at {url}"),
        }
    }
}

// The two `From` impls let `?` convert loader and decoder errors automatically.
impl From<LoadError> for ImgLoadError {
    fn from(e: LoadError) -> Self {
        Self::Load(e)
    }
}

impl From<ImageError> for ImgLoadError {
    fn from(e: ImageError) -> Self {
        Self::Decode(e)
    }
}

/// A successfully or unsuccessfully loaded image resource.
pub struct ImageResource {
    /// The decoded RGBA8 image, if loading and decoding succeeded.
    pub image: Option<Image>,
    /// Display width in CSS pixels (from `width` attribute or intrinsic).
    pub display_width: f32,
    /// Display height in CSS pixels (from `height` attribute or intrinsic).
    pub display_height: f32,
    /// Alt text from the `alt` attribute.
    pub alt: String,
}

/// Map from DOM node IDs to their loaded image resources.
pub type ImageStore = HashMap; /// Detect image format from the first bytes of data. pub fn detect_format(data: &[u8]) -> ImageFormat { // PNG: 89 50 4E 47 0D 0A 1A 0A if data.len() >= 8 && data[..8] == [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A] { return ImageFormat::Png; } // JPEG: FF D8 if data.len() >= 2 && data[0] == 0xFF && data[1] == 0xD8 { return ImageFormat::Jpeg; } // GIF: GIF87a or GIF89a if data.len() >= 6 && &data[..3] == b"GIF" { return ImageFormat::Gif; } ImageFormat::Unknown } /// Collect and load all images referenced by `` elements in the DOM. /// /// Scans the DOM in document order for `` elements with a `src` attribute, /// fetches each image via the `ResourceLoader`, detects the format, and decodes. /// Failed loads produce an `ImageResource` with `image: None` (graceful degradation). pub fn collect_images(doc: &Document, loader: &mut ResourceLoader, base_url: &Url) -> ImageStore { let mut store = ImageStore::new(); let mut img_nodes = Vec::new(); collect_img_nodes(doc, doc.root(), &mut img_nodes); for node in img_nodes { let src = match doc.get_attribute(node, "src") { Some(s) if !s.is_empty() => s.to_string(), _ => { // No src — record with alt text only. let alt = doc.get_attribute(node, "alt").unwrap_or("").to_string(); store.insert( node, ImageResource { image: None, display_width: 0.0, display_height: 0.0, alt, }, ); continue; } }; let alt = doc.get_attribute(node, "alt").unwrap_or("").to_string(); let attr_width = parse_dimension_attr(doc.get_attribute(node, "width")); let attr_height = parse_dimension_attr(doc.get_attribute(node, "height")); match fetch_and_decode(loader, &src, base_url) { Ok(image) => { let intrinsic_w = image.width as f32; let intrinsic_h = image.height as f32; let (dw, dh) = resolve_dimensions(attr_width, attr_height, intrinsic_w, intrinsic_h); store.insert( node, ImageResource { image: Some(image), display_width: dw, display_height: dh, alt, }, ); } Err(_) => { // Graceful degradation: store alt text, no image. 
let (dw, dh) = match (attr_width, attr_height) { (Some(w), Some(h)) => (w, h), (Some(w), None) => (w, 0.0), (None, Some(h)) => (0.0, h), (None, None) => (0.0, 0.0), }; store.insert( node, ImageResource { image: None, display_width: dw, display_height: dh, alt, }, ); } } } store } /// Walk the DOM in document order and collect `` element nodes. fn collect_img_nodes(doc: &Document, node: NodeId, result: &mut Vec) { if let NodeData::Element { tag_name, .. } = doc.node_data(node) { if tag_name.eq_ignore_ascii_case("img") { result.push(node); } } for child in doc.children(node) { collect_img_nodes(doc, child, result); } } /// Parse a dimension attribute value (e.g., `width="200"`) to f32. fn parse_dimension_attr(value: Option<&str>) -> Option { value.and_then(|v| { let v = v.trim(); // Strip trailing "px" if present. let v = v.strip_suffix("px").unwrap_or(v); v.parse::().ok().filter(|&n| n > 0.0) }) } /// Resolve display dimensions from attribute values and intrinsic image size. /// /// If both attributes are set, use them directly. /// If only one is set, scale the other proportionally. /// If neither is set, use intrinsic dimensions. fn resolve_dimensions( attr_w: Option, attr_h: Option, intrinsic_w: f32, intrinsic_h: f32, ) -> (f32, f32) { match (attr_w, attr_h) { (Some(w), Some(h)) => (w, h), (Some(w), None) => { if intrinsic_w > 0.0 { (w, w * intrinsic_h / intrinsic_w) } else { (w, intrinsic_h) } } (None, Some(h)) => { if intrinsic_h > 0.0 { (h * intrinsic_w / intrinsic_h, h) } else { (intrinsic_w, h) } } (None, None) => (intrinsic_w, intrinsic_h), } } /// Fetch image data from a URL and decode it. fn fetch_and_decode( loader: &mut ResourceLoader, src: &str, base_url: &Url, ) -> Result { let resource = loader.fetch_url(src, Some(base_url))?; let (data, url_str) = match resource { Resource::Image { data, url, .. } => (data, url.to_string()), Resource::Other { data, url, .. } => (data, url.to_string()), Resource::Html { text, .. 
} => (text.into_bytes(), src.to_string()), Resource::Css { text, .. } => (text.into_bytes(), src.to_string()), }; decode_image_data(&data, &url_str) } /// Decode raw bytes into an Image, detecting format from magic bytes. fn decode_image_data(data: &[u8], url: &str) -> Result { match detect_format(data) { ImageFormat::Png => Ok(decode_png(data)?), ImageFormat::Jpeg => Ok(decode_jpeg(data)?), ImageFormat::Gif => Ok(decode_gif(data)?), ImageFormat::Unknown => Err(ImgLoadError::UnknownFormat { url: url.to_string(), }), } } // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- #[cfg(test)] mod tests { use super::*; // ----------------------------------------------------------------------- // detect_format // ----------------------------------------------------------------------- #[test] fn detect_png() { let data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00]; assert_eq!(detect_format(&data), ImageFormat::Png); } #[test] fn detect_jpeg() { let data = [0xFF, 0xD8, 0xFF, 0xE0]; assert_eq!(detect_format(&data), ImageFormat::Jpeg); } #[test] fn detect_gif87a() { assert_eq!(detect_format(b"GIF87a..."), ImageFormat::Gif); } #[test] fn detect_gif89a() { assert_eq!(detect_format(b"GIF89a..."), ImageFormat::Gif); } #[test] fn detect_unknown_empty() { assert_eq!(detect_format(&[]), ImageFormat::Unknown); } #[test] fn detect_unknown_random() { assert_eq!(detect_format(&[0x00, 0x01, 0x02]), ImageFormat::Unknown); } #[test] fn detect_short_data() { assert_eq!(detect_format(&[0xFF]), ImageFormat::Unknown); } // ----------------------------------------------------------------------- // parse_dimension_attr // ----------------------------------------------------------------------- #[test] fn parse_dimension_integer() { assert_eq!(parse_dimension_attr(Some("200")), Some(200.0)); } #[test] fn parse_dimension_with_px_suffix() { 
assert_eq!(parse_dimension_attr(Some("100px")), Some(100.0)); } #[test] fn parse_dimension_float() { assert_eq!(parse_dimension_attr(Some("50.5")), Some(50.5)); } #[test] fn parse_dimension_whitespace() { assert_eq!(parse_dimension_attr(Some(" 300 ")), Some(300.0)); } #[test] fn parse_dimension_zero() { assert_eq!(parse_dimension_attr(Some("0")), None); } #[test] fn parse_dimension_negative() { assert_eq!(parse_dimension_attr(Some("-10")), None); } #[test] fn parse_dimension_invalid() { assert_eq!(parse_dimension_attr(Some("abc")), None); } #[test] fn parse_dimension_none() { assert_eq!(parse_dimension_attr(None), None); } #[test] fn parse_dimension_empty() { assert_eq!(parse_dimension_attr(Some("")), None); } // ----------------------------------------------------------------------- // resolve_dimensions // ----------------------------------------------------------------------- #[test] fn resolve_both_attrs() { let (w, h) = resolve_dimensions(Some(400.0), Some(300.0), 800.0, 600.0); assert_eq!(w, 400.0); assert_eq!(h, 300.0); } #[test] fn resolve_width_only_proportional() { let (w, h) = resolve_dimensions(Some(400.0), None, 800.0, 600.0); assert_eq!(w, 400.0); assert_eq!(h, 300.0); // 400 * 600/800 } #[test] fn resolve_height_only_proportional() { let (w, h) = resolve_dimensions(None, Some(300.0), 800.0, 600.0); assert_eq!(w, 400.0); // 300 * 800/600 assert_eq!(h, 300.0); } #[test] fn resolve_neither_uses_intrinsic() { let (w, h) = resolve_dimensions(None, None, 1024.0, 768.0); assert_eq!(w, 1024.0); assert_eq!(h, 768.0); } #[test] fn resolve_width_only_zero_intrinsic() { let (w, h) = resolve_dimensions(Some(400.0), None, 0.0, 600.0); assert_eq!(w, 400.0); assert_eq!(h, 600.0); // can't scale, use intrinsic height } #[test] fn resolve_height_only_zero_intrinsic() { let (w, h) = resolve_dimensions(None, Some(300.0), 800.0, 0.0); assert_eq!(w, 800.0); // can't scale, use intrinsic width assert_eq!(h, 300.0); } // 
----------------------------------------------------------------------- // collect_img_nodes // ----------------------------------------------------------------------- #[test] fn collects_img_elements() { let mut doc = Document::new(); let root = doc.root(); let html = doc.create_element("html"); doc.append_child(root, html); let body = doc.create_element("body"); doc.append_child(html, body); let img = doc.create_element("img"); doc.set_attribute(img, "src", "photo.png"); doc.append_child(body, img); let mut nodes = Vec::new(); collect_img_nodes(&doc, doc.root(), &mut nodes); assert_eq!(nodes.len(), 1); assert_eq!(doc.tag_name(nodes[0]), Some("img")); } #[test] fn collects_multiple_imgs() { let mut doc = Document::new(); let root = doc.root(); let body = doc.create_element("body"); doc.append_child(root, body); let img1 = doc.create_element("img"); doc.set_attribute(img1, "src", "a.png"); doc.append_child(body, img1); let img2 = doc.create_element("img"); doc.set_attribute(img2, "src", "b.jpg"); doc.append_child(body, img2); let mut nodes = Vec::new(); collect_img_nodes(&doc, doc.root(), &mut nodes); assert_eq!(nodes.len(), 2); } #[test] fn ignores_non_img_elements() { let mut doc = Document::new(); let root = doc.root(); let p = doc.create_element("p"); doc.append_child(root, p); let div = doc.create_element("div"); doc.append_child(root, div); let mut nodes = Vec::new(); collect_img_nodes(&doc, doc.root(), &mut nodes); assert!(nodes.is_empty()); } // ----------------------------------------------------------------------- // collect_images — integration // ----------------------------------------------------------------------- #[test] fn collect_images_no_src() { let mut doc = Document::new(); let root = doc.root(); let img = doc.create_element("img"); doc.set_attribute(img, "alt", "missing"); doc.append_child(root, img); let mut loader = ResourceLoader::new(); let base = Url::parse("http://example.com/").unwrap(); let store = collect_images(&doc, &mut loader, 
&base); assert_eq!(store.len(), 1); let res = store.get(&img).unwrap(); assert!(res.image.is_none()); assert_eq!(res.alt, "missing"); } #[test] fn collect_images_empty_src() { let mut doc = Document::new(); let root = doc.root(); let img = doc.create_element("img"); doc.set_attribute(img, "src", ""); doc.set_attribute(img, "alt", "empty src"); doc.append_child(root, img); let mut loader = ResourceLoader::new(); let base = Url::parse("http://example.com/").unwrap(); let store = collect_images(&doc, &mut loader, &base); let res = store.get(&img).unwrap(); assert!(res.image.is_none()); assert_eq!(res.alt, "empty src"); } #[test] fn collect_images_failed_fetch_graceful() { let mut doc = Document::new(); let root = doc.root(); let img = doc.create_element("img"); doc.set_attribute(img, "src", "http://nonexistent.test/photo.png"); doc.set_attribute(img, "alt", "Photo"); doc.set_attribute(img, "width", "200"); doc.set_attribute(img, "height", "150"); doc.append_child(root, img); let mut loader = ResourceLoader::new(); let base = Url::parse("http://example.com/").unwrap(); let store = collect_images(&doc, &mut loader, &base); let res = store.get(&img).unwrap(); assert!(res.image.is_none()); assert_eq!(res.alt, "Photo"); assert_eq!(res.display_width, 200.0); assert_eq!(res.display_height, 150.0); } #[test] fn collect_images_no_alt() { let mut doc = Document::new(); let root = doc.root(); let img = doc.create_element("img"); doc.set_attribute(img, "src", "http://nonexistent.test/x.png"); doc.append_child(root, img); let mut loader = ResourceLoader::new(); let base = Url::parse("http://example.com/").unwrap(); let store = collect_images(&doc, &mut loader, &base); let res = store.get(&img).unwrap(); assert_eq!(res.alt, ""); } // ----------------------------------------------------------------------- // ImgLoadError display // ----------------------------------------------------------------------- #[test] fn error_display_unknown_format() { let e = ImgLoadError::UnknownFormat { 
url: "test.bin".to_string(), }; assert_eq!(e.to_string(), "unknown image format at test.bin"); } #[test] fn error_display_load() { let e = ImgLoadError::Load(LoadError::InvalidUrl("bad".to_string())); assert!(e.to_string().contains("image load error")); } #[test] fn error_display_decode() { let e = ImgLoadError::Decode(ImageError::Decode("corrupt".to_string())); assert!(e.to_string().contains("image decode error")); } // ----------------------------------------------------------------------- // decode_image_data — unit tests with minimal valid images // ----------------------------------------------------------------------- #[test] fn decode_unknown_format_error() { let result = decode_image_data(&[0x00, 0x01, 0x02], "mystery.bin"); assert!(result.is_err()); assert!(matches!(result, Err(ImgLoadError::UnknownFormat { .. }))); } #[test] fn decode_truncated_png_error() { // Valid PNG header but no actual image data. let data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]; let result = decode_image_data(&data, "broken.png"); assert!(result.is_err()); } #[test] fn decode_truncated_jpeg_error() { let data = [0xFF, 0xD8, 0xFF, 0xE0]; let result = decode_image_data(&data, "broken.jpg"); assert!(result.is_err()); } #[test] fn decode_truncated_gif_error() { let result = decode_image_data(b"GIF89a", "broken.gif"); assert!(result.is_err()); } // ----------------------------------------------------------------------- // decode_image_data — valid minimal images // ----------------------------------------------------------------------- #[test] fn decode_valid_png() { // Minimal 1x1 red PNG (RGB, bit depth 8). 
let data: &[u8] = &[ 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature 0x00, 0x00, 0x00, 0x0D, // IHDR length 0x49, 0x48, 0x44, 0x52, // IHDR 0x00, 0x00, 0x00, 0x01, // width: 1 0x00, 0x00, 0x00, 0x01, // height: 1 0x08, 0x02, // bit depth: 8, color type: RGB 0x00, 0x00, 0x00, // compression, filter, interlace 0x90, 0x77, 0x53, 0xDE, // CRC 0x00, 0x00, 0x00, 0x0C, // IDAT length 0x49, 0x44, 0x41, 0x54, // IDAT 0x78, 0x9C, 0x63, 0xF8, 0xCF, 0xC0, 0x00, 0x00, // zlib data 0x03, 0x01, 0x01, 0x00, // Adler32 0xC9, 0xFE, 0x92, 0xEF, // CRC 0x00, 0x00, 0x00, 0x00, // IEND length 0x49, 0x45, 0x4E, 0x44, // IEND 0xAE, 0x42, 0x60, 0x82, // CRC ]; let result = decode_image_data(data, "red.png"); assert!(result.is_ok(), "PNG decode failed: {:?}", result.err()); let img = result.unwrap(); assert_eq!(img.width, 1); assert_eq!(img.height, 1); assert_eq!(img.data.len(), 4); // 1x1 RGBA8 } }