//! Image resource loading: fetch and decode images referenced by `<img>` elements.
//!
//! After HTML parsing, this module scans the DOM for `<img>` elements,
//! fetches image data via the `ResourceLoader`, detects the image format from
//! magic bytes, and decodes using the matching `we_image` decoder (PNG, JPEG, or GIF).
use std::collections::HashMap;
use we_dom::{Document, NodeData, NodeId};
use we_image::gif::decode_gif;
use we_image::jpeg::decode_jpeg;
use we_image::pixel::{Image, ImageError};
use we_image::png::decode_png;
use we_url::Url;
use crate::loader::{LoadError, Resource, ResourceLoader};
/// Image container format, as identified from a file's leading magic bytes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ImageFormat {
    /// Portable Network Graphics.
    Png,
    /// JPEG.
    Jpeg,
    /// Graphics Interchange Format.
    Gif,
    /// The data matched none of the recognized signatures.
    Unknown,
}
/// Errors that can occur during image loading.
///
/// Produced by the fetch/decode helpers in this module; `collect_images`
/// itself does not return these but degrades to an entry without an image.
#[derive(Debug)]
pub enum ImgLoadError {
/// A resource failed to load; wraps the loader's own error.
Load(LoadError),
/// The fetched resource could not be decoded as an image; wraps the decoder's error.
Decode(ImageError),
/// Unknown or unsupported image format (no known magic bytes matched).
/// `url` identifies the resource for diagnostics.
UnknownFormat { url: String },
}
impl std::fmt::Display for ImgLoadError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Load(e) => write!(f, "image load error: {e}"),
Self::Decode(e) => write!(f, "image decode error: {e}"),
Self::UnknownFormat { url } => write!(f, "unknown image format at {url}"),
}
}
}
impl From for ImgLoadError {
fn from(e: LoadError) -> Self {
Self::Load(e)
}
}
impl From for ImgLoadError {
fn from(e: ImageError) -> Self {
Self::Decode(e)
}
}
/// A successfully or unsuccessfully loaded image resource.
pub struct ImageResource {
/// The decoded RGBA8 image, if loading and decoding succeeded.
pub image: Option,
/// Display width in CSS pixels (from `width` attribute or intrinsic).
pub display_width: f32,
/// Display height in CSS pixels (from `height` attribute or intrinsic).
pub display_height: f32,
/// Alt text from the `alt` attribute.
pub alt: String,
}
/// Map from DOM node IDs to their loaded image resources.
pub type ImageStore = HashMap;
/// Detect image format from the first bytes of data.
///
/// Only the leading signature is inspected; the rest of the data is not
/// validated. Returns [`ImageFormat::Unknown`] when `data` is too short or
/// matches none of the signatures below.
pub fn detect_format(data: &[u8]) -> ImageFormat {
    // PNG: 89 50 4E 47 0D 0A 1A 0A
    const PNG_SIGNATURE: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
    // JPEG: FF D8 (start-of-image marker)
    const JPEG_SOI: [u8; 2] = [0xFF, 0xD8];

    // `starts_with` performs the length check itself, so short input can
    // never index out of bounds.
    if data.starts_with(&PNG_SIGNATURE) {
        ImageFormat::Png
    } else if data.starts_with(&JPEG_SOI) {
        ImageFormat::Jpeg
    } else if data.starts_with(b"GIF87a") || data.starts_with(b"GIF89a") {
        // GIF: require one of the two defined version tags rather than any
        // six bytes beginning with "GIF" (e.g. plain text such as "GIFTED").
        ImageFormat::Gif
    } else {
        ImageFormat::Unknown
    }
}
/// Collect and load all images referenced by `
` elements in the DOM.
///
/// Scans the DOM in document order for `
` elements with a `src` attribute,
/// fetches each image via the `ResourceLoader`, detects the format, and decodes.
/// Failed loads produce an `ImageResource` with `image: None` (graceful degradation).
pub fn collect_images(doc: &Document, loader: &mut ResourceLoader, base_url: &Url) -> ImageStore {
let mut store = ImageStore::new();
let mut img_nodes = Vec::new();
collect_img_nodes(doc, doc.root(), &mut img_nodes);
for node in img_nodes {
let src = match doc.get_attribute(node, "src") {
Some(s) if !s.is_empty() => s.to_string(),
_ => {
// No src — record with alt text only.
let alt = doc.get_attribute(node, "alt").unwrap_or("").to_string();
store.insert(
node,
ImageResource {
image: None,
display_width: 0.0,
display_height: 0.0,
alt,
},
);
continue;
}
};
let alt = doc.get_attribute(node, "alt").unwrap_or("").to_string();
let attr_width = parse_dimension_attr(doc.get_attribute(node, "width"));
let attr_height = parse_dimension_attr(doc.get_attribute(node, "height"));
match fetch_and_decode(loader, &src, base_url) {
Ok(image) => {
let intrinsic_w = image.width as f32;
let intrinsic_h = image.height as f32;
let (dw, dh) =
resolve_dimensions(attr_width, attr_height, intrinsic_w, intrinsic_h);
store.insert(
node,
ImageResource {
image: Some(image),
display_width: dw,
display_height: dh,
alt,
},
);
}
Err(_) => {
// Graceful degradation: store alt text, no image.
let (dw, dh) = match (attr_width, attr_height) {
(Some(w), Some(h)) => (w, h),
(Some(w), None) => (w, 0.0),
(None, Some(h)) => (0.0, h),
(None, None) => (0.0, 0.0),
};
store.insert(
node,
ImageResource {
image: None,
display_width: dw,
display_height: dh,
alt,
},
);
}
}
}
store
}
/// Walk the DOM in document order and collect `
` element nodes.
fn collect_img_nodes(doc: &Document, node: NodeId, result: &mut Vec) {
if let NodeData::Element { tag_name, .. } = doc.node_data(node) {
if tag_name.eq_ignore_ascii_case("img") {
result.push(node);
}
}
for child in doc.children(node) {
collect_img_nodes(doc, child, result);
}
}
/// Parse a dimension attribute value (e.g., `width="200"`) to f32.
///
/// Surrounding whitespace and an optional trailing `px` unit are ignored.
/// Returns `None` when the attribute is absent, is not a number, or is not a
/// finite, strictly positive value. Strings such as `"inf"`, `"NaN"`, or a
/// number too large for `f32` are rejected, since they would otherwise
/// produce an unusable display size.
fn parse_dimension_attr(value: Option<&str>) -> Option<f32> {
    let raw = value?.trim();
    // Strip trailing "px" if present (tolerating whitespace before the unit).
    let number = raw.strip_suffix("px").map_or(raw, str::trim_end);
    number
        .parse::<f32>()
        .ok()
        .filter(|n| n.is_finite() && *n > 0.0)
}
/// Resolve display dimensions from attribute values and intrinsic image size.
///
/// Returns `(width, height)`:
/// - both attributes set: use them directly;
/// - only one set: scale the other proportionally to keep the intrinsic
///   aspect ratio; if the matching intrinsic dimension is not positive, the
///   ratio is undefined and the other intrinsic dimension is used as-is;
/// - neither set: use the intrinsic dimensions.
fn resolve_dimensions(
    attr_w: Option<f32>,
    attr_h: Option<f32>,
    intrinsic_w: f32,
    intrinsic_h: f32,
) -> (f32, f32) {
    match (attr_w, attr_h) {
        (Some(w), Some(h)) => (w, h),
        // Guards avoid dividing by a zero (or negative) intrinsic dimension.
        (Some(w), None) if intrinsic_w > 0.0 => (w, w * intrinsic_h / intrinsic_w),
        (Some(w), None) => (w, intrinsic_h),
        (None, Some(h)) if intrinsic_h > 0.0 => (h * intrinsic_w / intrinsic_h, h),
        (None, Some(h)) => (intrinsic_w, h),
        (None, None) => (intrinsic_w, intrinsic_h),
    }
}
/// Fetch image data from a URL and decode it.
fn fetch_and_decode(
loader: &mut ResourceLoader,
src: &str,
base_url: &Url,
) -> Result {
let resource = loader.fetch_url(src, Some(base_url))?;
let (data, url_str) = match resource {
Resource::Image { data, url, .. } => (data, url.to_string()),
Resource::Other { data, url, .. } => (data, url.to_string()),
Resource::Html { text, .. } => (text.into_bytes(), src.to_string()),
Resource::Css { text, .. } => (text.into_bytes(), src.to_string()),
};
decode_image_data(&data, &url_str)
}
/// Decode raw bytes into an Image, detecting format from magic bytes.
fn decode_image_data(data: &[u8], url: &str) -> Result {
match detect_format(data) {
ImageFormat::Png => Ok(decode_png(data)?),
ImageFormat::Jpeg => Ok(decode_jpeg(data)?),
ImageFormat::Gif => Ok(decode_gif(data)?),
ImageFormat::Unknown => Err(ImgLoadError::UnknownFormat {
url: url.to_string(),
}),
}
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// Unit tests for this module: format detection, dimension parsing and
// resolution, DOM scanning, graceful degradation in `collect_images`,
// error formatting, and decoding of raw image bytes.
#[cfg(test)]
mod tests {
use super::*;
// -----------------------------------------------------------------------
// detect_format
// -----------------------------------------------------------------------
#[test]
fn detect_png() {
// The 8-byte PNG signature followed by one extra byte.
let data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00];
assert_eq!(detect_format(&data), ImageFormat::Png);
}
#[test]
fn detect_jpeg() {
let data = [0xFF, 0xD8, 0xFF, 0xE0];
assert_eq!(detect_format(&data), ImageFormat::Jpeg);
}
#[test]
fn detect_gif87a() {
assert_eq!(detect_format(b"GIF87a..."), ImageFormat::Gif);
}
#[test]
fn detect_gif89a() {
assert_eq!(detect_format(b"GIF89a..."), ImageFormat::Gif);
}
#[test]
fn detect_unknown_empty() {
assert_eq!(detect_format(&[]), ImageFormat::Unknown);
}
#[test]
fn detect_unknown_random() {
assert_eq!(detect_format(&[0x00, 0x01, 0x02]), ImageFormat::Unknown);
}
#[test]
fn detect_short_data() {
// A lone 0xFF is only the first byte of the two-byte JPEG marker.
assert_eq!(detect_format(&[0xFF]), ImageFormat::Unknown);
}
// -----------------------------------------------------------------------
// parse_dimension_attr
// -----------------------------------------------------------------------
#[test]
fn parse_dimension_integer() {
assert_eq!(parse_dimension_attr(Some("200")), Some(200.0));
}
#[test]
fn parse_dimension_with_px_suffix() {
assert_eq!(parse_dimension_attr(Some("100px")), Some(100.0));
}
#[test]
fn parse_dimension_float() {
assert_eq!(parse_dimension_attr(Some("50.5")), Some(50.5));
}
#[test]
fn parse_dimension_whitespace() {
assert_eq!(parse_dimension_attr(Some(" 300 ")), Some(300.0));
}
#[test]
fn parse_dimension_zero() {
// Only strictly positive values are accepted.
assert_eq!(parse_dimension_attr(Some("0")), None);
}
#[test]
fn parse_dimension_negative() {
assert_eq!(parse_dimension_attr(Some("-10")), None);
}
#[test]
fn parse_dimension_invalid() {
assert_eq!(parse_dimension_attr(Some("abc")), None);
}
#[test]
fn parse_dimension_none() {
assert_eq!(parse_dimension_attr(None), None);
}
#[test]
fn parse_dimension_empty() {
assert_eq!(parse_dimension_attr(Some("")), None);
}
// -----------------------------------------------------------------------
// resolve_dimensions
// -----------------------------------------------------------------------
#[test]
fn resolve_both_attrs() {
// Explicit attributes win over the intrinsic 800x600 size.
let (w, h) = resolve_dimensions(Some(400.0), Some(300.0), 800.0, 600.0);
assert_eq!(w, 400.0);
assert_eq!(h, 300.0);
}
#[test]
fn resolve_width_only_proportional() {
let (w, h) = resolve_dimensions(Some(400.0), None, 800.0, 600.0);
assert_eq!(w, 400.0);
assert_eq!(h, 300.0); // 400 * 600/800
}
#[test]
fn resolve_height_only_proportional() {
let (w, h) = resolve_dimensions(None, Some(300.0), 800.0, 600.0);
assert_eq!(w, 400.0); // 300 * 800/600
assert_eq!(h, 300.0);
}
#[test]
fn resolve_neither_uses_intrinsic() {
let (w, h) = resolve_dimensions(None, None, 1024.0, 768.0);
assert_eq!(w, 1024.0);
assert_eq!(h, 768.0);
}
#[test]
fn resolve_width_only_zero_intrinsic() {
let (w, h) = resolve_dimensions(Some(400.0), None, 0.0, 600.0);
assert_eq!(w, 400.0);
assert_eq!(h, 600.0); // can't scale, use intrinsic height
}
#[test]
fn resolve_height_only_zero_intrinsic() {
let (w, h) = resolve_dimensions(None, Some(300.0), 800.0, 0.0);
assert_eq!(w, 800.0); // can't scale, use intrinsic width
assert_eq!(h, 300.0);
}
// -----------------------------------------------------------------------
// collect_img_nodes
// -----------------------------------------------------------------------
#[test]
fn collects_img_elements() {
// Tree: root > html > body > img
let mut doc = Document::new();
let root = doc.root();
let html = doc.create_element("html");
doc.append_child(root, html);
let body = doc.create_element("body");
doc.append_child(html, body);
let img = doc.create_element("img");
doc.set_attribute(img, "src", "photo.png");
doc.append_child(body, img);
let mut nodes = Vec::new();
collect_img_nodes(&doc, doc.root(), &mut nodes);
assert_eq!(nodes.len(), 1);
assert_eq!(doc.tag_name(nodes[0]), Some("img"));
}
#[test]
fn collects_multiple_imgs() {
// Two sibling <img> elements under the same <body>.
let mut doc = Document::new();
let root = doc.root();
let body = doc.create_element("body");
doc.append_child(root, body);
let img1 = doc.create_element("img");
doc.set_attribute(img1, "src", "a.png");
doc.append_child(body, img1);
let img2 = doc.create_element("img");
doc.set_attribute(img2, "src", "b.jpg");
doc.append_child(body, img2);
let mut nodes = Vec::new();
collect_img_nodes(&doc, doc.root(), &mut nodes);
assert_eq!(nodes.len(), 2);
}
#[test]
fn ignores_non_img_elements() {
let mut doc = Document::new();
let root = doc.root();
let p = doc.create_element("p");
doc.append_child(root, p);
let div = doc.create_element("div");
doc.append_child(root, div);
let mut nodes = Vec::new();
collect_img_nodes(&doc, doc.root(), &mut nodes);
assert!(nodes.is_empty());
}
// -----------------------------------------------------------------------
// collect_images — integration
// -----------------------------------------------------------------------
#[test]
fn collect_images_no_src() {
// An <img> without `src` still gets a store entry carrying its alt text.
let mut doc = Document::new();
let root = doc.root();
let img = doc.create_element("img");
doc.set_attribute(img, "alt", "missing");
doc.append_child(root, img);
let mut loader = ResourceLoader::new();
let base = Url::parse("http://example.com/").unwrap();
let store = collect_images(&doc, &mut loader, &base);
assert_eq!(store.len(), 1);
let res = store.get(&img).unwrap();
assert!(res.image.is_none());
assert_eq!(res.alt, "missing");
}
#[test]
fn collect_images_empty_src() {
// An empty `src` is treated the same as a missing one.
let mut doc = Document::new();
let root = doc.root();
let img = doc.create_element("img");
doc.set_attribute(img, "src", "");
doc.set_attribute(img, "alt", "empty src");
doc.append_child(root, img);
let mut loader = ResourceLoader::new();
let base = Url::parse("http://example.com/").unwrap();
let store = collect_images(&doc, &mut loader, &base);
let res = store.get(&img).unwrap();
assert!(res.image.is_none());
assert_eq!(res.alt, "empty src");
}
#[test]
fn collect_images_failed_fetch_graceful() {
// The fetch is expected to fail (assumes nonexistent.test cannot be
// loaded in the test environment); the entry must still carry the alt
// text and the attribute-specified dimensions.
let mut doc = Document::new();
let root = doc.root();
let img = doc.create_element("img");
doc.set_attribute(img, "src", "http://nonexistent.test/photo.png");
doc.set_attribute(img, "alt", "Photo");
doc.set_attribute(img, "width", "200");
doc.set_attribute(img, "height", "150");
doc.append_child(root, img);
let mut loader = ResourceLoader::new();
let base = Url::parse("http://example.com/").unwrap();
let store = collect_images(&doc, &mut loader, &base);
let res = store.get(&img).unwrap();
assert!(res.image.is_none());
assert_eq!(res.alt, "Photo");
assert_eq!(res.display_width, 200.0);
assert_eq!(res.display_height, 150.0);
}
#[test]
fn collect_images_no_alt() {
// A missing `alt` attribute is recorded as an empty string.
let mut doc = Document::new();
let root = doc.root();
let img = doc.create_element("img");
doc.set_attribute(img, "src", "http://nonexistent.test/x.png");
doc.append_child(root, img);
let mut loader = ResourceLoader::new();
let base = Url::parse("http://example.com/").unwrap();
let store = collect_images(&doc, &mut loader, &base);
let res = store.get(&img).unwrap();
assert_eq!(res.alt, "");
}
// -----------------------------------------------------------------------
// ImgLoadError display
// -----------------------------------------------------------------------
#[test]
fn error_display_unknown_format() {
let e = ImgLoadError::UnknownFormat {
url: "test.bin".to_string(),
};
assert_eq!(e.to_string(), "unknown image format at test.bin");
}
#[test]
fn error_display_load() {
// Only the fixed prefix is checked; the wrapped error's text is not pinned.
let e = ImgLoadError::Load(LoadError::InvalidUrl("bad".to_string()));
assert!(e.to_string().contains("image load error"));
}
#[test]
fn error_display_decode() {
let e = ImgLoadError::Decode(ImageError::Decode("corrupt".to_string()));
assert!(e.to_string().contains("image decode error"));
}
// -----------------------------------------------------------------------
// decode_image_data — unit tests with minimal valid images
// -----------------------------------------------------------------------
#[test]
fn decode_unknown_format_error() {
let result = decode_image_data(&[0x00, 0x01, 0x02], "mystery.bin");
assert!(result.is_err());
assert!(matches!(result, Err(ImgLoadError::UnknownFormat { .. })));
}
#[test]
fn decode_truncated_png_error() {
// Valid PNG header but no actual image data.
let data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
let result = decode_image_data(&data, "broken.png");
assert!(result.is_err());
}
#[test]
fn decode_truncated_jpeg_error() {
// JPEG start marker with nothing usable after it.
let data = [0xFF, 0xD8, 0xFF, 0xE0];
let result = decode_image_data(&data, "broken.jpg");
assert!(result.is_err());
}
#[test]
fn decode_truncated_gif_error() {
// GIF version tag only, with no further data.
let result = decode_image_data(b"GIF89a", "broken.gif");
assert!(result.is_err());
}
// -----------------------------------------------------------------------
// decode_image_data — valid minimal images
// -----------------------------------------------------------------------
#[test]
fn decode_valid_png() {
// Minimal 1x1 red PNG (RGB, bit depth 8).
let data: &[u8] = &[
0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature
0x00, 0x00, 0x00, 0x0D, // IHDR length
0x49, 0x48, 0x44, 0x52, // IHDR
0x00, 0x00, 0x00, 0x01, // width: 1
0x00, 0x00, 0x00, 0x01, // height: 1
0x08, 0x02, // bit depth: 8, color type: RGB
0x00, 0x00, 0x00, // compression, filter, interlace
0x90, 0x77, 0x53, 0xDE, // CRC
0x00, 0x00, 0x00, 0x0C, // IDAT length
0x49, 0x44, 0x41, 0x54, // IDAT
0x78, 0x9C, 0x63, 0xF8, 0xCF, 0xC0, 0x00, 0x00, // zlib data
0x03, 0x01, 0x01, 0x00, // Adler32
0xC9, 0xFE, 0x92, 0xEF, // CRC
0x00, 0x00, 0x00, 0x00, // IEND length
0x49, 0x45, 0x4E, 0x44, // IEND
0xAE, 0x42, 0x60, 0x82, // CRC
];
let result = decode_image_data(data, "red.png");
assert!(result.is_ok(), "PNG decode failed: {:?}", result.err());
let img = result.unwrap();
assert_eq!(img.width, 1);
assert_eq!(img.height, 1);
// The RGB source is expected to come back expanded to four bytes per pixel.
assert_eq!(img.data.len(), 4); // 1x1 RGBA8
}
}