we (web engine): Experimental web browser project to understand the limits of Claude
1//! Image resource loading: fetch and decode images referenced by `<img>` elements.
2//!
3//! After HTML parsing, this module scans the DOM for `<img src="...">` elements,
4//! fetches image data via the `ResourceLoader`, detects the image format from
//! magic bytes, and decodes using the matching `we_image` decoder (PNG, JPEG, or GIF).
6
7use std::collections::HashMap;
8
9use we_dom::{Document, NodeData, NodeId};
10use we_image::gif::decode_gif;
11use we_image::jpeg::decode_jpeg;
12use we_image::pixel::{Image, ImageError};
13use we_image::png::decode_png;
14use we_url::Url;
15
16use crate::loader::{LoadError, Resource, ResourceLoader};
17
/// Detected image format from magic bytes.
///
/// This is the result type of `detect_format`, which inspects only the
/// leading bytes of the data; it says nothing about whether the rest of the
/// data is well formed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImageFormat {
    /// Data starting with the 8-byte PNG signature.
    Png,
    /// Data starting with the JPEG start-of-image bytes `FF D8`.
    Jpeg,
    /// Data starting with a `GIF` signature.
    Gif,
    /// None of the recognised signatures matched (including empty input).
    Unknown,
}
26
/// Errors that can occur during image loading.
///
/// `LoadError` and `ImageError` values convert into this type through the
/// `From` impls in this module, so both can be propagated with `?`.
#[derive(Debug)]
pub enum ImgLoadError {
    /// A resource failed to load.
    Load(LoadError),
    /// The fetched resource could not be decoded as an image.
    Decode(ImageError),
    /// Unknown or unsupported image format.
    ///
    /// `url` identifies the resource whose bytes matched no known signature.
    UnknownFormat { url: String },
}
37
38impl std::fmt::Display for ImgLoadError {
39 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40 match self {
41 Self::Load(e) => write!(f, "image load error: {e}"),
42 Self::Decode(e) => write!(f, "image decode error: {e}"),
43 Self::UnknownFormat { url } => write!(f, "unknown image format at {url}"),
44 }
45 }
46}
47
48impl From<LoadError> for ImgLoadError {
49 fn from(e: LoadError) -> Self {
50 Self::Load(e)
51 }
52}
53
54impl From<ImageError> for ImgLoadError {
55 fn from(e: ImageError) -> Self {
56 Self::Decode(e)
57 }
58}
59
/// A successfully or unsuccessfully loaded image resource.
///
/// `collect_images` creates one of these for every `<img>` element it visits,
/// including elements whose image could not be fetched or decoded; those
/// carry `image: None` so the alt text and any explicit size stay available.
pub struct ImageResource {
    /// The decoded RGBA8 image, if loading and decoding succeeded.
    pub image: Option<Image>,
    /// Display width in CSS pixels (from `width` attribute or intrinsic).
    /// `0.0` when neither an attribute nor a decoded image provides a value.
    pub display_width: f32,
    /// Display height in CSS pixels (from `height` attribute or intrinsic).
    /// `0.0` when neither an attribute nor a decoded image provides a value.
    pub display_height: f32,
    /// Alt text from the `alt` attribute.
    /// Empty when the attribute is absent.
    pub alt: String,
}
71
/// Map from DOM node IDs to their loaded image resources.
///
/// Keys are the `<img>` element nodes; `collect_images` inserts an entry for
/// every such element it finds, whether or not the image loaded.
pub type ImageStore = HashMap<NodeId, ImageResource>;
74
75/// Detect image format from the first bytes of data.
76pub fn detect_format(data: &[u8]) -> ImageFormat {
77 // PNG: 89 50 4E 47 0D 0A 1A 0A
78 if data.len() >= 8 && data[..8] == [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A] {
79 return ImageFormat::Png;
80 }
81 // JPEG: FF D8
82 if data.len() >= 2 && data[0] == 0xFF && data[1] == 0xD8 {
83 return ImageFormat::Jpeg;
84 }
85 // GIF: GIF87a or GIF89a
86 if data.len() >= 6 && &data[..3] == b"GIF" {
87 return ImageFormat::Gif;
88 }
89 ImageFormat::Unknown
90}
91
92/// Collect and load all images referenced by `<img>` elements in the DOM.
93///
94/// Scans the DOM in document order for `<img>` elements with a `src` attribute,
95/// fetches each image via the `ResourceLoader`, detects the format, and decodes.
96/// Failed loads produce an `ImageResource` with `image: None` (graceful degradation).
97pub fn collect_images(doc: &Document, loader: &mut ResourceLoader, base_url: &Url) -> ImageStore {
98 let mut store = ImageStore::new();
99 let mut img_nodes = Vec::new();
100 collect_img_nodes(doc, doc.root(), &mut img_nodes);
101
102 for node in img_nodes {
103 let src = match doc.get_attribute(node, "src") {
104 Some(s) if !s.is_empty() => s.to_string(),
105 _ => {
106 // No src — record with alt text only.
107 let alt = doc.get_attribute(node, "alt").unwrap_or("").to_string();
108 store.insert(
109 node,
110 ImageResource {
111 image: None,
112 display_width: 0.0,
113 display_height: 0.0,
114 alt,
115 },
116 );
117 continue;
118 }
119 };
120
121 let alt = doc.get_attribute(node, "alt").unwrap_or("").to_string();
122 let attr_width = parse_dimension_attr(doc.get_attribute(node, "width"));
123 let attr_height = parse_dimension_attr(doc.get_attribute(node, "height"));
124
125 match fetch_and_decode(loader, &src, base_url) {
126 Ok(image) => {
127 let intrinsic_w = image.width as f32;
128 let intrinsic_h = image.height as f32;
129 let (dw, dh) =
130 resolve_dimensions(attr_width, attr_height, intrinsic_w, intrinsic_h);
131 store.insert(
132 node,
133 ImageResource {
134 image: Some(image),
135 display_width: dw,
136 display_height: dh,
137 alt,
138 },
139 );
140 }
141 Err(_) => {
142 // Graceful degradation: store alt text, no image.
143 let (dw, dh) = match (attr_width, attr_height) {
144 (Some(w), Some(h)) => (w, h),
145 (Some(w), None) => (w, 0.0),
146 (None, Some(h)) => (0.0, h),
147 (None, None) => (0.0, 0.0),
148 };
149 store.insert(
150 node,
151 ImageResource {
152 image: None,
153 display_width: dw,
154 display_height: dh,
155 alt,
156 },
157 );
158 }
159 }
160 }
161
162 store
163}
164
165/// Walk the DOM in document order and collect `<img>` element nodes.
166fn collect_img_nodes(doc: &Document, node: NodeId, result: &mut Vec<NodeId>) {
167 if let NodeData::Element { tag_name, .. } = doc.node_data(node) {
168 if tag_name.eq_ignore_ascii_case("img") {
169 result.push(node);
170 }
171 }
172 for child in doc.children(node) {
173 collect_img_nodes(doc, child, result);
174 }
175}
176
/// Parse a dimension attribute value (e.g., `width="200"`) to f32.
///
/// Surrounding whitespace is ignored and an optional trailing `px` unit is
/// accepted (`"100px"`, `"100 px"`).
///
/// Returns `None` when the attribute is absent, is not a number, or is not a
/// finite value strictly greater than zero. The finiteness check matters
/// because `f32` parsing accepts spellings such as `inf`/`infinity` and turns
/// overflowing literals such as `1e40` into infinity; none of those is a
/// usable layout size.
fn parse_dimension_attr(value: Option<&str>) -> Option<f32> {
    let raw = value?.trim();
    // Drop a trailing "px" unit, plus any whitespace that preceded it.
    let number = raw.strip_suffix("px").map_or(raw, str::trim_end);
    number
        .parse::<f32>()
        .ok()
        .filter(|n| n.is_finite() && *n > 0.0)
}
186
/// Compute the display size of an image from its attributes and intrinsic size.
///
/// * Both attributes present: they are returned unchanged.
/// * Only one present: the missing side is scaled to keep the intrinsic
///   aspect ratio. If the intrinsic length needed as divisor is not positive,
///   the missing side falls back to its intrinsic value.
/// * Neither present: the intrinsic size is returned.
///
/// Returns `(width, height)`.
fn resolve_dimensions(
    attr_w: Option<f32>,
    attr_h: Option<f32>,
    intrinsic_w: f32,
    intrinsic_h: f32,
) -> (f32, f32) {
    if let (Some(width), Some(height)) = (attr_w, attr_h) {
        return (width, height);
    }

    if let Some(width) = attr_w {
        // Height is missing: derive it from the width when a ratio exists.
        let height = if intrinsic_w > 0.0 {
            width * intrinsic_h / intrinsic_w
        } else {
            intrinsic_h
        };
        return (width, height);
    }

    if let Some(height) = attr_h {
        // Width is missing: derive it from the height when a ratio exists.
        let width = if intrinsic_h > 0.0 {
            height * intrinsic_w / intrinsic_h
        } else {
            intrinsic_w
        };
        return (width, height);
    }

    (intrinsic_w, intrinsic_h)
}
217
218/// Fetch image data from a URL and decode it.
219fn fetch_and_decode(
220 loader: &mut ResourceLoader,
221 src: &str,
222 base_url: &Url,
223) -> Result<Image, ImgLoadError> {
224 let resource = loader.fetch_url(src, Some(base_url))?;
225
226 let (data, url_str) = match resource {
227 Resource::Image { data, url, .. } => (data, url.to_string()),
228 Resource::Other { data, url, .. } => (data, url.to_string()),
229 Resource::Html { text, .. } => (text.into_bytes(), src.to_string()),
230 Resource::Css { text, .. } => (text.into_bytes(), src.to_string()),
231 };
232
233 decode_image_data(&data, &url_str)
234}
235
236/// Decode raw bytes into an Image, detecting format from magic bytes.
237fn decode_image_data(data: &[u8], url: &str) -> Result<Image, ImgLoadError> {
238 match detect_format(data) {
239 ImageFormat::Png => Ok(decode_png(data)?),
240 ImageFormat::Jpeg => Ok(decode_jpeg(data)?),
241 ImageFormat::Gif => Ok(decode_gif(data)?),
242 ImageFormat::Unknown => Err(ImgLoadError::UnknownFormat {
243 url: url.to_string(),
244 }),
245 }
246}
247
248// ---------------------------------------------------------------------------
249// Tests
250// ---------------------------------------------------------------------------
251
// Unit tests for this module. Sections below follow the order of the items
// under test: format sniffing, attribute parsing, size resolution, DOM
// traversal, the `collect_images` entry point, error formatting, and decoding.
#[cfg(test)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------------
    // detect_format
    // -----------------------------------------------------------------------

    #[test]
    fn detect_png() {
        let data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00];
        assert_eq!(detect_format(&data), ImageFormat::Png);
    }

    #[test]
    fn detect_jpeg() {
        let data = [0xFF, 0xD8, 0xFF, 0xE0];
        assert_eq!(detect_format(&data), ImageFormat::Jpeg);
    }

    #[test]
    fn detect_gif87a() {
        assert_eq!(detect_format(b"GIF87a..."), ImageFormat::Gif);
    }

    #[test]
    fn detect_gif89a() {
        assert_eq!(detect_format(b"GIF89a..."), ImageFormat::Gif);
    }

    #[test]
    fn detect_unknown_empty() {
        assert_eq!(detect_format(&[]), ImageFormat::Unknown);
    }

    #[test]
    fn detect_unknown_random() {
        assert_eq!(detect_format(&[0x00, 0x01, 0x02]), ImageFormat::Unknown);
    }

    // A lone 0xFF is only half of the JPEG marker and must not match.
    #[test]
    fn detect_short_data() {
        assert_eq!(detect_format(&[0xFF]), ImageFormat::Unknown);
    }

    // -----------------------------------------------------------------------
    // parse_dimension_attr
    // -----------------------------------------------------------------------

    #[test]
    fn parse_dimension_integer() {
        assert_eq!(parse_dimension_attr(Some("200")), Some(200.0));
    }

    #[test]
    fn parse_dimension_with_px_suffix() {
        assert_eq!(parse_dimension_attr(Some("100px")), Some(100.0));
    }

    #[test]
    fn parse_dimension_float() {
        assert_eq!(parse_dimension_attr(Some("50.5")), Some(50.5));
    }

    #[test]
    fn parse_dimension_whitespace() {
        assert_eq!(parse_dimension_attr(Some(" 300 ")), Some(300.0));
    }

    // Zero and negative sizes are treated the same as a missing attribute.
    #[test]
    fn parse_dimension_zero() {
        assert_eq!(parse_dimension_attr(Some("0")), None);
    }

    #[test]
    fn parse_dimension_negative() {
        assert_eq!(parse_dimension_attr(Some("-10")), None);
    }

    #[test]
    fn parse_dimension_invalid() {
        assert_eq!(parse_dimension_attr(Some("abc")), None);
    }

    #[test]
    fn parse_dimension_none() {
        assert_eq!(parse_dimension_attr(None), None);
    }

    #[test]
    fn parse_dimension_empty() {
        assert_eq!(parse_dimension_attr(Some("")), None);
    }

    // -----------------------------------------------------------------------
    // resolve_dimensions
    // -----------------------------------------------------------------------

    #[test]
    fn resolve_both_attrs() {
        let (w, h) = resolve_dimensions(Some(400.0), Some(300.0), 800.0, 600.0);
        assert_eq!(w, 400.0);
        assert_eq!(h, 300.0);
    }

    #[test]
    fn resolve_width_only_proportional() {
        let (w, h) = resolve_dimensions(Some(400.0), None, 800.0, 600.0);
        assert_eq!(w, 400.0);
        assert_eq!(h, 300.0); // 400 * 600/800
    }

    #[test]
    fn resolve_height_only_proportional() {
        let (w, h) = resolve_dimensions(None, Some(300.0), 800.0, 600.0);
        assert_eq!(w, 400.0); // 300 * 800/600
        assert_eq!(h, 300.0);
    }

    #[test]
    fn resolve_neither_uses_intrinsic() {
        let (w, h) = resolve_dimensions(None, None, 1024.0, 768.0);
        assert_eq!(w, 1024.0);
        assert_eq!(h, 768.0);
    }

    #[test]
    fn resolve_width_only_zero_intrinsic() {
        let (w, h) = resolve_dimensions(Some(400.0), None, 0.0, 600.0);
        assert_eq!(w, 400.0);
        assert_eq!(h, 600.0); // can't scale, use intrinsic height
    }

    #[test]
    fn resolve_height_only_zero_intrinsic() {
        let (w, h) = resolve_dimensions(None, Some(300.0), 800.0, 0.0);
        assert_eq!(w, 800.0); // can't scale, use intrinsic width
        assert_eq!(h, 300.0);
    }

    // -----------------------------------------------------------------------
    // collect_img_nodes
    // -----------------------------------------------------------------------

    // An <img> nested two levels below the root is still found.
    #[test]
    fn collects_img_elements() {
        let mut doc = Document::new();
        let root = doc.root();

        let html = doc.create_element("html");
        doc.append_child(root, html);

        let body = doc.create_element("body");
        doc.append_child(html, body);

        let img = doc.create_element("img");
        doc.set_attribute(img, "src", "photo.png");
        doc.append_child(body, img);

        let mut nodes = Vec::new();
        collect_img_nodes(&doc, doc.root(), &mut nodes);
        assert_eq!(nodes.len(), 1);
        assert_eq!(doc.tag_name(nodes[0]), Some("img"));
    }

    #[test]
    fn collects_multiple_imgs() {
        let mut doc = Document::new();
        let root = doc.root();

        let body = doc.create_element("body");
        doc.append_child(root, body);

        let img1 = doc.create_element("img");
        doc.set_attribute(img1, "src", "a.png");
        doc.append_child(body, img1);

        let img2 = doc.create_element("img");
        doc.set_attribute(img2, "src", "b.jpg");
        doc.append_child(body, img2);

        let mut nodes = Vec::new();
        collect_img_nodes(&doc, doc.root(), &mut nodes);
        assert_eq!(nodes.len(), 2);
    }

    #[test]
    fn ignores_non_img_elements() {
        let mut doc = Document::new();
        let root = doc.root();

        let p = doc.create_element("p");
        doc.append_child(root, p);

        let div = doc.create_element("div");
        doc.append_child(root, div);

        let mut nodes = Vec::new();
        collect_img_nodes(&doc, doc.root(), &mut nodes);
        assert!(nodes.is_empty());
    }

    // -----------------------------------------------------------------------
    // collect_images — integration
    // -----------------------------------------------------------------------

    // An <img> without src still gets a store entry carrying its alt text.
    #[test]
    fn collect_images_no_src() {
        let mut doc = Document::new();
        let root = doc.root();

        let img = doc.create_element("img");
        doc.set_attribute(img, "alt", "missing");
        doc.append_child(root, img);

        let mut loader = ResourceLoader::new();
        let base = Url::parse("http://example.com/").unwrap();

        let store = collect_images(&doc, &mut loader, &base);
        assert_eq!(store.len(), 1);
        let res = store.get(&img).unwrap();
        assert!(res.image.is_none());
        assert_eq!(res.alt, "missing");
    }

    // An empty src is handled like a missing one.
    #[test]
    fn collect_images_empty_src() {
        let mut doc = Document::new();
        let root = doc.root();

        let img = doc.create_element("img");
        doc.set_attribute(img, "src", "");
        doc.set_attribute(img, "alt", "empty src");
        doc.append_child(root, img);

        let mut loader = ResourceLoader::new();
        let base = Url::parse("http://example.com/").unwrap();

        let store = collect_images(&doc, &mut loader, &base);
        let res = store.get(&img).unwrap();
        assert!(res.image.is_none());
        assert_eq!(res.alt, "empty src");
    }

    // NOTE(review): this test expects the fetch to fail; it presumably relies
    // on the reserved `.test` TLD never resolving — confirm against
    // `ResourceLoader` if it becomes flaky.
    #[test]
    fn collect_images_failed_fetch_graceful() {
        let mut doc = Document::new();
        let root = doc.root();

        let img = doc.create_element("img");
        doc.set_attribute(img, "src", "http://nonexistent.test/photo.png");
        doc.set_attribute(img, "alt", "Photo");
        doc.set_attribute(img, "width", "200");
        doc.set_attribute(img, "height", "150");
        doc.append_child(root, img);

        let mut loader = ResourceLoader::new();
        let base = Url::parse("http://example.com/").unwrap();

        let store = collect_images(&doc, &mut loader, &base);
        let res = store.get(&img).unwrap();
        assert!(res.image.is_none());
        assert_eq!(res.alt, "Photo");
        assert_eq!(res.display_width, 200.0);
        assert_eq!(res.display_height, 150.0);
    }

    // A missing alt attribute yields an empty alt string.
    #[test]
    fn collect_images_no_alt() {
        let mut doc = Document::new();
        let root = doc.root();

        let img = doc.create_element("img");
        doc.set_attribute(img, "src", "http://nonexistent.test/x.png");
        doc.append_child(root, img);

        let mut loader = ResourceLoader::new();
        let base = Url::parse("http://example.com/").unwrap();

        let store = collect_images(&doc, &mut loader, &base);
        let res = store.get(&img).unwrap();
        assert_eq!(res.alt, "");
    }

    // -----------------------------------------------------------------------
    // ImgLoadError display
    // -----------------------------------------------------------------------

    #[test]
    fn error_display_unknown_format() {
        let e = ImgLoadError::UnknownFormat {
            url: "test.bin".to_string(),
        };
        assert_eq!(e.to_string(), "unknown image format at test.bin");
    }

    #[test]
    fn error_display_load() {
        let e = ImgLoadError::Load(LoadError::InvalidUrl("bad".to_string()));
        assert!(e.to_string().contains("image load error"));
    }

    #[test]
    fn error_display_decode() {
        let e = ImgLoadError::Decode(ImageError::Decode("corrupt".to_string()));
        assert!(e.to_string().contains("image decode error"));
    }

    // -----------------------------------------------------------------------
    // decode_image_data — unit tests with minimal valid images
    // -----------------------------------------------------------------------

    #[test]
    fn decode_unknown_format_error() {
        let result = decode_image_data(&[0x00, 0x01, 0x02], "mystery.bin");
        assert!(result.is_err());
        assert!(matches!(result, Err(ImgLoadError::UnknownFormat { .. })));
    }

    #[test]
    fn decode_truncated_png_error() {
        // Valid PNG header but no actual image data.
        let data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
        let result = decode_image_data(&data, "broken.png");
        assert!(result.is_err());
    }

    #[test]
    fn decode_truncated_jpeg_error() {
        let data = [0xFF, 0xD8, 0xFF, 0xE0];
        let result = decode_image_data(&data, "broken.jpg");
        assert!(result.is_err());
    }

    // Signature only, no image data behind it.
    #[test]
    fn decode_truncated_gif_error() {
        let result = decode_image_data(b"GIF89a", "broken.gif");
        assert!(result.is_err());
    }

    // -----------------------------------------------------------------------
    // decode_image_data — valid minimal images
    // -----------------------------------------------------------------------

    #[test]
    fn decode_valid_png() {
        // Minimal 1x1 red PNG (RGB, bit depth 8).
        let data: &[u8] = &[
            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature
            0x00, 0x00, 0x00, 0x0D, // IHDR length
            0x49, 0x48, 0x44, 0x52, // IHDR
            0x00, 0x00, 0x00, 0x01, // width: 1
            0x00, 0x00, 0x00, 0x01, // height: 1
            0x08, 0x02, // bit depth: 8, color type: RGB
            0x00, 0x00, 0x00, // compression, filter, interlace
            0x90, 0x77, 0x53, 0xDE, // CRC
            0x00, 0x00, 0x00, 0x0C, // IDAT length
            0x49, 0x44, 0x41, 0x54, // IDAT
            0x78, 0x9C, 0x63, 0xF8, 0xCF, 0xC0, 0x00, 0x00, // zlib data
            0x03, 0x01, 0x01, 0x00, // Adler32
            0xC9, 0xFE, 0x92, 0xEF, // CRC
            0x00, 0x00, 0x00, 0x00, // IEND length
            0x49, 0x45, 0x4E, 0x44, // IEND
            0xAE, 0x42, 0x60, 0x82, // CRC
        ];
        let result = decode_image_data(data, "red.png");
        assert!(result.is_ok(), "PNG decode failed: {:?}", result.err());
        let img = result.unwrap();
        assert_eq!(img.width, 1);
        assert_eq!(img.height, 1);
        assert_eq!(img.data.len(), 4); // 1x1 RGBA8
    }
}
624}