we (web engine): Experimental web browser project to understand the limits of Claude
1//! Image resource loading: fetch and decode images referenced by `<img>` elements.
2//!
3//! After HTML parsing, this module scans the DOM for `<img src="...">` elements,
4//! fetches image data via the `ResourceLoader`, detects the image format from
//! magic bytes, and decodes using the matching `we_image` decoder (PNG, JPEG or GIF).
6
7use std::collections::HashMap;
8
9use we_dom::{Document, NodeData, NodeId};
10use we_image::gif::decode_gif;
11use we_image::jpeg::decode_jpeg;
12use we_image::pixel::{Image, ImageError};
13use we_image::png::decode_png;
14use we_url::Url;
15
16use crate::loader::{LoadError, Resource, ResourceLoader};
17
/// Detected image format from magic bytes.
///
/// Produced by [`detect_format`] and consumed by `decode_image_data` to pick
/// the decoder to run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImageFormat {
    /// PNG: the data begins with the 8-byte PNG signature.
    Png,
    /// JPEG: the data begins with the bytes `FF D8`.
    Jpeg,
    /// GIF: the data begins with a header starting with the ASCII bytes `GIF`.
    Gif,
    /// No known signature matched; decoding reports this as
    /// `ImgLoadError::UnknownFormat`.
    Unknown,
}
26
/// Errors that can occur during image loading.
///
/// `collect_images` does not surface these to its caller: any error there
/// results in an `ImageResource` with `image: None`.
#[derive(Debug)]
pub enum ImgLoadError {
    /// A resource failed to load (converted from the loader's `LoadError`).
    Load(LoadError),
    /// The fetched resource could not be decoded as an image (converted from
    /// the decoder's `ImageError`).
    Decode(ImageError),
    /// Unknown or unsupported image format: the data matched none of the
    /// signatures known to `detect_format`. `url` identifies the resource.
    UnknownFormat { url: String },
}
37
38impl std::fmt::Display for ImgLoadError {
39 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40 match self {
41 Self::Load(e) => write!(f, "image load error: {e}"),
42 Self::Decode(e) => write!(f, "image decode error: {e}"),
43 Self::UnknownFormat { url } => write!(f, "unknown image format at {url}"),
44 }
45 }
46}
47
48impl From<LoadError> for ImgLoadError {
49 fn from(e: LoadError) -> Self {
50 Self::Load(e)
51 }
52}
53
54impl From<ImageError> for ImgLoadError {
55 fn from(e: ImageError) -> Self {
56 Self::Decode(e)
57 }
58}
59
/// The outcome of loading one `<img>` element, whether or not it succeeded.
///
/// `collect_images` stores one of these for every `<img>` element it finds,
/// including elements without a usable `src` and elements whose fetch or
/// decode failed; in those cases `image` is `None`.
pub struct ImageResource {
    /// The decoded RGBA8 image, if loading and decoding succeeded.
    pub image: Option<Image>,
    /// Display width in CSS pixels (from `width` attribute or intrinsic).
    ///
    /// `0.0` when there is no decoded image and no usable `width` attribute.
    pub display_width: f32,
    /// Display height in CSS pixels (from `height` attribute or intrinsic).
    ///
    /// `0.0` when there is no decoded image and no usable `height` attribute.
    pub display_height: f32,
    /// Alt text from the `alt` attribute (empty when the attribute is absent).
    pub alt: String,
}
71
/// Map from DOM node IDs to their loaded image resources.
///
/// Keys are the `<img>` element nodes visited by `collect_images`.
pub type ImageStore = HashMap<NodeId, ImageResource>;
74
75/// Detect image format from the first bytes of data.
76pub fn detect_format(data: &[u8]) -> ImageFormat {
77 // PNG: 89 50 4E 47 0D 0A 1A 0A
78 if data.len() >= 8 && data[..8] == [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A] {
79 return ImageFormat::Png;
80 }
81 // JPEG: FF D8
82 if data.len() >= 2 && data[0] == 0xFF && data[1] == 0xD8 {
83 return ImageFormat::Jpeg;
84 }
85 // GIF: GIF87a or GIF89a
86 if data.len() >= 6 && &data[..3] == b"GIF" {
87 return ImageFormat::Gif;
88 }
89 ImageFormat::Unknown
90}
91
92/// Collect and load all images referenced by `<img>` elements in the DOM.
93///
94/// Scans the DOM in document order for `<img>` elements with a `src` attribute,
95/// fetches each image via the `ResourceLoader`, detects the format, and decodes.
96/// Failed loads produce an `ImageResource` with `image: None` (graceful degradation).
97pub fn collect_images(doc: &Document, loader: &mut ResourceLoader, base_url: &Url) -> ImageStore {
98 let mut store = ImageStore::new();
99 let mut img_nodes = Vec::new();
100 collect_img_nodes(doc, doc.root(), &mut img_nodes);
101
102 for node in img_nodes {
103 let src = match doc.get_attribute(node, "src") {
104 Some(s) if !s.is_empty() => s.to_string(),
105 _ => {
106 // No src — record with alt text only.
107 let alt = doc.get_attribute(node, "alt").unwrap_or("").to_string();
108 store.insert(
109 node,
110 ImageResource {
111 image: None,
112 display_width: 0.0,
113 display_height: 0.0,
114 alt,
115 },
116 );
117 continue;
118 }
119 };
120
121 let alt = doc.get_attribute(node, "alt").unwrap_or("").to_string();
122 let attr_width = parse_dimension_attr(doc.get_attribute(node, "width"));
123 let attr_height = parse_dimension_attr(doc.get_attribute(node, "height"));
124
125 match fetch_and_decode(loader, &src, base_url) {
126 Ok(image) => {
127 let intrinsic_w = image.width as f32;
128 let intrinsic_h = image.height as f32;
129 let (dw, dh) =
130 resolve_dimensions(attr_width, attr_height, intrinsic_w, intrinsic_h);
131 store.insert(
132 node,
133 ImageResource {
134 image: Some(image),
135 display_width: dw,
136 display_height: dh,
137 alt,
138 },
139 );
140 }
141 Err(_) => {
142 // Graceful degradation: store alt text, no image.
143 let (dw, dh) = match (attr_width, attr_height) {
144 (Some(w), Some(h)) => (w, h),
145 (Some(w), None) => (w, 0.0),
146 (None, Some(h)) => (0.0, h),
147 (None, None) => (0.0, 0.0),
148 };
149 store.insert(
150 node,
151 ImageResource {
152 image: None,
153 display_width: dw,
154 display_height: dh,
155 alt,
156 },
157 );
158 }
159 }
160 }
161
162 store
163}
164
165/// Walk the DOM in document order and collect `<img>` element nodes.
166fn collect_img_nodes(doc: &Document, node: NodeId, result: &mut Vec<NodeId>) {
167 if let NodeData::Element { tag_name, .. } = doc.node_data(node) {
168 if tag_name.eq_ignore_ascii_case("img") {
169 result.push(node);
170 }
171 }
172 for child in doc.children(node) {
173 collect_img_nodes(doc, child, result);
174 }
175}
176
/// Parse a dimension attribute value (e.g., `width="200"`) to f32.
///
/// Surrounding whitespace is ignored and a trailing `px` is stripped before
/// the remainder is parsed as a float.
///
/// Returns `None` when the attribute is absent, does not parse as a number,
/// or does not describe a usable length: zero, negative, NaN and infinite
/// values are all rejected. The finiteness check matters because `f32`
/// parsing accepts spellings such as `inf` and turns out-of-range input such
/// as `1e40` into infinity, which would otherwise become a display size.
fn parse_dimension_attr(value: Option<&str>) -> Option<f32> {
    let trimmed = value?.trim();
    // Strip trailing "px" if present.
    let number = trimmed.strip_suffix("px").unwrap_or(trimmed);
    number
        .parse::<f32>()
        .ok()
        .filter(|n| n.is_finite() && *n > 0.0)
}
186
/// Work out the display size of a decoded image.
///
/// Each side comes from its attribute when that attribute is present. A side
/// whose attribute is missing is derived from the other attribute by keeping
/// the intrinsic aspect ratio; if the intrinsic length needed as the divisor
/// is not positive, the missing side falls back to its own intrinsic length.
/// With no attributes at all the intrinsic size is returned unchanged.
fn resolve_dimensions(
    attr_w: Option<f32>,
    attr_h: Option<f32>,
    intrinsic_w: f32,
    intrinsic_h: f32,
) -> (f32, f32) {
    /// Scale the missing side so the intrinsic aspect ratio is kept, given the
    /// specified length of the other side.
    fn keep_ratio(given: f32, given_intrinsic: f32, missing_intrinsic: f32) -> f32 {
        if given_intrinsic > 0.0 {
            given * missing_intrinsic / given_intrinsic
        } else {
            missing_intrinsic
        }
    }

    let width = attr_w.unwrap_or_else(|| match attr_h {
        Some(h) => keep_ratio(h, intrinsic_h, intrinsic_w),
        None => intrinsic_w,
    });
    let height = attr_h.unwrap_or_else(|| match attr_w {
        Some(w) => keep_ratio(w, intrinsic_w, intrinsic_h),
        None => intrinsic_h,
    });

    (width, height)
}
217
218/// Fetch image data from a URL and decode it.
219fn fetch_and_decode(
220 loader: &mut ResourceLoader,
221 src: &str,
222 base_url: &Url,
223) -> Result<Image, ImgLoadError> {
224 let resource = loader.fetch_url(src, Some(base_url))?;
225
226 let (data, url_str) = match resource {
227 Resource::Image { data, url, .. } => (data, url.to_string()),
228 Resource::Other { data, url, .. } => (data, url.to_string()),
229 Resource::Html { text, .. } => (text.into_bytes(), src.to_string()),
230 Resource::Css { text, .. } | Resource::Script { text, .. } => {
231 (text.into_bytes(), src.to_string())
232 }
233 };
234
235 decode_image_data(&data, &url_str)
236}
237
238/// Decode raw bytes into an Image, detecting format from magic bytes.
239fn decode_image_data(data: &[u8], url: &str) -> Result<Image, ImgLoadError> {
240 match detect_format(data) {
241 ImageFormat::Png => Ok(decode_png(data)?),
242 ImageFormat::Jpeg => Ok(decode_jpeg(data)?),
243 ImageFormat::Gif => Ok(decode_gif(data)?),
244 ImageFormat::Unknown => Err(ImgLoadError::UnknownFormat {
245 url: url.to_string(),
246 }),
247 }
248}
249
250// ---------------------------------------------------------------------------
251// Tests
252// ---------------------------------------------------------------------------
253
#[cfg(test)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------------
    // detect_format
    // -----------------------------------------------------------------------

    #[test]
    fn detect_png() {
        // Full 8-byte signature followed by one arbitrary byte.
        let data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00];
        assert_eq!(detect_format(&data), ImageFormat::Png);
    }

    #[test]
    fn detect_jpeg() {
        let data = [0xFF, 0xD8, 0xFF, 0xE0];
        assert_eq!(detect_format(&data), ImageFormat::Jpeg);
    }

    #[test]
    fn detect_gif87a() {
        assert_eq!(detect_format(b"GIF87a..."), ImageFormat::Gif);
    }

    #[test]
    fn detect_gif89a() {
        assert_eq!(detect_format(b"GIF89a..."), ImageFormat::Gif);
    }

    #[test]
    fn detect_unknown_empty() {
        assert_eq!(detect_format(&[]), ImageFormat::Unknown);
    }

    #[test]
    fn detect_unknown_random() {
        assert_eq!(detect_format(&[0x00, 0x01, 0x02]), ImageFormat::Unknown);
    }

    #[test]
    fn detect_short_data() {
        // A single byte is shorter than every signature.
        assert_eq!(detect_format(&[0xFF]), ImageFormat::Unknown);
    }

    // -----------------------------------------------------------------------
    // parse_dimension_attr
    // -----------------------------------------------------------------------

    #[test]
    fn parse_dimension_integer() {
        assert_eq!(parse_dimension_attr(Some("200")), Some(200.0));
    }

    #[test]
    fn parse_dimension_with_px_suffix() {
        assert_eq!(parse_dimension_attr(Some("100px")), Some(100.0));
    }

    #[test]
    fn parse_dimension_float() {
        assert_eq!(parse_dimension_attr(Some("50.5")), Some(50.5));
    }

    #[test]
    fn parse_dimension_whitespace() {
        assert_eq!(parse_dimension_attr(Some(" 300 ")), Some(300.0));
    }

    #[test]
    fn parse_dimension_zero() {
        // Only strictly positive values are accepted.
        assert_eq!(parse_dimension_attr(Some("0")), None);
    }

    #[test]
    fn parse_dimension_negative() {
        assert_eq!(parse_dimension_attr(Some("-10")), None);
    }

    #[test]
    fn parse_dimension_invalid() {
        assert_eq!(parse_dimension_attr(Some("abc")), None);
    }

    #[test]
    fn parse_dimension_none() {
        assert_eq!(parse_dimension_attr(None), None);
    }

    #[test]
    fn parse_dimension_empty() {
        assert_eq!(parse_dimension_attr(Some("")), None);
    }

    // -----------------------------------------------------------------------
    // resolve_dimensions
    // -----------------------------------------------------------------------

    #[test]
    fn resolve_both_attrs() {
        let (w, h) = resolve_dimensions(Some(400.0), Some(300.0), 800.0, 600.0);
        assert_eq!(w, 400.0);
        assert_eq!(h, 300.0);
    }

    #[test]
    fn resolve_width_only_proportional() {
        let (w, h) = resolve_dimensions(Some(400.0), None, 800.0, 600.0);
        assert_eq!(w, 400.0);
        assert_eq!(h, 300.0); // 400 * 600/800
    }

    #[test]
    fn resolve_height_only_proportional() {
        let (w, h) = resolve_dimensions(None, Some(300.0), 800.0, 600.0);
        assert_eq!(w, 400.0); // 300 * 800/600
        assert_eq!(h, 300.0);
    }

    #[test]
    fn resolve_neither_uses_intrinsic() {
        let (w, h) = resolve_dimensions(None, None, 1024.0, 768.0);
        assert_eq!(w, 1024.0);
        assert_eq!(h, 768.0);
    }

    #[test]
    fn resolve_width_only_zero_intrinsic() {
        let (w, h) = resolve_dimensions(Some(400.0), None, 0.0, 600.0);
        assert_eq!(w, 400.0);
        assert_eq!(h, 600.0); // can't scale, use intrinsic height
    }

    #[test]
    fn resolve_height_only_zero_intrinsic() {
        let (w, h) = resolve_dimensions(None, Some(300.0), 800.0, 0.0);
        assert_eq!(w, 800.0); // can't scale, use intrinsic width
        assert_eq!(h, 300.0);
    }

    // -----------------------------------------------------------------------
    // collect_img_nodes
    // -----------------------------------------------------------------------

    #[test]
    fn collects_img_elements() {
        let mut doc = Document::new();
        let root = doc.root();

        let html = doc.create_element("html");
        doc.append_child(root, html);

        let body = doc.create_element("body");
        doc.append_child(html, body);

        let img = doc.create_element("img");
        doc.set_attribute(img, "src", "photo.png");
        doc.append_child(body, img);

        let mut nodes = Vec::new();
        collect_img_nodes(&doc, doc.root(), &mut nodes);
        assert_eq!(nodes.len(), 1);
        assert_eq!(doc.tag_name(nodes[0]), Some("img"));
    }

    #[test]
    fn collects_multiple_imgs() {
        let mut doc = Document::new();
        let root = doc.root();

        let body = doc.create_element("body");
        doc.append_child(root, body);

        let img1 = doc.create_element("img");
        doc.set_attribute(img1, "src", "a.png");
        doc.append_child(body, img1);

        let img2 = doc.create_element("img");
        doc.set_attribute(img2, "src", "b.jpg");
        doc.append_child(body, img2);

        let mut nodes = Vec::new();
        collect_img_nodes(&doc, doc.root(), &mut nodes);
        assert_eq!(nodes.len(), 2);
    }

    #[test]
    fn ignores_non_img_elements() {
        let mut doc = Document::new();
        let root = doc.root();

        let p = doc.create_element("p");
        doc.append_child(root, p);

        let div = doc.create_element("div");
        doc.append_child(root, div);

        let mut nodes = Vec::new();
        collect_img_nodes(&doc, doc.root(), &mut nodes);
        assert!(nodes.is_empty());
    }

    // -----------------------------------------------------------------------
    // collect_images — integration
    // -----------------------------------------------------------------------

    #[test]
    fn collect_images_no_src() {
        // An <img> without `src` is still recorded, carrying only its alt text.
        let mut doc = Document::new();
        let root = doc.root();

        let img = doc.create_element("img");
        doc.set_attribute(img, "alt", "missing");
        doc.append_child(root, img);

        let mut loader = ResourceLoader::new();
        let base = Url::parse("http://example.com/").unwrap();

        let store = collect_images(&doc, &mut loader, &base);
        assert_eq!(store.len(), 1);
        let res = store.get(&img).unwrap();
        assert!(res.image.is_none());
        assert_eq!(res.alt, "missing");
    }

    #[test]
    fn collect_images_empty_src() {
        // An empty `src` is treated the same way as a missing one.
        let mut doc = Document::new();
        let root = doc.root();

        let img = doc.create_element("img");
        doc.set_attribute(img, "src", "");
        doc.set_attribute(img, "alt", "empty src");
        doc.append_child(root, img);

        let mut loader = ResourceLoader::new();
        let base = Url::parse("http://example.com/").unwrap();

        let store = collect_images(&doc, &mut loader, &base);
        let res = store.get(&img).unwrap();
        assert!(res.image.is_none());
        assert_eq!(res.alt, "empty src");
    }

    #[test]
    fn collect_images_failed_fetch_graceful() {
        // The fetch is expected to fail, so the entry must fall back to the
        // alt text and the dimensions given by the attributes.
        let mut doc = Document::new();
        let root = doc.root();

        let img = doc.create_element("img");
        doc.set_attribute(img, "src", "http://nonexistent.test/photo.png");
        doc.set_attribute(img, "alt", "Photo");
        doc.set_attribute(img, "width", "200");
        doc.set_attribute(img, "height", "150");
        doc.append_child(root, img);

        let mut loader = ResourceLoader::new();
        let base = Url::parse("http://example.com/").unwrap();

        let store = collect_images(&doc, &mut loader, &base);
        let res = store.get(&img).unwrap();
        assert!(res.image.is_none());
        assert_eq!(res.alt, "Photo");
        assert_eq!(res.display_width, 200.0);
        assert_eq!(res.display_height, 150.0);
    }

    #[test]
    fn collect_images_no_alt() {
        // A missing `alt` attribute yields an empty alt string.
        let mut doc = Document::new();
        let root = doc.root();

        let img = doc.create_element("img");
        doc.set_attribute(img, "src", "http://nonexistent.test/x.png");
        doc.append_child(root, img);

        let mut loader = ResourceLoader::new();
        let base = Url::parse("http://example.com/").unwrap();

        let store = collect_images(&doc, &mut loader, &base);
        let res = store.get(&img).unwrap();
        assert_eq!(res.alt, "");
    }

    // -----------------------------------------------------------------------
    // ImgLoadError display
    // -----------------------------------------------------------------------

    #[test]
    fn error_display_unknown_format() {
        let e = ImgLoadError::UnknownFormat {
            url: "test.bin".to_string(),
        };
        assert_eq!(e.to_string(), "unknown image format at test.bin");
    }

    #[test]
    fn error_display_load() {
        let e = ImgLoadError::Load(LoadError::InvalidUrl("bad".to_string()));
        assert!(e.to_string().contains("image load error"));
    }

    #[test]
    fn error_display_decode() {
        let e = ImgLoadError::Decode(ImageError::Decode("corrupt".to_string()));
        assert!(e.to_string().contains("image decode error"));
    }

    // -----------------------------------------------------------------------
    // decode_image_data — error paths (unknown format, truncated data)
    // -----------------------------------------------------------------------

    #[test]
    fn decode_unknown_format_error() {
        let result = decode_image_data(&[0x00, 0x01, 0x02], "mystery.bin");
        assert!(result.is_err());
        assert!(matches!(result, Err(ImgLoadError::UnknownFormat { .. })));
    }

    #[test]
    fn decode_truncated_png_error() {
        // Valid PNG header but no actual image data.
        let data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
        let result = decode_image_data(&data, "broken.png");
        assert!(result.is_err());
    }

    #[test]
    fn decode_truncated_jpeg_error() {
        // Recognized as JPEG from its first bytes, but nothing follows them.
        let data = [0xFF, 0xD8, 0xFF, 0xE0];
        let result = decode_image_data(&data, "broken.jpg");
        assert!(result.is_err());
    }

    #[test]
    fn decode_truncated_gif_error() {
        // Just the 6-byte GIF header with nothing after it.
        let result = decode_image_data(b"GIF89a", "broken.gif");
        assert!(result.is_err());
    }

    // -----------------------------------------------------------------------
    // decode_image_data — valid minimal images
    // -----------------------------------------------------------------------

    #[test]
    fn decode_valid_png() {
        // Minimal 1x1 red PNG (RGB, bit depth 8).
        let data: &[u8] = &[
            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature
            0x00, 0x00, 0x00, 0x0D, // IHDR length
            0x49, 0x48, 0x44, 0x52, // IHDR
            0x00, 0x00, 0x00, 0x01, // width: 1
            0x00, 0x00, 0x00, 0x01, // height: 1
            0x08, 0x02, // bit depth: 8, color type: RGB
            0x00, 0x00, 0x00, // compression, filter, interlace
            0x90, 0x77, 0x53, 0xDE, // CRC
            0x00, 0x00, 0x00, 0x0C, // IDAT length
            0x49, 0x44, 0x41, 0x54, // IDAT
            0x78, 0x9C, 0x63, 0xF8, 0xCF, 0xC0, 0x00, 0x00, // zlib data
            0x03, 0x01, 0x01, 0x00, // Adler32
            0xC9, 0xFE, 0x92, 0xEF, // CRC
            0x00, 0x00, 0x00, 0x00, // IEND length
            0x49, 0x45, 0x4E, 0x44, // IEND
            0xAE, 0x42, 0x60, 0x82, // CRC
        ];
        let result = decode_image_data(data, "red.png");
        assert!(result.is_ok(), "PNG decode failed: {:?}", result.err());
        let img = result.unwrap();
        assert_eq!(img.width, 1);
        assert_eq!(img.height, 1);
        assert_eq!(img.data.len(), 4); // 1x1 RGBA8
    }
}