we (web engine): Experimental web browser project to understand the limits of Claude
1//! zlib decompression (RFC 1950).
2//!
3//! Parses the zlib header and trailer, delegates to DEFLATE for the actual
4//! compressed data, and verifies the Adler-32 checksum.
5
6use crate::deflate;
7use std::fmt;
8
9// ---------------------------------------------------------------------------
10// Error type
11// ---------------------------------------------------------------------------
12
13/// Errors that can occur during zlib decompression.
14#[derive(Debug, Clone, PartialEq, Eq)]
15pub enum ZlibError {
16 /// Input is too short to contain a valid zlib stream.
17 InputTooShort,
18 /// Header checksum failed (CMF*256 + FLG must be a multiple of 31).
19 InvalidHeaderChecksum,
20 /// Unsupported compression method (only DEFLATE / CM=8 is supported).
21 UnsupportedCompressionMethod(u8),
22 /// Window size exceeds the maximum (32KB / 2^15).
23 InvalidWindowSize(u8),
24 /// Preset dictionary is required but not supported.
25 PresetDictionaryNotSupported,
26 /// Adler-32 checksum of decompressed data does not match the trailer.
27 ChecksumMismatch { expected: u32, actual: u32 },
28 /// Input is too short to contain the Adler-32 trailer.
29 MissingTrailer,
30 /// DEFLATE decompression error.
31 Deflate(deflate::DeflateError),
32}
33
34impl fmt::Display for ZlibError {
35 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
36 match self {
37 Self::InputTooShort => write!(f, "input too short for zlib header"),
38 Self::InvalidHeaderChecksum => write!(f, "invalid zlib header checksum"),
39 Self::UnsupportedCompressionMethod(cm) => {
40 write!(f, "unsupported compression method: {cm}")
41 }
42 Self::InvalidWindowSize(cinfo) => {
43 write!(f, "invalid window size: CINFO={cinfo}")
44 }
45 Self::PresetDictionaryNotSupported => {
46 write!(f, "preset dictionary not supported")
47 }
48 Self::ChecksumMismatch { expected, actual } => {
49 write!(
50 f,
51 "Adler-32 checksum mismatch: expected {expected:#010x}, got {actual:#010x}"
52 )
53 }
54 Self::MissingTrailer => write!(f, "missing Adler-32 trailer"),
55 Self::Deflate(e) => write!(f, "deflate error: {e}"),
56 }
57 }
58}
59
60impl From<deflate::DeflateError> for ZlibError {
61 fn from(e: deflate::DeflateError) -> Self {
62 Self::Deflate(e)
63 }
64}
65
66pub type Result<T> = std::result::Result<T, ZlibError>;
67
68// ---------------------------------------------------------------------------
69// Adler-32
70// ---------------------------------------------------------------------------
71
/// Return the Adler-32 checksum of `data`.
///
/// The result packs the two running sums as `(high << 16) | low`, where `low`
/// starts at 1 and adds every byte, and `high` starts at 0 and adds every
/// intermediate value of `low`; both are taken modulo 65521.
fn adler32(data: &[u8]) -> u32 {
    const MODULUS: u32 = 65521;
    // Number of bytes that may be summed between reductions: even with every
    // byte equal to 0xFF and both sums starting just below the modulus,
    // neither sum exceeds u32::MAX within 5552 bytes.
    const BYTES_PER_REDUCTION: usize = 5552;

    let (low, high) = data
        .chunks(BYTES_PER_REDUCTION)
        .fold((1u32, 0u32), |(mut low, mut high), block| {
            for &octet in block {
                low += u32::from(octet);
                high += low;
            }
            // Reduce once per block rather than once per byte.
            (low % MODULUS, high % MODULUS)
        });

    (high << 16) | low
}
97
98// ---------------------------------------------------------------------------
99// Decompressor
100// ---------------------------------------------------------------------------
101
102/// Decompress a zlib-wrapped data stream per RFC 1950.
103///
104/// The input must contain:
105/// - 2-byte zlib header (CMF + FLG)
106/// - DEFLATE compressed data
107/// - 4-byte Adler-32 checksum (big-endian)
108pub fn zlib_decompress(input: &[u8]) -> Result<Vec<u8>> {
109 // Need at least 2 bytes for header + 4 bytes for trailer
110 if input.len() < 6 {
111 return Err(ZlibError::InputTooShort);
112 }
113
114 let cmf = input[0];
115 let flg = input[1];
116
117 // Validate header checksum: (CMF*256 + FLG) must be a multiple of 31
118 let check = (cmf as u16) * 256 + (flg as u16);
119 if !check.is_multiple_of(31) {
120 return Err(ZlibError::InvalidHeaderChecksum);
121 }
122
123 // CMF: lower 4 bits = compression method (CM), upper 4 bits = CINFO
124 let cm = cmf & 0x0F;
125 let cinfo = (cmf >> 4) & 0x0F;
126
127 // Only DEFLATE (CM=8) is supported
128 if cm != 8 {
129 return Err(ZlibError::UnsupportedCompressionMethod(cm));
130 }
131
132 // CINFO is the log2 of the LZ77 window size minus 8.
133 // Maximum is 7 (window = 2^15 = 32768 bytes).
134 if cinfo > 7 {
135 return Err(ZlibError::InvalidWindowSize(cinfo));
136 }
137
138 // FLG bit 5: preset dictionary flag (FDICT)
139 let fdict = (flg >> 5) & 1;
140 if fdict != 0 {
141 return Err(ZlibError::PresetDictionaryNotSupported);
142 }
143
144 // Compressed data starts after the 2-byte header
145 // Adler-32 checksum occupies the last 4 bytes
146 if input.len() < 6 {
147 return Err(ZlibError::MissingTrailer);
148 }
149
150 let compressed = &input[2..input.len() - 4];
151 let trailer = &input[input.len() - 4..];
152
153 // Decompress
154 let decompressed = deflate::inflate(compressed)?;
155
156 // Read expected Adler-32 (big-endian)
157 let expected = ((trailer[0] as u32) << 24)
158 | ((trailer[1] as u32) << 16)
159 | ((trailer[2] as u32) << 8)
160 | (trailer[3] as u32);
161
162 // Verify checksum
163 let actual = adler32(&decompressed);
164 if actual != expected {
165 return Err(ZlibError::ChecksumMismatch { expected, actual });
166 }
167
168 Ok(decompressed)
169}
170
171// ---------------------------------------------------------------------------
172// Tests
173// ---------------------------------------------------------------------------
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Straightforward Adler-32 that reduces after every byte. Used as an
    /// independent oracle for the block-wise `adler32` under test.
    fn adler32_reference(data: &[u8]) -> u32 {
        let mut a: u32 = 1;
        let mut b: u32 = 0;
        for &byte in data {
            a = (a + u32::from(byte)) % 65521;
            b = (b + a) % 65521;
        }
        (b << 16) | a
    }

    // -- Adler-32 tests --

    #[test]
    fn adler32_empty() {
        // Adler-32 of empty data is 1 (a=1, b=0)
        assert_eq!(adler32(&[]), 0x0000_0001);
    }

    #[test]
    fn adler32_single_byte() {
        // a = 1 + byte, b = 0 + a
        // For 'a' (97): a = 98, b = 98. Result = (98 << 16) | 98 = 0x00620062
        assert_eq!(adler32(b"a"), 0x00620062);
    }

    #[test]
    fn adler32_wikipedia_example() {
        // Known value: Adler-32 of "Wikipedia" = 0x11E60398
        assert_eq!(adler32(b"Wikipedia"), 0x11E60398);
    }

    #[test]
    fn adler32_abc() {
        // a starts at 1, b starts at 0
        // After 'a'(97): a=98, b=98
        // After 'b'(98): a=196, b=294
        // After 'c'(99): a=295, b=589
        // Result = (589 << 16) | 295 = 0x024D0127
        assert_eq!(adler32(b"abc"), 0x024D0127);
    }

    #[test]
    fn adler32_large_block() {
        // Worst-case bytes (0xFF) at lengths just below, at, and above
        // multiples of the 5552-byte block size, so both the deferred
        // reduction and the final partial block are checked for the exact value.
        for len in [5551usize, 5552, 5553, 10_000, 11_104, 11_105] {
            let data = vec![0xFFu8; len];
            assert_eq!(adler32(&data), adler32_reference(&data), "len={len}");
        }
    }

    // -- Error Display tests --

    #[test]
    fn error_display() {
        assert_eq!(
            ZlibError::InputTooShort.to_string(),
            "input too short for zlib header"
        );
        assert_eq!(
            ZlibError::InvalidHeaderChecksum.to_string(),
            "invalid zlib header checksum"
        );
        assert_eq!(
            ZlibError::UnsupportedCompressionMethod(15).to_string(),
            "unsupported compression method: 15"
        );
        assert_eq!(
            ZlibError::InvalidWindowSize(8).to_string(),
            "invalid window size: CINFO=8"
        );
        assert_eq!(
            ZlibError::PresetDictionaryNotSupported.to_string(),
            "preset dictionary not supported"
        );
        assert_eq!(
            ZlibError::ChecksumMismatch {
                expected: 0x12345678,
                actual: 0xABCDEF01
            }
            .to_string(),
            "Adler-32 checksum mismatch: expected 0x12345678, got 0xabcdef01"
        );
        assert_eq!(
            ZlibError::MissingTrailer.to_string(),
            "missing Adler-32 trailer"
        );
        assert_eq!(
            ZlibError::Deflate(deflate::DeflateError::UnexpectedEof).to_string(),
            "deflate error: unexpected end of input"
        );
    }

    // -- Header validation tests --

    #[test]
    fn input_too_short() {
        assert!(matches!(
            zlib_decompress(&[]),
            Err(ZlibError::InputTooShort)
        ));
        assert!(matches!(
            zlib_decompress(&[0x78]),
            Err(ZlibError::InputTooShort)
        ));
        assert!(matches!(
            zlib_decompress(&[0x78, 0x9C, 0x03, 0x00]),
            Err(ZlibError::InputTooShort)
        ));
        // Boundary: one byte short of header (2) + trailer (4).
        assert!(matches!(
            zlib_decompress(&[0x78, 0x9C, 0x03, 0x00, 0x00]),
            Err(ZlibError::InputTooShort)
        ));
    }

    #[test]
    fn invalid_header_checksum() {
        // CMF=0x78 (DEFLATE, window=32KB), FLG=0x00 → 0x7800 % 31 = 30 ≠ 0
        let mut input = [0x78, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01];
        assert!(matches!(
            zlib_decompress(&input),
            Err(ZlibError::InvalidHeaderChecksum)
        ));
        // FLG=0x01 repairs the check: 0x7801 = 30721 = 31 * 991
        input[1] = 0x01;
        assert!(!matches!(
            zlib_decompress(&input),
            Err(ZlibError::InvalidHeaderChecksum)
        ));
    }

    #[test]
    fn unsupported_compression_method() {
        // CM=15 instead of 8, with valid checksum
        // CMF = (CINFO << 4) | CM = (7 << 4) | 15 = 0x7F
        // Need FLG such that (0x7F * 256 + FLG) % 31 == 0
        // 0x7F00 = 32512, 32512 % 31 = 32512 - 1048*31 = 32512 - 32488 = 24
        // FLG = 31 - 24 = 7
        let input = [0x7F, 0x07, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01];
        assert!(matches!(
            zlib_decompress(&input),
            Err(ZlibError::UnsupportedCompressionMethod(15))
        ));
    }

    #[test]
    fn invalid_window_size() {
        // CINFO=8 (invalid, max is 7), CM=8
        // CMF = (8 << 4) | 8 = 0x88
        // 0x8800 = 34816, 34816 % 31 = 34816 - 1123*31 = 34816 - 34813 = 3
        // FLG = 31 - 3 = 28
        let input = [0x88, 0x1C, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01];
        assert!(matches!(
            zlib_decompress(&input),
            Err(ZlibError::InvalidWindowSize(8))
        ));
    }

    #[test]
    fn preset_dictionary_rejected() {
        // CMF=0x78 (DEFLATE, window=32KB), FLG with FDICT (bit 5) set.
        // 0x7800 = 30720, 30720 % 31 = 30720 - 990*31 = 30720 - 30690 = 30
        // Need FLG where (30 + FLG) % 31 == 0 and bit 5 is set:
        // FLG = 0x20 = 32, (30 + 32) % 31 = 62 % 31 = 0.
        let input = [0x78, 0x20, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01];
        assert!(matches!(
            zlib_decompress(&input),
            Err(ZlibError::PresetDictionaryNotSupported)
        ));
    }

    // -- Decompression tests --

    #[test]
    fn decompress_empty() {
        // zlib compressed empty data: header 0x78 0x9C + deflate empty + adler32(empty)
        // DEFLATE empty: BFINAL=1, BTYPE=01 (fixed), EOB → 0x03, 0x00
        // Adler-32 of empty = 0x00000001 (big-endian: 00 00 00 01)
        let input = [0x78, 0x9C, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01];
        let result = zlib_decompress(&input).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn decompress_hello() {
        // zlib-compressed "Hello" — standard zlib level 6 output
        let input = [
            0x78, 0x9C, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x07, 0x00, 0x05, 0x8c, 0x01, 0xf5,
        ];
        let result = zlib_decompress(&input).unwrap();
        assert_eq!(result, b"Hello");
    }

    /// Build a zlib stream from raw DEFLATE data and the expected decompressed output.
    fn make_zlib_stream(deflate_data: &[u8], decompressed: &[u8]) -> Vec<u8> {
        let checksum = adler32(decompressed);
        let mut stream = vec![0x78, 0x9C]; // CMF=DEFLATE/32KB, FLG=level6
        stream.extend_from_slice(deflate_data);
        // The trailer is the Adler-32 in big-endian byte order.
        stream.extend_from_slice(&checksum.to_be_bytes());
        stream
    }

    #[test]
    fn decompress_pangram() {
        let expected = b"The quick brown fox jumps over the lazy dog";
        // Raw DEFLATE for the pangram (verified by deflate::tests::inflate_pangram)
        let deflate_data = [
            0x0b, 0xc9, 0x48, 0x55, 0x28, 0x2c, 0xcd, 0x4c, 0xce, 0x56, 0x48, 0x2a, 0xca, 0x2f,
            0xcf, 0x53, 0x48, 0xcb, 0xaf, 0x50, 0xc8, 0x2a, 0xcd, 0x2d, 0x28, 0x56, 0xc8, 0x2f,
            0x4b, 0x2d, 0x52, 0x28, 0x01, 0x4a, 0xe7, 0x24, 0x56, 0x55, 0x2a, 0xa4, 0xe4, 0xa7,
            0x03, 0x00,
        ];
        let input = make_zlib_stream(&deflate_data, expected);
        let result = zlib_decompress(&input).unwrap();
        assert_eq!(result, expected);
    }

    #[test]
    fn decompress_repeated() {
        let expected: Vec<u8> = b"abcdefghijklmnopqrstuvwxyz".repeat(10);
        // Raw DEFLATE for alphabet repeated (verified by deflate::tests::inflate_alphabet_repeated)
        let deflate_data = [
            0x4b, 0x4c, 0x4a, 0x4e, 0x49, 0x4d, 0x4b, 0xcf, 0xc8, 0xcc, 0xca, 0xce, 0xc9, 0xcd,
            0xcb, 0x2f, 0x28, 0x2c, 0x2a, 0x2e, 0x29, 0x2d, 0x2b, 0xaf, 0xa8, 0xac, 0x4a, 0x1c,
            0x31, 0x32, 0x00,
        ];
        let input = make_zlib_stream(&deflate_data, &expected);
        let result = zlib_decompress(&input).unwrap();
        assert_eq!(result, expected);
    }

    #[test]
    fn decompress_non_compressed_block() {
        // zlib stream using a non-compressed DEFLATE block for "Hello"
        // Header: 0x78, 0x01 (DEFLATE, window=32KB, level=best speed)
        // DEFLATE: 0x01 (BFINAL=1, BTYPE=00), LEN=5, NLEN=!5, "Hello"
        // Adler-32 of "Hello" = 0x058C01F5
        let mut input = vec![0x78, 0x01];
        input.push(0x01); // BFINAL=1, BTYPE=00
        input.extend_from_slice(&[5, 0]); // LEN=5
        input.extend_from_slice(&[0xFA, 0xFF]); // NLEN=!5
        input.extend_from_slice(b"Hello");
        input.extend_from_slice(&[0x05, 0x8C, 0x01, 0xF5]); // Adler-32
        let result = zlib_decompress(&input).unwrap();
        assert_eq!(result, b"Hello");
    }

    #[test]
    fn checksum_mismatch() {
        // Valid header + valid deflate, but wrong Adler-32
        let input = [
            0x78, 0x9C, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x07, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
        ];
        assert!(matches!(
            zlib_decompress(&input),
            Err(ZlibError::ChecksumMismatch { .. })
        ));
    }

    #[test]
    fn deflate_error_propagated() {
        // Valid zlib header but broken DEFLATE data
        // Header: 0x78, 0x9C
        // DEFLATE: 0x07 = BFINAL=1, BTYPE=11 (invalid)
        // Fake trailer: 0x00, 0x00, 0x00, 0x01
        let input = [0x78, 0x9C, 0x07, 0x00, 0x00, 0x00, 0x00, 0x01];
        let err = zlib_decompress(&input).unwrap_err();
        assert!(matches!(
            err,
            ZlibError::Deflate(deflate::DeflateError::InvalidBlockType(3))
        ));
    }

    // -- Various header configurations --

    #[test]
    fn different_window_sizes() {
        // Test that different CINFO values (0-7) are accepted
        // We just need valid headers — use empty DEFLATE stream
        for cinfo in 0..=7u8 {
            let cmf = (cinfo << 4) | 8; // CM=8 (DEFLATE)
            // Smallest FLG that makes CMF*256 + FLG a multiple of 31.
            let fcheck = (31 - ((cmf as u16 * 256) % 31)) % 31;
            let flg = fcheck as u8;
            // Empty DEFLATE: 0x03, 0x00
            // Adler-32 of empty: 0x00, 0x00, 0x00, 0x01
            let input = [cmf, flg, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01];
            let result = zlib_decompress(&input);
            assert!(result.is_ok(), "CINFO={cinfo} should be valid: {result:?}");
            assert!(result.unwrap().is_empty());
        }
    }

    // -- Adler-32 verification with known values --

    #[test]
    fn adler32_all_zeros() {
        let data = [0u8; 100];
        // a = 1 + 0*100 = 1, b = 100*1 = 100
        assert_eq!(adler32(&data), (100 << 16) | 1);
    }

    #[test]
    fn adler32_single_ff() {
        // a = 1 + 255 = 256, b = 0 + 256 = 256
        assert_eq!(adler32(&[0xFF]), (256 << 16) | 256);
    }
}