we (web engine): Experimental web browser project to understand the limits of Claude
at gif-decoder 478 lines 17 kB view raw
1//! zlib decompression (RFC 1950). 2//! 3//! Parses the zlib header and trailer, delegates to DEFLATE for the actual 4//! compressed data, and verifies the Adler-32 checksum. 5 6use crate::deflate; 7use std::fmt; 8 9// --------------------------------------------------------------------------- 10// Error type 11// --------------------------------------------------------------------------- 12 13/// Errors that can occur during zlib decompression. 14#[derive(Debug, Clone, PartialEq, Eq)] 15pub enum ZlibError { 16 /// Input is too short to contain a valid zlib stream. 17 InputTooShort, 18 /// Header checksum failed (CMF*256 + FLG must be a multiple of 31). 19 InvalidHeaderChecksum, 20 /// Unsupported compression method (only DEFLATE / CM=8 is supported). 21 UnsupportedCompressionMethod(u8), 22 /// Window size exceeds the maximum (32KB / 2^15). 23 InvalidWindowSize(u8), 24 /// Preset dictionary is required but not supported. 25 PresetDictionaryNotSupported, 26 /// Adler-32 checksum of decompressed data does not match the trailer. 27 ChecksumMismatch { expected: u32, actual: u32 }, 28 /// Input is too short to contain the Adler-32 trailer. 29 MissingTrailer, 30 /// DEFLATE decompression error. 31 Deflate(deflate::DeflateError), 32} 33 34impl fmt::Display for ZlibError { 35 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 36 match self { 37 Self::InputTooShort => write!(f, "input too short for zlib header"), 38 Self::InvalidHeaderChecksum => write!(f, "invalid zlib header checksum"), 39 Self::UnsupportedCompressionMethod(cm) => { 40 write!(f, "unsupported compression method: {cm}") 41 } 42 Self::InvalidWindowSize(cinfo) => { 43 write!(f, "invalid window size: CINFO={cinfo}") 44 } 45 Self::PresetDictionaryNotSupported => { 46 write!(f, "preset dictionary not supported") 47 } 48 Self::ChecksumMismatch { expected, actual } => { 49 write!( 50 f, 51 "Adler-32 checksum mismatch: expected {expected:#010x}, got {actual:#010x}" 52 ) 53 } 54 Self::MissingTrailer => write!(f, "missing Adler-32 trailer"), 55 Self::Deflate(e) => write!(f, "deflate error: {e}"), 56 } 57 } 58} 59 60impl From<deflate::DeflateError> for ZlibError { 61 fn from(e: deflate::DeflateError) -> Self { 62 Self::Deflate(e) 63 } 64} 65 66pub type Result<T> = std::result::Result<T, ZlibError>; 67 68// --------------------------------------------------------------------------- 69// Adler-32 70// --------------------------------------------------------------------------- 71 72/// Compute the Adler-32 checksum of `data`. 73fn adler32(data: &[u8]) -> u32 { 74 const MOD_ADLER: u32 = 65521; 75 76 let mut a: u32 = 1; 77 let mut b: u32 = 0; 78 79 // Process in blocks to avoid frequent modular reductions. 80 // The maximum block size before a or b can overflow u32: 81 // a accumulates at most 255 per byte, so after n bytes a <= 1 + 255*n. 82 // b accumulates at most a per byte. To keep b < 2^32 we need 83 // n * (1 + 255*n/2) < 2^32, which gives n ~= 5552. 84 const BLOCK_SIZE: usize = 5552; 85 86 for chunk in data.chunks(BLOCK_SIZE) { 87 for &byte in chunk { 88 a += byte as u32; 89 b += a; 90 } 91 a %= MOD_ADLER; 92 b %= MOD_ADLER; 93 } 94 95 (b << 16) | a 96} 97 98// --------------------------------------------------------------------------- 99// Decompressor 100// --------------------------------------------------------------------------- 101 102/// Decompress a zlib-wrapped data stream per RFC 1950. 103/// 104/// The input must contain: 105/// - 2-byte zlib header (CMF + FLG) 106/// - DEFLATE compressed data 107/// - 4-byte Adler-32 checksum (big-endian) 108pub fn zlib_decompress(input: &[u8]) -> Result<Vec<u8>> { 109 // Need at least 2 bytes for header + 4 bytes for trailer 110 if input.len() < 6 { 111 return Err(ZlibError::InputTooShort); 112 } 113 114 let cmf = input[0]; 115 let flg = input[1]; 116 117 // Validate header checksum: (CMF*256 + FLG) must be a multiple of 31 118 let check = (cmf as u16) * 256 + (flg as u16); 119 if !check.is_multiple_of(31) { 120 return Err(ZlibError::InvalidHeaderChecksum); 121 } 122 123 // CMF: lower 4 bits = compression method (CM), upper 4 bits = CINFO 124 let cm = cmf & 0x0F; 125 let cinfo = (cmf >> 4) & 0x0F; 126 127 // Only DEFLATE (CM=8) is supported 128 if cm != 8 { 129 return Err(ZlibError::UnsupportedCompressionMethod(cm)); 130 } 131 132 // CINFO is the log2 of the LZ77 window size minus 8. 133 // Maximum is 7 (window = 2^15 = 32768 bytes). 134 if cinfo > 7 { 135 return Err(ZlibError::InvalidWindowSize(cinfo)); 136 } 137 138 // FLG bit 5: preset dictionary flag (FDICT) 139 let fdict = (flg >> 5) & 1; 140 if fdict != 0 { 141 return Err(ZlibError::PresetDictionaryNotSupported); 142 } 143 144 // Compressed data starts after the 2-byte header 145 // Adler-32 checksum occupies the last 4 bytes 146 if input.len() < 6 { 147 return Err(ZlibError::MissingTrailer); 148 } 149 150 let compressed = &input[2..input.len() - 4]; 151 let trailer = &input[input.len() - 4..]; 152 153 // Decompress 154 let decompressed = deflate::inflate(compressed)?; 155 156 // Read expected Adler-32 (big-endian) 157 let expected = ((trailer[0] as u32) << 24) 158 | ((trailer[1] as u32) << 16) 159 | ((trailer[2] as u32) << 8) 160 | (trailer[3] as u32); 161 162 // Verify checksum 163 let actual = adler32(&decompressed); 164 if actual != expected { 165 return Err(ZlibError::ChecksumMismatch { expected, actual }); 166 } 167 168 Ok(decompressed) 169} 170 171// --------------------------------------------------------------------------- 172// Tests 173// --------------------------------------------------------------------------- 174 175#[cfg(test)] 176mod tests { 177 use super::*; 178 179 // -- Adler-32 tests -- 180 181 #[test] 182 fn adler32_empty() { 183 // Adler-32 of empty data is 1 (a=1, b=0) 184 assert_eq!(adler32(&[]), 0x0000_0001); 185 } 186 187 #[test] 188 fn adler32_single_byte() { 189 // a = 1 + byte, b = 1 + a = 2 + byte 190 // For 'a' (97): a = 98, b = 98. Result = (98 << 16) | 98 = 0x00620062 191 assert_eq!(adler32(b"a"), 0x00620062); 192 } 193 194 #[test] 195 fn adler32_wikipedia_example() { 196 // Known value: Adler-32 of "Wikipedia" = 0x11E60398 197 assert_eq!(adler32(b"Wikipedia"), 0x11E60398); 198 } 199 200 #[test] 201 fn adler32_abc() { 202 // a starts at 1, b starts at 0 203 // After 'a'(97): a=98, b=98 204 // After 'b'(98): a=196, b=294 205 // After 'c'(99): a=295, b=589 206 // Result = (589 << 16) | 295 = 0x024D0127 207 assert_eq!(adler32(b"abc"), 0x024D0127); 208 } 209 210 #[test] 211 fn adler32_large_block() { 212 // Test with data larger than BLOCK_SIZE (5552) to exercise modular reduction 213 let data = vec![0xFF; 10000]; 214 let checksum = adler32(&data); 215 // Verify it produces a non-trivial result (not overflow) 216 assert_ne!(checksum, 0); 217 // Verify it's deterministic 218 assert_eq!(adler32(&data), checksum); 219 } 220 221 // -- Error Display tests -- 222 223 #[test] 224 fn error_display() { 225 assert_eq!( 226 ZlibError::InputTooShort.to_string(), 227 "input too short for zlib header" 228 ); 229 assert_eq!( 230 ZlibError::InvalidHeaderChecksum.to_string(), 231 "invalid zlib header checksum" 232 ); 233 assert_eq!( 234 ZlibError::UnsupportedCompressionMethod(15).to_string(), 235 "unsupported compression method: 15" 236 ); 237 assert_eq!( 238 ZlibError::InvalidWindowSize(8).to_string(), 239 "invalid window size: CINFO=8" 240 ); 241 assert_eq!( 242 ZlibError::PresetDictionaryNotSupported.to_string(), 243 "preset dictionary not supported" 244 ); 245 assert_eq!( 246 ZlibError::ChecksumMismatch { 247 expected: 0x12345678, 248 actual: 0xABCDEF01 249 } 250 .to_string(), 251 "Adler-32 checksum mismatch: expected 0x12345678, got 0xabcdef01" 252 ); 253 assert_eq!( 254 ZlibError::MissingTrailer.to_string(), 255 "missing Adler-32 trailer" 256 ); 257 assert_eq!( 258 ZlibError::Deflate(deflate::DeflateError::UnexpectedEof).to_string(), 259 "deflate error: unexpected end of input" 260 ); 261 } 262 263 // -- Header validation tests -- 264 265 #[test] 266 fn input_too_short() { 267 assert!(matches!( 268 zlib_decompress(&[]), 269 Err(ZlibError::InputTooShort) 270 )); 271 assert!(matches!( 272 zlib_decompress(&[0x78]), 273 Err(ZlibError::InputTooShort) 274 )); 275 assert!(matches!( 276 zlib_decompress(&[0x78, 0x9C, 0x03, 0x00]), 277 Err(ZlibError::InputTooShort) 278 )); 279 } 280 281 #[test] 282 fn invalid_header_checksum() { 283 // CMF=0x78 (DEFLATE, window=32KB), FLG=0x00 → 0x7800 % 31 = 18 ≠ 0 284 let mut input = vec![0x78, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01]; 285 assert!(matches!( 286 zlib_decompress(&input), 287 Err(ZlibError::InvalidHeaderChecksum) 288 )); 289 // Fix the checksum by trying various FLG values 290 input[1] = 0x01; // 0x7801 % 31 = 0x7801 = 30721, 30721 % 31 = 0 291 assert!(!matches!( 292 zlib_decompress(&input), 293 Err(ZlibError::InvalidHeaderChecksum) 294 )); 295 } 296 297 #[test] 298 fn unsupported_compression_method() { 299 // CM=15 instead of 8, with valid checksum 300 // CMF = (CINFO << 4) | CM = (7 << 4) | 15 = 0x7F 301 // Need FLG such that (0x7F * 256 + FLG) % 31 == 0 302 // 0x7F00 = 32512, 32512 % 31 = 32512 - 1048*31 = 32512 - 32488 = 24 303 // FLG = 31 - 24 = 7 304 let input = vec![0x7F, 0x07, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01]; 305 assert!(matches!( 306 zlib_decompress(&input), 307 Err(ZlibError::UnsupportedCompressionMethod(15)) 308 )); 309 } 310 311 #[test] 312 fn invalid_window_size() { 313 // CINFO=8 (invalid, max is 7), CM=8 314 // CMF = (8 << 4) | 8 = 0x88 315 // 0x8800 = 34816, 34816 % 31 = 34816 - 1123*31 = 34816 - 34813 = 3 316 // FLG = 31 - 3 = 28 317 let input = vec![0x88, 0x1C, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01]; 318 assert!(matches!( 319 zlib_decompress(&input), 320 Err(ZlibError::InvalidWindowSize(8)) 321 )); 322 } 323 324 #[test] 325 fn preset_dictionary_rejected() { 326 // CMF=0x78 (DEFLATE, window=32KB), FLG with FDICT=1 327 // FLG bit 5 = 1, so FLG = 0x20 | adjustment for checksum 328 // 0x7800 + FLG must be multiple of 31 329 // 0x7800 = 30720, 30720 % 31 = 30720 - 990*31 = 30720 - 30690 = 30 330 // Need FLG where (30 + FLG) % 31 == 0 and bit 5 is set 331 // FLG = 1 makes 30721 % 31 = 0, but bit 5 is not set 332 // FLG with bit 5 set: 0x20 = 32, (30 + 32) % 31 = 62 % 31 = 0. 333 let input = vec![0x78, 0x20, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01]; 334 assert!(matches!( 335 zlib_decompress(&input), 336 Err(ZlibError::PresetDictionaryNotSupported) 337 )); 338 } 339 340 // -- Decompression tests -- 341 342 #[test] 343 fn decompress_empty() { 344 // zlib compressed empty data: header 0x78 0x9C + deflate empty + adler32(empty) 345 // DEFLATE empty: BFINAL=1, BTYPE=01 (fixed), EOB → 0x03, 0x00 346 // Adler-32 of empty = 0x00000001 (big-endian: 00 00 00 01) 347 let input = [0x78, 0x9C, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01]; 348 let result = zlib_decompress(&input).unwrap(); 349 assert!(result.is_empty()); 350 } 351 352 #[test] 353 fn decompress_hello() { 354 // zlib-compressed "Hello" — standard zlib level 6 output 355 let input = [ 356 0x78, 0x9C, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x07, 0x00, 0x05, 0x8c, 0x01, 0xf5, 357 ]; 358 let result = zlib_decompress(&input).unwrap(); 359 assert_eq!(result, b"Hello"); 360 } 361 362 /// Build a zlib stream from raw DEFLATE data and the expected decompressed output. 363 fn make_zlib_stream(deflate_data: &[u8], decompressed: &[u8]) -> Vec<u8> { 364 let checksum = adler32(decompressed); 365 let mut stream = vec![0x78, 0x9C]; // CMF=DEFLATE/32KB, FLG=level6 366 stream.extend_from_slice(deflate_data); 367 stream.push((checksum >> 24) as u8); 368 stream.push((checksum >> 16) as u8); 369 stream.push((checksum >> 8) as u8); 370 stream.push(checksum as u8); 371 stream 372 } 373 374 #[test] 375 fn decompress_pangram() { 376 let expected = b"The quick brown fox jumps over the lazy dog"; 377 // Raw DEFLATE for the pangram (verified by deflate::tests::inflate_pangram) 378 let deflate_data = [ 379 0x0b, 0xc9, 0x48, 0x55, 0x28, 0x2c, 0xcd, 0x4c, 0xce, 0x56, 0x48, 0x2a, 0xca, 0x2f, 380 0xcf, 0x53, 0x48, 0xcb, 0xaf, 0x50, 0xc8, 0x2a, 0xcd, 0x2d, 0x28, 0x56, 0xc8, 0x2f, 381 0x4b, 0x2d, 0x52, 0x28, 0x01, 0x4a, 0xe7, 0x24, 0x56, 0x55, 0x2a, 0xa4, 0xe4, 0xa7, 382 0x03, 0x00, 383 ]; 384 let input = make_zlib_stream(&deflate_data, expected); 385 let result = zlib_decompress(&input).unwrap(); 386 assert_eq!(result, expected); 387 } 388 389 #[test] 390 fn decompress_repeated() { 391 let expected: Vec<u8> = b"abcdefghijklmnopqrstuvwxyz".repeat(10); 392 // Raw DEFLATE for alphabet repeated (verified by deflate::tests::inflate_alphabet_repeated) 393 let deflate_data = [ 394 0x4b, 0x4c, 0x4a, 0x4e, 0x49, 0x4d, 0x4b, 0xcf, 0xc8, 0xcc, 0xca, 0xce, 0xc9, 0xcd, 395 0xcb, 0x2f, 0x28, 0x2c, 0x2a, 0x2e, 0x29, 0x2d, 0x2b, 0xaf, 0xa8, 0xac, 0x4a, 0x1c, 396 0x31, 0x32, 0x00, 397 ]; 398 let input = make_zlib_stream(&deflate_data, &expected); 399 let result = zlib_decompress(&input).unwrap(); 400 assert_eq!(result, expected); 401 } 402 403 #[test] 404 fn decompress_non_compressed_block() { 405 // zlib stream using a non-compressed DEFLATE block for "Hello" 406 // Header: 0x78, 0x01 (DEFLATE, window=32KB, level=best speed) 407 // DEFLATE: 0x01 (BFINAL=1, BTYPE=00), LEN=5, NLEN=!5, "Hello" 408 // Adler-32 of "Hello" = 0x058C01F5 409 let mut input = vec![0x78, 0x01]; 410 input.push(0x01); // BFINAL=1, BTYPE=00 411 input.extend_from_slice(&[5, 0]); // LEN=5 412 input.extend_from_slice(&[0xFA, 0xFF]); // NLEN=!5 413 input.extend_from_slice(b"Hello"); 414 input.extend_from_slice(&[0x05, 0x8C, 0x01, 0xF5]); // Adler-32 415 let result = zlib_decompress(&input).unwrap(); 416 assert_eq!(result, b"Hello"); 417 } 418 419 #[test] 420 fn checksum_mismatch() { 421 // Valid header + valid deflate, but wrong Adler-32 422 let input = [ 423 0x78, 0x9C, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x07, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 424 ]; 425 assert!(matches!( 426 zlib_decompress(&input), 427 Err(ZlibError::ChecksumMismatch { .. }) 428 )); 429 } 430 431 #[test] 432 fn deflate_error_propagated() { 433 // Valid zlib header but broken DEFLATE data 434 // Header: 0x78, 0x9C 435 // DEFLATE: 0x07 = BFINAL=1, BTYPE=11 (invalid) 436 // Fake trailer: 0x00, 0x00, 0x00, 0x01 437 let input = [0x78, 0x9C, 0x07, 0x00, 0x00, 0x00, 0x00, 0x01]; 438 let err = zlib_decompress(&input).unwrap_err(); 439 assert!(matches!( 440 err, 441 ZlibError::Deflate(deflate::DeflateError::InvalidBlockType(3)) 442 )); 443 } 444 445 // -- Various header configurations -- 446 447 #[test] 448 fn different_window_sizes() { 449 // Test that different CINFO values (0-7) are accepted 450 // We just need valid headers — use empty DEFLATE stream 451 for cinfo in 0..=7u8 { 452 let cmf = (cinfo << 4) | 8; // CM=8 (DEFLATE) 453 let fcheck = (31 - ((cmf as u16 * 256) % 31)) % 31; 454 let flg = fcheck as u8; 455 // Empty DEFLATE: 0x03, 0x00 456 // Adler-32 of empty: 0x00, 0x00, 0x00, 0x01 457 let input = vec![cmf, flg, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01]; 458 let result = zlib_decompress(&input); 459 assert!(result.is_ok(), "CINFO={cinfo} should be valid: {result:?}"); 460 assert!(result.unwrap().is_empty()); 461 } 462 } 463 464 // -- Adler-32 verification with known values -- 465 466 #[test] 467 fn adler32_all_zeros() { 468 let data = vec![0u8; 100]; 469 // a = 1 + 0*100 = 1, b = 100*1 = 100 470 assert_eq!(adler32(&data), (100 << 16) | 1); 471 } 472 473 #[test] 474 fn adler32_single_ff() { 475 // a = 1 + 255 = 256, b = 0 + 256 = 256 476 assert_eq!(adler32(&[0xFF]), (256 << 16) | 256); 477 } 478}