//! zlib decompression (RFC 1950). //! //! Parses the zlib header and trailer, delegates to DEFLATE for the actual //! compressed data, and verifies the Adler-32 checksum. use crate::deflate; use std::fmt; // --------------------------------------------------------------------------- // Error type // --------------------------------------------------------------------------- /// Errors that can occur during zlib decompression. #[derive(Debug, Clone, PartialEq, Eq)] pub enum ZlibError { /// Input is too short to contain a valid zlib stream. InputTooShort, /// Header checksum failed (CMF*256 + FLG must be a multiple of 31). InvalidHeaderChecksum, /// Unsupported compression method (only DEFLATE / CM=8 is supported). UnsupportedCompressionMethod(u8), /// Window size exceeds the maximum (32KB / 2^15). InvalidWindowSize(u8), /// Preset dictionary is required but not supported. PresetDictionaryNotSupported, /// Adler-32 checksum of decompressed data does not match the trailer. ChecksumMismatch { expected: u32, actual: u32 }, /// Input is too short to contain the Adler-32 trailer. MissingTrailer, /// DEFLATE decompression error. Deflate(deflate::DeflateError), } impl fmt::Display for ZlibError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::InputTooShort => write!(f, "input too short for zlib header"), Self::InvalidHeaderChecksum => write!(f, "invalid zlib header checksum"), Self::UnsupportedCompressionMethod(cm) => { write!(f, "unsupported compression method: {cm}") } Self::InvalidWindowSize(cinfo) => { write!(f, "invalid window size: CINFO={cinfo}") } Self::PresetDictionaryNotSupported => { write!(f, "preset dictionary not supported") } Self::ChecksumMismatch { expected, actual } => { write!( f, "Adler-32 checksum mismatch: expected {expected:#010x}, got {actual:#010x}" ) } Self::MissingTrailer => write!(f, "missing Adler-32 trailer"), Self::Deflate(e) => write!(f, "deflate error: {e}"), } } } impl From for ZlibError { fn from(e: deflate::DeflateError) -> Self { Self::Deflate(e) } } pub type Result = std::result::Result; // --------------------------------------------------------------------------- // Adler-32 // --------------------------------------------------------------------------- /// Compute the Adler-32 checksum of `data`. fn adler32(data: &[u8]) -> u32 { const MOD_ADLER: u32 = 65521; let mut a: u32 = 1; let mut b: u32 = 0; // Process in blocks to avoid frequent modular reductions. // The maximum block size before a or b can overflow u32: // a accumulates at most 255 per byte, so after n bytes a <= 1 + 255*n. // b accumulates at most a per byte. To keep b < 2^32 we need // n * (1 + 255*n/2) < 2^32, which gives n ~= 5552. const BLOCK_SIZE: usize = 5552; for chunk in data.chunks(BLOCK_SIZE) { for &byte in chunk { a += byte as u32; b += a; } a %= MOD_ADLER; b %= MOD_ADLER; } (b << 16) | a } // --------------------------------------------------------------------------- // Decompressor // --------------------------------------------------------------------------- /// Decompress a zlib-wrapped data stream per RFC 1950. /// /// The input must contain: /// - 2-byte zlib header (CMF + FLG) /// - DEFLATE compressed data /// - 4-byte Adler-32 checksum (big-endian) pub fn zlib_decompress(input: &[u8]) -> Result> { // Need at least 2 bytes for header + 4 bytes for trailer if input.len() < 6 { return Err(ZlibError::InputTooShort); } let cmf = input[0]; let flg = input[1]; // Validate header checksum: (CMF*256 + FLG) must be a multiple of 31 let check = (cmf as u16) * 256 + (flg as u16); if !check.is_multiple_of(31) { return Err(ZlibError::InvalidHeaderChecksum); } // CMF: lower 4 bits = compression method (CM), upper 4 bits = CINFO let cm = cmf & 0x0F; let cinfo = (cmf >> 4) & 0x0F; // Only DEFLATE (CM=8) is supported if cm != 8 { return Err(ZlibError::UnsupportedCompressionMethod(cm)); } // CINFO is the log2 of the LZ77 window size minus 8. // Maximum is 7 (window = 2^15 = 32768 bytes). if cinfo > 7 { return Err(ZlibError::InvalidWindowSize(cinfo)); } // FLG bit 5: preset dictionary flag (FDICT) let fdict = (flg >> 5) & 1; if fdict != 0 { return Err(ZlibError::PresetDictionaryNotSupported); } // Compressed data starts after the 2-byte header // Adler-32 checksum occupies the last 4 bytes if input.len() < 6 { return Err(ZlibError::MissingTrailer); } let compressed = &input[2..input.len() - 4]; let trailer = &input[input.len() - 4..]; // Decompress let decompressed = deflate::inflate(compressed)?; // Read expected Adler-32 (big-endian) let expected = ((trailer[0] as u32) << 24) | ((trailer[1] as u32) << 16) | ((trailer[2] as u32) << 8) | (trailer[3] as u32); // Verify checksum let actual = adler32(&decompressed); if actual != expected { return Err(ZlibError::ChecksumMismatch { expected, actual }); } Ok(decompressed) } // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- #[cfg(test)] mod tests { use super::*; // -- Adler-32 tests -- #[test] fn adler32_empty() { // Adler-32 of empty data is 1 (a=1, b=0) assert_eq!(adler32(&[]), 0x0000_0001); } #[test] fn adler32_single_byte() { // a = 1 + byte, b = 1 + a = 2 + byte // For 'a' (97): a = 98, b = 98. Result = (98 << 16) | 98 = 0x00620062 assert_eq!(adler32(b"a"), 0x00620062); } #[test] fn adler32_wikipedia_example() { // Known value: Adler-32 of "Wikipedia" = 0x11E60398 assert_eq!(adler32(b"Wikipedia"), 0x11E60398); } #[test] fn adler32_abc() { // a starts at 1, b starts at 0 // After 'a'(97): a=98, b=98 // After 'b'(98): a=196, b=294 // After 'c'(99): a=295, b=589 // Result = (589 << 16) | 295 = 0x024D0127 assert_eq!(adler32(b"abc"), 0x024D0127); } #[test] fn adler32_large_block() { // Test with data larger than BLOCK_SIZE (5552) to exercise modular reduction let data = vec![0xFF; 10000]; let checksum = adler32(&data); // Verify it produces a non-trivial result (not overflow) assert_ne!(checksum, 0); // Verify it's deterministic assert_eq!(adler32(&data), checksum); } // -- Error Display tests -- #[test] fn error_display() { assert_eq!( ZlibError::InputTooShort.to_string(), "input too short for zlib header" ); assert_eq!( ZlibError::InvalidHeaderChecksum.to_string(), "invalid zlib header checksum" ); assert_eq!( ZlibError::UnsupportedCompressionMethod(15).to_string(), "unsupported compression method: 15" ); assert_eq!( ZlibError::InvalidWindowSize(8).to_string(), "invalid window size: CINFO=8" ); assert_eq!( ZlibError::PresetDictionaryNotSupported.to_string(), "preset dictionary not supported" ); assert_eq!( ZlibError::ChecksumMismatch { expected: 0x12345678, actual: 0xABCDEF01 } .to_string(), "Adler-32 checksum mismatch: expected 0x12345678, got 0xabcdef01" ); assert_eq!( ZlibError::MissingTrailer.to_string(), "missing Adler-32 trailer" ); assert_eq!( ZlibError::Deflate(deflate::DeflateError::UnexpectedEof).to_string(), "deflate error: unexpected end of input" ); } // -- Header validation tests -- #[test] fn input_too_short() { assert!(matches!( zlib_decompress(&[]), Err(ZlibError::InputTooShort) )); assert!(matches!( zlib_decompress(&[0x78]), Err(ZlibError::InputTooShort) )); assert!(matches!( zlib_decompress(&[0x78, 0x9C, 0x03, 0x00]), Err(ZlibError::InputTooShort) )); } #[test] fn invalid_header_checksum() { // CMF=0x78 (DEFLATE, window=32KB), FLG=0x00 → 0x7800 % 31 = 18 ≠ 0 let mut input = vec![0x78, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01]; assert!(matches!( zlib_decompress(&input), Err(ZlibError::InvalidHeaderChecksum) )); // Fix the checksum by trying various FLG values input[1] = 0x01; // 0x7801 % 31 = 0x7801 = 30721, 30721 % 31 = 0 assert!(!matches!( zlib_decompress(&input), Err(ZlibError::InvalidHeaderChecksum) )); } #[test] fn unsupported_compression_method() { // CM=15 instead of 8, with valid checksum // CMF = (CINFO << 4) | CM = (7 << 4) | 15 = 0x7F // Need FLG such that (0x7F * 256 + FLG) % 31 == 0 // 0x7F00 = 32512, 32512 % 31 = 32512 - 1048*31 = 32512 - 32488 = 24 // FLG = 31 - 24 = 7 let input = vec![0x7F, 0x07, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01]; assert!(matches!( zlib_decompress(&input), Err(ZlibError::UnsupportedCompressionMethod(15)) )); } #[test] fn invalid_window_size() { // CINFO=8 (invalid, max is 7), CM=8 // CMF = (8 << 4) | 8 = 0x88 // 0x8800 = 34816, 34816 % 31 = 34816 - 1123*31 = 34816 - 34813 = 3 // FLG = 31 - 3 = 28 let input = vec![0x88, 0x1C, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01]; assert!(matches!( zlib_decompress(&input), Err(ZlibError::InvalidWindowSize(8)) )); } #[test] fn preset_dictionary_rejected() { // CMF=0x78 (DEFLATE, window=32KB), FLG with FDICT=1 // FLG bit 5 = 1, so FLG = 0x20 | adjustment for checksum // 0x7800 + FLG must be multiple of 31 // 0x7800 = 30720, 30720 % 31 = 30720 - 990*31 = 30720 - 30690 = 30 // Need FLG where (30 + FLG) % 31 == 0 and bit 5 is set // FLG = 1 makes 30721 % 31 = 0, but bit 5 is not set // FLG with bit 5 set: 0x20 = 32, (30 + 32) % 31 = 62 % 31 = 0. let input = vec![0x78, 0x20, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01]; assert!(matches!( zlib_decompress(&input), Err(ZlibError::PresetDictionaryNotSupported) )); } // -- Decompression tests -- #[test] fn decompress_empty() { // zlib compressed empty data: header 0x78 0x9C + deflate empty + adler32(empty) // DEFLATE empty: BFINAL=1, BTYPE=01 (fixed), EOB → 0x03, 0x00 // Adler-32 of empty = 0x00000001 (big-endian: 00 00 00 01) let input = [0x78, 0x9C, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01]; let result = zlib_decompress(&input).unwrap(); assert!(result.is_empty()); } #[test] fn decompress_hello() { // zlib-compressed "Hello" — standard zlib level 6 output let input = [ 0x78, 0x9C, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x07, 0x00, 0x05, 0x8c, 0x01, 0xf5, ]; let result = zlib_decompress(&input).unwrap(); assert_eq!(result, b"Hello"); } /// Build a zlib stream from raw DEFLATE data and the expected decompressed output. fn make_zlib_stream(deflate_data: &[u8], decompressed: &[u8]) -> Vec { let checksum = adler32(decompressed); let mut stream = vec![0x78, 0x9C]; // CMF=DEFLATE/32KB, FLG=level6 stream.extend_from_slice(deflate_data); stream.push((checksum >> 24) as u8); stream.push((checksum >> 16) as u8); stream.push((checksum >> 8) as u8); stream.push(checksum as u8); stream } #[test] fn decompress_pangram() { let expected = b"The quick brown fox jumps over the lazy dog"; // Raw DEFLATE for the pangram (verified by deflate::tests::inflate_pangram) let deflate_data = [ 0x0b, 0xc9, 0x48, 0x55, 0x28, 0x2c, 0xcd, 0x4c, 0xce, 0x56, 0x48, 0x2a, 0xca, 0x2f, 0xcf, 0x53, 0x48, 0xcb, 0xaf, 0x50, 0xc8, 0x2a, 0xcd, 0x2d, 0x28, 0x56, 0xc8, 0x2f, 0x4b, 0x2d, 0x52, 0x28, 0x01, 0x4a, 0xe7, 0x24, 0x56, 0x55, 0x2a, 0xa4, 0xe4, 0xa7, 0x03, 0x00, ]; let input = make_zlib_stream(&deflate_data, expected); let result = zlib_decompress(&input).unwrap(); assert_eq!(result, expected); } #[test] fn decompress_repeated() { let expected: Vec = b"abcdefghijklmnopqrstuvwxyz".repeat(10); // Raw DEFLATE for alphabet repeated (verified by deflate::tests::inflate_alphabet_repeated) let deflate_data = [ 0x4b, 0x4c, 0x4a, 0x4e, 0x49, 0x4d, 0x4b, 0xcf, 0xc8, 0xcc, 0xca, 0xce, 0xc9, 0xcd, 0xcb, 0x2f, 0x28, 0x2c, 0x2a, 0x2e, 0x29, 0x2d, 0x2b, 0xaf, 0xa8, 0xac, 0x4a, 0x1c, 0x31, 0x32, 0x00, ]; let input = make_zlib_stream(&deflate_data, &expected); let result = zlib_decompress(&input).unwrap(); assert_eq!(result, expected); } #[test] fn decompress_non_compressed_block() { // zlib stream using a non-compressed DEFLATE block for "Hello" // Header: 0x78, 0x01 (DEFLATE, window=32KB, level=best speed) // DEFLATE: 0x01 (BFINAL=1, BTYPE=00), LEN=5, NLEN=!5, "Hello" // Adler-32 of "Hello" = 0x058C01F5 let mut input = vec![0x78, 0x01]; input.push(0x01); // BFINAL=1, BTYPE=00 input.extend_from_slice(&[5, 0]); // LEN=5 input.extend_from_slice(&[0xFA, 0xFF]); // NLEN=!5 input.extend_from_slice(b"Hello"); input.extend_from_slice(&[0x05, 0x8C, 0x01, 0xF5]); // Adler-32 let result = zlib_decompress(&input).unwrap(); assert_eq!(result, b"Hello"); } #[test] fn checksum_mismatch() { // Valid header + valid deflate, but wrong Adler-32 let input = [ 0x78, 0x9C, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x07, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, ]; assert!(matches!( zlib_decompress(&input), Err(ZlibError::ChecksumMismatch { .. }) )); } #[test] fn deflate_error_propagated() { // Valid zlib header but broken DEFLATE data // Header: 0x78, 0x9C // DEFLATE: 0x07 = BFINAL=1, BTYPE=11 (invalid) // Fake trailer: 0x00, 0x00, 0x00, 0x01 let input = [0x78, 0x9C, 0x07, 0x00, 0x00, 0x00, 0x00, 0x01]; let err = zlib_decompress(&input).unwrap_err(); assert!(matches!( err, ZlibError::Deflate(deflate::DeflateError::InvalidBlockType(3)) )); } // -- Various header configurations -- #[test] fn different_window_sizes() { // Test that different CINFO values (0-7) are accepted // We just need valid headers — use empty DEFLATE stream for cinfo in 0..=7u8 { let cmf = (cinfo << 4) | 8; // CM=8 (DEFLATE) let fcheck = (31 - ((cmf as u16 * 256) % 31)) % 31; let flg = fcheck as u8; // Empty DEFLATE: 0x03, 0x00 // Adler-32 of empty: 0x00, 0x00, 0x00, 0x01 let input = vec![cmf, flg, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01]; let result = zlib_decompress(&input); assert!(result.is_ok(), "CINFO={cinfo} should be valid: {result:?}"); assert!(result.unwrap().is_empty()); } } // -- Adler-32 verification with known values -- #[test] fn adler32_all_zeros() { let data = vec![0u8; 100]; // a = 1 + 0*100 = 1, b = 100*1 = 100 assert_eq!(adler32(&data), (100 << 16) | 1); } #[test] fn adler32_single_ff() { // a = 1 + 255 = 256, b = 0 + 256 = 256 assert_eq!(adler32(&[0xFF]), (256 << 16) | 256); } }