//! DEFLATE decompressor (RFC 1951). //! //! Supports all three block types: //! - Non-compressed (BTYPE=00) //! - Fixed Huffman codes (BTYPE=01) //! - Dynamic Huffman codes (BTYPE=10) use std::fmt; // --------------------------------------------------------------------------- // Error type // --------------------------------------------------------------------------- /// Errors that can occur during DEFLATE decompression. #[derive(Debug, Clone, PartialEq, Eq)] pub enum DeflateError { /// Unexpected end of input data. UnexpectedEof, /// Invalid block type (BTYPE=11 is reserved). InvalidBlockType(u8), /// Non-compressed block length check failed (LEN != ~NLEN). LenMismatch { len: u16, nlen: u16 }, /// Invalid Huffman code encountered. InvalidCode, /// Invalid code length code in dynamic block header. InvalidCodeLengths, /// Back-reference distance exceeds available output. InvalidDistance { distance: usize, available: usize }, /// Huffman tree is over-subscribed or otherwise invalid. InvalidHuffmanTree, } impl fmt::Display for DeflateError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::UnexpectedEof => write!(f, "unexpected end of input"), Self::InvalidBlockType(b) => write!(f, "invalid block type: {b}"), Self::LenMismatch { len, nlen } => { write!( f, "non-compressed block length mismatch: LEN={len}, NLEN={nlen}" ) } Self::InvalidCode => write!(f, "invalid Huffman code"), Self::InvalidCodeLengths => write!(f, "invalid code length codes"), Self::InvalidDistance { distance, available, } => { write!( f, "invalid back-reference: distance {distance} exceeds available {available}" ) } Self::InvalidHuffmanTree => write!(f, "invalid Huffman tree"), } } } pub type Result = std::result::Result; // --------------------------------------------------------------------------- // Bit reader // --------------------------------------------------------------------------- /// Reads bits from a byte slice, LSB-first (as required by DEFLATE). struct BitReader<'a> { data: &'a [u8], pos: usize, bit_buf: u32, bits_in_buf: u8, } impl<'a> BitReader<'a> { fn new(data: &'a [u8]) -> Self { Self { data, pos: 0, bit_buf: 0, bits_in_buf: 0, } } /// Ensure at least `count` bits are available in the buffer. fn fill(&mut self, count: u8) -> Result<()> { while self.bits_in_buf < count { if self.pos >= self.data.len() { return Err(DeflateError::UnexpectedEof); } self.bit_buf |= (self.data[self.pos] as u32) << self.bits_in_buf; self.pos += 1; self.bits_in_buf += 8; } Ok(()) } /// Fill the buffer with as many bits as possible, up to `count`. /// Does not error on EOF — just fills what's available. fn fill_available(&mut self, count: u8) { while self.bits_in_buf < count { if self.pos >= self.data.len() { return; } self.bit_buf |= (self.data[self.pos] as u32) << self.bits_in_buf; self.pos += 1; self.bits_in_buf += 8; } } /// Peek at the lowest `count` bits without consuming them. fn peek(&self, count: u8) -> u32 { self.bit_buf & ((1u32 << count) - 1) } /// Consume `count` bits from the buffer (must have been fill'd first). fn consume(&mut self, count: u8) { self.bit_buf >>= count; self.bits_in_buf -= count; } /// Read `count` bits (up to 25) as a u32, LSB first. fn read_bits(&mut self, count: u8) -> Result { debug_assert!(count <= 25); self.fill(count)?; let val = self.peek(count); self.consume(count); Ok(val) } /// Align to the next byte boundary by discarding remaining bits. fn align_to_byte(&mut self) { self.bit_buf = 0; self.bits_in_buf = 0; } /// Read a raw byte (after byte-alignment). fn read_byte(&mut self) -> Result { if self.pos >= self.data.len() { return Err(DeflateError::UnexpectedEof); } let b = self.data[self.pos]; self.pos += 1; Ok(b) } /// Read `n` raw bytes into a buffer (after byte-alignment). fn read_bytes(&mut self, buf: &mut [u8]) -> Result<()> { let n = buf.len(); if self.pos + n > self.data.len() { return Err(DeflateError::UnexpectedEof); } buf.copy_from_slice(&self.data[self.pos..self.pos + n]); self.pos += n; Ok(()) } } // --------------------------------------------------------------------------- // Huffman tree // --------------------------------------------------------------------------- /// Maximum number of bits in a Huffman code for DEFLATE. const MAX_BITS: usize = 15; /// A Huffman decoding table built from code lengths. /// /// Uses a two-level lookup: a primary table indexed by up to `primary_bits` /// bits, and secondary tables for longer codes. struct HuffmanTable { /// Lookup table entries. Each entry is either: /// - A leaf: (symbol << 4) | code_length (code_length > 0) /// - A subtable pointer: (subtable_offset << 4) | 0, where offset is into `entries` entries: Vec, primary_bits: u8, } impl HuffmanTable { /// Build a Huffman table from code lengths. /// /// `code_lengths[i]` is the bit length of symbol `i`. A length of 0 means /// the symbol does not appear. fn from_code_lengths(code_lengths: &[u8], primary_bits: u8) -> Result { let max_code_len = code_lengths.iter().copied().max().unwrap_or(0) as usize; if max_code_len > MAX_BITS { return Err(DeflateError::InvalidHuffmanTree); } // Count codes of each length let mut bl_count = [0u32; MAX_BITS + 1]; for &len in code_lengths { bl_count[len as usize] += 1; } bl_count[0] = 0; // Check for empty tree (all lengths are 0) let total_codes: u32 = bl_count[1..].iter().sum(); if total_codes == 0 { // Empty tree — return a table that always fails to decode let size = 1 << primary_bits; return Ok(Self { entries: vec![0; size], primary_bits, }); } // Compute next_code for each bit length (RFC 1951 §3.2.2) let mut next_code = [0u32; MAX_BITS + 1]; let mut code = 0u32; for bits in 1..=max_code_len { code = (code + bl_count[bits - 1]) << 1; next_code[bits] = code; } // Assign codes to symbols let mut codes = vec![0u32; code_lengths.len()]; let mut lengths = vec![0u8; code_lengths.len()]; for (sym, &len) in code_lengths.iter().enumerate() { if len > 0 { codes[sym] = next_code[len as usize]; lengths[sym] = len; next_code[len as usize] += 1; } } // Build the lookup table let primary_size = 1usize << primary_bits; let mut entries = vec![0u32; primary_size]; // For codes that fit in primary_bits, fill all matching entries for (sym, (&code_val, &code_len)) in codes.iter().zip(lengths.iter()).enumerate() { if code_len == 0 { continue; } if code_len <= primary_bits { // Reverse the code bits for LSB-first lookup let reversed = reverse_bits(code_val, code_len); let extra_bits = primary_bits - code_len; let base = reversed as usize; // Fill all entries where the extra bits vary for i in 0..(1usize << extra_bits) { let index = base | (i << code_len); entries[index] = ((sym as u32) << 4) | (code_len as u32); } } } // For codes longer than primary_bits, build secondary tables if max_code_len as u8 > primary_bits { // Group long codes by their primary_bits prefix type SubtableEntry = (u32, u8, u16); let mut subtables: Vec<(usize, Vec)> = Vec::new(); for (sym, (&code_val, &code_len)) in codes.iter().zip(lengths.iter()).enumerate() { if code_len == 0 || code_len <= primary_bits { continue; } let reversed = reverse_bits(code_val, code_len); let primary_index = (reversed as usize) & (primary_size - 1); let secondary_bits = reversed >> primary_bits; let secondary_len = code_len - primary_bits; // Find or create subtable for this primary index let pos = subtables.iter().position(|(idx, _)| *idx == primary_index); match pos { Some(p) => { subtables[p] .1 .push((secondary_bits, secondary_len, sym as u16)); } None => { subtables.push(( primary_index, vec![(secondary_bits, secondary_len, sym as u16)], )); } } } for (primary_index, symbols) in &subtables { let max_secondary = symbols.iter().map(|(_, l, _)| *l).max().unwrap_or(0); let subtable_size = 1usize << max_secondary; let subtable_offset = entries.len(); entries.resize(entries.len() + subtable_size, 0); for &(sec_bits, sec_len, sym) in symbols { let extra = max_secondary - sec_len; let base = sec_bits as usize; for i in 0..(1usize << extra) { let index = subtable_offset + base + (i << sec_len); let total_len = primary_bits + sec_len; entries[index] = ((sym as u32) << 4) | (total_len as u32); } } // Store subtable pointer in primary entry // Format: (subtable_offset << 4) | 0, with high bit indicating subtable // We use bit pattern: offset in upper bits, max_secondary in bits [1..4], flag=0 in bit 0 // Actually simpler: store (offset << 8) | (max_secondary << 4) with code_len = 0 entries[*primary_index] = ((subtable_offset as u32) << 8) | ((max_secondary as u32) << 4); } } Ok(Self { entries, primary_bits, }) } /// Decode one symbol from the bit stream. fn decode(&self, reader: &mut BitReader<'_>) -> Result { // Fill as many bits as possible up to primary_bits. // Near the end of stream we may have fewer bits than primary_bits, // but enough for the actual code (e.g., 7-bit end-of-block in a 9-bit table). reader.fill_available(self.primary_bits); if reader.bits_in_buf == 0 { return Err(DeflateError::UnexpectedEof); } // Peek only as many bits as we actually have (padded with zeros) let peek_bits = std::cmp::min(self.primary_bits, reader.bits_in_buf); let bits = reader.peek(peek_bits); // If we have fewer bits than primary_bits, the upper bits are implicitly 0, // which means only codes whose upper bits are 0 can match. This works for // end-of-stream where the last code is padded with zeros. let entry = self.entries[bits as usize]; let code_len = entry & 0xF; if code_len > 0 { if code_len as u8 > reader.bits_in_buf { return Err(DeflateError::UnexpectedEof); } // Direct hit — consume only the actual code length reader.consume(code_len as u8); return Ok((entry >> 4) as u16); } // Subtable lookup — consume primary_bits, then peek into subtable reader.consume(self.primary_bits); let subtable_offset = (entry >> 8) as usize; let max_secondary = ((entry >> 4) & 0xF) as u8; reader.fill(max_secondary)?; let sec_bits = reader.peek(max_secondary); let sub_entry = self.entries[subtable_offset + sec_bits as usize]; let sub_code_len = sub_entry & 0xF; if sub_code_len == 0 { return Err(DeflateError::InvalidCode); } let actual_secondary = sub_code_len as u8 - self.primary_bits; reader.consume(actual_secondary); Ok((sub_entry >> 4) as u16) } } /// Reverse the bottom `len` bits of `code`. fn reverse_bits(code: u32, len: u8) -> u32 { let mut result = 0u32; let mut c = code; for _ in 0..len { result = (result << 1) | (c & 1); c >>= 1; } result } // --------------------------------------------------------------------------- // Fixed Huffman tables (RFC 1951 §3.2.6) // --------------------------------------------------------------------------- fn fixed_literal_lengths() -> [u8; 288] { let mut lengths = [0u8; 288]; lengths[..=143].fill(8); lengths[144..=255].fill(9); lengths[256..=279].fill(7); lengths[280..=287].fill(8); lengths } fn fixed_distance_lengths() -> [u8; 32] { [5u8; 32] } // --------------------------------------------------------------------------- // Length and distance base values (RFC 1951 §3.2.5) // --------------------------------------------------------------------------- /// (base_length, extra_bits) for length codes 257..285 const LENGTH_TABLE: [(u16, u8); 29] = [ (3, 0), (4, 0), (5, 0), (6, 0), (7, 0), (8, 0), (9, 0), (10, 0), (11, 1), (13, 1), (15, 1), (17, 1), (19, 2), (23, 2), (27, 2), (31, 2), (35, 3), (43, 3), (51, 3), (59, 3), (67, 4), (83, 4), (99, 4), (115, 4), (131, 5), (163, 5), (195, 5), (227, 5), (258, 0), ]; /// (base_distance, extra_bits) for distance codes 0..29 const DISTANCE_TABLE: [(u16, u8); 30] = [ (1, 0), (2, 0), (3, 0), (4, 0), (5, 1), (7, 1), (9, 2), (13, 2), (17, 3), (25, 3), (33, 4), (49, 4), (65, 5), (97, 5), (129, 6), (193, 6), (257, 7), (385, 7), (513, 8), (769, 8), (1025, 9), (1537, 9), (2049, 10), (3073, 10), (4097, 11), (6145, 11), (8193, 12), (12289, 12), (16385, 13), (24577, 13), ]; /// Order in which code length code lengths appear (RFC 1951 §3.2.7). const CODE_LENGTH_ORDER: [usize; 19] = [ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, ]; // --------------------------------------------------------------------------- // Decompressor // --------------------------------------------------------------------------- /// Decompress a DEFLATE compressed data stream per RFC 1951. pub fn inflate(input: &[u8]) -> Result> { let mut reader = BitReader::new(input); let mut output = Vec::new(); loop { let bfinal = reader.read_bits(1)?; let btype = reader.read_bits(2)? as u8; match btype { 0 => decode_uncompressed(&mut reader, &mut output)?, 1 => decode_fixed(&mut reader, &mut output)?, 2 => decode_dynamic(&mut reader, &mut output)?, _ => return Err(DeflateError::InvalidBlockType(btype)), } if bfinal == 1 { break; } } Ok(output) } /// Decode a non-compressed block (BTYPE=00). fn decode_uncompressed(reader: &mut BitReader<'_>, output: &mut Vec) -> Result<()> { reader.align_to_byte(); let lo = reader.read_byte()? as u16; let hi = reader.read_byte()? as u16; let len = lo | (hi << 8); let nlo = reader.read_byte()? as u16; let nhi = reader.read_byte()? as u16; let nlen = nlo | (nhi << 8); if len != !nlen { return Err(DeflateError::LenMismatch { len, nlen }); } let mut buf = vec![0u8; len as usize]; reader.read_bytes(&mut buf)?; output.extend_from_slice(&buf); Ok(()) } /// Decode a block with fixed Huffman codes (BTYPE=01). fn decode_fixed(reader: &mut BitReader<'_>, output: &mut Vec) -> Result<()> { let lit_lengths = fixed_literal_lengths(); let dist_lengths = fixed_distance_lengths(); let lit_table = HuffmanTable::from_code_lengths(&lit_lengths, 9)?; let dist_table = HuffmanTable::from_code_lengths(&dist_lengths, 5)?; decode_compressed(reader, &lit_table, &dist_table, output) } /// Decode a block with dynamic Huffman codes (BTYPE=10). fn decode_dynamic(reader: &mut BitReader<'_>, output: &mut Vec) -> Result<()> { let hlit = reader.read_bits(5)? as usize + 257; let hdist = reader.read_bits(5)? as usize + 1; let hclen = reader.read_bits(4)? as usize + 4; // Read code length code lengths let mut cl_lengths = [0u8; 19]; for i in 0..hclen { cl_lengths[CODE_LENGTH_ORDER[i]] = reader.read_bits(3)? as u8; } let cl_table = HuffmanTable::from_code_lengths(&cl_lengths, 7)?; // Decode literal/length and distance code lengths let total = hlit + hdist; let mut combined_lengths = vec![0u8; total]; let mut i = 0; while i < total { let sym = cl_table.decode(reader)?; match sym { 0..=15 => { combined_lengths[i] = sym as u8; i += 1; } 16 => { // Repeat previous code length 3-6 times if i == 0 { return Err(DeflateError::InvalidCodeLengths); } let repeat = reader.read_bits(2)? as usize + 3; let prev = combined_lengths[i - 1]; for _ in 0..repeat { if i >= total { return Err(DeflateError::InvalidCodeLengths); } combined_lengths[i] = prev; i += 1; } } 17 => { // Repeat 0 for 3-10 times let repeat = reader.read_bits(3)? as usize + 3; for _ in 0..repeat { if i >= total { return Err(DeflateError::InvalidCodeLengths); } combined_lengths[i] = 0; i += 1; } } 18 => { // Repeat 0 for 11-138 times let repeat = reader.read_bits(7)? as usize + 11; for _ in 0..repeat { if i >= total { return Err(DeflateError::InvalidCodeLengths); } combined_lengths[i] = 0; i += 1; } } _ => return Err(DeflateError::InvalidCodeLengths), } } let lit_lengths = &combined_lengths[..hlit]; let dist_lengths = &combined_lengths[hlit..]; let lit_table = HuffmanTable::from_code_lengths(lit_lengths, 9)?; let dist_table = HuffmanTable::from_code_lengths(dist_lengths, 6)?; decode_compressed(reader, &lit_table, &dist_table, output) } /// Decode compressed data using literal/length and distance Huffman tables. fn decode_compressed( reader: &mut BitReader<'_>, lit_table: &HuffmanTable, dist_table: &HuffmanTable, output: &mut Vec, ) -> Result<()> { loop { let sym = lit_table.decode(reader)?; match sym { 0..=255 => { output.push(sym as u8); } 256 => { // End of block return Ok(()); } 257..=285 => { // Length/distance pair let length_index = (sym - 257) as usize; let (base_len, extra_bits) = LENGTH_TABLE[length_index]; let length = base_len as usize + if extra_bits > 0 { reader.read_bits(extra_bits)? as usize } else { 0 }; let dist_sym = dist_table.decode(reader)?; if dist_sym as usize >= DISTANCE_TABLE.len() { return Err(DeflateError::InvalidCode); } let (base_dist, dist_extra) = DISTANCE_TABLE[dist_sym as usize]; let distance = base_dist as usize + if dist_extra > 0 { reader.read_bits(dist_extra)? as usize } else { 0 }; if distance > output.len() { return Err(DeflateError::InvalidDistance { distance, available: output.len(), }); } // Copy from back-reference — byte-by-byte to handle overlapping let start = output.len() - distance; for i in 0..length { let b = output[start + (i % distance)]; output.push(b); } } _ => { return Err(DeflateError::InvalidCode); } } } } // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- #[cfg(test)] mod tests { use super::*; // -- Error Display tests -- #[test] fn error_display() { assert_eq!( DeflateError::UnexpectedEof.to_string(), "unexpected end of input" ); assert_eq!( DeflateError::InvalidBlockType(3).to_string(), "invalid block type: 3" ); assert_eq!( DeflateError::LenMismatch { len: 5, nlen: 10 }.to_string(), "non-compressed block length mismatch: LEN=5, NLEN=10" ); assert_eq!( DeflateError::InvalidCode.to_string(), "invalid Huffman code" ); assert_eq!( DeflateError::InvalidCodeLengths.to_string(), "invalid code length codes" ); assert_eq!( DeflateError::InvalidDistance { distance: 100, available: 50 } .to_string(), "invalid back-reference: distance 100 exceeds available 50" ); assert_eq!( DeflateError::InvalidHuffmanTree.to_string(), "invalid Huffman tree" ); } // -- BitReader tests -- #[test] fn bit_reader_single_byte() { let data = [0b10110100u8]; let mut reader = BitReader::new(&data); // LSB first: bits are 0,0,1,0,1,1,0,1 assert_eq!(reader.read_bits(1).unwrap(), 0); assert_eq!(reader.read_bits(1).unwrap(), 0); assert_eq!(reader.read_bits(1).unwrap(), 1); assert_eq!(reader.read_bits(1).unwrap(), 0); assert_eq!(reader.read_bits(1).unwrap(), 1); assert_eq!(reader.read_bits(1).unwrap(), 1); assert_eq!(reader.read_bits(1).unwrap(), 0); assert_eq!(reader.read_bits(1).unwrap(), 1); } #[test] fn bit_reader_multi_bit() { let data = [0b11010011u8]; let mut reader = BitReader::new(&data); // Read 3 bits LSB first: bits 0,1,0 → reversed from MSB: 011 → value 3 assert_eq!(reader.read_bits(3).unwrap(), 0b011); // lowest 3 bits assert_eq!(reader.read_bits(5).unwrap(), 0b11010); // remaining 5 bits } #[test] fn bit_reader_cross_byte() { let data = [0xFF, 0x00]; let mut reader = BitReader::new(&data); assert_eq!(reader.read_bits(4).unwrap(), 0xF); assert_eq!(reader.read_bits(8).unwrap(), 0x0F); // 4 bits from first byte + 4 from second } #[test] fn bit_reader_eof() { let data = [0x00]; let mut reader = BitReader::new(&data); reader.read_bits(8).unwrap(); assert!(matches!( reader.read_bits(1), Err(DeflateError::UnexpectedEof) )); } #[test] fn bit_reader_align_and_read_bytes() { let data = [0xFF, 0xAB, 0xCD]; let mut reader = BitReader::new(&data); reader.read_bits(3).unwrap(); // read some bits reader.align_to_byte(); let mut buf = [0u8; 2]; reader.read_bytes(&mut buf).unwrap(); assert_eq!(buf, [0xAB, 0xCD]); } // -- reverse_bits tests -- #[test] fn reverse_bits_basic() { assert_eq!(reverse_bits(0b110, 3), 0b011); assert_eq!(reverse_bits(0b1010, 4), 0b0101); assert_eq!(reverse_bits(0b1, 1), 0b1); assert_eq!(reverse_bits(0b0, 1), 0b0); assert_eq!(reverse_bits(0b11001, 5), 0b10011); } // -- Huffman table tests -- #[test] fn huffman_fixed_literal_table() { // Verify fixed literal table can be constructed let lengths = fixed_literal_lengths(); let table = HuffmanTable::from_code_lengths(&lengths, 9).unwrap(); assert!(!table.entries.is_empty()); } #[test] fn huffman_fixed_distance_table() { let lengths = fixed_distance_lengths(); let table = HuffmanTable::from_code_lengths(&lengths, 5).unwrap(); assert!(!table.entries.is_empty()); } #[test] fn huffman_simple_decode() { // Build a simple tree: A=0, B=10, C=11 let lengths = [1u8, 2, 2]; let table = HuffmanTable::from_code_lengths(&lengths, 2).unwrap(); // A = code 0 (1 bit), B = code 10 (2 bits), C = code 11 (2 bits) // LSB first: A=0, B=01, C=11 // Encode: A, B, C, A = 0 | 01 | 11 | 0 = 0b0_11_01_0 in LSB order // In bytes: 0b01110100 (pad with zeros on MSB side if needed) // Wait, let me recalculate for LSB-first bit reader: // A: bit 0 → 0 // B: bits 1-2 → 01 (LSB first, so code 10 reversed = 01) // C: bits 3-4 → 11 (code 11 reversed = 11) // A: bit 5 → 0 // Byte: bits 76543210 = 00_0_11_01_0 = 0b00011010 = 0x1A let data = [0x1A]; let mut reader = BitReader::new(&data); assert_eq!(table.decode(&mut reader).unwrap(), 0); // A assert_eq!(table.decode(&mut reader).unwrap(), 1); // B assert_eq!(table.decode(&mut reader).unwrap(), 2); // C assert_eq!(table.decode(&mut reader).unwrap(), 0); // A } // -- Non-compressed block (BTYPE=00) tests -- #[test] fn inflate_uncompressed_block() { // BFINAL=1, BTYPE=00 (non-compressed) // Bits: 1 (final) 00 (type) → 0b001 = least significant 3 bits // Then aligned: LEN=5, NLEN=~5=0xFFFA let data_to_store = b"Hello"; let mut input = Vec::new(); // First byte: BFINAL=1, BTYPE=00 → bits 1,0,0 → byte 0x01 input.push(0x01); // LEN = 5, little-endian input.push(5); input.push(0); // NLEN = !5 = 0xFFFA, little-endian input.push(0xFA); input.push(0xFF); // Data input.extend_from_slice(data_to_store); let result = inflate(&input).unwrap(); assert_eq!(result, b"Hello"); } #[test] fn inflate_uncompressed_empty() { // Non-compressed block with length 0 let mut input = Vec::new(); input.push(0x01); // BFINAL=1, BTYPE=00 input.push(0); // LEN=0 input.push(0); input.push(0xFF); // NLEN=0xFFFF = !0 input.push(0xFF); let result = inflate(&input).unwrap(); assert!(result.is_empty()); } #[test] fn inflate_uncompressed_len_mismatch() { let mut input = Vec::new(); input.push(0x01); input.push(5); input.push(0); input.push(0x00); // Wrong NLEN input.push(0x00); assert!(matches!( inflate(&input), Err(DeflateError::LenMismatch { .. }) )); } // -- Multiple non-compressed blocks -- #[test] fn inflate_two_uncompressed_blocks() { let mut input = Vec::new(); // Block 1: BFINAL=0, BTYPE=00 input.push(0x00); input.push(3); input.push(0); input.push(0xFC); input.push(0xFF); input.extend_from_slice(b"Hel"); // Block 2: BFINAL=1, BTYPE=00 input.push(0x01); input.push(2); input.push(0); input.push(0xFD); input.push(0xFF); input.extend_from_slice(b"lo"); let result = inflate(&input).unwrap(); assert_eq!(result, b"Hello"); } // -- Fixed Huffman block tests -- #[test] fn inflate_fixed_huffman_literals_only() { // Use Rust's flate2-equivalent: manually construct a fixed Huffman block // that encodes just literal bytes and end-of-block. // // For simplicity, we test against known compressed output. // The easiest way is to compress with the non-compressed format // and verify, then test with known DEFLATE streams. // Use a known compressed representation of empty data: // Fixed block with just end-of-block (symbol 256) // Symbol 256 has code length 7, code = 0000000 in fixed Huffman // Actually, per fixed table: 256-279 have 7-bit codes starting at 0b0000000 // Code for 256: 7 bits, value 0b0000000 // Reversed for LSB-first: 0b0000000 // BFINAL=1, BTYPE=01: bits 1, 0, 1 → 0b101 = 5 // Then 7 bits of 0 for symbol 256 // Total: 3 + 7 = 10 bits → 2 bytes // Byte 0: bits 0-7: 1,0,1, 0,0,0,0,0 = 0b00000_101 = 0x05 // Byte 1: bits 8-9: 0,0 (remaining bits of the code) + padding = 0x00 let input = [0x03, 0x00]; // Known empty fixed block let result = inflate(&input).unwrap(); assert!(result.is_empty()); } #[test] fn inflate_fixed_huffman_known_data() { // This is the DEFLATE compressed form of "Hello" using fixed Huffman codes. // Generated by standard deflate: raw deflate of "Hello" // We can verify this by using known test vectors. // // Actually, let's build it manually: // BFINAL=1, BTYPE=01 (fixed) // Then encode 'H'(72), 'e'(101), 'l'(108), 'l'(108), 'o'(111), EOB(256) // // Fixed Huffman codes for literals 0-143: 8-bit codes 00110000..10111111 // Code for literal N (0-143): reversed(N + 0x30) in 8 bits // // Let's use a known deflate stream instead. The bytes below are the // raw DEFLATE encoding of "Hello" (verified against RFC 1951). let input = [0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x07, 0x00]; let result = inflate(&input).unwrap(); assert_eq!(result, b"Hello"); } // -- Invalid block type -- #[test] fn inflate_invalid_block_type() { // BFINAL=1, BTYPE=11 (reserved) // Bits: 1, 1, 1 → byte 0x07 let input = [0x07]; assert!(matches!( inflate(&input), Err(DeflateError::InvalidBlockType(3)) )); } // -- Empty input -- #[test] fn inflate_empty_input() { assert!(matches!(inflate(&[]), Err(DeflateError::UnexpectedEof))); } // -- Distance validation -- #[test] fn inflate_distance_table_coverage() { // Verify all distance table entries are valid for (i, &(base, extra)) in DISTANCE_TABLE.iter().enumerate() { assert!(base >= 1, "distance code {i} has base {base} < 1"); let max_dist = base as u32 + ((1u32 << extra) - 1); assert!( max_dist <= 32768, "distance code {i} max {max_dist} exceeds 32KB window" ); } } // -- Length table coverage -- #[test] fn inflate_length_table_coverage() { // Verify length table entries for (i, &(base, extra)) in LENGTH_TABLE.iter().enumerate() { assert!(base >= 3, "length code {i} has base {base} < 3"); if i < 28 { // Codes 257-284 have ranges let max_len = base as u32 + ((1u32 << extra) - 1); assert!(max_len <= 258, "length code {i} max {max_len} exceeds 258"); } } } // -- Back-reference (LZ77) tests via fixed Huffman -- #[test] fn inflate_with_back_reference() { // DEFLATE compressed "abcabcabc" — uses a back-reference // Raw DEFLATE for "abcabcabc" with fixed Huffman: let input = [0x4b, 0x4c, 0x4a, 0x4e, 0x04, 0x23, 0x00]; let result = inflate(&input).unwrap(); assert_eq!(result, b"abcabcabc"); } // -- Dynamic Huffman block tests -- #[test] fn inflate_dynamic_huffman() { // A known DEFLATE stream with dynamic Huffman codes. // This is the raw DEFLATE encoding of "AAAAAAAAAA" (10 A's) // which benefits from dynamic codes + back-references. let input = [0x73, 0x74, 0x84, 0x01, 0x00]; let result = inflate(&input).unwrap(); assert_eq!(result, b"AAAAAAAAAA"); } // -- Fixed Huffman table properties -- #[test] fn fixed_literal_lengths_correct() { let lengths = fixed_literal_lengths(); // 0-143: length 8 for i in 0..=143 { assert_eq!(lengths[i], 8, "literal {i} should have length 8"); } // 144-255: length 9 for i in 144..=255 { assert_eq!(lengths[i], 9, "literal {i} should have length 9"); } // 256-279: length 7 for i in 256..=279 { assert_eq!(lengths[i], 7, "literal {i} should have length 7"); } // 280-287: length 8 for i in 280..=287 { assert_eq!(lengths[i], 8, "literal {i} should have length 8"); } } #[test] fn fixed_distance_lengths_correct() { let lengths = fixed_distance_lengths(); for (i, &len) in lengths.iter().enumerate() { assert_eq!(len, 5, "distance {i} should have length 5"); } } // -- Round-trip test using non-compressed then verifying inflate -- #[test] fn inflate_uncompressed_large_block() { // Test with a larger block (256 bytes) let payload: Vec = (0..=255).collect(); let mut input = Vec::new(); input.push(0x01); // BFINAL=1, BTYPE=00 let len = payload.len() as u16; input.push(len as u8); input.push((len >> 8) as u8); let nlen = !len; input.push(nlen as u8); input.push((nlen >> 8) as u8); input.extend_from_slice(&payload); let result = inflate(&input).unwrap(); assert_eq!(result, payload); } // -- Code length order -- #[test] fn code_length_order_correct() { // Per RFC 1951 §3.2.7 assert_eq!( CODE_LENGTH_ORDER, [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] ); } // -- Overlapping back-reference -- #[test] fn inflate_overlapping_backref() { // Test that overlapping back-references work correctly. // e.g., distance=1, length=5 starting from "A" should produce "AAAAA" // This is tested indirectly through the DEFLATE stream, // but let's verify the copy logic directly. let mut output = vec![b'A']; let distance = 1usize; let length = 5usize; let start = output.len() - distance; for i in 0..length { let b = output[start + (i % distance)]; output.push(b); } assert_eq!(output, b"AAAAAA"); } // -- Comprehensive round-trip tests with zlib-generated vectors -- #[test] fn inflate_single_byte() { let compressed = [0x4b, 0x04, 0x00]; let result = inflate(&compressed).unwrap(); assert_eq!(result, b"a"); } #[test] fn inflate_repeated_char_1000() { // 1000 'a' characters — heavy LZ77 back-referencing let compressed = [ 0x4b, 0x4c, 0x1c, 0x05, 0xa3, 0x60, 0x14, 0x0c, 0x77, 0x00, 0x00, ]; let result = inflate(&compressed).unwrap(); assert_eq!(result.len(), 1000); assert!(result.iter().all(|&b| b == b'a')); } #[test] fn inflate_pangram() { let compressed = [ 0x0b, 0xc9, 0x48, 0x55, 0x28, 0x2c, 0xcd, 0x4c, 0xce, 0x56, 0x48, 0x2a, 0xca, 0x2f, 0xcf, 0x53, 0x48, 0xcb, 0xaf, 0x50, 0xc8, 0x2a, 0xcd, 0x2d, 0x28, 0x56, 0xc8, 0x2f, 0x4b, 0x2d, 0x52, 0x28, 0x01, 0x4a, 0xe7, 0x24, 0x56, 0x55, 0x2a, 0xa4, 0xe4, 0xa7, 0x03, 0x00, ]; let result = inflate(&compressed).unwrap(); assert_eq!(result, b"The quick brown fox jumps over the lazy dog"); } #[test] fn inflate_alphabet_repeated() { // "abcdefghijklmnopqrstuvwxyz" * 10 = 260 bytes let compressed = [ 0x4b, 0x4c, 0x4a, 0x4e, 0x49, 0x4d, 0x4b, 0xcf, 0xc8, 0xcc, 0xca, 0xce, 0xc9, 0xcd, 0xcb, 0x2f, 0x28, 0x2c, 0x2a, 0x2e, 0x29, 0x2d, 0x2b, 0xaf, 0xa8, 0xac, 0x4a, 0x1c, 0x31, 0x32, 0x00, ]; let result = inflate(&compressed).unwrap(); let expected: Vec = b"abcdefghijklmnopqrstuvwxyz".repeat(10); assert_eq!(result, expected); } // -- All byte values via non-compressed block -- #[test] fn inflate_all_byte_values() { // Non-compressed block containing all 256 byte values let payload: Vec = (0..=255).collect(); // zlib at level 6 uses non-compressed for this data let compressed = [ 0x01, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, ]; let result = inflate(&compressed).unwrap(); assert_eq!(result, payload); } // -- Maximum back-reference distance (32KB window) -- #[test] fn inflate_max_window_uncompressed() { // Test near the 32KB window limit with non-compressed blocks let payload = vec![0xAB; 32768]; // Build as a series of non-compressed blocks (max 65535 bytes each) let mut input = Vec::new(); input.push(0x01); // BFINAL=1, BTYPE=00 let len = 32768u16; // 32768 = 0x8000 which fits in u16 input.push(len as u8); input.push((len >> 8) as u8); let nlen = !len; input.push(nlen as u8); input.push((nlen >> 8) as u8); input.extend_from_slice(&payload); let result = inflate(&input).unwrap(); assert_eq!(result.len(), 32768); assert!(result.iter().all(|&b| b == 0xAB)); } }