crates/image/src/deflate.rs at utf-codecs

pierrelf.com / we
fork atom
we (web engine): Experimental web browser project to understand the limits of Claude
fork atom
we / crates / image / src / deflate.rs
at utf-codecs 1199 lines 41 kB view raw
wrap content
pierrelf.com Implement DEFLATE decompressor (RFC 1951) 15d ago
30fd849a
   1//! DEFLATE decompressor (RFC 1951).
   2//!
   3//! Supports all three block types:
   4//! - Non-compressed (BTYPE=00)
   5//! - Fixed Huffman codes (BTYPE=01)
   6//! - Dynamic Huffman codes (BTYPE=10)
   7
   8use std::fmt;
   9
  10// ---------------------------------------------------------------------------
  11// Error type
  12// ---------------------------------------------------------------------------
  13
  14/// Errors that can occur during DEFLATE decompression.
  15#[derive(Debug, Clone, PartialEq, Eq)]
  16pub enum DeflateError {
  17    /// Unexpected end of input data.
  18    UnexpectedEof,
  19    /// Invalid block type (BTYPE=11 is reserved).
  20    InvalidBlockType(u8),
  21    /// Non-compressed block length check failed (LEN != ~NLEN).
  22    LenMismatch { len: u16, nlen: u16 },
  23    /// Invalid Huffman code encountered.
  24    InvalidCode,
  25    /// Invalid code length code in dynamic block header.
  26    InvalidCodeLengths,
  27    /// Back-reference distance exceeds available output.
  28    InvalidDistance { distance: usize, available: usize },
  29    /// Huffman tree is over-subscribed or otherwise invalid.
  30    InvalidHuffmanTree,
  31}
  32
  33impl fmt::Display for DeflateError {
  34    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  35        match self {
  36            Self::UnexpectedEof => write!(f, "unexpected end of input"),
  37            Self::InvalidBlockType(b) => write!(f, "invalid block type: {b}"),
  38            Self::LenMismatch { len, nlen } => {
  39                write!(
  40                    f,
  41                    "non-compressed block length mismatch: LEN={len}, NLEN={nlen}"
  42                )
  43            }
  44            Self::InvalidCode => write!(f, "invalid Huffman code"),
  45            Self::InvalidCodeLengths => write!(f, "invalid code length codes"),
  46            Self::InvalidDistance {
  47                distance,
  48                available,
  49            } => {
  50                write!(
  51                    f,
  52                    "invalid back-reference: distance {distance} exceeds available {available}"
  53                )
  54            }
  55            Self::InvalidHuffmanTree => write!(f, "invalid Huffman tree"),
  56        }
  57    }
  58}
  59
  60pub type Result<T> = std::result::Result<T, DeflateError>;
  61
  62// ---------------------------------------------------------------------------
  63// Bit reader
  64// ---------------------------------------------------------------------------
  65
  66/// Reads bits from a byte slice, LSB-first (as required by DEFLATE).
  67struct BitReader<'a> {
  68    data: &'a [u8],
  69    pos: usize,
  70    bit_buf: u32,
  71    bits_in_buf: u8,
  72}
  73
  74impl<'a> BitReader<'a> {
  75    fn new(data: &'a [u8]) -> Self {
  76        Self {
  77            data,
  78            pos: 0,
  79            bit_buf: 0,
  80            bits_in_buf: 0,
  81        }
  82    }
  83
  84    /// Ensure at least `count` bits are available in the buffer.
  85    fn fill(&mut self, count: u8) -> Result<()> {
  86        while self.bits_in_buf < count {
  87            if self.pos >= self.data.len() {
  88                return Err(DeflateError::UnexpectedEof);
  89            }
  90            self.bit_buf |= (self.data[self.pos] as u32) << self.bits_in_buf;
  91            self.pos += 1;
  92            self.bits_in_buf += 8;
  93        }
  94        Ok(())
  95    }
  96
  97    /// Fill the buffer with as many bits as possible, up to `count`.
  98    /// Does not error on EOF — just fills what's available.
  99    fn fill_available(&mut self, count: u8) {
 100        while self.bits_in_buf < count {
 101            if self.pos >= self.data.len() {
 102                return;
 103            }
 104            self.bit_buf |= (self.data[self.pos] as u32) << self.bits_in_buf;
 105            self.pos += 1;
 106            self.bits_in_buf += 8;
 107        }
 108    }
 109
 110    /// Peek at the lowest `count` bits without consuming them.
 111    fn peek(&self, count: u8) -> u32 {
 112        self.bit_buf & ((1u32 << count) - 1)
 113    }
 114
 115    /// Consume `count` bits from the buffer (must have been fill'd first).
 116    fn consume(&mut self, count: u8) {
 117        self.bit_buf >>= count;
 118        self.bits_in_buf -= count;
 119    }
 120
 121    /// Read `count` bits (up to 25) as a u32, LSB first.
 122    fn read_bits(&mut self, count: u8) -> Result<u32> {
 123        debug_assert!(count <= 25);
 124        self.fill(count)?;
 125        let val = self.peek(count);
 126        self.consume(count);
 127        Ok(val)
 128    }
 129
 130    /// Align to the next byte boundary by discarding remaining bits.
 131    fn align_to_byte(&mut self) {
 132        self.bit_buf = 0;
 133        self.bits_in_buf = 0;
 134    }
 135
 136    /// Read a raw byte (after byte-alignment).
 137    fn read_byte(&mut self) -> Result<u8> {
 138        if self.pos >= self.data.len() {
 139            return Err(DeflateError::UnexpectedEof);
 140        }
 141        let b = self.data[self.pos];
 142        self.pos += 1;
 143        Ok(b)
 144    }
 145
 146    /// Read `n` raw bytes into a buffer (after byte-alignment).
 147    fn read_bytes(&mut self, buf: &mut [u8]) -> Result<()> {
 148        let n = buf.len();
 149        if self.pos + n > self.data.len() {
 150            return Err(DeflateError::UnexpectedEof);
 151        }
 152        buf.copy_from_slice(&self.data[self.pos..self.pos + n]);
 153        self.pos += n;
 154        Ok(())
 155    }
 156}
 157
 158// ---------------------------------------------------------------------------
 159// Huffman tree
 160// ---------------------------------------------------------------------------
 161
 162/// Maximum number of bits in a Huffman code for DEFLATE.
 163const MAX_BITS: usize = 15;
 164
 165/// A Huffman decoding table built from code lengths.
 166///
 167/// Uses a two-level lookup: a primary table indexed by up to `primary_bits`
 168/// bits, and secondary tables for longer codes.
 169struct HuffmanTable {
 170    /// Lookup table entries. Each entry is either:
 171    /// - A leaf: (symbol << 4) | code_length (code_length > 0)
 172    /// - A subtable pointer: (subtable_offset << 4) | 0, where offset is into `entries`
 173    entries: Vec<u32>,
 174    primary_bits: u8,
 175}
 176
 177impl HuffmanTable {
 178    /// Build a Huffman table from code lengths.
 179    ///
 180    /// `code_lengths[i]` is the bit length of symbol `i`. A length of 0 means
 181    /// the symbol does not appear.
 182    fn from_code_lengths(code_lengths: &[u8], primary_bits: u8) -> Result<Self> {
 183        let max_code_len = code_lengths.iter().copied().max().unwrap_or(0) as usize;
 184
 185        if max_code_len > MAX_BITS {
 186            return Err(DeflateError::InvalidHuffmanTree);
 187        }
 188
 189        // Count codes of each length
 190        let mut bl_count = [0u32; MAX_BITS + 1];
 191        for &len in code_lengths {
 192            bl_count[len as usize] += 1;
 193        }
 194        bl_count[0] = 0;
 195
 196        // Check for empty tree (all lengths are 0)
 197        let total_codes: u32 = bl_count[1..].iter().sum();
 198        if total_codes == 0 {
 199            // Empty tree — return a table that always fails to decode
 200            let size = 1 << primary_bits;
 201            return Ok(Self {
 202                entries: vec![0; size],
 203                primary_bits,
 204            });
 205        }
 206
 207        // Compute next_code for each bit length (RFC 1951 §3.2.2)
 208        let mut next_code = [0u32; MAX_BITS + 1];
 209        let mut code = 0u32;
 210        for bits in 1..=max_code_len {
 211            code = (code + bl_count[bits - 1]) << 1;
 212            next_code[bits] = code;
 213        }
 214
 215        // Assign codes to symbols
 216        let mut codes = vec![0u32; code_lengths.len()];
 217        let mut lengths = vec![0u8; code_lengths.len()];
 218        for (sym, &len) in code_lengths.iter().enumerate() {
 219            if len > 0 {
 220                codes[sym] = next_code[len as usize];
 221                lengths[sym] = len;
 222                next_code[len as usize] += 1;
 223            }
 224        }
 225
 226        // Build the lookup table
 227        let primary_size = 1usize << primary_bits;
 228        let mut entries = vec![0u32; primary_size];
 229
 230        // For codes that fit in primary_bits, fill all matching entries
 231        for (sym, (&code_val, &code_len)) in codes.iter().zip(lengths.iter()).enumerate() {
 232            if code_len == 0 {
 233                continue;
 234            }
 235            if code_len <= primary_bits {
 236                // Reverse the code bits for LSB-first lookup
 237                let reversed = reverse_bits(code_val, code_len);
 238                let extra_bits = primary_bits - code_len;
 239                let base = reversed as usize;
 240                // Fill all entries where the extra bits vary
 241                for i in 0..(1usize << extra_bits) {
 242                    let index = base | (i << code_len);
 243                    entries[index] = ((sym as u32) << 4) | (code_len as u32);
 244                }
 245            }
 246        }
 247
 248        // For codes longer than primary_bits, build secondary tables
 249        if max_code_len as u8 > primary_bits {
 250            // Group long codes by their primary_bits prefix
 251            type SubtableEntry = (u32, u8, u16);
 252            let mut subtables: Vec<(usize, Vec<SubtableEntry>)> = Vec::new();
 253
 254            for (sym, (&code_val, &code_len)) in codes.iter().zip(lengths.iter()).enumerate() {
 255                if code_len == 0 || code_len <= primary_bits {
 256                    continue;
 257                }
 258                let reversed = reverse_bits(code_val, code_len);
 259                let primary_index = (reversed as usize) & (primary_size - 1);
 260                let secondary_bits = reversed >> primary_bits;
 261                let secondary_len = code_len - primary_bits;
 262
 263                // Find or create subtable for this primary index
 264                let pos = subtables.iter().position(|(idx, _)| *idx == primary_index);
 265                match pos {
 266                    Some(p) => {
 267                        subtables[p]
 268                            .1
 269                            .push((secondary_bits, secondary_len, sym as u16));
 270                    }
 271                    None => {
 272                        subtables.push((
 273                            primary_index,
 274                            vec![(secondary_bits, secondary_len, sym as u16)],
 275                        ));
 276                    }
 277                }
 278            }
 279
 280            for (primary_index, symbols) in &subtables {
 281                let max_secondary = symbols.iter().map(|(_, l, _)| *l).max().unwrap_or(0);
 282                let subtable_size = 1usize << max_secondary;
 283                let subtable_offset = entries.len();
 284
 285                entries.resize(entries.len() + subtable_size, 0);
 286
 287                for &(sec_bits, sec_len, sym) in symbols {
 288                    let extra = max_secondary - sec_len;
 289                    let base = sec_bits as usize;
 290                    for i in 0..(1usize << extra) {
 291                        let index = subtable_offset + base + (i << sec_len);
 292                        let total_len = primary_bits + sec_len;
 293                        entries[index] = ((sym as u32) << 4) | (total_len as u32);
 294                    }
 295                }
 296
 297                // Store subtable pointer in primary entry
 298                // Format: (subtable_offset << 4) | 0, with high bit indicating subtable
 299                // We use bit pattern: offset in upper bits, max_secondary in bits [1..4], flag=0 in bit 0
 300                // Actually simpler: store (offset << 8) | (max_secondary << 4) with code_len = 0
 301                entries[*primary_index] =
 302                    ((subtable_offset as u32) << 8) | ((max_secondary as u32) << 4);
 303            }
 304        }
 305
 306        Ok(Self {
 307            entries,
 308            primary_bits,
 309        })
 310    }
 311
 312    /// Decode one symbol from the bit stream.
 313    fn decode(&self, reader: &mut BitReader<'_>) -> Result<u16> {
 314        // Fill as many bits as possible up to primary_bits.
 315        // Near the end of stream we may have fewer bits than primary_bits,
 316        // but enough for the actual code (e.g., 7-bit end-of-block in a 9-bit table).
 317        reader.fill_available(self.primary_bits);
 318        if reader.bits_in_buf == 0 {
 319            return Err(DeflateError::UnexpectedEof);
 320        }
 321        // Peek only as many bits as we actually have (padded with zeros)
 322        let peek_bits = std::cmp::min(self.primary_bits, reader.bits_in_buf);
 323        let bits = reader.peek(peek_bits);
 324        // If we have fewer bits than primary_bits, the upper bits are implicitly 0,
 325        // which means only codes whose upper bits are 0 can match. This works for
 326        // end-of-stream where the last code is padded with zeros.
 327        let entry = self.entries[bits as usize];
 328
 329        let code_len = entry & 0xF;
 330        if code_len > 0 {
 331            if code_len as u8 > reader.bits_in_buf {
 332                return Err(DeflateError::UnexpectedEof);
 333            }
 334            // Direct hit — consume only the actual code length
 335            reader.consume(code_len as u8);
 336            return Ok((entry >> 4) as u16);
 337        }
 338
 339        // Subtable lookup — consume primary_bits, then peek into subtable
 340        reader.consume(self.primary_bits);
 341        let subtable_offset = (entry >> 8) as usize;
 342        let max_secondary = ((entry >> 4) & 0xF) as u8;
 343
 344        reader.fill(max_secondary)?;
 345        let sec_bits = reader.peek(max_secondary);
 346        let sub_entry = self.entries[subtable_offset + sec_bits as usize];
 347        let sub_code_len = sub_entry & 0xF;
 348
 349        if sub_code_len == 0 {
 350            return Err(DeflateError::InvalidCode);
 351        }
 352
 353        let actual_secondary = sub_code_len as u8 - self.primary_bits;
 354        reader.consume(actual_secondary);
 355
 356        Ok((sub_entry >> 4) as u16)
 357    }
 358}
 359
 360/// Reverse the bottom `len` bits of `code`.
 361fn reverse_bits(code: u32, len: u8) -> u32 {
 362    let mut result = 0u32;
 363    let mut c = code;
 364    for _ in 0..len {
 365        result = (result << 1) | (c & 1);
 366        c >>= 1;
 367    }
 368    result
 369}
 370
 371// ---------------------------------------------------------------------------
 372// Fixed Huffman tables (RFC 1951 §3.2.6)
 373// ---------------------------------------------------------------------------
 374
 375fn fixed_literal_lengths() -> [u8; 288] {
 376    let mut lengths = [0u8; 288];
 377    lengths[..=143].fill(8);
 378    lengths[144..=255].fill(9);
 379    lengths[256..=279].fill(7);
 380    lengths[280..=287].fill(8);
 381    lengths
 382}
 383
 384fn fixed_distance_lengths() -> [u8; 32] {
 385    [5u8; 32]
 386}
 387
 388// ---------------------------------------------------------------------------
 389// Length and distance base values (RFC 1951 §3.2.5)
 390// ---------------------------------------------------------------------------
 391
 392/// (base_length, extra_bits) for length codes 257..285
 393const LENGTH_TABLE: [(u16, u8); 29] = [
 394    (3, 0),
 395    (4, 0),
 396    (5, 0),
 397    (6, 0),
 398    (7, 0),
 399    (8, 0),
 400    (9, 0),
 401    (10, 0),
 402    (11, 1),
 403    (13, 1),
 404    (15, 1),
 405    (17, 1),
 406    (19, 2),
 407    (23, 2),
 408    (27, 2),
 409    (31, 2),
 410    (35, 3),
 411    (43, 3),
 412    (51, 3),
 413    (59, 3),
 414    (67, 4),
 415    (83, 4),
 416    (99, 4),
 417    (115, 4),
 418    (131, 5),
 419    (163, 5),
 420    (195, 5),
 421    (227, 5),
 422    (258, 0),
 423];
 424
 425/// (base_distance, extra_bits) for distance codes 0..29
 426const DISTANCE_TABLE: [(u16, u8); 30] = [
 427    (1, 0),
 428    (2, 0),
 429    (3, 0),
 430    (4, 0),
 431    (5, 1),
 432    (7, 1),
 433    (9, 2),
 434    (13, 2),
 435    (17, 3),
 436    (25, 3),
 437    (33, 4),
 438    (49, 4),
 439    (65, 5),
 440    (97, 5),
 441    (129, 6),
 442    (193, 6),
 443    (257, 7),
 444    (385, 7),
 445    (513, 8),
 446    (769, 8),
 447    (1025, 9),
 448    (1537, 9),
 449    (2049, 10),
 450    (3073, 10),
 451    (4097, 11),
 452    (6145, 11),
 453    (8193, 12),
 454    (12289, 12),
 455    (16385, 13),
 456    (24577, 13),
 457];
 458
 459/// Order in which code length code lengths appear (RFC 1951 §3.2.7).
 460const CODE_LENGTH_ORDER: [usize; 19] = [
 461    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
 462];
 463
 464// ---------------------------------------------------------------------------
 465// Decompressor
 466// ---------------------------------------------------------------------------
 467
 468/// Decompress a DEFLATE compressed data stream per RFC 1951.
 469pub fn inflate(input: &[u8]) -> Result<Vec<u8>> {
 470    let mut reader = BitReader::new(input);
 471    let mut output = Vec::new();
 472
 473    loop {
 474        let bfinal = reader.read_bits(1)?;
 475        let btype = reader.read_bits(2)? as u8;
 476
 477        match btype {
 478            0 => decode_uncompressed(&mut reader, &mut output)?,
 479            1 => decode_fixed(&mut reader, &mut output)?,
 480            2 => decode_dynamic(&mut reader, &mut output)?,
 481            _ => return Err(DeflateError::InvalidBlockType(btype)),
 482        }
 483
 484        if bfinal == 1 {
 485            break;
 486        }
 487    }
 488
 489    Ok(output)
 490}
 491
 492/// Decode a non-compressed block (BTYPE=00).
 493fn decode_uncompressed(reader: &mut BitReader<'_>, output: &mut Vec<u8>) -> Result<()> {
 494    reader.align_to_byte();
 495
 496    let lo = reader.read_byte()? as u16;
 497    let hi = reader.read_byte()? as u16;
 498    let len = lo | (hi << 8);
 499
 500    let nlo = reader.read_byte()? as u16;
 501    let nhi = reader.read_byte()? as u16;
 502    let nlen = nlo | (nhi << 8);
 503
 504    if len != !nlen {
 505        return Err(DeflateError::LenMismatch { len, nlen });
 506    }
 507
 508    let mut buf = vec![0u8; len as usize];
 509    reader.read_bytes(&mut buf)?;
 510    output.extend_from_slice(&buf);
 511
 512    Ok(())
 513}
 514
 515/// Decode a block with fixed Huffman codes (BTYPE=01).
 516fn decode_fixed(reader: &mut BitReader<'_>, output: &mut Vec<u8>) -> Result<()> {
 517    let lit_lengths = fixed_literal_lengths();
 518    let dist_lengths = fixed_distance_lengths();
 519
 520    let lit_table = HuffmanTable::from_code_lengths(&lit_lengths, 9)?;
 521    let dist_table = HuffmanTable::from_code_lengths(&dist_lengths, 5)?;
 522
 523    decode_compressed(reader, &lit_table, &dist_table, output)
 524}
 525
 526/// Decode a block with dynamic Huffman codes (BTYPE=10).
 527fn decode_dynamic(reader: &mut BitReader<'_>, output: &mut Vec<u8>) -> Result<()> {
 528    let hlit = reader.read_bits(5)? as usize + 257;
 529    let hdist = reader.read_bits(5)? as usize + 1;
 530    let hclen = reader.read_bits(4)? as usize + 4;
 531
 532    // Read code length code lengths
 533    let mut cl_lengths = [0u8; 19];
 534    for i in 0..hclen {
 535        cl_lengths[CODE_LENGTH_ORDER[i]] = reader.read_bits(3)? as u8;
 536    }
 537
 538    let cl_table = HuffmanTable::from_code_lengths(&cl_lengths, 7)?;
 539
 540    // Decode literal/length and distance code lengths
 541    let total = hlit + hdist;
 542    let mut combined_lengths = vec![0u8; total];
 543    let mut i = 0;
 544
 545    while i < total {
 546        let sym = cl_table.decode(reader)?;
 547
 548        match sym {
 549            0..=15 => {
 550                combined_lengths[i] = sym as u8;
 551                i += 1;
 552            }
 553            16 => {
 554                // Repeat previous code length 3-6 times
 555                if i == 0 {
 556                    return Err(DeflateError::InvalidCodeLengths);
 557                }
 558                let repeat = reader.read_bits(2)? as usize + 3;
 559                let prev = combined_lengths[i - 1];
 560                for _ in 0..repeat {
 561                    if i >= total {
 562                        return Err(DeflateError::InvalidCodeLengths);
 563                    }
 564                    combined_lengths[i] = prev;
 565                    i += 1;
 566                }
 567            }
 568            17 => {
 569                // Repeat 0 for 3-10 times
 570                let repeat = reader.read_bits(3)? as usize + 3;
 571                for _ in 0..repeat {
 572                    if i >= total {
 573                        return Err(DeflateError::InvalidCodeLengths);
 574                    }
 575                    combined_lengths[i] = 0;
 576                    i += 1;
 577                }
 578            }
 579            18 => {
 580                // Repeat 0 for 11-138 times
 581                let repeat = reader.read_bits(7)? as usize + 11;
 582                for _ in 0..repeat {
 583                    if i >= total {
 584                        return Err(DeflateError::InvalidCodeLengths);
 585                    }
 586                    combined_lengths[i] = 0;
 587                    i += 1;
 588                }
 589            }
 590            _ => return Err(DeflateError::InvalidCodeLengths),
 591        }
 592    }
 593
 594    let lit_lengths = &combined_lengths[..hlit];
 595    let dist_lengths = &combined_lengths[hlit..];
 596
 597    let lit_table = HuffmanTable::from_code_lengths(lit_lengths, 9)?;
 598    let dist_table = HuffmanTable::from_code_lengths(dist_lengths, 6)?;
 599
 600    decode_compressed(reader, &lit_table, &dist_table, output)
 601}
 602
 603/// Decode compressed data using literal/length and distance Huffman tables.
 604fn decode_compressed(
 605    reader: &mut BitReader<'_>,
 606    lit_table: &HuffmanTable,
 607    dist_table: &HuffmanTable,
 608    output: &mut Vec<u8>,
 609) -> Result<()> {
 610    loop {
 611        let sym = lit_table.decode(reader)?;
 612
 613        match sym {
 614            0..=255 => {
 615                output.push(sym as u8);
 616            }
 617            256 => {
 618                // End of block
 619                return Ok(());
 620            }
 621            257..=285 => {
 622                // Length/distance pair
 623                let length_index = (sym - 257) as usize;
 624                let (base_len, extra_bits) = LENGTH_TABLE[length_index];
 625                let length = base_len as usize
 626                    + if extra_bits > 0 {
 627                        reader.read_bits(extra_bits)? as usize
 628                    } else {
 629                        0
 630                    };
 631
 632                let dist_sym = dist_table.decode(reader)?;
 633                if dist_sym as usize >= DISTANCE_TABLE.len() {
 634                    return Err(DeflateError::InvalidCode);
 635                }
 636                let (base_dist, dist_extra) = DISTANCE_TABLE[dist_sym as usize];
 637                let distance = base_dist as usize
 638                    + if dist_extra > 0 {
 639                        reader.read_bits(dist_extra)? as usize
 640                    } else {
 641                        0
 642                    };
 643
 644                if distance > output.len() {
 645                    return Err(DeflateError::InvalidDistance {
 646                        distance,
 647                        available: output.len(),
 648                    });
 649                }
 650
 651                // Copy from back-reference — byte-by-byte to handle overlapping
 652                let start = output.len() - distance;
 653                for i in 0..length {
 654                    let b = output[start + (i % distance)];
 655                    output.push(b);
 656                }
 657            }
 658            _ => {
 659                return Err(DeflateError::InvalidCode);
 660            }
 661        }
 662    }
 663}
 664
 665// ---------------------------------------------------------------------------
 666// Tests
 667// ---------------------------------------------------------------------------
 668
 669#[cfg(test)]
 670mod tests {
 671    use super::*;
 672
 673    // -- Error Display tests --
 674
 675    #[test]
 676    fn error_display() {
 677        assert_eq!(
 678            DeflateError::UnexpectedEof.to_string(),
 679            "unexpected end of input"
 680        );
 681        assert_eq!(
 682            DeflateError::InvalidBlockType(3).to_string(),
 683            "invalid block type: 3"
 684        );
 685        assert_eq!(
 686            DeflateError::LenMismatch { len: 5, nlen: 10 }.to_string(),
 687            "non-compressed block length mismatch: LEN=5, NLEN=10"
 688        );
 689        assert_eq!(
 690            DeflateError::InvalidCode.to_string(),
 691            "invalid Huffman code"
 692        );
 693        assert_eq!(
 694            DeflateError::InvalidCodeLengths.to_string(),
 695            "invalid code length codes"
 696        );
 697        assert_eq!(
 698            DeflateError::InvalidDistance {
 699                distance: 100,
 700                available: 50
 701            }
 702            .to_string(),
 703            "invalid back-reference: distance 100 exceeds available 50"
 704        );
 705        assert_eq!(
 706            DeflateError::InvalidHuffmanTree.to_string(),
 707            "invalid Huffman tree"
 708        );
 709    }
 710
 711    // -- BitReader tests --
 712
 713    #[test]
 714    fn bit_reader_single_byte() {
 715        let data = [0b10110100u8];
 716        let mut reader = BitReader::new(&data);
 717        // LSB first: bits are 0,0,1,0,1,1,0,1
 718        assert_eq!(reader.read_bits(1).unwrap(), 0);
 719        assert_eq!(reader.read_bits(1).unwrap(), 0);
 720        assert_eq!(reader.read_bits(1).unwrap(), 1);
 721        assert_eq!(reader.read_bits(1).unwrap(), 0);
 722        assert_eq!(reader.read_bits(1).unwrap(), 1);
 723        assert_eq!(reader.read_bits(1).unwrap(), 1);
 724        assert_eq!(reader.read_bits(1).unwrap(), 0);
 725        assert_eq!(reader.read_bits(1).unwrap(), 1);
 726    }
 727
 728    #[test]
 729    fn bit_reader_multi_bit() {
 730        let data = [0b11010011u8];
 731        let mut reader = BitReader::new(&data);
 732        // Read 3 bits LSB first: bits 0,1,0 → reversed from MSB: 011 → value 3
 733        assert_eq!(reader.read_bits(3).unwrap(), 0b011); // lowest 3 bits
 734        assert_eq!(reader.read_bits(5).unwrap(), 0b11010); // remaining 5 bits
 735    }
 736
 737    #[test]
 738    fn bit_reader_cross_byte() {
 739        let data = [0xFF, 0x00];
 740        let mut reader = BitReader::new(&data);
 741        assert_eq!(reader.read_bits(4).unwrap(), 0xF);
 742        assert_eq!(reader.read_bits(8).unwrap(), 0x0F); // 4 bits from first byte + 4 from second
 743    }
 744
 745    #[test]
 746    fn bit_reader_eof() {
 747        let data = [0x00];
 748        let mut reader = BitReader::new(&data);
 749        reader.read_bits(8).unwrap();
 750        assert!(matches!(
 751            reader.read_bits(1),
 752            Err(DeflateError::UnexpectedEof)
 753        ));
 754    }
 755
 756    #[test]
 757    fn bit_reader_align_and_read_bytes() {
 758        let data = [0xFF, 0xAB, 0xCD];
 759        let mut reader = BitReader::new(&data);
 760        reader.read_bits(3).unwrap(); // read some bits
 761        reader.align_to_byte();
 762        let mut buf = [0u8; 2];
 763        reader.read_bytes(&mut buf).unwrap();
 764        assert_eq!(buf, [0xAB, 0xCD]);
 765    }
 766
 767    // -- reverse_bits tests --
 768
 769    #[test]
 770    fn reverse_bits_basic() {
 771        assert_eq!(reverse_bits(0b110, 3), 0b011);
 772        assert_eq!(reverse_bits(0b1010, 4), 0b0101);
 773        assert_eq!(reverse_bits(0b1, 1), 0b1);
 774        assert_eq!(reverse_bits(0b0, 1), 0b0);
 775        assert_eq!(reverse_bits(0b11001, 5), 0b10011);
 776    }
 777
 778    // -- Huffman table tests --
 779
 780    #[test]
 781    fn huffman_fixed_literal_table() {
 782        // Verify fixed literal table can be constructed
 783        let lengths = fixed_literal_lengths();
 784        let table = HuffmanTable::from_code_lengths(&lengths, 9).unwrap();
 785        assert!(!table.entries.is_empty());
 786    }
 787
 788    #[test]
 789    fn huffman_fixed_distance_table() {
 790        let lengths = fixed_distance_lengths();
 791        let table = HuffmanTable::from_code_lengths(&lengths, 5).unwrap();
 792        assert!(!table.entries.is_empty());
 793    }
 794
 795    #[test]
 796    fn huffman_simple_decode() {
 797        // Build a simple tree: A=0, B=10, C=11
 798        let lengths = [1u8, 2, 2];
 799        let table = HuffmanTable::from_code_lengths(&lengths, 2).unwrap();
 800
 801        // A = code 0 (1 bit), B = code 10 (2 bits), C = code 11 (2 bits)
 802        // LSB first: A=0, B=01, C=11
 803        // Encode: A, B, C, A = 0 | 01 | 11 | 0 = 0b0_11_01_0 in LSB order
 804        // In bytes: 0b01110100 (pad with zeros on MSB side if needed)
 805        // Wait, let me recalculate for LSB-first bit reader:
 806        // A: bit 0 → 0
 807        // B: bits 1-2 → 01 (LSB first, so code 10 reversed = 01)
 808        // C: bits 3-4 → 11 (code 11 reversed = 11)
 809        // A: bit 5 → 0
 810        // Byte: bits 76543210 = 00_0_11_01_0 = 0b00011010 = 0x1A
 811        let data = [0x1A];
 812        let mut reader = BitReader::new(&data);
 813
 814        assert_eq!(table.decode(&mut reader).unwrap(), 0); // A
 815        assert_eq!(table.decode(&mut reader).unwrap(), 1); // B
 816        assert_eq!(table.decode(&mut reader).unwrap(), 2); // C
 817        assert_eq!(table.decode(&mut reader).unwrap(), 0); // A
 818    }
 819
 820    // -- Non-compressed block (BTYPE=00) tests --
 821
 822    #[test]
 823    fn inflate_uncompressed_block() {
 824        // BFINAL=1, BTYPE=00 (non-compressed)
 825        // Bits: 1 (final) 00 (type) → 0b001 = least significant 3 bits
 826        // Then aligned: LEN=5, NLEN=~5=0xFFFA
 827        let data_to_store = b"Hello";
 828        let mut input = Vec::new();
 829        // First byte: BFINAL=1, BTYPE=00 → bits 1,0,0 → byte 0x01
 830        input.push(0x01);
 831        // LEN = 5, little-endian
 832        input.push(5);
 833        input.push(0);
 834        // NLEN = !5 = 0xFFFA, little-endian
 835        input.push(0xFA);
 836        input.push(0xFF);
 837        // Data
 838        input.extend_from_slice(data_to_store);
 839
 840        let result = inflate(&input).unwrap();
 841        assert_eq!(result, b"Hello");
 842    }
 843
 844    #[test]
 845    fn inflate_uncompressed_empty() {
 846        // Non-compressed block with length 0
 847        let mut input = Vec::new();
 848        input.push(0x01); // BFINAL=1, BTYPE=00
 849        input.push(0); // LEN=0
 850        input.push(0);
 851        input.push(0xFF); // NLEN=0xFFFF = !0
 852        input.push(0xFF);
 853
 854        let result = inflate(&input).unwrap();
 855        assert!(result.is_empty());
 856    }
 857
 858    #[test]
 859    fn inflate_uncompressed_len_mismatch() {
 860        let mut input = Vec::new();
 861        input.push(0x01);
 862        input.push(5);
 863        input.push(0);
 864        input.push(0x00); // Wrong NLEN
 865        input.push(0x00);
 866
 867        assert!(matches!(
 868            inflate(&input),
 869            Err(DeflateError::LenMismatch { .. })
 870        ));
 871    }
 872
 873    // -- Multiple non-compressed blocks --
 874
 875    #[test]
 876    fn inflate_two_uncompressed_blocks() {
 877        let mut input = Vec::new();
 878
 879        // Block 1: BFINAL=0, BTYPE=00
 880        input.push(0x00);
 881        input.push(3);
 882        input.push(0);
 883        input.push(0xFC);
 884        input.push(0xFF);
 885        input.extend_from_slice(b"Hel");
 886
 887        // Block 2: BFINAL=1, BTYPE=00
 888        input.push(0x01);
 889        input.push(2);
 890        input.push(0);
 891        input.push(0xFD);
 892        input.push(0xFF);
 893        input.extend_from_slice(b"lo");
 894
 895        let result = inflate(&input).unwrap();
 896        assert_eq!(result, b"Hello");
 897    }
 898
 899    // -- Fixed Huffman block tests --
 900
 901    #[test]
 902    fn inflate_fixed_huffman_literals_only() {
 903        // Use Rust's flate2-equivalent: manually construct a fixed Huffman block
 904        // that encodes just literal bytes and end-of-block.
 905        //
 906        // For simplicity, we test against known compressed output.
 907        // The easiest way is to compress with the non-compressed format
 908        // and verify, then test with known DEFLATE streams.
 909
 910        // Use a known compressed representation of empty data:
 911        // Fixed block with just end-of-block (symbol 256)
 912        // Symbol 256 has code length 7, code = 0000000 in fixed Huffman
 913        // Actually, per fixed table: 256-279 have 7-bit codes starting at 0b0000000
 914        // Code for 256: 7 bits, value 0b0000000
 915        // Reversed for LSB-first: 0b0000000
 916        // BFINAL=1, BTYPE=01: bits 1, 0, 1 → 0b101 = 5
 917        // Then 7 bits of 0 for symbol 256
 918        // Total: 3 + 7 = 10 bits → 2 bytes
 919        // Byte 0: bits 0-7: 1,0,1, 0,0,0,0,0 = 0b00000_101 = 0x05
 920        // Byte 1: bits 8-9: 0,0 (remaining bits of the code) + padding = 0x00
 921        let input = [0x03, 0x00]; // Known empty fixed block
 922        let result = inflate(&input).unwrap();
 923        assert!(result.is_empty());
 924    }
 925
 926    #[test]
 927    fn inflate_fixed_huffman_known_data() {
 928        // This is the DEFLATE compressed form of "Hello" using fixed Huffman codes.
 929        // Generated by standard deflate: raw deflate of "Hello"
 930        // We can verify this by using known test vectors.
 931        //
 932        // Actually, let's build it manually:
 933        // BFINAL=1, BTYPE=01 (fixed)
 934        // Then encode 'H'(72), 'e'(101), 'l'(108), 'l'(108), 'o'(111), EOB(256)
 935        //
 936        // Fixed Huffman codes for literals 0-143: 8-bit codes 00110000..10111111
 937        // Code for literal N (0-143): reversed(N + 0x30) in 8 bits
 938        //
 939        // Let's use a known deflate stream instead. The bytes below are the
 940        // raw DEFLATE encoding of "Hello" (verified against RFC 1951).
 941        let input = [0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x07, 0x00];
 942        let result = inflate(&input).unwrap();
 943        assert_eq!(result, b"Hello");
 944    }
 945
 946    // -- Invalid block type --
 947
 948    #[test]
 949    fn inflate_invalid_block_type() {
 950        // BFINAL=1, BTYPE=11 (reserved)
 951        // Bits: 1, 1, 1 → byte 0x07
 952        let input = [0x07];
 953        assert!(matches!(
 954            inflate(&input),
 955            Err(DeflateError::InvalidBlockType(3))
 956        ));
 957    }
 958
 959    // -- Empty input --
 960
 961    #[test]
 962    fn inflate_empty_input() {
 963        assert!(matches!(inflate(&[]), Err(DeflateError::UnexpectedEof)));
 964    }
 965
 966    // -- Distance validation --
 967
 968    #[test]
 969    fn inflate_distance_table_coverage() {
 970        // Verify all distance table entries are valid
 971        for (i, &(base, extra)) in DISTANCE_TABLE.iter().enumerate() {
 972            assert!(base >= 1, "distance code {i} has base {base} < 1");
 973            let max_dist = base as u32 + ((1u32 << extra) - 1);
 974            assert!(
 975                max_dist <= 32768,
 976                "distance code {i} max {max_dist} exceeds 32KB window"
 977            );
 978        }
 979    }
 980
 981    // -- Length table coverage --
 982
 983    #[test]
 984    fn inflate_length_table_coverage() {
 985        // Verify length table entries
 986        for (i, &(base, extra)) in LENGTH_TABLE.iter().enumerate() {
 987            assert!(base >= 3, "length code {i} has base {base} < 3");
 988            if i < 28 {
 989                // Codes 257-284 have ranges
 990                let max_len = base as u32 + ((1u32 << extra) - 1);
 991                assert!(max_len <= 258, "length code {i} max {max_len} exceeds 258");
 992            }
 993        }
 994    }
 995
 996    // -- Back-reference (LZ77) tests via fixed Huffman --
 997
 998    #[test]
 999    fn inflate_with_back_reference() {
1000        // DEFLATE compressed "abcabcabc" — uses a back-reference
1001        // Raw DEFLATE for "abcabcabc" with fixed Huffman:
1002        let input = [0x4b, 0x4c, 0x4a, 0x4e, 0x04, 0x23, 0x00];
1003        let result = inflate(&input).unwrap();
1004        assert_eq!(result, b"abcabcabc");
1005    }
1006
1007    // -- Dynamic Huffman block tests --
1008
1009    #[test]
1010    fn inflate_dynamic_huffman() {
1011        // A known DEFLATE stream with dynamic Huffman codes.
1012        // This is the raw DEFLATE encoding of "AAAAAAAAAA" (10 A's)
1013        // which benefits from dynamic codes + back-references.
1014        let input = [0x73, 0x74, 0x84, 0x01, 0x00];
1015        let result = inflate(&input).unwrap();
1016        assert_eq!(result, b"AAAAAAAAAA");
1017    }
1018
1019    // -- Fixed Huffman table properties --
1020
1021    #[test]
1022    fn fixed_literal_lengths_correct() {
1023        let lengths = fixed_literal_lengths();
1024        // 0-143: length 8
1025        for i in 0..=143 {
1026            assert_eq!(lengths[i], 8, "literal {i} should have length 8");
1027        }
1028        // 144-255: length 9
1029        for i in 144..=255 {
1030            assert_eq!(lengths[i], 9, "literal {i} should have length 9");
1031        }
1032        // 256-279: length 7
1033        for i in 256..=279 {
1034            assert_eq!(lengths[i], 7, "literal {i} should have length 7");
1035        }
1036        // 280-287: length 8
1037        for i in 280..=287 {
1038            assert_eq!(lengths[i], 8, "literal {i} should have length 8");
1039        }
1040    }
1041
1042    #[test]
1043    fn fixed_distance_lengths_correct() {
1044        let lengths = fixed_distance_lengths();
1045        for (i, &len) in lengths.iter().enumerate() {
1046            assert_eq!(len, 5, "distance {i} should have length 5");
1047        }
1048    }
1049
1050    // -- Round-trip test using non-compressed then verifying inflate --
1051
1052    #[test]
1053    fn inflate_uncompressed_large_block() {
1054        // Test with a larger block (256 bytes)
1055        let payload: Vec<u8> = (0..=255).collect();
1056        let mut input = Vec::new();
1057        input.push(0x01); // BFINAL=1, BTYPE=00
1058        let len = payload.len() as u16;
1059        input.push(len as u8);
1060        input.push((len >> 8) as u8);
1061        let nlen = !len;
1062        input.push(nlen as u8);
1063        input.push((nlen >> 8) as u8);
1064        input.extend_from_slice(&payload);
1065
1066        let result = inflate(&input).unwrap();
1067        assert_eq!(result, payload);
1068    }
1069
1070    // -- Code length order --
1071
1072    #[test]
1073    fn code_length_order_correct() {
1074        // Per RFC 1951 §3.2.7
1075        assert_eq!(
1076            CODE_LENGTH_ORDER,
1077            [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
1078        );
1079    }
1080
1081    // -- Overlapping back-reference --
1082
1083    #[test]
1084    fn inflate_overlapping_backref() {
1085        // Test that overlapping back-references work correctly.
1086        // e.g., distance=1, length=5 starting from "A" should produce "AAAAA"
1087        // This is tested indirectly through the DEFLATE stream,
1088        // but let's verify the copy logic directly.
1089        let mut output = vec![b'A'];
1090        let distance = 1usize;
1091        let length = 5usize;
1092        let start = output.len() - distance;
1093        for i in 0..length {
1094            let b = output[start + (i % distance)];
1095            output.push(b);
1096        }
1097        assert_eq!(output, b"AAAAAA");
1098    }
1099
1100    // -- Comprehensive round-trip tests with zlib-generated vectors --
1101
1102    #[test]
1103    fn inflate_single_byte() {
1104        let compressed = [0x4b, 0x04, 0x00];
1105        let result = inflate(&compressed).unwrap();
1106        assert_eq!(result, b"a");
1107    }
1108
1109    #[test]
1110    fn inflate_repeated_char_1000() {
1111        // 1000 'a' characters — heavy LZ77 back-referencing
1112        let compressed = [
1113            0x4b, 0x4c, 0x1c, 0x05, 0xa3, 0x60, 0x14, 0x0c, 0x77, 0x00, 0x00,
1114        ];
1115        let result = inflate(&compressed).unwrap();
1116        assert_eq!(result.len(), 1000);
1117        assert!(result.iter().all(|&b| b == b'a'));
1118    }
1119
1120    #[test]
1121    fn inflate_pangram() {
1122        let compressed = [
1123            0x0b, 0xc9, 0x48, 0x55, 0x28, 0x2c, 0xcd, 0x4c, 0xce, 0x56, 0x48, 0x2a, 0xca, 0x2f,
1124            0xcf, 0x53, 0x48, 0xcb, 0xaf, 0x50, 0xc8, 0x2a, 0xcd, 0x2d, 0x28, 0x56, 0xc8, 0x2f,
1125            0x4b, 0x2d, 0x52, 0x28, 0x01, 0x4a, 0xe7, 0x24, 0x56, 0x55, 0x2a, 0xa4, 0xe4, 0xa7,
1126            0x03, 0x00,
1127        ];
1128        let result = inflate(&compressed).unwrap();
1129        assert_eq!(result, b"The quick brown fox jumps over the lazy dog");
1130    }
1131
1132    #[test]
1133    fn inflate_alphabet_repeated() {
1134        // "abcdefghijklmnopqrstuvwxyz" * 10 = 260 bytes
1135        let compressed = [
1136            0x4b, 0x4c, 0x4a, 0x4e, 0x49, 0x4d, 0x4b, 0xcf, 0xc8, 0xcc, 0xca, 0xce, 0xc9, 0xcd,
1137            0xcb, 0x2f, 0x28, 0x2c, 0x2a, 0x2e, 0x29, 0x2d, 0x2b, 0xaf, 0xa8, 0xac, 0x4a, 0x1c,
1138            0x31, 0x32, 0x00,
1139        ];
1140        let result = inflate(&compressed).unwrap();
1141        let expected: Vec<u8> = b"abcdefghijklmnopqrstuvwxyz".repeat(10);
1142        assert_eq!(result, expected);
1143    }
1144
1145    // -- All byte values via non-compressed block --
1146
1147    #[test]
1148    fn inflate_all_byte_values() {
1149        // Non-compressed block containing all 256 byte values
1150        let payload: Vec<u8> = (0..=255).collect();
1151        // zlib at level 6 uses non-compressed for this data
1152        let compressed = [
1153            0x01, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
1154            0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
1155            0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24,
1156            0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32,
1157            0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
1158            0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e,
1159            0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
1160            0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a,
1161            0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
1162            0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86,
1163            0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94,
1164            0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2,
1165            0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
1166            0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe,
1167            0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc,
1168            0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
1169            0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8,
1170            0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6,
1171            0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
1172        ];
1173        let result = inflate(&compressed).unwrap();
1174        assert_eq!(result, payload);
1175    }
1176
1177    // -- Maximum back-reference distance (32KB window) --
1178
1179    #[test]
1180    fn inflate_max_window_uncompressed() {
1181        // Test near the 32KB window limit with non-compressed blocks
1182        let payload = vec![0xAB; 32768];
1183        // Build as a series of non-compressed blocks (max 65535 bytes each)
1184        let mut input = Vec::new();
1185        input.push(0x01); // BFINAL=1, BTYPE=00
1186        let len = 32768u16;
1187        // 32768 = 0x8000 which fits in u16
1188        input.push(len as u8);
1189        input.push((len >> 8) as u8);
1190        let nlen = !len;
1191        input.push(nlen as u8);
1192        input.push((nlen >> 8) as u8);
1193        input.extend_from_slice(&payload);
1194
1195        let result = inflate(&input).unwrap();
1196        assert_eq!(result.len(), 32768);
1197        assert!(result.iter().all(|&b| b == 0xAB));
1198    }
1199}