an efficient binary archive format
at main 429 lines 14 kB view raw
1//! Bindle is a binary archive format for collecting files. 2//! 3//! The format uses memory-mapped I/O for fast reads, optional zstd compression, 4//! and supports append-only writes with shadowing for updates. 5//! 6//! # Example 7//! 8//! ```no_run 9//! use bindle_file::{Bindle, Compress}; 10//! 11//! let mut archive = Bindle::open("data.bndl")?; 12//! archive.add("file.txt", b"data", Compress::None)?; 13//! archive.save()?; 14//! 15//! let data = archive.read("file.txt").unwrap(); 16//! # Ok::<(), std::io::Error>(()) 17//! ``` 18 19use std::io::{self, Write}; 20 21// Module declarations 22mod bindle; 23mod compress; 24mod entry; 25mod reader; 26mod writer; 27 28pub(crate) mod ffi; 29 30// Public re-exports 31pub use bindle::Bindle; 32pub use compress::Compress; 33pub use entry::Entry; 34pub use reader::Reader; 35pub use writer::Writer; 36 37// Constants 38pub(crate) const BNDL_MAGIC: &[u8; 8] = b"BINDL001"; 39pub(crate) const BNDL_ALIGN: usize = 8; 40pub(crate) const ENTRY_SIZE: usize = std::mem::size_of::<Entry>(); 41pub(crate) const FOOTER_SIZE: usize = std::mem::size_of::<entry::Footer>(); 42pub(crate) const HEADER_SIZE: usize = 8; 43pub(crate) const AUTO_COMPRESS_THRESHOLD: usize = 2048; 44pub(crate) const FOOTER_MAGIC: u32 = 0x62626262; 45const ZEROS: &[u8; 64] = &[0u8; 64]; // Reusable zero buffer for padding 46 47// Helper functions 48pub(crate) fn pad< 49 const SIZE: usize, 50 T: Copy + TryFrom<usize> + std::ops::Sub<T, Output = T> + std::ops::Rem<T, Output = T>, 51>( 52 n: T, 53) -> T 54where 55 <T as std::ops::Sub>::Output: std::ops::Rem<T>, 56{ 57 if let Ok(size) = T::try_from(SIZE) { 58 return (size - (n % size)) % size; 59 } 60 61 unreachable!() 62} 63 64// Helper to write padding zeros without allocating 65pub(crate) fn write_padding<W: Write>(writer: &mut W, len: usize) -> io::Result<()> { 66 let mut remaining = len; 67 while remaining > 0 { 68 let chunk = remaining.min(ZEROS.len()); 69 writer.write_all(&ZEROS[..chunk])?; 70 remaining -= chunk; 71 } 72 Ok(()) 73} 74 75#[cfg(test)] 76mod tests { 77 use super::*; 78 use std::fs; 79 use std::fs::OpenOptions; 80 use std::io::{Seek, SeekFrom}; 81 82 #[test] 83 fn test_create_and_read() { 84 let path = "test_basic.bindl"; 85 let data = b"Hello, Bindle World!"; 86 87 // 1. Create and Write 88 { 89 let mut fp = Bindle::open(path).expect("Failed to open"); 90 fp.add("hello.txt", data, Compress::None) 91 .expect("Failed to add"); 92 fp.save().expect("Failed to commit"); 93 } 94 95 // 2. Open and Read 96 { 97 let fp = Bindle::open(path).expect("Failed to re-open"); 98 let result = fp.read("hello.txt").expect("File not found"); 99 assert_eq!(result.as_ref(), data); 100 } 101 102 fs::remove_file(path).ok(); 103 } 104 105 #[test] 106 fn test_zstd_compression() { 107 let path = "test_zstd.bindl"; 108 // Highly compressible data 109 let data = vec![b'A'; 1000]; 110 111 { 112 let mut fp = Bindle::open(path).expect("Failed to open"); 113 fp.add("large.bin", &data, Compress::Zstd) 114 .expect("Failed to add"); 115 fp.save().expect("Failed to commit"); 116 } 117 118 let fp = Bindle::open(path).expect("Failed to re-open"); 119 120 // Ensure data is correct 121 let result = fp.read("large.bin").expect("File not found"); 122 assert_eq!(result, data); 123 124 // Ensure the file on disk is actually smaller than the raw data (including headers) 125 let meta = fs::metadata(path).unwrap(); 126 assert!(meta.len() < 1000); 127 128 fs::remove_file(path).ok(); 129 } 130 131 #[test] 132 fn test_append_functionality() { 133 let path = "test_append.bindl"; 134 let _ = std::fs::remove_file(path); 135 136 // 1. Initial creation 137 { 138 let mut fp = Bindle::open(path).expect("Fail open 1"); 139 fp.add("1.txt", b"First", Compress::Zstd).unwrap(); 140 fp.save().expect("Fail commit 1"); 141 } // File handle closed here 142 143 // 2. Append session 144 { 145 let mut fp = Bindle::open(path).expect("Fail open 2"); 146 // At this point, entries contains "1.txt" 147 148 fp.add("2.txt", b"Second", Compress::None).unwrap(); 149 fp.save().expect("Fail commit 2"); 150 151 // Now test the read 152 let first = fp.read("1.txt").expect("Could not find 1.txt"); 153 let second = fp.read("2.txt").expect("Could not find 2.txt"); 154 155 assert_eq!(first.as_ref(), b"First"); 156 assert_eq!(second.as_ref(), b"Second"); 157 } 158 let _ = std::fs::remove_file(path); 159 } 160 161 #[test] 162 fn test_invalid_magic() { 163 let path = "invalid.bindl"; 164 fs::write(path, b"NOT_A_PACK_FILE_AT_ALL").unwrap(); 165 166 let res = Bindle::open(path); 167 assert!(res.is_err()); 168 169 fs::remove_file(path).ok(); 170 } 171 172 #[test] 173 fn test_key_shadowing() { 174 let path = "test_shadow.bindl"; 175 let _ = fs::remove_file(path); 176 177 let mut b = Bindle::open(path).expect("Failed to open"); 178 179 // 1. Add initial version 180 b.add("config.txt", b"v1", Compress::None).unwrap(); 181 b.save().unwrap(); 182 183 // 2. Overwrite with v2 (shadowing) 184 b.add("config.txt", b"version_2_is_longer", Compress::None) 185 .unwrap(); 186 b.save().unwrap(); 187 188 // 3. Verify latest version is retrieved 189 let b2 = Bindle::open(path).expect("Failed to reopen"); 190 let result = b2.read("config.txt").unwrap(); 191 assert_eq!(result.as_ref(), b"version_2_is_longer"); 192 193 // 4. Verify index count hasn't grown (still 1 entry) 194 assert_eq!(b2.len(), 1); 195 196 fs::remove_file(path).ok(); 197 } 198 199 #[test] 200 fn test_vacuum_reclaims_space() { 201 let path = "test_vacuum.bindl"; 202 let _ = fs::remove_file(path); 203 204 let mut b = Bindle::open(path).expect("Failed to open"); 205 206 // 1. Add a large file 207 let large_data = vec![0u8; 1024]; 208 b.add("large.bin", &large_data, Compress::None).unwrap(); 209 b.save().unwrap(); 210 let size_v1 = fs::metadata(path).unwrap().len(); 211 212 // 2. Shadow it with a tiny file 213 b.add("large.bin", b"tiny", Compress::None).unwrap(); 214 b.save().unwrap(); 215 let size_v2 = fs::metadata(path).unwrap().len(); 216 217 // Size should have increased because we appended 'tiny' 218 assert!(size_v2 > size_v1); 219 220 // 3. Run Vacuum 221 b.vacuum().expect("Vacuum failed"); 222 let size_v3 = fs::metadata(path).unwrap().len(); 223 224 // 4. Verify size is now significantly smaller (reclaimed 1024 bytes) 225 assert!(size_v3 < size_v2); 226 227 // 5. Verify data integrity after vacuum 228 let b2 = Bindle::open(path).unwrap(); 229 assert_eq!(b2.read("large.bin").unwrap().as_ref(), b"tiny"); 230 231 fs::remove_file(path).ok(); 232 } 233 234 #[test] 235 fn test_directory_pack_unpack_roundtrip() { 236 let bindle_path = "roundtrip.bindl"; 237 let src_dir = "test_src"; 238 let out_dir = "test_out"; 239 240 // Clean up previous runs 241 let _ = fs::remove_dir_all(src_dir); 242 let _ = fs::remove_dir_all(out_dir); 243 let _ = fs::remove_file(bindle_path); 244 245 // 1. Create a dummy directory structure 246 fs::create_dir_all(format!("{}/subdir", src_dir)).unwrap(); 247 fs::write(format!("{}/file1.txt", src_dir), b"Hello World").unwrap(); 248 fs::write( 249 format!("{}/subdir/file2.txt", src_dir), 250 b"Compressed Data Content", 251 ) 252 .unwrap(); 253 254 // 2. Pack the directory using Rust 255 { 256 let mut b = Bindle::open(bindle_path).unwrap(); 257 b.pack(src_dir, Compress::Zstd).expect("Pack failed"); 258 b.save().expect("Save failed"); 259 } 260 261 // 3. Unpack the directory using Rust 262 { 263 let b = Bindle::open(bindle_path).unwrap(); 264 b.unpack(out_dir).expect("Unpack failed"); 265 } 266 267 // 4. Verify the contents match exactly 268 let content1 = fs::read_to_string(format!("{}/file1.txt", out_dir)).unwrap(); 269 let content2 = fs::read_to_string(format!("{}/subdir/file2.txt", out_dir)).unwrap(); 270 271 assert_eq!(content1, "Hello World"); 272 assert_eq!(content2, "Compressed Data Content"); 273 274 // Cleanup 275 fs::remove_dir_all(src_dir).ok(); 276 fs::remove_dir_all(out_dir).ok(); 277 fs::remove_file(bindle_path).ok(); 278 } 279 280 #[test] 281 fn test_streaming_manual_chunks() { 282 let path = "test_stream.bindl"; 283 let _ = std::fs::remove_file(path); 284 let chunk1 = b"Hello "; 285 let chunk2 = b"Streaming "; 286 let chunk3 = b"World!"; 287 let expected = b"Hello Streaming World!"; 288 289 { 290 let mut b = Bindle::open(path).expect("Failed to open"); 291 // Start a stream without compression 292 let mut s = b 293 .writer("streamed_file.txt", Compress::None) 294 .expect("Failed to start stream"); 295 296 // Write chunks manually 297 s.write_chunk(chunk1).unwrap(); 298 s.write_chunk(chunk2).unwrap(); 299 s.write_chunk(chunk3).unwrap(); 300 301 s.close().expect("Failed to finish stream"); 302 b.save().expect("Failed to save"); 303 } 304 305 // Verification 306 let b = Bindle::open(path).expect("Failed to reopen"); 307 let result = b.read("streamed_file.txt").expect("Entry not found"); 308 assert_eq!(result.as_ref(), expected); 309 assert_eq!(result.len(), expected.len()); 310 311 let _ = std::fs::remove_file(path); 312 } 313 314 #[test] 315 fn test_crc32_corruption_detection() { 316 let path = "test_crc32.bindl"; 317 let _ = std::fs::remove_file(path); 318 let data = b"Test data for CRC32 verification"; 319 320 // 1. Create a file with valid data 321 { 322 let mut b = Bindle::open(path).expect("Failed to open"); 323 b.add("test.txt", data, Compress::None).unwrap(); 324 b.save().unwrap(); 325 } 326 327 // 2. Verify that reading with correct data works 328 { 329 let b = Bindle::open(path).expect("Failed to reopen"); 330 let result = b.read("test.txt").expect("Should read successfully"); 331 assert_eq!(result.as_ref(), data); 332 } 333 334 // 3. Corrupt the data by modifying a byte directly in the file 335 { 336 let mut file = OpenOptions::new() 337 .write(true) 338 .read(true) 339 .open(path) 340 .unwrap(); 341 342 // Skip the header and modify the first byte of data 343 file.seek(SeekFrom::Start(HEADER_SIZE as u64)).unwrap(); 344 file.write(&[b'X']).unwrap(); // Corrupt first byte 345 file.flush().unwrap(); 346 } 347 348 // 4. Verify that reading corrupted data fails CRC32 check 349 { 350 let b = Bindle::open(path).expect("Failed to reopen after corruption"); 351 let result = b.read("test.txt"); 352 assert!(result.is_none(), "Read should fail due to CRC32 mismatch"); 353 } 354 355 let _ = std::fs::remove_file(path); 356 } 357 358 #[test] 359 fn test_crc32_with_compression() { 360 let path = "test_crc32_compressed.bindl"; 361 let _ = std::fs::remove_file(path); 362 let data = vec![b'A'; 2000]; // Large enough to trigger compression 363 364 // 1. Create a file with compressed data 365 { 366 let mut b = Bindle::open(path).expect("Failed to open"); 367 b.add("compressed.bin", &data, Compress::Zstd).unwrap(); 368 b.save().unwrap(); 369 } 370 371 // 2. Verify that reading compressed data works and CRC32 is verified 372 { 373 let b = Bindle::open(path).expect("Failed to reopen"); 374 let result = b.read("compressed.bin").expect("Should read successfully"); 375 assert_eq!(result.as_ref(), data.as_slice()); 376 } 377 378 // 3. Also test with the streaming reader 379 { 380 let b = Bindle::open(path).expect("Failed to reopen"); 381 let mut reader = b.reader("compressed.bin").unwrap(); 382 let mut output = Vec::new(); 383 std::io::copy(&mut reader, &mut output).unwrap(); 384 reader.verify_crc32().expect("CRC32 should match"); 385 assert_eq!(output, data); 386 } 387 388 let _ = std::fs::remove_file(path); 389 } 390 391 #[test] 392 fn test_remove_entry() { 393 let path = "test_remove.bindl"; 394 let _ = fs::remove_file(path); 395 396 let mut b = Bindle::open(path).expect("Failed to open"); 397 398 // Add some entries 399 b.add("file1.txt", b"Content 1", Compress::None).unwrap(); 400 b.add("file2.txt", b"Content 2", Compress::None).unwrap(); 401 b.add("file3.txt", b"Content 3", Compress::None).unwrap(); 402 b.save().unwrap(); 403 404 assert_eq!(b.len(), 3); 405 assert!(b.exists("file2.txt")); 406 407 // Remove an entry 408 assert!(b.remove("file2.txt")); 409 assert_eq!(b.len(), 2); 410 assert!(!b.exists("file2.txt")); 411 412 // Try to remove non-existent entry 413 assert!(!b.remove("nonexistent.txt")); 414 415 // Save and reload to verify persistence 416 b.save().unwrap(); 417 let b2 = Bindle::open(path).unwrap(); 418 assert_eq!(b2.len(), 2); 419 assert!(b2.exists("file1.txt")); 420 assert!(!b2.exists("file2.txt")); 421 assert!(b2.exists("file3.txt")); 422 423 // Verify data still readable for remaining entries 424 assert_eq!(b2.read("file1.txt").unwrap().as_ref(), b"Content 1"); 425 assert_eq!(b2.read("file3.txt").unwrap().as_ref(), b"Content 3"); 426 427 fs::remove_file(path).ok(); 428 } 429}