an efficient binary archive format
1//! Bindle is a binary archive format for collecting files.
2//!
3//! The format uses memory-mapped I/O for fast reads, optional zstd compression,
4//! and supports append-only writes with shadowing for updates.
5//!
6//! # Example
7//!
8//! ```no_run
9//! use bindle_file::{Bindle, Compress};
10//!
11//! let mut archive = Bindle::open("data.bndl")?;
12//! archive.add("file.txt", b"data", Compress::None)?;
13//! archive.save()?;
14//!
15//! let data = archive.read("file.txt").unwrap();
16//! # Ok::<(), std::io::Error>(())
17//! ```
18
19use std::io::{self, Write};
20
21// Module declarations
22mod bindle;
23mod compress;
24mod entry;
25mod reader;
26mod writer;
27
28pub(crate) mod ffi;
29
30// Public re-exports
31pub use bindle::Bindle;
32pub use compress::Compress;
33pub use entry::Entry;
34pub use reader::Reader;
35pub use writer::Writer;
36
37// Constants
38pub(crate) const BNDL_MAGIC: &[u8; 8] = b"BINDL001";
39pub(crate) const BNDL_ALIGN: usize = 8;
40pub(crate) const ENTRY_SIZE: usize = std::mem::size_of::<Entry>();
41pub(crate) const FOOTER_SIZE: usize = std::mem::size_of::<entry::Footer>();
42pub(crate) const HEADER_SIZE: usize = 8;
43pub(crate) const AUTO_COMPRESS_THRESHOLD: usize = 2048;
44pub(crate) const FOOTER_MAGIC: u32 = 0x62626262;
45const ZEROS: &[u8; 64] = &[0u8; 64]; // Reusable zero buffer for padding
46
47// Helper functions
/// Returns how many units must be added to `n` to reach the next multiple of
/// `SIZE`.
///
/// The result lies in `0..SIZE` for non-negative `n`; it is zero when `n` is
/// already a multiple of `SIZE`.
///
/// # Panics
///
/// Panics if `SIZE` is zero, or if `SIZE` cannot be represented in `T`
/// (for example `pad::<300, u8>`).
pub(crate) fn pad<
    const SIZE: usize,
    T: Copy + TryFrom<usize> + std::ops::Sub<T, Output = T> + std::ops::Rem<T, Output = T>,
>(
    n: T,
) -> T {
    assert!(SIZE != 0, "pad: SIZE must be non-zero");

    match T::try_from(SIZE) {
        // The outer `% size` maps an already-aligned `n` to 0 instead of `size`.
        Ok(size) => (size - (n % size)) % size,
        // Reachable whenever the alignment is larger than `T` can hold, so
        // report the real cause instead of claiming the branch is unreachable.
        Err(_) => panic!(
            "pad: alignment {} does not fit in the target integer type",
            SIZE
        ),
    }
}
63
64// Helper to write padding zeros without allocating
65pub(crate) fn write_padding<W: Write>(writer: &mut W, len: usize) -> io::Result<()> {
66 let mut remaining = len;
67 while remaining > 0 {
68 let chunk = remaining.min(ZEROS.len());
69 writer.write_all(&ZEROS[..chunk])?;
70 remaining -= chunk;
71 }
72 Ok(())
73}
74
#[cfg(test)]
mod tests {
    //! Integration-style tests that exercise `Bindle` against real files
    //! created in the current working directory. Every test uses its own
    //! file name so the tests do not share archives, and removes any stale
    //! archive up front because a panicking assertion skips the trailing
    //! cleanup.

    use super::*;
    use std::fs::{self, OpenOptions};
    use std::io::{Seek, SeekFrom};

    /// Writes one uncompressed entry, reopens the archive and reads it back.
    #[test]
    fn test_create_and_read() {
        let path = "test_basic.bindl";
        // Start clean in case an earlier run aborted before its cleanup.
        let _ = fs::remove_file(path);
        let data = b"Hello, Bindle World!";

        // 1. Create and Write
        {
            let mut fp = Bindle::open(path).expect("Failed to open");
            fp.add("hello.txt", data, Compress::None)
                .expect("Failed to add");
            fp.save().expect("Failed to commit");
        }

        // 2. Open and Read
        {
            let fp = Bindle::open(path).expect("Failed to re-open");
            let result = fp.read("hello.txt").expect("File not found");
            assert_eq!(result.as_ref(), data);
        }

        fs::remove_file(path).ok();
    }

    /// Stores highly compressible data with zstd and checks both the
    /// round-trip and that the archive on disk is smaller than the raw input.
    #[test]
    fn test_zstd_compression() {
        let path = "test_zstd.bindl";
        // Start clean: a stale archive would be appended to and skew the size check.
        let _ = fs::remove_file(path);
        // Highly compressible data
        let data = vec![b'A'; 1000];

        {
            let mut fp = Bindle::open(path).expect("Failed to open");
            fp.add("large.bin", &data, Compress::Zstd)
                .expect("Failed to add");
            fp.save().expect("Failed to commit");
        }

        let fp = Bindle::open(path).expect("Failed to re-open");

        // Ensure data is correct
        let result = fp.read("large.bin").expect("File not found");
        assert_eq!(result, data);

        // Ensure the file on disk is actually smaller than the raw data (including headers)
        let meta = fs::metadata(path).unwrap();
        assert!(meta.len() < data.len() as u64);

        fs::remove_file(path).ok();
    }

    /// Adds a second entry in a later session and checks both entries are readable.
    #[test]
    fn test_append_functionality() {
        let path = "test_append.bindl";
        let _ = fs::remove_file(path);

        // 1. Initial creation
        {
            let mut fp = Bindle::open(path).expect("Fail open 1");
            fp.add("1.txt", b"First", Compress::Zstd).unwrap();
            fp.save().expect("Fail commit 1");
        } // `fp` is dropped here

        // 2. Append session
        {
            let mut fp = Bindle::open(path).expect("Fail open 2");

            fp.add("2.txt", b"Second", Compress::None).unwrap();
            fp.save().expect("Fail commit 2");

            // Both the pre-existing and the freshly appended entry must be readable.
            let first = fp.read("1.txt").expect("Could not find 1.txt");
            let second = fp.read("2.txt").expect("Could not find 2.txt");

            assert_eq!(first.as_ref(), b"First");
            assert_eq!(second.as_ref(), b"Second");
        }
        let _ = fs::remove_file(path);
    }

    /// Opening a file that does not start with the archive magic must fail.
    #[test]
    fn test_invalid_magic() {
        let path = "invalid.bindl";
        // `fs::write` truncates, so no separate pre-cleanup is needed here.
        fs::write(path, b"NOT_A_PACK_FILE_AT_ALL").unwrap();

        let res = Bindle::open(path);
        assert!(res.is_err());

        fs::remove_file(path).ok();
    }

    /// Re-adding an existing key must shadow the old value without growing the index.
    #[test]
    fn test_key_shadowing() {
        let path = "test_shadow.bindl";
        let _ = fs::remove_file(path);

        let mut b = Bindle::open(path).expect("Failed to open");

        // 1. Add initial version
        b.add("config.txt", b"v1", Compress::None).unwrap();
        b.save().unwrap();

        // 2. Overwrite with v2 (shadowing)
        b.add("config.txt", b"version_2_is_longer", Compress::None)
            .unwrap();
        b.save().unwrap();

        // 3. Verify latest version is retrieved
        let b2 = Bindle::open(path).expect("Failed to reopen");
        let result = b2.read("config.txt").unwrap();
        assert_eq!(result.as_ref(), b"version_2_is_longer");

        // 4. Verify index count hasn't grown (still 1 entry)
        assert_eq!(b2.len(), 1);

        fs::remove_file(path).ok();
    }

    /// Shadowing grows the file; `vacuum` must shrink it again and keep the
    /// latest value readable.
    #[test]
    fn test_vacuum_reclaims_space() {
        let path = "test_vacuum.bindl";
        let _ = fs::remove_file(path);

        let mut b = Bindle::open(path).expect("Failed to open");

        // 1. Add a large file
        let large_data = vec![0u8; 1024];
        b.add("large.bin", &large_data, Compress::None).unwrap();
        b.save().unwrap();
        let size_v1 = fs::metadata(path).unwrap().len();

        // 2. Shadow it with a tiny file
        b.add("large.bin", b"tiny", Compress::None).unwrap();
        b.save().unwrap();
        let size_v2 = fs::metadata(path).unwrap().len();

        // Size should have increased because we appended 'tiny'
        assert!(size_v2 > size_v1);

        // 3. Run Vacuum
        b.vacuum().expect("Vacuum failed");
        let size_v3 = fs::metadata(path).unwrap().len();

        // 4. Verify the file shrank once the shadowed payload was dropped
        assert!(size_v3 < size_v2);

        // 5. Verify data integrity after vacuum
        let b2 = Bindle::open(path).unwrap();
        assert_eq!(b2.read("large.bin").unwrap().as_ref(), b"tiny");

        fs::remove_file(path).ok();
    }

    /// Packs a small directory tree, unpacks it elsewhere and compares file contents.
    #[test]
    fn test_directory_pack_unpack_roundtrip() {
        let bindle_path = "roundtrip.bindl";
        let src_dir = "test_src";
        let out_dir = "test_out";

        // Clean up previous runs
        let _ = fs::remove_dir_all(src_dir);
        let _ = fs::remove_dir_all(out_dir);
        let _ = fs::remove_file(bindle_path);

        // 1. Create a dummy directory structure
        fs::create_dir_all(format!("{}/subdir", src_dir)).unwrap();
        fs::write(format!("{}/file1.txt", src_dir), b"Hello World").unwrap();
        fs::write(
            format!("{}/subdir/file2.txt", src_dir),
            b"Compressed Data Content",
        )
        .unwrap();

        // 2. Pack the directory using Rust
        {
            let mut b = Bindle::open(bindle_path).unwrap();
            b.pack(src_dir, Compress::Zstd).expect("Pack failed");
            b.save().expect("Save failed");
        }

        // 3. Unpack the directory using Rust
        {
            let b = Bindle::open(bindle_path).unwrap();
            b.unpack(out_dir).expect("Unpack failed");
        }

        // 4. Verify the contents match exactly
        let content1 = fs::read_to_string(format!("{}/file1.txt", out_dir)).unwrap();
        let content2 = fs::read_to_string(format!("{}/subdir/file2.txt", out_dir)).unwrap();

        assert_eq!(content1, "Hello World");
        assert_eq!(content2, "Compressed Data Content");

        // Cleanup
        fs::remove_dir_all(src_dir).ok();
        fs::remove_dir_all(out_dir).ok();
        fs::remove_file(bindle_path).ok();
    }

    /// Writes an entry in three chunks through the streaming writer and reads
    /// it back as one contiguous value.
    #[test]
    fn test_streaming_manual_chunks() {
        let path = "test_stream.bindl";
        let _ = fs::remove_file(path);
        let chunk1 = b"Hello ";
        let chunk2 = b"Streaming ";
        let chunk3 = b"World!";
        let expected = b"Hello Streaming World!";

        {
            let mut b = Bindle::open(path).expect("Failed to open");
            // Start a stream without compression
            let mut s = b
                .writer("streamed_file.txt", Compress::None)
                .expect("Failed to start stream");

            // Write chunks manually
            s.write_chunk(chunk1).unwrap();
            s.write_chunk(chunk2).unwrap();
            s.write_chunk(chunk3).unwrap();

            s.close().expect("Failed to finish stream");
            b.save().expect("Failed to save");
        }

        // Verification
        let b = Bindle::open(path).expect("Failed to reopen");
        let result = b.read("streamed_file.txt").expect("Entry not found");
        assert_eq!(result.as_ref(), expected);
        assert_eq!(result.len(), expected.len());

        let _ = fs::remove_file(path);
    }

    /// Flips a payload byte on disk and expects `read` to reject the entry.
    #[test]
    fn test_crc32_corruption_detection() {
        let path = "test_crc32.bindl";
        let _ = fs::remove_file(path);
        let data = b"Test data for CRC32 verification";

        // 1. Create a file with valid data
        {
            let mut b = Bindle::open(path).expect("Failed to open");
            b.add("test.txt", data, Compress::None).unwrap();
            b.save().unwrap();
        }

        // 2. Verify that reading with correct data works
        {
            let b = Bindle::open(path).expect("Failed to reopen");
            let result = b.read("test.txt").expect("Should read successfully");
            assert_eq!(result.as_ref(), data);
        }

        // 3. Corrupt the data by modifying a byte directly in the file
        {
            let mut file = OpenOptions::new()
                .write(true)
                .read(true)
                .open(path)
                .unwrap();

            // Skip the header and overwrite the byte right after it.
            // NOTE(review): assumes the first entry's payload starts at HEADER_SIZE — confirm.
            file.seek(SeekFrom::Start(HEADER_SIZE as u64)).unwrap();
            // `write_all` rather than `write`: a bare `write` may report zero
            // bytes written, which would leave the archive uncorrupted.
            file.write_all(&[b'X']).unwrap();
            file.flush().unwrap();
        }

        // 4. Verify that reading corrupted data fails CRC32 check
        {
            let b = Bindle::open(path).expect("Failed to reopen after corruption");
            let result = b.read("test.txt");
            assert!(result.is_none(), "Read should fail due to CRC32 mismatch");
        }

        let _ = fs::remove_file(path);
    }

    /// Checks that zstd-stored entries round-trip through both `read` and the
    /// streaming reader, including the reader's explicit CRC32 verification.
    #[test]
    fn test_crc32_with_compression() {
        let path = "test_crc32_compressed.bindl";
        let _ = fs::remove_file(path);
        // Highly repetitive payload; zstd is requested explicitly below.
        let data = vec![b'A'; 2000];

        // 1. Create a file with compressed data
        {
            let mut b = Bindle::open(path).expect("Failed to open");
            b.add("compressed.bin", &data, Compress::Zstd).unwrap();
            b.save().unwrap();
        }

        // 2. Verify that the entry reads back unchanged
        {
            let b = Bindle::open(path).expect("Failed to reopen");
            let result = b.read("compressed.bin").expect("Should read successfully");
            assert_eq!(result.as_ref(), data.as_slice());
        }

        // 3. Also test with the streaming reader
        {
            let b = Bindle::open(path).expect("Failed to reopen");
            let mut reader = b.reader("compressed.bin").unwrap();
            let mut output = Vec::new();
            std::io::copy(&mut reader, &mut output).unwrap();
            reader.verify_crc32().expect("CRC32 should match");
            assert_eq!(output, data);
        }

        let _ = fs::remove_file(path);
    }

    /// Removes one of three entries and checks the removal survives save + reopen.
    #[test]
    fn test_remove_entry() {
        let path = "test_remove.bindl";
        let _ = fs::remove_file(path);

        let mut b = Bindle::open(path).expect("Failed to open");

        // Add some entries
        b.add("file1.txt", b"Content 1", Compress::None).unwrap();
        b.add("file2.txt", b"Content 2", Compress::None).unwrap();
        b.add("file3.txt", b"Content 3", Compress::None).unwrap();
        b.save().unwrap();

        assert_eq!(b.len(), 3);
        assert!(b.exists("file2.txt"));

        // Remove an entry
        assert!(b.remove("file2.txt"));
        assert_eq!(b.len(), 2);
        assert!(!b.exists("file2.txt"));

        // Try to remove non-existent entry
        assert!(!b.remove("nonexistent.txt"));

        // Save and reload to verify persistence
        b.save().unwrap();
        let b2 = Bindle::open(path).unwrap();
        assert_eq!(b2.len(), 2);
        assert!(b2.exists("file1.txt"));
        assert!(!b2.exists("file2.txt"));
        assert!(b2.exists("file3.txt"));

        // Verify data still readable for remaining entries
        assert_eq!(b2.read("file1.txt").unwrap().as_ref(), b"Content 1");
        assert_eq!(b2.read("file3.txt").unwrap().as_ref(), b"Content 3");

        fs::remove_file(path).ok();
    }
}