//! An efficient binary archive format.
1use crc32fast::Hasher;
2use memmap2::Mmap;
3use std::borrow::Cow;
4use std::collections::BTreeMap;
5use std::fs::{File, OpenOptions};
6use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
7use std::path::{Path, PathBuf};
8use zerocopy::{FromBytes, IntoBytes};
9
10use crate::compress::Compress;
11use crate::entry::{Entry, Footer};
12use crate::reader::{Either, Reader};
13use crate::writer::Writer;
14use crate::{
15 AUTO_COMPRESS_THRESHOLD, BNDL_ALIGN, BNDL_MAGIC, ENTRY_SIZE, FOOTER_MAGIC, FOOTER_SIZE,
16 HEADER_SIZE, pad, write_padding,
17};
18
19/// A binary archive for collecting files.
20///
21/// Uses memory-mapped I/O for fast reads, supports optional zstd compression, and handles updates via shadowing.
22/// Files can be added incrementally without rewriting the entire archive.
23///
24/// # Example
25///
26/// ```no_run
27/// use bindle_file::{Bindle, Compress};
28///
29/// let mut archive = Bindle::open("data.bndl")?;
30/// archive.add("file.txt", b"data", Compress::None)?;
31/// archive.save()?;
32/// # Ok::<(), std::io::Error>(())
33/// ```
pub struct Bindle {
    // Filesystem location of the archive; `vacuum` derives its temp path from this.
    pub(crate) path: PathBuf,
    // Open read/write handle; holds an advisory lock for the struct's lifetime.
    pub(crate) file: File,
    // Memory map of the file for zero-copy reads; `None` for a freshly created,
    // still-empty archive (no footer/index to map yet).
    pub(crate) mmap: Option<Mmap>,
    // Live entries keyed by name; BTreeMap gives a deterministic index layout on save.
    pub(crate) index: BTreeMap<String, Entry>,
    // Byte offset where the data region ends and the serialized index begins.
    pub(crate) data_end: u64,
}
41
42impl Bindle {
43 /// Creates a new archive, overwriting any existing file at the path.
44 pub fn create<P: AsRef<Path>>(path: P) -> io::Result<Self> {
45 let path_buf = path.as_ref().to_path_buf();
46 let opts = OpenOptions::new()
47 .truncate(true)
48 .read(true)
49 .write(true)
50 .create(true)
51 .to_owned();
52 Self::new(path_buf, opts)
53 }
54
55 /// Opens an existing archive or creates a new one if it doesn't exist.
56 pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
57 let path_buf = path.as_ref().to_path_buf();
58 let opts = OpenOptions::new()
59 .read(true)
60 .write(true)
61 .create(true)
62 .to_owned();
63 Self::new(path_buf, opts)
64 }
65
66 /// Opens an existing archive. Returns an error if the file doesn't exist.
67 pub fn load<P: AsRef<Path>>(path: P) -> io::Result<Self> {
68 let path_buf = path.as_ref().to_path_buf();
69 let opts = OpenOptions::new().read(true).write(true).to_owned();
70 Self::new(path_buf, opts)
71 }
72
    /// Create a new `Bindle` from a path and file, the path must match the file
    ///
    /// Opens the file with `opts`, takes a shared advisory lock, and parses the
    /// on-disk structure: magic header, footer (at the very end of the file),
    /// then the index records starting at the footer's `index_offset`.
    ///
    /// A zero-length file is treated as a brand-new archive: the magic is
    /// written and an empty in-memory index is returned (no mmap yet).
    ///
    /// # Errors
    ///
    /// Returns `InvalidData` if the file is too small, the header magic is
    /// wrong, or the footer is missing/corrupt. Corrupted index records do not
    /// error: parsing stops at the first bad record and keeps what was read.
    pub fn new(path: PathBuf, opts: OpenOptions) -> io::Result<Self> {
        let mut file = opts.open(&path)?;
        // Shared (read) advisory lock for the lifetime of this handle.
        // NOTE(review): the empty-file branch below writes the magic while
        // holding only a shared lock — confirm this is safe for concurrent opens.
        file.lock_shared()?;
        let len = file.metadata()?.len();

        // Handle completely new/empty files
        if len == 0 {
            file.write_all(BNDL_MAGIC)?;
            return Ok(Self {
                path,
                file,
                mmap: None,
                index: BTreeMap::new(),
                // Data will be appended right after the fixed-size header.
                data_end: HEADER_SIZE as u64,
            });
        }

        // Safety check: File must be at least HEADER + FOOTER size (24 bytes)
        // This prevents "attempt to subtract with overflow" when calculating footer_pos
        if len < (HEADER_SIZE + FOOTER_SIZE) as u64 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "File too small to be a valid bindle",
            ));
        }

        let mut header = [0u8; 8];
        file.read_exact(&mut header)?;
        if &header != BNDL_MAGIC {
            return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid header"));
        }

        // SAFETY: we map our own open handle. The mapping assumes no other
        // process truncates the file while mapped; the advisory lock above
        // discourages that but cannot fully guarantee it.
        let m = unsafe { Mmap::map(&file)? };

        // Calculate footer position. Subtraction is now safe due to the check above.
        let footer_pos = m.len() - FOOTER_SIZE;
        let footer = Footer::read_from_bytes(&m[footer_pos..])
            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Failed to read footer"))?;

        if footer.magic() != FOOTER_MAGIC {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "Invalid footer, the file may be corrupt",
            ));
        }

        // The index starts exactly where the data region ends.
        let data_end = footer.index_offset();
        let count = footer.entry_count();
        let mut index = BTreeMap::new();

        // Walk the index records: [Entry header | name bytes | padding].
        let mut cursor = data_end as usize;
        for _ in 0..count {
            // Ensure there is enough data left for an Entry header
            if cursor + ENTRY_SIZE > footer_pos {
                break;
            }

            let entry = match Entry::read_from_bytes(&m[cursor..cursor + ENTRY_SIZE]) {
                Ok(e) => e,
                Err(_) => break, // Corrupted entry, stop reading
            };
            let n_start = cursor + ENTRY_SIZE;

            // Validate that the filename exists within the mapped bounds
            if n_start + entry.name_len() > footer_pos {
                break;
            }

            let name =
                String::from_utf8_lossy(&m[n_start..n_start + entry.name_len()]).into_owned();
            index.insert(name, entry);

            // Advance to the next record: round the record size up to the next
            // alignment boundary (assumes BNDL_ALIGN is a power of two).
            let total = ENTRY_SIZE + entry.name_len();
            cursor += (total + (BNDL_ALIGN - 1)) & !(BNDL_ALIGN - 1);
        }

        Ok(Self {
            path,
            file,
            mmap: Some(m),
            index,
            data_end,
        })
    }
158
159 fn should_auto_compress(&self, compress: Compress, len: usize) -> bool {
160 compress == Compress::Zstd || (compress == Compress::Auto && len > AUTO_COMPRESS_THRESHOLD)
161 }
162
163 /// Adds data to the archive with the given name.
164 ///
165 /// If an entry with the same name exists, it will be shadowed. Call [`save()`](Bindle::save) to commit changes.
166 pub fn add(&mut self, name: &str, data: &[u8], compress: Compress) -> io::Result<()> {
167 let mut stream = self.writer(name, compress)?;
168 stream.write_all(data)?;
169 stream.close()?;
170 Ok(())
171 }
172
173 /// Adds a file from the filesystem to the archive.
174 ///
175 /// Reads the file at `path` and stores it with the given `name`. Call [`save()`](Bindle::save) to commit changes.
176 pub fn add_file(
177 &mut self,
178 name: &str,
179 path: impl AsRef<Path>,
180 compress: Compress,
181 ) -> io::Result<()> {
182 let mut stream = self.writer(name, compress)?;
183 let mut src = std::fs::File::open(path)?;
184 std::io::copy(&mut src, &mut stream)?;
185 Ok(())
186 }
187
    /// Commits all pending changes by writing the index and footer to disk.
    ///
    /// Must be called after add/remove operations to make changes persistent.
    ///
    /// Layout appended at `data_end`: one `[Entry | name | padding]` record per
    /// index entry, then the footer. The file is truncated afterwards to drop
    /// stale bytes left over from a previously larger index/footer.
    ///
    /// NOTE(review): no `sync_all` is issued here, so crash durability depends
    /// on the OS flushing — confirm whether an fsync is wanted.
    pub fn save(&mut self) -> io::Result<()> {
        // Upgrade to an exclusive advisory lock while rewriting the file tail.
        self.file.lock()?;
        self.file.seek(SeekFrom::Start(self.data_end))?;
        let index_start = self.data_end;

        // Use buffered writer to batch index writes
        {
            let mut writer = BufWriter::new(&mut self.file);
            for (name, entry) in &self.index {
                writer.write_all(entry.as_bytes())?;
                writer.write_all(name.as_bytes())?;
                // Pad so the next record starts on a BNDL_ALIGN boundary.
                let pad = pad::<BNDL_ALIGN, usize>(ENTRY_SIZE + name.len());
                if pad > 0 {
                    write_padding(&mut writer, pad)?;
                }
            }

            let footer = Footer::new(index_start, self.index.len() as u32, FOOTER_MAGIC);
            writer.write_all(footer.as_bytes())?;
            writer.flush()?;
        } // Drop writer here to release borrow

        // Truncate file to current position to remove any old data
        let current_pos = self.file.stream_position()?;
        self.file.set_len(current_pos)?;

        // SAFETY: mapping our own file immediately after writing it; the
        // exclusive lock is still held, so no cooperating process resizes it.
        let mmap = unsafe { Mmap::map(&self.file)? };
        self.mmap = Some(mmap);
        // Downgrade back to a shared lock for subsequent reads.
        self.file.lock_shared()?;
        Ok(())
    }
222
    /// Reclaims space by removing shadowed data.
    ///
    /// Rebuilds the archive with only live entries, removing old versions of updated files.
    ///
    /// Strategy: stream each live entry into a sibling `*.tmp` file, append a
    /// fresh index and footer, `sync_all`, then atomically `rename` the temp
    /// file over the original and adopt the temp handle/mmap as our own.
    ///
    /// NOTE(review): if an error occurs after the temp file is created, the
    /// `*.tmp` file is left behind on disk — confirm whether cleanup is wanted.
    pub fn vacuum(&mut self) -> io::Result<()> {
        let temp_path = self.path.with_extension("tmp");

        // Create temp file and keep handle to reuse after rename
        let mut temp_file = OpenOptions::new()
            .write(true)
            .read(true)
            .create(true)
            .truncate(true)
            .open(&temp_path)?;

        // Exclusive lock on the temp file while we build it.
        temp_file.lock()?;
        temp_file.write_all(BNDL_MAGIC)?;
        let mut current_offset = HEADER_SIZE as u64;

        // Copy only live entries from original to temp
        for entry in self.index.values_mut() {
            self.file.seek(SeekFrom::Start(entry.offset()))?;
            temp_file.seek(SeekFrom::Start(current_offset))?;

            // Stream data without allocating full buffer
            let mut limited = (&mut self.file).take(entry.compressed_size());
            io::copy(&mut limited, &mut temp_file)?;

            // Rewrite the entry's offset to its new, compacted position.
            entry.set_offset(current_offset);
            let pad = pad::<8, u64>(entry.compressed_size());
            if pad > 0 {
                write_padding(&mut temp_file, pad as usize)?;
            }
            current_offset += entry.compressed_size() + pad;
        }

        // Write the index and footer
        let index_start = current_offset;
        for (name, entry) in &self.index {
            temp_file.write_all(entry.as_bytes())?;
            temp_file.write_all(name.as_bytes())?;
            let pad = pad::<BNDL_ALIGN, usize>(ENTRY_SIZE + name.len());
            if pad > 0 {
                write_padding(&mut temp_file, pad)?;
            }
        }

        let footer = Footer::new(index_start, self.index.len() as u32, FOOTER_MAGIC);
        temp_file.write_all(footer.as_bytes())?;
        // Make the rebuilt archive durable before it replaces the original.
        temp_file.sync_all()?;

        // Acquire exclusive lock just before rename to prevent concurrent access
        self.file.lock()?;

        // Release locks and close current file
        drop(self.mmap.take());
        let _ = self.file.unlock();

        // Atomically replace original with temp
        std::fs::rename(&temp_path, &self.path)?;

        // Reuse temp_file handle (still valid after rename)
        temp_file.lock_shared()?;
        // SAFETY: mapping the file we just built and renamed; we hold a shared
        // lock on it and no other handle to it is being resized here.
        let mmap = unsafe { Mmap::map(&temp_file)? };

        // Re-read the footer from the new mapping to refresh `data_end`.
        let footer_pos = mmap.len() - FOOTER_SIZE;
        let footer = Footer::read_from_bytes(&mmap[footer_pos..]).map_err(|_| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                "Failed to read footer after vacuum",
            )
        })?;

        self.file = temp_file;
        self.mmap = Some(mmap);
        self.data_end = footer.index_offset();

        Ok(())
    }
301
302 /// Reads an entry from the archive, decompressing if needed.
303 ///
304 /// Returns `None` if the entry doesn't exist or if CRC32 verification fails.
305 pub fn read<'a>(&'a self, name: &str) -> Option<Cow<'a, [u8]>> {
306 let entry = self.index.get(name)?;
307 let mmap = self.mmap.as_ref()?;
308
309 let data = if entry.compression_type() == Compress::Zstd {
310 let compressed_data = mmap.get(
311 entry.offset() as usize..(entry.offset() + entry.compressed_size()) as usize,
312 )?;
313 let mut out = Vec::with_capacity(entry.uncompressed_size() as usize);
314 zstd::Decoder::new(compressed_data)
315 .ok()?
316 .read_to_end(&mut out)
317 .ok()?;
318 Cow::Owned(out)
319 } else {
320 let uncompressed_data = mmap.get(
321 entry.offset() as usize..(entry.offset() + entry.uncompressed_size()) as usize,
322 )?;
323 Cow::Borrowed(uncompressed_data)
324 };
325
326 // Verify CRC32
327 let computed_crc = crc32fast::hash(&data);
328 if computed_crc != entry.crc32() {
329 return None;
330 }
331
332 Some(data)
333 }
334
335 /// Reads an entry into a provided buffer, avoiding allocation.
336 ///
337 /// Decompresses if needed and verifies CRC32. Returns the number of bytes read.
338 /// If the buffer is too small, only reads up to buffer.len() bytes.
339 ///
340 /// # Example
341 ///
342 /// ```no_run
343 /// use bindle_file::Bindle;
344 ///
345 /// let archive = Bindle::open("data.bndl")?;
346 /// let mut buffer = vec![0u8; 1024];
347 /// let bytes_read = archive.read_into("file.txt", &mut buffer)?;
348 /// # Ok::<(), std::io::Error>(())
349 /// ```
350 pub fn read_into(&self, name: &str, buffer: &mut [u8]) -> io::Result<usize> {
351 let mut reader = self.reader(name)?;
352 let bytes_read = reader.read(buffer)?;
353 reader.verify_crc32()?;
354 Ok(bytes_read)
355 }
356
357 /// Reads an entry and writes it to the given writer.
358 ///
359 /// Returns the number of bytes written. Verifies CRC32 after reading.
360 pub fn read_to<W: std::io::Write>(&self, name: &str, mut w: W) -> std::io::Result<u64> {
361 let mut reader = self.reader(name)?;
362 let bytes_copied = std::io::copy(&mut reader, &mut w)?;
363 reader.verify_crc32()?;
364 Ok(bytes_copied)
365 }
366
367 /// Returns a streaming reader for an entry.
368 ///
369 /// Automatically decompresses if the entry is compressed. Call [`Reader::verify_crc32()`] after reading to verify integrity.
370 pub fn reader<'a>(&'a self, name: &str) -> io::Result<Reader<'a>> {
371 let entry = self
372 .index
373 .get(name)
374 .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "Entry not found"))?;
375
376 let start = entry.offset() as usize;
377 let end = start + entry.compressed_size() as usize;
378 let mmap = self
379 .mmap
380 .as_ref()
381 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Missing mmap"))?;
382 let data_slice = &mmap[start..end];
383
384 let cursor = io::Cursor::new(data_slice);
385
386 if entry.compression_type() == Compress::Zstd {
387 // Zstd streaming decoder
388 let decoder = zstd::Decoder::new(cursor)?;
389 Ok(Reader {
390 decoder: Either::Left(decoder),
391 crc32_hasher: Hasher::new(),
392 expected_crc32: entry.crc32(),
393 })
394 } else {
395 Ok(Reader {
396 decoder: Either::Right(cursor),
397 crc32_hasher: Hasher::new(),
398 expected_crc32: entry.crc32(),
399 })
400 }
401 }
402
403 /// Returns the number of entries in the archive.
404 pub fn len(&self) -> usize {
405 self.index.len()
406 }
407
408 /// Returns true if the archive contains no entries.
409 pub fn is_empty(&self) -> bool {
410 self.index.is_empty()
411 }
412
413 /// Returns a reference to the archive index.
414 ///
415 /// The index maps entry names to their metadata.
416 pub fn index(&self) -> &BTreeMap<String, Entry> {
417 &self.index
418 }
419
420 /// Removes all entries from the index.
421 ///
422 /// Call [`save()`](Bindle::save) to commit. Data remains in the file until [`vacuum()`](Bindle::vacuum) is called.
423 pub fn clear(&mut self) {
424 self.index.clear()
425 }
426
427 /// Returns true if an entry with the given name exists.
428 pub fn exists(&self, name: &str) -> bool {
429 self.index.contains_key(name)
430 }
431
432 /// Removes an entry from the index.
433 ///
434 /// Returns true if the entry existed. Data remains in the file until [`vacuum()`](Bindle::vacuum) is called.
435 pub fn remove(&mut self, name: &str) -> bool {
436 self.index.remove(name).is_some()
437 }
438
439 /// Recursively adds all files from a directory to the archive.
440 ///
441 /// File paths are stored relative to the source directory. Call [`save()`](Bindle::save) to commit.
442 pub fn pack<P: AsRef<Path>>(&mut self, src_dir: P, compress: Compress) -> io::Result<()> {
443 self.pack_recursive(src_dir.as_ref(), src_dir.as_ref(), compress)
444 }
445
446 fn pack_recursive(
447 &mut self,
448 base: &Path,
449 current: &Path,
450 compress: Compress,
451 ) -> io::Result<()> {
452 if current.is_dir() {
453 for entry in std::fs::read_dir(current)? {
454 self.pack_recursive(base, &entry?.path(), compress)?;
455 }
456 } else {
457 let name = current
458 .strip_prefix(base)
459 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?
460 .to_str()
461 .unwrap_or_default();
462 self.add_file(&name, current, compress)?;
463 }
464 Ok(())
465 }
466
467 /// Extracts all entries to a destination directory.
468 ///
469 /// Creates subdirectories as needed to match the stored paths.
470 pub fn unpack<P: AsRef<Path>>(&self, dest: P) -> io::Result<()> {
471 let dest_path = dest.as_ref();
472 std::fs::create_dir_all(dest_path)?;
473
474 // Collect all unique parent directories
475 let mut dirs = std::collections::HashSet::new();
476 for (name, _) in &self.index {
477 if let Some(parent) = Path::new(name).parent() {
478 // Only add non-empty parent paths
479 if parent != Path::new("") {
480 dirs.insert(dest_path.join(parent));
481 }
482 }
483 }
484
485 // Create all directories upfront (sorted for parent-first order)
486 if !dirs.is_empty() {
487 let mut dirs: Vec<_> = dirs.into_iter().collect();
488 dirs.sort();
489 for dir in dirs {
490 std::fs::create_dir_all(&dir)?;
491 }
492 }
493
494 // Sort entries by physical offset for sequential reads (better cache locality)
495 let mut entries: Vec<_> = self.index.iter().collect();
496 entries.sort_by_key(|(_, entry)| entry.offset());
497
498 // Extract files without per-file directory checks
499 for (name, _) in entries {
500 let file_path = dest_path.join(name);
501 let mut reader = self.reader(name)?;
502 let mut file = File::create(&file_path)?;
503 io::copy(&mut reader, &mut file)?;
504 reader.verify_crc32()?;
505 }
506 Ok(())
507 }
508
    /// Creates a streaming writer for adding an entry.
    ///
    /// The writer must be closed and then [`save()`](Bindle::save) must be called to commit the entry.
    ///
    /// Takes an exclusive lock and positions the file at the end of the data
    /// region (`data_end`); the returned [`Writer`] appends from there.
    ///
    /// NOTE(review): auto-compression is decided here with a length of 0, so
    /// `Compress::Auto` never enables zstd for streamed writes — confirm this
    /// is intentional.
    pub fn writer<'a>(&'a mut self, name: &str, compress: Compress) -> io::Result<Writer<'a>> {
        // Exclusive lock: the writer will append data to the shared file handle.
        self.file.lock()?;
        // Only seek if not already at the correct position
        let current_pos = self.file.stream_position()?;
        if current_pos != self.data_end {
            self.file.seek(SeekFrom::Start(self.data_end))?;
        }
        let compress = self.should_auto_compress(compress, 0);
        let start_offset = self.data_end;
        let encoder = if compress {
            // Clone the handle so the zstd encoder can own a writer while we
            // keep `self.file` for bookkeeping; both refer to the same file.
            let f = self.file.try_clone()?;
            // Level 3 is zstd's default speed/ratio trade-off.
            Some(zstd::Encoder::new(f, 3)?)
        } else {
            None
        };
        Ok(Writer {
            name: name.to_string(),
            bindle: self,
            encoder,
            start_offset,
            uncompressed_size: 0,
            crc32_hasher: Hasher::new(),
        })
    }
536}
537
impl Drop for Bindle {
    fn drop(&mut self) {
        // Best-effort release of the advisory file lock; errors during drop
        // are intentionally ignored (there is no way to report them here).
        let _ = self.file.unlock();
    }
}