an efficient binary archive format

improve locking

+45 -47
+2 -1
README.md
··· 1 1 # bindle-file 2 2 3 - `bindle` is a general purpose, binary archive format designed for efficient reads and writes. 3 + [bindle](https://en.wikipedia.org/wiki/Bindle) is a general purpose binary archive 4 + format for collecting files.
+9 -9
SPEC.md
··· 30 30 - **Shadowing:** New versions of existing files are simply appended to the end of the data segment. The file remains append-only until a vacuum operation is performed. 31 31 32 32 ### 2.3 Index Entry 33 - The index is a series of entries. Each entry consists of a fixed metadata block followed by a variable-length filename. 33 + The index is a series of entries. Each entry consists of a fixed metadata block followed by a variable-length filename. All multi-byte integers are stored in little-endian byte order. 34 34 35 35 | Field | Size | Type | Description | 36 36 | :--- | :--- | :--- | :--- | 37 37 | `offset` | 8 bytes | u64 | Absolute file offset to the data blob | 38 38 | `c_size` | 8 bytes | u64 | Compressed size on disk | 39 39 | `u_size` | 8 bytes | u64 | Original uncompressed size | 40 - | `crc32` | 4 bytes | u32 | Checksum of the stored data | 40 + | `crc32` | 4 bytes | u32 | CRC32 checksum of the uncompressed data | 41 41 | `name_len` | 2 bytes | u16 | Length of the filename string | 42 - | `comp_type` | 1 byte | u8 | `0` = Raw, `1` = Zstandard | 42 + | `comp_type` | 1 byte | u8 | `0` = None, `1` = Zstd | 43 43 | `reserved` | 1 byte | u8 | Alignment padding | 44 44 | `filename` | Variable | UTF-8 | The entry name | 45 45 46 46 **Padding:** After the filename, the file MUST be padded with null bytes (`\0`) to the next 8-byte boundary before the next entry begins. 47 47 48 48 ### 2.4 Footer 49 - The last 16 bytes of the file are used to locate the index. Both fields are stored in little-endian format. 49 + The last 16 bytes of the file are used to locate the index. All fields are stored in little-endian format. 50 50 51 51 | Field | Size | Type | Description | 52 52 | :--- | :--- | :--- | :--- | 53 53 | `index_offset` | 8 bytes | u64 | Absolute offset to the start of the index | 54 54 | `entry_count` | 4 bytes | u32 | Total number of unique entries in the index | 55 - | `magic` | 4 bytes | u32 | Magic sentinel value `62 62 62 62` (ASCII: `bbbb`). 
55 + | `magic` | 4 bytes | u32 | Magic sentinel value `0x62626262` (ASCII: `bbbb`) | 56 56 57 57 --- 58 58 ··· 68 68 ### 3.2 Vacuuming 69 69 To reclaim space used by shadowed data: 70 70 1. Create a temporary file and write the `BINDL001` header. 71 - 2. Iterate through the **live** index entries only. 72 - 3. Copy the referenced data blobs to the new file, updating their offsets in a new in-memory index. 73 - 4. Write the new Index and Footer to the temporary file. 74 - 5. Atomically replace the old file with the new one. 71 + 2. Iterate through the **live** index entries only, copying referenced data from the original. 72 + 3. Write the new Index and Footer to the temporary file. 73 + 4. Atomically replace the original file with the temporary file. 74 + 5. On failure, delete the temporary file. 75 75 76 76 --- 77 77
+31 -37
src/bindle.rs
··· 194 194 } 195 195 196 196 pub fn vacuum(&mut self) -> io::Result<()> { 197 - let backup_path = self.path.with_extension("backup"); 197 + let temp_path = self.path.with_extension("tmp"); 198 198 199 - // Release locks and close current file 200 - drop(self.mmap.take()); 201 - let _ = self.file.unlock(); 202 - 203 - // Rename original to backup 204 - std::fs::rename(&self.path, &backup_path)?; 205 - 206 - // Open backup for reading 207 - let mut backup_file = File::open(&backup_path)?; 208 - 209 - // Create new file at original path 199 + // Create and lock temp file 210 200 let result = { 211 - let mut new_file = OpenOptions::new() 201 + let mut temp_file = OpenOptions::new() 212 202 .write(true) 213 203 .read(true) 214 204 .create(true) 215 205 .truncate(true) 216 - .open(&self.path)?; 206 + .open(&temp_path)?; 217 207 218 - new_file.write_all(BNDL_MAGIC)?; 208 + temp_file.lock_exclusive()?; 209 + temp_file.write_all(BNDL_MAGIC)?; 219 210 let mut current_offset = HEADER_SIZE as u64; 220 211 221 - // Copy only live entries from backup to new file 212 + // Copy only live entries from original to temp 222 213 for entry in self.index.values_mut() { 223 214 let mut buf = vec![0u8; entry.compressed_size() as usize]; 224 - backup_file.seek(SeekFrom::Start(entry.offset()))?; 225 - backup_file.read_exact(&mut buf)?; 215 + self.file.seek(SeekFrom::Start(entry.offset()))?; 216 + self.file.read_exact(&mut buf)?; 226 217 227 - new_file.seek(SeekFrom::Start(current_offset))?; 228 - new_file.write_all(&buf)?; 218 + temp_file.seek(SeekFrom::Start(current_offset))?; 219 + temp_file.write_all(&buf)?; 229 220 230 221 entry.set_offset(current_offset); 231 222 let pad = pad::<8, u64>(entry.compressed_size()); 232 223 if pad > 0 { 233 - write_padding(&mut new_file, pad as usize)?; 224 + write_padding(&mut temp_file, pad as usize)?; 234 225 } 235 226 current_offset += entry.compressed_size() + pad; 236 227 } ··· 238 229 // Write the index and footer 239 230 let index_start = 
current_offset; 240 231 for (name, entry) in &self.index { 241 - new_file.write_all(entry.as_bytes())?; 242 - new_file.write_all(name.as_bytes())?; 232 + temp_file.write_all(entry.as_bytes())?; 233 + temp_file.write_all(name.as_bytes())?; 243 234 let pad = pad::<BNDL_ALIGN, usize>(ENTRY_SIZE + name.len()); 244 235 if pad > 0 { 245 - write_padding(&mut new_file, pad)?; 236 + write_padding(&mut temp_file, pad)?; 246 237 } 247 238 } 248 239 249 240 let footer = Footer::new(index_start, self.index.len() as u32, FOOTER_MAGIC); 250 - new_file.write_all(footer.as_bytes())?; 251 - new_file.sync_all()?; 241 + temp_file.write_all(footer.as_bytes())?; 242 + temp_file.sync_all()?; 252 243 253 244 Ok(()) 254 245 }; 255 246 256 247 // Handle result 257 - match result { 258 - Ok(()) => { 259 - // Success - delete backup 260 - std::fs::remove_file(&backup_path).ok(); 261 - } 262 - Err(e) => { 263 - // Failure - restore from backup 264 - std::fs::remove_file(&self.path).ok(); 265 - std::fs::rename(&backup_path, &self.path).ok(); 266 - return Err(e); 267 - } 248 + if let Err(e) = result { 249 + std::fs::remove_file(&temp_path).ok(); 250 + return Err(e); 268 251 } 252 + 253 + // Acquire exclusive lock just before rename to prevent concurrent access 254 + self.file.lock_exclusive()?; 255 + 256 + // Release locks and close current file 257 + drop(self.mmap.take()); 258 + let _ = self.file.unlock(); 259 + 260 + // Atomically replace original with temp 261 + std::fs::rename(&temp_path, &self.path)?; 269 262 270 263 // Re-open the new file 271 264 let file = OpenOptions::new().read(true).write(true).open(&self.path)?; ··· 435 428 } 436 429 437 430 pub fn writer<'a>(&'a mut self, name: &str, compress: Compress) -> io::Result<Writer<'a>> { 431 + self.file.lock_exclusive()?; 438 432 self.file.seek(SeekFrom::Start(self.data_end))?; 439 433 let compress = self.should_auto_compress(compress, 0); 440 434 let f = self.file.try_clone()?;
+3
src/writer.rs
··· 86 86 87 87 self.bindle.index.insert(self.name.clone(), entry); 88 88 self.name.clear(); // Mark as closed 89 + 90 + // Downgrade to shared lock after write completes 91 + self.bindle.file.lock_shared()?; 89 92 Ok(()) 90 93 } 91 94