Cargo.lock  (+78 -11)
···
  "multihash",
  "serde",
  "serde_bytes",
- "unsigned-varint",
+ "unsigned-varint 0.8.0",
 ]

 [[package]]
···
  "fiat-crypto",
  "rustc_version",
  "subtle",
+ "zeroize",
 ]

 [[package]]
···
 dependencies = [
  "curve25519-dalek",
  "ed25519",
+ "rand_core 0.6.4",
+ "serde",
  "sha2",
  "subtle",
+ "zeroize",
 ]

 [[package]]
···
 checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]

 [[package]]
···
  "libc",
  "percent-encoding",
  "pin-project-lite",
- "socket2 0.5.10",
+ "socket2 0.6.1",
  "system-configuration",
  "tokio",
  "tower-service",
···
 ]

 [[package]]
+name = "iroh-car"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a"
+dependencies = [
+ "anyhow",
+ "cid",
+ "futures",
+ "serde",
+ "serde_ipld_dagcbor",
+ "thiserror 1.0.69",
+ "tokio",
+ "unsigned-varint 0.7.2",
+]
+
+[[package]]
 name = "is_ci"
 version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
 ]

 [[package]]
+name = "jacquard-repo"
+version = "0.7.0"
+dependencies = [
+ "bytes",
+ "cid",
+ "ed25519-dalek",
+ "hex",
+ "ipld-core",
+ "iroh-car",
+ "jacquard-common 0.7.0",
+ "jacquard-derive 0.7.0",
+ "k256",
+ "miette",
+ "multihash",
+ "n0-future",
+ "p256",
+ "rand 0.8.5",
+ "serde",
+ "serde_bytes",
+ "serde_ipld_dagcbor",
+ "serde_ipld_dagjson",
+ "sha2",
+ "smol_str",
+ "tempfile",
+ "thiserror 2.0.17",
+ "tokio",
+ "trait-variant",
+]
+
+[[package]]
 name = "jni"
 version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
 dependencies = [
  "core2",
  "serde",
- "unsigned-varint",
+ "unsigned-varint 0.8.0",
 ]

 [[package]]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
 dependencies = [
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]

 [[package]]
···
  "quinn-udp",
  "rustc-hash",
  "rustls",
- "socket2 0.5.10",
+ "socket2 0.6.1",
  "thiserror 2.0.17",
  "tokio",
  "tracing",
···
  "cfg_aliases",
  "libc",
  "once_cell",
- "socket2 0.5.10",
+ "socket2 0.6.1",
  "tracing",
- "windows-sys 0.59.0",
+ "windows-sys 0.60.2",
 ]

 [[package]]
···
  "errno",
  "libc",
  "linux-raw-sys 0.11.0",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]

 [[package]]
···
 ]

 [[package]]
+name = "serde_ipld_dagjson"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "82d2d9d1f29999ee9a3d774fe2a5db4cc199da5178d0350f5e4482ea04252aee"
+dependencies = [
+ "ipld-core",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
 name = "serde_json"
 version = "1.0.145"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
  "getrandom 0.3.4",
  "once_cell",
  "rustix 1.1.2",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]

 [[package]]
···

 [[package]]
 name = "unsigned-varint"
+version = "0.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105"
+
+[[package]]
+name = "unsigned-varint"
 version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06"
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
 dependencies = [
- "windows-sys 0.48.0",
+ "windows-sys 0.61.2",
 ]

 [[package]]
crates/jacquard-common/src/types/crypto.rs  (+23 -17)
···
 use crate::IntoStatic;
 use std::borrow::Cow;

+/// Multicodec code for SHA2-256 hash
+pub const SHA2_256: u64 = 0x12;
+
+/// Multicodec code for DAG-CBOR codec
+pub const DAG_CBOR: u64 = 0x71;
+
 /// Known multicodec key codecs for Multikey public keys
 ///
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
···
     }
 }

-fn decode_uvarint(data: &[u8]) -> Option<(u64, usize)> {
+pub fn decode_uvarint(data: &[u8]) -> Option<(u64, usize)> {
     let mut x: u64 = 0;
     let mut s: u32 = 0;
     for (i, b) in data.iter().copied().enumerate() {
···
     None
 }

+pub fn encode_uvarint(mut x: u64) -> Vec<u8> {
+    let mut out = Vec::new();
+    while x >= 0x80 {
+        out.push(((x as u8) & 0x7F) | 0x80);
+        x >>= 7;
+    }
+    out.push(x as u8);
+    out
+}
+
+pub fn multikey(code: u64, key: &[u8]) -> String {
+    let mut buf = encode_uvarint(code);
+    buf.extend_from_slice(key);
+    multibase::encode(multibase::Base::Base58Btc, buf)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
     use multibase;
-
-    fn encode_uvarint(mut x: u64) -> Vec<u8> {
-        let mut out = Vec::new();
-        while x >= 0x80 {
-            out.push(((x as u8) & 0x7F) | 0x80);
-            x >>= 7;
-        }
-        out.push(x as u8);
-        out
-    }
-
-    fn multikey(code: u64, key: &[u8]) -> String {
-        let mut buf = encode_uvarint(code);
-        buf.extend_from_slice(key);
-        multibase::encode(multibase::Base::Base58Btc, buf)
-    }

     #[test]
     fn decode_ed25519() {
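
Review note: with `encode_uvarint`, `decode_uvarint`, and `multikey` promoted out of the test module, callers can round-trip multicodec prefixes directly. A minimal sketch, assuming these functions are re-exported under `jacquard_common::types::crypto` as shown in the diff:

```rust
use jacquard_common::types::crypto::{decode_uvarint, encode_uvarint, multikey};

fn uvarint_round_trip() {
    // 0xED is the multicodec code for ed25519-pub; it needs two varint bytes.
    let encoded = encode_uvarint(0xED);
    assert_eq!(encoded, vec![0xED, 0x01]);

    let (code, len) = decode_uvarint(&encoded).unwrap();
    assert_eq!((code, len), (0xED, 2));

    // multikey() prepends the varint prefix to the raw key bytes and
    // base58btc-encodes the result, yielding the "z..." Multikey form.
    let mk = multikey(0xED, &[0u8; 32]);
    assert!(mk.starts_with('z'));
}
```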
crates/jacquard-common/src/types/tid.rs  (+4 -6)
···

     /// Construct a TID from a timestamp (in microseconds) and clock ID
     pub fn from_time(timestamp: u64, clkid: u32) -> Self {
-        let str = smol_str::format_smolstr!(
-            "{0}{1:2>2}",
-            s32_encode(timestamp as u64),
-            s32_encode(Into::<u32>::into(clkid) as u64)
-        );
-        Self(str)
+        // Combine timestamp and clock ID into single u64: 53 bits timestamp + 10 bits clock ID
+        // 0TTTTTTTTTTTTTTT TTTTTTTTTTTTTTTT TTTTTTTTTTTTTTTT TTTTTTCCCCCCCCCC
+        let tid = (timestamp << 10) & 0x7FFF_FFFF_FFFF_FC00 | (clkid as u64 & 0x3FF);
+        Self(s32_encode(tid))
     }

     /// Extract the timestamp component (microseconds since UNIX epoch)
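
Review note: the rewritten `from_time` packs both fields into one u64 before s32-encoding, rather than concatenating two separately encoded strings. A worked example of the bit layout (values are hypothetical):

```rust
// Layout: 1 zero bit | 53-bit timestamp | 10-bit clock ID.
let timestamp: u64 = 1_700_000_000_000_000; // µs since epoch, well under 2^53
let clkid: u32 = 0x2AA;

let tid = (timestamp << 10) & 0x7FFF_FFFF_FFFF_FC00 | (clkid as u64 & 0x3FF);

assert_eq!(tid >> 10, timestamp);      // timestamp occupies bits 10..62
assert_eq!(tid & 0x3FF, clkid as u64); // clock ID occupies bits 0..9
assert_eq!(tid >> 63, 0);              // the top bit stays zero
```

53 bits of microseconds last until roughly the year 2255, so the mask loses nothing for realistic timestamps.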
crates/jacquard-repo/Cargo.toml  (+64, new file)
···
+[package]
+name = "jacquard-repo"
+description = "AT Protocol repository primitives: MST, commits, CAR I/O"
+edition.workspace = true
+version.workspace = true
+authors.workspace = true
+repository.workspace = true
+keywords.workspace = true
+categories.workspace = true
+readme.workspace = true
+exclude.workspace = true
+license.workspace = true
+
+[features]
+default = []
+
+[dependencies]
+# Internal
+jacquard-common = { path = "../jacquard-common", version = "0.7.0", features = ["crypto-ed25519", "crypto-k256", "crypto-p256"] }
+jacquard-derive = { path = "../jacquard-derive", version = "0.7.0" }
+
+# Serialization
+serde.workspace = true
+serde_ipld_dagcbor.workspace = true
+serde_bytes = "0.11"
+
+# IPLD primitives (match jacquard-common versions)
+cid = { version = "0.11.1", features = ["serde", "std"] }
+ipld-core = { version = "0.4.2", features = ["serde"] }
+multihash = "0.19.3"
+
+# CAR file I/O
+iroh-car = "0.5"
+
+# Data types
+bytes.workspace = true
+smol_str.workspace = true
+
+# Hashing
+sha2 = "0.10"
+
+# Error handling
+thiserror.workspace = true
+miette.workspace = true
+
+# Async
+trait-variant.workspace = true
+n0-future.workspace = true
+tokio = { workspace = true, default-features = false, features = ["fs", "io-util"] }
+
+# Crypto (for commit signing/verification)
+ed25519-dalek = { version = "2", features = ["rand_core"] }
+k256 = { version = "0.13", features = ["ecdsa", "sha256"] }
+p256 = { version = "0.13", features = ["ecdsa", "sha256"] }
+
+[dev-dependencies]
+serde_ipld_dagjson = "0.2"
+tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread"] }
+tempfile = "3.14"
+rand = "0.8"
+hex = "0.4"
+
+[package.metadata.docs.rs]
+all-features = true
crates/jacquard-repo/src/car/mod.rs  (+28, new file)
···
+//! CAR (Content Addressable aRchive) file I/O
+//!
+//! Provides utilities for reading and writing CAR files, which are the standard
+//! format for AT Protocol repository export/import.
+//!
+//! # Examples
+//!
+//! Reading a CAR file:
+//! ```ignore
+//! use jacquard_repo::car::reader::read_car;
+//!
+//! let blocks = read_car("repo.car").await?;
+//! ```
+//!
+//! Writing a CAR file:
+//! ```ignore
+//! use jacquard_repo::car::writer::write_car;
+//!
+//! let roots = vec![commit_cid];
+//! write_car("repo.car", roots, blocks).await?;
+//! ```
+
+pub mod reader;
+pub mod writer;
+
+// Re-export commonly used functions and types
+pub use reader::{parse_car_bytes, read_car, read_car_header, stream_car, ParsedCar};
+pub use writer::{export_repo_car, write_car, write_car_bytes};
crates/jacquard-repo/src/car/reader.rs  (+274, new file)
···
+//! CAR file reading utilities
+//!
+//! Provides functions for reading CAR (Content Addressable aRchive) files into memory
+//! or streaming them for large repositories.
+
+use crate::error::Result;
+use bytes::Bytes;
+use cid::Cid as IpldCid;
+use iroh_car::CarReader;
+use n0_future::stream::StreamExt;
+use std::collections::BTreeMap;
+use std::path::Path;
+use tokio::fs::File;
+
+/// Parsed CAR file data
+#[derive(Debug, Clone)]
+pub struct ParsedCar {
+    /// The first root CID from the CAR header
+    pub root: IpldCid,
+    /// All blocks in the CAR file
+    pub blocks: BTreeMap<IpldCid, Bytes>,
+}
+
+/// Read entire CAR file into memory
+///
+/// Returns BTreeMap of CID -> block data (sorted order for determinism).
+/// For large CAR files, consider using `stream_car()` instead.
+pub async fn read_car(path: impl AsRef<Path>) -> Result<BTreeMap<IpldCid, Bytes>> {
+    let file = File::open(path)
+        .await
+        .map_err(|e| crate::error::RepoError::io(e))?;
+
+    let reader = CarReader::new(file)
+        .await
+        .map_err(|e| crate::error::RepoError::car(e))?;
+
+    let mut blocks = BTreeMap::new();
+    let stream = reader.stream();
+    n0_future::pin!(stream);
+
+    while let Some(result) = stream.next().await {
+        let (cid, data) = result.map_err(|e| crate::error::RepoError::car_parse(e))?;
+        blocks.insert(cid, Bytes::from(data));
+    }
+
+    Ok(blocks)
+}
+
+/// Read CAR file header (roots only)
+///
+/// Useful for checking roots without loading all blocks.
+pub async fn read_car_header(path: impl AsRef<Path>) -> Result<Vec<IpldCid>> {
+    let file = File::open(path)
+        .await
+        .map_err(|e| crate::error::RepoError::io(e))?;
+
+    let reader = CarReader::new(file)
+        .await
+        .map_err(|e| crate::error::RepoError::car(e))?;
+
+    Ok(reader.header().roots().to_vec())
+}
+
+/// Parse CAR bytes into root and block map
+///
+/// For in-memory CAR data (e.g., from firehose commit messages, merkle proofs).
+/// Returns the first root CID and all blocks.
+pub async fn parse_car_bytes(data: &[u8]) -> Result<ParsedCar> {
+    let reader = CarReader::new(data)
+        .await
+        .map_err(|e| crate::error::RepoError::car_parse(e))?;
+
+    let roots = reader.header().roots();
+    let root = roots
+        .first()
+        .copied()
+        .ok_or_else(|| crate::error::RepoError::invalid("CAR file has no roots"))?;
+
+    let mut blocks = BTreeMap::new();
+    let stream = reader.stream();
+    n0_future::pin!(stream);
+
+    while let Some(result) = stream.next().await {
+        let (cid, data) = result.map_err(|e| crate::error::RepoError::car_parse(e))?;
+        blocks.insert(cid, Bytes::from(data));
+    }
+
+    Ok(ParsedCar { root, blocks })
+}
+
+/// Stream CAR blocks without loading entire file into memory
+///
+/// Useful for processing large CAR files incrementally.
+pub async fn stream_car(path: impl AsRef<Path>) -> Result<CarBlockStream> {
+    let file = File::open(path)
+        .await
+        .map_err(|e| crate::error::RepoError::io(e))?;
+
+    let reader = CarReader::new(file)
+        .await
+        .map_err(|e| crate::error::RepoError::car(e))?;
+
+    let roots = reader.header().roots().to_vec();
+    let stream = Box::pin(reader.stream());
+
+    Ok(CarBlockStream { stream, roots })
+}
+
+/// Streaming CAR block reader
+///
+/// Iterates through CAR blocks without loading entire file into memory.
+pub struct CarBlockStream {
+    stream: std::pin::Pin<
+        Box<
+            dyn n0_future::stream::Stream<
+                    Item = std::result::Result<(IpldCid, Vec<u8>), iroh_car::Error>,
+                > + Send,
+        >,
+    >,
+    roots: Vec<IpldCid>,
+}
+
+impl CarBlockStream {
+    /// Get next block from the stream
+    ///
+    /// Returns `None` when stream is exhausted.
+    pub async fn next(&mut self) -> Result<Option<(IpldCid, Bytes)>> {
+        match self.stream.next().await {
+            Some(result) => {
+                let (cid, data) = result.map_err(|e| crate::error::RepoError::car_parse(e))?;
+                Ok(Some((cid, Bytes::from(data))))
+            }
+            None => Ok(None),
+        }
+    }
+
+    /// Get the CAR file roots
+    pub fn roots(&self) -> &[IpldCid] {
+        &self.roots
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::DAG_CBOR_CID_CODEC;
+
+    use super::*;
+    use iroh_car::CarWriter;
+    use jacquard_common::types::crypto::SHA2_256;
+    use tempfile::NamedTempFile;
+    use tokio::io::AsyncWriteExt;
+
+    async fn make_test_car(roots: Vec<IpldCid>, blocks: Vec<(IpldCid, Vec<u8>)>) -> Vec<u8> {
+        let mut buf = Vec::new();
+        let header = iroh_car::CarHeader::new_v1(roots);
+        let mut writer = CarWriter::new(header, &mut buf);
+
+        for (cid, data) in blocks {
+            writer.write(cid, data).await.unwrap();
+        }
+
+        writer.finish().await.unwrap();
+        buf.flush().await.unwrap();
+        buf
+    }
+
+    fn make_test_cid(value: u8) -> IpldCid {
+        use sha2::{Digest, Sha256};
+        let hash = Sha256::digest(&[value]);
+        let mh = multihash::Multihash::wrap(SHA2_256, &hash).unwrap();
+        IpldCid::new_v1(DAG_CBOR_CID_CODEC, mh) // dag-cbor codec
+    }
+
+    #[tokio::test]
+    async fn test_parse_car_with_blocks() {
+        let cid1 = make_test_cid(1);
+        let cid2 = make_test_cid(2);
+        let data1 = vec![1, 2, 3];
+        let data2 = vec![4, 5, 6];
+
+        let car_bytes = make_test_car(
+            vec![cid1],
+            vec![(cid1, data1.clone()), (cid2, data2.clone())],
+        )
+        .await;
+
+        let parsed = parse_car_bytes(&car_bytes).await.unwrap();
+        assert_eq!(parsed.root, cid1);
+        assert_eq!(parsed.blocks.len(), 2);
+        assert_eq!(parsed.blocks.get(&cid1).unwrap().as_ref(), &data1);
+        assert_eq!(parsed.blocks.get(&cid2).unwrap().as_ref(), &data2);
+    }
+
+    #[tokio::test]
+    async fn test_read_car_from_file() {
+        let cid1 = make_test_cid(1);
+        let data1 = vec![1, 2, 3];
+
+        let car_bytes = make_test_car(vec![cid1], vec![(cid1, data1.clone())]).await;
+
+        // Write to temp file
+        let temp_file = NamedTempFile::new().unwrap();
+        tokio::io::AsyncWriteExt::write_all(
+            &mut tokio::fs::File::from_std(temp_file.reopen().unwrap()),
+            &car_bytes,
+        )
+        .await
+        .unwrap();
+
+        // Read back
+        let blocks = read_car(temp_file.path()).await.unwrap();
+        assert_eq!(blocks.len(), 1);
+        assert_eq!(blocks.get(&cid1).unwrap().as_ref(), &data1);
+    }
+
+    #[tokio::test]
+    async fn test_read_car_header() {
+        let cid1 = make_test_cid(1);
+        let cid2 = make_test_cid(2);
+        let data1 = vec![1, 2, 3];
+
+        let car_bytes = make_test_car(vec![cid1, cid2], vec![(cid1, data1)]).await;
+
+        let temp_file = NamedTempFile::new().unwrap();
+        tokio::io::AsyncWriteExt::write_all(
+            &mut tokio::fs::File::from_std(temp_file.reopen().unwrap()),
+            &car_bytes,
+        )
+        .await
+        .unwrap();
+
+        let roots = read_car_header(temp_file.path()).await.unwrap();
+        assert_eq!(roots.len(), 2);
+        assert_eq!(roots[0], cid1);
+        assert_eq!(roots[1], cid2);
+    }
+
+    #[tokio::test]
+    async fn test_stream_car() {
+        let cid1 = make_test_cid(1);
+        let cid2 = make_test_cid(2);
+        let data1 = vec![1, 2, 3];
+        let data2 = vec![4, 5, 6];
+
+        let car_bytes = make_test_car(
+            vec![cid1],
+            vec![(cid1, data1.clone()), (cid2, data2.clone())],
+        )
+        .await;
+
+        let temp_file = NamedTempFile::new().unwrap();
+        tokio::io::AsyncWriteExt::write_all(
+            &mut tokio::fs::File::from_std(temp_file.reopen().unwrap()),
+            &car_bytes,
+        )
+        .await
+        .unwrap();
+
+        let mut stream = stream_car(temp_file.path()).await.unwrap();
+
+        // Read first block
+        let (cid, data) = stream.next().await.unwrap().unwrap();
+        assert_eq!(cid, cid1);
+        assert_eq!(data.as_ref(), &data1);
+
+        // Read second block
+        let (cid, data) = stream.next().await.unwrap().unwrap();
+        assert_eq!(cid, cid2);
+        assert_eq!(data.as_ref(), &data2);
+
+        // Stream exhausted
+        assert!(stream.next().await.unwrap().is_none());
+    }
+}
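
Review note: the two reading modes above are consumed differently. A sketch, assuming the `jacquard_repo::car` re-exports from `mod.rs`, a public `error::Result` alias, and a placeholder `repo.car` path:

```rust
use jacquard_repo::car::{read_car, stream_car};

async fn scan() -> jacquard_repo::error::Result<()> {
    // Small repos: everything in one sorted map.
    let blocks = read_car("repo.car").await?;
    println!("{} blocks", blocks.len());

    // Large repos: one block at a time, with roots available up front.
    let mut stream = stream_car("repo.car").await?;
    println!("roots: {:?}", stream.roots());
    // next() yields Ok(None) once the CAR payload is exhausted.
    while let Some((cid, data)) = stream.next().await? {
        println!("{cid}: {} bytes", data.len());
    }
    Ok(())
}
```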
crates/jacquard-repo/src/car/writer.rs  (+218, new file)
···
+//! CAR file writing utilities
+//!
+//! Provides functions for writing blocks to CAR (Content Addressable aRchive) files.
+
+use crate::error::Result;
+use crate::mst::tree::Mst;
+use crate::storage::BlockStore;
+use bytes::Bytes;
+use cid::Cid as IpldCid;
+use iroh_car::CarWriter;
+use std::collections::BTreeMap;
+use std::path::Path;
+use tokio::fs::File;
+use tokio::io::AsyncWriteExt;
+
+/// Write blocks to CAR file
+///
+/// Roots should contain commit CID(s).
+/// Blocks are written in sorted CID order (BTreeMap) for determinism.
+pub async fn write_car(
+    path: impl AsRef<Path>,
+    roots: Vec<IpldCid>,
+    blocks: BTreeMap<IpldCid, Bytes>,
+) -> Result<()> {
+    let file = File::create(path)
+        .await
+        .map_err(|e| crate::error::RepoError::io(e))?;
+
+    let header = iroh_car::CarHeader::new_v1(roots);
+    let mut writer = CarWriter::new(header, file);
+
+    for (cid, data) in blocks {
+        writer
+            .write(cid, data.as_ref())
+            .await
+            .map_err(|e| crate::error::RepoError::car(e))?;
+    }
+
+    writer
+        .finish()
+        .await
+        .map_err(|e| crate::error::RepoError::car(e))?;
+
+    Ok(())
+}
+
+/// Write blocks to CAR bytes (in-memory)
+///
+/// Like `write_car()` but writes to a Vec<u8> instead of a file.
+/// Useful for tests and proof generation.
+pub async fn write_car_bytes(
+    root: IpldCid,
+    blocks: BTreeMap<IpldCid, Bytes>,
+) -> Result<Vec<u8>> {
+    let mut buffer = Vec::new();
+    let header = iroh_car::CarHeader::new_v1(vec![root]);
+    let mut writer = CarWriter::new(header, &mut buffer);
+
+    for (cid, data) in blocks {
+        writer
+            .write(cid, data.as_ref())
+            .await
+            .map_err(|e| crate::error::RepoError::car(e))?;
+    }
+
+    writer
+        .finish()
+        .await
+        .map_err(|e| crate::error::RepoError::car(e))?;
+
+    buffer.flush().await.map_err(|e| crate::error::RepoError::io(e))?;
+
+    Ok(buffer)
+}
+
+/// Write MST + commit to CAR file
+///
+/// Streams blocks directly to CAR file:
+/// - Commit block (from storage)
+/// - All MST node blocks (from storage)
+/// - All record blocks (from storage)
+///
+/// Uses streaming to avoid loading all blocks into memory.
+pub async fn export_repo_car<S: BlockStore + Sync + 'static>(
+    path: impl AsRef<Path>,
+    commit_cid: IpldCid,
+    mst: &Mst<S>,
+) -> Result<()> {
+    let file = File::create(path)
+        .await
+        .map_err(|e| crate::error::RepoError::io(e))?;
+
+    let header = iroh_car::CarHeader::new_v1(vec![commit_cid]);
+    let mut writer = CarWriter::new(header, file);
+
+    // Write commit block first
+    let storage = mst.storage();
+    let commit_data = storage
+        .get(&commit_cid)
+        .await?
+        .ok_or_else(|| crate::error::RepoError::not_found("commit", &commit_cid))?;
+
+    writer
+        .write(commit_cid, &commit_data)
+        .await
+        .map_err(|e| crate::error::RepoError::car(e))?;
+
+    // Stream MST and record blocks
+    mst.write_blocks_to_car(&mut writer).await?;
+
+    // Finish writing
+    writer
+        .finish()
+        .await
+        .map_err(|e| crate::error::RepoError::car(e))?;
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::DAG_CBOR_CID_CODEC;
+    use crate::car::reader::read_car;
+    use crate::mst::tree::Mst;
+    use crate::storage::memory::MemoryBlockStore;
+    use jacquard_common::types::crypto::SHA2_256;
+    use std::sync::Arc;
+    use tempfile::NamedTempFile;
+
+    fn make_test_cid(value: u8) -> IpldCid {
+        use sha2::{Digest, Sha256};
+        let hash = Sha256::digest(&[value]);
+        let mh = multihash::Multihash::wrap(SHA2_256, &hash).unwrap();
+
+        IpldCid::new_v1(DAG_CBOR_CID_CODEC, mh)
+    }
+
+    #[tokio::test]
+    async fn test_write_car_with_blocks() {
+        let temp_file = NamedTempFile::new().unwrap();
+
+        let cid1 = make_test_cid(1);
+        let cid2 = make_test_cid(2);
+        let data1 = Bytes::from_static(&[1, 2, 3]);
+        let data2 = Bytes::from_static(&[4, 5, 6]);
+
+        let mut blocks = BTreeMap::new();
+        blocks.insert(cid1, data1.clone());
+        blocks.insert(cid2, data2.clone());
+
+        write_car(temp_file.path(), vec![cid1], blocks)
+            .await
+            .unwrap();
+
+        // Read back and verify
+        let read_blocks = read_car(temp_file.path()).await.unwrap();
+        assert_eq!(read_blocks.len(), 2);
+        assert_eq!(read_blocks.get(&cid1).unwrap(), &data1);
+        assert_eq!(read_blocks.get(&cid2).unwrap(), &data2);
+    }
+
+    #[tokio::test]
+    async fn test_export_mst_to_car() {
+        let storage = Arc::new(MemoryBlockStore::new());
+        let mst = Mst::new(storage.clone());
+
+        // Add some entries
+        let cid1 = make_test_cid(1);
+        let cid2 = make_test_cid(2);
+
+        let mst = mst.add("app.bsky.feed.post/abc123", cid1).await.unwrap();
+        let mst = mst.add("app.bsky.feed.post/def456", cid2).await.unwrap();
+
+        // Persist MST blocks to storage
+        mst.persist().await.unwrap();
+
+        // Persist record blocks to storage
+        storage
+            .put_with_cid(cid1, Bytes::from_static(&[1, 1, 1]))
+            .await
+            .unwrap();
+        storage
+            .put_with_cid(cid2, Bytes::from_static(&[2, 2, 2]))
+            .await
+            .unwrap();
+
+        // Create and persist commit block
+        let commit_cid = make_test_cid(99);
+        let commit_data = Bytes::from_static(&[99, 99, 99]);
+        storage
+            .put_with_cid(commit_cid, commit_data.clone())
+            .await
+            .unwrap();
+
+        let temp_file = NamedTempFile::new().unwrap();
+
+        // Export to CAR
+        export_repo_car(temp_file.path(), commit_cid, &mst)
+            .await
+            .unwrap();
+
+        // Read back and verify
+        let blocks = read_car(temp_file.path()).await.unwrap();
+
+        // Should have commit + MST nodes + record blocks
+        assert!(blocks.contains_key(&commit_cid));
+        assert_eq!(blocks.get(&commit_cid).unwrap(), &commit_data);
+
+        // Should have at least the root node
+        let root_cid = mst.root().await.unwrap();
+        assert!(blocks.contains_key(&root_cid));
+
+        // Should have record blocks
+        assert!(blocks.contains_key(&cid1));
+        assert!(blocks.contains_key(&cid2));
+    }
+}
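
Review note: `write_car_bytes` and `parse_car_bytes` are intended as inverses on the proof path (generation writes CAR bytes, verification parses them back). A quick round-trip sketch, assuming the `jacquard_repo::car` re-exports:

```rust
use bytes::Bytes;
use cid::Cid;
use std::collections::BTreeMap;

async fn round_trip(root: Cid, blocks: BTreeMap<Cid, Bytes>) {
    let car = jacquard_repo::car::write_car_bytes(root, blocks.clone())
        .await
        .unwrap();
    let parsed = jacquard_repo::car::parse_car_bytes(&car).await.unwrap();

    // The single root and the sorted block map survive the trip unchanged.
    assert_eq!(parsed.root, root);
    assert_eq!(parsed.blocks, blocks);
}
```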
crates/jacquard-repo/src/commit/firehose.rs  (+336, new file)
···
+//! Firehose commit message structures
+//!
+//! These structures are vendored from `jacquard-api::com_atproto::sync::subscribe_repos`
+//! to avoid a dependency on the full API crate. They represent firehose protocol messages,
+//! which are DISTINCT from repository commit objects.
+
+use bytes::Bytes;
+use jacquard_common::IntoStatic;
+use jacquard_common::types::string::{Did, Tid};
+
+/// Firehose commit message (sync v1.0 and v1.1)
+///
+/// Represents an update of repository state in the firehose stream.
+/// This is the message format sent over `com.atproto.sync.subscribeRepos`.
+///
+/// **Sync v1.0 vs v1.1:**
+/// - v1.0: `prev_data` is None/skipped, consumers must have sufficient previous repository state to validate
+/// - v1.1: `prev_data` includes previous MST root for inductive validation
+#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct FirehoseCommit<'a> {
+    /// The repo this event comes from
+    #[serde(borrow)]
+    pub repo: Did<'a>,
+
+    /// The rev of the emitted commit
+    pub rev: Tid,
+
+    /// The stream sequence number of this message
+    pub seq: i64,
+
+    /// The rev of the last emitted commit from this repo (if any)
+    pub since: Tid,
+
+    /// Timestamp of when this message was originally broadcast
+    pub time: jacquard_common::types::string::Datetime,
+
+    /// Repo commit object CID
+    ///
+    /// This CID points to the repository commit block (with did, version, data, rev, prev, sig).
+    /// It must be the first entry in the CAR header 'roots' list.
+    #[serde(borrow)]
+    pub commit: jacquard_common::types::cid::CidLink<'a>,
+
+    /// CAR file containing relevant blocks
+    ///
+    /// Contains blocks as a diff since the previous repo state. The commit block
+    /// must be included, and its CID must be the first root in the CAR header.
+    ///
+    /// For sync v1.1, may include additional MST node blocks needed for operation inversion.
+    #[serde(with = "super::serde_bytes_helper")]
+    pub blocks: Bytes,
+
+    /// Operations in this commit
+    #[serde(borrow)]
+    pub ops: Vec<RepoOp<'a>>,
+
+    /// Previous MST root CID (sync v1.1 only)
+    ///
+    /// The root CID of the MST tree for the previous commit (indicated by the 'since' field).
+    /// Corresponds to the 'data' field in the previous repo commit object.
+    ///
+    /// **Sync v1.1 inductive validation:**
+    /// - Enables validation without local MST state
+    /// - Operations can be inverted (creates→deletes, deletes→creates with prev values)
+    /// - Required for "inductive firehose" consumption
+    ///
+    /// **Sync v1.0:**
+    /// - This field is None
+    /// - Consumers must have previous repository state
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(borrow)]
+    pub prev_data: Option<jacquard_common::types::cid::CidLink<'a>>,
+
+    /// Blob CIDs referenced in this commit
+    #[serde(borrow)]
+    pub blobs: Vec<jacquard_common::types::cid::CidLink<'a>>,
+
+    /// DEPRECATED: Replaced by #sync event and data limits
+    ///
+    /// Indicates that this commit contained too many ops, or data size was too large.
+    /// Consumers will need to make a separate request to get missing data.
+    pub too_big: bool,
+
+    /// DEPRECATED: Unused
+    pub rebase: bool,
+}
+
+/// A repository operation (mutation of a single record)
+#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct RepoOp<'a> {
+    /// Operation type: "create", "update", or "delete"
+    #[serde(borrow)]
+    pub action: jacquard_common::CowStr<'a>,
+
+    /// Collection/rkey path (e.g., "app.bsky.feed.post/abc123")
+    #[serde(borrow)]
+    pub path: jacquard_common::CowStr<'a>,
+
+    /// For creates and updates, the new record CID. For deletions, None (null).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(borrow)]
+    pub cid: Option<jacquard_common::types::cid::CidLink<'a>>,
+
+    /// For updates and deletes, the previous record CID
+    ///
+    /// Required for sync v1.1 inductive firehose validation.
+    /// For creates, this field should not be defined.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(borrow)]
+    pub prev: Option<jacquard_common::types::cid::CidLink<'a>>,
+}
+
+impl IntoStatic for FirehoseCommit<'_> {
+    type Output = FirehoseCommit<'static>;
+
+    fn into_static(self) -> Self::Output {
+        FirehoseCommit {
+            repo: self.repo.into_static(),
+            rev: self.rev,
+            seq: self.seq,
+            since: self.since,
+            time: self.time,
+            commit: self.commit.into_static(),
+            blocks: self.blocks,
+            ops: self.ops.into_iter().map(|op| op.into_static()).collect(),
+            prev_data: self.prev_data.map(|pd| pd.into_static()),
+            blobs: self.blobs.into_iter().map(|b| b.into_static()).collect(),
+            too_big: self.too_big,
+            rebase: self.rebase,
+        }
+    }
+}
+
+impl IntoStatic for RepoOp<'_> {
+    type Output = RepoOp<'static>;
+
+    fn into_static(self) -> Self::Output {
+        RepoOp {
+            action: self.action.into_static(),
+            path: self.path.into_static(),
+            cid: self.cid.into_static(),
+            prev: self.prev.map(|p| p.into_static()),
+        }
+    }
+}
+
+/// Validation functions for firehose commit messages
+///
+/// These functions validate commits from the `com.atproto.sync.subscribeRepos` firehose.
+use crate::error::Result;
+use crate::mst::Mst;
+use crate::storage::BlockStore;
+use cid::Cid as IpldCid;
+use std::sync::Arc;
+
+impl<'a> FirehoseCommit<'a> {
+    /// Validate a sync v1.0 commit
+    ///
+    /// **Requirements:**
+    /// - Must have previous MST state (potentially full repository)
+    /// - All blocks needed for validation must be in `self.blocks`
+    ///
+    /// **Validation steps:**
+    /// 1. Parse CAR blocks from `self.blocks` into temporary storage
+    /// 2. Load commit object and verify signature
+    /// 3. Apply operations to previous MST (using temporary storage for new blocks)
+    /// 4. Verify result matches commit.data (new MST root)
+    ///
+    /// Returns the new MST root CID on success.
+    pub async fn validate_v1_0<S: BlockStore + Sync + 'static>(
+        &self,
+        prev_mst_root: Option<IpldCid>,
+        prev_storage: Arc<S>,
+        pubkey: &jacquard_common::types::crypto::PublicKey<'_>,
+    ) -> Result<IpldCid> {
+        // 1. Parse CAR blocks from the firehose message into temporary storage
+        let parsed = crate::car::parse_car_bytes(&self.blocks).await?;
+        let temp_storage = crate::storage::MemoryBlockStore::new_from_blocks(parsed.blocks);
+
+        // 2. Create layered storage: reads from temp first, then prev; writes to temp only
+        // This avoids copying all previous MST blocks
+        let layered_storage =
+            crate::storage::LayeredBlockStore::new(temp_storage.clone(), prev_storage);
+
+        // 3. Extract and verify commit object from temporary storage
+        let commit_cid: IpldCid = self
+            .commit
+            .to_ipld()
+            .map_err(|e| crate::error::RepoError::invalid(format!("Invalid commit CID: {}", e)))?;
+        let commit_bytes = temp_storage
+            .get(&commit_cid)
+            .await?
+            .ok_or_else(|| crate::error::RepoError::not_found("commit block", &commit_cid))?;
+
+        let commit = super::Commit::from_cbor(&commit_bytes)?;
+
+        // Verify DID matches
+        if commit.did().as_ref() != self.repo.as_ref() {
+            return Err(crate::error::RepoError::invalid_commit(format!(
+                "DID mismatch: commit has {}, message has {}",
+                commit.did(),
+                self.repo
+            )));
+        }
+
+        // Verify signature
+        commit.verify(pubkey)?;
+
+        let layered_arc = Arc::new(layered_storage);
+
+        // 4. Load previous MST state from layered storage (or start empty)
+        let prev_mst = if let Some(prev_root) = prev_mst_root {
+            Mst::load(layered_arc.clone(), prev_root, None)
+        } else {
+            Mst::new(layered_arc.clone())
+        };
+
+        // 5. Load new MST from commit.data (claimed result)
+        let expected_root = *commit.data();
+        let new_mst = Mst::load(layered_arc, expected_root, None);
+
+        // 6. Compute diff to get verified write ops (with actual prev values from tree state)
+        let diff = prev_mst.diff(&new_mst).await?;
+        let verified_ops = diff.to_verified_ops();
+
+        // 7. Apply verified ops to prev MST
+        let computed_mst = prev_mst.batch(&verified_ops).await?;
+
+        // 8. Verify computed result matches claimed result
+        let computed_root = computed_mst.get_pointer().await?;
+
+        if computed_root != expected_root {
+            return Err(crate::error::RepoError::invalid_commit(format!(
+                "MST root mismatch: expected {}, got {}",
+                expected_root, computed_root
+            )));
+        }
+
+        Ok(expected_root)
+    }
+
+    /// Validate a sync v1.1 commit (inductive validation)
+    ///
+    /// **Requirements:**
+    /// - `self.prev_data` must be Some (contains previous MST root)
+    /// - All blocks needed for validation must be in `self.blocks`
+    ///
+    /// **Validation steps:**
+    /// 1. Parse CAR blocks from `self.blocks` into temporary storage
+    /// 2. Load commit object and verify signature
+    /// 3. Start from `prev_data` MST root (loaded from temp storage)
+    /// 4. Apply operations (with prev CID validation for updates/deletes)
+    /// 5. Verify result matches commit.data (new MST root)
+    ///
+    /// Returns the new MST root CID on success.
+    ///
+    /// **Inductive property:** Can validate without any external state besides the blocks
+    /// in this message. The `prev_data` field provides the starting MST root, and operations
+    /// include `prev` CIDs for validation. All necessary blocks must be in the CAR bytes.
+    pub async fn validate_v1_1(
+        &self,
+        pubkey: &jacquard_common::types::crypto::PublicKey<'_>,
+    ) -> Result<IpldCid> {
+        // 1. Require prev_data for v1.1
+        let prev_data_cid: IpldCid = self
+            .prev_data
+            .as_ref()
+            .ok_or_else(|| {
+                crate::error::RepoError::invalid_commit(
+                    "Sync v1.1 validation requires prev_data field",
+                )
+            })?
+            .to_ipld()
+            .map_err(|e| {
+                crate::error::RepoError::invalid(format!("Invalid prev_data CID: {}", e))
+            })?;
+
+        // 2. Parse CAR blocks from the firehose message into temporary storage
+        let parsed = crate::car::parse_car_bytes(&self.blocks).await?;
+        let temp_storage = Arc::new(crate::storage::MemoryBlockStore::new_from_blocks(
+            parsed.blocks,
+        ));
+
+        // 3. Extract and verify commit object from temporary storage
+        let commit_cid: IpldCid = self
+            .commit
+            .to_ipld()
+            .map_err(|e| crate::error::RepoError::invalid(format!("Invalid commit CID: {}", e)))?;
+        let commit_bytes = temp_storage
+            .get(&commit_cid)
+            .await?
+            .ok_or_else(|| crate::error::RepoError::not_found("commit block", &commit_cid))?;
+
+        let commit = super::Commit::from_cbor(&commit_bytes)?;
+
+        // Verify DID matches
+        if commit.did().as_ref() != self.repo.as_ref() {
+            return Err(crate::error::RepoError::invalid_commit(format!(
+                "DID mismatch: commit has {}, message has {}",
+                commit.did(),
+                self.repo
+            )));
+        }
+
+        // Verify signature
+        commit.verify(pubkey)?;
+
+        // 4. Load previous MST from prev_data (all blocks should be in temp_storage)
+        let prev_mst = Mst::load(temp_storage.clone(), prev_data_cid, None);
+
+        // 5. Load new MST from commit.data (claimed result)
+        let expected_root = *commit.data();
+        let new_mst = Mst::load(temp_storage, expected_root, None);
+
+        // 6. Compute diff to get verified write ops (with actual prev values from tree state)
+        let diff = prev_mst.diff(&new_mst).await?;
+        let verified_ops = diff.to_verified_ops();
+
+        // 7. Apply verified ops to prev MST
+        let computed_mst = prev_mst.batch(&verified_ops).await?;
+
+        // 8. Verify computed result matches claimed result
+        let computed_root = computed_mst.get_pointer().await?;
+
+        if computed_root != expected_root {
+            return Err(crate::error::RepoError::invalid_commit(format!(
+                "MST root mismatch: expected {}, got {}",
+                expected_root, computed_root
+            )));
+        }
+
+        Ok(expected_root)
+    }
+}
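
Review note: putting the two validators in context, a v1.1 consumer needs only the message plus the repo's signing key, while v1.0 also needs prior repository state. A consumption sketch; message decoding and DID-document key resolution are assumed to happen elsewhere, and `Display` on the CID and error types is assumed:

```rust
use jacquard_common::types::crypto::PublicKey;
use jacquard_repo::commit::firehose::FirehoseCommit;

async fn handle_commit(msg: FirehoseCommit<'_>, pubkey: &PublicKey<'_>) {
    match msg.validate_v1_1(pubkey).await {
        // The returned root is the repo's new MST state; a consumer would
        // remember it and expect it as `prev_data` in the next message.
        Ok(new_root) => println!("seq {} validated, new MST root {new_root}", msg.seq),
        Err(e) => eprintln!("seq {} rejected: {e}", msg.seq),
    }
}
```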
crates/jacquard-repo/src/commit/mod.rs  (+239, new file)
···
+//! Commit structures and signature verification for AT Protocol repositories.
+//!
+//! This module provides repository commit object handling with signature support.
+
+pub mod firehose;
+pub mod proof;
+pub(crate) mod serde_bytes_helper;
+use crate::error::{CommitError, Result};
+use bytes::Bytes;
+use cid::Cid as IpldCid;
+use jacquard_common::IntoStatic;
+use jacquard_common::types::crypto::PublicKey;
+use jacquard_common::types::string::Did;
+use jacquard_common::types::tid::Tid;
+/// Repository commit object
+///
+/// This structure represents a signed commit in an AT Protocol repository.
+/// Stored as a block in CAR files, identified by its CID.
+///
+/// **Version compatibility**: v2 and v3 commits differ only in how `prev` is
+/// serialized (v2 uses it, v3 must include it even if null). This struct
+/// handles both by always including `prev` in serialization.
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub struct Commit<'a> {
+    /// Repository DID
+    #[serde(borrow)]
+    pub did: Did<'a>,
+
+    /// Commit version (2 or 3)
+    pub version: i64,
+
+    /// MST root CID
+    pub data: IpldCid,
+
+    /// Revision TID
+    pub rev: Tid,
+
+    /// Previous commit CID (None for initial commit)
+    pub prev: Option<IpldCid>,
+
+    /// Signature bytes
+    #[serde(with = "serde_bytes_helper")]
+    pub sig: Bytes,
+}
+
+impl<'a> Commit<'a> {
+    /// Create new unsigned commit (version = 3, sig empty)
+    pub fn new_unsigned(did: Did<'a>, data: IpldCid, rev: Tid, prev: Option<IpldCid>) -> Self {
+        Self {
+            did,
+            version: 3,
+            data,
+            rev,
+            prev,
+            sig: Bytes::new(),
+        }
+    }
+
+    /// Sign this commit with a key
+    pub fn sign(mut self, key: &impl SigningKey) -> Result<Self> {
+        let unsigned = self.unsigned_bytes()?;
+        self.sig = key.sign_bytes(&unsigned)?;
+        Ok(self)
+    }
+
+    /// Get the repository DID
+    pub fn did(&self) -> &Did<'a> {
+        &self.did
+    }
+
+    /// Get the MST root CID
+    pub fn data(&self) -> &IpldCid {
+        &self.data
+    }
+
+    /// Get the revision TID
+    pub fn rev(&self) -> &Tid {
+        &self.rev
+    }
+
+    /// Get the previous commit CID
+    pub fn prev(&self) -> Option<&IpldCid> {
+        self.prev.as_ref()
+    }
+
+    /// Get the signature bytes
+    pub fn sig(&self) -> &Bytes {
+        &self.sig
+    }
+
+    /// Get unsigned commit bytes (for signing/verification)
+    pub(super) fn unsigned_bytes(&self) -> Result<Vec<u8>> {
+        // Serialize without signature field
+        let mut unsigned = self.clone();
+        unsigned.sig = Bytes::new();
+        serde_ipld_dagcbor::to_vec(&unsigned)
+            .map_err(|e| crate::error::CommitError::Serialization(Box::new(e)).into())
+    }
+
+    /// Serialize to DAG-CBOR
+    pub fn to_cbor(&self) -> Result<Vec<u8>> {
+        serde_ipld_dagcbor::to_vec(self).map_err(|e| CommitError::Serialization(Box::new(e)).into())
+    }
+
+    /// Deserialize from DAG-CBOR
+    pub fn from_cbor(data: &'a [u8]) -> Result<Self> {
+        serde_ipld_dagcbor::from_slice(data)
+            .map_err(|e| CommitError::Serialization(Box::new(e)).into())
+    }
+
+    /// Compute CID of this commit
+    pub fn to_cid(&self) -> Result<IpldCid> {
+        let cbor = self.to_cbor()?;
+        crate::mst::util::compute_cid(&cbor)
+    }
+
+    /// Verify signature against a public key from a DID document.
+    ///
+    /// The key type is inferred from the PublicKey codec.
+    pub fn verify(&self, pubkey: &PublicKey) -> std::result::Result<(), CommitError> {
+        let unsigned = self
+            .unsigned_bytes()
+            .map_err(|e| CommitError::Serialization(e.into()))?;
+        let signature = self.sig();
+
+        use jacquard_common::types::crypto::KeyCodec;
+        match pubkey.codec {
+            KeyCodec::Ed25519 => {
+                let vk = pubkey
+                    .to_ed25519()
+                    .map_err(|e| CommitError::InvalidKey(e.to_string()))?;
+                let sig = ed25519_dalek::Signature::from_slice(signature.as_ref())
+                    .map_err(|e| CommitError::InvalidSignature(e.to_string()))?;
+                vk.verify_strict(&unsigned, &sig)
+                    .map_err(|_| CommitError::SignatureVerificationFailed)?;
+            }
+            KeyCodec::Secp256k1 => {
+                use k256::ecdsa::{Signature, VerifyingKey, signature::Verifier};
+                let vk = pubkey
+                    .to_k256()
+                    .map_err(|e| CommitError::InvalidKey(e.to_string()))?;
+                let verifying_key = VerifyingKey::from(&vk);
+                let sig = Signature::from_slice(signature.as_ref())
+                    .map_err(|e| CommitError::InvalidSignature(e.to_string()))?;
+                verifying_key
+                    .verify(&unsigned, &sig)
+                    .map_err(|_| CommitError::SignatureVerificationFailed)?;
+            }
+            KeyCodec::P256 => {
+                use p256::ecdsa::{Signature, VerifyingKey, signature::Verifier};
+                let vk = pubkey
+                    .to_p256()
+                    .map_err(|e| CommitError::InvalidKey(e.to_string()))?;
+                let verifying_key = VerifyingKey::from(&vk);
+                let sig = Signature::from_slice(signature.as_ref())
+                    .map_err(|e| CommitError::InvalidSignature(e.to_string()))?;
+                verifying_key
+                    .verify(&unsigned, &sig)
+                    .map_err(|_| CommitError::SignatureVerificationFailed)?;
+            }
+            KeyCodec::Unknown(code) => {
+                return Err(CommitError::UnsupportedKeyType(code));
+            }
+        }
+
+        Ok(())
+    }
+}
+
+impl IntoStatic for Commit<'_> {
+    type Output = Commit<'static>;
+
+    fn into_static(self) -> Self::Output {
+        Commit {
+            did: self.did.into_static(),
+            version: self.version,
+            data: self.data,
+            rev: self.rev,
+            prev: self.prev,
+            sig: self.sig,
+        }
+    }
+}
+
+/// Trait for signing keys.
+///
+/// Implemented for ed25519_dalek::SigningKey, k256::ecdsa::SigningKey, and p256::ecdsa::SigningKey.
+pub trait SigningKey {
+    /// Sign the given data and return signature as Bytes
+    fn sign_bytes(&self, data: &[u8]) -> Result<Bytes>;
+
+    /// Get the public key bytes
+    fn public_key(&self) -> Vec<u8>;
+}
+
+// Ed25519 implementation
+impl SigningKey for ed25519_dalek::SigningKey {
+    fn sign_bytes(&self, data: &[u8]) -> Result<Bytes> {
+        use ed25519_dalek::Signer;
+        let sig = Signer::sign(self, data);
+        Ok(Bytes::copy_from_slice(&sig.to_bytes()))
+    }
+
+    fn public_key(&self) -> Vec<u8> {
+        self.verifying_key().to_bytes().to_vec()
+    }
+}
+
+// K-256 (secp256k1) implementation
+impl SigningKey for k256::ecdsa::SigningKey {
+    fn sign_bytes(&self, data: &[u8]) -> Result<Bytes> {
+        use k256::ecdsa::signature::Signer;
+        let sig: k256::ecdsa::Signature = Signer::sign(self, data);
+        Ok(Bytes::copy_from_slice(&sig.to_bytes()))
+    }
+
+    fn public_key(&self) -> Vec<u8> {
+        self.verifying_key()
+            .to_encoded_point(true)
+            .as_bytes()
+            .to_vec()
+    }
+}
+
+// P-256 implementation
+impl SigningKey for p256::ecdsa::SigningKey {
+    fn sign_bytes(&self, data: &[u8]) -> Result<Bytes> {
+        use p256::ecdsa::signature::Signer;
+        let sig: p256::ecdsa::Signature = Signer::sign(self, data);
+        Ok(Bytes::copy_from_slice(&sig.to_bytes()))
+    }
+
+    fn public_key(&self) -> Vec<u8> {
+        self.verifying_key()
+            .to_encoded_point(true)
+            .as_bytes()
+            .to_vec()
+    }
+}
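
Review note: end to end, the `SigningKey` trait and `Commit::verify` pair up as below. This mirrors the k256 flow used in the proof.rs tests later in this diff; the exact `jacquard_common` paths are assumptions:

```rust
use jacquard_common::types::crypto::{KeyCodec, PublicKey};
use jacquard_common::types::string::Did;
use jacquard_repo::commit::Commit;
use std::borrow::Cow;

fn sign_and_verify(mst_root: cid::Cid) {
    let sk = k256::ecdsa::SigningKey::random(&mut rand::rngs::OsRng);
    let did = Did::new("did:plc:example").unwrap();
    let rev = jacquard_common::types::tid::Ticker::new().next(None);

    // version defaults to 3; prev is None for an initial commit.
    let commit = Commit::new_unsigned(did, mst_root, rev, None)
        .sign(&sk)
        .unwrap();

    // Verify with the compressed SEC1 public key, as a DID document carries it.
    let pubkey = PublicKey {
        codec: KeyCodec::Secp256k1,
        bytes: Cow::Owned(sk.verifying_key().to_encoded_point(true).as_bytes().to_vec()),
    };
    assert!(commit.verify(&pubkey).is_ok());
}
```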
crates/jacquard-repo/src/commit/proof.rs  (+774, new file; diff truncated below)
···
+//! Record proof verification
+//!
+//! Verifies merkle proofs for individual record existence/non-existence.
+//!
+//! **Proof structure:**
+//! - CAR file containing:
+//!   - Commit block (with signature)
+//!   - MST node blocks along the path to the record(s)
+//!   - Record blocks (if proving existence)
+//!
+//! **Verification:**
+//! 1. Parse CAR blocks into temporary storage
+//! 2. Load and verify commit (signature + DID)
+//! 3. Load MST using ONLY blocks from CAR
+//! 4. For each claim, check if record exists/matches in MST
+//!
+//! This is distinct from firehose commit validation - proofs verify individual
+//! records, not full repository commits.

+use crate::BlockStore;
+use crate::error::ProofError;
+use crate::mst::Mst;
+use crate::storage::MemoryBlockStore;
+use cid::Cid as IpldCid;
+use jacquard_common::types::string::Did;
+use smol_str::format_smolstr;
+use std::sync::Arc;
+
+/// A claim about a record's CID at a specific path
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct RecordClaim<'a> {
+    /// Collection NSID (e.g., "app.bsky.feed.post")
+    pub collection: jacquard_common::CowStr<'a>,
+
+    /// Record key (TID or other identifier)
+    pub rkey: jacquard_common::CowStr<'a>,
+
+    /// Expected CID of the record
+    /// - Some(cid): claiming record exists with this CID
+    /// - None: claiming record does not exist
+    pub cid: Option<IpldCid>,
+}
+
+/// Result of proof verification
+#[derive(Debug)]
+pub struct VerifyProofsOutput<'a> {
+    /// Claims that were successfully verified
+    pub verified: Vec<RecordClaim<'a>>,
+
+    /// Claims that failed verification
+    pub unverified: Vec<RecordClaim<'a>>,
+}
+
+/// Verify record proofs from a CAR file
+///
+/// **Inputs:**
+/// - `car_bytes`: CAR file containing commit + MST blocks + record blocks
+/// - `claims`: Records to verify (existence or non-existence)
+/// - `did`: Expected DID of the repository
+/// - `pubkey`: Public key for signature verification
+///
+/// **Returns:**
+/// - `verified`: Claims that match the MST state
+/// - `unverified`: Claims that don't match
+///
+/// **Security:**
+/// - Verifies commit signature using provided pubkey
+/// - Verifies DID matches
+/// - Uses ONLY blocks from CAR (merkle proof property)
+///
+/// # Example
+///
+/// ```rust,ignore
+/// let claims = vec![
+///     RecordClaim {
+///         collection: "app.bsky.feed.post".into(),
+///         rkey: "3l4qpz7ajrc2a".into(),
+///         cid: Some(record_cid), // Claiming this record exists
+///     },
+///     RecordClaim {
+///         collection: "app.bsky.feed.post".into(),
+///         rkey: "nonexistent".into(),
+///         cid: None, // Claiming this record doesn't exist
+///     },
+/// ];
+///
+/// let result = verify_proofs(car_bytes, claims, did, pubkey).await?;
+/// assert_eq!(result.verified.len(), 2); // Both claims verified
+/// ```
+pub async fn verify_proofs<'a>(
+    car_bytes: &[u8],
+    claims: Vec<RecordClaim<'a>>,
+    did: &Did<'_>,
+    pubkey: &jacquard_common::types::crypto::PublicKey<'_>,
+) -> Result<VerifyProofsOutput<'a>, ProofError> {
+    // 1. Parse CAR file
+    let parsed =
+        crate::car::parse_car_bytes(car_bytes)
+            .await
+            .map_err(|e| ProofError::CarParseFailed {
+                source: Box::new(e),
+            })?;
+
+    // 2. Create storage with ONLY blocks from CAR (merkle proof property)
+    let storage = Arc::new(MemoryBlockStore::new_from_blocks(parsed.blocks));
+
+    // 3. Load commit from CAR root
+    let commit_cid = parsed.root;
+    let commit_bytes = storage
+        .get(&commit_cid)
+        .await
+        .map_err(|_| ProofError::CommitNotFound)?
+        .ok_or(ProofError::CommitNotFound)?;
+
+    let commit = super::Commit::from_cbor(&commit_bytes).map_err(|e| {
+        ProofError::CommitDeserializeFailed {
+            source: Box::new(e),
+        }
+    })?;
+
+    // 4. Verify DID matches
+    if commit.did().as_ref() != did.as_ref() {
+        return Err(ProofError::DidMismatch {
+            commit_did: commit.did().to_string(),
+            expected_did: did.to_string(),
+        }
+        .into());
+    }
+
+    // 5. Verify signature
+    // We need to extract the CommitError before it gets converted to RepoError
+    if let Err(e) = commit.verify(pubkey) {
+        return Err(ProofError::SignatureVerificationFailed { source: e }.into());
+    }
+
+    // 6. Load MST using ONLY blocks from CAR
+    let mst = Mst::load(storage.clone(), *commit.data(), None);
+
+    // 7. Verify each claim
+    let mut verified = Vec::new();
+    let mut unverified = Vec::new();
+
+    for claim in claims {
+        let key = format_smolstr!("{}/{}", claim.collection, claim.rkey);
+        let found_cid = mst.get(&key).await.ok().flatten();
+
+        match (&claim.cid, found_cid) {
+            // Claiming record doesn't exist
+            (None, None) => {
+                // Correct: record doesn't exist
+                verified.push(claim);
+            }
+            (None, Some(_)) => {
+                // Incorrect: claimed doesn't exist but it does
+                unverified.push(claim);
+            }
+            // Claiming record exists with specific CID
+            (Some(claimed_cid), Some(found)) if claimed_cid == &found => {
+                // Correct: CID matches
+                verified.push(claim);
+            }
+            (Some(_), _) => {
+                // Incorrect: CID mismatch or doesn't exist
+                unverified.push(claim);
+            }
+        }
+    }
+
+    Ok(VerifyProofsOutput {
+        verified,
+        unverified,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::commit::Commit;
+    use crate::mst::Mst;
+    use crate::storage::MemoryBlockStore;
+    use jacquard_common::types::crypto::PublicKey;
+    use jacquard_common::types::string::Did;
+
+    fn test_signing_key() -> k256::ecdsa::SigningKey {
+        use k256::ecdsa::SigningKey;
+        use rand::rngs::OsRng;
+        SigningKey::random(&mut OsRng)
+    }
+
+    fn test_pubkey(sk: &k256::ecdsa::SigningKey) -> PublicKey<'static> {
+        use jacquard_common::types::crypto::KeyCodec;
+        use std::borrow::Cow;
+        let vk = sk.verifying_key();
+        PublicKey {
+            codec: KeyCodec::Secp256k1,
+            bytes: Cow::Owned(vk.to_encoded_point(true).as_bytes().to_vec()),
+        }
+    }
+
+    fn test_cid(n: u8) -> IpldCid {
+        let data = vec![n; 32];
+        let mh =
+            multihash::Multihash::wrap(jacquard_common::types::crypto::SHA2_256, &data).unwrap();
+        IpldCid::new_v1(crate::DAG_CBOR_CID_CODEC, mh)
+    }
+
+    #[tokio::test]
+    async fn test_verify_proofs_record_exists() {
+        // Create MST with records
+        let storage = Arc::new(MemoryBlockStore::new());
+        let mst = Mst::new(storage.clone());
+
+        let key1 = "app.bsky.feed.post/abc123";
+        let cid1 = test_cid(1);
+
+        let mst = mst.add(key1, cid1).await.unwrap();
+        let mst_root = mst.persist().await.unwrap();
+
+        // Create and sign commit
+        let sk = test_signing_key();
+        let pubkey = test_pubkey(&sk);
+        let did = Did::new("did:plc:test").unwrap();
+
+        let commit = Commit::new_unsigned(
+            did.clone(),
+            mst_root,
+            jacquard_common::types::tid::Ticker::new().next(None),
+            None,
+        )
+        .sign(&sk)
+        .unwrap();
+
+        let commit_cid = commit.to_cid().unwrap();
+        let commit_bytes = commit.to_cbor().unwrap();
+        storage.put(&commit_bytes).await.unwrap();
+
+        // Generate CAR proof with: commit + MST path blocks
+        let cids_for_proof = mst.cids_for_path(key1).await.unwrap();
+        let mut car_blocks = std::collections::BTreeMap::new();
+
+        // Add commit block
+        car_blocks.insert(commit_cid, bytes::Bytes::from(commit_bytes));
+
+        // Add MST blocks
+        for cid in &cids_for_proof[..cids_for_proof.len() - 1] {
+            // All except record CID
+            if let Some(block) = storage.get(cid).await.unwrap() {
+                car_blocks.insert(*cid, block);
+            }
+        }
+
+        // Add record block
+        car_blocks.insert(cid1, bytes::Bytes::from(vec![0x42])); // dummy record data
+
+        // Write CAR
+        let car_bytes = crate::car::write_car_bytes(commit_cid, car_blocks)
+            .await
+            .unwrap();
+
+        // Verify proof
+        let claims = vec![RecordClaim {
+            collection: "app.bsky.feed.post".into(),
+            rkey: "abc123".into(),
+            cid: Some(cid1),
+        }];
+
+        let result = verify_proofs(&car_bytes, claims, &did, &pubkey)
+            .await
+            .unwrap();
+
+        assert_eq!(result.verified.len(), 1);
+        assert_eq!(result.unverified.len(), 0);
+    }
+
+    #[tokio::test]
+    async fn test_verify_proofs_record_not_exists() {
+        // Create MST with one record
+        let storage = Arc::new(MemoryBlockStore::new());
+        let mst = Mst::new(storage.clone());
+
+        let key1 = "app.bsky.feed.post/abc123";
+        let cid1 = test_cid(1);
+
+        let mst = mst.add(key1, cid1).await.unwrap();
+        let mst_root = mst.persist().await.unwrap();
+
+        // Create and sign commit
+        let sk = test_signing_key();
+        let pubkey = test_pubkey(&sk);
+        let did = Did::new("did:plc:test").unwrap();
+
+        let commit = Commit::new_unsigned(
+            did.clone(),
+            mst_root,
+            jacquard_common::types::tid::Ticker::new().next(None),
+            None,
+        )
+        .sign(&sk)
+        .unwrap();
+
+        let commit_cid = commit.to_cid().unwrap();
+        let commit_bytes = commit.to_cbor().unwrap();
+        storage.put(&commit_bytes).await.unwrap();
+
+        // Generate proof for non-existent record
+        let nonexistent_key = "app.bsky.feed.post/xyz789";
+        let cids_for_proof = mst.cids_for_path(nonexistent_key).await.unwrap();
+        let mut car_blocks = std::collections::BTreeMap::new();
+
+        // Add commit block
+        car_blocks.insert(commit_cid, bytes::Bytes::from(commit_bytes));
+
+        // Add MST blocks (proof of absence)
+        for cid in &cids_for_proof {
+            if let Some(block) = storage.get(cid).await.unwrap() {
+                car_blocks.insert(*cid, block);
+            }
+        }
+
+        // Write CAR
+        let car_bytes = crate::car::write_car_bytes(commit_cid, car_blocks)
+            .await
+            .unwrap();
+
+        // Verify proof of non-existence
+        let claims = vec![RecordClaim {
+            collection: "app.bsky.feed.post".into(),
+            rkey: "xyz789".into(),
+            cid: None, // Claiming it doesn't exist
+        }];
+
+        let result = verify_proofs(&car_bytes, claims, &did, &pubkey)
+            .await
+            .unwrap();
+
+        assert_eq!(result.verified.len(), 1);
+        assert_eq!(result.unverified.len(), 0);
+    }
+
+    #[tokio::test]
+    async fn test_verify_proofs_multiple_claims_mixed() {
+        // Test verifying multiple claims - some valid, some invalid
+        let storage = Arc::new(MemoryBlockStore::new());
+        let mst = Mst::new(storage.clone());
+
+        let key1 = "app.bsky.feed.post/abc123";
+        let key2 = "app.bsky.feed.post/def456";
+        let cid1 = test_cid(1);
+        let cid2 = test_cid(2);
+
+        let mst = mst.add(key1, cid1).await.unwrap();
+        let mst = mst.add(key2, cid2).await.unwrap();
+        let mst_root = mst.persist().await.unwrap();
+
+        let sk = test_signing_key();
+        let pubkey = test_pubkey(&sk);
+        let did = Did::new("did:plc:test").unwrap();
+
+        let commit = Commit::new_unsigned(
+            did.clone(),
+            mst_root,
+            jacquard_common::types::tid::Ticker::new().next(None),
+            None,
+        )
+        .sign(&sk)
+        .unwrap();
+
+        let commit_cid = commit.to_cid().unwrap();
+        let commit_bytes = commit.to_cbor().unwrap();
+        storage.put(&commit_bytes).await.unwrap();
+
+        // Generate CAR with both records
+        let cids_for_proof1 = mst.cids_for_path(key1).await.unwrap();
+        let cids_for_proof2 = mst.cids_for_path(key2).await.unwrap();
+        let mut car_blocks = std::collections::BTreeMap::new();
+
+        car_blocks.insert(commit_cid, bytes::Bytes::from(commit_bytes));
+
+        // Add all MST blocks from both paths
+        for cid in cids_for_proof1
+            .iter()
+            .chain(cids_for_proof2.iter())
+            .take(cids_for_proof1.len() + cids_for_proof2.len() - 2)
+        {
+            if let Some(block) = storage.get(cid).await.unwrap() {
+                car_blocks.insert(*cid, block);
+            }
+        }
+
+        // Add record blocks
+        car_blocks.insert(cid1, bytes::Bytes::from(vec![0x41]));
+        car_blocks.insert(cid2, bytes::Bytes::from(vec![0x42]));
+
+        let car_bytes = crate::car::write_car_bytes(commit_cid, car_blocks)
+            .await
+            .unwrap();
+
+        // Mixed claims: valid, invalid CID, non-existent
+        let claims = vec![
+            RecordClaim {
+                collection: "app.bsky.feed.post".into(),
+                rkey: "abc123".into(),
+                cid: Some(cid1), // Valid
+            },
+            RecordClaim {
+                collection: "app.bsky.feed.post".into(),
+                rkey: "def456".into(),
+                cid: Some(test_cid(99)), // Wrong CID
+            },
+            RecordClaim {
+                collection: "app.bsky.feed.post".into(),
+                rkey: "xyz789".into(),
+                cid: None, // Correctly doesn't exist
+            },
+        ];
+
+        let result = verify_proofs(&car_bytes, claims, &did, &pubkey)
+            .await
+            .unwrap();
+
+        assert_eq!(result.verified.len(), 2); // First and third should verify
+        assert_eq!(result.unverified.len(), 1); // Second should fail
+    }
+
+    #[tokio::test]
+    async fn test_verify_proofs_wrong_did() {
+        // Test that verification fails when DID doesn't match
+        let storage = Arc::new(MemoryBlockStore::new());
+        let mst = Mst::new(storage.clone());
+
+        let key1 = "app.bsky.feed.post/abc123";
+        let cid1 = test_cid(1);
+
+        let mst = mst.add(key1, cid1).await.unwrap();
+        let mst_root = mst.persist().await.unwrap();
+
+        let sk = test_signing_key();
+        let pubkey = test_pubkey(&sk);
+        let did = Did::new("did:plc:test").unwrap();
+
+        let commit = Commit::new_unsigned(
+            did.clone(),
+            mst_root,
+            jacquard_common::types::tid::Ticker::new().next(None),
+            None,
446
+
)
447
+
.sign(&sk)
448
+
.unwrap();
449
+
450
+
let commit_cid = commit.to_cid().unwrap();
451
+
let commit_bytes = commit.to_cbor().unwrap();
452
+
storage.put(&commit_bytes).await.unwrap();
453
+
454
+
let cids_for_proof = mst.cids_for_path(key1).await.unwrap();
455
+
let mut car_blocks = std::collections::BTreeMap::new();
456
+
car_blocks.insert(commit_cid, bytes::Bytes::from(commit_bytes));
457
+
458
+
for cid in &cids_for_proof[..cids_for_proof.len() - 1] {
459
+
if let Some(block) = storage.get(cid).await.unwrap() {
460
+
car_blocks.insert(*cid, block);
461
+
}
462
+
}
463
+
464
+
car_blocks.insert(cid1, bytes::Bytes::from(vec![0x42]));
465
+
466
+
let car_bytes = crate::car::write_car_bytes(commit_cid, car_blocks)
467
+
.await
468
+
.unwrap();
469
+
470
+
let claims = vec![RecordClaim {
471
+
collection: "app.bsky.feed.post".into(),
472
+
rkey: "abc123".into(),
473
+
cid: Some(cid1),
474
+
}];
475
+
476
+
// Try to verify with WRONG DID
477
+
let wrong_did = Did::new("did:plc:wrong").unwrap();
478
+
let result = verify_proofs(&car_bytes, claims, &wrong_did, &pubkey).await;
479
+
480
+
assert!(result.is_err());
481
+
assert!(result.unwrap_err().to_string().contains("DID mismatch"));
482
+
}
483
+
484
+
#[tokio::test]
485
+
async fn test_verify_proofs_bad_signature() {
486
+
// Test that verification fails with wrong public key
487
+
let storage = Arc::new(MemoryBlockStore::new());
488
+
let mst = Mst::new(storage.clone());
489
+
490
+
let key1 = "app.bsky.feed.post/abc123";
491
+
let cid1 = test_cid(1);
492
+
493
+
let mst = mst.add(key1, cid1).await.unwrap();
494
+
let mst_root = mst.persist().await.unwrap();
495
+
496
+
let sk = test_signing_key();
497
+
let did = Did::new("did:plc:test").unwrap();
498
+
499
+
let commit = Commit::new_unsigned(
500
+
did.clone(),
501
+
mst_root,
502
+
jacquard_common::types::tid::Ticker::new().next(None),
503
+
None,
504
+
)
505
+
.sign(&sk)
506
+
.unwrap();
507
+
508
+
let commit_cid = commit.to_cid().unwrap();
509
+
let commit_bytes = commit.to_cbor().unwrap();
510
+
storage.put(&commit_bytes).await.unwrap();
511
+
512
+
let cids_for_proof = mst.cids_for_path(key1).await.unwrap();
513
+
let mut car_blocks = std::collections::BTreeMap::new();
514
+
car_blocks.insert(commit_cid, bytes::Bytes::from(commit_bytes));
515
+
516
+
for cid in &cids_for_proof[..cids_for_proof.len() - 1] {
517
+
if let Some(block) = storage.get(cid).await.unwrap() {
518
+
car_blocks.insert(*cid, block);
519
+
}
520
+
}
521
+
522
+
car_blocks.insert(cid1, bytes::Bytes::from(vec![0x42]));
523
+
524
+
let car_bytes = crate::car::write_car_bytes(commit_cid, car_blocks)
525
+
.await
526
+
.unwrap();
527
+
528
+
let claims = vec![RecordClaim {
529
+
collection: "app.bsky.feed.post".into(),
530
+
rkey: "abc123".into(),
531
+
cid: Some(cid1),
532
+
}];
533
+
534
+
// Use WRONG public key
535
+
let wrong_sk = test_signing_key();
536
+
let wrong_pubkey = test_pubkey(&wrong_sk);
537
+
538
+
let result = verify_proofs(&car_bytes, claims, &did, &wrong_pubkey).await;
539
+
540
+
// Should fail signature verification
541
+
assert!(matches!(
542
+
result,
543
+
Err(ProofError::SignatureVerificationFailed { source: _ })
544
+
));
545
+
}
546
+
547
+
#[tokio::test]
548
+
async fn test_verify_proofs_missing_blocks() {
549
+
// Test that verification fails when CAR is missing necessary blocks
550
+
let storage = Arc::new(MemoryBlockStore::new());
551
+
let mst = Mst::new(storage.clone());
552
+
553
+
let key1 = "app.bsky.feed.post/abc123";
554
+
let cid1 = test_cid(1);
555
+
556
+
let mst = mst.add(key1, cid1).await.unwrap();
557
+
let mst_root = mst.persist().await.unwrap();
558
+
559
+
let sk = test_signing_key();
560
+
let pubkey = test_pubkey(&sk);
561
+
let did = Did::new("did:plc:test").unwrap();
562
+
563
+
let commit = Commit::new_unsigned(
564
+
did.clone(),
565
+
mst_root,
566
+
jacquard_common::types::tid::Ticker::new().next(None),
567
+
None,
568
+
)
569
+
.sign(&sk)
570
+
.unwrap();
571
+
572
+
let commit_cid = commit.to_cid().unwrap();
573
+
let commit_bytes = commit.to_cbor().unwrap();
574
+
storage.put(&commit_bytes).await.unwrap();
575
+
576
+
// Create CAR with ONLY commit block, missing MST blocks
577
+
let mut car_blocks = std::collections::BTreeMap::new();
578
+
car_blocks.insert(commit_cid, bytes::Bytes::from(commit_bytes));
579
+
// Intentionally NOT adding MST blocks or record blocks
580
+
581
+
let car_bytes = crate::car::write_car_bytes(commit_cid, car_blocks)
582
+
.await
583
+
.unwrap();
584
+
585
+
let claims = vec![RecordClaim {
586
+
collection: "app.bsky.feed.post".into(),
587
+
rkey: "abc123".into(),
588
+
cid: Some(cid1),
589
+
}];
590
+
591
+
let result = verify_proofs(&car_bytes, claims, &did, &pubkey)
592
+
.await
593
+
.unwrap();
594
+
595
+
assert!(result.verified.is_empty())
596
+
}
597
+
598
+
#[tokio::test]
599
+
async fn test_verify_proofs_empty_mst() {
600
+
// Test proof verification on empty MST (claiming non-existence)
601
+
let storage = Arc::new(MemoryBlockStore::new());
602
+
let mst = Mst::new(storage.clone());
603
+
604
+
let mst_root = mst.persist().await.unwrap();
605
+
606
+
let sk = test_signing_key();
607
+
let pubkey = test_pubkey(&sk);
608
+
let did = Did::new("did:plc:test").unwrap();
609
+
610
+
let commit = Commit::new_unsigned(
611
+
did.clone(),
612
+
mst_root,
613
+
jacquard_common::types::tid::Ticker::new().next(None),
614
+
None,
615
+
)
616
+
.sign(&sk)
617
+
.unwrap();
618
+
619
+
let commit_cid = commit.to_cid().unwrap();
620
+
let commit_bytes = commit.to_cbor().unwrap();
621
+
storage.put(&commit_bytes).await.unwrap();
622
+
623
+
let cids_for_proof = mst
624
+
.cids_for_path("app.bsky.feed.post/abc123")
625
+
.await
626
+
.unwrap();
627
+
let mut car_blocks = std::collections::BTreeMap::new();
628
+
car_blocks.insert(commit_cid, bytes::Bytes::from(commit_bytes));
629
+
630
+
// Add any MST blocks (empty MST might still have root node)
631
+
for cid in &cids_for_proof {
632
+
if let Some(block) = storage.get(cid).await.unwrap() {
633
+
car_blocks.insert(*cid, block);
634
+
}
635
+
}
636
+
637
+
let car_bytes = crate::car::write_car_bytes(commit_cid, car_blocks)
638
+
.await
639
+
.unwrap();
640
+
641
+
let claims = vec![RecordClaim {
642
+
collection: "app.bsky.feed.post".into(),
643
+
rkey: "abc123".into(),
644
+
cid: None, // Claiming doesn't exist
645
+
}];
646
+
647
+
let result = verify_proofs(&car_bytes, claims, &did, &pubkey)
648
+
.await
649
+
.unwrap();
650
+
651
+
assert_eq!(result.verified.len(), 1);
652
+
assert_eq!(result.unverified.len(), 0);
653
+
}
654
+
655
+
#[tokio::test]
656
+
async fn test_verify_proofs_claim_exists_in_empty_mst() {
657
+
// Test that claiming existence in empty MST fails
658
+
let storage = Arc::new(MemoryBlockStore::new());
659
+
let mst = Mst::new(storage.clone());
660
+
661
+
let mst_root = mst.persist().await.unwrap();
662
+
663
+
let sk = test_signing_key();
664
+
let pubkey = test_pubkey(&sk);
665
+
let did = Did::new("did:plc:test").unwrap();
666
+
667
+
let commit = Commit::new_unsigned(
668
+
did.clone(),
669
+
mst_root,
670
+
jacquard_common::types::tid::Ticker::new().next(None),
671
+
None,
672
+
)
673
+
.sign(&sk)
674
+
.unwrap();
675
+
676
+
let commit_cid = commit.to_cid().unwrap();
677
+
let commit_bytes = commit.to_cbor().unwrap();
678
+
storage.put(&commit_bytes).await.unwrap();
679
+
680
+
let cids_for_proof = mst
681
+
.cids_for_path("app.bsky.feed.post/abc123")
682
+
.await
683
+
.unwrap();
684
+
let mut car_blocks = std::collections::BTreeMap::new();
685
+
car_blocks.insert(commit_cid, bytes::Bytes::from(commit_bytes));
686
+
687
+
for cid in &cids_for_proof {
688
+
if let Some(block) = storage.get(cid).await.unwrap() {
689
+
car_blocks.insert(*cid, block);
690
+
}
691
+
}
692
+
693
+
let car_bytes = crate::car::write_car_bytes(commit_cid, car_blocks)
694
+
.await
695
+
.unwrap();
696
+
697
+
let claims = vec![RecordClaim {
698
+
collection: "app.bsky.feed.post".into(),
699
+
rkey: "abc123".into(),
700
+
cid: Some(test_cid(1)), // Claiming it exists
701
+
}];
702
+
703
+
let result = verify_proofs(&car_bytes, claims, &did, &pubkey)
704
+
.await
705
+
.unwrap();
706
+
707
+
assert_eq!(result.verified.len(), 0);
708
+
assert_eq!(result.unverified.len(), 1); // Should fail
709
+
}
710
+
711
+
#[tokio::test]
712
+
async fn test_verify_proofs_invalid_claim() {
713
+
// Create MST with records
714
+
let storage = Arc::new(MemoryBlockStore::new());
715
+
let mst = Mst::new(storage.clone());
716
+
717
+
let key1 = "app.bsky.feed.post/abc123";
718
+
let cid1 = test_cid(1);
719
+
720
+
let mst = mst.add(key1, cid1).await.unwrap();
721
+
let mst_root = mst.persist().await.unwrap();
722
+
723
+
// Create and sign commit
724
+
let sk = test_signing_key();
725
+
let pubkey = test_pubkey(&sk);
726
+
let did = Did::new("did:plc:test").unwrap();
727
+
728
+
let commit = Commit::new_unsigned(
729
+
did.clone(),
730
+
mst_root,
731
+
jacquard_common::types::tid::Ticker::new().next(None),
732
+
None,
733
+
)
734
+
.sign(&sk)
735
+
.unwrap();
736
+
737
+
let commit_cid = commit.to_cid().unwrap();
738
+
let commit_bytes = commit.to_cbor().unwrap();
739
+
storage.put(&commit_bytes).await.unwrap();
740
+
741
+
// Generate CAR proof
742
+
let cids_for_proof = mst.cids_for_path(key1).await.unwrap();
743
+
let mut car_blocks = std::collections::BTreeMap::new();
744
+
745
+
car_blocks.insert(commit_cid, bytes::Bytes::from(commit_bytes));
746
+
747
+
for cid in &cids_for_proof[..cids_for_proof.len() - 1] {
748
+
if let Some(block) = storage.get(cid).await.unwrap() {
749
+
car_blocks.insert(*cid, block);
750
+
}
751
+
}
752
+
753
+
car_blocks.insert(cid1, bytes::Bytes::from(vec![0x42]));
754
+
755
+
let car_bytes = crate::car::write_car_bytes(commit_cid, car_blocks)
756
+
.await
757
+
.unwrap();
758
+
759
+
// Verify proof with WRONG CID
760
+
let wrong_cid = test_cid(99);
761
+
let claims = vec![RecordClaim {
762
+
collection: "app.bsky.feed.post".into(),
763
+
rkey: "abc123".into(),
764
+
cid: Some(wrong_cid), // Wrong CID
765
+
}];
766
+
767
+
let result = verify_proofs(&car_bytes, claims, &did, &pubkey)
768
+
.await
769
+
.unwrap();
770
+
771
+
assert_eq!(result.verified.len(), 0);
772
+
assert_eq!(result.unverified.len(), 1); // Failed verification
773
+
}
774
+
}

+21  crates/jacquard-repo/src/commit/serde_bytes_helper.rs

//! Custom serde helpers for bytes::Bytes using serde_bytes

use bytes::Bytes;
use serde::{Deserializer, Serializer};

/// Serialize Bytes as a CBOR byte string
pub fn serialize<S>(bytes: &Bytes, serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    serde_bytes::serialize(bytes.as_ref(), serializer)
}

/// Deserialize Bytes from a CBOR byte string
pub fn deserialize<'de, D>(deserializer: D) -> Result<Bytes, D::Error>
where
    D: Deserializer<'de>,
{
    let vec: Vec<u8> = serde_bytes::deserialize(deserializer)?;
    Ok(Bytes::from(vec))
}
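
A quick round-trip sketch of how this helper is meant to be wired in (not part of the diff; the `Wrapper` struct is hypothetical and the module path assumes `serde_bytes_helper` is reachable from outside `commit`):

use bytes::Bytes;

#[derive(serde::Serialize, serde::Deserialize)]
struct Wrapper {
    #[serde(with = "jacquard_repo::commit::serde_bytes_helper")]
    sig: Bytes,
}

fn roundtrip() -> Result<(), Box<dyn std::error::Error>> {
    let w = Wrapper { sig: Bytes::from_static(&[0x01, 0x02]) };
    let cbor = serde_ipld_dagcbor::to_vec(&w)?;
    // The value encodes as 0x42 0x01 0x02: a CBOR byte string (major type 2)
    // of length 2, not an array of integers
    assert_eq!(&cbor[cbor.len() - 3..], &[0x42, 0x01, 0x02]);
    let back: Wrapper = serde_ipld_dagcbor::from_slice(&cbor)?;
    assert_eq!(back.sig, w.sig);
    Ok(())
}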

+410  crates/jacquard-repo/src/error.rs

//! Error types for repository operations

use std::error::Error;
use std::fmt;

/// Boxed error type for error sources
pub type BoxError = Box<dyn Error + Send + Sync + 'static>;

/// Result type alias for repository operations
pub type Result<T> = std::result::Result<T, RepoError>;

/// Repository operation error with rich diagnostics
#[derive(Debug, thiserror::Error, miette::Diagnostic)]
pub struct RepoError {
    kind: RepoErrorKind,
    #[source]
    source: Option<BoxError>,
    #[help]
    help: Option<String>,
    context: Option<String>,
}

/// Error categories for repository operations
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RepoErrorKind {
    /// Storage operation failed
    Storage,
    /// Invalid MST structure
    InvalidMst,
    /// Invalid commit structure
    InvalidCommit,
    /// Invalid key format
    InvalidKey,
    /// Invalid CID
    InvalidCid,
    /// Resource not found
    NotFound,
    /// Cryptographic operation failed
    Crypto,
    /// Serialization/deserialization failed
    Serialization,
    /// Data too large (exceeds protocol limits)
    TooLarge,
    /// CAR file operation failed
    Car,
    /// I/O error
    Io,
}

impl RepoError {
    /// Create a new error with the given kind and optional source
    pub fn new(kind: RepoErrorKind, source: Option<BoxError>) -> Self {
        Self {
            kind,
            source,
            help: None,
            context: None,
        }
    }

    /// Add a help message to the error
    pub fn with_help(mut self, help: impl Into<String>) -> Self {
        self.help = Some(help.into());
        self
    }

    /// Add context information to the error
    pub fn with_context(mut self, context: impl Into<String>) -> Self {
        self.context = Some(context.into());
        self
    }

    /// Get the error kind
    pub fn kind(&self) -> &RepoErrorKind {
        &self.kind
    }

    // Constructors for different error kinds

    /// Create a storage error
    pub fn storage(source: impl Error + Send + Sync + 'static) -> Self {
        Self::new(RepoErrorKind::Storage, Some(Box::new(source)))
    }

    /// Create an invalid MST error
    pub fn invalid_mst(msg: impl Into<String>) -> Self {
        Self::new(RepoErrorKind::InvalidMst, Some(msg.into().into()))
            .with_help("MST nodes must follow protocol structure")
    }

    /// Create an invalid commit error
    pub fn invalid_commit(msg: impl Into<String>) -> Self {
        Self::new(RepoErrorKind::InvalidCommit, Some(msg.into().into()))
    }

    /// Create an invalid key error
    pub fn invalid_key(key: impl Into<String>) -> Self {
        Self::new(RepoErrorKind::InvalidKey, None)
            .with_help("MST keys must match [a-zA-Z0-9._:~-]+, max 256 bytes")
            .with_context(format!("key: {}", key.into()))
    }

    /// Create an invalid CID error
    pub fn invalid_cid(msg: impl Into<String>) -> Self {
        Self::new(RepoErrorKind::InvalidCid, Some(msg.into().into()))
    }

    /// Create a not found error
    pub fn not_found(resource: &str, id: impl fmt::Display) -> Self {
        Self::new(RepoErrorKind::NotFound, None)
            .with_context(format!("{} not found: {}", resource, id))
    }

    /// Create an already exists error
    pub fn already_exists(resource: &str, id: impl fmt::Display) -> Self {
        Self::new(RepoErrorKind::InvalidMst, None)
            .with_context(format!("{} already exists: {}", resource, id))
    }

    /// Create a crypto error
    pub fn crypto(source: impl Error + Send + Sync + 'static) -> Self {
        Self::new(RepoErrorKind::Crypto, Some(Box::new(source)))
    }

    /// Create a serialization error
    pub fn serialization(source: impl Error + Send + Sync + 'static) -> Self {
        Self::new(RepoErrorKind::Serialization, Some(Box::new(source)))
    }

    /// Create a too large error
    pub fn too_large(what: &str, size: usize, max: usize) -> Self {
        Self::new(RepoErrorKind::TooLarge, None)
            .with_context(format!("{} is {} bytes, max {}", what, size, max))
            .with_help("See sync v1.1 protocol limits")
    }

    /// Create a CAR file error
    pub fn car(source: impl Error + Send + Sync + 'static) -> Self {
        Self::new(RepoErrorKind::Car, Some(Box::new(source)))
    }

    /// Create a CAR parse error (alias for car)
    pub fn car_parse(source: impl Error + Send + Sync + 'static) -> Self {
        Self::car(source).with_context("Failed to parse CAR file".to_string())
    }

    /// Create an I/O error
    pub fn io(source: impl Error + Send + Sync + 'static) -> Self {
        Self::new(RepoErrorKind::Io, Some(Box::new(source)))
    }

    /// Create a generic invalid error
    pub fn invalid(msg: impl Into<String>) -> Self {
        Self::new(RepoErrorKind::InvalidMst, Some(msg.into().into()))
    }
}

impl fmt::Display for RepoError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?}", self.kind)?;

        if let Some(ctx) = &self.context {
            write!(f, ": {}", ctx)?;
        }

        if let Some(src) = &self.source {
            write!(f, ": {}", src)?;
        }

        Ok(())
    }
}

// Internal granular errors

/// MST-specific errors
#[derive(Debug, thiserror::Error, miette::Diagnostic)]
pub enum MstError {
    /// Empty key not allowed
    #[error("Empty key not allowed")]
    EmptyKey,

    /// Key too long
    #[error("Key too long: {len} bytes (max {max})")]
    KeyTooLong {
        /// Actual key length
        len: usize,
        /// Maximum allowed length
        max: usize,
    },

    /// Invalid key characters
    #[error("Invalid key characters: {key}")]
    InvalidKeyChars {
        /// The invalid key
        key: String,
    },

    /// Node structure invalid
    #[error("Node structure invalid: {0}")]
    InvalidNode(String),

    /// Serialization failed
    #[error("Serialization failed")]
    Serialization(#[source] BoxError),
}

impl From<MstError> for RepoError {
    fn from(e: MstError) -> Self {
        match e {
            MstError::EmptyKey => RepoError::invalid_key(""),
            MstError::KeyTooLong { len, max } => {
                RepoError::invalid_key(format!("length {}/{}", len, max))
            }
            MstError::InvalidKeyChars { key } => RepoError::invalid_key(key),
            MstError::InvalidNode(msg) => RepoError::invalid_mst(msg),
            MstError::Serialization(e) => RepoError::new(RepoErrorKind::Serialization, Some(e)),
        }
    }
}

/// Commit-specific errors
#[derive(Debug, thiserror::Error, miette::Diagnostic)]
pub enum CommitError {
    /// Invalid commit version
    #[error("Invalid commit version: {0}")]
    InvalidVersion(i64),

    /// Invalid signature format
    #[error("Invalid signature format: {0}")]
    InvalidSignature(String),

    /// Signature verification failed
    #[error("Signature verification failed")]
    SignatureVerificationFailed,

    /// Invalid key format
    #[error("Invalid key format: {0}")]
    InvalidKey(String),

    /// Unsupported key type
    #[error("Unsupported key type: {0}")]
    UnsupportedKeyType(u64),

    /// Serialization failed
    #[error("Serialization failed")]
    Serialization(#[source] BoxError),
}

impl From<CommitError> for RepoError {
    fn from(e: CommitError) -> Self {
        match e {
            CommitError::InvalidVersion(v) => {
                RepoError::invalid_commit(format!("unsupported version {}", v))
            }
            CommitError::InvalidSignature(msg) => {
                RepoError::new(RepoErrorKind::Crypto, Some(msg.into()))
                    .with_context("invalid signature format".to_string())
            }
            CommitError::SignatureVerificationFailed => RepoError::new(RepoErrorKind::Crypto, None)
                .with_context("signature verification failed".to_string()),
            CommitError::InvalidKey(msg) => RepoError::new(RepoErrorKind::Crypto, Some(msg.into()))
                .with_context("invalid key format".to_string()),
            CommitError::UnsupportedKeyType(code) => RepoError::new(RepoErrorKind::Crypto, None)
                .with_context(format!("unsupported key type: 0x{:x}", code)),
            CommitError::Serialization(e) => RepoError::new(RepoErrorKind::Serialization, Some(e)),
        }
    }
}

/// Diff-specific errors
#[derive(Debug, thiserror::Error)]
pub enum DiffError {
    /// Too many operations
    #[error("Too many operations: {count} (max {max})")]
    TooManyOps {
        /// Actual operation count
        count: usize,
        /// Maximum allowed operations
        max: usize,
    },

    /// Diff too large
    #[error("Diff too large: {size} bytes (max {max})")]
    TooLarge {
        /// Actual size
        size: usize,
        /// Maximum size
        max: usize,
    },
}

impl From<DiffError> for RepoError {
    fn from(e: DiffError) -> Self {
        match e {
            DiffError::TooManyOps { count, max } => {
                RepoError::too_large("diff operation count", count, max)
            }
            DiffError::TooLarge { size, max } => RepoError::too_large("diff size", size, max),
        }
    }
}

/// Proof verification errors
#[derive(Debug, thiserror::Error, miette::Diagnostic)]
pub enum ProofError {
    /// CAR file has no root CID
    #[error("CAR file has no root CID")]
    #[diagnostic(
        code(proof::no_root),
        help("CAR files for proofs must have exactly one root CID pointing to the commit")
    )]
    NoRoot,

    /// Commit block not found in CAR
    #[error("Commit block not found in CAR")]
    #[diagnostic(
        code(proof::commit_not_found),
        help("The CAR root CID must point to a valid commit block")
    )]
    CommitNotFound,

    /// DID mismatch between commit and expected
    #[error("DID mismatch: commit has {commit_did}, expected {expected_did}")]
    #[diagnostic(
        code(proof::did_mismatch),
        help("The commit must be signed by the expected DID")
    )]
    DidMismatch {
        /// DID in the commit
        commit_did: String,
        /// Expected DID
        expected_did: String,
    },

    /// Signature verification failed
    #[error("Signature verification failed")]
    #[diagnostic(
        code(proof::signature_failed),
        help("The commit signature must be valid for the provided public key")
    )]
    SignatureVerificationFailed {
        /// Underlying crypto error
        #[source]
        source: CommitError,
    },

    /// MST root block missing from CAR
    #[error("MST root block missing from CAR: {cid}")]
    #[diagnostic(
        code(proof::missing_mst_block),
        help("All MST blocks along the proof path must be included in the CAR file")
    )]
    MissingMstBlock {
        /// The missing CID
        cid: String,
    },

    /// Invalid commit structure
    #[error("Invalid commit structure: {0}")]
    #[diagnostic(code(proof::invalid_commit))]
    InvalidCommit(String),

    /// CAR parsing failed
    #[error("CAR parsing failed")]
    #[diagnostic(code(proof::car_parse_failed))]
    CarParseFailed {
        /// Underlying error
        #[source]
        source: BoxError,
    },

    /// Commit deserialization failed
    #[error("Commit deserialization failed")]
    #[diagnostic(code(proof::commit_deserialize_failed))]
    CommitDeserializeFailed {
        /// Underlying error
        #[source]
        source: BoxError,
    },
}

impl From<ProofError> for RepoError {
    fn from(e: ProofError) -> Self {
        match &e {
            ProofError::NoRoot => RepoError::invalid("CAR file has no root CID"),
            ProofError::CommitNotFound => {
                RepoError::new(RepoErrorKind::NotFound, Some(Box::new(e)))
            }
            ProofError::DidMismatch { .. } => {
                RepoError::new(RepoErrorKind::InvalidCommit, Some(Box::new(e)))
            }
            ProofError::SignatureVerificationFailed { .. } => {
                RepoError::new(RepoErrorKind::Crypto, Some(Box::new(e)))
            }
            ProofError::MissingMstBlock { .. } => {
                RepoError::new(RepoErrorKind::NotFound, Some(Box::new(e)))
            }
            ProofError::InvalidCommit(_) => {
                RepoError::new(RepoErrorKind::InvalidCommit, Some(Box::new(e)))
            }
            ProofError::CarParseFailed { .. } => {
                RepoError::new(RepoErrorKind::Car, Some(Box::new(e)))
            }
            ProofError::CommitDeserializeFailed { .. } => {
                RepoError::new(RepoErrorKind::Serialization, Some(Box::new(e)))
            }
        }
    }
}
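
A short usage sketch of the builder-style API (my addition, using only the constructors and methods defined above): errors are built from a kind-specific constructor, then enriched with help and context.

use jacquard_repo::error::{RepoError, RepoErrorKind};

fn lookup(cid_str: &str) -> Result<(), RepoError> {
    // Builder-style enrichment on top of a kind-specific constructor
    Err(RepoError::not_found("record", cid_str)
        .with_help("run a full sync to backfill missing blocks"))
}

fn report(e: &RepoError) {
    // `kind()` lets callers branch on category without downcasting the boxed source
    match e.kind() {
        RepoErrorKind::NotFound => eprintln!("missing: {}", e),
        RepoErrorKind::TooLarge => eprintln!("over protocol limit: {}", e),
        _ => eprintln!("repo error: {}", e),
    }
}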

+62  crates/jacquard-repo/src/lib.rs

//! AT Protocol repository primitives
//!
//! This crate provides building blocks for working with AT Protocol repositories:
//!
//! - **MST (Merkle Search Tree)**: Immutable tree operations with deterministic structure
//! - **Commits**: Signed commit structures for Sync v1 (version 2) and v1.1 (version 3)
//! - **CAR I/O**: Import and export repositories in CAR (Content Addressable aRchive) format
//! - **Storage**: Pluggable block storage abstraction with in-memory and file-backed implementations
//!
//! # Design Philosophy
//!
//! - Core primitives are always available (MST, commits, storage)
//! - Optional high-level Repository API for convenience
//! - Immutable MST operations for referential transparency
//! - Zero-copy deserialization where possible
//! - Support for both current and future sync protocol versions
//!
//! # Example
//!
//! ```rust,ignore
//! use jacquard_repo::{Mst, MemoryBlockStore};
//! use cid::Cid;
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! let storage = MemoryBlockStore::new();
//! let mst = Mst::new(storage);
//!
//! // Add entries
//! let cid = /* ... */;
//! let new_mst = mst.add("app.bsky.feed.post/abc123", cid).await?;
//!
//! // Retrieve
//! if let Some(value) = new_mst.get("app.bsky.feed.post/abc123").await? {
//!     println!("Found: {}", value);
//! }
//! # Ok(())
//! # }
//! ```

#![warn(missing_docs)]
#![warn(clippy::all)]
#![deny(unsafe_code)]

/// CAR (Content Addressable aRchive) utilities
pub mod car;
/// Commit structures and signature verification
pub mod commit;
pub mod error;
/// Merkle Search Tree implementation
pub mod mst;
/// High-level repository operations
pub mod repo;
/// Block storage abstraction
pub mod storage;

pub use error::{RepoError, RepoErrorKind, Result};
pub use mst::{Mst, MstDiff, WriteOp};
pub use repo::{CommitData, Repository};
pub use storage::{BlockStore, FileBlockStore, LayeredBlockStore, MemoryBlockStore};

/// DAG-CBOR codec identifier for CIDs (0x71)
pub const DAG_CBOR_CID_CODEC: u64 = 0x71;
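
The crate-level doc example is `rust,ignore` and elides the CID; a compiling variant might look like this (assumptions on my part: `Mst::new` takes an `Arc`, as in tree.rs below, and 0x12 is the sha2-256 multihash code):

use jacquard_repo::{DAG_CBOR_CID_CODEC, MemoryBlockStore, Mst};
use std::sync::Arc;

async fn demo() -> Result<(), Box<dyn std::error::Error>> {
    let storage = Arc::new(MemoryBlockStore::new());
    let mst = Mst::new(storage);

    // Any valid CID works as a record value; 0x12 = sha2-256
    let mh = multihash::Multihash::wrap(0x12, &[0u8; 32])?;
    let cid = cid::Cid::new_v1(DAG_CBOR_CID_CODEC, mh);

    // `add` returns a new tree; the original is untouched
    let mst = mst.add("app.bsky.feed.post/abc123", cid).await?;
    assert_eq!(mst.get("app.bsky.feed.post/abc123").await?, Some(cid));
    Ok(())
}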

+399  crates/jacquard-repo/src/mst/diff.rs

//! MST diff calculation

use super::tree::Mst;
use crate::error::Result;
use crate::storage::BlockStore;
use cid::Cid as IpldCid;
use smol_str::SmolStr;
use std::collections::HashMap;

/// Diff between two MST states
///
/// Represents the changes needed to transform one tree into another.
/// Used for firehose validation and batch operations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MstDiff {
    /// New records created (key, new CID)
    pub creates: Vec<(SmolStr, IpldCid)>,

    /// Records updated (key, new CID, old CID)
    pub updates: Vec<(SmolStr, IpldCid, IpldCid)>,

    /// Records deleted (key, old CID)
    pub deletes: Vec<(SmolStr, IpldCid)>,
}

use super::tree::VerifiedWriteOp;

impl MstDiff {
    /// Create empty diff
    pub fn new() -> Self {
        Self {
            creates: Vec::new(),
            updates: Vec::new(),
            deletes: Vec::new(),
        }
    }

    /// Check if diff is empty (no changes)
    pub fn is_empty(&self) -> bool {
        self.creates.is_empty() && self.updates.is_empty() && self.deletes.is_empty()
    }

    /// Count total operations
    pub fn op_count(&self) -> usize {
        self.creates.len() + self.updates.len() + self.deletes.len()
    }

    /// Validate against sync v1.1 limits
    ///
    /// The sync protocol has a 200 operation limit per commit.
    pub fn validate_limits(&self) -> Result<()> {
        if self.op_count() > 200 {
            return Err(crate::error::RepoError::too_large(
                "diff operation count",
                self.op_count(),
                200,
            ));
        }
        Ok(())
    }

    /// Convert diff to verified write operations
    ///
    /// Returns operations that can be safely applied with `batch()`.
    /// All update/delete operations include verified prev CIDs.
    pub fn to_verified_ops(&self) -> Vec<VerifiedWriteOp> {
        let mut ops = Vec::with_capacity(self.op_count());

        // Add creates
        for (key, cid) in &self.creates {
            ops.push(VerifiedWriteOp::Create {
                key: key.clone(),
                cid: *cid,
            });
        }

        // Add updates (includes prev)
        for (key, new_cid, old_cid) in &self.updates {
            ops.push(VerifiedWriteOp::Update {
                key: key.clone(),
                cid: *new_cid,
                prev: *old_cid,
            });
        }

        // Add deletes (includes prev)
        for (key, old_cid) in &self.deletes {
            ops.push(VerifiedWriteOp::Delete {
                key: key.clone(),
                prev: *old_cid,
            });
        }

        ops
    }

    /// Convert diff to firehose repository operations
    ///
    /// Returns operations in the format used by `com.atproto.sync.subscribeRepos`.
    /// All update/delete operations include prev CIDs for sync v1.1 validation.
    pub fn to_repo_ops(&self) -> Vec<crate::commit::firehose::RepoOp<'_>> {
        use jacquard_common::types::cid::CidLink;

        let mut ops = Vec::with_capacity(self.op_count());

        // Add creates
        for (key, cid) in &self.creates {
            ops.push(crate::commit::firehose::RepoOp {
                action: "create".into(),
                path: key.as_str().into(),
                cid: Some(CidLink::from(*cid)),
                prev: None,
            });
        }

        // Add updates
        for (key, new_cid, old_cid) in &self.updates {
            ops.push(crate::commit::firehose::RepoOp {
                action: "update".into(),
                path: key.as_str().into(),
                cid: Some(CidLink::from(*new_cid)),
                prev: Some(CidLink::from(*old_cid)),
            });
        }

        // Add deletes
        for (key, old_cid) in &self.deletes {
            ops.push(crate::commit::firehose::RepoOp {
                action: "delete".into(),
                path: key.as_str().into(),
                cid: None, // null for deletes
                prev: Some(CidLink::from(*old_cid)),
            });
        }

        ops
    }
}

impl Default for MstDiff {
    fn default() -> Self {
        Self::new()
    }
}

impl<S: BlockStore + Sync + 'static> Mst<S> {
    /// Compute diff from this tree to another
    ///
    /// Returns operations needed to transform `self` into `other`.
    /// - Creates: keys in `other` but not in `self`
    /// - Updates: keys in both but with different CIDs
    /// - Deletes: keys in `self` but not in `other`
    pub async fn diff(&self, other: &Mst<S>) -> Result<MstDiff> {
        // Collect all leaves from both trees
        let self_leaves = self.leaves().await?;
        let other_leaves = other.leaves().await?;

        // Build hashmaps for efficient lookup
        let self_map: HashMap<SmolStr, IpldCid> = self_leaves.into_iter().collect();
        let other_map: HashMap<SmolStr, IpldCid> = other_leaves.into_iter().collect();

        let mut diff = MstDiff::new();

        // Find creates and updates
        for (key, new_cid) in &other_map {
            match self_map.get(key) {
                Some(old_cid) => {
                    // Key exists in both - check if CID changed
                    if old_cid != new_cid {
                        diff.updates.push((key.clone(), *new_cid, *old_cid));
                    }
                }
                None => {
                    // Key only in other - create
                    diff.creates.push((key.clone(), *new_cid));
                }
            }
        }

        // Find deletes
        for (key, old_cid) in &self_map {
            if !other_map.contains_key(key) {
                // Key only in self - delete
                diff.deletes.push((key.clone(), *old_cid));
            }
        }

        Ok(diff)
    }

    /// Compute diff from this tree to empty (all deletes)
    ///
    /// Returns diff representing deletion of all records in this tree.
    pub async fn diff_to_empty(&self) -> Result<MstDiff> {
        let leaves = self.leaves().await?;

        Ok(MstDiff {
            creates: Vec::new(),
            updates: Vec::new(),
            deletes: leaves,
        })
    }
}

#[cfg(test)]
mod tests {
    use jacquard_common::types::crypto::SHA2_256;

    use super::*;
    use crate::{DAG_CBOR_CID_CODEC, storage::memory::MemoryBlockStore};
    use std::sync::Arc;

    fn test_cid(n: u8) -> IpldCid {
        let data = vec![n; 32];
        let mh = multihash::Multihash::wrap(SHA2_256, &data).unwrap();
        IpldCid::new_v1(DAG_CBOR_CID_CODEC, mh)
    }

    #[tokio::test]
    async fn test_diff_empty_trees() {
        let storage = Arc::new(MemoryBlockStore::new());
        let tree1 = Mst::new(storage.clone());
        let tree2 = Mst::new(storage);

        let diff = tree1.diff(&tree2).await.unwrap();

        assert!(diff.is_empty());
        assert_eq!(diff.op_count(), 0);
    }

    #[tokio::test]
    async fn test_diff_creates() {
        let storage1 = Arc::new(MemoryBlockStore::new());
        let tree1 = Mst::new(storage1);

        let storage2 = Arc::new(MemoryBlockStore::new());
        let tree2 = Mst::new(storage2);
        let tree2 = tree2.add("a", test_cid(1)).await.unwrap();
        let tree2 = tree2.add("b", test_cid(2)).await.unwrap();

        let diff = tree1.diff(&tree2).await.unwrap();

        assert_eq!(diff.creates.len(), 2);
        assert_eq!(diff.updates.len(), 0);
        assert_eq!(diff.deletes.len(), 0);
        assert_eq!(diff.op_count(), 2);

        // Check creates content
        assert!(
            diff.creates
                .iter()
                .any(|(k, c)| k == "a" && *c == test_cid(1))
        );
        assert!(
            diff.creates
                .iter()
                .any(|(k, c)| k == "b" && *c == test_cid(2))
        );
    }

    #[tokio::test]
    async fn test_diff_deletes() {
        let storage1 = Arc::new(MemoryBlockStore::new());
        let tree1 = Mst::new(storage1);
        let tree1 = tree1.add("a", test_cid(1)).await.unwrap();
        let tree1 = tree1.add("b", test_cid(2)).await.unwrap();

        let storage2 = Arc::new(MemoryBlockStore::new());
        let tree2 = Mst::new(storage2);

        let diff = tree1.diff(&tree2).await.unwrap();

        assert_eq!(diff.creates.len(), 0);
        assert_eq!(diff.updates.len(), 0);
        assert_eq!(diff.deletes.len(), 2);
        assert_eq!(diff.op_count(), 2);

        // Check deletes content
        assert!(
            diff.deletes
                .iter()
                .any(|(k, c)| k == "a" && *c == test_cid(1))
        );
        assert!(
            diff.deletes
                .iter()
                .any(|(k, c)| k == "b" && *c == test_cid(2))
        );
    }

    #[tokio::test]
    async fn test_diff_updates() {
        let storage1 = Arc::new(MemoryBlockStore::new());
        let tree1 = Mst::new(storage1);
        let tree1 = tree1.add("a", test_cid(1)).await.unwrap();
        let tree1 = tree1.add("b", test_cid(2)).await.unwrap();

        let storage2 = Arc::new(MemoryBlockStore::new());
        let tree2 = Mst::new(storage2);
        let tree2 = tree2.add("a", test_cid(10)).await.unwrap(); // Changed CID
        let tree2 = tree2.add("b", test_cid(2)).await.unwrap(); // Same CID

        let diff = tree1.diff(&tree2).await.unwrap();

        assert_eq!(diff.creates.len(), 0);
        assert_eq!(diff.updates.len(), 1); // Only "a" changed
        assert_eq!(diff.deletes.len(), 0);
        assert_eq!(diff.op_count(), 1);

        // Check update content
        assert_eq!(diff.updates[0].0, "a");
        assert_eq!(diff.updates[0].1, test_cid(10)); // new CID
        assert_eq!(diff.updates[0].2, test_cid(1)); // old CID
    }

    #[tokio::test]
    async fn test_diff_mixed_operations() {
        let storage1 = Arc::new(MemoryBlockStore::new());
        let tree1 = Mst::new(storage1);
        let tree1 = tree1.add("a", test_cid(1)).await.unwrap();
        let tree1 = tree1.add("b", test_cid(2)).await.unwrap();
        let tree1 = tree1.add("c", test_cid(3)).await.unwrap();

        let storage2 = Arc::new(MemoryBlockStore::new());
        let tree2 = Mst::new(storage2);
        let tree2 = tree2.add("a", test_cid(10)).await.unwrap(); // Updated
        let tree2 = tree2.add("b", test_cid(2)).await.unwrap(); // Unchanged
        // "c" deleted
        let tree2 = tree2.add("d", test_cid(4)).await.unwrap(); // Created

        let diff = tree1.diff(&tree2).await.unwrap();

        assert_eq!(diff.creates.len(), 1); // "d"
        assert_eq!(diff.updates.len(), 1); // "a"
        assert_eq!(diff.deletes.len(), 1); // "c"
        assert_eq!(diff.op_count(), 3);
    }

    #[tokio::test]
    async fn test_diff_to_empty() {
        let storage = Arc::new(MemoryBlockStore::new());
        let tree = Mst::new(storage);
        let tree = tree.add("a", test_cid(1)).await.unwrap();
        let tree = tree.add("b", test_cid(2)).await.unwrap();
        let tree = tree.add("c", test_cid(3)).await.unwrap();

        let diff = tree.diff_to_empty().await.unwrap();

        assert_eq!(diff.creates.len(), 0);
        assert_eq!(diff.updates.len(), 0);
        assert_eq!(diff.deletes.len(), 3);
        assert_eq!(diff.op_count(), 3);
    }

    #[tokio::test]
    async fn test_validate_limits() {
        let mut diff = MstDiff::new();

        // Add 200 creates (at limit)
        for i in 0..200 {
            diff.creates
                .push((SmolStr::new(&format!("key{}", i)), test_cid(1)));
        }

        // Should be ok at exactly 200
        assert!(diff.validate_limits().is_ok());

        // Add one more - should fail
        diff.creates.push((SmolStr::new("key201"), test_cid(1)));
        assert!(diff.validate_limits().is_err());
    }

    #[tokio::test]
    async fn test_diff_symmetry() {
        // diff(A, B) should be inverse of diff(B, A)
        let storage1 = Arc::new(MemoryBlockStore::new());
        let tree1 = Mst::new(storage1);
        let tree1 = tree1.add("a", test_cid(1)).await.unwrap();
        let tree1 = tree1.add("b", test_cid(2)).await.unwrap();

        let storage2 = Arc::new(MemoryBlockStore::new());
        let tree2 = Mst::new(storage2);
        let tree2 = tree2.add("b", test_cid(2)).await.unwrap();
        let tree2 = tree2.add("c", test_cid(3)).await.unwrap();

        let diff1 = tree1.diff(&tree2).await.unwrap();
        let diff2 = tree2.diff(&tree1).await.unwrap();

        // diff1: creates="c", deletes="a"
        // diff2: creates="a", deletes="c"
        assert_eq!(diff1.creates.len(), 1);
        assert_eq!(diff1.deletes.len(), 1);
        assert_eq!(diff2.creates.len(), 1);
        assert_eq!(diff2.deletes.len(), 1);

        assert_eq!(diff1.creates[0].0, diff2.deletes[0].0); // "c"
        assert_eq!(diff1.deletes[0].0, diff2.creates[0].0); // "a"
    }
}
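
To make the intended flow concrete, a sketch of going from two tree states to firehose-ready operations (my addition; it uses only APIs defined in this file, and assumes `RepoOp`'s `action` and `path` fields format with `{}`):

use jacquard_repo::{BlockStore, Mst, Result};

async fn emit_ops<S>(prev: &Mst<S>, next: &Mst<S>) -> Result<()>
where
    S: BlockStore + Sync + 'static,
{
    let diff = prev.diff(next).await?;
    diff.validate_limits()?; // enforce the 200-op commit limit up front

    for op in diff.to_repo_ops() {
        // "create" ops carry `cid`; "update"/"delete" ops carry `prev`
        println!("{} {}", op.action, op.path);
    }
    Ok(())
}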

+10  crates/jacquard-repo/src/mst/mod.rs

+124  crates/jacquard-repo/src/mst/node.rs

//! MST node data structures

use bytes::Bytes;
use cid::Cid as IpldCid;
use smol_str::SmolStr;

/// Entry in an MST node - either a subtree or a leaf
///
/// This is the in-memory representation used for tree operations.
/// MST operations work on flat `Vec<NodeEntry>` where entries are interleaved:
/// `[Tree, Leaf, Tree, Leaf, Leaf, Tree]` etc.
///
/// The wire format (CBOR) is different - see `NodeData` and `TreeEntry`.
#[derive(Debug, Clone)]
pub enum NodeEntry<S: crate::storage::BlockStore> {
    /// Subtree reference
    ///
    /// Will be lazily loaded from storage when needed.
    Tree(crate::mst::Mst<S>),

    /// Leaf node with key-value pair
    Leaf {
        /// Full key (not prefix-compressed in memory)
        key: SmolStr,
        /// CID of the record value
        value: IpldCid,
    },
}

impl<S: crate::storage::BlockStore> NodeEntry<S> {
    /// Check if this is a tree entry
    pub fn is_tree(&self) -> bool {
        matches!(self, NodeEntry::Tree(_))
    }

    /// Check if this is a leaf entry
    pub fn is_leaf(&self) -> bool {
        matches!(self, NodeEntry::Leaf { .. })
    }

    /// Get the key if this is a leaf
    pub fn leaf_key(&self) -> Option<&str> {
        match self {
            NodeEntry::Leaf { key, .. } => Some(key.as_str()),
            NodeEntry::Tree(_) => None,
        }
    }
}

// ============================================================================
// Wire format structures (for CBOR serialization)
// ============================================================================
//
// These represent the on-disk/network format with prefix compression.
// Conversion functions will be in util.rs.

/// Wire format entry (prefix-compressed leaf with optional subtree pointer)
///
/// This is what gets serialized to CBOR. In memory, we use the flat
/// `Vec<NodeEntry>` representation instead.
///
/// **IMPORTANT:** Fields MUST be in alphabetical order (k, p, t, v) to match
/// DAG-CBOR canonical form. Even though serde_ipld_dagcbor should handle this,
/// we define them in order to be explicit.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct TreeEntry {
    /// Key suffix after prefix (stored as bytes in wire format)
    ///
    /// Must use serde_bytes to serialize as CBOR byte string (major type 2)
    /// instead of array of integers (major type 4)
    #[serde(rename = "k", with = "crate::commit::serde_bytes_helper")]
    pub key_suffix: Bytes,

    /// Prefix length (shared chars with previous key in node)
    ///
    /// Must be u8 (not usize) to match CBOR encoding in reference implementations
    #[serde(rename = "p")]
    pub prefix_len: u8,

    /// Optional subtree pointer (CID of child MST node)
    ///
    /// Serializes as explicit `null` when None (AT Protocol spec requirement for determinism).
    #[serde(rename = "t")]
    pub tree: Option<IpldCid>,

    /// CID of the record value
    #[serde(rename = "v")]
    pub value: IpldCid,
}

/// Wire format node data (serialized as DAG-CBOR)
///
/// This is the structure that gets written to storage. The in-memory
/// representation uses `Vec<NodeEntry>` instead.
///
/// # Conversion rules
///
/// **Serialization (flat → wire):**
/// - First entry if `Tree` → becomes `left` pointer
/// - Each `Leaf` → becomes entry in `entries`
/// - `Tree` after `Leaf` → becomes that leaf's `tree` pointer
///
/// **Deserialization (wire → flat):**
/// - `left` if present → prepend `Tree` entry
/// - Each entry → append `Leaf`
/// - Each `tree` if present → append `Tree` entry
///
/// # Nullability requirement
///
/// **CRITICAL:** All `Option<T>` fields MUST serialize as explicit `null` (not skip).
/// This is an AT Protocol spec requirement for cross-implementation determinism.
/// Skipping vs explicit null produces different CBOR bytes → different CIDs → broken interop.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct NodeData {
    /// Left-most subtree pointer
    ///
    /// Serializes as explicit `null` when None (AT Protocol spec requirement for determinism).
    #[serde(rename = "l")]
    pub left: Option<IpldCid>,

    /// Entries in this node (sorted by full key, prefix-compressed)
    #[serde(rename = "e")]
    pub entries: Vec<TreeEntry>,
}
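
Since the `k`/`p` scheme is easy to get wrong, here is a hand-worked sketch of the compression arithmetic (my addition; the real conversion helpers live in util.rs, which is not part of this hunk):

/// Compress `key` against the previous key in the same node, yielding the
/// wire-format (`p`, `k`) pair described in `TreeEntry`.
fn compress(prev: &str, key: &str) -> (u8, Vec<u8>) {
    // Shared byte prefix with the previous key...
    let p = prev
        .bytes()
        .zip(key.bytes())
        .take_while(|(a, b)| a == b)
        .count() as u8;
    // ...and only the remaining suffix is stored in `k`
    (p, key.as_bytes()[p as usize..].to_vec())
}

// "app.bsky.feed.post/abc123" followed by "app.bsky.feed.post/abc124"
// shares 24 bytes, so the second entry stores p = 24, k = b"4".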

+1571  crates/jacquard-repo/src/mst/tree.rs

1
+
//! Immutable Merkle Search Tree operations
2
+
3
+
use super::node::NodeEntry;
4
+
use super::util;
5
+
use crate::error::{RepoError, Result};
6
+
use crate::storage::BlockStore;
7
+
use cid::Cid as IpldCid;
8
+
use smol_str::SmolStr;
9
+
use std::sync::Arc;
10
+
use tokio::sync::RwLock;
11
+
12
+
/// Write operation for batch application
13
+
///
14
+
/// Represents a single operation to apply to an MST.
15
+
/// For firehose operations where `prev` may be optional (v3).
16
+
#[derive(Debug, Clone, PartialEq, Eq)]
17
+
pub enum WriteOp {
18
+
/// Create new record (error if exists)
19
+
Create {
20
+
/// Record key (collection/rkey)
21
+
key: SmolStr,
22
+
/// Record CID
23
+
cid: IpldCid,
24
+
},
25
+
26
+
/// Update existing record (error if not exists)
27
+
///
28
+
/// `prev` is optional in v3 (required in v2)
29
+
Update {
30
+
/// Record key (collection/rkey)
31
+
key: SmolStr,
32
+
/// New record CID
33
+
cid: IpldCid,
34
+
/// Previous CID (optional for validation)
35
+
prev: Option<IpldCid>,
36
+
},
37
+
38
+
/// Delete record
39
+
///
40
+
/// `prev` is optional in v3 (required in v2)
41
+
Delete {
42
+
/// Record key (collection/rkey)
43
+
key: SmolStr,
44
+
/// Previous CID (optional for validation)
45
+
prev: Option<IpldCid>,
46
+
},
47
+
}
48
+
49
+
/// Verified write operation with required prev fields
50
+
///
51
+
/// Used for operations where prev CID has been verified against tree state.
52
+
/// Safer than `WriteOp` because it always validates prev values.
53
+
#[derive(Debug, Clone, PartialEq, Eq)]
54
+
pub enum VerifiedWriteOp {
55
+
/// Create new record (verified not to exist)
56
+
Create {
57
+
/// Record key (collection/rkey)
58
+
key: SmolStr,
59
+
/// Record CID
60
+
cid: IpldCid,
61
+
},
62
+
63
+
/// Update existing record (with verified prev CID)
64
+
Update {
65
+
/// Record key (collection/rkey)
66
+
key: SmolStr,
67
+
/// New record CID
68
+
cid: IpldCid,
69
+
/// Previous CID (required, validated)
70
+
prev: IpldCid,
71
+
},
72
+
73
+
/// Delete record (with verified current CID)
74
+
Delete {
75
+
/// Record key (collection/rkey)
76
+
key: SmolStr,
77
+
/// Previous CID (required, validated)
78
+
prev: IpldCid,
79
+
},
80
+
}
81
+
82
+
/// Immutable Merkle Search Tree
83
+
///
84
+
/// MST operations return new tree instances, leaving the original unchanged.
85
+
/// This enables versioning and safe concurrent access to different tree versions.
86
+
///
87
+
/// # Architecture
88
+
///
89
+
/// The tree uses a flat `Vec<NodeEntry>` representation in memory, where
90
+
/// `NodeEntry` is an enum of `Tree` (subtree) and `Leaf` (key-value pair).
91
+
///
92
+
/// Entries are interleaved: `[Tree, Leaf, Tree, Leaf, Leaf, Tree]` etc.
93
+
/// This representation makes operations simple (Vec slicing, splicing).
94
+
///
95
+
/// The wire format (CBOR) uses prefix compression and pointers (left/tree).
96
+
/// See `NodeData` and `TreeEntry` in node.rs for serialization format.
97
+
///
98
+
/// # Layer-based structure
99
+
///
100
+
/// Keys are hashed (SHA-256) and leading zero bits determine layer:
101
+
/// - More leading zeros = higher layer (deeper in tree)
102
+
/// - Layer = floor(leading_zeros / 2) for ~4 fanout
103
+
/// - Deterministic and insertion-order independent
104
+
#[derive(Debug, Clone)]
105
+
pub struct Mst<S: BlockStore> {
106
+
/// Block storage for loading/saving nodes (shared via Arc)
107
+
storage: Arc<S>,
108
+
109
+
/// Flat list of entries (lazy-loaded, interior mutable)
110
+
///
111
+
/// `None` means not yet loaded from storage.
112
+
/// Empty `Vec` means tree has been loaded and has no entries.
113
+
entries: Arc<RwLock<Option<Vec<NodeEntry<S>>>>>,
114
+
115
+
/// CID pointer to this node in storage (interior mutable)
116
+
pointer: Arc<RwLock<IpldCid>>,
117
+
118
+
/// Whether pointer is stale (entries modified, interior mutable)
119
+
///
120
+
/// When `true`, `pointer` doesn't match current `entries`.
121
+
/// Call `get_pointer()` to recompute and update.
122
+
outdated_pointer: Arc<RwLock<bool>>,
123
+
124
+
/// Layer hint for this node
125
+
///
126
+
/// `None` means layer unknown (will be computed from entries).
127
+
/// Layer is the maximum layer of any key in this node.
128
+
layer: Option<usize>,
129
+
}
130
+
131
+
impl<S: BlockStore + Sync + 'static> Mst<S> {
132
+
/// Create empty MST
133
+
pub fn new(storage: Arc<S>) -> Self {
134
+
Self {
135
+
storage,
136
+
entries: Arc::new(RwLock::new(Some(Vec::new()))),
137
+
pointer: Arc::new(RwLock::new(IpldCid::default())),
138
+
outdated_pointer: Arc::new(RwLock::new(true)),
139
+
layer: Some(0),
140
+
}
141
+
}
142
+
143
+
/// Create MST with existing entries
144
+
///
145
+
/// Used internally for tree operations.
146
+
/// Computes CID from entries (doesn't persist to storage).
147
+
pub(crate) async fn create(
148
+
storage: Arc<S>,
149
+
entries: Vec<NodeEntry<S>>,
150
+
layer: Option<usize>,
151
+
) -> Result<Self> {
152
+
// Serialize and compute CID (don't persist yet)
153
+
let node_data = util::serialize_node_data(&entries).await?;
154
+
let cbor =
155
+
serde_ipld_dagcbor::to_vec(&node_data).map_err(|e| RepoError::serialization(e))?;
156
+
let cid = util::compute_cid(&cbor)?;
157
+
158
+
let mst = Self {
159
+
storage,
160
+
entries: Arc::new(RwLock::new(Some(entries))),
161
+
pointer: Arc::new(RwLock::new(cid)),
162
+
outdated_pointer: Arc::new(RwLock::new(false)),
163
+
layer,
164
+
};
165
+
166
+
Ok(mst)
167
+
}
168
+
169
+
/// Load MST from CID (lazy)
170
+
///
171
+
/// Doesn't actually load from storage until entries are accessed.
172
+
pub fn load(storage: Arc<S>, cid: IpldCid, layer: Option<usize>) -> Self {
173
+
Self {
174
+
storage,
175
+
entries: Arc::new(RwLock::new(None)), // Not loaded yet
176
+
pointer: Arc::new(RwLock::new(cid)),
177
+
outdated_pointer: Arc::new(RwLock::new(false)),
178
+
layer,
179
+
}
180
+
}
181
+
182
+
/// Create new tree with modified entries
183
+
///
184
+
/// Returns a new Mst with updated entries. Marks pointer as outdated.
185
+
async fn new_tree(&self, entries: Vec<NodeEntry<S>>) -> Result<Self> {
186
+
Ok(Self {
187
+
storage: self.storage.clone(),
188
+
entries: Arc::new(RwLock::new(Some(entries))),
189
+
pointer: self.pointer.clone(),
190
+
outdated_pointer: Arc::new(RwLock::new(true)),
191
+
layer: self.layer,
192
+
})
193
+
}
194
+
195
+
/// Get entries (lazy load if needed)
196
+
async fn get_entries(&self) -> Result<Vec<NodeEntry<S>>> {
197
+
{
198
+
let entries_guard = self.entries.read().await;
199
+
if let Some(ref entries) = *entries_guard {
200
+
return Ok(entries.clone());
201
+
}
202
+
}
203
+
204
+
// Load from storage
205
+
let pointer = *self.pointer.read().await;
206
+
let node_bytes = self
207
+
.storage
208
+
.get(&pointer)
209
+
.await?
210
+
.ok_or_else(|| RepoError::not_found("MST node", &pointer))?;
211
+
212
+
let node_data: super::node::NodeData =
213
+
serde_ipld_dagcbor::from_slice(&node_bytes).map_err(|e| RepoError::serialization(e))?;
214
+
215
+
let entries = util::deserialize_node_data(self.storage.clone(), &node_data, self.layer)?;
216
+
217
+
// Cache the loaded entries
218
+
{
219
+
let mut entries_guard = self.entries.write().await;
220
+
*entries_guard = Some(entries.clone());
221
+
}
222
+
223
+
Ok(entries)
224
+
}
225
+
226
+
/// Get CID pointer (recompute if outdated)
227
+
///
228
+
/// Computes CID from current entries but doesn't persist to storage.
229
+
/// Use `collect_blocks()` to gather blocks for persistence.
230
+
pub async fn get_pointer(&self) -> Result<IpldCid> {
231
+
let outdated = *self.outdated_pointer.read().await;
232
+
if !outdated {
233
+
return Ok(*self.pointer.read().await);
234
+
}
235
+
236
+
// Serialize and compute CID (don't persist yet)
237
+
let entries = self.get_entries().await?;
238
+
let node_data = util::serialize_node_data(&entries).await?;
239
+
let cbor =
240
+
serde_ipld_dagcbor::to_vec(&node_data).map_err(|e| RepoError::serialization(e))?;
241
+
let cid = util::compute_cid(&cbor)?;
242
+
243
+
// Update pointer and mark as fresh
244
+
{
245
+
let mut pointer_guard = self.pointer.write().await;
246
+
*pointer_guard = cid;
247
+
}
248
+
{
249
+
let mut outdated_guard = self.outdated_pointer.write().await;
250
+
*outdated_guard = false;
251
+
}
252
+
253
+
Ok(cid)
254
+
}
255
+
256
+
/// Get root CID (alias for get_pointer)
257
+
pub async fn root(&self) -> Result<IpldCid> {
258
+
self.get_pointer().await
259
+
}
260
+
261
+
    /// Get shared reference to the block storage
    ///
    /// Useful for CAR export and other operations that need direct storage access.
    pub fn storage(&self) -> &Arc<S> {
        &self.storage
    }

    /// Get the layer of this node
    ///
    /// Layer is the maximum layer of any leaf key in this node.
    /// For nodes with no leaves, recursively checks subtrees.
    fn get_layer<'a>(
        &'a self,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<usize>> + Send + 'a>> {
        Box::pin(async move {
            if let Some(layer) = self.layer {
                return Ok(layer);
            }

            // Compute layer from entries
            let entries = self.get_entries().await?;

            // Find first leaf and get its layer
            for entry in &entries {
                if let NodeEntry::Leaf { key, .. } = entry {
                    let layer = util::layer_for_key(key.as_str());
                    return Ok(layer);
                }
            }

            // No leaves found - check first subtree
            for entry in &entries {
                if let NodeEntry::Tree(subtree) = entry {
                    let child_layer = subtree.get_layer().await?;
                    return Ok(child_layer + 1);
                }
            }

            // Empty tree
            Ok(0)
        })
    }

    /// Find index of first leaf >= key
    ///
    /// Returns `entries.len()` if all leaves are < key.
    fn find_gt_or_equal_leaf_index_in(entries: &[NodeEntry<S>], key: &str) -> usize {
        for (i, entry) in entries.iter().enumerate() {
            if let NodeEntry::Leaf { key: leaf_key, .. } = entry {
                if leaf_key.as_str() >= key {
                    return i;
                }
            }
        }

        entries.len()
    }

    /// Get a value by key
    pub fn get<'a>(
        &'a self,
        key: &'a str,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<Option<IpldCid>>> + Send + 'a>>
    {
        Box::pin(async move {
            util::validate_key(key)?;

            let entries = self.get_entries().await?;
            let index = Self::find_gt_or_equal_leaf_index_in(&entries, key);

            // Check if we found exact match
            if index < entries.len() {
                if let NodeEntry::Leaf {
                    key: leaf_key,
                    value,
                } = &entries[index]
                {
                    if leaf_key.as_str() == key {
                        return Ok(Some(*value));
                    }
                }
            }

            // Not found at this level - check subtree before this index
            if index > 0 {
                if let NodeEntry::Tree(subtree) = &entries[index - 1] {
                    return subtree.get(key).await;
                }
            }

            Ok(None)
        })
    }

    /// Add a key-value pair (returns new tree)
    pub fn add<'a>(
        &'a self,
        key: &'a str,
        cid: IpldCid,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<Mst<S>>> + Send + 'a>> {
        Box::pin(async move {
            util::validate_key(key)?;

            let key_layer = util::layer_for_key(key);
            let node_layer = self.get_layer().await?;
            let entries = self.get_entries().await?;

            if key_layer == node_layer {
                // Key belongs at this layer - insert here
                let index = Self::find_gt_or_equal_leaf_index_in(&entries, key);

                // Check if key already exists
                if index < entries.len() {
                    if let NodeEntry::Leaf { key: leaf_key, .. } = &entries[index] {
                        if leaf_key.as_str() == key {
                            // Key exists - replace by just inserting at same position
                            let mut new_entries = entries.clone();
                            new_entries[index] = NodeEntry::Leaf {
                                key: smol_str::SmolStr::new(key),
                                value: cid,
                            };
                            return self.new_tree(new_entries).await;
                        }
                    }
                }

                // Check entry before insertion point
                if index > 0 {
                    match &entries[index - 1] {
                        NodeEntry::Leaf { .. } => {
                            // Prev is Leaf - just splice in
                            self.splice_in(
                                NodeEntry::Leaf {
                                    key: smol_str::SmolStr::new(key),
                                    value: cid,
                                },
                                index,
                            )
                            .await
                        }
                        NodeEntry::Tree(subtree) => {
                            // Prev is Tree - split it around key
                            let (left, right) = subtree.split_around(key).await?;
                            self.replace_with_split(
                                index - 1,
                                left,
                                NodeEntry::Leaf {
                                    key: smol_str::SmolStr::new(key),
                                    value: cid,
                                },
                                right,
                            )
                            .await
                        }
                    }
                } else {
                    // At far left - splice in
                    self.splice_in(
                        NodeEntry::Leaf {
                            key: smol_str::SmolStr::new(key),
                            value: cid,
                        },
                        index,
                    )
                    .await
                }
            } else if key_layer < node_layer {
                // Key belongs on lower layer - recurse into subtree
                let index = Self::find_gt_or_equal_leaf_index_in(&entries, key);

                if index > 0 {
                    if let NodeEntry::Tree(prev_tree) = &entries[index - 1] {
                        // Prev is Tree - add to it
                        let new_subtree = prev_tree.add(key, cid).await?;
                        return self
                            .update_entry(index - 1, NodeEntry::Tree(new_subtree))
                            .await;
                    }
                }

                // No prev tree - create child and add to it
                let child = self.create_child().await?;
                let new_subtree = child.add(key, cid).await?;
                self.splice_in(NodeEntry::Tree(new_subtree), index).await
            } else {
                // Key belongs on higher layer - create parent layers
                let extra_layers = key_layer - node_layer;

                let (mut left, mut right) = self.split_around(key).await?;

                // Create intermediate layers if gap > 1
                for _ in 1..extra_layers {
                    if let Some(l) = left {
                        left = Some(l.create_parent().await?);
                    }
                    if let Some(r) = right {
                        right = Some(r.create_parent().await?);
                    }
                }

                // Build new root
                let mut new_entries = Vec::new();
                if let Some(l) = left {
                    new_entries.push(NodeEntry::Tree(l));
                }
                new_entries.push(NodeEntry::Leaf {
                    key: smol_str::SmolStr::new(key),
                    value: cid,
                });
                if let Some(r) = right {
                    new_entries.push(NodeEntry::Tree(r));
                }

                Mst::create(self.storage.clone(), new_entries, Some(key_layer)).await
            }
        })
    }

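    // Illustrative sketch (not part of the diff): `add` leaves the receiver
    // untouched and returns a fresh tree, so intermediate versions stay readable
    // and unchanged subtrees are shared via `Arc`. Assumes record CIDs `cid_a`
    // and `cid_b`:
    //
    //     let v0 = Mst::new(storage.clone());
    //     let v1 = v0.add("com.example.posts/a", cid_a).await?;
    //     let v2 = v1.add("com.example.posts/b", cid_b).await?;
    //     assert!(v1.get("com.example.posts/b").await?.is_none()); // v1 unchanged
    //     assert!(v2.get("com.example.posts/b").await?.is_some());
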
    /// Delete a key (returns new tree)
    pub fn delete<'a>(
        &'a self,
        key: &'a str,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<Mst<S>>> + Send + 'a>> {
        Box::pin(async move {
            util::validate_key(key)?;

            let altered = self.delete_recurse(key).await?;
            altered.trim_top().await
        })
    }

    /// Recursively delete a key
    fn delete_recurse<'a>(
        &'a self,
        key: &'a str,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<Mst<S>>> + Send + 'a>> {
        Box::pin(async move {
            let entries = self.get_entries().await?;
            let index = Self::find_gt_or_equal_leaf_index_in(&entries, key);

            // Check if found at this level
            if index < entries.len() {
                if let NodeEntry::Leaf { key: leaf_key, .. } = &entries[index] {
                    if leaf_key.as_str() == key {
                        // Found it - delete this entry
                        let prev = if index > 0 {
                            Some(&entries[index - 1])
                        } else {
                            None
                        };
                        let next = entries.get(index + 1);

                        // Check if we need to merge Trees
                        if let (
                            Some(NodeEntry::Tree(prev_tree)),
                            Some(NodeEntry::Tree(next_tree)),
                        ) = (prev, next)
                        {
                            // Merge the two Trees
                            let merged = prev_tree.append_merge(next_tree).await?;

                            // Build: [0..index-1] + [merged] + [index+2..]
                            let mut new_entries = entries[..index - 1].to_vec();
                            new_entries.push(NodeEntry::Tree(merged));
                            new_entries.extend_from_slice(&entries[index + 2..]);

                            return self.new_tree(new_entries).await;
                        }

                        // Simple case: just remove the entry
                        return self.remove_entry(index).await;
                    }
                }
            }

            // Not found at this level - recurse into prev Tree
            if index > 0 {
                if let NodeEntry::Tree(prev_tree) = &entries[index - 1] {
                    let subtree = prev_tree.delete_recurse(key).await?;
                    let subtree_entries = subtree.get_entries().await?;

                    if subtree_entries.is_empty() {
                        // Subtree is now empty - remove it
                        return self.remove_entry(index - 1).await;
                    } else {
                        // Update with new subtree
                        return self.update_entry(index - 1, NodeEntry::Tree(subtree)).await;
                    }
                }
            }

            // Key not found
            Err(RepoError::not_found("key", key))
        })
    }

    /// Update an existing key (returns new tree)
    pub async fn update(&self, key: &str, cid: IpldCid) -> Result<Mst<S>> {
        util::validate_key(key)?;

        // Check key exists
        if self.get(key).await?.is_none() {
            return Err(RepoError::not_found("key", key));
        }

        // Update is just add (which replaces)
        self.add(key, cid).await
    }

    /// Update entry at index
    async fn update_entry(&self, index: usize, entry: NodeEntry<S>) -> Result<Mst<S>> {
        let mut entries = self.get_entries().await?;
        entries[index] = entry;
        self.new_tree(entries).await
    }

    /// Remove entry at index
    async fn remove_entry(&self, index: usize) -> Result<Mst<S>> {
        let mut entries = self.get_entries().await?;
        entries.remove(index);
        self.new_tree(entries).await
    }

    /// Append entry to end
    async fn append(&self, entry: NodeEntry<S>) -> Result<Mst<S>> {
        let mut entries = self.get_entries().await?;
        entries.push(entry);
        self.new_tree(entries).await
    }

    /// Prepend entry to start
    async fn prepend(&self, entry: NodeEntry<S>) -> Result<Mst<S>> {
        let mut entries = self.get_entries().await?;
        entries.insert(0, entry);
        self.new_tree(entries).await
    }

    /// Splice in entry at index
    async fn splice_in(&self, entry: NodeEntry<S>, index: usize) -> Result<Mst<S>> {
        let mut entries = self.get_entries().await?;
        entries.insert(index, entry);
        self.new_tree(entries).await
    }

    /// Get slice of entries
    pub async fn slice(&self, start: usize, end: usize) -> Result<Vec<NodeEntry<S>>> {
        let entries = self.get_entries().await?;
        Ok(entries[start..end].to_vec())
    }

    /// Trim top node if it only contains one subtree
    fn trim_top(
        self,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<Mst<S>>> + Send>> {
        Box::pin(async move {
            let entries = self.get_entries().await?;

            if entries.len() == 1 {
                if let NodeEntry::Tree(subtree) = &entries[0] {
                    return subtree.clone().trim_top().await;
                }
            }

            Ok(self)
        })
    }

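    // Illustrative sketch (not part of the diff): why deletes trim. Removing the
    // only leaf at the root's layer can leave a root whose single entry is one
    // subtree; promoting that subtree keeps the tree canonical, so equal key sets
    // hash to equal root CIDs regardless of edit history:
    //
    //     [ Tree([a, c]) ]  --trim_top-->  [a, c]
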
    /// Split tree around a key into left and right subtrees
    ///
    /// Returns (left, right) where:
    /// - left contains all entries < key
    /// - right contains all entries >= key
    ///
    /// Either side can be None if empty.
    pub fn split_around<'a>(
        &'a self,
        key: &'a str,
    ) -> std::pin::Pin<
        Box<dyn std::future::Future<Output = Result<(Option<Mst<S>>, Option<Mst<S>>)>> + Send + 'a>,
    > {
        Box::pin(async move {
            let entries = self.get_entries().await?;
            let index = Self::find_gt_or_equal_leaf_index_in(&entries, key);

            // Split at index
            let left_data = entries[..index].to_vec();
            let right_data = entries[index..].to_vec();

            let mut left = self.new_tree(left_data.clone()).await?;
            let mut right = self.new_tree(right_data).await?;

            // If last entry in left is a Tree, recursively split it
            if let Some(NodeEntry::Tree(last_tree)) = left_data.last() {
                let left_len = left_data.len();
                left = left.remove_entry(left_len - 1).await?;

                let (split_left, split_right) = last_tree.split_around(key).await?;

                if let Some(sl) = split_left {
                    left = left.append(NodeEntry::Tree(sl)).await?;
                }
                if let Some(sr) = split_right {
                    right = right.prepend(NodeEntry::Tree(sr)).await?;
                }
            }

            // Return None for empty sides
            let left_out = if left.get_entries().await?.is_empty() {
                None
            } else {
                Some(left)
            };

            let right_out = if right.get_entries().await?.is_empty() {
                None
            } else {
                Some(right)
            };

            Ok((left_out, right_out))
        })
    }

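    // Illustrative sketch (not part of the diff): splitting a node around "c".
    // The leaf index lands on the first leaf >= "c"; the subtree straddling the
    // boundary is split recursively and its halves rejoin each side:
    //
    //     node:          [T0, "a", T1, "d", T2]
    //     split "c" ->   left:  [T0, "a", T1-left]
    //                    right: [T1-right, "d", T2]
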
    /// Merge two adjacent subtrees
    ///
    /// All keys in `to_merge` must be > all keys in `self`.
    /// Used primarily for delete operations.
    pub fn append_merge<'a>(
        &'a self,
        to_merge: &'a Mst<S>,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<Mst<S>>> + Send + 'a>> {
        Box::pin(async move {
            // Check same layer
            let self_layer = self.get_layer().await?;
            let merge_layer = to_merge.get_layer().await?;

            if self_layer != merge_layer {
                return Err(RepoError::invalid_mst(
                    "Cannot merge MST nodes from different layers",
                ));
            }

            let mut self_entries = self.get_entries().await?;
            let merge_entries = to_merge.get_entries().await?;

            // Check if we need to merge adjacent Trees
            let last_is_tree = matches!(self_entries.last(), Some(NodeEntry::Tree(_)));
            let first_is_tree = matches!(merge_entries.first(), Some(NodeEntry::Tree(_)));

            if last_is_tree && first_is_tree {
                // Both are Trees - recursively merge them
                if let (Some(NodeEntry::Tree(left_tree)), Some(NodeEntry::Tree(right_tree))) =
                    (self_entries.last(), merge_entries.first())
                {
                    let merged = left_tree.append_merge(right_tree).await?;

                    // Build new entries: self[..-1] + merged + merge[1..]
                    let mut new_entries = self_entries[..self_entries.len() - 1].to_vec();
                    new_entries.push(NodeEntry::Tree(merged));
                    new_entries.extend_from_slice(&merge_entries[1..]);

                    return self.new_tree(new_entries).await;
                }
            }

            // Simple case: just concatenate
            self_entries.extend(merge_entries);
            self.new_tree(self_entries).await
        })
    }

    /// Create empty child tree at layer-1
    pub async fn create_child(&self) -> Result<Mst<S>> {
        let layer = self.get_layer().await?;
        let child_layer = if layer > 0 { Some(layer - 1) } else { Some(0) };

        Mst::create(self.storage.clone(), Vec::new(), child_layer).await
    }

    /// Create parent tree at layer+1 containing self
    pub async fn create_parent(self) -> Result<Mst<S>> {
        let layer = self.get_layer().await?;

        Mst::create(
            self.storage.clone(),
            vec![NodeEntry::Tree(self)],
            Some(layer + 1),
        )
        .await
    }

    /// Replace entry at index with [left?, leaf, right?]
    async fn replace_with_split(
        &self,
        index: usize,
        left: Option<Mst<S>>,
        leaf: NodeEntry<S>,
        right: Option<Mst<S>>,
    ) -> Result<Mst<S>> {
        let entries = self.get_entries().await?;

        // Build: [0..index] + [left?] + [leaf] + [right?] + [index+1..]
        let mut new_entries = entries[..index].to_vec();

        if let Some(l) = left {
            new_entries.push(NodeEntry::Tree(l));
        }
        new_entries.push(leaf);
        if let Some(r) = right {
            new_entries.push(NodeEntry::Tree(r));
        }

        new_entries.extend_from_slice(&entries[index + 1..]);

        self.new_tree(new_entries).await
    }

    /// Get all leaf entries (key-CID pairs) in lexicographic order
    ///
    /// Recursively traverses the tree to collect all leaves.
    /// Used for diff calculation and tree listing.
    pub fn leaves<'a>(
        &'a self,
    ) -> std::pin::Pin<
        Box<
            dyn std::future::Future<Output = Result<Vec<(smol_str::SmolStr, IpldCid)>>> + Send + 'a,
        >,
    > {
        Box::pin(async move {
            let mut result = Vec::new();
            self.collect_leaves(&mut result).await?;
            Ok(result)
        })
    }

    /// Recursively collect all leaves into the result vector
    fn collect_leaves<'a>(
        &'a self,
        result: &'a mut Vec<(smol_str::SmolStr, IpldCid)>,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + Send + 'a>> {
        Box::pin(async move {
            let entries = self.get_entries().await?;

            for entry in entries {
                match entry {
                    NodeEntry::Tree(subtree) => {
                        // Recurse into subtree
                        subtree.collect_leaves(result).await?;
                    }
                    NodeEntry::Leaf { key, value } => {
                        // Add leaf to result
                        result.push((key, value));
                    }
                }
            }

            Ok(())
        })
    }

    /// Apply batch of verified write operations (returns new tree)
    ///
    /// More efficient than individual operations as it only rebuilds
    /// the tree structure once per operation. Operations are applied in order.
    ///
    /// # Validation
    ///
    /// - Create: errors if key already exists
    /// - Update: errors if key doesn't exist OR prev CID doesn't match
    /// - Delete: errors if key doesn't exist OR prev CID doesn't match
    ///
    /// All operations validate prev CIDs against current tree state.
    pub async fn batch(&self, ops: &[VerifiedWriteOp]) -> Result<Mst<S>> {
        let mut tree = self.clone();

        for op in ops {
            tree = match op {
                VerifiedWriteOp::Create { key, cid } => {
                    // Check doesn't exist
                    if tree.get(key.as_str()).await?.is_some() {
                        return Err(RepoError::invalid_mst(format!(
                            "Cannot create: key already exists: {}",
                            key
                        )));
                    }
                    tree.add(key.as_str(), *cid).await?
                }

                VerifiedWriteOp::Update { key, cid, prev } => {
                    // Check exists and validate prev
                    let current = tree
                        .get(key.as_str())
                        .await?
                        .ok_or_else(|| RepoError::not_found("key", key.as_str()))?;

                    if &current != prev {
                        return Err(RepoError::invalid_mst(format!(
                            "Update prev CID mismatch for key {}: expected {}, got {}",
                            key, prev, current
                        )));
                    }

                    tree.add(key.as_str(), *cid).await?
                }

                VerifiedWriteOp::Delete { key, prev } => {
                    // Check exists and validate prev
                    let current = tree
                        .get(key.as_str())
                        .await?
                        .ok_or_else(|| RepoError::not_found("key", key.as_str()))?;

                    if &current != prev {
                        return Err(RepoError::invalid_mst(format!(
                            "Delete prev CID mismatch for key {}: expected {}, got {}",
                            key, prev, current
                        )));
                    }

                    tree.delete(key.as_str()).await?
                }
            };
        }

        Ok(tree)
    }

    /// Collect all blocks that need persisting
    ///
    /// Recursively walks the tree, serializing nodes and collecting blocks
    /// that aren't already in storage. Skips nodes that are already persisted.
    ///
    /// Returns (root_cid, blocks) where blocks is a map of CID → bytes.
    pub fn collect_blocks<'a>(
        &'a self,
    ) -> std::pin::Pin<
        Box<
            dyn std::future::Future<
                    Output = Result<(IpldCid, std::collections::BTreeMap<IpldCid, bytes::Bytes>)>,
                > + Send
                + 'a,
        >,
    > {
        Box::pin(async move {
            use bytes::Bytes;
            use std::collections::BTreeMap;

            let mut blocks = BTreeMap::new();
            let pointer = self.get_pointer().await?;

            // Check if already in storage
            if self.storage.has(&pointer).await? {
                return Ok((pointer, blocks));
            }

            // Serialize this node
            let entries = self.get_entries().await?;
            let node_data = util::serialize_node_data(&entries).await?;
            let cbor =
                serde_ipld_dagcbor::to_vec(&node_data).map_err(|e| RepoError::serialization(e))?;
            blocks.insert(pointer, Bytes::from(cbor));

            // Recursively collect from subtrees
            for entry in &entries {
                if let NodeEntry::Tree(subtree) = entry {
                    let (_, subtree_blocks) = subtree.collect_blocks().await?;
                    blocks.extend(subtree_blocks);
                }
            }

            Ok((pointer, blocks))
        })
    }

    /// Persist all unstored blocks to storage
    ///
    /// Convenience method that calls `collect_blocks()` and `put_many()`.
    /// Returns the root CID after persisting.
    pub async fn persist(&self) -> Result<IpldCid> {
        let (root_cid, blocks) = self.collect_blocks().await?;

        if !blocks.is_empty() {
            self.storage.put_many(blocks).await?;
        }

        Ok(root_cid)
    }

    /// Get all CIDs in the merkle path to a key
    ///
    /// Returns a list of CIDs representing the proof path from root to the target key:
    /// - Always includes the root CID (this node's pointer)
    /// - If key exists, includes the record CID
    /// - Includes all intermediate MST node CIDs in the path
    ///
    /// This is used for generating merkle proofs for record existence/non-existence.
    ///
    /// # Example
    ///
    /// For a key that exists:
    /// - Returns: `[root_cid, intermediate_node_cid?, ..., record_cid]`
    ///
    /// For a key that doesn't exist:
    /// - Returns: `[root_cid, intermediate_node_cid?, ...]` (proves absence)
    pub fn cids_for_path<'a>(
        &'a self,
        key: &'a str,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<Vec<IpldCid>>> + Send + 'a>>
    {
        Box::pin(async move {
            util::validate_key(key)?;

            let mut cids = vec![self.get_pointer().await?];
            let entries = self.get_entries().await?;
            let index = Self::find_gt_or_equal_leaf_index_in(&entries, key);

            // Check if we found exact match at this level
            if index < entries.len() {
                if let NodeEntry::Leaf {
                    key: leaf_key,
                    value,
                } = &entries[index]
                {
                    if leaf_key.as_str() == key {
                        cids.push(*value);
                        return Ok(cids);
                    }
                }
            }

            // Not found at this level - check subtree before this index
            if index > 0 {
                if let NodeEntry::Tree(subtree) = &entries[index - 1] {
                    let mut subtree_cids = subtree.cids_for_path(key).await?;
                    cids.append(&mut subtree_cids);
                    return Ok(cids);
                }
            }

            // Key not found in tree
            Ok(cids)
        })
    }

    /// Write all MST and record blocks to CAR writer
    ///
    /// Streams blocks directly to the writer as the tree is walked:
    /// - All MST node blocks (read from storage)
    /// - All leaf record blocks (read from storage)
    ///
    /// This is suitable for CAR export and avoids loading all blocks into memory.
    pub async fn write_blocks_to_car<W: tokio::io::AsyncWrite + Send + Unpin>(
        &self,
        writer: &mut iroh_car::CarWriter<W>,
    ) -> Result<()> {
        let mut leaf_cids = Vec::new();

        // Walk tree, writing MST nodes and collecting leaf CIDs
        self.write_mst_nodes_to_car(writer, &mut leaf_cids).await?;

        // Fetch and write all leaf record blocks
        let leaf_blocks = self.storage.get_many(&leaf_cids).await?;
        for (cid, maybe_data) in leaf_cids.iter().zip(leaf_blocks) {
            if let Some(data) = maybe_data {
                writer
                    .write(*cid, &data)
                    .await
                    .map_err(|e| RepoError::car(e))?;
            }
        }

        Ok(())
    }

    /// Recursively write MST nodes to CAR and collect leaf CIDs
    fn write_mst_nodes_to_car<'a, W: tokio::io::AsyncWrite + Send + Unpin>(
        &'a self,
        writer: &'a mut iroh_car::CarWriter<W>,
        leaf_cids: &'a mut Vec<IpldCid>,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + Send + 'a>> {
        Box::pin(async move {
            let pointer = self.get_pointer().await?;

            // Read MST node from storage and write to CAR
            let node_bytes = self
                .storage
                .get(&pointer)
                .await?
                .ok_or_else(|| RepoError::not_found("MST node", &pointer))?;

            writer
                .write(pointer, &node_bytes)
                .await
                .map_err(|e| RepoError::car(e))?;

            // Parse to get entries
            let entries = self.get_entries().await?;

            // Collect leaf CIDs and recurse into subtrees
            for entry in &entries {
                match entry {
                    NodeEntry::Leaf { value, .. } => {
                        leaf_cids.push(*value);
                    }
                    NodeEntry::Tree(subtree) => {
                        subtree.write_mst_nodes_to_car(writer, leaf_cids).await?;
                    }
                }
            }

            Ok(())
        })
    }
}

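// Illustrative sketch (not part of the diff): exporting the tree as a CAR file
// via `write_blocks_to_car`. The `CarHeader`/`CarWriter` names come from the
// iroh-car crate added by this change, but the exact construction calls here
// are assumptions:
//
//     let root = mst.persist().await?;
//     let file = tokio::fs::File::create("repo.car").await?;
//     let header = iroh_car::CarHeader::new_v1(vec![root]);
//     let mut writer = iroh_car::CarWriter::new(header, file);
//     mst.write_blocks_to_car(&mut writer).await?;
//     writer.finish().await?;
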
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{DAG_CBOR_CID_CODEC, storage::memory::MemoryBlockStore};
    use jacquard_common::types::crypto::SHA2_256;
    use smol_str::SmolStr;

    fn test_cid(n: u8) -> IpldCid {
        let data = vec![n; 32];
        let mh = multihash::Multihash::wrap(SHA2_256, &data).unwrap();
        IpldCid::new_v1(DAG_CBOR_CID_CODEC, mh)
    }

    #[tokio::test]
    async fn test_empty_tree() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        let entries = mst.get_entries().await.unwrap();
        assert_eq!(entries.len(), 0);
    }

    #[tokio::test]
    async fn test_get_from_empty() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        let result = mst.get("test/key").await.unwrap();
        assert!(result.is_none());
    }

    #[tokio::test]
    async fn test_manually_constructed_tree() {
        // Test with manually constructed entries (no CBOR)
        let storage = Arc::new(MemoryBlockStore::new());

        let entries = vec![
            NodeEntry::Leaf {
                key: SmolStr::new("a"),
                value: test_cid(1),
            },
            NodeEntry::Leaf {
                key: SmolStr::new("b"),
                value: test_cid(2),
            },
            NodeEntry::Leaf {
                key: SmolStr::new("c"),
                value: test_cid(3),
            },
        ];

        let mst = Mst::create(storage, entries, Some(0)).await.unwrap();

        assert_eq!(mst.get("a").await.unwrap(), Some(test_cid(1)));
        assert_eq!(mst.get("b").await.unwrap(), Some(test_cid(2)));
        assert_eq!(mst.get("c").await.unwrap(), Some(test_cid(3)));
        assert_eq!(mst.get("d").await.unwrap(), None);
    }

    #[tokio::test]
    async fn test_add_single_key() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        let updated = mst.add("test/key", test_cid(1)).await.unwrap();

        assert_eq!(updated.get("test/key").await.unwrap(), Some(test_cid(1)));
    }

    #[tokio::test]
    async fn test_add_multiple_keys() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        let mst = mst.add("a", test_cid(1)).await.unwrap();
        let mst = mst.add("b", test_cid(2)).await.unwrap();
        let mst = mst.add("c", test_cid(3)).await.unwrap();

        assert_eq!(mst.get("a").await.unwrap(), Some(test_cid(1)));
        assert_eq!(mst.get("b").await.unwrap(), Some(test_cid(2)));
        assert_eq!(mst.get("c").await.unwrap(), Some(test_cid(3)));
    }

    #[tokio::test]
    async fn test_add_replace_key() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        let mst = mst.add("test", test_cid(1)).await.unwrap();
        let mst = mst.add("test", test_cid(2)).await.unwrap();

        assert_eq!(mst.get("test").await.unwrap(), Some(test_cid(2)));
    }

    #[tokio::test]
    async fn test_delete_single_key() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        let mst = mst.add("test", test_cid(1)).await.unwrap();
        let mst = mst.delete("test").await.unwrap();

        assert_eq!(mst.get("test").await.unwrap(), None);
        assert_eq!(mst.get_entries().await.unwrap().len(), 0);
    }

    #[tokio::test]
    async fn test_delete_from_multi_key_tree() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        let mst = mst.add("a", test_cid(1)).await.unwrap();
        let mst = mst.add("b", test_cid(2)).await.unwrap();
        let mst = mst.add("c", test_cid(3)).await.unwrap();

        let mst = mst.delete("b").await.unwrap();

        assert_eq!(mst.get("a").await.unwrap(), Some(test_cid(1)));
        assert_eq!(mst.get("b").await.unwrap(), None);
        assert_eq!(mst.get("c").await.unwrap(), Some(test_cid(3)));
    }

    #[tokio::test]
    async fn test_delete_nonexistent_key() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        let mst = mst.add("a", test_cid(1)).await.unwrap();

        let result = mst.delete("b").await;
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_serialization_roundtrip() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage.clone());

        let mst = mst.add("a", test_cid(1)).await.unwrap();
        let mst = mst.add("b", test_cid(2)).await.unwrap();
        let mst = mst.add("c", test_cid(3)).await.unwrap();

        // Persist to storage
        let cid = mst.persist().await.unwrap();

        // Load from storage
        let reloaded = Mst::load(storage, cid, Some(0));

        // Verify all keys are present
        assert_eq!(reloaded.get("a").await.unwrap(), Some(test_cid(1)));
        assert_eq!(reloaded.get("b").await.unwrap(), Some(test_cid(2)));
        assert_eq!(reloaded.get("c").await.unwrap(), Some(test_cid(3)));
    }

    #[tokio::test]
    async fn test_cid_determinism() {
        // Same keys inserted in same order should produce same CID
        let storage1 = Arc::new(MemoryBlockStore::new());
        let mst1 = Mst::new(storage1);
        let mst1 = mst1.add("a", test_cid(1)).await.unwrap();
        let mst1 = mst1.add("b", test_cid(2)).await.unwrap();
        let mst1 = mst1.add("c", test_cid(3)).await.unwrap();
        let cid1 = mst1.get_pointer().await.unwrap();

        let storage2 = Arc::new(MemoryBlockStore::new());
        let mst2 = Mst::new(storage2);
        let mst2 = mst2.add("a", test_cid(1)).await.unwrap();
        let mst2 = mst2.add("b", test_cid(2)).await.unwrap();
        let mst2 = mst2.add("c", test_cid(3)).await.unwrap();
        let cid2 = mst2.get_pointer().await.unwrap();

        assert_eq!(cid1, cid2);
    }

    #[tokio::test]
    async fn test_insertion_order_determinism() {
        // Different insertion orders should produce same CID
        let storage1 = Arc::new(MemoryBlockStore::new());
        let mst1 = Mst::new(storage1);
        let mst1 = mst1.add("a", test_cid(1)).await.unwrap();
        let mst1 = mst1.add("b", test_cid(2)).await.unwrap();
        let mst1 = mst1.add("c", test_cid(3)).await.unwrap();
        let cid1 = mst1.get_pointer().await.unwrap();

        let storage2 = Arc::new(MemoryBlockStore::new());
        let mst2 = Mst::new(storage2);
        let mst2 = mst2.add("c", test_cid(3)).await.unwrap();
        let mst2 = mst2.add("a", test_cid(1)).await.unwrap();
        let mst2 = mst2.add("b", test_cid(2)).await.unwrap();
        let cid2 = mst2.get_pointer().await.unwrap();

        assert_eq!(cid1, cid2);
    }

    #[tokio::test]
    async fn test_batch_creates() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        let ops = vec![
            VerifiedWriteOp::Create {
                key: SmolStr::new("a"),
                cid: test_cid(1),
            },
            VerifiedWriteOp::Create {
                key: SmolStr::new("b"),
                cid: test_cid(2),
            },
            VerifiedWriteOp::Create {
                key: SmolStr::new("c"),
                cid: test_cid(3),
            },
        ];

        let mst = mst.batch(&ops).await.unwrap();

        assert_eq!(mst.get("a").await.unwrap(), Some(test_cid(1)));
        assert_eq!(mst.get("b").await.unwrap(), Some(test_cid(2)));
        assert_eq!(mst.get("c").await.unwrap(), Some(test_cid(3)));
    }

    #[tokio::test]
    async fn test_batch_mixed_operations() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        // Start with some keys
        let mst = mst.add("a", test_cid(1)).await.unwrap();
        let mst = mst.add("b", test_cid(2)).await.unwrap();
        let mst = mst.add("c", test_cid(3)).await.unwrap();

        let ops = vec![
            VerifiedWriteOp::Create {
                key: SmolStr::new("d"),
                cid: test_cid(4),
            },
            VerifiedWriteOp::Update {
                key: SmolStr::new("a"),
                cid: test_cid(10),
                prev: test_cid(1),
            },
            VerifiedWriteOp::Delete {
                key: SmolStr::new("b"),
                prev: test_cid(2),
            },
        ];

        let mst = mst.batch(&ops).await.unwrap();

        assert_eq!(mst.get("a").await.unwrap(), Some(test_cid(10))); // Updated
        assert_eq!(mst.get("b").await.unwrap(), None); // Deleted
        assert_eq!(mst.get("c").await.unwrap(), Some(test_cid(3))); // Unchanged
        assert_eq!(mst.get("d").await.unwrap(), Some(test_cid(4))); // Created
    }

    #[tokio::test]
    async fn test_batch_with_prev_validation() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);
        let mst = mst.add("a", test_cid(1)).await.unwrap();

        // Update with correct prev - should succeed
        let ops = vec![VerifiedWriteOp::Update {
            key: SmolStr::new("a"),
            cid: test_cid(2),
            prev: test_cid(1),
        }];
        let mst = mst.batch(&ops).await.unwrap();
        assert_eq!(mst.get("a").await.unwrap(), Some(test_cid(2)));

        // Update with wrong prev - should fail
        let ops = vec![VerifiedWriteOp::Update {
            key: SmolStr::new("a"),
            cid: test_cid(3),
            prev: test_cid(99), // Wrong CID
        }];
        assert!(mst.batch(&ops).await.is_err());

        // Delete with correct prev - should succeed
        let ops = vec![VerifiedWriteOp::Delete {
            key: SmolStr::new("a"),
            prev: test_cid(2),
        }];
        let mst = mst.batch(&ops).await.unwrap();
        assert_eq!(mst.get("a").await.unwrap(), None);
    }

    #[tokio::test]
    async fn test_batch_create_duplicate_error() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);
        let mst = mst.add("a", test_cid(1)).await.unwrap();

        let ops = vec![VerifiedWriteOp::Create {
            key: SmolStr::new("a"),
            cid: test_cid(2),
        }];

        // Should error because key already exists
        assert!(mst.batch(&ops).await.is_err());
    }

    #[tokio::test]
    async fn test_batch_update_nonexistent_error() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        let ops = vec![VerifiedWriteOp::Update {
            key: SmolStr::new("a"),
            cid: test_cid(1),
            prev: test_cid(99), // Doesn't matter since key doesn't exist
        }];

        // Should error because key doesn't exist
        assert!(mst.batch(&ops).await.is_err());
    }

    #[tokio::test]
    async fn test_batch_delete_nonexistent_error() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        let ops = vec![VerifiedWriteOp::Delete {
            key: SmolStr::new("a"),
            prev: test_cid(99), // Doesn't matter since key doesn't exist
        }];

        // Should error because key doesn't exist
        assert!(mst.batch(&ops).await.is_err());
    }

    #[tokio::test]
    async fn test_batch_empty() {
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);
        let mst = mst.add("a", test_cid(1)).await.unwrap();

        let ops = vec![];
        let mst = mst.batch(&ops).await.unwrap();

        // Should be unchanged
        assert_eq!(mst.get("a").await.unwrap(), Some(test_cid(1)));
    }

    #[tokio::test]
    async fn test_cids_for_path_simple() {
        // Test cids_for_path with a simple flat tree
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        let mst = mst.add("a", test_cid(1)).await.unwrap();
        let mst = mst.add("b", test_cid(2)).await.unwrap();
        let mst = mst.add("c", test_cid(3)).await.unwrap();

        // Get proof path for key "b"
        let cids = mst.cids_for_path("b").await.unwrap();

        // Should contain: root CID, record CID
        assert_eq!(cids.len(), 2);
        assert_eq!(cids[0], mst.get_pointer().await.unwrap());
        assert_eq!(cids[1], test_cid(2));
    }

    #[tokio::test]
    async fn test_cids_for_path_nonexistent() {
        // Test cids_for_path with a key that doesn't exist
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        let mst = mst.add("a", test_cid(1)).await.unwrap();
        let mst = mst.add("c", test_cid(3)).await.unwrap();

        // Get proof path for nonexistent key "b"
        let cids = mst.cids_for_path("b").await.unwrap();

        // Should contain root CID first, and NOT contain the record CID (proves absence)
        assert!(cids.len() >= 1, "Should have at least the root CID");
        assert_eq!(
            cids[0],
            mst.get_pointer().await.unwrap(),
            "First CID should be root"
        );
        assert!(
            !cids.contains(&test_cid(2)),
            "Should not contain nonexistent record"
        );
    }

    #[tokio::test]
    async fn test_cids_for_path_multiple_records() {
        // Test with multiple records across different collection paths (like rsky)
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        // Simulate records from different collections (app.bsky.feed.post, app.bsky.feed.like)
        let keys = vec![
            "app.bsky.feed.post/3l4qpz7ajrc2a",
            "app.bsky.feed.post/3l4qpz7bjrc2b",
            "app.bsky.feed.like/3l4qpz7cjrc2c",
            "app.bsky.feed.like/3l4qpz7djrc2d",
            "app.bsky.graph.follow/3l4qpz7ejrc2e",
        ];

        let mut mst = mst;
        for (i, key) in keys.iter().enumerate() {
            mst = mst.add(key, test_cid((i + 1) as u8)).await.unwrap();
        }

        // Get proof for each record
        for (i, key) in keys.iter().enumerate() {
            let cids = mst.cids_for_path(key).await.unwrap();

            // Should have root CID first
            assert_eq!(cids[0], mst.get_pointer().await.unwrap());

            // Should have record CID last (since record exists)
            assert_eq!(*cids.last().unwrap(), test_cid((i + 1) as u8));

            // Should have at least root + record
            assert!(cids.len() >= 2);
        }
    }

    #[tokio::test]
    async fn test_cids_for_path_proves_nonexistence() {
        // Test that we can prove a record doesn't exist in a tree with many records
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        // Add several records
        let existing_keys = vec![
            "com.example.posts/key1",
            "com.example.posts/key2",
            "com.example.posts/key4",
            "com.example.posts/key5",
        ];

        let mut mst = mst;
        for (i, key) in existing_keys.iter().enumerate() {
            mst = mst.add(key, test_cid((i + 1) as u8)).await.unwrap();
        }

        // Prove key3 doesn't exist (between key2 and key4)
        let nonexistent_key = "com.example.posts/key3";
        let cids = mst.cids_for_path(nonexistent_key).await.unwrap();

        // Should have root CID
        assert_eq!(cids[0], mst.get_pointer().await.unwrap());

        // Should NOT contain a record CID for key3
        assert!(!cids.contains(&test_cid(3)));

        // Proof is just the path showing where key3 WOULD be (proves absence)
        assert!(cids.len() >= 1);
    }

    #[tokio::test]
    async fn test_cids_for_path_collection_structure() {
        // Test proof generation for realistic collection/rkey structure
        let storage = Arc::new(MemoryBlockStore::new());
        let mst = Mst::new(storage);

        // Simulate a repo with multiple collections and records
        let records = vec![
            ("com.atproto.repo.strongRef", "abc123", test_cid(1)),
            ("app.bsky.feed.post", "post1", test_cid(2)),
            ("app.bsky.feed.post", "post2", test_cid(3)),
            ("app.bsky.feed.like", "like1", test_cid(4)),
            ("app.bsky.graph.follow", "follow1", test_cid(5)),
        ];

        let mut mst = mst;
        for (collection, rkey, cid) in &records {
            let key = format!("{}/{}", collection, rkey);
            mst = mst.add(&key, *cid).await.unwrap();
        }

        // Persist to storage so we have real MST blocks
        let root_cid = mst.persist().await.unwrap();
        assert_eq!(root_cid, mst.get_pointer().await.unwrap());

        // Get proofs for each record
        for (collection, rkey, expected_cid) in &records {
            let key = format!("{}/{}", collection, rkey);
            let cids = mst.cids_for_path(&key).await.unwrap();

            // Verify structure
            assert_eq!(cids[0], root_cid, "First CID should be root");
            assert_eq!(
                *cids.last().unwrap(),
                *expected_cid,
                "Last CID should be record"
            );
        }
    }
}
+278
crates/jacquard-repo/src/mst/util.rs
···
//! MST utility functions

use super::node::{NodeData, NodeEntry, TreeEntry};
use crate::error::{MstError, Result};
use crate::storage::BlockStore;
use bytes::Bytes;
use cid::Cid as IpldCid;
use sha2::{Digest, Sha256};

/// Compute CID from raw bytes
///
/// Uses SHA-256 hash and DAG-CBOR codec. Assumes data is already DAG-CBOR encoded.
pub fn compute_cid(data: &[u8]) -> Result<IpldCid> {
    use jacquard_common::types::crypto::{DAG_CBOR, SHA2_256};

    // SHA-256 hash
    let mut sha = Sha256::new();
    sha.update(data);
    let hash = sha.finalize().to_vec();
    // Build multihash using wrap (matches rsky approach)
    let mh = multihash::Multihash::<64>::wrap(SHA2_256, hash.as_slice())
        .map_err(|e| MstError::InvalidNode(e.to_string()))?;

    // Build CID with DAG-CBOR codec
    Ok(IpldCid::new_v1(DAG_CBOR, mh))
}

/// Serialize node to DAG-CBOR and compute CID
///
/// Uses SHA-256 hash and DAG-CBOR codec.
pub fn node_to_cid(node: &NodeData) -> Result<IpldCid> {
    let cbor =
        serde_ipld_dagcbor::to_vec(node).map_err(|e| MstError::Serialization(Box::new(e)))?;
    compute_cid(&cbor)
}

/// Calculate layer (depth) for a key based on its hash
///
/// Per atproto spec: depth = floor(leading_zero_bits / 2)
/// This gives a fanout of 4 (counting 2-bit chunks of zeros).
/// More leading zeros = deeper layer.
pub fn layer_for_key(key: &str) -> usize {
    let hash = Sha256::digest(key.as_bytes());
    leading_zeros(&hash) / 2
}

/// Count leading zero bits in hash
fn leading_zeros(hash: &[u8]) -> usize {
    let mut count = 0;
    for byte in hash {
        if *byte == 0 {
            count += 8;
        } else {
            count += byte.leading_zeros() as usize;
            break;
        }
    }
    count
}

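// Worked example (not part of the diff): with depth = floor(leading_zeros / 2),
// a key lands at layer >= n with probability 4^-n, which is where the fanout of
// 4 comes from. If sha256(key) begins 0x2F (0b0010_1111) there are two leading
// zero bits, so the key lives at layer floor(2 / 2) = 1; a hash beginning
// 0x00 0x80 has eight leading zero bits, so the key lives at layer
// floor(8 / 2) = 4 (matching the `leading_zeros` tests at the bottom).
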
/// Validate MST key format
///
/// Keys must match: [a-zA-Z0-9._:~-]+
/// Max length: 256 bytes (atproto limit)
pub fn validate_key(key: &str) -> Result<()> {
    if key.is_empty() {
        return Err(MstError::EmptyKey.into());
    }

    if key.len() > 256 {
        return Err(MstError::KeyTooLong {
            len: key.len(),
            max: 256,
        }
        .into());
    }

    if !key
        .chars()
        .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | ':' | '~' | '-' | '/'))
    {
        return Err(MstError::InvalidKeyChars {
            key: key.to_string(),
        }
        .into());
    }

    Ok(())
}

/// Count shared prefix length between two strings
pub fn common_prefix_len(a: &str, b: &str) -> usize {
    a.chars().zip(b.chars()).take_while(|(x, y)| x == y).count()
}

/// Serialize flat entries to wire format (with prefix compression)
///
/// Converts in-memory `Vec<NodeEntry>` to DAG-CBOR `NodeData`.
/// - First `Tree` entry → `left` pointer
/// - Each `Leaf` → entry with prefix compression
/// - `Tree` after `Leaf` → that leaf's `tree` pointer
pub fn serialize_node_data<'a, S: BlockStore + Sync + 'static>(
    entries: &'a [NodeEntry<S>],
) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<NodeData>> + Send + 'a>> {
    Box::pin(async move {
        let mut data = NodeData {
            left: None,
            entries: Vec::new(),
        };

        let mut i = 0;

        // First entry if Tree → becomes left pointer
        if let Some(NodeEntry::Tree(tree)) = entries.get(0) {
            data.left = Some(tree.get_pointer().await?);
            i += 1;
        }

        // Process remaining entries
        let mut last_key = String::new();
        while i < entries.len() {
            let entry = &entries[i];

            if let NodeEntry::Leaf { key, value } = entry {
                i += 1;

                // Calculate prefix with last key
                let prefix_len = common_prefix_len(&last_key, key.as_str());
                let key_suffix = &key.as_str()[prefix_len..];

                // Check for Tree after this Leaf
                let tree_ptr = if let Some(NodeEntry::Tree(tree)) = entries.get(i) {
                    i += 1;
                    Some(tree.get_pointer().await?)
                } else {
                    None
                };

                data.entries.push(TreeEntry {
                    prefix_len: prefix_len.try_into().map_err(|_| {
                        MstError::InvalidNode(format!(
                            "Prefix length {} exceeds u8::MAX",
                            prefix_len
                        ))
                    })?,
                    key_suffix: Bytes::copy_from_slice(key_suffix.as_bytes()),
                    value: *value,
                    tree: tree_ptr,
                });

                last_key = key.as_str().to_string();
            } else {
                return Err(
                    MstError::InvalidNode("Two Trees adjacent in flat entries".into()).into(),
                );
            }
        }

        Ok(data)
    })
}

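// Worked example (not part of the diff): prefix compression is against the
// previous leaf in the same node. Serializing these three leaves in order:
//
//     "app.bsky.feed.like/3l4a"  ->  prefix_len  0, key_suffix "app.bsky.feed.like/3l4a"
//     "app.bsky.feed.like/3l4b"  ->  prefix_len 22, key_suffix "b"
//     "app.bsky.feed.post/3l4c"  ->  prefix_len 14, key_suffix "post/3l4c"
//
// `deserialize_node_data` below reverses this by splicing each suffix onto the
// first `prefix_len` bytes of the previously reconstructed key.
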
/// Deserialize wire format to flat entries
///
/// Converts DAG-CBOR `NodeData` to in-memory `Vec<NodeEntry>`.
/// - `left` pointer → prepend `Tree` entry
/// - Each entry → `Leaf` with reconstructed full key
/// - `tree` pointer → append `Tree` entry
pub fn deserialize_node_data<S: BlockStore + Sync + 'static>(
    storage: std::sync::Arc<S>,
    data: &NodeData,
    layer: Option<usize>,
) -> Result<Vec<NodeEntry<S>>> {
    use crate::mst::Mst;

    let mut entries = Vec::new();

    // Left pointer → prepend Tree
    if let Some(left_cid) = data.left {
        let child_layer = layer.map(|l| if l > 0 { l - 1 } else { 0 });
        entries.push(NodeEntry::Tree(Mst::load(
            storage.clone(),
            left_cid,
            child_layer,
        )));
    }

    // Process entries
    let mut last_key = String::new();
    for entry in &data.entries {
        // Reconstruct full key from prefix
        let key_str = std::str::from_utf8(&entry.key_suffix)
            .map_err(|e| MstError::InvalidNode(format!("Invalid UTF-8 in key suffix: {}", e)))?;
        let prefix_len = entry.prefix_len as usize;
        let full_key = format!("{}{}", &last_key[..prefix_len], key_str);

        // Append Leaf
        entries.push(NodeEntry::Leaf {
            key: smol_str::SmolStr::new(&full_key),
            value: entry.value,
        });

        last_key = full_key;

        // Tree pointer → append Tree
        if let Some(tree_cid) = entry.tree {
            let child_layer = layer.map(|l| if l > 0 { l - 1 } else { 0 });
            entries.push(NodeEntry::Tree(Mst::load(
                storage.clone(),
                tree_cid,
                child_layer,
            )));
        }
    }

    Ok(entries)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_validate_key_valid() {
        assert!(validate_key("app.bsky.feed.post/abc123").is_ok());
        assert!(validate_key("foo.bar/test-key_2024").is_ok());
        assert!(validate_key("a").is_ok());
    }

    #[test]
    fn test_validate_key_empty() {
        assert!(validate_key("").is_err());
    }

    #[test]
    fn test_validate_key_too_long() {
        let long_key = "a".repeat(257);
        assert!(validate_key(&long_key).is_err());
    }

    #[test]
    fn test_validate_key_invalid_chars() {
        assert!(validate_key("key with spaces").is_err());
        assert!(validate_key("key@invalid").is_err());
        assert!(validate_key("key#hash").is_err());
    }

    #[test]
    fn test_common_prefix_len() {
        assert_eq!(common_prefix_len("hello", "help"), 3);
        assert_eq!(common_prefix_len("abc", "abc"), 3);
        assert_eq!(common_prefix_len("abc", "def"), 0);
        assert_eq!(common_prefix_len("", "test"), 0);
    }

    #[test]
    fn test_layer_for_key() {
        // Just ensure it returns a reasonable value
        let layer = layer_for_key("app.bsky.feed.post/test");
        assert!(layer < 256); // SHA-256 has 256 bits max

        // Same key should always give same layer
        let layer2 = layer_for_key("app.bsky.feed.post/test");
        assert_eq!(layer, layer2);
    }

    #[test]
    fn test_leading_zeros() {
        // [0, 0, 0, 1] = 8 + 8 + 8 + 7 = 31 leading zeros
        assert_eq!(leading_zeros(&[0, 0, 0, 1]), 31);
        // [0xFF, ...] = 0 leading zeros (first byte has leading 1s)
        assert_eq!(leading_zeros(&[0xFF, 0, 0]), 0);
        // [0, 0x80] = 8 + 0 = 8 leading zeros (0x80 = 0b10000000)
        assert_eq!(leading_zeros(&[0, 0x80]), 8);
        // [0, 0x01] = 8 + 7 = 15 leading zeros (0x01 = 0b00000001)
        assert_eq!(leading_zeros(&[0, 0x01]), 15);
    }
}
+888
crates/jacquard-repo/src/repo.rs
···
//! High-level repository operations
//!
//! Optional convenience layer over MST primitives. Provides type-safe record operations,
//! batch writes, commit creation, and CAR export.

use crate::MstDiff;
use crate::commit::Commit;
use crate::error::Result;
use crate::mst::{Mst, WriteOp};
use crate::storage::BlockStore;
use cid::Cid as IpldCid;
use jacquard_common::IntoStatic;
use jacquard_common::types::string::{Did, Nsid, RecordKey, Tid};
use jacquard_common::types::tid::Ticker;
use std::collections::BTreeMap;
use std::path::Path;
use std::sync::Arc;

/// Commit data for repository updates
///
/// Contains signed commit and all blocks needed for persistence.
/// Follows the rsky pattern of separating commit formatting from application.
#[derive(Debug, Clone)]
pub struct CommitData {
    /// Commit CID
    pub cid: IpldCid,

    /// New revision TID
    pub rev: Tid,

    /// Previous revision TID (None for initial commit)
    pub since: Option<Tid>,

    /// Previous commit CID (None for initial commit)
    pub prev: Option<IpldCid>,

    /// New MST root CID
    pub data: IpldCid,

    /// Previous MST root CID (for sync v1.1)
    pub prev_data: Option<IpldCid>,

    /// All blocks to persist (MST nodes + commit block)
    ///
    /// Includes:
    /// - All new MST node blocks from `mst.collect_blocks()`
    /// - The commit block itself
    pub blocks: BTreeMap<IpldCid, bytes::Bytes>,

    /// Relevant blocks for firehose (sync v1.1 inductive validation)
    ///
    /// Subset of `blocks` containing:
    /// - Commit block
    /// - MST node blocks along paths for all changed keys
    /// - Includes "adjacent" blocks needed for operation inversion
    pub relevant_blocks: BTreeMap<IpldCid, bytes::Bytes>,
}

impl CommitData {
    /// Generate a firehose commit message (sync v1.1)
    ///
    /// Converts this commit into a `FirehoseCommit` with `prev_data` field
    /// and relevant blocks for inductive validation.
    pub async fn to_firehose_commit(
        &self,
        repo: &Did<'_>,
        seq: i64,
        time: jacquard_common::types::string::Datetime,
        ops: Vec<crate::commit::firehose::RepoOp<'static>>,
        blobs: Vec<jacquard_common::types::cid::CidLink<'static>>,
    ) -> Result<crate::commit::firehose::FirehoseCommit<'static>> {
        use jacquard_common::types::cid::CidLink;

        // Convert relevant blocks to CAR format
        let blocks_car =
            crate::car::write_car_bytes(self.cid, self.relevant_blocks.clone()).await?;

        Ok(crate::commit::firehose::FirehoseCommit {
            repo: repo.clone().into_static(),
            rev: self.rev.clone(),
            seq,
            since: self.since.clone().unwrap_or_else(|| self.rev.clone()),
            time,
            commit: CidLink::from(self.cid),
            blocks: blocks_car.into(),
            ops,
            prev_data: self.prev_data.map(CidLink::from),
            blobs,
            too_big: false,
            rebase: false,
        })
    }
}

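// Illustrative sketch (not part of the diff): the expected publish flow, with
// `seq` from a PDS-side sequencer and `now` a jacquard_common `Datetime`, both
// assumed to come from the caller's context:
//
//     let (ops, commit_data) = repo.format_commit(&did, Some(prev_cid), &signing_key).await?;
//     let msg = commit_data
//         .to_firehose_commit(&did, seq, now, ops, vec![])
//         .await?;
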
/// High-level repository operations
///
/// Provides a convenient API over MST primitives for common repository workflows.
///
/// # Example
///
/// ```rust,ignore
/// use jacquard_repo::{Repository, MemoryBlockStore};
/// use jacquard_common::types::string::{Did, Nsid, RecordKey};
///
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
/// let storage = Arc::new(MemoryBlockStore::new());
/// let mut repo = create_test_repo(storage).await;
///
/// let collection = Nsid::new("app.bsky.feed.post")?;
/// let rkey = RecordKey::new("3l5yhcgz7y42y")?;
/// let record_cid = /* ... compute CID of record ... */;
///
/// repo.create_record(&collection, &rkey, record_cid).await?;
///
/// let did = Did::new("did:plc:example")?;
/// let signing_key = /* ... load key ... */;
/// let commit_cid = repo.commit(&did, None, &signing_key).await?;
///
/// repo.export_car("repo.car", commit_cid).await?;
/// # Ok(())
/// # }
/// ```
pub struct Repository<S: BlockStore> {
    mst: Mst<S>,
    storage: Arc<S>,
    commit: Commit<'static>,
    commit_cid: IpldCid,
}

impl<S: BlockStore + Sync + 'static> Repository<S> {
    /// Create repository from existing components
    ///
    /// Static constructor for when you already have the MST, commit, and CID.
    pub fn new(storage: Arc<S>, mst: Mst<S>, commit: Commit<'static>, commit_cid: IpldCid) -> Self {
        Self {
            storage,
            mst,
            commit,
            commit_cid,
        }
    }

    /// Load repository from commit CID
    pub async fn from_commit(storage: Arc<S>, commit_cid: &IpldCid) -> Result<Self> {
        let commit_bytes = storage
            .get(commit_cid)
            .await?
            .ok_or_else(|| crate::error::RepoError::not_found("commit", commit_cid))?;

        let commit = Commit::from_cbor(&commit_bytes)?;
        let mst_root = commit.data();

        let mst = Mst::load(storage.clone(), *mst_root, None);

        Ok(Self {
            mst,
            storage,
            commit: commit.into_static(),
            commit_cid: *commit_cid,
        })
    }

    /// Get a record by collection and rkey
    pub async fn get_record<T: jacquard_common::types::recordkey::RecordKeyType>(
        &self,
        collection: &Nsid<'_>,
        rkey: &RecordKey<T>,
    ) -> Result<Option<IpldCid>> {
        let key = format!("{}/{}", collection.as_ref(), rkey.as_ref());
        self.mst.get(&key).await
    }

    /// Create a record (error if exists)
    pub async fn create_record<T: jacquard_common::types::recordkey::RecordKeyType>(
        &mut self,
        collection: &Nsid<'_>,
        rkey: &RecordKey<T>,
        record_cid: IpldCid,
    ) -> Result<()> {
        let key = format!("{}/{}", collection.as_ref(), rkey.as_ref());

        if self.mst.get(&key).await?.is_some() {
            return Err(crate::error::RepoError::already_exists("record", &key));
        }

        self.mst = self.mst.add(&key, record_cid).await?;
        Ok(())
    }

    /// Update a record (error if not exists, returns previous CID)
    pub async fn update_record<T: jacquard_common::types::recordkey::RecordKeyType>(
        &mut self,
        collection: &Nsid<'_>,
        rkey: &RecordKey<T>,
        record_cid: IpldCid,
    ) -> Result<IpldCid> {
        let key = format!("{}/{}", collection.as_ref(), rkey.as_ref());

        let old_cid = self
            .mst
            .get(&key)
            .await?
            .ok_or_else(|| crate::error::RepoError::not_found("record", &key))?;

        self.mst = self.mst.update(&key, record_cid).await?;
        Ok(old_cid)
    }

    /// Delete a record (error if not exists, returns deleted CID)
    pub async fn delete_record<T: jacquard_common::types::recordkey::RecordKeyType>(
        &mut self,
        collection: &Nsid<'_>,
        rkey: &RecordKey<T>,
    ) -> Result<IpldCid> {
        let key = format!("{}/{}", collection.as_ref(), rkey.as_ref());

        let old_cid = self
            .mst
            .get(&key)
            .await?
            .ok_or_else(|| crate::error::RepoError::not_found("record", &key))?;

        self.mst = self.mst.delete(&key).await?;
        Ok(old_cid)
    }

/// Apply write operations one at a time, validating existence and any `prev` CIDs
228
+
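///
/// # Example
///
/// A minimal sketch; `cid1` and `cid2` stand in for record CIDs computed
/// elsewhere:
///
/// ```rust,ignore
/// let ops = vec![
///     WriteOp::Create { key: "app.bsky.feed.post/aaa".into(), cid: cid1 },
///     WriteOp::Delete { key: "app.bsky.feed.post/bbb".into(), prev: Some(cid2) },
/// ];
/// let diff = repo.create_writes(&ops).await?;
/// assert_eq!(diff.creates.len(), 1);
/// ```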
pub async fn create_writes(&mut self, ops: &[WriteOp]) -> Result<crate::mst::MstDiff> {
229
+
let old_mst = self.mst.clone();
230
+
231
+
// Apply operations individually (add/update/delete verify existence)
232
+
for op in ops {
233
+
self.mst = match op {
234
+
WriteOp::Create { key, cid } => {
235
+
// Check doesn't exist
236
+
if self.mst.get(key.as_str()).await?.is_some() {
237
+
return Err(crate::error::RepoError::already_exists(
238
+
"record",
239
+
key.as_str(),
240
+
));
241
+
}
242
+
self.mst.add(key.as_str(), *cid).await?
243
+
}
244
+
WriteOp::Update { key, cid, prev } => {
245
+
// Check exists
246
+
let current = self.mst.get(key.as_str()).await?.ok_or_else(|| {
247
+
crate::error::RepoError::not_found("record", key.as_str())
248
+
})?;
249
+
250
+
// Validate prev if provided
251
+
if let Some(prev_cid) = prev {
252
+
if &current != prev_cid {
253
+
return Err(crate::error::RepoError::invalid(format!(
254
+
"Update prev CID mismatch for key {}: expected {}, got {}",
255
+
key, prev_cid, current
256
+
)));
257
+
}
258
+
}
259
+
260
+
self.mst.add(key.as_str(), *cid).await?
261
+
}
262
+
WriteOp::Delete { key, prev } => {
263
+
// Check exists
264
+
let current = self.mst.get(key.as_str()).await?.ok_or_else(|| {
265
+
crate::error::RepoError::not_found("record", key.as_str())
266
+
})?;
267
+
268
+
// Validate prev if provided
269
+
if let Some(prev_cid) = prev {
270
+
if &current != prev_cid {
271
+
return Err(crate::error::RepoError::invalid(format!(
272
+
"Delete prev CID mismatch for key {}: expected {}, got {}",
273
+
key, prev_cid, current
274
+
)));
275
+
}
276
+
}
277
+
278
+
self.mst.delete(key.as_str()).await?
279
+
}
280
+
};
281
+
}
282
+
283
+
old_mst.diff(&self.mst).await
284
+
}
285
+
286
+
/// Apply write operations and create a commit
287
+
///
288
+
/// Convenience method that calls `create_writes()` and `commit()`.
289
+
pub async fn apply_writes<K>(&mut self, ops: &[WriteOp], signing_key: &K) -> Result<MstDiff>
290
+
where
291
+
K: crate::commit::SigningKey,
292
+
{
293
+
let did = self.commit.did.clone();
294
+
let cid = self.commit_cid;
295
+
let diff = self.create_writes(ops).await?;
296
+
self.commit(&did, Some(cid), signing_key).await?;
297
+
Ok(diff)
298
+
}
299
+
300
+
/// Format a commit (create signed commit + collect blocks)
301
+
///
302
+
/// Creates a signed commit and collects the blocks needed for persistence and the firehose:
303
+
/// - All MST node blocks from `mst.collect_blocks()`
304
+
/// - Commit block itself
305
+
/// - Relevant blocks for sync v1.1 (walks paths for all changed keys)
306
+
///
307
+
/// Returns `(ops, CommitData)` - ops are needed for `to_firehose_commit()`.
308
+
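///
/// # Example
///
/// A minimal sketch of the two-phase flow (setup elided; see `commit()` for
/// the one-step variant):
///
/// ```rust,ignore
/// // Phase 1: sign the commit and gather blocks (nothing is persisted yet)
/// let (ops, commit_data) = repo.format_commit(&did, Some(prev_cid), &signing_key).await?;
///
/// // `ops` feed the firehose; `commit_data.blocks` hold everything to persist
/// let new_cid = repo.apply_commit(commit_data).await?;
/// ```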
pub async fn format_commit<K>(
309
+
&self,
310
+
did: &Did<'_>,
311
+
prev: Option<IpldCid>,
312
+
signing_key: &K,
313
+
) -> Result<(Vec<crate::commit::firehose::RepoOp<'static>>, CommitData)>
314
+
where
315
+
K: crate::commit::SigningKey,
316
+
{
317
+
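// Advance the rev via Ticker; passing the previous rev keeps revs monotonic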
let rev = Ticker::new().next(Some(self.commit.rev.clone()));
318
+
let data = self.mst.root().await?;
319
+
let prev_data = *self.commit.data();
320
+
321
+
// Create signed commit
322
+
let commit = Commit::new_unsigned(did.clone().into_static(), data, rev.clone(), prev)
323
+
.sign(signing_key)?;
324
+
325
+
// Load previous MST to compute diff
326
+
let prev_mst = Mst::load(self.storage.clone(), prev_data, None);
327
+
let diff = prev_mst.diff(&self.mst).await?;
328
+
329
+
// Collect all MST blocks for persistence
330
+
let (_root_cid, mut blocks) = self.mst.collect_blocks().await?;
331
+
332
+
// Collect relevant blocks for firehose (walk paths for all changed keys)
333
+
let mut relevant_blocks = BTreeMap::new();
334
+
335
+
// Walk MST paths for every changed key (creates, updates, and deletes);
// for deletes the path may no longer exist in the new tree, but we walk
// as far as possible
let changed_keys = diff.creates.iter().map(|(key, _)| key)
.chain(diff.updates.iter().map(|(key, _, _)| key))
.chain(diff.deletes.iter().map(|(key, _)| key));
for key in changed_keys {
let path_cids = self.mst.cids_for_path(key.as_str()).await?;
for path_cid in path_cids {
if let Some(block) = blocks.get(&path_cid) {
relevant_blocks.insert(path_cid, block.clone());
} else if let Some(block) = self.storage.get(&path_cid).await? {
relevant_blocks.insert(path_cid, block);
}
}
}
370
+
371
+
// Add commit block to both collections
372
+
let commit_cbor = commit.to_cbor()?;
373
+
let commit_cid = crate::mst::util::compute_cid(&commit_cbor)?;
374
+
let commit_bytes = bytes::Bytes::from(commit_cbor);
375
+
blocks.insert(commit_cid, commit_bytes.clone());
376
+
relevant_blocks.insert(commit_cid, commit_bytes);
377
+
378
+
// Convert diff to repository operations
379
+
let ops = diff
380
+
.to_repo_ops()
381
+
.into_iter()
382
+
.map(|op| op.into_static())
383
+
.collect();
384
+
385
+
Ok((
386
+
ops,
387
+
CommitData {
388
+
cid: commit_cid,
389
+
rev,
390
+
since: Some(self.commit.rev.clone()),
391
+
prev,
392
+
data,
393
+
prev_data: Some(prev_data),
394
+
blocks,
395
+
relevant_blocks,
396
+
},
397
+
))
398
+
}
399
+
400
+
/// Apply a commit (persist blocks to storage)
401
+
///
402
+
/// Persists all blocks from `CommitData` and updates internal state.
403
+
pub async fn apply_commit(&mut self, commit_data: CommitData) -> Result<IpldCid> {
404
+
let commit_cid = commit_data.cid;
405
+
406
+
// Persist all blocks (MST + commit)
407
+
self.storage.put_many(commit_data.blocks).await?;
408
+
409
+
// Load and update internal state
410
+
let commit_bytes = self
411
+
.storage
412
+
.get(&commit_cid)
413
+
.await?
414
+
.ok_or_else(|| crate::error::RepoError::not_found("commit block", &commit_cid))?;
415
+
let commit = Commit::from_cbor(&commit_bytes)?;
416
+
417
+
self.commit = commit.into_static();
418
+
self.commit_cid = commit_cid;
419
+
420
+
// Reload MST from new root
421
+
self.mst = Mst::load(self.storage.clone(), *self.commit.data(), None);
422
+
423
+
Ok(commit_cid)
424
+
}
425
+
426
+
/// Create a commit for the current repository state
427
+
///
428
+
/// Convenience method that calls `format_commit()` and `apply_commit()`.
429
+
pub async fn commit<K>(
430
+
&mut self,
431
+
did: &Did<'_>,
432
+
prev: Option<IpldCid>,
433
+
signing_key: &K,
434
+
) -> Result<(Vec<crate::commit::firehose::RepoOp<'static>>, IpldCid)>
435
+
where
436
+
K: crate::commit::SigningKey,
437
+
{
438
+
let (ops, commit_data) = self.format_commit(did, prev, signing_key).await?;
439
+
Ok((ops, self.apply_commit(commit_data).await?))
440
+
}
441
+
442
+
/// Export repository to CAR file
443
+
pub async fn export_car(&self, path: impl AsRef<Path>, commit_cid: IpldCid) -> Result<()> {
444
+
crate::car::export_repo_car(path, commit_cid, &self.mst).await
445
+
}
446
+
447
+
/// Get the underlying MST
448
+
pub fn mst(&self) -> &Mst<S> {
449
+
&self.mst
450
+
}
451
+
452
+
/// Get reference to the storage
453
+
pub fn storage(&self) -> &Arc<S> {
454
+
&self.storage
455
+
}
456
+
457
+
/// Get the current commit
458
+
pub fn current_commit(&self) -> &Commit<'static> {
459
+
&self.commit
460
+
}
461
+
462
+
/// Get the current commit CID
463
+
pub fn current_commit_cid(&self) -> &IpldCid {
464
+
&self.commit_cid
465
+
}
466
+
467
+
/// Get the DID from the current commit
468
+
pub fn did(&self) -> &Did<'_> {
469
+
self.commit.did()
470
+
}
471
+
}
472
+
473
+
#[cfg(test)]
474
+
mod tests {
475
+
use std::str::FromStr;
476
+
477
+
use super::*;
478
+
use crate::storage::MemoryBlockStore;
479
+
use jacquard_common::types::recordkey::Rkey;
480
+
481
+
fn make_test_cid(value: u8) -> IpldCid {
482
+
use crate::DAG_CBOR_CID_CODEC;
483
+
use jacquard_common::types::crypto::SHA2_256;
484
+
use sha2::{Digest, Sha256};
485
+
486
+
let hash = Sha256::digest(&[value]);
487
+
let mh = multihash::Multihash::wrap(SHA2_256, &hash).unwrap();
488
+
IpldCid::new_v1(DAG_CBOR_CID_CODEC, mh)
489
+
}
490
+
491
+
async fn create_test_repo(storage: Arc<MemoryBlockStore>) -> Repository<MemoryBlockStore> {
492
+
let did = Did::new("did:plc:test").unwrap();
493
+
let signing_key = k256::ecdsa::SigningKey::random(&mut rand::rngs::OsRng);
494
+
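///
/// # Example
///
/// A minimal sketch; `storage` is assumed to already hold the commit and
/// MST blocks:
///
/// ```rust,ignore
/// let repo = Repository::from_commit(storage.clone(), &commit_cid).await?;
/// assert_eq!(repo.current_commit_cid(), &commit_cid);
/// ```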
495
+
let mst = Mst::new(storage.clone());
496
+
let data = mst.persist().await.unwrap(); // Persist empty MST
497
+
498
+
let rev = Ticker::new().next(None);
499
+
let commit = Commit::new_unsigned(did.into_static(), data, rev, None)
500
+
.sign(&signing_key)
501
+
.unwrap();
502
+
503
+
let commit_cbor = commit.to_cbor().unwrap();
504
+
let commit_cid = storage.put(&commit_cbor).await.unwrap();
505
+
506
+
Repository::new(storage, mst, commit.into_static(), commit_cid)
507
+
}
508
+
509
+
#[tokio::test]
510
+
async fn test_create_and_get_record() {
511
+
let storage = Arc::new(MemoryBlockStore::new());
512
+
let mut repo = create_test_repo(storage.clone()).await;
513
+
514
+
let collection = Nsid::new("app.bsky.feed.post").unwrap();
515
+
let rkey = RecordKey(Rkey::new("abc123").unwrap());
516
+
let cid = make_test_cid(1);
517
+
518
+
repo.create_record(&collection, &rkey, cid).await.unwrap();
519
+
520
+
let retrieved = repo.get_record(&collection, &rkey).await.unwrap();
521
+
assert_eq!(retrieved, Some(cid));
522
+
}
523
+
524
+
#[tokio::test]
525
+
async fn test_create_duplicate_fails() {
526
+
let storage = Arc::new(MemoryBlockStore::new());
527
+
let mut repo = create_test_repo(storage).await;
528
+
529
+
let collection = Nsid::new("app.bsky.feed.post").unwrap();
530
+
let rkey = RecordKey(Rkey::new("abc123").unwrap());
531
+
let cid = make_test_cid(1);
532
+
533
+
repo.create_record(&collection, &rkey, cid).await.unwrap();
534
+
535
+
let result = repo
536
+
.create_record(&collection, &rkey, make_test_cid(2))
537
+
.await;
538
+
assert!(result.is_err());
539
+
}
540
+
541
+
#[tokio::test]
542
+
async fn test_update_record() {
543
+
let storage = Arc::new(MemoryBlockStore::new());
544
+
let mut repo = create_test_repo(storage).await;
545
+
546
+
let collection = Nsid::new("app.bsky.feed.post").unwrap();
547
+
let rkey = RecordKey(Rkey::new("abc123").unwrap());
548
+
let cid1 = make_test_cid(1);
549
+
let cid2 = make_test_cid(2);
550
+
551
+
repo.create_record(&collection, &rkey, cid1).await.unwrap();
552
+
553
+
let old = repo.update_record(&collection, &rkey, cid2).await.unwrap();
554
+
assert_eq!(old, cid1);
555
+
556
+
let retrieved = repo.get_record(&collection, &rkey).await.unwrap();
557
+
assert_eq!(retrieved, Some(cid2));
558
+
}
559
+
560
+
#[tokio::test]
561
+
async fn test_update_nonexistent_fails() {
562
+
let storage = Arc::new(MemoryBlockStore::new());
563
+
let mut repo = create_test_repo(storage).await;
564
+
565
+
let collection = Nsid::new("app.bsky.feed.post").unwrap();
566
+
let rkey = RecordKey(Rkey::new("abc123").unwrap());
567
+
let cid = make_test_cid(1);
568
+
569
+
let result = repo.update_record(&collection, &rkey, cid).await;
570
+
assert!(result.is_err());
571
+
}
572
+
573
+
#[tokio::test]
574
+
async fn test_delete_record() {
575
+
let storage = Arc::new(MemoryBlockStore::new());
576
+
let mut repo = create_test_repo(storage).await;
577
+
578
+
let collection = Nsid::new("app.bsky.feed.post").unwrap();
579
+
let rkey = RecordKey(Rkey::new("abc123").unwrap());
580
+
let cid = make_test_cid(1);
581
+
582
+
repo.create_record(&collection, &rkey, cid).await.unwrap();
583
+
584
+
let deleted = repo.delete_record(&collection, &rkey).await.unwrap();
585
+
assert_eq!(deleted, cid);
586
+
587
+
let retrieved = repo.get_record(&collection, &rkey).await.unwrap();
588
+
assert_eq!(retrieved, None);
589
+
}
590
+
591
+
#[tokio::test]
592
+
async fn test_delete_nonexistent_fails() {
593
+
let storage = Arc::new(MemoryBlockStore::new());
594
+
let mut repo = create_test_repo(storage).await;
595
+
596
+
let collection = Nsid::new("app.bsky.feed.post").unwrap();
597
+
let rkey = RecordKey(Rkey::new("abc123").unwrap());
598
+
599
+
let result = repo.delete_record(&collection, &rkey).await;
600
+
assert!(result.is_err());
601
+
}
602
+
603
+
#[tokio::test]
604
+
async fn test_apply_writes() {
605
+
let storage = Arc::new(MemoryBlockStore::new());
606
+
let mut repo = create_test_repo(storage).await;
607
+
608
+
let ops = vec![
609
+
WriteOp::Create {
610
+
key: "app.bsky.feed.post/abc123".into(),
611
+
cid: make_test_cid(1),
612
+
},
613
+
WriteOp::Create {
614
+
key: "app.bsky.feed.post/def456".into(),
615
+
cid: make_test_cid(2),
616
+
},
617
+
];
618
+
619
+
let diff = repo.create_writes(&ops).await.unwrap();
620
+
assert_eq!(diff.creates.len(), 2);
621
+
assert_eq!(diff.updates.len(), 0);
622
+
assert_eq!(diff.deletes.len(), 0);
623
+
}
624
+
625
+
#[tokio::test]
626
+
async fn test_from_commit() {
627
+
let storage = Arc::new(MemoryBlockStore::new());
628
+
let mut repo = create_test_repo(storage.clone()).await;
629
+
630
+
let collection = Nsid::new("app.bsky.feed.post").unwrap();
631
+
let rkey = RecordKey(Rkey::new("abc123").unwrap());
632
+
let cid = make_test_cid(1);
633
+
634
+
repo.create_record(&collection, &rkey, cid).await.unwrap();
635
+
636
+
// Persist MST
637
+
repo.mst.persist().await.unwrap();
638
+
639
+
// Create commit (need a signing key for this test)
640
+
let did = Did::new("did:plc:test").unwrap();
641
+
let signing_key = k256::ecdsa::SigningKey::random(&mut rand::rngs::OsRng);
642
+
let (_, commit_cid) = repo.commit(&did, None, &signing_key).await.unwrap();
643
+
644
+
// Load from commit
645
+
let loaded_repo = Repository::from_commit(storage, &commit_cid).await.unwrap();
646
+
647
+
let retrieved = loaded_repo.get_record(&collection, &rkey).await.unwrap();
648
+
assert_eq!(retrieved, Some(cid));
649
+
}
650
+
651
+
#[tokio::test]
652
+
async fn test_commit_creates_valid_commit() {
653
+
let storage = Arc::new(MemoryBlockStore::new());
654
+
let mut repo = create_test_repo(storage.clone()).await;
655
+
656
+
let collection = Nsid::new("app.bsky.feed.post").unwrap();
657
+
let rkey = RecordKey(Rkey::new("abc123").unwrap());
658
+
let cid = make_test_cid(1);
659
+
660
+
repo.create_record(&collection, &rkey, cid).await.unwrap();
661
+
repo.mst.persist().await.unwrap();
662
+
663
+
let did = Did::new("did:plc:test").unwrap();
664
+
let signing_key = k256::ecdsa::SigningKey::random(&mut rand::rngs::OsRng);
665
+
let (_, commit_cid) = repo.commit(&did, None, &signing_key).await.unwrap();
666
+
667
+
// Verify commit was stored
668
+
let commit_bytes = storage.get(&commit_cid).await.unwrap();
669
+
assert!(commit_bytes.is_some());
670
+
671
+
// Verify commit can be deserialized
672
+
let bytes = commit_bytes.unwrap();
673
+
let commit = Commit::from_cbor(&bytes).unwrap();
674
+
assert_eq!(commit.did().as_ref(), did.as_ref());
675
+
let root_cid = repo.mst.root().await.unwrap();
676
+
assert_eq!(commit.data(), &root_cid);
677
+
}
678
+
679
+
#[tokio::test]
680
+
async fn test_sequential_operations() {
681
+
let storage = Arc::new(MemoryBlockStore::new());
682
+
let mut repo = create_test_repo(storage.clone()).await;
683
+
684
+
let collection = Nsid::new("app.bsky.feed.post").unwrap();
685
+
let rkey = RecordKey(Rkey::new("test1").unwrap());
686
+
let cid1 = make_test_cid(1);
687
+
let cid2 = make_test_cid(2);
688
+
689
+
// Create
690
+
repo.create_record(&collection, &rkey, cid1).await.unwrap();
691
+
let got = repo.get_record(&collection, &rkey).await.unwrap();
692
+
assert_eq!(got, Some(cid1));
693
+
694
+
// Update
695
+
let old = repo.update_record(&collection, &rkey, cid2).await.unwrap();
696
+
assert_eq!(old, cid1);
697
+
let got = repo.get_record(&collection, &rkey).await.unwrap();
698
+
assert_eq!(got, Some(cid2));
699
+
700
+
// Delete
701
+
let deleted = repo.delete_record(&collection, &rkey).await.unwrap();
702
+
assert_eq!(deleted, cid2);
703
+
let got = repo.get_record(&collection, &rkey).await.unwrap();
704
+
assert!(got.is_none());
705
+
}
706
+
707
+
#[tokio::test]
708
+
async fn test_large_scale_operations() {
709
+
let storage = Arc::new(MemoryBlockStore::new());
710
+
let mut repo = create_test_repo(storage.clone()).await;
711
+
712
+
let collection = Nsid::new("app.bsky.feed.post").unwrap();
713
+
let mut ticker = Ticker::new();
714
+
715
+
// Add 100 records
716
+
let mut records = Vec::new();
717
+
for i in 0..100 {
718
+
let tid_str = ticker.next(None).into_static();
719
+
let rkey = RecordKey(Rkey::from_str(tid_str.as_str()).unwrap());
720
+
let cid = make_test_cid((i % 256) as u8);
721
+
repo.create_record(&collection, &rkey, cid).await.unwrap();
722
+
records.push((rkey, cid));
723
+
}
724
+
725
+
// Verify all records exist
726
+
for (rkey, cid) in &records {
727
+
let got = repo.get_record(&collection, rkey).await.unwrap();
728
+
assert_eq!(got, Some(*cid));
729
+
}
730
+
731
+
// Update first 20 records
732
+
for i in 0..20 {
733
+
let (rkey, _old_cid) = &records[i];
734
+
let new_cid = make_test_cid(((i + 100) % 256) as u8);
735
+
repo.update_record(&collection, rkey, new_cid)
736
+
.await
737
+
.unwrap();
738
+
739
+
let got = repo.get_record(&collection, rkey).await.unwrap();
740
+
assert_eq!(got, Some(new_cid));
741
+
}
742
+
743
+
// Delete last 20 records
744
+
for i in 80..100 {
745
+
let (rkey, cid) = &records[i];
746
+
let deleted = repo.delete_record(&collection, rkey).await.unwrap();
747
+
assert_eq!(deleted, *cid);
748
+
749
+
let got = repo.get_record(&collection, rkey).await.unwrap();
750
+
assert!(got.is_none());
751
+
}
752
+
}
753
+
754
+
#[tokio::test]
755
+
async fn test_commit_signature_verification() {
756
+
use jacquard_common::types::crypto::{KeyCodec, PublicKey};
757
+
758
+
let storage = Arc::new(MemoryBlockStore::new());
759
+
let mut repo = create_test_repo(storage.clone()).await;
760
+
761
+
let collection = Nsid::new("app.bsky.feed.post").unwrap();
762
+
let rkey = RecordKey(Rkey::new("abc123").unwrap());
763
+
let cid = make_test_cid(1);
764
+
765
+
repo.create_record(&collection, &rkey, cid).await.unwrap();
766
+
repo.mst.persist().await.unwrap();
767
+
768
+
let did = Did::new("did:plc:test").unwrap();
769
+
let signing_key = k256::ecdsa::SigningKey::random(&mut rand::rngs::OsRng);
770
+
771
+
// Get public key from signing key
772
+
let verifying_key = signing_key.verifying_key();
773
+
let pubkey_bytes = verifying_key.to_encoded_point(true).as_bytes().to_vec();
774
+
let pubkey = PublicKey {
775
+
codec: KeyCodec::Secp256k1,
776
+
bytes: pubkey_bytes.into(),
777
+
};
778
+
779
+
let (_, commit_cid) = repo.commit(&did, None, &signing_key).await.unwrap();
780
+
781
+
// Load commit and verify signature
782
+
let commit_bytes = storage.get(&commit_cid).await.unwrap().unwrap();
783
+
let commit = Commit::from_cbor(&commit_bytes).unwrap();
784
+
785
+
// Signature verification should succeed
786
+
commit.verify(&pubkey).unwrap();
787
+
}
788
+
789
+
#[tokio::test]
790
+
async fn test_load_from_storage_with_multiple_commits() {
791
+
let storage = Arc::new(MemoryBlockStore::new());
792
+
let mut repo = create_test_repo(storage.clone()).await;
793
+
794
+
let collection = Nsid::new("app.bsky.feed.post").unwrap();
795
+
let did = Did::new("did:plc:test").unwrap();
796
+
let signing_key = k256::ecdsa::SigningKey::random(&mut rand::rngs::OsRng);
797
+
798
+
// Add some records and commit
799
+
let mut records = Vec::new();
800
+
for i in 0..10 {
801
+
let rkey = RecordKey(Rkey::from_str(&format!("record{}", i)).unwrap());
802
+
let cid = make_test_cid(i as u8);
803
+
repo.create_record(&collection, &rkey, cid).await.unwrap();
804
+
records.push((rkey, cid));
805
+
}
806
+
repo.mst.persist().await.unwrap();
807
+
let (_, commit_cid) = repo
808
+
.commit(&did, Some(repo.current_commit_cid().clone()), &signing_key)
809
+
.await
810
+
.unwrap();
811
+
812
+
// Load repository from storage
813
+
let loaded_repo = Repository::from_commit(storage.clone(), &commit_cid)
814
+
.await
815
+
.unwrap();
816
+
817
+
// Verify all records are accessible
818
+
for (rkey, cid) in &records {
819
+
let got = loaded_repo.get_record(&collection, rkey).await.unwrap();
820
+
assert_eq!(got, Some(*cid));
821
+
}
822
+
823
+
// Verify metadata matches
824
+
assert_eq!(loaded_repo.did().as_ref(), did.as_ref());
825
+
assert_eq!(loaded_repo.current_commit().version, 3);
826
+
assert_eq!(loaded_repo.current_commit_cid(), &commit_cid);
827
+
}
828
+
829
+
#[tokio::test]
830
+
async fn test_batch_mixed_operations() {
831
+
let storage = Arc::new(MemoryBlockStore::new());
832
+
let mut repo = create_test_repo(storage.clone()).await;
833
+
834
+
let collection = Nsid::new("app.bsky.feed.post").unwrap();
835
+
836
+
// Pre-populate with some records
837
+
let rkey1 = RecordKey(Rkey::new("existing1").unwrap());
838
+
let rkey2 = RecordKey(Rkey::new("existing2").unwrap());
839
+
let rkey3 = RecordKey(Rkey::new("existing3").unwrap());
840
+
repo.create_record(&collection, &rkey1, make_test_cid(1))
841
+
.await
842
+
.unwrap();
843
+
repo.create_record(&collection, &rkey2, make_test_cid(2))
844
+
.await
845
+
.unwrap();
846
+
repo.create_record(&collection, &rkey3, make_test_cid(3))
847
+
.await
848
+
.unwrap();
849
+
850
+
// Batch operation: create new, update existing, delete existing
851
+
let ops = vec![
852
+
WriteOp::Create {
853
+
key: format!("{}/{}", collection.as_ref(), "new1").into(),
854
+
cid: make_test_cid(10),
855
+
},
856
+
WriteOp::Update {
857
+
key: format!("{}/{}", collection.as_ref(), "existing1").into(),
858
+
cid: make_test_cid(11),
859
+
prev: None,
860
+
},
861
+
WriteOp::Delete {
862
+
key: format!("{}/{}", collection.as_ref(), "existing2").into(),
863
+
prev: None,
864
+
},
865
+
];
866
+
867
+
let diff = repo.create_writes(&ops).await.unwrap();
868
+
assert_eq!(diff.creates.len(), 1);
869
+
assert_eq!(diff.updates.len(), 1);
870
+
assert_eq!(diff.deletes.len(), 1);
871
+
872
+
// Verify final state
873
+
let new_rkey = RecordKey(Rkey::new("new1").unwrap());
874
+
assert_eq!(
875
+
repo.get_record(&collection, &new_rkey).await.unwrap(),
876
+
Some(make_test_cid(10))
877
+
);
878
+
assert_eq!(
879
+
repo.get_record(&collection, &rkey1).await.unwrap(),
880
+
Some(make_test_cid(11))
881
+
);
882
+
assert_eq!(repo.get_record(&collection, &rkey2).await.unwrap(), None);
883
+
assert_eq!(
884
+
repo.get_record(&collection, &rkey3).await.unwrap(),
885
+
Some(make_test_cid(3))
886
+
);
887
+
}
888
+
}
+276
crates/jacquard-repo/src/storage/file.rs
···
1
+
//! CAR file-backed block storage
2
+
3
+
use std::collections::BTreeMap;
4
+
use std::path::PathBuf;
5
+
use std::sync::{Arc, RwLock};
6
+
7
+
use bytes::Bytes;
8
+
use cid::Cid as IpldCid;
9
+
10
+
use crate::error::Result;
11
+
use crate::storage::BlockStore;
12
+
13
+
/// CAR file-backed block storage
14
+
///
15
+
/// Loads entire CAR file into memory on construction, writes back on flush.
16
+
/// For very large CAR files, consider database-backed storage instead.
17
+
///
18
+
/// Primarily useful for testing and simple file-based persistence.
19
+
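///
/// # Example
///
/// A minimal sketch of the write-then-flush flow ("repo.car" is a
/// placeholder path):
///
/// ```rust,ignore
/// use jacquard_repo::storage::{BlockStore, FileBlockStore};
///
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
/// let storage = FileBlockStore::new("repo.car");
/// let cid = storage.put(b"hello world").await?;
/// storage.set_roots(vec![cid]);
///
/// // Nothing touches disk until flush
/// storage.flush().await?;
///
/// // Reload later
/// let reloaded = FileBlockStore::load("repo.car").await?;
/// assert_eq!(reloaded.roots(), vec![cid]);
/// # Ok(())
/// # }
/// ```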
#[derive(Debug, Clone)]
20
+
pub struct FileBlockStore {
21
+
path: PathBuf,
22
+
blocks: Arc<RwLock<BTreeMap<IpldCid, Bytes>>>,
23
+
roots: Arc<RwLock<Vec<IpldCid>>>,
24
+
dirty: Arc<RwLock<bool>>,
25
+
}
26
+
27
+
impl FileBlockStore {
28
+
/// Load from existing CAR file
29
+
pub async fn load(path: impl Into<PathBuf>) -> Result<Self> {
30
+
let path = path.into();
31
+
32
+
// Read header to get roots
33
+
let roots = crate::car::read_car_header(&path).await?;
34
+
35
+
// Read all blocks
36
+
let blocks = crate::car::read_car(&path).await?;
37
+
38
+
Ok(Self {
39
+
path,
40
+
blocks: Arc::new(RwLock::new(blocks)),
41
+
roots: Arc::new(RwLock::new(roots)),
42
+
dirty: Arc::new(RwLock::new(false)),
43
+
})
44
+
}
45
+
46
+
/// Create new CAR file storage (empty)
47
+
///
48
+
/// Creates an empty in-memory storage that will write to the given path
49
+
/// when `flush()` is called.
50
+
///
51
+
/// The file is not created until the first flush.
52
+
pub fn new(path: impl Into<PathBuf>) -> Self {
53
+
Self {
54
+
path: path.into(),
55
+
blocks: Arc::new(RwLock::new(BTreeMap::new())),
56
+
roots: Arc::new(RwLock::new(Vec::new())),
57
+
dirty: Arc::new(RwLock::new(false)),
58
+
}
59
+
}
60
+
61
+
/// Get the CAR file roots
62
+
///
63
+
/// In a repository CAR file, roots typically contain the commit CID(s).
64
+
pub fn roots(&self) -> Vec<IpldCid> {
65
+
self.roots.read().unwrap().clone()
66
+
}
67
+
68
+
/// Set the CAR file roots (for writing)
69
+
///
70
+
/// This marks the storage as dirty. Call `flush()` to persist the change.
71
+
pub fn set_roots(&self, new_roots: Vec<IpldCid>) {
72
+
*self.roots.write().unwrap() = new_roots;
73
+
*self.dirty.write().unwrap() = true;
74
+
}
75
+
76
+
/// Write blocks back to CAR file if dirty
77
+
///
78
+
/// This is an async operation that writes the entire block store to the
79
+
/// CAR file. Only writes if there have been changes since the last flush.
80
+
///
81
+
/// # Errors
82
+
///
83
+
/// Returns an error if the CAR file cannot be written.
84
+
pub async fn flush(&self) -> Result<()> {
85
+
if !*self.dirty.read().unwrap() {
86
+
return Ok(());
87
+
}
88
+
89
+
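// Snapshot blocks and roots under the read locks; the guards drop before
// the async file write below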
let blocks = self.blocks.read().unwrap().clone();
90
+
let roots = self.roots.read().unwrap().clone();
91
+
crate::car::write_car(&self.path, roots, blocks).await?;
92
+
93
+
*self.dirty.write().unwrap() = false;
94
+
Ok(())
95
+
}
96
+
97
+
/// Check if store has unflushed changes
98
+
pub fn is_dirty(&self) -> bool {
99
+
*self.dirty.read().unwrap()
100
+
}
101
+
102
+
/// Get the path to the CAR file
103
+
pub fn path(&self) -> &std::path::Path {
104
+
&self.path
105
+
}
106
+
}
107
+
108
+
impl BlockStore for FileBlockStore {
109
+
async fn get(&self, cid: &IpldCid) -> Result<Option<Bytes>> {
110
+
Ok(self.blocks.read().unwrap().get(cid).cloned())
111
+
}
112
+
113
+
async fn put(&self, data: &[u8]) -> Result<IpldCid> {
114
+
let cid = crate::mst::util::compute_cid(data)?;
115
+
self.blocks
116
+
.write()
117
+
.unwrap()
118
+
.insert(cid, Bytes::copy_from_slice(data));
119
+
*self.dirty.write().unwrap() = true;
120
+
Ok(cid)
121
+
}
122
+
123
+
async fn has(&self, cid: &IpldCid) -> Result<bool> {
124
+
Ok(self.blocks.read().unwrap().contains_key(cid))
125
+
}
126
+
127
+
async fn put_many(
128
+
&self,
129
+
blocks: impl IntoIterator<Item = (IpldCid, Bytes)> + Send,
130
+
) -> Result<()> {
131
+
let mut store = self.blocks.write().unwrap();
132
+
for (cid, data) in blocks {
133
+
store.insert(cid, data);
134
+
}
135
+
*self.dirty.write().unwrap() = true;
136
+
Ok(())
137
+
}
138
+
139
+
async fn get_many(&self, cids: &[IpldCid]) -> Result<Vec<Option<Bytes>>> {
140
+
let store = self.blocks.read().unwrap();
141
+
let mut results = Vec::with_capacity(cids.len());
142
+
for cid in cids {
143
+
results.push(store.get(cid).cloned());
144
+
}
145
+
Ok(results)
146
+
}
147
+
}
148
+
149
+
#[cfg(test)]
150
+
mod tests {
151
+
use super::*;
152
+
use crate::DAG_CBOR_CID_CODEC;
153
+
use jacquard_common::types::crypto::SHA2_256;
154
+
use tempfile::NamedTempFile;
155
+
156
+
fn test_cid(n: u8) -> IpldCid {
157
+
let data = vec![n; 32];
158
+
let mh = multihash::Multihash::wrap(SHA2_256, &data).unwrap();
159
+
IpldCid::new_v1(DAG_CBOR_CID_CODEC, mh)
160
+
}
161
+
162
+
#[tokio::test]
163
+
async fn test_new_empty_store() {
164
+
let temp_file = NamedTempFile::new().unwrap();
165
+
let storage = FileBlockStore::new(temp_file.path());
166
+
167
+
assert!(storage.roots().is_empty());
168
+
assert!(!storage.is_dirty());
169
+
assert_eq!(storage.path(), temp_file.path());
170
+
}
171
+
172
+
#[tokio::test]
173
+
async fn test_put_and_get() {
174
+
let temp_file = NamedTempFile::new().unwrap();
175
+
let storage = FileBlockStore::new(temp_file.path());
176
+
177
+
let data = b"test data";
178
+
let cid = storage.put(data).await.unwrap();
179
+
180
+
assert!(storage.is_dirty());
181
+
182
+
let retrieved = storage.get(&cid).await.unwrap().unwrap();
183
+
assert_eq!(retrieved.as_ref(), data);
184
+
}
185
+
186
+
#[tokio::test]
187
+
async fn test_has() {
188
+
let temp_file = NamedTempFile::new().unwrap();
189
+
let storage = FileBlockStore::new(temp_file.path());
190
+
191
+
let data = b"test data";
192
+
let cid = storage.put(data).await.unwrap();
193
+
194
+
assert!(storage.has(&cid).await.unwrap());
195
+
assert!(!storage.has(&test_cid(99)).await.unwrap());
196
+
}
197
+
198
+
#[tokio::test]
199
+
async fn test_flush_and_reload() {
200
+
let temp_file = NamedTempFile::new().unwrap();
201
+
202
+
// Create store, add data, flush
203
+
let storage = FileBlockStore::new(temp_file.path());
204
+
let data1 = b"test data 1";
205
+
let data2 = b"test data 2";
206
+
let cid1 = storage.put(data1).await.unwrap();
207
+
let cid2 = storage.put(data2).await.unwrap();
208
+
209
+
storage.set_roots(vec![cid1]);
210
+
assert!(storage.is_dirty());
211
+
212
+
storage.flush().await.unwrap();
213
+
assert!(!storage.is_dirty());
214
+
215
+
// Reload from file
216
+
let storage2 = FileBlockStore::load(temp_file.path()).await.unwrap();
217
+
218
+
assert_eq!(storage2.roots(), vec![cid1]);
219
+
assert_eq!(storage2.get(&cid1).await.unwrap().unwrap().as_ref(), data1);
220
+
assert_eq!(storage2.get(&cid2).await.unwrap().unwrap().as_ref(), data2);
221
+
assert!(!storage2.is_dirty());
222
+
}
223
+
224
+
#[tokio::test]
225
+
async fn test_put_many() {
226
+
let temp_file = NamedTempFile::new().unwrap();
227
+
let storage = FileBlockStore::new(temp_file.path());
228
+
229
+
let data1 = Bytes::from_static(b"data 1");
230
+
let data2 = Bytes::from_static(b"data 2");
231
+
let cid1 = test_cid(1);
232
+
let cid2 = test_cid(2);
233
+
234
+
storage
235
+
.put_many(vec![(cid1, data1.clone()), (cid2, data2.clone())])
236
+
.await
237
+
.unwrap();
238
+
239
+
assert!(storage.is_dirty());
240
+
assert_eq!(storage.get(&cid1).await.unwrap().unwrap(), data1);
241
+
assert_eq!(storage.get(&cid2).await.unwrap().unwrap(), data2);
242
+
}
243
+
244
+
#[tokio::test]
245
+
async fn test_get_many() {
246
+
let temp_file = NamedTempFile::new().unwrap();
247
+
let storage = FileBlockStore::new(temp_file.path());
248
+
249
+
let data1 = b"data 1";
250
+
let data2 = b"data 2";
251
+
let cid1 = storage.put(data1).await.unwrap();
252
+
let cid2 = storage.put(data2).await.unwrap();
253
+
let cid3 = test_cid(99); // Non-existent
254
+
255
+
let results = storage.get_many(&[cid1, cid2, cid3]).await.unwrap();
256
+
257
+
assert_eq!(results.len(), 3);
258
+
assert_eq!(results[0].as_ref().unwrap().as_ref(), data1);
259
+
assert_eq!(results[1].as_ref().unwrap().as_ref(), data2);
260
+
assert!(results[2].is_none());
261
+
}
262
+
263
+
#[tokio::test]
264
+
async fn test_set_roots_marks_dirty() {
265
+
let temp_file = NamedTempFile::new().unwrap();
266
+
let storage = FileBlockStore::new(temp_file.path());
267
+
268
+
assert!(!storage.is_dirty());
269
+
270
+
storage.set_roots(vec![test_cid(1)]);
271
+
assert!(storage.is_dirty());
272
+
273
+
storage.flush().await.unwrap();
274
+
assert!(!storage.is_dirty());
275
+
}
276
+
}
+210
crates/jacquard-repo/src/storage/layered.rs
···
1
+
//! Layered block storage for efficient firehose validation
2
+
//!
3
+
//! Provides a two-layer storage that reads from a writable layer first,
4
+
//! then falls back to a read-only base layer. All writes go to the writable layer.
5
+
//!
6
+
//! This is used for firehose validation to avoid copying the entire previous MST tree.
7
+
8
+
use crate::error::Result;
9
+
use crate::storage::BlockStore;
10
+
use bytes::Bytes;
11
+
use cid::Cid as IpldCid;
12
+
13
+
/// Layered block storage with a writable overlay and read-only base
14
+
///
15
+
/// Reads check the writable layer first, then fall back to the base layer.
16
+
/// All writes go to the writable layer only.
17
+
///
18
+
/// # Use Case
19
+
///
20
+
/// Firehose validation needs to:
21
+
/// 1. Load previous MST state from existing storage (base layer)
22
+
/// 2. Apply new operations that create new MST nodes (writable layer)
23
+
/// 3. Not pollute the base storage with temporary validation blocks
24
+
///
25
+
/// Without layering, we'd need to copy all previous MST blocks to temporary storage.
26
+
/// With layering, we just overlay temp storage on top of base storage.
27
+
///
28
+
/// # Example
29
+
///
30
+
/// ```rust,ignore
31
+
/// use jacquard_repo::storage::{LayeredBlockStore, MemoryBlockStore};
32
+
/// use std::sync::Arc;
33
+
///
34
+
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
35
+
/// let base = Arc::new(MemoryBlockStore::new()); // existing repo storage
36
+
/// let writable = MemoryBlockStore::new(); // temp storage for validation
37
+
///
38
+
/// let layered = LayeredBlockStore::new(writable, base);
39
+
///
40
+
/// // Reads check writable first, then base
41
+
/// // Writes only go to writable
42
+
/// # Ok(())
43
+
/// # }
44
+
/// ```
45
+
#[derive(Clone)]
46
+
pub struct LayeredBlockStore<W: BlockStore, B: BlockStore> {
47
+
writable: W,
48
+
base: std::sync::Arc<B>,
49
+
}
50
+
51
+
impl<W: BlockStore, B: BlockStore> LayeredBlockStore<W, B> {
52
+
/// Create a new layered storage
53
+
///
54
+
/// - `writable`: Top layer receiving all writes
55
+
/// - `base`: Bottom layer for fallback reads (read-only, Arc-wrapped to avoid cloning)
56
+
pub fn new(writable: W, base: std::sync::Arc<B>) -> Self {
57
+
Self { writable, base }
58
+
}
59
+
60
+
/// Get reference to the writable layer
61
+
pub fn writable(&self) -> &W {
62
+
&self.writable
63
+
}
64
+
65
+
/// Get reference to the base layer
66
+
pub fn base(&self) -> &std::sync::Arc<B> {
67
+
&self.base
68
+
}
69
+
}
70
+
71
+
impl<W: BlockStore + Sync + 'static, B: BlockStore + Sync + 'static> BlockStore
72
+
for LayeredBlockStore<W, B>
73
+
{
74
+
async fn get(&self, cid: &IpldCid) -> Result<Option<Bytes>> {
75
+
// Check writable layer first
76
+
if let Some(data) = self.writable.get(cid).await? {
77
+
return Ok(Some(data));
78
+
}
79
+
80
+
// Fall back to base layer
81
+
self.base.get(cid).await
82
+
}
83
+
84
+
async fn put(&self, data: &[u8]) -> Result<IpldCid> {
85
+
// All writes go to writable layer
86
+
self.writable.put(data).await
87
+
}
88
+
89
+
async fn has(&self, cid: &IpldCid) -> Result<bool> {
90
+
// Check writable first
91
+
if self.writable.has(cid).await? {
92
+
return Ok(true);
93
+
}
94
+
95
+
// Fall back to base
96
+
self.base.has(cid).await
97
+
}
98
+
99
+
async fn put_many(&self, blocks: impl IntoIterator<Item = (IpldCid, Bytes)> + Send) -> Result<()> {
100
+
// All writes go to writable layer
101
+
self.writable.put_many(blocks).await
102
+
}
103
+
104
+
async fn get_many(&self, cids: &[IpldCid]) -> Result<Vec<Option<Bytes>>> {
105
+
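// Route each CID through layered get() so every read keeps the
// writable-then-base fallback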
let mut results = Vec::with_capacity(cids.len());
106
+
107
+
for cid in cids {
108
+
results.push(self.get(cid).await?);
109
+
}
110
+
111
+
Ok(results)
112
+
}
113
+
}
114
+
115
+
#[cfg(test)]
116
+
mod tests {
117
+
use super::*;
118
+
use crate::storage::MemoryBlockStore;
119
+
120
+
#[tokio::test]
121
+
async fn test_layered_read_from_writable() {
122
+
let base = std::sync::Arc::new(MemoryBlockStore::new());
123
+
let writable = MemoryBlockStore::new();
124
+
125
+
// Put data in writable layer
126
+
let cid = writable.put(b"test data").await.unwrap();
127
+
128
+
let layered = LayeredBlockStore::new(writable, base);
129
+
130
+
// Should read from writable layer
131
+
let data = layered.get(&cid).await.unwrap();
132
+
assert_eq!(&*data.unwrap(), b"test data");
133
+
}
134
+
135
+
#[tokio::test]
136
+
async fn test_layered_fallback_to_base() {
137
+
let base = std::sync::Arc::new(MemoryBlockStore::new());
138
+
let writable = MemoryBlockStore::new();
139
+
140
+
// Put data in base layer
141
+
let cid = base.put(b"base data").await.unwrap();
142
+
143
+
let layered = LayeredBlockStore::new(writable, base);
144
+
145
+
// Should fall back to base layer
146
+
let data = layered.get(&cid).await.unwrap();
147
+
assert_eq!(&*data.unwrap(), b"base data");
148
+
}
149
+
150
+
#[tokio::test]
151
+
async fn test_layered_writable_overrides_base() {
152
+
let base = std::sync::Arc::new(MemoryBlockStore::new());
153
+
let writable = MemoryBlockStore::new();
154
+
155
+
// Put same content in both layers (will have same CID)
156
+
let cid = base.put(b"original").await.unwrap();
157
+
let cid2 = writable.put(b"original").await.unwrap();
158
+
assert_eq!(cid, cid2); // Same content = same CID
159
+
160
+
// CIDs are content-addressed, so we can't store conflicting data under
// one CID; exercise layer priority with distinct blocks instead
163
+
164
+
// Put different data in each layer
165
+
let base_cid = base.put(b"base content").await.unwrap();
166
+
let writable_cid = writable.put(b"writable content").await.unwrap();
167
+
168
+
let layered = LayeredBlockStore::new(writable, base);
169
+
170
+
// Should get writable content for writable CID
171
+
let data1 = layered.get(&writable_cid).await.unwrap().unwrap();
172
+
assert_eq!(&*data1, b"writable content");
173
+
174
+
// Should get base content for base CID
175
+
let data2 = layered.get(&base_cid).await.unwrap().unwrap();
176
+
assert_eq!(&*data2, b"base content");
177
+
}
178
+
179
+
#[tokio::test]
180
+
async fn test_layered_writes_to_writable_only() {
181
+
let base = std::sync::Arc::new(MemoryBlockStore::new());
182
+
let writable = MemoryBlockStore::new();
183
+
184
+
let layered = LayeredBlockStore::new(writable.clone(), base.clone());
185
+
186
+
// Write through layered storage
187
+
let cid = layered.put(b"new data").await.unwrap();
188
+
189
+
// Should be in writable layer
190
+
assert!(writable.has(&cid).await.unwrap());
191
+
192
+
// Should NOT be in base layer
193
+
assert!(!base.has(&cid).await.unwrap());
194
+
}
195
+
196
+
#[tokio::test]
197
+
async fn test_layered_has_checks_both_layers() {
198
+
let base = std::sync::Arc::new(MemoryBlockStore::new());
199
+
let writable = MemoryBlockStore::new();
200
+
201
+
let base_cid = base.put(b"base").await.unwrap();
202
+
let writable_cid = writable.put(b"writable").await.unwrap();
203
+
204
+
let layered = LayeredBlockStore::new(writable, base);
205
+
206
+
// Should find in both layers
207
+
assert!(layered.has(&base_cid).await.unwrap());
208
+
assert!(layered.has(&writable_cid).await.unwrap());
209
+
}
210
+
}
+210
crates/jacquard-repo/src/storage/memory.rs
···
1
+
//! In-memory block storage implementation
2
+
3
+
use crate::error::Result;
4
+
use crate::storage::BlockStore;
5
+
use bytes::Bytes;
6
+
use cid::Cid as IpldCid;
7
+
use std::collections::BTreeMap;
8
+
use std::sync::{Arc, RwLock};
9
+
10
+
/// In-memory block storage using BTreeMap
11
+
///
12
+
/// Useful for:
13
+
/// - Testing
14
+
/// - Temporary operations
15
+
/// - Small repositories that fit in memory
16
+
///
17
+
/// Uses `Bytes` for efficient reference-counted storage with cheap cloning.
18
+
///
19
+
/// # Example
20
+
///
21
+
/// ```rust,ignore
22
+
/// use jacquard_repo::storage::{BlockStore, MemoryBlockStore};
23
+
///
24
+
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
25
+
/// let storage = MemoryBlockStore::new();
26
+
///
27
+
/// let data = b"hello world";
28
+
/// let cid = storage.put(data).await?;
29
+
///
30
+
/// let retrieved = storage.get(&cid).await?;
31
+
/// assert_eq!(retrieved.as_deref(), Some(&data[..]));
32
+
/// # Ok(())
33
+
/// # }
34
+
/// ```
35
+
#[derive(Debug, Clone)]
36
+
pub struct MemoryBlockStore {
37
+
blocks: Arc<RwLock<BTreeMap<IpldCid, Bytes>>>,
38
+
}
39
+
40
+
impl MemoryBlockStore {
41
+
/// Create new empty memory store
42
+
pub fn new() -> Self {
43
+
Self {
44
+
blocks: Arc::new(RwLock::new(BTreeMap::new())),
45
+
}
46
+
}
47
+
48
+
/// Create new memory store from a map of blocks
49
+
pub fn new_from_blocks(blocks: BTreeMap<IpldCid, Bytes>) -> Self {
50
+
Self {
51
+
blocks: Arc::new(RwLock::new(blocks)),
52
+
}
53
+
}
54
+
55
+
/// Get number of blocks stored
56
+
pub fn len(&self) -> usize {
57
+
self.blocks.read().unwrap().len()
58
+
}
59
+
60
+
/// Check if store is empty
61
+
pub fn is_empty(&self) -> bool {
62
+
self.blocks.read().unwrap().is_empty()
63
+
}
64
+
65
+
/// Clear all blocks
66
+
pub fn clear(&self) {
67
+
self.blocks.write().unwrap().clear();
68
+
}
69
+
70
+
/// Put a block with a pre-computed CID (for testing)
71
+
///
72
+
/// # Note
73
+
///
74
+
/// This bypasses CID verification. Only use for testing.
75
+
#[cfg(test)]
76
+
pub(crate) async fn put_with_cid(&self, cid: IpldCid, data: impl Into<Bytes>) -> Result<()> {
77
+
self.blocks.write().unwrap().insert(cid, data.into());
78
+
Ok(())
79
+
}
80
+
}
81
+
82
+
impl Default for MemoryBlockStore {
83
+
fn default() -> Self {
84
+
Self::new()
85
+
}
86
+
}
87
+
88
+
impl BlockStore for MemoryBlockStore {
89
+
async fn get(&self, cid: &IpldCid) -> Result<Option<Bytes>> {
90
+
Ok(self.blocks.read().unwrap().get(cid).cloned())
91
+
}
92
+
93
+
async fn put(&self, data: &[u8]) -> Result<IpldCid> {
94
+
let cid = crate::mst::util::compute_cid(data)?;
95
+
self.blocks
96
+
.write()
97
+
.unwrap()
98
+
.insert(cid, Bytes::copy_from_slice(data));
99
+
Ok(cid)
100
+
}
101
+
102
+
async fn has(&self, cid: &IpldCid) -> Result<bool> {
103
+
Ok(self.blocks.read().unwrap().contains_key(cid))
104
+
}
105
+
106
+
async fn put_many(&self, blocks: impl IntoIterator<Item = (IpldCid, Bytes)> + Send) -> Result<()> {
107
+
let mut store = self.blocks.write().unwrap();
108
+
for (cid, data) in blocks {
109
+
store.insert(cid, data);
110
+
}
111
+
Ok(())
112
+
}
113
+
114
+
async fn get_many(&self, cids: &[IpldCid]) -> Result<Vec<Option<Bytes>>> {
115
+
let store = self.blocks.read().unwrap();
116
+
let mut results = Vec::with_capacity(cids.len());
117
+
for cid in cids {
118
+
results.push(store.get(cid).cloned());
119
+
}
120
+
Ok(results)
121
+
}
122
+
}
123
+
124
+
#[cfg(test)]
125
+
mod tests {
126
+
use super::*;
127
+
128
+
#[tokio::test]
129
+
async fn test_put_and_get() {
130
+
let store = MemoryBlockStore::new();
131
+
let data = b"test data";
132
+
133
+
let cid = store.put(data).await.unwrap();
134
+
let retrieved = store.get(&cid).await.unwrap();
135
+
136
+
assert_eq!(retrieved.as_deref(), Some(&data[..]));
137
+
}
138
+
139
+
#[tokio::test]
140
+
async fn test_has() {
141
+
let store = MemoryBlockStore::new();
142
+
let data = b"test data";
143
+
144
+
let cid = store.put(data).await.unwrap();
145
+
assert!(store.has(&cid).await.unwrap());
146
+
147
+
let fake_cid = IpldCid::default();
148
+
assert!(!store.has(&fake_cid).await.unwrap());
149
+
}
150
+
151
+
#[tokio::test]
152
+
async fn test_put_many() {
153
+
let store = MemoryBlockStore::new();
154
+
155
+
let data1 = b"data1";
156
+
let data2 = b"data2";
157
+
let cid1 = crate::mst::util::compute_cid(data1).unwrap();
158
+
let cid2 = crate::mst::util::compute_cid(data2).unwrap();
159
+
160
+
store
161
+
.put_many(vec![
162
+
(cid1, Bytes::from_static(data1)),
163
+
(cid2, Bytes::from_static(data2)),
164
+
])
165
+
.await
166
+
.unwrap();
167
+
168
+
assert_eq!(store.len(), 2);
169
+
assert!(store.has(&cid1).await.unwrap());
170
+
assert!(store.has(&cid2).await.unwrap());
171
+
}
172
+
173
+
#[tokio::test]
174
+
async fn test_get_many() {
175
+
let store = MemoryBlockStore::new();
176
+
177
+
let data1 = b"data1";
178
+
let data2 = b"data2";
179
+
let cid1 = store.put(data1).await.unwrap();
180
+
let cid2 = store.put(data2).await.unwrap();
181
+
let fake_cid = IpldCid::default();
182
+
183
+
let results = store.get_many(&[cid1, fake_cid, cid2]).await.unwrap();
184
+
185
+
assert_eq!(results.len(), 3);
186
+
assert_eq!(results[0].as_deref(), Some(&data1[..]));
187
+
assert_eq!(results[1], None);
188
+
assert_eq!(results[2].as_deref(), Some(&data2[..]));
189
+
}
190
+
191
+
#[tokio::test]
192
+
async fn test_clear() {
193
+
let store = MemoryBlockStore::new();
194
+
store.put(b"data").await.unwrap();
195
+
196
+
assert_eq!(store.len(), 1);
197
+
store.clear();
198
+
assert_eq!(store.len(), 0);
199
+
assert!(store.is_empty());
200
+
}
201
+
202
+
#[tokio::test]
203
+
async fn test_clone_shares_storage() {
204
+
let store1 = MemoryBlockStore::new();
205
+
let store2 = store1.clone();
206
+
207
+
let cid = store1.put(b"test").await.unwrap();
208
+
assert!(store2.has(&cid).await.unwrap());
209
+
}
210
+
}
+88
crates/jacquard-repo/src/storage/mod.rs
···
1
+
//! Block storage abstraction for MST nodes and records
2
+
3
+
use bytes::Bytes;
4
+
use cid::Cid as IpldCid;
5
+
use crate::error::Result;
6
+
7
+
/// Async block storage trait
8
+
///
9
+
/// Provides CID-keyed block storage for MST nodes, commits, and record data.
10
+
/// Implementations might use:
11
+
/// - In-memory BTreeMap ([`MemoryBlockStore`](memory::MemoryBlockStore))
12
+
/// - CAR file ([`FileBlockStore`](file::FileBlockStore))
13
+
/// - SQLite/RocksDB (user-provided)
14
+
/// - Remote HTTP storage (user-provided)
15
+
///
16
+
/// Clone is required so MST can share storage references across tree operations.
17
+
///
18
+
/// # WASM Compatibility
19
+
///
20
+
/// The trait uses `trait_variant` to conditionally require `Send` only on non-WASM targets,
21
+
/// allowing it to work in browser environments where `Send` is not available.
22
+
///
23
+
/// # Example
24
+
///
25
+
/// ```rust,ignore
26
+
/// use jacquard_repo::storage::{BlockStore, MemoryBlockStore};
27
+
///
28
+
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
29
+
/// let storage = MemoryBlockStore::new();
30
+
///
31
+
/// // Store a block
32
+
/// let data = b"hello world";
33
+
/// let cid = storage.put(data).await?;
34
+
///
35
+
/// // Retrieve it
36
+
/// if let Some(retrieved) = storage.get(&cid).await? {
37
+
/// assert_eq!(retrieved, data);
38
+
/// }
39
+
/// # Ok(())
40
+
/// # }
41
+
/// ```
42
+
#[trait_variant::make(Send)]
43
+
pub trait BlockStore: Clone {
44
+
/// Get a block by CID
45
+
///
46
+
/// Returns `None` if the block is not found.
47
+
async fn get(&self, cid: &IpldCid) -> Result<Option<Bytes>>;
48
+
49
+
/// Put a block, return its CID
50
+
///
51
+
/// The CID is calculated from the data using SHA-256 hash and DAG-CBOR codec.
52
+
/// This ensures content addressing: the same data always produces the same CID.
53
+
async fn put(&self, data: &[u8]) -> Result<IpldCid>;
54
+
55
+
/// Check if a block exists without retrieving it
56
+
///
57
+
/// This can be more efficient than `get()` for implementations that can check
58
+
/// existence without reading the full block data.
59
+
async fn has(&self, cid: &IpldCid) -> Result<bool>;
60
+
61
+
/// Put many blocks at once (optimization for batch writes)
62
+
///
63
+
/// Implementations should optimize this for batch operations where possible
64
+
/// (e.g., single transaction, bulk insert). A simple implementation can just
65
+
/// call `put()` individually.
66
+
///
67
+
/// # Note
68
+
///
69
+
/// The provided CIDs should match the data, but implementations may choose to
70
+
/// recalculate and validate them.
71
+
async fn put_many(&self, blocks: impl IntoIterator<Item = (IpldCid, Bytes)> + Send) -> Result<()>;
72
+
73
+
/// Get multiple blocks at once (optimization for batch reads)
74
+
///
75
+
/// Implementations should optimize this for batch operations where possible.
76
+
/// A simple implementation can just call `get()` individually.
77
+
///
78
+
/// Returns a vec of the same length as the input, with `None` for missing blocks.
79
+
async fn get_many(&self, cids: &[IpldCid]) -> Result<Vec<Option<Bytes>>>;
80
+
}
81
+
82
+
pub mod file;
83
+
pub mod layered;
84
+
pub mod memory;
85
+
86
+
pub use file::FileBlockStore;
87
+
pub use layered::LayeredBlockStore;
88
+
pub use memory::MemoryBlockStore;
+96
crates/jacquard-repo/tests/fixtures/commit_proof.json
···
1
+
[
2
+
{
3
+
"comment": "two deep split",
4
+
"leafValue": "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454",
5
+
"keys": [
6
+
"A0/501344",
7
+
"B1/293486",
8
+
"C0/535043",
9
+
"E0/922708",
10
+
"F1/415452",
11
+
"G0/714257"
12
+
],
13
+
"adds": ["D2/915466"],
14
+
"dels": [],
15
+
"rootBeforeCommit": "bafyreibthlzzn3rwvmomwf4dz6utt7yeh5eyn6qwbumvjfv35gwanh7ovq",
16
+
"rootAfterCommit": "bafyreidb6bxxylhmlzs4a6ruhcunv3fd32o6i5phlzkmjk6arletj2ua6a",
17
+
"blocksInProof": [
18
+
"bafyreidb6bxxylhmlzs4a6ruhcunv3fd32o6i5phlzkmjk6arletj2ua6a",
19
+
"bafyreifjsxnultnc3tbvnrawqpmrk6d76ymcstwcr5e3hn6u472nasb2xq",
20
+
"bafyreibzch5k5j5xkg6dcwmur2p6jqwavyjhdtvifr6g2gnccwhixibzsi",
21
+
"bafyreiamcu5ud3j4ovclrgq2sdyev5oajsmpnl2fdu5ffgpfint64n2jme",
22
+
"bafyreidxvw3sbdg4t5b2mbtozitnyu7kjien2zcrtgdj4ssgmjb72mzawe"
23
+
]
24
+
},
25
+
{
26
+
"comment": "two deep leafless split",
27
+
"leafValue": "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454",
28
+
"keys": ["A0/501344", "B0/436099", "D0/360671", "E0/922708"],
29
+
"adds": ["C2/953910"],
30
+
"dels": [],
31
+
"rootBeforeCommit": "bafyreid7jnvjg7mr4akmyf7rtaz47duex2l47rz36nvs4i7yjnpuhfmehe",
32
+
"rootAfterCommit": "bafyreih2ry5gae5r4m47unhhuw4w2qjdhe6oprw3w2uico2tzbflwi74eu",
33
+
"blocksInProof": [
34
+
"bafyreih2ry5gae5r4m47unhhuw4w2qjdhe6oprw3w2uico2tzbflwi74eu",
35
+
"bafyreiag5ata5gtynbpef26l4kus2uz4nshuo526h275oljwlm5dwsvhqm",
36
+
"bafyreiaybgpm7ahyiy7fko7c4czjokhzajvimot6lfi6mxqzw2bzwoddn4",
37
+
"bafyreiheqxxydll4b4zlyemmegb7q3chs7aacczuotpxkqils6bufnsyse",
38
+
"bafyreigkijiuasyl5x4f2j3kxzou2vsdyc3vockx63r6bvgoip4ybhj2sa"
39
+
]
40
+
},
41
+
{
42
+
"comment": "add on edge with neighbor two layers down",
43
+
"leafValue": "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454",
44
+
"keys": ["A0/501344", "B2/303249", "C0/535043"],
45
+
"adds": ["D2/915466"],
46
+
"dels": [],
47
+
"rootBeforeCommit": "bafyreifoy7ierkqljk37wozudqhqjuuahjnubqvd3qprx5ocwcfrx5v3hm",
48
+
"rootAfterCommit": "bafyreid2i3nxmsvv3ifb53nlkjh3qaymygrrxuno6z22gctzdme5lbptky",
49
+
"blocksInProof": [
50
+
"bafyreid2i3nxmsvv3ifb53nlkjh3qaymygrrxuno6z22gctzdme5lbptky",
51
+
"bafyreiagiwrefvm27hvgryirykp7reqcpz56v6txzksgbargjlibtpsqwu",
52
+
"bafyreiewdvzcopoza6bdntvhmvdfqeolql6sckkiu75jpvfnwwnfi57jia"
53
+
]
54
+
},
55
+
{
56
+
"comment": "merge and split in multi-op commit",
57
+
"leafValue": "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454",
58
+
"keys": ["A0/501344", "B2/303249", "D2/915466", "E0/922708"],
59
+
"adds": ["C2/953910"],
60
+
"dels": ["B2/303249", "D2/915466"],
61
+
"rootBeforeCommit": "bafyreielnllkafudlseizljjx32rkkivlgxziqayhctgbxncw2srrox7ny",
62
+
"rootAfterCommit": "bafyreih6464tr7ue67qgllhiekgfmwiz45zuthrv72gwi2tjpuu5dbxt3a",
63
+
"blocksInProof": [
64
+
"bafyreih6464tr7ue67qgllhiekgfmwiz45zuthrv72gwi2tjpuu5dbxt3a",
65
+
"bafyreihexby6fnhajsjzzqkmegqpqt2lrr3rpesyl6kt3t3xppid7tuvfy",
66
+
"bafyreiciix65xuk62hu6ew6jdy3m2swqstvnuhuwcwffidk3nduf7eaoh4",
67
+
"bafyreieneexkszoung4zc5jzkjukjbbxm74ukz6mylydj7q2v42zqp6vmy",
68
+
"bafyreidxvw3sbdg4t5b2mbtozitnyu7kjien2zcrtgdj4ssgmjb72mzawe"
69
+
]
70
+
},
71
+
{
72
+
"comment": "complex multi-op commit",
73
+
"leafValue": "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454",
74
+
"keys": [
75
+
"B0/436099",
76
+
"C2/953910",
77
+
"D0/360671",
78
+
"E2/413113",
79
+
"F0/606463",
80
+
"H0/740256"
81
+
],
82
+
"adds": ["A2/239654", "G2/536869"],
83
+
"dels": ["C2/953910"],
84
+
"rootBeforeCommit": "bafyreiej4jqggfhidabjfrjgogdwed5eglhnboepxscbwfrss4uclnrrmi",
85
+
"rootAfterCommit": "bafyreifykpu67c4w4ynkx4lvjfjwxdofax6gx7j2wxrl6ewt3yslezcb6i",
86
+
"blocksInProof": [
87
+
"bafyreifykpu67c4w4ynkx4lvjfjwxdofax6gx7j2wxrl6ewt3yslezcb6i",
88
+
"bafyreig5pe2hdnhfbqleo6yyipkw3tdiju7tlm4sqp7btsiicxe4tex5de",
89
+
"bafyreievjgro75jk6ma3xwuqvalsydtzgvbbaduhazbvajvslaf3l6kcxu",
90
+
"bafyreieax6243224jnbout6ynursux2dvt6fabonofdu47dxupkxvmflvu",
91
+
"bafyreie44qmlnwlyeh6ubb2eocfko6st7gmbarplmcci6c7ilx24vh4iym",
92
+
"bafyreihlhqn4quwcgbum5g4wzkini2c42j7zi5dsjdgkzm55jxyvebndue",
93
+
"bafyreiggcbzkb2wgenvyfhkh2nggf7pohb7uzjm6bs7hixhjxw2xpmnq6u"
94
+
]
95
+
}
96
+
]
+15
crates/jacquard-repo/tests/fixtures/common_prefix.json
···
1
+
[
2
+
{"left": "", "right": "", "len": 0},
3
+
{"left": "abc", "right": "abc", "len": 3},
4
+
{"left": "", "right": "abc", "len": 0},
5
+
{"left": "abc", "right": "", "len": 0},
6
+
{"left": "ab", "right": "abc", "len": 2},
7
+
{"left": "abc", "right": "ab", "len": 2},
8
+
{"left": "abcde", "right": "abc", "len": 3},
9
+
{"left": "abc", "right": "abcde", "len": 3},
10
+
{"left": "abcde", "right": "abc1", "len": 3},
11
+
{"left": "abcde", "right": "abb", "len": 2},
12
+
{"left": "abcde", "right": "qbb", "len": 0},
13
+
{"left": "abc", "right": "abc\u0000", "len": 3},
14
+
{"left": "abc\u0000", "right": "abc", "len": 3}
15
+
]
+81
crates/jacquard-repo/tests/fixtures/example_keys.txt
···
1
+
A0/501344
2
+
A1/700567
3
+
A2/239654
4
+
A3/570745
5
+
A4/231700
6
+
A5/343219
7
+
B0/436099
8
+
B1/293486
9
+
B2/303249
10
+
B3/690557
11
+
B4/522003
12
+
B5/528640
13
+
C0/535043
14
+
C1/970596
15
+
C2/953910
16
+
C3/016643
17
+
C4/687126
18
+
C5/136391
19
+
D0/360671
20
+
D1/637976
21
+
D2/915466
22
+
D3/722333
23
+
D4/816246
24
+
D5/611412
25
+
E0/922708
26
+
E1/710014
27
+
E2/413113
28
+
E3/226890
29
+
E4/349347
30
+
E5/574759
31
+
F0/606463
32
+
F1/415452
33
+
F2/410478
34
+
F3/000172
35
+
F4/438093
36
+
F5/131765
37
+
G0/714257
38
+
G1/254594
39
+
G2/536869
40
+
G3/188348
41
+
G4/627086
42
+
G5/436727
43
+
H0/740256
44
+
H1/113887
45
+
H2/783135
46
+
H3/911996
47
+
H4/413212
48
+
H5/205035
49
+
I0/123247
50
+
I1/186251
51
+
I2/455864
52
+
I3/874267
53
+
I4/700662
54
+
I5/355687
55
+
J0/651505
56
+
J1/747356
57
+
J2/880562
58
+
J3/337247
59
+
J4/333302
60
+
J5/802321
61
+
K0/513509
62
+
K1/512199
63
+
K2/998695
64
+
K3/030175
65
+
K4/843537
66
+
K5/621841
67
+
L0/110539
68
+
L1/902119
69
+
L2/433601
70
+
L3/578589
71
+
L4/179159
72
+
L5/411430
73
+
M0/233209
74
+
M1/807305
75
+
M2/593452
76
+
M3/412948
77
+
M4/230935
78
+
M5/340624
79
+
N0/719700
80
+
N1/322330
81
+
N2/554
+37
crates/jacquard-repo/tests/fixtures/gen_keys.py
···
1
+
#!/usr/bin/env python3
2
+
3
+
"""
4
+
Helper script to output MST keys with different letter prefixes, at given heights. Eg:
5
+
6
+
A0/asdf - at MST height 0
7
+
"""
8
+
9
+
import hashlib
10
+
import random
11
+
12
+
def height(key):
13
+
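# Height counts leading zero bits of sha256(key) in 2-bit chunks (MST
# fanout 4): each leading '0' hex digit adds two levels, and the first
# nonzero digit may add one more depending on its value.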
h = hashlib.sha256(key).hexdigest()
14
+
i = 0
15
+
for c in h:
16
+
if c > '4':
17
+
return i*2
18
+
if c != '0':
19
+
return i*2+1
20
+
i = i+1
21
+
raise Exception("very suss")
22
+
23
+
def rand_key(letter, level):
24
+
num = random.randint(0, 999999)
25
+
return f"{letter}{level}/{num:06}".encode("utf8")
26
+
27
+
def gen_key(letter, level):
28
+
while True:
29
+
key = rand_key(letter, level)
30
+
if height(key) == level:
31
+
print(key.decode("utf-8"))
32
+
return
33
+
34
+
if __name__=="__main__":
35
+
for letter in "ABCDEFGHIJKLMNOPQRSTUVWXYZ":
36
+
for level in [0,1,2,3,4,5]:
37
+
gen_key(letter, level)
+11
crates/jacquard-repo/tests/fixtures/key_heights.json
···
1
+
[
2
+
{"key": "", "height": 0},
3
+
{"key": "asdf", "height": 0},
4
+
{"key": "blue", "height": 1},
5
+
{"key": "2653ae71", "height": 0},
6
+
{"key": "88bfafc7", "height": 2},
7
+
{"key": "2a92d355", "height": 4},
8
+
{"key": "884976f5", "height": 6},
9
+
{"key": "app.bsky.feed.post/454397e440ec", "height": 4},
10
+
{"key": "app.bsky.feed.post/9adeb165882c", "height": 8}
11
+
]
+1314
crates/jacquard-repo/tests/interop.rs
···
1
+
//! Interoperability tests using test vectors from atproto-interop-tests
2
+
//!
3
+
//! See: https://github.com/bluesky-social/atproto-interop-tests/tree/main/mst
4
+
//!
5
+
//! ## Current Status (Determinism Bug)
6
+
//!
7
+
//! ### Fixed Issues
8
+
//! 1. **split_around bug**: When split_idx=0 (all entries go right), node.left wasn't being split
9
+
//! - Fixed in tree.rs:640-649 by checking node.left when left_entries is empty
10
+
//! 2. **insert_entry_at_layer bug**: When insert_idx=0 with node.left, left pointer wasn't split
11
+
//! - Fixed in tree.rs:520-565 by splitting node.left around new key
12
+
//!
13
+
//! ### Remaining Issues
14
+
//! - **Non-deterministic tree structure**: Forward vs reverse insertion produces different root CIDs
15
+
//! - All keys are retrievable (no corruption)
16
+
//! - But tree structure differs (different node layouts/pointers)
17
+
//! - Fails even with just 10 keys (test_first_10_keys_determinism)
18
+
//!
19
+
//! ### Next Steps
20
+
//! - Compare tree architecture with rsky-repo and atproto implementations
21
+
//! - May need to restructure how nodes/splits are handled to match reference implementations
22
+
//! - Possible issues: recompress logic, subtree attachment, split handling at different layers
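//!
//! Determinism here is structural convergence: node layout is a pure function of
//! each key's hash-derived layer plus lexicographic key order, so a fixed set of
//! (key, value) pairs must yield the same root CID for any insertion order.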
23
+
24
+
use std::sync::Arc;
25
+
26
+
use jacquard_common::types::crypto::SHA2_256;
27
+
use jacquard_repo::DAG_CBOR_CID_CODEC;
28
+
use jacquard_repo::mst::tree::{Mst, VerifiedWriteOp};
29
+
use jacquard_repo::mst::util::{common_prefix_len, layer_for_key};
30
+
use jacquard_repo::storage::BlockStore;
31
+
use jacquard_repo::storage::memory::MemoryBlockStore;
32
+
use rand::Rng;
33
+
use serde::Deserialize;
34
+
35
+
/// Test helper: Generate a random key at a specific layer
36
+
///
37
+
/// Reimplementation of gen_keys.py from atproto-interop-tests for Rust tests.
38
+
/// Generates keys like "A0/123456" that hash to a specific MST layer.
39
+
fn gen_key_at_layer(letter: char, layer: usize) -> String {
40
+
let mut rng = rand::thread_rng();
41
+
loop {
42
+
let num: u32 = rng.gen_range(0..1_000_000);
43
+
let key = format!("{}{}/{:06}", letter, layer, num);
44
+
if layer_for_key(&key) == layer {
45
+
return key;
46
+
}
47
+
}
48
+
}
49
+
50
+
#[derive(Debug, Deserialize)]
51
+
struct CommonPrefixTest {
52
+
left: String,
53
+
right: String,
54
+
len: usize,
55
+
}
56
+
57
+
#[derive(Debug, Deserialize)]
58
+
struct KeyHeightTest {
59
+
key: String,
60
+
height: usize,
61
+
}
62
+
63
+
#[test]
64
+
fn test_common_prefix_interop() {
65
+
let json = include_str!("fixtures/common_prefix.json");
66
+
let tests: Vec<CommonPrefixTest> = serde_ipld_dagjson::from_slice(json.as_bytes()).unwrap();
67
+
68
+
for test in tests {
69
+
let result = common_prefix_len(&test.left, &test.right);
70
+
assert_eq!(
71
+
result, test.len,
72
+
"common_prefix_len({:?}, {:?}) = {}, expected {}",
73
+
test.left, test.right, result, test.len
74
+
);
75
+
}
76
+
}
77
+
78
+
#[test]
79
+
fn test_layer_for_key_interop() {
80
+
let json = include_str!("fixtures/key_heights.json");
81
+
let tests: Vec<KeyHeightTest> = serde_ipld_dagjson::from_slice(json.as_bytes()).unwrap();
82
+
83
+
for test in tests {
84
+
if test.key.is_empty() {
85
+
// Empty keys are invalid in this implementation, so skip (the fixture lists its height as 0)
86
+
continue;
87
+
}
88
+
89
+
let result = layer_for_key(&test.key);
90
+
assert_eq!(
91
+
result, test.height,
92
+
"layer_for_key({:?}) = {}, expected {}",
93
+
test.key, result, test.height
94
+
);
95
+
}
96
+
}
97
+
98
+
#[tokio::test]
99
+
async fn test_example_keys_tree_ops() {
100
+
// Load example keys
101
+
let keys_txt = include_str!("fixtures/example_keys.txt");
102
+
let keys: Vec<&str> = keys_txt.lines().collect();
103
+
104
+
let storage = Arc::new(MemoryBlockStore::new());
105
+
let mut mst = Mst::new(storage);
106
+
107
+
// Helper to create test CIDs
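// (wraps 32 constant bytes as a sha2-256 multihash without actually hashing;
// synthetic values are fine for these tests)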
108
+
fn test_cid(n: u8) -> cid::Cid {
109
+
let data = vec![n; 32];
110
+
let mh = multihash::Multihash::wrap(SHA2_256, &data).unwrap();
111
+
cid::Cid::new_v1(DAG_CBOR_CID_CODEC, mh)
112
+
}
113
+
114
+
// Add all keys
115
+
for (i, &key) in keys.iter().enumerate() {
116
+
mst = mst.add(key, test_cid(i as u8)).await.unwrap();
117
+
}
118
+
119
+
// Verify all keys can be retrieved
120
+
for (i, &key) in keys.iter().enumerate() {
121
+
let retrieved = mst.get(key).await.unwrap();
122
+
assert_eq!(
123
+
retrieved,
124
+
Some(test_cid(i as u8)),
125
+
"Failed to retrieve key: {}",
126
+
key
127
+
);
128
+
}
129
+
130
+
// Delete half the keys
131
+
for (i, &key) in keys.iter().enumerate() {
132
+
if i % 2 == 0 {
133
+
mst = mst.delete(key).await.unwrap();
134
+
}
135
+
}
136
+
137
+
// Verify deleted keys are gone and remaining keys still exist
138
+
for (i, &key) in keys.iter().enumerate() {
139
+
let retrieved = mst.get(key).await.unwrap();
140
+
if i % 2 == 0 {
141
+
assert_eq!(retrieved, None, "Key should be deleted: {}", key);
142
+
} else {
143
+
assert_eq!(
144
+
retrieved,
145
+
Some(test_cid(i as u8)),
146
+
"Key should still exist: {}",
147
+
key
148
+
);
149
+
}
150
+
}
151
+
}
152
+
153
+
#[tokio::test]
154
+
async fn test_determinism_with_example_keys() {
155
+
// Tree structure should be deterministic regardless of insertion order
156
+
let keys_txt = include_str!("fixtures/example_keys.txt");
157
+
let keys: Vec<&str> = keys_txt.lines().filter(|s| !s.is_empty()).collect();
158
+
159
+
fn test_cid(n: u8) -> cid::Cid {
160
+
let data = vec![n; 32];
161
+
let mh = multihash::Multihash::wrap(SHA2_256, &data).unwrap();
162
+
cid::Cid::new_v1(DAG_CBOR_CID_CODEC, mh)
163
+
}
164
+
165
+
// Build tree in forward order
166
+
let storage1 = Arc::new(MemoryBlockStore::new());
167
+
let mut mst1 = Mst::new(storage1);
168
+
for (i, &key) in keys.iter().enumerate() {
169
+
mst1 = mst1.add(key, test_cid(i as u8)).await.unwrap();
170
+
}
171
+
172
+
// Build tree in reverse order
173
+
let storage2 = Arc::new(MemoryBlockStore::new());
174
+
let mut mst2 = Mst::new(storage2);
175
+
for (i, &key) in keys.iter().rev().enumerate() {
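// Recover the forward index so each key is paired with the same CID as in mst1.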
176
+
let idx = keys.len() - 1 - i;
177
+
mst2 = mst2.add(key, test_cid(idx as u8)).await.unwrap();
178
+
}
179
+
180
+
// Check if all keys are retrievable from both trees
181
+
let mut missing_in_1 = Vec::new();
182
+
let mut missing_in_2 = Vec::new();
183
+
184
+
for key in keys.iter() {
185
+
let v1 = mst1.get(key).await.unwrap();
186
+
let v2 = mst2.get(key).await.unwrap();
187
+
188
+
if v1.is_none() {
189
+
missing_in_1.push(key);
190
+
}
191
+
if v2.is_none() {
192
+
missing_in_2.push(key);
193
+
}
194
+
}
195
+
196
+
if !missing_in_1.is_empty() {
197
+
eprintln!("Missing in mst1 ({} keys):", missing_in_1.len());
198
+
for key in missing_in_1.iter().take(5) {
199
+
eprintln!(" {}", key);
200
+
}
201
+
}
202
+
203
+
if !missing_in_2.is_empty() {
204
+
eprintln!("Missing in mst2 ({} keys):", missing_in_2.len());
205
+
for key in missing_in_2.iter().take(5) {
206
+
eprintln!(" {}", key);
207
+
}
208
+
}
209
+
210
+
eprintln!("Keys missing in mst1: {}", missing_in_1.len());
211
+
eprintln!("Keys missing in mst2: {}", missing_in_2.len());
212
+
213
+
// Root CIDs should match
214
+
eprintln!("mst1 root: {:?}", mst1.root().await.unwrap());
215
+
eprintln!("mst2 root: {:?}", mst2.root().await.unwrap());
216
+
217
+
assert_eq!(
218
+
mst1.root().await.unwrap(),
219
+
mst2.root().await.unwrap(),
220
+
"Tree structure should be deterministic"
221
+
);
222
+
}
223
+
224
+
#[tokio::test]
225
+
async fn test_minimal_determinism() {
226
+
// Minimal test with just a few keys
227
+
let keys = vec!["A0/501344", "A1/700567", "B0/436099"];
228
+
229
+
fn test_cid(n: u8) -> cid::Cid {
230
+
let data = vec![n; 32];
231
+
let mh = multihash::Multihash::wrap(SHA2_256, &data).unwrap();
232
+
cid::Cid::new_v1(DAG_CBOR_CID_CODEC, mh)
233
+
}
234
+
235
+
// Build tree in forward order
236
+
let storage1 = Arc::new(MemoryBlockStore::new());
237
+
let mut mst1 = Mst::new(storage1);
238
+
for (i, &key) in keys.iter().enumerate() {
239
+
println!("MST1: Adding {}", key);
240
+
mst1 = mst1.add(key, test_cid(i as u8)).await.unwrap();
241
+
}
242
+
243
+
// Build tree in reverse order
244
+
let storage2 = Arc::new(MemoryBlockStore::new());
245
+
let mut mst2 = Mst::new(storage2);
246
+
for (i, &key) in keys.iter().rev().enumerate() {
247
+
let idx = keys.len() - 1 - i;
248
+
println!("MST2: Adding {}", key);
249
+
mst2 = mst2.add(key, test_cid(idx as u8)).await.unwrap();
250
+
}
251
+
252
+
// Check if all keys exist in both trees
253
+
for key in keys.iter() {
254
+
let v1 = mst1.get(key).await.unwrap();
255
+
let v2 = mst2.get(key).await.unwrap();
256
+
println!(
257
+
"Key {}: mst1={:?}, mst2={:?}",
258
+
key,
259
+
v1.is_some(),
260
+
v2.is_some()
261
+
);
262
+
assert_eq!(v1.is_some(), v2.is_some(), "Key {} mismatch", key);
263
+
}
264
+
265
+
// Root CIDs should match
266
+
println!("mst1 root: {:?}", mst1.root().await.unwrap());
267
+
println!("mst2 root: {:?}", mst2.root().await.unwrap());
268
+
269
+
// Trees should be identical
270
+
assert_eq!(
271
+
mst1.root().await.unwrap(),
272
+
mst2.root().await.unwrap(),
273
+
"Tree structure should be deterministic"
274
+
);
275
+
}
276
+
277
+
#[tokio::test]
278
+
async fn test_first_10_keys_determinism() {
279
+
// Test first 10 keys from example_keys.txt
280
+
let keys_txt = include_str!("fixtures/example_keys.txt");
281
+
let keys: Vec<&str> = keys_txt
282
+
.lines()
283
+
.filter(|s| !s.is_empty())
284
+
.take(10)
285
+
.collect();
286
+
287
+
fn test_cid(n: u8) -> cid::Cid {
288
+
let data = vec![n; 32];
289
+
let mh = multihash::Multihash::wrap(SHA2_256, &data).unwrap();
290
+
cid::Cid::new_v1(DAG_CBOR_CID_CODEC, mh)
291
+
}
292
+
293
+
let storage1 = Arc::new(MemoryBlockStore::new());
294
+
let mut mst1 = Mst::new(storage1);
295
+
for (i, &key) in keys.iter().enumerate() {
296
+
mst1 = mst1.add(key, test_cid(i as u8)).await.unwrap();
297
+
}
298
+
299
+
let storage2 = Arc::new(MemoryBlockStore::new());
300
+
let mut mst2 = Mst::new(storage2);
301
+
for (i, &key) in keys.iter().rev().enumerate() {
302
+
let idx = keys.len() - 1 - i;
303
+
mst2 = mst2.add(key, test_cid(idx as u8)).await.unwrap();
304
+
}
305
+
306
+
// Check all keys present
307
+
for &key in &keys {
308
+
assert!(mst1.get(key).await.unwrap().is_some());
309
+
assert!(mst2.get(key).await.unwrap().is_some());
310
+
}
311
+
312
+
eprintln!("mst1 root: {:?}", mst1.root().await.unwrap());
313
+
eprintln!("mst2 root: {:?}", mst2.root().await.unwrap());
314
+
315
+
assert_eq!(
316
+
mst1.root().await.unwrap(),
317
+
mst2.root().await.unwrap(),
318
+
"Tree structure should be deterministic"
319
+
);
320
+
}
321
+
322
+
#[tokio::test]
323
+
async fn test_minimal_corruption_case() {
324
+
// Minimal reproduction of the corruption bug
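// (a layer-0 key followed by a layer-5 key forces the root to be raised several
// layers at once, exercising the left-pointer split paths noted in the module docs)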
325
+
let storage = Arc::new(MemoryBlockStore::new());
326
+
let mut mst = Mst::new(storage);
327
+
328
+
fn test_cid(n: u8) -> cid::Cid {
329
+
let data = vec![n; 32];
330
+
let mh = multihash::Multihash::wrap(SHA2_256, &data).unwrap();
331
+
cid::Cid::new_v1(DAG_CBOR_CID_CODEC, mh)
332
+
}
333
+
334
+
// Add N0 (layer 0) first
335
+
println!("Adding N0/719700 (layer {})", layer_for_key("N0/719700"));
336
+
mst = mst.add("N0/719700", test_cid(0)).await.unwrap();
337
+
338
+
// Verify N0 is retrievable
339
+
assert!(
340
+
mst.get("N0/719700").await.unwrap().is_some(),
341
+
"N0 should exist after adding it"
342
+
);
343
+
344
+
// Add M5 (layer 5)
345
+
println!("Adding M5/340624 (layer {})", layer_for_key("M5/340624"));
346
+
mst = mst.add("M5/340624", test_cid(1)).await.unwrap();
347
+
348
+
// Verify both are retrievable
349
+
assert!(
350
+
mst.get("N0/719700").await.unwrap().is_some(),
351
+
"N0 should still exist after adding M5"
352
+
);
353
+
assert!(
354
+
mst.get("M5/340624").await.unwrap().is_some(),
355
+
"M5 should exist after adding it"
356
+
);
357
+
}
358
+
359
+
#[tokio::test]
360
+
async fn test_generated_keys_at_specific_layers() {
361
+
// Generate keys at different layers and verify they work correctly
362
+
let storage = Arc::new(MemoryBlockStore::new());
363
+
let mut mst = Mst::new(storage);
364
+
365
+
fn test_cid(n: u8) -> cid::Cid {
366
+
let data = vec![n; 32];
367
+
let mh = multihash::Multihash::wrap(SHA2_256, &data).unwrap();
368
+
cid::Cid::new_v1(DAG_CBOR_CID_CODEC, mh)
369
+
}
370
+
371
+
// Generate keys at layers 0-5
372
+
let mut keys_by_layer: Vec<(String, usize)> = Vec::new();
373
+
for layer in 0..=5 {
374
+
let key = gen_key_at_layer('T', layer);
375
+
// Verify it's actually at the expected layer
376
+
assert_eq!(layer_for_key(&key), layer);
377
+
keys_by_layer.push((key, layer));
378
+
}
379
+
380
+
// Add all keys to tree
381
+
for (i, (key, _layer)) in keys_by_layer.iter().enumerate() {
382
+
mst = mst.add(key, test_cid(i as u8)).await.unwrap();
383
+
}
384
+
385
+
// Verify all keys can be retrieved
386
+
for (i, (key, _layer)) in keys_by_layer.iter().enumerate() {
387
+
let retrieved = mst.get(key).await.unwrap();
388
+
assert_eq!(retrieved, Some(test_cid(i as u8)));
389
+
}
390
+
}
391
+
392
+
#[tokio::test]
393
+
async fn test_first_n_keys_determinism() {
394
+
// Test varying numbers of keys to find breaking point
395
+
let all_keys = vec![
396
+
"A0/501344",
397
+
"A1/700567",
398
+
"A2/239654",
399
+
"A3/570745",
400
+
"A4/231700",
401
+
"A5/343219",
402
+
"B0/436099",
403
+
"B1/293486",
404
+
"B2/303249",
405
+
"B3/690557",
406
+
];
407
+
408
+
fn test_cid(n: u8) -> cid::Cid {
409
+
let data = vec![n; 32];
410
+
let mh = multihash::Multihash::wrap(SHA2_256, &data).unwrap();
411
+
cid::Cid::new_v1(DAG_CBOR_CID_CODEC, mh)
412
+
}
413
+
414
+
for n in 3..=10 {
415
+
let keys: Vec<&str> = all_keys.iter().take(n).copied().collect();
416
+
417
+
let storage1 = Arc::new(MemoryBlockStore::new());
418
+
let mut mst1 = Mst::new(storage1);
419
+
for (i, &key) in keys.iter().enumerate() {
420
+
mst1 = mst1.add(key, test_cid(i as u8)).await.unwrap();
421
+
}
422
+
423
+
let storage2 = Arc::new(MemoryBlockStore::new());
424
+
let mut mst2 = Mst::new(storage2);
425
+
for (i, &key) in keys.iter().rev().enumerate() {
426
+
let idx = keys.len() - 1 - i;
427
+
mst2 = mst2.add(key, test_cid(idx as u8)).await.unwrap();
428
+
}
429
+
430
+
let match_result = mst1.root().await.unwrap() == mst2.root().await.unwrap();
431
+
eprintln!(
432
+
"{} keys - Match: {} (mst1: {:?}, mst2: {:?})",
433
+
n,
434
+
match_result,
435
+
mst1.root().await.unwrap(),
436
+
mst2.root().await.unwrap()
437
+
);
438
+
439
+
if !match_result {
440
+
panic!("Determinism breaks at {} keys!", n);
441
+
}
442
+
}
443
+
}
444
+
445
+
// ============================================================================
446
+
// Commit Proof Fixture Tests (Phase 2.5)
447
+
// ============================================================================
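//
// Each fixture describes a base tree built from `keys` (all sharing one
// `leafValue` CID), then a commit applying `adds` and `dels`; the root CIDs
// before and after the commit are asserted against the fixture values.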
448
+
449
+
#[derive(Debug, Deserialize)]
450
+
struct CommitProofFixture {
451
+
comment: String,
452
+
#[serde(rename = "leafValue")]
453
+
leaf_value: String,
454
+
keys: Vec<String>,
455
+
adds: Vec<String>,
456
+
dels: Vec<String>,
457
+
#[serde(rename = "rootBeforeCommit")]
458
+
root_before_commit: String,
459
+
#[serde(rename = "rootAfterCommit")]
460
+
root_after_commit: String,
461
+
#[serde(rename = "blocksInProof")]
462
+
_blocks_in_proof: Vec<String>,
463
+
}
464
+
465
+
#[tokio::test]
466
+
async fn test_commit_proof_fixtures() {
467
+
let json = include_str!("fixtures/commit_proof.json");
468
+
let fixtures: Vec<CommitProofFixture> =
469
+
serde_ipld_dagjson::from_slice(json.as_bytes()).unwrap();
470
+
471
+
for fixture in fixtures {
472
+
println!("\n=== Testing: {} ===", fixture.comment);
473
+
474
+
// Parse the leaf value CID
475
+
let leaf_cid: cid::Cid = fixture.leaf_value.parse().unwrap();
476
+
477
+
// Build initial tree from keys
478
+
let storage = Arc::new(MemoryBlockStore::new());
479
+
let mut mst = Mst::new(storage);
480
+
481
+
for key in &fixture.keys {
482
+
mst = mst.add(key, leaf_cid).await.unwrap();
483
+
}
484
+
485
+
// Verify root before commit
486
+
let root_before = mst.root().await.unwrap();
487
+
let expected_before: cid::Cid = fixture.root_before_commit.parse().unwrap();
488
+
489
+
assert_eq!(
490
+
root_before.to_string(),
491
+
expected_before.to_string(),
492
+
"Root CID mismatch before commit (fixture: {})",
493
+
fixture.comment
494
+
);
495
+
496
+
// Apply adds
497
+
for key in &fixture.adds {
498
+
mst = mst.add(key, leaf_cid).await.unwrap();
499
+
}
500
+
501
+
// Apply deletes
502
+
for key in &fixture.dels {
503
+
mst = mst.delete(key).await.unwrap();
504
+
}
505
+
506
+
// Verify root after commit
507
+
let root_after = mst.root().await.unwrap();
508
+
let expected_after: cid::Cid = fixture.root_after_commit.parse().unwrap();
509
+
510
+
assert_eq!(
511
+
root_after.to_string(),
512
+
expected_after.to_string(),
513
+
"Root CID mismatch after commit (fixture: {})",
514
+
fixture.comment
515
+
);
516
+
517
+
println!("✓ Passed: {}", fixture.comment);
518
+
}
519
+
}
520
+
521
+
#[tokio::test]
522
+
async fn test_commit_proof_using_batch() {
523
+
// Same as above but using batch operations instead of individual add/delete
524
+
let json = include_str!("fixtures/commit_proof.json");
525
+
let fixtures: Vec<CommitProofFixture> =
526
+
serde_ipld_dagjson::from_slice(json.as_bytes()).unwrap();
527
+
528
+
for fixture in fixtures {
529
+
println!("\n=== Testing (batch): {} ===", fixture.comment);
530
+
531
+
let leaf_cid: cid::Cid = fixture.leaf_value.parse().unwrap();
532
+
533
+
// Build initial tree
534
+
let storage = Arc::new(MemoryBlockStore::new());
535
+
let mut mst = Mst::new(storage);
536
+
537
+
for key in &fixture.keys {
538
+
mst = mst.add(key, leaf_cid).await.unwrap();
539
+
}
540
+
541
+
// Verify before state
542
+
let root_before = mst.root().await.unwrap();
543
+
let expected_before: cid::Cid = fixture.root_before_commit.parse().unwrap();
544
+
assert_eq!(root_before.to_string(), expected_before.to_string());
545
+
546
+
// Build batch operations
547
+
use smol_str::SmolStr;
548
+
549
+
let mut ops = Vec::new();
550
+
551
+
// Note: adds in commit fixtures might include keys that already exist
552
+
// In that case we should use Update instead of Create
553
+
for key in &fixture.adds {
554
+
// Check if key already exists
555
+
if mst.get(key).await.unwrap().is_some() {
556
+
// Update existing key
557
+
ops.push(VerifiedWriteOp::Update {
558
+
key: SmolStr::new(key),
559
+
cid: leaf_cid,
560
+
prev: leaf_cid, // Same CID since we're using uniform leaf values
561
+
});
562
+
} else {
563
+
// Create new key
564
+
ops.push(VerifiedWriteOp::Create {
565
+
key: SmolStr::new(key),
566
+
cid: leaf_cid,
567
+
});
568
+
}
569
+
}
570
+
571
+
for key in &fixture.dels {
572
+
ops.push(VerifiedWriteOp::Delete {
573
+
key: SmolStr::new(key),
574
+
prev: leaf_cid, // We know the value from the fixture
575
+
});
576
+
}
577
+
578
+
// Apply batch
579
+
mst = mst.batch(&ops).await.unwrap();
580
+
581
+
// Verify after state
582
+
let root_after = mst.root().await.unwrap();
583
+
let expected_after: cid::Cid = fixture.root_after_commit.parse().unwrap();
584
+
585
+
assert_eq!(
586
+
root_after.to_string(),
587
+
expected_after.to_string(),
588
+
"Root CID mismatch after batch ops (fixture: {})",
589
+
fixture.comment
590
+
);
591
+
592
+
println!("✓ Passed (batch): {}", fixture.comment);
593
+
}
594
+
}
595
+
596
+
#[tokio::test]
597
+
async fn test_commit_proof_diff_validation() {
598
+
// Verify that diff calculation matches the expected adds/dels from fixtures
599
+
let json = include_str!("fixtures/commit_proof.json");
600
+
let fixtures: Vec<CommitProofFixture> =
601
+
serde_ipld_dagjson::from_slice(json.as_bytes()).unwrap();
602
+
603
+
for fixture in fixtures {
604
+
println!("\n=== Testing diff: {} ===", fixture.comment);
605
+
606
+
let leaf_cid: cid::Cid = fixture.leaf_value.parse().unwrap();
607
+
608
+
// Build "before" tree
609
+
let storage_before = Arc::new(MemoryBlockStore::new());
610
+
let mut mst_before = Mst::new(storage_before);
611
+
for key in &fixture.keys {
612
+
mst_before = mst_before.add(key, leaf_cid).await.unwrap();
613
+
}
614
+
615
+
// Build "after" tree
616
+
let storage_after = Arc::new(MemoryBlockStore::new());
617
+
let mut mst_after = Mst::new(storage_after);
618
+
619
+
// Start with same keys
620
+
for key in &fixture.keys {
621
+
mst_after = mst_after.add(key, leaf_cid).await.unwrap();
622
+
}
623
+
624
+
// Apply ops to after tree
625
+
for key in &fixture.adds {
626
+
mst_after = mst_after.add(key, leaf_cid).await.unwrap();
627
+
}
628
+
for key in &fixture.dels {
629
+
mst_after = mst_after.delete(key).await.unwrap();
630
+
}
631
+
632
+
// Compute diff
633
+
let diff = mst_before.diff(&mst_after).await.unwrap();
634
+
635
+
// Verify diff matches expected operations
636
+
println!(
637
+
" Diff: {} creates, {} updates, {} deletes",
638
+
diff.creates.len(),
639
+
diff.updates.len(),
640
+
diff.deletes.len()
641
+
);
642
+
println!(
643
+
" Expected: {} adds, {} dels",
644
+
fixture.adds.len(),
645
+
fixture.dels.len()
646
+
);
647
+
648
+
// Creates should match adds (keys not in original tree)
649
+
let added_keys: std::collections::HashSet<_> =
650
+
fixture.adds.iter().map(|s| s.as_str()).collect();
651
+
let _deleted_keys: std::collections::HashSet<_> =
652
+
fixture.dels.iter().map(|s| s.as_str()).collect();
653
+
let original_keys: std::collections::HashSet<_> =
654
+
fixture.keys.iter().map(|s| s.as_str()).collect();
655
+
656
+
// Compute expected creates (adds that weren't in original)
657
+
let expected_creates: Vec<_> = added_keys.difference(&original_keys).copied().collect();
658
+
659
+
// Compute expected updates (adds that WERE in original - replacing same CID)
660
+
let expected_updates: Vec<_> = added_keys
661
+
.intersection(&original_keys)
662
+
.copied()
663
+
.collect();
664
+
665
+
println!(" Expected creates: {}", expected_creates.len());
666
+
println!(" Expected updates: {}", expected_updates.len());
667
+
668
+
// Total ops should match
669
+
let total_diff_ops = diff.creates.len() + diff.updates.len() + diff.deletes.len();
670
+
let total_expected_ops = fixture.adds.len() + fixture.dels.len();
671
+
672
+
assert_eq!(
673
+
total_diff_ops, total_expected_ops,
674
+
"Total operations mismatch in diff (fixture: {})",
675
+
fixture.comment
676
+
);
677
+
678
+
println!("✓ Passed diff: {}", fixture.comment);
679
+
}
680
+
}
681
+
682
+
#[tokio::test]
683
+
async fn test_commit_proof_incremental_cids() {
684
+
// Show CID after each key insertion to find where we diverge
685
+
let json = include_str!("fixtures/commit_proof.json");
686
+
let fixtures: Vec<CommitProofFixture> =
687
+
serde_ipld_dagjson::from_slice(json.as_bytes()).unwrap();
688
+
689
+
let fixture = &fixtures[0]; // "two deep split"
690
+
println!("\n=== {} ===", fixture.comment);
691
+
println!("Expected final CID: {}", fixture.root_before_commit);
692
+
693
+
let leaf_cid: cid::Cid = fixture.leaf_value.parse().unwrap();
694
+
println!("Leaf value CID: {}", leaf_cid);
695
+
696
+
let storage = Arc::new(MemoryBlockStore::new());
697
+
let mut mst = Mst::new(storage);
698
+
699
+
for (i, key) in fixture.keys.iter().enumerate() {
700
+
mst = mst.add(key, leaf_cid).await.unwrap();
701
+
let root = mst.root().await.unwrap();
702
+
println!("After adding key {}: {} -> root CID: {}", i, key, root);
703
+
}
704
+
705
+
println!("\nFinal root CID: {}", mst.root().await.unwrap());
706
+
println!("Expected: {}", fixture.root_before_commit);
707
+
}
708
+
709
+
#[tokio::test]
710
+
async fn test_rsky_simple_case() {
711
+
// From rsky's "handle_new_layers_that_are_two_higher_than_existing" test
712
+
// Simple case: 2 keys at layer 0
713
+
let cid1: cid::Cid = "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454"
714
+
.parse()
715
+
.unwrap();
716
+
let storage = Arc::new(MemoryBlockStore::new());
717
+
let mut mst = Mst::new(storage);
718
+
719
+
// Add A (layer 0)
720
+
mst = mst
721
+
.add("com.example.record/3jqfcqzm3ft2j", cid1)
722
+
.await
723
+
.unwrap();
724
+
println!("After A: {}", mst.root().await.unwrap());
725
+
726
+
// Add C (layer 0)
727
+
mst = mst
728
+
.add("com.example.record/3jqfcqzm3fz2j", cid1)
729
+
.await
730
+
.unwrap();
731
+
732
+
let root = mst.root().await.unwrap();
733
+
let expected = "bafyreidfcktqnfmykz2ps3dbul35pepleq7kvv526g47xahuz3rqtptmky";
734
+
735
+
println!("Our CID: {}", root);
736
+
println!("Expected CID: {}", expected);
737
+
738
+
assert_eq!(
739
+
root.to_string(),
740
+
expected,
741
+
"CID mismatch for simple 2-key tree"
742
+
);
743
+
}
744
+
745
+
#[tokio::test]
746
+
async fn test_inspect_single_key_serialization() {
747
+
// Inspect what we're actually serializing for a single key
748
+
use jacquard_repo::mst::util::layer_for_key;
749
+
750
+
let key = "com.example.record/3jqfcqzm3ft2j";
751
+
let cid1: cid::Cid = "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454"
752
+
.parse()
753
+
.unwrap();
754
+
755
+
println!("Key: {}", key);
756
+
println!("Layer: {}", layer_for_key(key));
757
+
println!("Value CID: {}", cid1);
758
+
759
+
let storage = Arc::new(MemoryBlockStore::new());
760
+
let mut mst = Mst::new(storage.clone());
761
+
762
+
mst = mst.add(key, cid1).await.unwrap();
763
+
764
+
// Persist to storage so we can inspect serialized bytes
765
+
let root_cid = mst.persist().await.unwrap();
766
+
767
+
println!("\nRoot CID: {}", root_cid);
768
+
println!(
769
+
"Expected: bafyreicphm6sin567zmcmw2yrbguhsdqwzkxs62rcayyk6ylivxfguazgi (from test output)"
770
+
);
771
+
772
+
// Fetch the actual serialized bytes from storage
773
+
let node_bytes = storage.get(&root_cid).await.unwrap().unwrap();
774
+
println!("\nSerialized node ({} bytes):", node_bytes.len());
775
+
println!("Hex: {}", hex::encode(&node_bytes));
776
+
777
+
// Deserialize to see structure
778
+
use jacquard_repo::mst::node::NodeData;
779
+
let node: NodeData = serde_ipld_dagcbor::from_slice(&node_bytes).unwrap();
780
+
println!("\nNodeData:");
781
+
println!(" left: {:?}", node.left);
782
+
println!(" entries: {} entries", node.entries.len());
783
+
for (i, entry) in node.entries.iter().enumerate() {
784
+
println!(
785
+
" [{}] prefix_len={}, key_suffix={:?}, value={}, tree={:?}",
786
+
i,
787
+
entry.prefix_len,
788
+
String::from_utf8_lossy(&entry.key_suffix),
789
+
entry.value,
790
+
entry.tree
791
+
);
792
+
}
793
+
}
794
+
795
+
#[tokio::test]
796
+
async fn test_inspect_two_key_serialization() {
797
+
// Inspect 2-key tree structure
798
+
use jacquard_repo::mst::util::layer_for_key;
799
+
800
+
let key1 = "com.example.record/3jqfcqzm3ft2j"; // A
801
+
let key2 = "com.example.record/3jqfcqzm3fz2j"; // C
802
+
let cid1: cid::Cid = "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454"
803
+
.parse()
804
+
.unwrap();
805
+
806
+
println!("Key 1 (A): {} (layer {})", key1, layer_for_key(key1));
807
+
println!("Key 2 (C): {} (layer {})", key2, layer_for_key(key2));
808
+
809
+
let storage = Arc::new(MemoryBlockStore::new());
810
+
let mut mst = Mst::new(storage.clone());
811
+
812
+
mst = mst.add(key1, cid1).await.unwrap();
813
+
mst = mst.add(key2, cid1).await.unwrap();
814
+
815
+
// Persist to storage so we can inspect serialized bytes
816
+
let root_cid = mst.persist().await.unwrap();
817
+
818
+
println!("\nRoot CID: {}", root_cid);
819
+
println!("Expected: bafyreidfcktqnfmykz2ps3dbul35pepleq7kvv526g47xahuz3rqtptmky");
820
+
821
+
// Fetch and inspect
822
+
let node_bytes = storage.get(&root_cid).await.unwrap().unwrap();
823
+
println!("\nSerialized node ({} bytes):", node_bytes.len());
824
+
println!("Hex: {}", hex::encode(&node_bytes));
825
+
826
+
use jacquard_repo::mst::node::NodeData;
827
+
let node: NodeData = serde_ipld_dagcbor::from_slice(&node_bytes).unwrap();
828
+
println!("\nNodeData:");
829
+
println!(" left: {:?}", node.left);
830
+
println!(" entries: {} entries", node.entries.len());
831
+
for (i, entry) in node.entries.iter().enumerate() {
832
+
println!(
833
+
" [{}] prefix_len={}, key_suffix={:?}, value={}, tree={:?}",
834
+
i,
835
+
entry.prefix_len,
836
+
String::from_utf8_lossy(&entry.key_suffix),
837
+
entry.value,
838
+
entry.tree
839
+
);
840
+
}
841
+
842
+
// Calculate what prefix compression SHOULD be
843
+
let prefix_len = jacquard_repo::mst::util::common_prefix_len(key1, key2);
844
+
println!("\nCommon prefix length between keys: {}", prefix_len);
845
+
println!("Common prefix: {:?}", &key1[..prefix_len]);
846
+
println!("Key1 suffix: {:?}", &key1[prefix_len..]);
847
+
println!("Key2 suffix: {:?}", &key2[prefix_len..]);
848
+
}
849
+
850
+
#[tokio::test]
851
+
async fn test_real_repo_car_roundtrip() {
852
+
use jacquard_repo::car::{read_car, write_car};
853
+
use std::path::Path;
854
+
use tempfile::NamedTempFile;
855
+
856
+
// Check if fixture exists (local only - not in CI)
857
+
let fixture_path = Path::new(concat!(
858
+
env!("CARGO_MANIFEST_DIR"),
859
+
"/tests/fixtures/repo-nonbinary.computer-2025-10-21T13_05_55.090Z.car"
860
+
));
861
+
862
+
if !fixture_path.exists() {
863
+
eprintln!("⚠️ Skipping test_real_repo_car_roundtrip - fixture not present");
864
+
eprintln!(" (This is expected in CI - test only runs locally)");
865
+
return;
866
+
}
867
+
868
+
println!("✓ Found real repo CAR fixture");
869
+
870
+
// Read the CAR file
871
+
let blocks = read_car(fixture_path)
872
+
.await
873
+
.expect("Failed to read CAR file");
874
+
println!("✓ Loaded {} blocks from CAR", blocks.len());
875
+
876
+
assert!(!blocks.is_empty(), "CAR file should contain blocks");
877
+
878
+
// Write to a temp file
879
+
let temp_file = NamedTempFile::new().unwrap();
880
+
881
+
// Note: the original roots could be read back via read_car_header (see the header test below),
882
+
// but a pure roundtrip doesn't need them, so just use the first block's CID as the root
883
+
let roots: Vec<_> = blocks.keys().take(1).copied().collect();
884
+
885
+
write_car(temp_file.path(), roots.clone(), blocks.clone())
886
+
.await
887
+
.expect("Failed to write CAR file");
888
+
println!("✓ Wrote CAR to temp file");
889
+
890
+
// Read it back
891
+
let blocks_roundtrip = read_car(temp_file.path())
892
+
.await
893
+
.expect("Failed to read roundtrip CAR");
894
+
println!(
895
+
"✓ Read {} blocks from roundtrip CAR",
896
+
blocks_roundtrip.len()
897
+
);
898
+
899
+
// Verify all blocks match
900
+
assert_eq!(
901
+
blocks.len(),
902
+
blocks_roundtrip.len(),
903
+
"Block count mismatch after roundtrip"
904
+
);
905
+
906
+
for (cid, data) in &blocks {
907
+
let roundtrip_data = blocks_roundtrip
908
+
.get(cid)
909
+
.unwrap_or_else(|| panic!("Missing block after roundtrip: {}", cid));
910
+
assert_eq!(data, roundtrip_data, "Block data mismatch for CID: {}", cid);
911
+
}
912
+
913
+
println!("✓ All {} blocks match after roundtrip", blocks.len());
914
+
}
915
+
916
+
#[tokio::test]
917
+
async fn test_real_repo_car_header() {
918
+
use jacquard_repo::car::read_car_header;
919
+
use std::path::Path;
920
+
921
+
let fixture_path = Path::new(concat!(
922
+
env!("CARGO_MANIFEST_DIR"),
923
+
"/tests/fixtures/repo-nonbinary.computer-2025-10-21T13_05_55.090Z.car"
924
+
));
925
+
926
+
if !fixture_path.exists() {
927
+
eprintln!("⚠️ Skipping test_real_repo_car_header - fixture not present");
928
+
return;
929
+
}
930
+
931
+
let roots = read_car_header(fixture_path)
932
+
.await
933
+
.expect("Failed to read CAR header");
934
+
935
+
println!("✓ CAR file has {} root(s)", roots.len());
936
+
937
+
assert!(!roots.is_empty(), "CAR should have at least one root");
938
+
939
+
for (i, root) in roots.iter().enumerate() {
940
+
println!(" Root {}: {}", i, root);
941
+
}
942
+
}
943
+
944
+
#[tokio::test]
945
+
async fn test_real_repo_car_streaming() {
946
+
use jacquard_repo::car::stream_car;
947
+
use std::path::Path;
948
+
949
+
let fixture_path = Path::new(concat!(
950
+
env!("CARGO_MANIFEST_DIR"),
951
+
"/tests/fixtures/repo-nonbinary.computer-2025-10-21T13_05_55.090Z.car"
952
+
));
953
+
954
+
if !fixture_path.exists() {
955
+
eprintln!("⚠️ Skipping test_real_repo_car_streaming - fixture not present");
956
+
return;
957
+
}
958
+
959
+
let mut stream = stream_car(fixture_path)
960
+
.await
961
+
.expect("Failed to create CAR stream");
962
+
963
+
println!("✓ Created CAR stream");
964
+
println!(" Roots: {:?}", stream.roots());
965
+
966
+
let mut block_count = 0;
967
+
while let Some((cid, data)) = stream.next().await.expect("Stream error") {
968
+
block_count += 1;
969
+
if block_count <= 5 {
970
+
println!(" Block {}: {} ({} bytes)", block_count, cid, data.len());
971
+
}
972
+
}
973
+
974
+
println!("✓ Streamed {} blocks total", block_count);
975
+
assert!(block_count > 0, "Should have streamed at least one block");
976
+
}
977
+
978
+
#[tokio::test]
979
+
async fn test_real_repo_mst_structure() {
980
+
use jacquard_repo::car::read_car;
981
+
use jacquard_repo::mst::tree::Mst;
982
+
use jacquard_repo::storage::memory::MemoryBlockStore;
983
+
use std::path::Path;
984
+
985
+
let fixture_path = Path::new(concat!(
986
+
env!("CARGO_MANIFEST_DIR"),
987
+
"/tests/fixtures/repo-nonbinary.computer-2025-10-21T13_05_55.090Z.car"
988
+
));
989
+
990
+
if !fixture_path.exists() {
991
+
eprintln!("⚠️ Skipping test_real_repo_mst_structure - fixture not present");
992
+
return;
993
+
}
994
+
995
+
println!("✓ Loading real repo CAR file");
996
+
997
+
// Read CAR and load into storage
998
+
let blocks = read_car(fixture_path).await.expect("Failed to read CAR");
999
+
println!("✓ Loaded {} blocks", blocks.len());
1000
+
1001
+
let storage = Arc::new(MemoryBlockStore::new());
1002
+
1003
+
// Load all blocks into storage
1004
+
let mut block_vec = Vec::new();
1005
+
for (cid, data) in blocks.iter() {
1006
+
block_vec.push((*cid, data.clone()));
1007
+
}
1008
+
storage
1009
+
.put_many(block_vec)
1010
+
.await
1011
+
.expect("Failed to store blocks");
1012
+
println!("✓ Loaded all blocks into storage");
1013
+
1014
+
// Get roots from CAR header
1015
+
let roots = jacquard_repo::car::read_car_header(fixture_path)
1016
+
.await
1017
+
.expect("Failed to read header");
1018
+
1019
+
assert!(!roots.is_empty(), "CAR should have at least one root");
1020
+
let commit_cid = roots[0];
1021
+
println!("✓ Commit CID: {}", commit_cid);
1022
+
1023
+
// Parse commit to get MST root
1024
+
#[derive(serde::Deserialize)]
1025
+
struct Commit {
1026
+
data: cid::Cid,
1027
+
// We only care about the data field (MST root)
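// (a full atproto commit record also carries did, version, rev, prev and sig)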
1028
+
}
1029
+
1030
+
let commit_bytes = storage
1031
+
.get(&commit_cid)
1032
+
.await
1033
+
.expect("Failed to get commit")
1034
+
.expect("Commit not found");
1035
+
1036
+
let commit: Commit =
1037
+
serde_ipld_dagcbor::from_slice(&commit_bytes).expect("Failed to parse commit");
1038
+
1039
+
let mst_root = commit.data;
1040
+
println!("✓ MST root CID: {}", mst_root);
1041
+
1042
+
// Load MST
1043
+
let mst = Mst::load(storage.clone(), mst_root, None);
1044
+
println!("✓ Loaded MST from storage");
1045
+
1046
+
// Verify we can get the root CID
1047
+
let root_cid = mst.root().await.expect("Failed to get root CID");
1048
+
assert_eq!(root_cid, mst_root, "MST root CID should match");
1049
+
println!("✓ MST root CID matches");
1050
+
1051
+
// Get all leaves to verify tree structure
1052
+
let leaves = mst.leaves().await.expect("Failed to get leaves");
1053
+
println!("✓ MST contains {} leaf entries", leaves.len());
1054
+
1055
+
assert!(!leaves.is_empty(), "MST should have at least one leaf");
1056
+
1057
+
// Verify leaves are in lexicographic order
1058
+
for i in 1..leaves.len() {
1059
+
let prev_key = &leaves[i - 1].0;
1060
+
let curr_key = &leaves[i].0;
1061
+
assert!(
1062
+
prev_key < curr_key,
1063
+
"Leaves should be in lexicographic order: {:?} >= {:?}",
1064
+
prev_key,
1065
+
curr_key
1066
+
);
1067
+
}
1068
+
println!("✓ All leaves are in lexicographic order");
1069
+
1070
+
// Test get operation on first few keys
1071
+
for (i, (key, expected_cid)) in leaves.iter().take(10).enumerate() {
1072
+
let retrieved = mst.get(key).await.expect("Failed to get key");
1073
+
assert_eq!(
1074
+
retrieved,
1075
+
Some(*expected_cid),
1076
+
"Get operation failed for key {}: {}",
1077
+
i,
1078
+
key
1079
+
);
1080
+
}
1081
+
println!("✓ Get operations work correctly on sampled keys");
1082
+
1083
+
// Verify all leaves are retrievable via get
1084
+
println!(" Verifying all {} keys are retrievable...", leaves.len());
1085
+
for (key, expected_cid) in &leaves {
1086
+
let retrieved = mst.get(key).await.expect("Failed to get key");
1087
+
assert_eq!(
1088
+
retrieved,
1089
+
Some(*expected_cid),
1090
+
"Get operation failed for key: {}",
1091
+
key
1092
+
);
1093
+
}
1094
+
println!("✓ All {} keys are retrievable via get()", leaves.len());
1095
+
}
1096
+
1097
+
#[tokio::test]
1098
+
async fn test_real_repo_mst_operations() {
1099
+
use jacquard_repo::car::read_car;
1100
+
use jacquard_repo::mst::tree::Mst;
1101
+
use jacquard_repo::storage::memory::MemoryBlockStore;
1102
+
use std::path::Path;
1103
+
1104
+
let fixture_path = Path::new(concat!(
1105
+
env!("CARGO_MANIFEST_DIR"),
1106
+
"/tests/fixtures/repo-nonbinary.computer-2025-10-21T13_05_55.090Z.car"
1107
+
));
1108
+
1109
+
if !fixture_path.exists() {
1110
+
eprintln!("⚠️ Skipping test_real_repo_mst_operations - fixture not present");
1111
+
return;
1112
+
}
1113
+
1114
+
// Load CAR and set up storage
1115
+
let blocks = read_car(fixture_path).await.expect("Failed to read CAR");
1116
+
let storage = Arc::new(MemoryBlockStore::new());
1117
+
1118
+
let mut block_vec = Vec::new();
1119
+
for (cid, data) in blocks.iter() {
1120
+
block_vec.push((*cid, data.clone()));
1121
+
}
1122
+
storage
1123
+
.put_many(block_vec)
1124
+
.await
1125
+
.expect("Failed to store blocks");
1126
+
1127
+
// Get MST root
1128
+
let roots = jacquard_repo::car::read_car_header(fixture_path)
1129
+
.await
1130
+
.expect("Failed to read header");
1131
+
let commit_cid = roots[0];
1132
+
1133
+
#[derive(serde::Deserialize)]
1134
+
struct Commit {
1135
+
data: cid::Cid,
1136
+
}
1137
+
1138
+
let commit_bytes = storage.get(&commit_cid).await.unwrap().unwrap();
1139
+
let commit: Commit = serde_ipld_dagcbor::from_slice(&commit_bytes).unwrap();
1140
+
let mst_root = commit.data;
1141
+
1142
+
// Load original MST
1143
+
let original_mst = Mst::load(storage.clone(), mst_root, None);
1144
+
let original_leaves = original_mst.leaves().await.expect("Failed to get leaves");
1145
+
println!("✓ Loaded MST with {} leaves", original_leaves.len());
1146
+
1147
+
// Test adding a new key
1148
+
fn test_cid(n: u8) -> cid::Cid {
1149
+
let data = vec![n; 32];
1150
+
let mh = multihash::Multihash::wrap(SHA2_256, &data).unwrap();
1151
+
cid::Cid::new_v1(DAG_CBOR_CID_CODEC, mh)
1152
+
}
1153
+
1154
+
let new_key = "app.bsky.feed.post/zzztestkey123";
1155
+
let modified_mst = original_mst
1156
+
.add(new_key, test_cid(99))
1157
+
.await
1158
+
.expect("Failed to add key");
1159
+
1160
+
// Verify new key exists
1161
+
assert_eq!(
1162
+
modified_mst.get(new_key).await.unwrap(),
1163
+
Some(test_cid(99)),
1164
+
"New key should be retrievable"
1165
+
);
1166
+
println!("✓ Successfully added new key to MST");
1167
+
1168
+
// Verify old keys still exist
1169
+
for (key, cid) in original_leaves.iter().take(10) {
1170
+
assert_eq!(
1171
+
modified_mst.get(key).await.unwrap(),
1172
+
Some(*cid),
1173
+
"Original keys should still be retrievable"
1174
+
);
1175
+
}
1176
+
println!("✓ Original keys still retrievable after add");
1177
+
1178
+
// Test that modified MST has one more leaf
1179
+
let modified_leaves = modified_mst.leaves().await.unwrap();
1180
+
assert_eq!(
1181
+
modified_leaves.len(),
1182
+
original_leaves.len() + 1,
1183
+
"Modified MST should have one more leaf"
1184
+
);
1185
+
println!("✓ Modified MST has correct leaf count");
1186
+
1187
+
// Test deleting a key
1188
+
if let Some((key_to_delete, _)) = original_leaves.first() {
1189
+
let mst_after_delete = modified_mst
1190
+
.delete(key_to_delete)
1191
+
.await
1192
+
.expect("Failed to delete key");
1193
+
1194
+
assert_eq!(
1195
+
mst_after_delete.get(key_to_delete).await.unwrap(),
1196
+
None,
1197
+
"Deleted key should not be retrievable"
1198
+
);
1199
+
println!("✓ Successfully deleted key from MST");
1200
+
1201
+
// Verify other keys still exist
1202
+
for (key, cid) in original_leaves.iter().skip(1).take(10) {
1203
+
assert_eq!(
1204
+
mst_after_delete.get(key).await.unwrap(),
1205
+
Some(*cid),
1206
+
"Other keys should still be retrievable after delete"
1207
+
);
1208
+
}
1209
+
println!("✓ Other keys still retrievable after delete");
1210
+
}
1211
+
}
1212
+
1213
+
#[tokio::test]
1214
+
async fn test_real_repo_mst_determinism() {
1215
+
use jacquard_repo::car::read_car;
1216
+
use jacquard_repo::mst::tree::Mst;
1217
+
use jacquard_repo::storage::memory::MemoryBlockStore;
1218
+
use std::path::Path;
1219
+
1220
+
let fixture_path = Path::new(concat!(
1221
+
env!("CARGO_MANIFEST_DIR"),
1222
+
"/tests/fixtures/repo-nonbinary.computer-2025-10-21T13_05_55.090Z.car"
1223
+
));
1224
+
1225
+
if !fixture_path.exists() {
1226
+
eprintln!("⚠️ Skipping test_real_repo_mst_determinism - fixture not present");
1227
+
return;
1228
+
}
1229
+
1230
+
// Load CAR and set up storage
1231
+
let blocks = read_car(fixture_path).await.expect("Failed to read CAR");
1232
+
let storage = Arc::new(MemoryBlockStore::new());
1233
+
1234
+
let mut block_vec = Vec::new();
1235
+
for (cid, data) in blocks.iter() {
1236
+
block_vec.push((*cid, data.clone()));
1237
+
}
1238
+
storage
1239
+
.put_many(block_vec)
1240
+
.await
1241
+
.expect("Failed to store blocks");
1242
+
1243
+
// Get MST root and leaves
1244
+
let roots = jacquard_repo::car::read_car_header(fixture_path)
1245
+
.await
1246
+
.expect("Failed to read header");
1247
+
let commit_cid = roots[0];
1248
+
1249
+
#[derive(serde::Deserialize)]
1250
+
struct Commit {
1251
+
data: cid::Cid,
1252
+
}
1253
+
1254
+
let commit_bytes = storage.get(&commit_cid).await.unwrap().unwrap();
1255
+
let commit: Commit = serde_ipld_dagcbor::from_slice(&commit_bytes).unwrap();
1256
+
let original_mst_root = commit.data;
1257
+
1258
+
let original_mst = Mst::load(storage.clone(), original_mst_root, None);
1259
+
let leaves = original_mst.leaves().await.expect("Failed to get leaves");
1260
+
println!(
1261
+
"✓ Loaded MST with {} leaves for determinism test",
1262
+
leaves.len()
1263
+
);
1264
+
1265
+
// Take first 100 keys and rebuild tree in different order
1266
+
let test_leaves: Vec<_> = leaves.iter().take(100).cloned().collect();
1267
+
println!(" Testing determinism with {} keys", test_leaves.len());
1268
+
1269
+
// Build tree in original order
1270
+
let storage1 = Arc::new(MemoryBlockStore::new());
1271
+
let mut mst1 = Mst::new(storage1);
1272
+
for (key, cid) in &test_leaves {
1273
+
mst1 = mst1.add(key, *cid).await.unwrap();
1274
+
}
1275
+
let cid1 = mst1.root().await.unwrap();
1276
+
1277
+
// Build tree in reverse order
1278
+
let storage2 = Arc::new(MemoryBlockStore::new());
1279
+
let mut mst2 = Mst::new(storage2);
1280
+
for (key, cid) in test_leaves.iter().rev() {
1281
+
mst2 = mst2.add(key, *cid).await.unwrap();
1282
+
}
1283
+
let cid2 = mst2.root().await.unwrap();
1284
+
1285
+
println!(" MST1 root: {}", cid1);
1286
+
println!(" MST2 root: {}", cid2);
1287
+
1288
+
// Verify all keys are present in both trees
1289
+
for (key, expected_cid) in &test_leaves {
1290
+
let v1 = mst1.get(key).await.unwrap();
1291
+
let v2 = mst2.get(key).await.unwrap();
1292
+
1293
+
assert_eq!(
1294
+
v1,
1295
+
Some(*expected_cid),
1296
+
"Key should be retrievable from mst1: {}",
1297
+
key
1298
+
);
1299
+
assert_eq!(
1300
+
v2,
1301
+
Some(*expected_cid),
1302
+
"Key should be retrievable from mst2: {}",
1303
+
key
1304
+
);
1305
+
}
1306
+
println!("✓ All keys retrievable from both trees");
1307
+
1308
+
// Check if root CIDs match (determinism test)
1309
+
assert_eq!(
1310
+
cid1, cid2,
1311
+
"Tree structure must be deterministic - root CIDs should match"
1312
+
);
1313
+
println!("✓ Root CIDs match - tree structure is deterministic!");
1314
+
}
+1
-1
crates/jacquard/src/moderation.rs
+1
-1
crates/jacquard/src/moderation.rs
···
51
51
#[cfg(feature = "api_bluesky")]
52
52
pub use fetch::{fetch_labeler_defs, fetch_labeler_defs_direct};
53
53
pub use labeled::{Labeled, LabeledRecord};
54
-
pub use moderatable::Moderateable;
54
+
pub use moderatable::{ModeratableIterExt, Moderateable};
55
55
pub use types::{
56
56
Blur, LabelCause, LabelPref, LabelTarget, LabelerDefs, ModerationDecision, ModerationPrefs,
57
57
};
+34
crates/jacquard/src/moderation/moderatable.rs
+34
crates/jacquard/src/moderation/moderatable.rs
···
36
36
) -> Vec<(&'static str, ModerationDecision)>;
37
37
}
38
38
39
+
/// Extension trait for applying moderation to iterators
40
+
///
41
+
/// Provides convenience methods for filtering and mapping moderation decisions
42
+
/// over collections.
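///
/// Illustrative sketch (binding names are hypothetical):
/// `posts.iter().filter_moderated(&prefs, &defs, &accepted_labelers)`.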
43
+
pub trait ModeratableIterExt<'a, T: Moderateable<'a> + 'a>: Iterator<Item = &'a T> + Sized {
44
+
/// Map each item to a tuple of (item, scoped moderation decisions)
45
+
fn with_moderation(
46
+
self,
47
+
prefs: &'a ModerationPrefs<'_>,
48
+
defs: &'a LabelerDefs<'_>,
49
+
accepted_labelers: &'a [Did<'_>],
50
+
) -> impl Iterator<Item = (&'a T, Vec<(&'static str, ModerationDecision)>)> {
51
+
self.map(move |item| {
52
+
let scoped_decisions = item.moderate_all(prefs, defs, accepted_labelers);
53
+
(item, scoped_decisions)
54
+
})
55
+
}
56
+
57
+
/// Filter out items for which any scoped moderation decision has `filter` set
58
+
fn filter_moderated(
59
+
self,
60
+
prefs: &'a ModerationPrefs<'_>,
61
+
defs: &'a LabelerDefs<'_>,
62
+
accepted_labelers: &'a [Did<'_>],
63
+
) -> impl Iterator<Item = &'a T> {
64
+
self.filter(move |item| {
65
+
let scoped_decisions = item.moderate_all(prefs, defs, accepted_labelers);
66
+
!scoped_decisions.iter().any(|(_, decision)| decision.filter)
67
+
})
68
+
}
69
+
}
70
+
71
+
impl<'a, T: Moderateable<'a> + 'a, I: Iterator<Item = &'a T>> ModeratableIterExt<'a, T> for I {}
72
+
39
73
// Implementations for common Bluesky types
40
74
#[cfg(feature = "api_bluesky")]
41
75
mod bluesky_impls {