tangled
alpha
login
or
join now
nonbinary.computer
/
jacquard
81
fork
atom
A better Rust ATProto crate
81
fork
atom
overview
issues
9
pulls
pipelines
swapped repo firehose types to use generated api
Orual
1 day ago
f2f45e46
bc139173
0/1
build.yml
failed
1min 11s
+244
-377
6 changed files
expand all
collapse all
unified
split
Cargo.lock
crates
jacquard-repo
Cargo.toml
src
commit
firehose.rs
mst
diff.rs
repo.rs
tests
large_proof_tests.rs
+1
Cargo.lock
···
2628
2628
"ed25519-dalek",
2629
2629
"hex",
2630
2630
"iroh-car",
2631
2631
+
"jacquard-api",
2631
2632
"jacquard-common",
2632
2633
"jacquard-derive",
2633
2634
"k256",
+1
crates/jacquard-repo/Cargo.toml
···
18
18
# Internal
19
19
jacquard-common = { path = "../jacquard-common", version = "0.9", features = ["crypto-ed25519", "crypto-k256", "crypto-p256"] }
20
20
jacquard-derive = { path = "../jacquard-derive", version = "0.9" }
21
21
+
jacquard-api = { path = "../jacquard-api", version = "0.9", features = ["streaming"] }
21
22
22
23
# Serialization
23
24
serde.workspace = true
+215
-360
crates/jacquard-repo/src/commit/firehose.rs
···
4
4
//! to avoid a dependency on the full API crate. They represent firehose protocol messages,
5
5
//! which are DISTINCT from repository commit objects.
6
6
7
7
-
use bytes::Bytes;
8
8
-
use jacquard_common::types::cid::CidLink;
7
7
+
pub use jacquard_api::com_atproto::sync::subscribe_repos::Commit as FirehoseCommit;
8
8
+
pub use jacquard_api::com_atproto::sync::subscribe_repos::RepoOp;
9
9
+
use jacquard_api::com_atproto::sync::subscribe_repos::{Commit, RepoOpAction};
9
10
use jacquard_common::types::crypto::PublicKey;
10
10
-
use jacquard_common::types::string::{Datetime, Did, Tid};
11
11
-
use jacquard_common::{CowStr, IntoStatic};
12
11
use smol_str::ToSmolStr;
13
12
14
14
-
/// Firehose commit message (sync v1.0 and v1.1)
15
15
-
///
16
16
-
/// Represents an update of repository state in the firehose stream.
17
17
-
/// This is the message format sent over `com.atproto.sync.subscribeRepos`.
13
13
+
/// Convert to VerifiedWriteOp for v1.1 validation
18
14
///
19
19
-
/// **Sync v1.0 vs v1.1:**
20
20
-
/// - v1.0: `prev_data` is None/skipped, consumers must have sufficient previous repository state to validate
21
21
-
/// - v1.1: `prev_data` includes previous MST root for inductive validation
22
22
-
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
23
23
-
#[serde(rename_all = "camelCase")]
24
24
-
pub struct FirehoseCommit<'a> {
25
25
-
/// The repo this event comes from
26
26
-
#[serde(borrow)]
27
27
-
pub repo: Did<'a>,
28
28
-
29
29
-
/// The rev of the emitted commit
30
30
-
pub rev: Tid,
31
31
-
32
32
-
/// The stream sequence number of this message
33
33
-
pub seq: i64,
34
34
-
35
35
-
/// The rev of the last emitted commit from this repo (if any)
36
36
-
pub since: Tid,
37
37
-
38
38
-
/// Timestamp of when this message was originally broadcast
39
39
-
pub time: Datetime,
40
40
-
41
41
-
/// Repo commit object CID
42
42
-
///
43
43
-
/// This CID points to the repository commit block (with did, version, data, rev, prev, sig).
44
44
-
/// It must be the first entry in the CAR header 'roots' list.
45
45
-
#[serde(borrow)]
46
46
-
pub commit: CidLink<'a>,
47
47
-
48
48
-
/// CAR file containing relevant blocks
49
49
-
///
50
50
-
/// Contains blocks as a diff since the previous repo state. The commit block
51
51
-
/// must be included, and its CID must be the first root in the CAR header.
52
52
-
///
53
53
-
/// For sync v1.1, may include additional MST node blocks needed for operation inversion.
54
54
-
#[serde(with = "super::serde_bytes_helper")]
55
55
-
pub blocks: Bytes,
56
56
-
57
57
-
/// Operations in this commit
58
58
-
#[serde(borrow)]
59
59
-
pub ops: Vec<RepoOp<'a>>,
60
60
-
61
61
-
/// Previous MST root CID (sync v1.1 only)
62
62
-
///
63
63
-
/// The root CID of the MST tree for the previous commit (indicated by the 'since' field).
64
64
-
/// Corresponds to the 'data' field in the previous repo commit object.
65
65
-
///
66
66
-
/// **Sync v1.1 inductive validation:**
67
67
-
/// - Enables validation without local MST state
68
68
-
/// - Operations can be inverted (creates→deletes, deletes→creates with prev values)
69
69
-
/// - Required for "inductive firehose" consumption
70
70
-
///
71
71
-
/// **Sync v1.0:**
72
72
-
/// - This field is None
73
73
-
/// - Consumers must have previous repository state
74
74
-
#[serde(skip_serializing_if = "Option::is_none")]
75
75
-
#[serde(borrow)]
76
76
-
pub prev_data: Option<CidLink<'a>>,
77
77
-
78
78
-
/// Blob CIDs referenced in this commit
79
79
-
#[serde(borrow)]
80
80
-
pub blobs: Vec<CidLink<'a>>,
81
81
-
82
82
-
/// DEPRECATED: Replaced by #sync event and data limits
83
83
-
///
84
84
-
/// Indicates that this commit contained too many ops, or data size was too large.
85
85
-
/// Consumers will need to make a separate request to get missing data.
86
86
-
pub too_big: bool,
87
87
-
88
88
-
/// DEPRECATED: Unused
89
89
-
pub rebase: bool,
90
90
-
}
91
91
-
92
92
-
/// A repository operation (mutation of a single record)
93
93
-
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
94
94
-
#[serde(rename_all = "camelCase")]
95
95
-
pub struct RepoOp<'a> {
96
96
-
/// Operation type: "create", "update", or "delete"
97
97
-
#[serde(borrow)]
98
98
-
pub action: CowStr<'a>,
99
99
-
100
100
-
/// Collection/rkey path (e.g., "app.bsky.feed.post/abc123")
101
101
-
#[serde(borrow)]
102
102
-
pub path: CowStr<'a>,
103
103
-
104
104
-
/// For creates and updates, the new record CID. For deletions, None (null).
105
105
-
#[serde(skip_serializing_if = "Option::is_none")]
106
106
-
#[serde(borrow)]
107
107
-
pub cid: Option<CidLink<'a>>,
108
108
-
109
109
-
/// For updates and deletes, the previous record CID
110
110
-
///
111
111
-
/// Required for sync v1.1 inductive firehose validation.
112
112
-
/// For creates, this field should not be defined.
113
113
-
#[serde(skip_serializing_if = "Option::is_none")]
114
114
-
#[serde(borrow)]
115
115
-
pub prev: Option<CidLink<'a>>,
116
116
-
}
117
117
-
118
118
-
impl<'a> RepoOp<'a> {
119
119
-
/// Convert to VerifiedWriteOp for v1.1 validation
120
120
-
///
121
121
-
/// Validates that all required fields are present for inversion.
122
122
-
pub fn to_invertible_op(&self) -> Result<VerifiedWriteOp> {
123
123
-
let key = self.path.to_smolstr();
124
124
-
125
125
-
match self.action.as_ref() {
126
126
-
"create" => {
127
127
-
let cid = self
128
128
-
.cid
129
129
-
.as_ref()
130
130
-
.ok_or_else(|| RepoError::invalid_commit("create operation missing cid field"))?
131
131
-
.to_ipld()
132
132
-
.map_err(|e| RepoError::invalid_cid_conversion(e, "create cid"))?;
133
133
-
134
134
-
Ok(VerifiedWriteOp::Create { key, cid })
135
135
-
}
136
136
-
"update" => {
137
137
-
let cid = self
138
138
-
.cid
139
139
-
.as_ref()
140
140
-
.ok_or_else(|| RepoError::invalid_commit("update operation missing cid field"))?
141
141
-
.to_ipld()
142
142
-
.map_err(|e| RepoError::invalid_cid_conversion(e, "update cid"))?;
143
143
-
144
144
-
let prev = self
145
145
-
.prev
146
146
-
.as_ref()
147
147
-
.ok_or_else(|| {
148
148
-
RepoError::invalid_commit(
149
149
-
"update operation missing prev field for v1.1 validation",
150
150
-
)
151
151
-
})?
152
152
-
.to_ipld()
153
153
-
.map_err(|e| RepoError::invalid_cid_conversion(e, "update prev"))?;
154
154
-
155
155
-
Ok(VerifiedWriteOp::Update { key, cid, prev })
156
156
-
}
157
157
-
"delete" => {
158
158
-
let prev = self
159
159
-
.prev
160
160
-
.as_ref()
161
161
-
.ok_or_else(|| {
162
162
-
RepoError::invalid_commit(
163
163
-
"delete operation missing prev field for v1.1 validation",
164
164
-
)
165
165
-
})?
166
166
-
.to_ipld()
167
167
-
.map_err(|e| RepoError::invalid_cid_conversion(e, "delete prev"))?;
15
15
+
/// Validates that all required fields are present for inversion.
16
16
+
pub fn to_invertible_op(op: &RepoOp<'_>) -> Result<VerifiedWriteOp> {
17
17
+
let key = op.path.to_smolstr();
18
18
+
match op.action {
19
19
+
RepoOpAction::Create => {
20
20
+
let cid = op
21
21
+
.cid
22
22
+
.as_ref()
23
23
+
.ok_or_else(|| RepoError::invalid_commit("create operation missing cid field"))?
24
24
+
.to_ipld()
25
25
+
.map_err(|e| RepoError::invalid_cid_conversion(e, "create cid"))?;
168
26
169
169
-
Ok(VerifiedWriteOp::Delete { key, prev })
170
170
-
}
171
171
-
action => Err(RepoError::invalid_commit(format!(
172
172
-
"unknown action type: {}",
173
173
-
action
174
174
-
))),
27
27
+
Ok(VerifiedWriteOp::Create { key, cid })
175
28
}
176
176
-
}
177
177
-
}
29
29
+
RepoOpAction::Update => {
30
30
+
let cid = op
31
31
+
.cid
32
32
+
.as_ref()
33
33
+
.ok_or_else(|| RepoError::invalid_commit("update operation missing cid field"))?
34
34
+
.to_ipld()
35
35
+
.map_err(|e| RepoError::invalid_cid_conversion(e, "update cid"))?;
178
36
179
179
-
impl IntoStatic for FirehoseCommit<'_> {
180
180
-
type Output = FirehoseCommit<'static>;
37
37
+
let prev = op
38
38
+
.prev
39
39
+
.as_ref()
40
40
+
.ok_or_else(|| {
41
41
+
RepoError::invalid_commit(
42
42
+
"update operation missing prev field for v1.1 validation",
43
43
+
)
44
44
+
})?
45
45
+
.to_ipld()
46
46
+
.map_err(|e| RepoError::invalid_cid_conversion(e, "update prev"))?;
181
47
182
182
-
fn into_static(self) -> Self::Output {
183
183
-
FirehoseCommit {
184
184
-
repo: self.repo.into_static(),
185
185
-
rev: self.rev,
186
186
-
seq: self.seq,
187
187
-
since: self.since,
188
188
-
time: self.time,
189
189
-
commit: self.commit.into_static(),
190
190
-
blocks: self.blocks,
191
191
-
ops: self.ops.into_iter().map(|op| op.into_static()).collect(),
192
192
-
prev_data: self.prev_data.map(|pd| pd.into_static()),
193
193
-
blobs: self.blobs.into_iter().map(|b| b.into_static()).collect(),
194
194
-
too_big: self.too_big,
195
195
-
rebase: self.rebase,
48
48
+
Ok(VerifiedWriteOp::Update { key, cid, prev })
196
49
}
197
197
-
}
198
198
-
}
199
199
-
200
200
-
impl IntoStatic for RepoOp<'_> {
201
201
-
type Output = RepoOp<'static>;
50
50
+
RepoOpAction::Delete => {
51
51
+
let prev = op
52
52
+
.prev
53
53
+
.as_ref()
54
54
+
.ok_or_else(|| {
55
55
+
RepoError::invalid_commit(
56
56
+
"delete operation missing prev field for v1.1 validation",
57
57
+
)
58
58
+
})?
59
59
+
.to_ipld()
60
60
+
.map_err(|e| RepoError::invalid_cid_conversion(e, "delete prev"))?;
202
61
203
203
-
fn into_static(self) -> Self::Output {
204
204
-
RepoOp {
205
205
-
action: self.action.into_static(),
206
206
-
path: self.path.into_static(),
207
207
-
cid: self.cid.into_static(),
208
208
-
prev: self.prev.map(|p| p.into_static()),
62
62
+
Ok(VerifiedWriteOp::Delete { key, prev })
209
63
}
64
64
+
RepoOpAction::Other(ref action) => Err(RepoError::invalid_commit(format!(
65
65
+
"unknown action type: {}",
66
66
+
action
67
67
+
))),
210
68
}
211
69
}
212
70
···
220
78
use cid::Cid as IpldCid;
221
79
use std::sync::Arc;
222
80
223
223
-
impl<'a> FirehoseCommit<'a> {
224
224
-
/// Validate a sync v1.0 commit
225
225
-
///
226
226
-
/// **Requirements:**
227
227
-
/// - Must have previous MST state (potentially full repository)
228
228
-
/// - All blocks needed for validation must be in `self.blocks`
229
229
-
///
230
230
-
/// **Validation steps:**
231
231
-
/// 1. Parse CAR blocks from `self.blocks` into temporary storage
232
232
-
/// 2. Load commit object and verify signature
233
233
-
/// 3. Apply operations to previous MST (using temporary storage for new blocks)
234
234
-
/// 4. Verify result matches commit.data (new MST root)
235
235
-
///
236
236
-
/// Returns the new MST root CID on success.
237
237
-
pub async fn validate_v1_0<S: BlockStore + Sync + 'static>(
238
238
-
&self,
239
239
-
prev_mst_root: Option<IpldCid>,
240
240
-
prev_storage: Arc<S>,
241
241
-
pubkey: &PublicKey<'_>,
242
242
-
) -> Result<IpldCid> {
243
243
-
// 1. Parse CAR blocks from the firehose message into temporary storage
244
244
-
let parsed = parse_car_bytes(&self.blocks).await?;
245
245
-
let temp_storage = MemoryBlockStore::new_from_blocks(parsed.blocks);
81
81
+
/// Validate a sync v1.0 commit
82
82
+
///
83
83
+
/// **Requirements:**
84
84
+
/// - Must have previous MST state (potentially full repository)
85
85
+
/// - All blocks needed for validation must be in `self.blocks`
86
86
+
///
87
87
+
/// **Validation steps:**
88
88
+
/// 1. Parse CAR blocks from `self.blocks` into temporary storage
89
89
+
/// 2. Load commit object and verify signature
90
90
+
/// 3. Apply operations to previous MST (using temporary storage for new blocks)
91
91
+
/// 4. Verify result matches commit.data (new MST root)
92
92
+
///
93
93
+
/// Returns the new MST root CID on success.
94
94
+
pub async fn validate_v1_0<S: BlockStore + Sync + 'static>(
95
95
+
fh_commit: &Commit<'_>,
96
96
+
prev_mst_root: Option<IpldCid>,
97
97
+
prev_storage: Arc<S>,
98
98
+
pubkey: &PublicKey<'_>,
99
99
+
) -> Result<IpldCid> {
100
100
+
// 1. Parse CAR blocks from the firehose message into temporary storage
101
101
+
let parsed = parse_car_bytes(&fh_commit.blocks).await?;
102
102
+
let temp_storage = MemoryBlockStore::new_from_blocks(parsed.blocks);
246
103
247
247
-
// 2. Create layered storage: reads from temp first, then prev; writes to temp only
248
248
-
// This avoids copying all previous MST blocks
249
249
-
let layered_storage = LayeredBlockStore::new(temp_storage.clone(), prev_storage);
104
104
+
// 2. Create layered storage: reads from temp first, then prev; writes to temp only
105
105
+
// This avoids copying all previous MST blocks
106
106
+
let layered_storage = LayeredBlockStore::new(temp_storage.clone(), prev_storage);
250
107
251
251
-
// 3. Extract and verify commit object from temporary storage
252
252
-
let commit_cid: IpldCid = self
253
253
-
.commit
254
254
-
.to_ipld()
255
255
-
.map_err(|e| RepoError::invalid_cid_conversion(e, "commit CID"))?;
256
256
-
let commit_bytes = temp_storage
257
257
-
.get(&commit_cid)
258
258
-
.await?
259
259
-
.ok_or_else(|| RepoError::not_found("commit block", &commit_cid))?;
108
108
+
// 3. Extract and verify commit object from temporary storage
109
109
+
let commit_cid: IpldCid = fh_commit
110
110
+
.commit
111
111
+
.to_ipld()
112
112
+
.map_err(|e| RepoError::invalid_cid_conversion(e, "commit CID"))?;
113
113
+
let commit_bytes = temp_storage
114
114
+
.get(&commit_cid)
115
115
+
.await?
116
116
+
.ok_or_else(|| RepoError::not_found("commit block", &commit_cid))?;
260
117
261
261
-
let commit = super::Commit::from_cbor(&commit_bytes)?;
118
118
+
let commit = super::Commit::from_cbor(&commit_bytes)?;
262
119
263
263
-
// Verify DID matches
264
264
-
if commit.did().as_ref() != self.repo.as_ref() {
265
265
-
return Err(RepoError::invalid_commit(format!(
120
120
+
// Verify DID matches
121
121
+
if commit.did().as_ref() != fh_commit.repo.as_ref() {
122
122
+
return Err(RepoError::invalid_commit(format!(
266
123
"DID mismatch: commit has {}, message has {}",
267
124
commit.did(),
268
268
-
self.repo
125
125
+
fh_commit.repo
269
126
))
270
127
.with_help("DID mismatch indicates the commit was signed by a different identity - verify the commit is from the expected repository"));
271
271
-
}
272
272
-
273
273
-
// Verify signature
274
274
-
commit.verify(pubkey)?;
128
128
+
}
275
129
276
276
-
let layered_arc = Arc::new(layered_storage);
130
130
+
// Verify signature
131
131
+
commit.verify(pubkey)?;
277
132
278
278
-
// 4. Load previous MST state from layered storage (or start empty)
279
279
-
let prev_mst = if let Some(prev_root) = prev_mst_root {
280
280
-
Mst::load(layered_arc.clone(), prev_root, None)
281
281
-
} else {
282
282
-
Mst::new(layered_arc.clone())
283
283
-
};
133
133
+
let layered_arc = Arc::new(layered_storage);
284
134
285
285
-
// 5. Load new MST from commit.data (claimed result)
286
286
-
let expected_root = *commit.data();
287
287
-
let new_mst = Mst::load(layered_arc, expected_root, None);
135
135
+
// 4. Load previous MST state from layered storage (or start empty)
136
136
+
let prev_mst = if let Some(prev_root) = prev_mst_root {
137
137
+
Mst::load(layered_arc.clone(), prev_root, None)
138
138
+
} else {
139
139
+
Mst::new(layered_arc.clone())
140
140
+
};
288
141
289
289
-
// 6. Compute diff to get verified write ops (with actual prev values from tree state)
290
290
-
let diff = prev_mst.diff(&new_mst).await?;
291
291
-
let verified_ops = diff.to_verified_ops();
142
142
+
// 5. Load new MST from commit.data (claimed result)
143
143
+
let expected_root = *commit.data();
144
144
+
let new_mst = Mst::load(layered_arc, expected_root, None);
292
145
293
293
-
// 7. Apply verified ops to prev MST
294
294
-
let computed_mst = prev_mst.batch(&verified_ops).await?;
146
146
+
// 6. Compute diff to get verified write ops (with actual prev values from tree state)
147
147
+
let diff = prev_mst.diff(&new_mst).await?;
148
148
+
let verified_ops = diff.to_verified_ops();
295
149
296
296
-
// 8. Verify computed result matches claimed result
297
297
-
let computed_root = computed_mst.get_pointer().await?;
150
150
+
// 7. Apply verified ops to prev MST
151
151
+
let computed_mst = prev_mst.batch(&verified_ops).await?;
298
152
299
299
-
if computed_root != expected_root {
300
300
-
return Err(RepoError::cid_mismatch(format!(
301
301
-
"MST root mismatch: expected {}, got {}",
302
302
-
expected_root, computed_root
303
303
-
)));
304
304
-
}
153
153
+
// 8. Verify computed result matches claimed result
154
154
+
let computed_root = computed_mst.get_pointer().await?;
305
155
306
306
-
Ok(expected_root)
156
156
+
if computed_root != expected_root {
157
157
+
return Err(RepoError::cid_mismatch(format!(
158
158
+
"MST root mismatch: expected {}, got {}",
159
159
+
expected_root, computed_root
160
160
+
)));
307
161
}
308
162
309
309
-
/// Validate a sync v1.1 commit (inductive validation)
310
310
-
///
311
311
-
/// **Requirements:**
312
312
-
/// - `self.prev_data` must be Some (contains previous MST root)
313
313
-
/// - All blocks needed for validation must be in `self.blocks`
314
314
-
///
315
315
-
/// **Validation steps:**
316
316
-
/// 1. Parse CAR blocks from `self.blocks` into temporary storage
317
317
-
/// 2. Load commit object and verify signature
318
318
-
/// 3. Start from `prev_data` MST root (loaded from temp storage)
319
319
-
/// 4. Apply operations (with prev CID validation for updates/deletes)
320
320
-
/// 5. Verify result matches commit.data (new MST root)
321
321
-
///
322
322
-
/// Returns the new MST root CID on success.
323
323
-
///
324
324
-
/// **Inductive property:** Can validate without any external state besides the blocks
325
325
-
/// in this message. The `prev_data` field provides the starting MST root, and operations
326
326
-
/// include `prev` CIDs for validation. All necessary blocks must be in the CAR bytes.
327
327
-
///
328
328
-
/// Note: Because this uses the same merkle search tree struct as the repository itself,
329
329
-
/// this is far from the most efficient possible validation function possible. The repo
330
330
-
/// tree struct carries extra information. However,
331
331
-
/// it has the virtue of making everything self-validating.
332
332
-
pub async fn validate_v1_1(&self, pubkey: &PublicKey<'_>) -> Result<IpldCid> {
333
333
-
// 1. Require prev_data for v1.1
334
334
-
let prev_data_cid: IpldCid = self
335
335
-
.prev_data
336
336
-
.as_ref()
337
337
-
.ok_or_else(|| {
338
338
-
RepoError::invalid_commit("Sync v1.1 validation requires prev_data field")
339
339
-
})?
340
340
-
.to_ipld()
341
341
-
.map_err(|e| RepoError::invalid_cid_conversion(e, "prev_data CID"))?;
163
163
+
Ok(expected_root)
164
164
+
}
165
165
+
166
166
+
/// Validate a sync v1.1 commit (inductive validation)
167
167
+
///
168
168
+
/// **Requirements:**
169
169
+
/// - `self.prev_data` must be Some (contains previous MST root)
170
170
+
/// - All blocks needed for validation must be in `self.blocks`
171
171
+
///
172
172
+
/// **Validation steps:**
173
173
+
/// 1. Parse CAR blocks from `self.blocks` into temporary storage
174
174
+
/// 2. Load commit object and verify signature
175
175
+
/// 3. Start from `prev_data` MST root (loaded from temp storage)
176
176
+
/// 4. Apply operations (with prev CID validation for updates/deletes)
177
177
+
/// 5. Verify result matches commit.data (new MST root)
178
178
+
///
179
179
+
/// Returns the new MST root CID on success.
180
180
+
///
181
181
+
/// **Inductive property:** Can validate without any external state besides the blocks
182
182
+
/// in this message. The `prev_data` field provides the starting MST root, and operations
183
183
+
/// include `prev` CIDs for validation. All necessary blocks must be in the CAR bytes.
184
184
+
///
185
185
+
/// Note: Because this uses the same merkle search tree struct as the repository itself,
186
186
+
/// this is far from the most efficient possible validation function possible. The repo
187
187
+
/// tree struct carries extra information. However,
188
188
+
/// it has the virtue of making everything self-validating.
189
189
+
pub async fn validate_v1_1(fh_commit: &Commit<'_>, pubkey: &PublicKey<'_>) -> Result<IpldCid> {
190
190
+
// 1. Require prev_data for v1.1
191
191
+
let prev_data_cid: IpldCid = fh_commit
192
192
+
.prev_data
193
193
+
.as_ref()
194
194
+
.ok_or_else(|| RepoError::invalid_commit("Sync v1.1 validation requires prev_data field"))?
195
195
+
.to_ipld()
196
196
+
.map_err(|e| RepoError::invalid_cid_conversion(e, "prev_data CID"))?;
342
197
343
343
-
// 2. Parse CAR blocks from the firehose message into temporary storage
344
344
-
let parsed = parse_car_bytes(&self.blocks).await?;
198
198
+
// 2. Parse CAR blocks from the firehose message into temporary storage
199
199
+
let parsed = parse_car_bytes(&fh_commit.blocks).await?;
345
200
346
346
-
let temp_storage = Arc::new(MemoryBlockStore::new_from_blocks(parsed.blocks));
201
201
+
let temp_storage = Arc::new(MemoryBlockStore::new_from_blocks(parsed.blocks));
347
202
348
348
-
// 3. Extract and verify commit object from temporary storage
349
349
-
let commit_cid: IpldCid = self
350
350
-
.commit
351
351
-
.to_ipld()
352
352
-
.map_err(|e| RepoError::invalid_cid_conversion(e, "commit CID"))?;
353
353
-
let commit_bytes = temp_storage
354
354
-
.get(&commit_cid)
355
355
-
.await?
356
356
-
.ok_or_else(|| RepoError::not_found("commit block", &commit_cid))?;
203
203
+
// 3. Extract and verify commit object from temporary storage
204
204
+
let commit_cid: IpldCid = fh_commit
205
205
+
.commit
206
206
+
.to_ipld()
207
207
+
.map_err(|e| RepoError::invalid_cid_conversion(e, "commit CID"))?;
208
208
+
let commit_bytes = temp_storage
209
209
+
.get(&commit_cid)
210
210
+
.await?
211
211
+
.ok_or_else(|| RepoError::not_found("commit block", &commit_cid))?;
357
212
358
358
-
let commit = super::Commit::from_cbor(&commit_bytes)?;
213
213
+
let commit = super::Commit::from_cbor(&commit_bytes)?;
359
214
360
360
-
// Verify DID matches
361
361
-
if commit.did().as_ref() != self.repo.as_ref() {
362
362
-
return Err(RepoError::invalid_commit(format!(
215
215
+
// Verify DID matches
216
216
+
if commit.did().as_ref() != fh_commit.repo.as_ref() {
217
217
+
return Err(RepoError::invalid_commit(format!(
363
218
"DID mismatch: commit has {}, message has {}",
364
219
commit.did(),
365
365
-
self.repo
220
220
+
fh_commit.repo
366
221
))
367
222
.with_help("DID mismatch indicates the commit was signed by a different identity - verify the commit is from the expected repository"));
368
368
-
}
223
223
+
}
369
224
370
370
-
// Verify signature
371
371
-
commit.verify(pubkey)?;
225
225
+
// Verify signature
226
226
+
commit.verify(pubkey)?;
372
227
373
373
-
// 5. Load new MST from commit.data (claimed result)
374
374
-
let expected_root = *commit.data();
228
228
+
// 5. Load new MST from commit.data (claimed result)
229
229
+
let expected_root = *commit.data();
375
230
376
376
-
let mut new_mst = Mst::load(temp_storage, expected_root, None);
231
231
+
let mut new_mst = Mst::load(temp_storage, expected_root, None);
377
232
378
378
-
let verified_ops = self
379
379
-
.ops
380
380
-
.iter()
381
381
-
.filter_map(|op| op.to_invertible_op().ok())
382
382
-
.collect::<Vec<_>>();
383
383
-
if verified_ops.len() != self.ops.len() {
384
384
-
return Err(RepoError::invalid_commit(format!(
385
385
-
"Invalid commit: expected {} ops, got {}",
386
386
-
self.ops.len(),
387
387
-
verified_ops.len()
388
388
-
)));
389
389
-
}
233
233
+
let verified_ops = fh_commit
234
234
+
.ops
235
235
+
.iter()
236
236
+
.filter_map(|op| to_invertible_op(op).ok())
237
237
+
.collect::<Vec<_>>();
238
238
+
if verified_ops.len() != fh_commit.ops.len() {
239
239
+
return Err(RepoError::invalid_commit(format!(
240
240
+
"Invalid commit: expected {} ops, got {}",
241
241
+
fh_commit.ops.len(),
242
242
+
verified_ops.len()
243
243
+
)));
244
244
+
}
390
245
391
391
-
for op in verified_ops {
392
392
-
if let Ok(inverted) = new_mst.invert_op(op.clone()).await {
393
393
-
if !inverted {
394
394
-
return Err(RepoError::invalid_commit(format!(
395
395
-
"Invalid commit: op {:?} is not invertible",
396
396
-
op
397
397
-
)));
398
398
-
}
246
246
+
for op in verified_ops {
247
247
+
if let Ok(inverted) = new_mst.invert_op(op.clone()).await {
248
248
+
if !inverted {
249
249
+
return Err(RepoError::invalid_commit(format!(
250
250
+
"Invalid commit: op {:?} is not invertible",
251
251
+
op
252
252
+
)));
399
253
}
400
254
}
401
401
-
// 8. Verify computed previous state matches claimed previous state
402
402
-
let computed_root = new_mst.get_pointer().await?;
255
255
+
}
256
256
+
// 8. Verify computed previous state matches claimed previous state
257
257
+
let computed_root = new_mst.get_pointer().await?;
403
258
404
404
-
if computed_root != prev_data_cid {
405
405
-
return Err(RepoError::cid_mismatch(format!(
406
406
-
"MST root mismatch: expected {}, got {}",
407
407
-
prev_data_cid, computed_root
408
408
-
)));
409
409
-
}
259
259
+
if computed_root != prev_data_cid {
260
260
+
return Err(RepoError::cid_mismatch(format!(
261
261
+
"MST root mismatch: expected {}, got {}",
262
262
+
prev_data_cid, computed_root
263
263
+
)));
264
264
+
}
410
265
411
411
-
Ok(expected_root)
412
412
-
}
266
266
+
Ok(expected_root)
413
267
}
414
268
415
269
#[cfg(test)]
···
419
273
use crate::commit::Commit;
420
274
use crate::mst::{Mst, RecordWriteOp};
421
275
use crate::storage::MemoryBlockStore;
276
276
+
use jacquard_common::IntoStatic;
422
277
use jacquard_common::types::crypto::{KeyCodec, PublicKey};
278
278
+
use jacquard_common::types::did::Did;
423
279
use jacquard_common::types::recordkey::Rkey;
424
424
-
use jacquard_common::types::string::{Nsid, RecordKey};
280
280
+
use jacquard_common::types::string::{Datetime, Nsid, RecordKey};
425
281
use jacquard_common::types::tid::Ticker;
426
282
use jacquard_common::types::value::RawData;
427
283
use smol_str::SmolStr;
···
507
363
.unwrap();
508
364
509
365
// Validate using v1.1 validation
510
510
-
let result = firehose_commit.validate_v1_1(&pubkey).await;
366
366
+
let result = validate_v1_1(&firehose_commit, &pubkey).await;
511
367
if let Err(ref e) = result {
512
368
eprintln!("Validation error: {}", e);
513
369
}
···
560
416
firehose_commit.prev_data = None;
561
417
562
418
// Validate using v1.0 validation with previous storage
563
563
-
let result = firehose_commit
564
564
-
.validate_v1_0(Some(prev_root), storage.clone(), &pubkey)
565
565
-
.await;
419
419
+
let result =
420
420
+
validate_v1_0(&firehose_commit, Some(prev_root), storage.clone(), &pubkey).await;
566
421
567
422
assert!(result.is_ok(), "Valid v1.0 commit should pass validation");
568
423
···
612
467
.await
613
468
.unwrap();
614
469
615
615
-
let result = firehose_commit.validate_v1_1(&pubkey).await;
470
470
+
let result = validate_v1_1(&firehose_commit, &pubkey).await;
616
471
assert!(result.is_ok(), "Multiple creates should validate");
617
472
}
618
473
···
685
540
.await
686
541
.unwrap();
687
542
688
688
-
let result = firehose_commit.validate_v1_1(&pubkey).await;
543
543
+
let result = validate_v1_1(&firehose_commit, &pubkey).await;
689
544
assert!(
690
545
result.is_ok(),
691
546
"Update and delete operations should validate"
···
740
595
741
596
firehose_commit.blocks = bad_car.into();
742
597
743
743
-
let result = firehose_commit.validate_v1_1(&pubkey).await;
598
598
+
let result = validate_v1_1(&firehose_commit, &pubkey).await;
744
599
assert!(
745
600
result.is_err(),
746
601
"Validation should fail when commit block is missing"
···
802
657
803
658
firehose_commit.blocks = bad_car.into();
804
659
805
805
-
let result = firehose_commit.validate_v1_1(&pubkey).await;
660
660
+
let result = validate_v1_1(&firehose_commit, &pubkey).await;
806
661
assert!(
807
662
result.is_err(),
808
663
"Validation should fail when MST blocks are missing"
···
863
718
.await
864
719
.unwrap();
865
720
866
866
-
let result = firehose_commit.validate_v1_1(&pubkey).await;
721
721
+
let result = validate_v1_1(&firehose_commit, &pubkey).await;
867
722
assert!(
868
723
result.is_err(),
869
724
"Validation should fail when commit has wrong MST root"
···
905
760
906
761
firehose_commit.repo = wrong_did;
907
762
908
908
-
let result = firehose_commit.validate_v1_1(&pubkey).await;
763
763
+
let result = validate_v1_1(&firehose_commit, &pubkey).await;
909
764
assert!(
910
765
result.is_err(),
911
766
"Validation should fail with mismatched DID"
···
952
807
.await
953
808
.unwrap();
954
809
955
955
-
let result = firehose_commit.validate_v1_1(&wrong_pubkey).await;
810
810
+
let result = validate_v1_1(&firehose_commit, &wrong_pubkey).await;
956
811
assert!(
957
812
result.is_err(),
958
813
"Validation should fail with wrong public key"
···
993
848
// Strip prev_data to make it invalid for v1.1
994
849
firehose_commit.prev_data = None;
995
850
996
996
-
let result = firehose_commit.validate_v1_1(&pubkey).await;
851
851
+
let result = validate_v1_1(&firehose_commit, &pubkey).await;
997
852
assert!(
998
853
result.is_err(),
999
854
"v1.1 validation should fail without prev_data"
···
1040
895
// Use wrong prev_data CID (point to commit instead of MST root)
1041
896
firehose_commit.prev_data = Some(firehose_commit.commit.clone());
1042
897
1043
1043
-
let result = firehose_commit.validate_v1_1(&pubkey).await;
898
898
+
let result = validate_v1_1(&firehose_commit, &pubkey).await;
1044
899
assert!(
1045
900
result.is_err(),
1046
901
"Validation should fail with wrong prev_data CID"
+19
-7
crates/jacquard-repo/src/mst/diff.rs
···
170
170
path: key.as_str().into(),
171
171
cid: Some(CidLink::from(*cid)),
172
172
prev: None,
173
173
+
extra_data: None,
173
174
});
174
175
}
175
176
···
180
181
path: key.as_str().into(),
181
182
cid: Some(CidLink::from(*new_cid)),
182
183
prev: Some(CidLink::from(*old_cid)),
184
184
+
extra_data: None,
183
185
});
184
186
}
185
187
···
190
192
path: key.as_str().into(),
191
193
cid: None, // null for deletes
192
194
prev: Some(CidLink::from(*old_cid)),
195
195
+
extra_data: None,
193
196
});
194
197
}
195
198
···
220
223
// Remove duplicate blocks: nodes that appear in both new_mst_blocks and removed_mst_blocks
221
224
// are unchanged nodes that were traversed during the diff but shouldn't be counted as created/deleted.
222
225
// This happens when we step into subtrees with different parent CIDs but encounter identical child nodes.
223
223
-
let created_set: std::collections::HashSet<_> = diff.new_mst_blocks.keys().copied().collect();
224
224
-
let removed_set: std::collections::HashSet<_> = diff.removed_mst_blocks.iter().copied().collect();
225
225
-
let duplicates: std::collections::HashSet<_> = created_set.intersection(&removed_set).copied().collect();
226
226
+
let created_set: std::collections::HashSet<_> =
227
227
+
diff.new_mst_blocks.keys().copied().collect();
228
228
+
let removed_set: std::collections::HashSet<_> =
229
229
+
diff.removed_mst_blocks.iter().copied().collect();
230
230
+
let duplicates: std::collections::HashSet<_> =
231
231
+
created_set.intersection(&removed_set).copied().collect();
226
232
227
227
-
diff.new_mst_blocks.retain(|cid, _| !duplicates.contains(cid));
228
228
-
diff.removed_mst_blocks.retain(|cid| !duplicates.contains(cid));
233
233
+
diff.new_mst_blocks
234
234
+
.retain(|cid, _| !duplicates.contains(cid));
235
235
+
diff.removed_mst_blocks
236
236
+
.retain(|cid| !duplicates.contains(cid));
229
237
230
238
Ok(diff)
231
239
}
···
420
428
// Serialize the MST node
421
429
let entries = tree.get_entries().await?;
422
430
let node_data = serialize_node_data(&entries).await?;
423
423
-
let cbor = serde_ipld_dagcbor::to_vec(&node_data)
424
424
-
.map_err(|e| RepoError::serialization(e).with_context(format!("serializing MST node for diff tracking: {}", tree_cid)))?;
431
431
+
let cbor = serde_ipld_dagcbor::to_vec(&node_data).map_err(|e| {
432
432
+
RepoError::serialization(e).with_context(format!(
433
433
+
"serializing MST node for diff tracking: {}",
434
434
+
tree_cid
435
435
+
))
436
436
+
})?;
425
437
426
438
// Track the serialized block
427
439
diff.new_mst_blocks.insert(tree_cid, Bytes::from(cbor));
+2
-1
crates/jacquard-repo/src/repo.rs
···
82
82
repo: repo.clone().into_static(),
83
83
rev: self.rev.clone(),
84
84
seq,
85
85
-
since: self.since.clone().unwrap_or_else(|| self.rev.clone()),
85
85
+
since: Some(self.since.clone().unwrap_or_else(|| self.rev.clone())),
86
86
time,
87
87
commit: CidLink::from(self.cid),
88
88
blocks: blocks_car.into(),
···
91
91
blobs,
92
92
too_big: false,
93
93
rebase: false,
94
94
+
extra_data: None,
94
95
})
95
96
}
96
97
}
+6
-9
crates/jacquard-repo/tests/large_proof_tests.rs
···
10
10
use jacquard_common::types::value::RawData;
11
11
use jacquard_repo::Repository;
12
12
use jacquard_repo::car::read_car_header;
13
13
+
use jacquard_repo::commit::firehose::validate_v1_1;
13
14
use jacquard_repo::mst::RecordWriteOp;
14
15
use jacquard_repo::storage::{BlockStore, MemoryBlockStore};
15
16
use rand::Rng;
···
224
225
.await
225
226
.unwrap();
226
227
227
227
-
firehose_commit
228
228
-
.validate_v1_1(&pubkey)
228
228
+
validate_v1_1(&firehose_commit, &pubkey)
229
229
.await
230
230
.expect("Initial batch should validate");
231
231
···
266
266
.await
267
267
.unwrap();
268
268
269
269
-
firehose_commit
270
270
-
.validate_v1_1(&pubkey)
269
269
+
validate_v1_1(&firehose_commit, &pubkey)
271
270
.await
272
271
.unwrap_or_else(|e| {
273
272
eprintln!(
···
336
335
.await
337
336
.unwrap();
338
337
339
339
-
firehose_commit.validate_v1_1(&pubkey).await.unwrap();
338
338
+
validate_v1_1(&firehose_commit, &pubkey).await.unwrap();
340
339
341
340
for batch_num in 1..=5000 {
342
341
let batch_size = rng.gen_range(1..=20);
···
355
354
.await
356
355
.unwrap();
357
356
358
358
-
firehose_commit
359
359
-
.validate_v1_1(&pubkey)
357
357
+
validate_v1_1(&firehose_commit, &pubkey)
360
358
.await
361
359
.unwrap_or_else(|e| {
362
360
panic!(
···
441
439
.await
442
440
.unwrap();
443
441
444
444
-
firehose_commit
445
445
-
.validate_v1_1(&pubkey)
442
442
+
validate_v1_1(&firehose_commit, &pubkey)
446
443
.await
447
444
.unwrap_or_else(|e| panic!("Fixture validation failed at batch {}: {}", batch_num, e));
448
445
}