A better Rust ATProto crate

Down to 4.5% oversupply when validating against retr0id's test suite; still figuring out the pattern.

Orual 3f2c672b 2a8ccfe2

Changed files
+774 -9
crates
+2
Cargo.lock
··· 2579 2579 name = "jacquard-repo" 2580 2580 version = "0.7.0" 2581 2581 dependencies = [ 2582 + "anyhow", 2582 2583 "bytes", 2583 2584 "cid", 2584 2585 "ed25519-dalek", ··· 2597 2598 "serde_bytes", 2598 2599 "serde_ipld_dagcbor", 2599 2600 "serde_ipld_dagjson", 2601 + "serde_json", 2600 2602 "sha2", 2601 2603 "smol_str", 2602 2604 "tempfile",
+3 -1
crates/jacquard-repo/Cargo.toml
··· 56 56 57 57 [dev-dependencies] 58 58 serde_ipld_dagjson = "0.2" 59 - tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread"] } 59 + tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "fs"] } 60 60 tempfile = "3.14" 61 61 rand = "0.8" 62 62 hex = "0.4" 63 + anyhow = "1.0" 64 + serde_json = "1.0" 63 65 64 66 [package.metadata.docs.rs] 65 67 all-features = true
+11
crates/jacquard-repo/src/mst/diff.rs
··· 216 216 pub async fn diff(&self, other: &Mst<S>) -> Result<MstDiff> { 217 217 let mut diff = MstDiff::new(); 218 218 diff_recursive(self, other, &mut diff).await?; 219 + 220 + // Remove duplicate blocks: nodes that appear in both new_mst_blocks and removed_mst_blocks 221 + // are unchanged nodes that were traversed during the diff but shouldn't be counted as created/deleted. 222 + // This happens when we step into subtrees with different parent CIDs but encounter identical child nodes. 223 + let created_set: std::collections::HashSet<_> = diff.new_mst_blocks.keys().copied().collect(); 224 + let removed_set: std::collections::HashSet<_> = diff.removed_mst_blocks.iter().copied().collect(); 225 + let duplicates: std::collections::HashSet<_> = created_set.intersection(&removed_set).copied().collect(); 226 + 227 + diff.new_mst_blocks.retain(|cid, _| !duplicates.contains(cid)); 228 + diff.removed_mst_blocks.retain(|cid| !duplicates.contains(cid)); 229 + 219 230 Ok(diff) 220 231 } 221 232 }
+21 -8
crates/jacquard-repo/src/repo.rs
··· 469 469 .collect(); 470 470 471 471 // Step 4: Build blocks and relevant_blocks collections using diff tracking 472 + // 473 + // CRITICAL: This logic is validated against 16384 test cases in tests/mst_diff_suite.rs 474 + // Any changes here MUST pass that test (zero missing blocks required for inductive validation) 475 + // 476 + // Inductive validation requirements (sync v1.1): 477 + // - Include MST nodes along operation paths in BOTH old and new trees 478 + // - Filter out deleted MST blocks (they're in removed_mst_blocks) 479 + // - Include all new record data (leaf_blocks) 472 480 let mut blocks = diff.new_mst_blocks; 481 + blocks.extend(leaf_blocks.clone()); // Include record data blocks 473 482 let mut relevant_blocks = BTreeMap::new(); 483 + relevant_blocks.extend(leaf_blocks); // Include record data in relevant blocks too 474 484 475 485 for op in ops { 476 486 let key = format_smolstr!("{}/{}", op.collection().as_ref(), op.rkey().as_ref()); 487 + // New tree path (inclusion proof for creates/updates, exclusion for deletes) 477 488 updated_tree 478 489 .blocks_for_path(&key, &mut relevant_blocks) 479 490 .await?; 480 491 481 - // For CREATE ops in multi-op commits, include old tree paths. 482 - // Empirically necessary: tree restructuring from multiple creates 483 - // can access old MST nodes during inversion (reason TBD). 484 - if let RecordWriteOp::Create { .. 
} = op 485 - && ops.len() > 1 486 - { 487 - self.mst.blocks_for_path(&key, &mut relevant_blocks).await?; 488 - } 492 + // Old tree path (needed for inductive validation) 493 + // - CREATE: exclusion proof (key didn't exist) 494 + // - UPDATE: show what changed 495 + // - DELETE: show what was deleted 496 + self.mst.blocks_for_path(&key, &mut relevant_blocks).await?; 489 497 } 498 + 499 + // Filter out deleted blocks before combining 500 + let removed_set: std::collections::HashSet<_> = 501 + diff.removed_mst_blocks.iter().copied().collect(); 502 + relevant_blocks.retain(|cid, _| !removed_set.contains(cid)); 490 503 491 504 let deleted_cids = diff.removed_cids; 492 505
+210
crates/jacquard-repo/tests/mst_diff_debug.rs
··· 1 + //! Debug test for inspecting MST diff block tracking 2 + //! 3 + //! Loads a specific failing test case and shows exactly which blocks we compute 4 + //! vs what's expected. 5 + 6 + use jacquard_repo::mst::Mst; 7 + use jacquard_repo::storage::MemoryBlockStore; 8 + use jacquard_repo::car::parse_car_bytes; 9 + use std::collections::{BTreeMap, BTreeSet}; 10 + use std::path::Path; 11 + use cid::Cid as IpldCid; 12 + use bytes::Bytes; 13 + use serde::{Deserialize, Serialize}; 14 + use std::sync::Arc; 15 + 16 + const TEST_SUITE_PATH: &str = "/home/orual/Git_Repos/mst-test-suite"; 17 + 18 + #[derive(Debug, Deserialize, Serialize)] 19 + struct MstDiffTestCase { 20 + #[serde(rename = "$type")] 21 + test_type: String, 22 + description: String, 23 + inputs: TestInputs, 24 + results: ExpectedResults, 25 + } 26 + 27 + #[derive(Debug, Deserialize, Serialize)] 28 + struct TestInputs { 29 + mst_a: String, 30 + mst_b: String, 31 + } 32 + 33 + #[derive(Debug, Deserialize, Serialize)] 34 + struct ExpectedResults { 35 + created_nodes: Vec<String>, 36 + deleted_nodes: Vec<String>, 37 + record_ops: Vec<serde_json::Value>, 38 + proof_nodes: Vec<String>, 39 + inductive_proof_nodes: Vec<String>, 40 + #[serde(skip_serializing_if = "Option::is_none")] 41 + firehose_cids: Option<serde_json::Value>, 42 + } 43 + 44 + async fn load_car(path: &Path) -> anyhow::Result<(IpldCid, BTreeMap<IpldCid, Bytes>)> { 45 + let bytes = tokio::fs::read(path).await?; 46 + let parsed = parse_car_bytes(&bytes).await?; 47 + Ok((parsed.root, parsed.blocks)) 48 + } 49 + 50 + fn cid_to_string(cid: &IpldCid) -> String { 51 + cid.to_string() 52 + } 53 + 54 + #[tokio::test] 55 + #[ignore] // Local-only: requires mst-test-suite at /home/orual/Git_Repos/mst-test-suite 56 + async fn debug_exhaustive_001_009() { 57 + let suite_root = Path::new(TEST_SUITE_PATH); 58 + let test_path = suite_root.join("tests/diff/exhaustive/exhaustive_001_009.json"); 59 + 60 + // Load test case 61 + let test_json = 
tokio::fs::read_to_string(&test_path).await.unwrap(); 62 + let test_case: MstDiffTestCase = serde_json::from_str(&test_json).unwrap(); 63 + 64 + // Load CAR files 65 + let car_a_path = suite_root.join(&test_case.inputs.mst_a); 66 + let car_b_path = suite_root.join(&test_case.inputs.mst_b); 67 + 68 + let (root_a, blocks_a) = load_car(&car_a_path).await.unwrap(); 69 + let (root_b, blocks_b) = load_car(&car_b_path).await.unwrap(); 70 + 71 + // Create storage 72 + let mut all_blocks = blocks_a; 73 + all_blocks.extend(blocks_b); 74 + let storage = Arc::new(MemoryBlockStore::new_from_blocks(all_blocks)); 75 + 76 + // Load MSTs 77 + let mst_a = Mst::load(storage.clone(), root_a, None); 78 + let mst_b = Mst::load(storage.clone(), root_b, None); 79 + 80 + // Compute diff 81 + let diff = mst_a.diff(&mst_b).await.unwrap(); 82 + 83 + // Replicate create_commit's relevant_blocks logic 84 + let mut relevant_blocks = BTreeMap::new(); 85 + let ops_count = diff.creates.len() + diff.updates.len() + diff.deletes.len(); 86 + 87 + for (key, _cid) in &diff.creates { 88 + mst_b.blocks_for_path(key.as_str(), &mut relevant_blocks).await.unwrap(); 89 + if ops_count > 1 { 90 + mst_a.blocks_for_path(key.as_str(), &mut relevant_blocks).await.unwrap(); 91 + } 92 + } 93 + 94 + for (key, _new_cid, _old_cid) in &diff.updates { 95 + mst_b.blocks_for_path(key.as_str(), &mut relevant_blocks).await.unwrap(); 96 + } 97 + 98 + for (key, _old_cid) in &diff.deletes { 99 + mst_b.blocks_for_path(key.as_str(), &mut relevant_blocks).await.unwrap(); 100 + } 101 + 102 + // Filter out removed blocks before combining 103 + let removed_set: std::collections::HashSet<_> = diff.removed_mst_blocks.iter().copied().collect(); 104 + let filtered_relevant: BTreeMap<_, _> = relevant_blocks 105 + .into_iter() 106 + .filter(|(cid, _)| !removed_set.contains(cid)) 107 + .collect(); 108 + 109 + let mut all_proof_blocks = diff.new_mst_blocks.clone(); 110 + all_proof_blocks.extend(filtered_relevant); 111 + 112 + // Compare 
created_nodes 113 + let actual_created: BTreeSet<String> = diff 114 + .new_mst_blocks 115 + .keys() 116 + .map(cid_to_string) 117 + .collect(); 118 + let expected_created: BTreeSet<String> = test_case 119 + .results 120 + .created_nodes 121 + .iter() 122 + .cloned() 123 + .collect(); 124 + 125 + println!("\n=== Created Nodes ==="); 126 + println!("Expected ({} blocks):", expected_created.len()); 127 + for cid in &expected_created { 128 + println!(" {}", cid); 129 + } 130 + println!("\nActual ({} blocks):", actual_created.len()); 131 + for cid in &actual_created { 132 + let marker = if expected_created.contains(cid) { " " } else { "* EXTRA" }; 133 + println!(" {}{}", cid, marker); 134 + } 135 + 136 + // Compare deleted_nodes 137 + let actual_deleted: BTreeSet<String> = diff 138 + .removed_mst_blocks 139 + .iter() 140 + .map(cid_to_string) 141 + .collect(); 142 + let expected_deleted: BTreeSet<String> = test_case 143 + .results 144 + .deleted_nodes 145 + .iter() 146 + .cloned() 147 + .collect(); 148 + 149 + println!("\n=== Deleted Nodes ==="); 150 + println!("Expected ({} blocks):", expected_deleted.len()); 151 + for cid in &expected_deleted { 152 + println!(" {}", cid); 153 + } 154 + println!("\nActual ({} blocks):", actual_deleted.len()); 155 + for cid in &actual_deleted { 156 + let marker = if expected_deleted.contains(cid) { " " } else { "* EXTRA" }; 157 + println!(" {}{}", cid, marker); 158 + } 159 + 160 + // Show record operations 161 + println!("\n=== Record Operations ==="); 162 + println!("Creates: {}", diff.creates.len()); 163 + for (key, cid) in &diff.creates { 164 + println!(" CREATE {} -> {}", key, cid_to_string(cid)); 165 + } 166 + println!("Updates: {}", diff.updates.len()); 167 + for (key, new_cid, old_cid) in &diff.updates { 168 + println!(" UPDATE {} {} -> {}", key, cid_to_string(old_cid), cid_to_string(new_cid)); 169 + } 170 + println!("Deletes: {}", diff.deletes.len()); 171 + for (key, cid) in &diff.deletes { 172 + println!(" DELETE {} (was {})", 
key, cid_to_string(cid)); 173 + } 174 + 175 + // Show proof nodes comparison 176 + println!("\n=== Proof Nodes (for reference) ==="); 177 + println!("Expected proof_nodes ({} blocks):", test_case.results.proof_nodes.len()); 178 + for cid in &test_case.results.proof_nodes { 179 + println!(" {}", cid); 180 + } 181 + 182 + println!("\nExpected inductive_proof_nodes ({} blocks):", test_case.results.inductive_proof_nodes.len()); 183 + for cid in &test_case.results.inductive_proof_nodes { 184 + let marker = if test_case.results.proof_nodes.contains(cid) { " " } else { "* EXTRA for inductive" }; 185 + println!(" {}{}", cid, marker); 186 + } 187 + 188 + println!("\n=== Our Computed Proof (all_proof_blocks) ==="); 189 + let computed_proof: BTreeSet<String> = all_proof_blocks.keys().map(cid_to_string).collect(); 190 + let expected_inductive: BTreeSet<String> = test_case.results.inductive_proof_nodes.iter().cloned().collect(); 191 + 192 + println!("Computed ({} blocks):", computed_proof.len()); 193 + for cid in &computed_proof { 194 + let marker = if expected_inductive.contains(cid) { 195 + "" 196 + } else { 197 + " * EXTRA (not in expected)" 198 + }; 199 + println!(" {}{}", cid, marker); 200 + } 201 + 202 + println!("\nMissing from our computation:"); 203 + for cid in &expected_inductive { 204 + if !computed_proof.contains(cid) { 205 + println!(" {} * MISSING", cid); 206 + } 207 + } 208 + 209 + // Don't fail the test, just show info 210 + }
+527
crates/jacquard-repo/tests/mst_diff_suite.rs
··· 1 + //! MST diff test suite runner 2 + //! 3 + //! Runs the mst-test-suite exhaustive diff test cases to validate: 4 + //! - record_ops (creates/updates/deletes with CIDs) 5 + //! - created_nodes (new MST blocks) 6 + //! - deleted_nodes (removed MST blocks) 7 + //! - proof_nodes (blocks needed for inclusion/exclusion proofs) 8 + //! - inductive_proof_nodes (blocks needed for inductive validation) 9 + 10 + use bytes::Bytes; 11 + use cid::Cid as IpldCid; 12 + use jacquard_repo::car::parse_car_bytes; 13 + use jacquard_repo::mst::{Mst, MstDiff}; 14 + use jacquard_repo::storage::MemoryBlockStore; 15 + use serde::{Deserialize, Serialize}; 16 + use std::collections::{BTreeMap, BTreeSet}; 17 + use std::path::{Path, PathBuf}; 18 + use std::sync::Arc; 19 + 20 + const TEST_SUITE_PATH: &str = "/home/orual/Git_Repos/mst-test-suite"; 21 + 22 + /// Test case format from mst-test-suite 23 + #[derive(Debug, Deserialize, Serialize)] 24 + struct MstDiffTestCase { 25 + #[serde(rename = "$type")] 26 + test_type: String, 27 + 28 + description: String, 29 + 30 + inputs: TestInputs, 31 + 32 + results: ExpectedResults, 33 + } 34 + 35 + #[derive(Debug, Deserialize, Serialize)] 36 + struct TestInputs { 37 + /// Path to CAR file for tree A (relative to test suite root) 38 + mst_a: String, 39 + 40 + /// Path to CAR file for tree B (relative to test suite root) 41 + mst_b: String, 42 + } 43 + 44 + #[derive(Debug, Deserialize, Serialize)] 45 + struct ExpectedResults { 46 + /// CIDs of newly created MST node blocks 47 + created_nodes: Vec<String>, 48 + 49 + /// CIDs of deleted MST node blocks 50 + deleted_nodes: Vec<String>, 51 + 52 + /// Record operations (sorted by rpath) 53 + record_ops: Vec<RecordOp>, 54 + 55 + /// CIDs of MST nodes required for inclusion/exclusion proofs 56 + proof_nodes: Vec<String>, 57 + 58 + /// CIDs of MST nodes required for inductive validation 59 + inductive_proof_nodes: Vec<String>, 60 + 61 + /// CIDs expected in firehose broadcast (mostly marked TODO in fixtures) 
62 + #[serde(skip_serializing_if = "Option::is_none")] 63 + firehose_cids: Option<serde_json::Value>, 64 + } 65 + 66 + #[derive(Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)] 67 + struct RecordOp { 68 + /// Record path (rpath) 69 + rpath: String, 70 + 71 + /// Old CID (null for creates) 72 + old_value: Option<String>, 73 + 74 + /// New CID (null for deletes) 75 + new_value: Option<String>, 76 + } 77 + 78 + /// Load and parse a CAR file, returning blocks and root CID 79 + async fn load_car(path: &Path) -> anyhow::Result<(IpldCid, BTreeMap<IpldCid, Bytes>)> { 80 + let bytes = tokio::fs::read(path).await?; 81 + let parsed = parse_car_bytes(&bytes).await?; 82 + Ok((parsed.root, parsed.blocks)) 83 + } 84 + 85 + /// Convert base32 CID string to IpldCid 86 + fn parse_cid(cid_str: &str) -> anyhow::Result<IpldCid> { 87 + Ok(cid_str.parse()?) 88 + } 89 + 90 + /// Convert IpldCid to base32 string (for comparison) 91 + fn cid_to_string(cid: &IpldCid) -> String { 92 + cid.to_string() 93 + } 94 + 95 + /// Find all .json test files in a directory recursively 96 + fn find_test_files(dir: &Path) -> std::io::Result<Vec<PathBuf>> { 97 + let mut test_files = Vec::new(); 98 + 99 + if dir.is_dir() { 100 + for entry in std::fs::read_dir(dir)? 
{ 101 + let entry = entry?; 102 + let path = entry.path(); 103 + 104 + if path.is_dir() { 105 + test_files.extend(find_test_files(&path)?); 106 + } else if path.extension().and_then(|s| s.to_str()) == Some("json") { 107 + test_files.push(path); 108 + } 109 + } 110 + } 111 + 112 + Ok(test_files) 113 + } 114 + 115 + /// Run a single test case 116 + async fn run_test_case(test_path: &Path, suite_root: &Path) -> anyhow::Result<TestResult> { 117 + // Parse test case JSON 118 + let test_json = tokio::fs::read_to_string(test_path).await?; 119 + let test_case: MstDiffTestCase = serde_json::from_str(&test_json)?; 120 + 121 + // Load CAR files 122 + let car_a_path = suite_root.join(&test_case.inputs.mst_a); 123 + let car_b_path = suite_root.join(&test_case.inputs.mst_b); 124 + 125 + let (root_a, blocks_a) = load_car(&car_a_path).await?; 126 + let (root_b, blocks_b) = load_car(&car_b_path).await?; 127 + 128 + // Create storage with both sets of blocks 129 + let mut all_blocks = blocks_a; 130 + all_blocks.extend(blocks_b); 131 + let storage = Arc::new(MemoryBlockStore::new_from_blocks(all_blocks)); 132 + 133 + // Load MST instances 134 + let mst_a = Mst::load(storage.clone(), root_a, None); 135 + let mst_b = Mst::load(storage.clone(), root_b, None); 136 + 137 + // Compute diff 138 + let diff = mst_a.diff(&mst_b).await?; 139 + 140 + // Replicate create_commit's relevant_blocks logic (from repo.rs:276-290) 141 + let mut relevant_blocks = BTreeMap::new(); 142 + let ops_count = diff.creates.len() + diff.updates.len() + diff.deletes.len(); 143 + 144 + // For each operation, collect blocks along the path in BOTH trees for inductive validation 145 + for (key, _cid) in &diff.creates { 146 + mst_b 147 + .blocks_for_path(key.as_str(), &mut relevant_blocks) 148 + .await?; 149 + // Always include old tree paths for CREATE (needed for exclusion proof) 150 + mst_a 151 + .blocks_for_path(key.as_str(), &mut relevant_blocks) 152 + .await?; 153 + } 154 + 155 + for (key, _new_cid, _old_cid) in 
&diff.updates { 156 + mst_b 157 + .blocks_for_path(key.as_str(), &mut relevant_blocks) 158 + .await?; 159 + // Include old tree paths for UPDATE (needed for inductive validation) 160 + mst_a 161 + .blocks_for_path(key.as_str(), &mut relevant_blocks) 162 + .await?; 163 + } 164 + 165 + for (key, _old_cid) in &diff.deletes { 166 + mst_b 167 + .blocks_for_path(key.as_str(), &mut relevant_blocks) 168 + .await?; 169 + // Include old tree paths for DELETE (needed for inductive validation) 170 + mst_a 171 + .blocks_for_path(key.as_str(), &mut relevant_blocks) 172 + .await?; 173 + } 174 + 175 + // Union of new_mst_blocks and relevant_blocks (for inductive proof) 176 + // NOTE: relevant_blocks may contain blocks from both old and new trees, 177 + // but we should exclude blocks that were deleted (in removed_mst_blocks) 178 + let removed_set: std::collections::HashSet<_> = 179 + diff.removed_mst_blocks.iter().copied().collect(); 180 + let filtered_relevant: BTreeMap<_, _> = relevant_blocks 181 + .into_iter() 182 + .filter(|(cid, _)| !removed_set.contains(cid)) 183 + .collect(); 184 + 185 + let mut all_proof_blocks = diff.new_mst_blocks.clone(); 186 + all_proof_blocks.extend(filtered_relevant); 187 + 188 + // Validate results 189 + let mut result = TestResult { 190 + test_name: test_path.file_name().unwrap().to_string_lossy().to_string(), 191 + description: test_case.description.clone(), 192 + passed: true, 193 + record_ops_match: false, 194 + created_nodes_match: false, 195 + deleted_nodes_match: false, 196 + proof_nodes_info: None, 197 + inductive_proof_nodes_info: None, 198 + errors: Vec::new(), 199 + }; 200 + 201 + // Validate record_ops 202 + let actual_ops = diff_to_record_ops(&diff); 203 + let expected_ops = test_case.results.record_ops; 204 + result.record_ops_match = actual_ops == expected_ops; 205 + if !result.record_ops_match { 206 + result.errors.push(format!( 207 + "Record ops mismatch: expected {} ops, got {}", 208 + expected_ops.len(), 209 + actual_ops.len() 210 
+ )); 211 + result.passed = false; 212 + } 213 + 214 + // Validate created_nodes 215 + let actual_created: BTreeSet<String> = diff.new_mst_blocks.keys().map(cid_to_string).collect(); 216 + let expected_created: BTreeSet<String> = 217 + test_case.results.created_nodes.iter().cloned().collect(); 218 + result.created_nodes_match = actual_created == expected_created; 219 + if !result.created_nodes_match { 220 + result.errors.push(format!( 221 + "Created nodes mismatch: expected {}, got {}", 222 + expected_created.len(), 223 + actual_created.len() 224 + )); 225 + result.passed = false; 226 + } 227 + 228 + // Validate deleted_nodes 229 + let actual_deleted: BTreeSet<String> = 230 + diff.removed_mst_blocks.iter().map(cid_to_string).collect(); 231 + let expected_deleted: BTreeSet<String> = 232 + test_case.results.deleted_nodes.iter().cloned().collect(); 233 + result.deleted_nodes_match = actual_deleted == expected_deleted; 234 + if !result.deleted_nodes_match { 235 + result.errors.push(format!( 236 + "Deleted nodes mismatch: expected {}, got {}", 237 + expected_deleted.len(), 238 + actual_deleted.len() 239 + )); 240 + result.passed = false; 241 + } 242 + 243 + // Compare proof_nodes (should equal new_mst_blocks) 244 + let expected_proof: BTreeSet<String> = test_case.results.proof_nodes.iter().cloned().collect(); 245 + let actual_proof: BTreeSet<String> = diff.new_mst_blocks.keys().map(cid_to_string).collect(); 246 + let proof_match_status = compute_match_status(&actual_proof, &expected_proof); 247 + 248 + result.proof_nodes_info = Some(ProofNodesInfo { 249 + expected: expected_proof.clone(), 250 + actual: actual_proof.clone(), 251 + match_status: proof_match_status, 252 + }); 253 + 254 + // Compare inductive_proof_nodes (should equal all_proof_blocks) 255 + let expected_inductive: BTreeSet<String> = test_case 256 + .results 257 + .inductive_proof_nodes 258 + .iter() 259 + .cloned() 260 + .collect(); 261 + let actual_inductive: BTreeSet<String> = 
all_proof_blocks.keys().map(cid_to_string).collect(); 262 + let inductive_match_status = compute_match_status(&actual_inductive, &expected_inductive); 263 + 264 + result.inductive_proof_nodes_info = Some(ProofNodesInfo { 265 + expected: expected_inductive.clone(), 266 + actual: actual_inductive.clone(), 267 + match_status: inductive_match_status, 268 + }); 269 + 270 + Ok(result) 271 + } 272 + 273 + /// Compute match status between actual and expected sets 274 + fn compute_match_status(actual: &BTreeSet<String>, expected: &BTreeSet<String>) -> MatchStatus { 275 + if actual == expected { 276 + MatchStatus::Exact 277 + } else if actual.is_subset(expected) { 278 + MatchStatus::Subset 279 + } else if actual.is_superset(expected) { 280 + MatchStatus::Superset 281 + } else { 282 + MatchStatus::Different 283 + } 284 + } 285 + 286 + /// Convert MstDiff to sorted record operations 287 + fn diff_to_record_ops(diff: &MstDiff) -> Vec<RecordOp> { 288 + let mut ops = Vec::new(); 289 + 290 + // Creates 291 + for (key, cid) in &diff.creates { 292 + ops.push(RecordOp { 293 + rpath: key.to_string(), 294 + old_value: None, 295 + new_value: Some(cid_to_string(cid)), 296 + }); 297 + } 298 + 299 + // Updates 300 + for (key, new_cid, old_cid) in &diff.updates { 301 + ops.push(RecordOp { 302 + rpath: key.to_string(), 303 + old_value: Some(cid_to_string(old_cid)), 304 + new_value: Some(cid_to_string(new_cid)), 305 + }); 306 + } 307 + 308 + // Deletes 309 + for (key, old_cid) in &diff.deletes { 310 + ops.push(RecordOp { 311 + rpath: key.to_string(), 312 + old_value: Some(cid_to_string(old_cid)), 313 + new_value: None, 314 + }); 315 + } 316 + 317 + // Sort by rpath 318 + ops.sort(); 319 + ops 320 + } 321 + 322 + /// Test result for a single test case 323 + #[derive(Debug)] 324 + struct TestResult { 325 + test_name: String, 326 + description: String, 327 + passed: bool, 328 + record_ops_match: bool, 329 + created_nodes_match: bool, 330 + deleted_nodes_match: bool, 331 + proof_nodes_info: 
Option<ProofNodesInfo>, 332 + inductive_proof_nodes_info: Option<ProofNodesInfo>, 333 + errors: Vec<String>, 334 + } 335 + 336 + #[derive(Debug)] 337 + struct ProofNodesInfo { 338 + expected: BTreeSet<String>, 339 + actual: BTreeSet<String>, 340 + match_status: MatchStatus, 341 + } 342 + 343 + #[derive(Debug)] 344 + enum MatchStatus { 345 + Exact, 346 + Subset, // actual is subset of expected (missing blocks) 347 + Superset, // actual is superset of expected (extra blocks) 348 + Different, // neither subset nor superset 349 + NotImplemented, 350 + } 351 + 352 + /// Summary statistics across all tests 353 + #[derive(Debug, Default)] 354 + struct TestSummary { 355 + total_tests: usize, 356 + passed_tests: usize, 357 + failed_tests: usize, 358 + record_ops_matches: usize, 359 + created_nodes_matches: usize, 360 + deleted_nodes_matches: usize, 361 + proof_exact_matches: usize, 362 + proof_subset_matches: usize, 363 + proof_superset_matches: usize, 364 + inductive_exact_matches: usize, 365 + inductive_subset_matches: usize, 366 + inductive_superset_matches: usize, 367 + } 368 + 369 + #[tokio::test] 370 + #[ignore] // Local-only: requires mst-test-suite at /home/orual/Git_Repos/mst-test-suite 371 + async fn run_mst_diff_suite() { 372 + let suite_root = Path::new(TEST_SUITE_PATH); 373 + let tests_dir = suite_root.join("tests"); 374 + 375 + // Find all test files 376 + let test_files = find_test_files(&tests_dir).expect("Failed to find test files"); 377 + 378 + println!("Found {} test files", test_files.len()); 379 + 380 + let mut summary = TestSummary::default(); 381 + let mut failed_tests = Vec::new(); 382 + 383 + for test_path in &test_files { 384 + summary.total_tests += 1; 385 + 386 + match run_test_case(test_path, suite_root).await { 387 + Ok(result) => { 388 + let passed = result.passed; 389 + let record_ops_match = result.record_ops_match; 390 + let created_nodes_match = result.created_nodes_match; 391 + let deleted_nodes_match = result.deleted_nodes_match; 392 + 
393 + // Track proof node match status 394 + if let Some(ref proof_info) = result.proof_nodes_info { 395 + match proof_info.match_status { 396 + MatchStatus::Exact => summary.proof_exact_matches += 1, 397 + MatchStatus::Subset => summary.proof_subset_matches += 1, 398 + MatchStatus::Superset => summary.proof_superset_matches += 1, 399 + _ => {} 400 + } 401 + } 402 + 403 + if let Some(ref inductive_info) = result.inductive_proof_nodes_info { 404 + match inductive_info.match_status { 405 + MatchStatus::Exact => summary.inductive_exact_matches += 1, 406 + MatchStatus::Subset => summary.inductive_subset_matches += 1, 407 + MatchStatus::Superset => summary.inductive_superset_matches += 1, 408 + _ => {} 409 + } 410 + } 411 + 412 + if passed { 413 + summary.passed_tests += 1; 414 + } else { 415 + summary.failed_tests += 1; 416 + failed_tests.push(result); 417 + } 418 + 419 + if record_ops_match { 420 + summary.record_ops_matches += 1; 421 + } 422 + if created_nodes_match { 423 + summary.created_nodes_matches += 1; 424 + } 425 + if deleted_nodes_match { 426 + summary.deleted_nodes_matches += 1; 427 + } 428 + } 429 + Err(e) => { 430 + summary.failed_tests += 1; 431 + eprintln!("Error running test {:?}: {}", test_path.file_name(), e); 432 + } 433 + } 434 + } 435 + 436 + // Print summary 437 + println!("\n=== MST Diff Suite Summary ==="); 438 + println!("Total tests: {}", summary.total_tests); 439 + println!("Passed: {}", summary.passed_tests); 440 + println!("Failed: {}", summary.failed_tests); 441 + println!(); 442 + println!( 443 + "Record ops matches: {}/{}", 444 + summary.record_ops_matches, summary.total_tests 445 + ); 446 + println!( 447 + "Created nodes matches: {}/{}", 448 + summary.created_nodes_matches, summary.total_tests 449 + ); 450 + println!( 451 + "Deleted nodes matches: {}/{}", 452 + summary.deleted_nodes_matches, summary.total_tests 453 + ); 454 + println!(); 455 + println!("Proof nodes (forward diff):"); 456 + println!(" Exact: {}", 
summary.proof_exact_matches); 457 + println!( 458 + " Subset (missing blocks): {}", 459 + summary.proof_subset_matches 460 + ); 461 + println!( 462 + " Superset (extra blocks): {}", 463 + summary.proof_superset_matches 464 + ); 465 + println!(); 466 + println!("Inductive proof nodes:"); 467 + println!(" Exact: {}", summary.inductive_exact_matches); 468 + println!( 469 + " Subset (missing blocks): {}", 470 + summary.inductive_subset_matches 471 + ); 472 + println!( 473 + " Superset (extra blocks): {}", 474 + summary.inductive_superset_matches 475 + ); 476 + 477 + // Collect tests with missing inductive proof blocks 478 + let mut missing_block_cases = Vec::new(); 479 + for test_path in &test_files { 480 + match run_test_case(test_path, suite_root).await { 481 + Ok(result) => { 482 + if let Some(ref info) = result.inductive_proof_nodes_info { 483 + if matches!(info.match_status, MatchStatus::Subset) { 484 + let missing: Vec<_> = info.expected.difference(&info.actual).cloned().collect(); 485 + missing_block_cases.push((result.test_name, missing)); 486 + } 487 + } 488 + } 489 + Err(_) => {} 490 + } 491 + } 492 + 493 + if !missing_block_cases.is_empty() { 494 + println!("\n=== CRITICAL: Tests Missing Inductive Proof Blocks ==="); 495 + println!("Total cases missing blocks: {}", missing_block_cases.len()); 496 + println!("\nFirst 10 cases:"); 497 + for (test_name, missing) in missing_block_cases.iter().take(10) { 498 + println!("\n{}", test_name); 499 + println!(" Missing {} blocks:", missing.len()); 500 + for cid in missing { 501 + println!(" {}", cid); 502 + } 503 + } 504 + } 505 + 506 + // Print first few failures for debugging 507 + if !failed_tests.is_empty() { 508 + println!("\n=== First 5 Failures (detailed) ==="); 509 + for result in failed_tests.iter().take(5) { 510 + println!("\nTest: {}", result.test_name); 511 + println!("Description: {}", result.description); 512 + for error in &result.errors { 513 + println!(" - {}", error); 514 + } 515 + } 516 + 517 + 
println!("\n=== Failure Summary ==="); 518 + println!("Total failures: {}", failed_tests.len()); 519 + } 520 + 521 + // Assert all tests passed 522 + assert_eq!( 523 + summary.failed_tests, 0, 524 + "{} tests failed (see output above)", 525 + summary.failed_tests 526 + ); 527 + }