Fast and robust AT Protocol (atproto) CAR file processing in Rust

Compare changes

Choose any two refs to compare.

+336 -3
+260 -3
Cargo.lock
··· 94 94 source = "registry+https://github.com/rust-lang/crates.io-index" 95 95 checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" 96 96 97 + [[package]] 98 + name = "arrayref" 99 + version = "0.3.9" 100 + source = "registry+https://github.com/rust-lang/crates.io-index" 101 + checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" 102 + 103 + [[package]] 104 + name = "arrayvec" 105 + version = "0.7.6" 106 + source = "registry+https://github.com/rust-lang/crates.io-index" 107 + checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" 108 + 97 109 [[package]] 98 110 name = "autocfg" 99 111 version = "1.5.0" ··· 131 143 "match-lookup", 132 144 ] 133 145 146 + [[package]] 147 + name = "bitflags" 148 + version = "1.3.2" 149 + source = "registry+https://github.com/rust-lang/crates.io-index" 150 + checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 151 + 134 152 [[package]] 135 153 name = "bitflags" 136 154 version = "2.9.4" 137 155 source = "registry+https://github.com/rust-lang/crates.io-index" 138 156 checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" 139 157 158 + [[package]] 159 + name = "blake2b_simd" 160 + version = "1.0.4" 161 + source = "registry+https://github.com/rust-lang/crates.io-index" 162 + checksum = "b79834656f71332577234b50bfc009996f7449e0c056884e6a02492ded0ca2f3" 163 + dependencies = [ 164 + "arrayref", 165 + "arrayvec", 166 + "constant_time_eq", 167 + ] 168 + 169 + [[package]] 170 + name = "blake2s_simd" 171 + version = "1.0.4" 172 + source = "registry+https://github.com/rust-lang/crates.io-index" 173 + checksum = "ee29928bad1e3f94c9d1528da29e07a1d3d04817ae8332de1e8b846c8439f4b3" 174 + dependencies = [ 175 + "arrayref", 176 + "arrayvec", 177 + "constant_time_eq", 178 + ] 179 + 180 + [[package]] 181 + name = "blake3" 182 + version = "1.8.3" 183 + source = "registry+https://github.com/rust-lang/crates.io-index" 184 + checksum = 
"2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" 185 + dependencies = [ 186 + "arrayref", 187 + "arrayvec", 188 + "cc", 189 + "cfg-if", 190 + "constant_time_eq", 191 + "cpufeatures", 192 + ] 193 + 140 194 [[package]] 141 195 name = "block-buffer" 142 196 version = "0.10.4" ··· 152 206 source = "registry+https://github.com/rust-lang/crates.io-index" 153 207 checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" 154 208 209 + [[package]] 210 + name = "byteorder" 211 + version = "1.5.0" 212 + source = "registry+https://github.com/rust-lang/crates.io-index" 213 + checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 214 + 155 215 [[package]] 156 216 name = "byteorder-lite" 157 217 version = "0.1.0" ··· 300 360 source = "registry+https://github.com/rust-lang/crates.io-index" 301 361 checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3" 302 362 363 + [[package]] 364 + name = "constant_time_eq" 365 + version = "0.4.2" 366 + source = "registry+https://github.com/rust-lang/crates.io-index" 367 + checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" 368 + 303 369 [[package]] 304 370 name = "core2" 305 371 version = "0.4.0" ··· 710 776 source = "registry+https://github.com/rust-lang/crates.io-index" 711 777 checksum = "ad6880c8d4a9ebf39c6e8b77007ce223f646a4d21ce29d99f70cb16420545425" 712 778 779 + [[package]] 780 + name = "indexmap" 781 + version = "2.13.0" 782 + source = "registry+https://github.com/rust-lang/crates.io-index" 783 + checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" 784 + dependencies = [ 785 + "equivalent", 786 + "hashbrown 0.16.1", 787 + ] 788 + 713 789 [[package]] 714 790 name = "interval-heap" 715 791 version = "0.0.5" ··· 725 801 source = "registry+https://github.com/rust-lang/crates.io-index" 726 802 checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" 727 803 dependencies = [ 728 - 
"bitflags", 804 + "bitflags 2.9.4", 729 805 "cfg-if", 730 806 "libc", 731 807 ] ··· 812 888 "wasm-bindgen", 813 889 ] 814 890 891 + [[package]] 892 + name = "keccak" 893 + version = "0.1.5" 894 + source = "registry+https://github.com/rust-lang/crates.io-index" 895 + checksum = "ecc2af9a1119c51f12a14607e783cb977bde58bc069ff0c3da1095e635d70654" 896 + dependencies = [ 897 + "cpufeatures", 898 + ] 899 + 815 900 [[package]] 816 901 name = "libc" 817 902 version = "0.2.176" ··· 939 1024 "unsigned-varint 0.8.0", 940 1025 ] 941 1026 1027 + [[package]] 1028 + name = "multihash-codetable" 1029 + version = "0.1.4" 1030 + source = "registry+https://github.com/rust-lang/crates.io-index" 1031 + checksum = "67996849749d25f1da9f238e8ace2ece8f9d6bdf3f9750aaf2ae7de3a5cad8ea" 1032 + dependencies = [ 1033 + "blake2b_simd", 1034 + "blake2s_simd", 1035 + "blake3", 1036 + "core2", 1037 + "digest", 1038 + "multihash-derive", 1039 + "ripemd", 1040 + "sha1", 1041 + "sha2", 1042 + "sha3", 1043 + "strobe-rs", 1044 + ] 1045 + 1046 + [[package]] 1047 + name = "multihash-derive" 1048 + version = "0.9.1" 1049 + source = "registry+https://github.com/rust-lang/crates.io-index" 1050 + checksum = "1f1b7edab35d920890b88643a765fc9bd295cf0201f4154dda231bef9b8404eb" 1051 + dependencies = [ 1052 + "core2", 1053 + "multihash", 1054 + "multihash-derive-impl", 1055 + ] 1056 + 1057 + [[package]] 1058 + name = "multihash-derive-impl" 1059 + version = "0.1.2" 1060 + source = "registry+https://github.com/rust-lang/crates.io-index" 1061 + checksum = "e3dc7141bd06405929948754f0628d247f5ca1865be745099205e5086da957cb" 1062 + dependencies = [ 1063 + "proc-macro-crate", 1064 + "proc-macro2", 1065 + "quote", 1066 + "syn 2.0.106", 1067 + "synstructure", 1068 + ] 1069 + 942 1070 [[package]] 943 1071 name = "num-traits" 944 1072 version = "0.2.19" ··· 1053 1181 "portable-atomic", 1054 1182 ] 1055 1183 1184 + [[package]] 1185 + name = "proc-macro-crate" 1186 + version = "3.4.0" 1187 + source = 
"registry+https://github.com/rust-lang/crates.io-index" 1188 + checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" 1189 + dependencies = [ 1190 + "toml_edit", 1191 + ] 1192 + 1056 1193 [[package]] 1057 1194 name = "proc-macro2" 1058 1195 version = "1.0.101" ··· 1113 1250 source = "registry+https://github.com/rust-lang/crates.io-index" 1114 1251 checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" 1115 1252 dependencies = [ 1116 - "bitflags", 1253 + "bitflags 2.9.4", 1117 1254 ] 1118 1255 1119 1256 [[package]] ··· 1160 1297 "log", 1161 1298 "mimalloc", 1162 1299 "multibase", 1300 + "multihash-codetable", 1163 1301 "serde", 1164 1302 "serde_bytes", 1165 1303 "serde_ipld_dagcbor", ··· 1169 1307 "tokio", 1170 1308 ] 1171 1309 1310 + [[package]] 1311 + name = "ripemd" 1312 + version = "0.1.3" 1313 + source = "registry+https://github.com/rust-lang/crates.io-index" 1314 + checksum = "bd124222d17ad93a644ed9d011a40f4fb64aa54275c08cc216524a9ea82fb09f" 1315 + dependencies = [ 1316 + "digest", 1317 + ] 1318 + 1172 1319 [[package]] 1173 1320 name = "rustc-demangle" 1174 1321 version = "0.1.26" ··· 1187 1334 source = "registry+https://github.com/rust-lang/crates.io-index" 1188 1335 checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" 1189 1336 dependencies = [ 1190 - "bitflags", 1337 + "bitflags 2.9.4", 1191 1338 "errno", 1192 1339 "libc", 1193 1340 "linux-raw-sys", ··· 1303 1450 "xxhash-rust", 1304 1451 ] 1305 1452 1453 + [[package]] 1454 + name = "sha1" 1455 + version = "0.10.6" 1456 + source = "registry+https://github.com/rust-lang/crates.io-index" 1457 + checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" 1458 + dependencies = [ 1459 + "cfg-if", 1460 + "cpufeatures", 1461 + "digest", 1462 + ] 1463 + 1306 1464 [[package]] 1307 1465 name = "sha2" 1308 1466 version = "0.10.9" ··· 1314 1472 "digest", 1315 1473 ] 1316 1474 1475 + [[package]] 1476 + name = "sha3" 1477 + version = 
"0.10.8" 1478 + source = "registry+https://github.com/rust-lang/crates.io-index" 1479 + checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60" 1480 + dependencies = [ 1481 + "digest", 1482 + "keccak", 1483 + ] 1484 + 1317 1485 [[package]] 1318 1486 name = "shlex" 1319 1487 version = "1.3.0" ··· 1360 1528 "lock_api", 1361 1529 ] 1362 1530 1531 + [[package]] 1532 + name = "strobe-rs" 1533 + version = "0.10.0" 1534 + source = "registry+https://github.com/rust-lang/crates.io-index" 1535 + checksum = "98fe17535ea31344936cc58d29fec9b500b0452ddc4cc24c429c8a921a0e84e5" 1536 + dependencies = [ 1537 + "bitflags 1.3.2", 1538 + "byteorder", 1539 + "keccak", 1540 + "subtle", 1541 + "zeroize", 1542 + ] 1543 + 1363 1544 [[package]] 1364 1545 name = "strsim" 1365 1546 version = "0.11.1" 1366 1547 source = "registry+https://github.com/rust-lang/crates.io-index" 1367 1548 checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 1368 1549 1550 + [[package]] 1551 + name = "subtle" 1552 + version = "2.6.1" 1553 + source = "registry+https://github.com/rust-lang/crates.io-index" 1554 + checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" 1555 + 1369 1556 [[package]] 1370 1557 name = "syn" 1371 1558 version = "1.0.109" ··· 1388 1575 "unicode-ident", 1389 1576 ] 1390 1577 1578 + [[package]] 1579 + name = "synstructure" 1580 + version = "0.13.2" 1581 + source = "registry+https://github.com/rust-lang/crates.io-index" 1582 + checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" 1583 + dependencies = [ 1584 + "proc-macro2", 1585 + "quote", 1586 + "syn 2.0.106", 1587 + ] 1588 + 1391 1589 [[package]] 1392 1590 name = "tempfile" 1393 1591 version = "3.23.0" ··· 1482 1680 "syn 2.0.106", 1483 1681 ] 1484 1682 1683 + [[package]] 1684 + name = "toml_datetime" 1685 + version = "0.7.5+spec-1.1.0" 1686 + source = "registry+https://github.com/rust-lang/crates.io-index" 1687 + checksum = 
"92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" 1688 + dependencies = [ 1689 + "serde_core", 1690 + ] 1691 + 1692 + [[package]] 1693 + name = "toml_edit" 1694 + version = "0.23.10+spec-1.0.0" 1695 + source = "registry+https://github.com/rust-lang/crates.io-index" 1696 + checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" 1697 + dependencies = [ 1698 + "indexmap", 1699 + "toml_datetime", 1700 + "toml_parser", 1701 + "winnow", 1702 + ] 1703 + 1704 + [[package]] 1705 + name = "toml_parser" 1706 + version = "1.0.6+spec-1.1.0" 1707 + source = "registry+https://github.com/rust-lang/crates.io-index" 1708 + checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" 1709 + dependencies = [ 1710 + "winnow", 1711 + ] 1712 + 1485 1713 [[package]] 1486 1714 name = "typenum" 1487 1715 version = "1.19.0" ··· 1789 2017 source = "registry+https://github.com/rust-lang/crates.io-index" 1790 2018 checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" 1791 2019 2020 + [[package]] 2021 + name = "winnow" 2022 + version = "0.7.14" 2023 + source = "registry+https://github.com/rust-lang/crates.io-index" 2024 + checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" 2025 + dependencies = [ 2026 + "memchr", 2027 + ] 2028 + 1792 2029 [[package]] 1793 2030 name = "wit-bindgen" 1794 2031 version = "0.46.0" ··· 1820 2057 "quote", 1821 2058 "syn 2.0.106", 1822 2059 ] 2060 + 2061 + [[package]] 2062 + name = "zeroize" 2063 + version = "1.8.2" 2064 + source = "registry+https://github.com/rust-lang/crates.io-index" 2065 + checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" 2066 + dependencies = [ 2067 + "zeroize_derive", 2068 + ] 2069 + 2070 + [[package]] 2071 + name = "zeroize_derive" 2072 + version = "1.4.3" 2073 + source = "registry+https://github.com/rust-lang/crates.io-index" 2074 + checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" 2075 
+ dependencies = [ 2076 + "proc-macro2", 2077 + "quote", 2078 + "syn 2.0.106", 2079 + ]
+31
Cargo.toml
··· 18 18 sha2 = "0.10.9" # note: hmac-sha256 is simpler, smaller, benches ~15ns slower 19 19 thiserror = "2.0.17" 20 20 tokio = { version = "1.47.1", features = ["rt", "sync"] } 21 + multihash-codetable = { version = "0.1.4", features = ["sha2"] } 21 22 22 23 [dev-dependencies] 23 24 clap = { version = "4.5.48", features = ["derive"] } 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + # [[bench]] 49 + # name = "leading" 50 + # harness = false 51 + 52 + [[bench]] 53 + name = "cid-check" 54 + harness = false
+45
benches/cid-check.rs
··· 1 + use cid::Cid; 2 + use criterion::{Criterion, criterion_group, criterion_main}; 3 + use multihash_codetable::{Code, MultihashDigest}; 4 + use sha2::{Digest, Sha256}; 5 + 6 + fn multihash_verify(given: Cid, block: &[u8]) -> bool { 7 + let calculated = Cid::new_v1(0x71, Code::Sha2_256.digest(block)); 8 + calculated == given 9 + } 10 + 11 + fn effortful_verify(given: Cid, block: &[u8]) -> bool { 12 + // we know we're in atproto, so we can make a few assumptions 13 + if given.version() != cid::Version::V1 { 14 + return false; 15 + } 16 + let (codec, given_digest, _) = given.hash().into_inner(); 17 + if codec != 0x12 { 18 + return false; 19 + } 20 + given_digest[..32] == *Sha256::digest(block) 21 + } 22 + 23 + fn fastloose_verify(given: Cid, block: &[u8]) -> bool { 24 + let (_, given_digest, _) = given.hash().into_inner(); 25 + given_digest[..32] == *Sha256::digest(block) 26 + } 27 + 28 + pub fn criterion_benchmark(c: &mut Criterion) { 29 + let some_bytes: Vec<u8> = vec![0x1a, 0x00, 0xAA, 0x39, 0x8C].repeat(100); 30 + let cid = Cid::new_v1(0x71, Code::Sha2_256.digest(&some_bytes)); 31 + 32 + let mut g = c.benchmark_group("CID check"); 33 + g.bench_function("multihash", |b| { 34 + b.iter(|| multihash_verify(cid, &some_bytes)) 35 + }); 36 + g.bench_function("effortful", |b| { 37 + b.iter(|| effortful_verify(cid, &some_bytes)) 38 + }); 39 + g.bench_function("fastloose", |b| { 40 + b.iter(|| fastloose_verify(cid, &some_bytes)) 41 + }); 42 + } 43 + 44 + criterion_group!(benches, criterion_benchmark); 45 + criterion_main!(benches);