Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

Compare changes

Choose any two refs to compare.

Changed files
+805 -130
constellation
src
consumer
links
slingshot
spacedust
+293 -66
Cargo.lock
··· 112 112 113 113 [[package]] 114 114 name = "anyhow" 115 - version = "1.0.97" 115 + version = "1.0.100" 116 116 source = "registry+https://github.com/rust-lang/crates.io-index" 117 - checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" 117 + checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" 118 118 119 119 [[package]] 120 120 name = "arbitrary" ··· 127 127 version = "1.7.1" 128 128 source = "registry+https://github.com/rust-lang/crates.io-index" 129 129 checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" 130 + 131 + [[package]] 132 + name = "arrayref" 133 + version = "0.3.9" 134 + source = "registry+https://github.com/rust-lang/crates.io-index" 135 + checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" 130 136 131 137 [[package]] 132 138 name = "arrayvec" ··· 192 198 "nom", 193 199 "num-traits", 194 200 "rusticata-macros", 195 - "thiserror 2.0.16", 201 + "thiserror 2.0.17", 196 202 "time", 197 203 ] 198 204 ··· 644 650 "axum", 645 651 "handlebars", 646 652 "serde", 647 - "thiserror 2.0.16", 653 + "thiserror 2.0.17", 648 654 ] 649 655 650 656 [[package]] ··· 673 679 version = "0.2.0" 674 680 source = "registry+https://github.com/rust-lang/crates.io-index" 675 681 checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" 682 + 683 + [[package]] 684 + name = "base256emoji" 685 + version = "1.0.2" 686 + source = "registry+https://github.com/rust-lang/crates.io-index" 687 + checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c" 688 + dependencies = [ 689 + "const-str", 690 + "match-lookup", 691 + ] 676 692 677 693 [[package]] 678 694 name = "base64" ··· 812 828 checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" 813 829 814 830 [[package]] 831 + name = "blake3" 832 + version = "1.8.2" 833 + source = "registry+https://github.com/rust-lang/crates.io-index" 834 + checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" 835 + dependencies = [ 836 + "arrayref", 837 + "arrayvec", 838 + "cc", 839 + "cfg-if", 840 + "constant_time_eq", 841 + ] 842 + 843 + [[package]] 815 844 name = "block-buffer" 816 845 version = "0.10.4" 817 846 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 839 868 checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 840 869 841 870 [[package]] 871 + name = "byteorder-lite" 872 + version = "0.1.0" 873 + source = "registry+https://github.com/rust-lang/crates.io-index" 874 + checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" 875 + 876 + [[package]] 842 877 name = "bytes" 843 878 version = "1.10.1" 844 879 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 851 886 checksum = "6236364b88b9b6d0bc181ba374cf1ab55ba3ef97a1cb6f8cddad48a273767fb5" 852 887 853 888 [[package]] 889 + name = "byteview" 890 + version = "0.8.0" 891 + source = "registry+https://github.com/rust-lang/crates.io-index" 892 + checksum = "1e6b0e42e210b794e14b152c6fe1a55831e30ef4a0f5dc39d73d714fb5f1906c" 893 + 894 + [[package]] 854 895 name = "bzip2-sys" 855 896 version = "0.1.13+1.0.8" 856 897 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 892 933 ] 893 934 894 935 [[package]] 936 + name = "cbor4ii" 937 + version = "0.2.14" 938 + source = "registry+https://github.com/rust-lang/crates.io-index" 939 + checksum = "b544cf8c89359205f4f990d0e6f3828db42df85b5dac95d09157a250eb0749c4" 940 + dependencies = [ 941 + "serde", 942 + ] 943 + 944 + [[package]] 945 + name = "cbor4ii" 946 + version = "1.2.0" 947 + source = "registry+https://github.com/rust-lang/crates.io-index" 948 + checksum = "b28d2802395e3bccd95cc4ae984bff7444b6c1f5981da46a41360c42a2c7e2d9" 949 + 950 + [[package]] 895 951 name = "cc" 896 952 version = "1.2.18" 897 953 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 976 1032 "multihash", 977 1033 "serde", 978 1034 "serde_bytes", 979 - "unsigned-varint", 1035 + "unsigned-varint 0.8.0", 980 1036 ] 981 1037 982 1038 [[package]] ··· 1087 1143 checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" 1088 1144 1089 1145 [[package]] 1146 + name = "const-str" 1147 + version = "0.4.3" 1148 + source = "registry+https://github.com/rust-lang/crates.io-index" 1149 + checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3" 1150 + 1151 + [[package]] 1152 + name = "constant_time_eq" 1153 + version = "0.3.1" 1154 + source = "registry+https://github.com/rust-lang/crates.io-index" 1155 + checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" 1156 + 1157 + [[package]] 1090 1158 name = "constellation" 1091 1159 version = "0.1.0" 1092 1160 dependencies = [ ··· 1353 1421 ] 1354 1422 1355 1423 [[package]] 1424 + name = "dasl" 1425 + version = "0.2.0" 1426 + source = "registry+https://github.com/rust-lang/crates.io-index" 1427 + checksum = "b59666035a4386b0fd272bd78da4cbc3ccb558941e97579ab00f0eb4639f2a49" 1428 + dependencies = [ 1429 + "blake3", 1430 + "cbor4ii 1.2.0", 1431 + "data-encoding", 1432 + "data-encoding-macro", 1433 + "scopeguard", 1434 + "serde", 1435 + "serde_bytes", 1436 + "sha2", 1437 + "thiserror 2.0.17", 1438 + ] 1439 + 1440 + [[package]] 1356 1441 name = "data-encoding" 1357 - version = "2.8.0" 1442 + version = "2.9.0" 1358 1443 source = "registry+https://github.com/rust-lang/crates.io-index" 1359 - checksum = "575f75dfd25738df5b91b8e43e14d44bda14637a58fae779fd2b064f8bf3e010" 1444 + checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" 1360 1445 1361 1446 [[package]] 1362 1447 name = "data-encoding-macro" 1363 - version = "0.1.17" 1448 + version = "0.1.18" 1364 1449 source = "registry+https://github.com/rust-lang/crates.io-index" 1365 - checksum = "9f9724adfcf41f45bf652b3995837669d73c4d49a1b5ac1ff82905ac7d9b5558" 1450 + checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d" 1366 1451 dependencies = [ 1367 1452 "data-encoding", 1368 1453 "data-encoding-macro-internal", ··· 1370 1455 1371 1456 [[package]] 1372 1457 name = "data-encoding-macro-internal" 1373 - version = "0.1.15" 1458 + version = "0.1.16" 1374 1459 source = "registry+https://github.com/rust-lang/crates.io-index" 1375 - checksum = "18e4fdb82bd54a12e42fb58a800dcae6b9e13982238ce2296dc3570b92148e1f" 1460 + checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976" 1376 1461 dependencies = [ 1377 1462 "data-encoding", 1378 1463 "syn 2.0.106", ··· 1579 1664 "slog-bunyan", 1580 1665 "slog-json", 1581 1666 "slog-term", 1582 - "thiserror 2.0.16", 1667 + "thiserror 2.0.17", 1583 1668 "tokio", 1584 1669 "tokio-rustls 0.25.0", 1585 1670 "toml 0.9.7", ··· 1783 1868 checksum = "0b25ad44cd4360a0448a9b5a0a6f1c7a621101cca4578706d43c9a821418aebc" 1784 1869 dependencies = [ 1785 1870 "byteorder", 1786 - "byteview", 1871 + "byteview 0.6.1", 1787 1872 "dashmap", 1788 1873 "log", 1789 - "lsm-tree", 1874 + "lsm-tree 2.10.4", 1790 1875 "path-absolutize", 1791 1876 "std-semaphore", 1792 1877 "tempfile", ··· 1799 1884 source = "git+https://github.com/fjall-rs/fjall.git#42d811f7c8cc9004407d520d37d2a1d8d246c03d" 1800 1885 dependencies = [ 1801 1886 "byteorder", 1802 - "byteview", 1887 + "byteview 0.6.1", 1803 1888 "dashmap", 1804 1889 "log", 1805 - "lsm-tree", 1890 + "lsm-tree 2.10.4", 1806 1891 "path-absolutize", 1892 + "std-semaphore", 1893 + "tempfile", 1894 + "xxhash-rust", 1895 + ] 1896 + 1897 + [[package]] 1898 + name = "fjall" 1899 + version = "3.0.0-pre.0" 1900 + source = "registry+https://github.com/rust-lang/crates.io-index" 1901 + checksum = "467588c1f15d1cfa9e43f02a45cf55d82fa1f12a6ae961b848c520458525600c" 1902 + dependencies = [ 1903 + "byteorder-lite", 1904 + "byteview 0.8.0", 1905 + "dashmap", 1906 + "log", 1907 + "lsm-tree 3.0.0-pre.0", 1807 1908 "std-semaphore", 1808 1909 "tempfile", 1809 1910 "xxhash-rust", ··· 1891 1992 "mixtrics", 1892 1993 "pin-project", 1893 1994 "serde", 1894 - "thiserror 2.0.16", 1995 + "thiserror 2.0.17", 1895 1996 "tokio", 1896 1997 "tracing", 1897 1998 ] ··· 1911 2012 "parking_lot", 1912 2013 "pin-project", 1913 2014 "serde", 1914 - "thiserror 2.0.16", 2015 + "thiserror 2.0.17", 1915 2016 "tokio", 1916 2017 "twox-hash", 1917 2018 ] ··· 1944 2045 "parking_lot", 1945 2046 "pin-project", 1946 2047 "serde", 1947 - "thiserror 2.0.16", 2048 + "thiserror 2.0.17", 1948 2049 "tokio", 1949 2050 "tracing", 1950 2051 ] ··· 1976 2077 "pin-project", 1977 2078 "rand 0.9.1", 1978 2079 "serde", 1979 - "thiserror 2.0.16", 2080 + "thiserror 2.0.17", 1980 2081 "tokio", 1981 2082 "tracing", 1982 2083 "twox-hash", ··· 2220 2321 "pest_derive", 2221 2322 "serde", 2222 2323 "serde_json", 2223 - "thiserror 2.0.16", 2324 + "thiserror 2.0.17", 2224 2325 "walkdir", 2225 2326 ] 2226 2327 ··· 2345 2446 "once_cell", 2346 2447 "rand 0.9.1", 2347 2448 "ring", 2348 - "thiserror 2.0.16", 2449 + "thiserror 2.0.17", 2349 2450 "tinyvec", 2350 2451 "tokio", 2351 2452 "tracing", ··· 2368 2469 "rand 0.9.1", 2369 2470 "resolv-conf", 2370 2471 "smallvec", 2371 - "thiserror 2.0.16", 2472 + "thiserror 2.0.17", 2372 2473 "tokio", 2373 2474 "tracing", 2374 2475 ] ··· 2800 2901 ] 2801 2902 2802 2903 [[package]] 2904 + name = "iroh-car" 2905 + version = "0.5.1" 2906 + source = "registry+https://github.com/rust-lang/crates.io-index" 2907 + checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a" 2908 + dependencies = [ 2909 + "anyhow", 2910 + "cid", 2911 + "futures", 2912 + "serde", 2913 + "serde_ipld_dagcbor", 2914 + "thiserror 1.0.69", 2915 + "tokio", 2916 + "unsigned-varint 0.7.2", 2917 + ] 2918 + 2919 + [[package]] 2803 2920 name = "is-terminal" 2804 2921 version = "0.4.16" 2805 2922 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2863 2980 "metrics", 2864 2981 "serde", 2865 2982 "serde_json", 2866 - "thiserror 2.0.16", 2983 + "thiserror 2.0.17", 2867 2984 "tokio", 2868 2985 "tokio-tungstenite 0.26.2", 2869 2986 "url", ··· 3116 3233 version = "0.1.0" 3117 3234 dependencies = [ 3118 3235 "anyhow", 3236 + "dasl", 3119 3237 "fluent-uri", 3120 3238 "nom", 3121 - "thiserror 2.0.16", 3239 + "serde", 3240 + "thiserror 2.0.17", 3122 3241 "tinyjson", 3123 3242 ] 3124 3243 ··· 3186 3305 3187 3306 [[package]] 3188 3307 name = "lsm-tree" 3189 - version = "2.10.2" 3308 + version = "2.10.4" 3190 3309 source = "registry+https://github.com/rust-lang/crates.io-index" 3191 - checksum = "55b6d7475a8dd22e749186968daacf8e2a77932b061b1bd263157987bbfc0c6c" 3310 + checksum = "799399117a2bfb37660e08be33f470958babb98386b04185288d829df362ea15" 3192 3311 dependencies = [ 3193 3312 "byteorder", 3194 3313 "crossbeam-skiplist", ··· 3209 3328 ] 3210 3329 3211 3330 [[package]] 3331 + name = "lsm-tree" 3332 + version = "3.0.0-pre.0" 3333 + source = "registry+https://github.com/rust-lang/crates.io-index" 3334 + checksum = "be375d45e348328e78582dffbda4f1709dd52fca27c1a81c7bf6ca134e6335f7" 3335 + dependencies = [ 3336 + "byteorder-lite", 3337 + "byteview 0.8.0", 3338 + "crossbeam-skiplist", 3339 + "enum_dispatch", 3340 + "interval-heap", 3341 + "log", 3342 + "lz4_flex", 3343 + "quick_cache", 3344 + "rustc-hash 2.1.1", 3345 + "self_cell", 3346 + "sfa", 3347 + "tempfile", 3348 + "varint-rs", 3349 + "xxhash-rust", 3350 + ] 3351 + 3352 + [[package]] 3212 3353 name = "lz4" 3213 3354 version = "1.28.1" 3214 3355 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3229 3370 3230 3371 [[package]] 3231 3372 name = "lz4_flex" 3232 - version = "0.11.3" 3373 + version = "0.11.5" 3233 3374 source = "registry+https://github.com/rust-lang/crates.io-index" 3234 - checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" 3375 + checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" 3235 3376 3236 3377 [[package]] 3237 3378 name = "mach2" ··· 3297 3438 ] 3298 3439 3299 3440 [[package]] 3441 + name = "match-lookup" 3442 + version = "0.1.1" 3443 + source = "registry+https://github.com/rust-lang/crates.io-index" 3444 + checksum = "1265724d8cb29dbbc2b0f06fffb8bf1a8c0cf73a78eede9ba73a4a66c52a981e" 3445 + dependencies = [ 3446 + "proc-macro2", 3447 + "quote", 3448 + "syn 1.0.109", 3449 + ] 3450 + 3451 + [[package]] 3300 3452 name = "match_cfg" 3301 3453 version = "0.1.0" 3302 3454 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3384 3536 "metrics", 3385 3537 "metrics-util 0.20.0", 3386 3538 "quanta", 3387 - "thiserror 2.0.16", 3539 + "thiserror 2.0.17", 3388 3540 "tokio", 3389 3541 "tracing", 3390 3542 ] ··· 3531 3683 3532 3684 [[package]] 3533 3685 name = "multibase" 3534 - version = "0.9.1" 3686 + version = "0.9.2" 3535 3687 source = "registry+https://github.com/rust-lang/crates.io-index" 3536 - checksum = "9b3539ec3c1f04ac9748a260728e855f261b4977f5c3406612c884564f329404" 3688 + checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77" 3537 3689 dependencies = [ 3538 3690 "base-x", 3691 + "base256emoji", 3539 3692 "data-encoding", 3540 3693 "data-encoding-macro", 3541 3694 ] ··· 3548 3701 dependencies = [ 3549 3702 "core2", 3550 3703 "serde", 3551 - "unsigned-varint", 3704 + "unsigned-varint 0.8.0", 3552 3705 ] 3553 3706 3554 3707 [[package]] ··· 3926 4079 checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323" 3927 4080 dependencies = [ 3928 4081 "memchr", 3929 - "thiserror 2.0.16", 4082 + "thiserror 2.0.17", 3930 4083 "ucd-trie", 3931 4084 ] 3932 4085 ··· 4036 4189 "rusqlite", 4037 4190 "serde", 4038 4191 "serde_json", 4039 - "thiserror 2.0.16", 4192 + "thiserror 2.0.17", 4040 4193 "tokio", 4041 4194 "tracing-subscriber", 4042 4195 ] ··· 4079 4232 "smallvec", 4080 4233 "sync_wrapper", 4081 4234 "tempfile", 4082 - "thiserror 2.0.16", 4235 + "thiserror 2.0.17", 4083 4236 "tokio", 4084 4237 "tokio-rustls 0.26.2", 4085 4238 "tokio-stream", ··· 4123 4276 "serde_json", 4124 4277 "serde_urlencoded", 4125 4278 "serde_yaml", 4126 - "thiserror 2.0.16", 4279 + "thiserror 2.0.17", 4127 4280 "tokio", 4128 4281 ] 4129 4282 ··· 4142 4295 "quote", 4143 4296 "regex", 4144 4297 "syn 2.0.106", 4145 - "thiserror 2.0.16", 4298 + "thiserror 2.0.17", 4146 4299 ] 4147 4300 4148 4301 [[package]] ··· 4269 4422 4270 4423 [[package]] 4271 4424 name = "quick_cache" 4272 - version = "0.6.12" 4425 + version = "0.6.16" 4273 4426 source = "registry+https://github.com/rust-lang/crates.io-index" 4274 - checksum = "8f8ed0655cbaf18a26966142ad23b95d8ab47221c50c4f73a1db7d0d2d6e3da8" 4427 + checksum = "9ad6644cb07b7f3488b9f3d2fde3b4c0a7fa367cafefb39dff93a659f76eb786" 4275 4428 dependencies = [ 4276 4429 "equivalent", 4277 4430 "hashbrown 0.15.2", ··· 4291 4444 "rustc-hash 2.1.1", 4292 4445 "rustls 0.23.31", 4293 4446 "socket2 0.5.9", 4294 - "thiserror 2.0.16", 4447 + "thiserror 2.0.17", 4295 4448 "tokio", 4296 4449 "tracing", 4297 4450 "web-time", ··· 4312 4465 "rustls 0.23.31", 4313 4466 "rustls-pki-types", 4314 4467 "slab", 4315 - "thiserror 2.0.16", 4468 + "thiserror 2.0.17", 4316 4469 "tinyvec", 4317 4470 "tracing", 4318 4471 "web-time", ··· 4538 4691 checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 4539 4692 4540 4693 [[package]] 4694 + name = "repo-stream" 4695 + version = "0.2.2" 4696 + source = "registry+https://github.com/rust-lang/crates.io-index" 4697 + checksum = "093b48e604c138949bf3d4a1a9bc1165feb1db28a73af0101c84eb703d279f43" 4698 + dependencies = [ 4699 + "bincode 2.0.1", 4700 + "futures", 4701 + "futures-core", 4702 + "ipld-core", 4703 + "iroh-car", 4704 + "log", 4705 + "multibase", 4706 + "rusqlite", 4707 + "serde", 4708 + "serde_bytes", 4709 + "serde_ipld_dagcbor", 4710 + "sha2", 4711 + "thiserror 2.0.17", 4712 + "tokio", 4713 + ] 4714 + 4715 + [[package]] 4541 4716 name = "reqwest" 4542 - version = "0.12.23" 4717 + version = "0.12.24" 4543 4718 source = "registry+https://github.com/rust-lang/crates.io-index" 4544 - checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" 4719 + checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" 4545 4720 dependencies = [ 4546 4721 "async-compression", 4547 4722 "base64 0.22.1", ··· 4581 4756 "url", 4582 4757 "wasm-bindgen", 4583 4758 "wasm-bindgen-futures", 4759 + "wasm-streams", 4584 4760 "web-sys", 4585 4761 ] 4586 4762 ··· 4962 5138 4963 5139 [[package]] 4964 5140 name = "self_cell" 4965 - version = "1.1.0" 5141 + version = "1.2.0" 4966 5142 source = "registry+https://github.com/rust-lang/crates.io-index" 4967 - checksum = "c2fdfc24bc566f839a2da4c4295b82db7d25a24253867d5c64355abb5799bdbe" 5143 + checksum = "0f7d95a54511e0c7be3f51e8867aa8cf35148d7b9445d44de2f943e2b206e749" 4968 5144 4969 5145 [[package]] 4970 5146 name = "semver" ··· 4984 5160 4985 5161 [[package]] 4986 5162 name = "serde_bytes" 4987 - version = "0.11.17" 5163 + version = "0.11.19" 4988 5164 source = "registry+https://github.com/rust-lang/crates.io-index" 4989 - checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96" 5165 + checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" 4990 5166 dependencies = [ 4991 5167 "serde", 5168 + "serde_core", 4992 5169 ] 4993 5170 4994 5171 [[package]] ··· 5036 5213 ] 5037 5214 5038 5215 [[package]] 5216 + name = "serde_ipld_dagcbor" 5217 + version = "0.6.4" 5218 + source = "registry+https://github.com/rust-lang/crates.io-index" 5219 + checksum = "46182f4f08349a02b45c998ba3215d3f9de826246ba02bb9dddfe9a2a2100778" 5220 + dependencies = [ 5221 + "cbor4ii 0.2.14", 5222 + "ipld-core", 5223 + "scopeguard", 5224 + "serde", 5225 + ] 5226 + 5227 + [[package]] 5039 5228 name = "serde_json" 5040 5229 version = "1.0.145" 5041 5230 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 5068 5257 "percent-encoding", 5069 5258 "ryu", 5070 5259 "serde", 5071 - "thiserror 2.0.16", 5260 + "thiserror 2.0.17", 5072 5261 ] 5073 5262 5074 5263 [[package]] ··· 5157 5346 ] 5158 5347 5159 5348 [[package]] 5349 + name = "sfa" 5350 + version = "0.0.1" 5351 + source = "registry+https://github.com/rust-lang/crates.io-index" 5352 + checksum = "e5f5f9dc21f55409f15103d5a7e7601b804935923c7fe4746dc806c3a422a038" 5353 + dependencies = [ 5354 + "byteorder-lite", 5355 + "log", 5356 + "xxhash-rust", 5357 + ] 5358 + 5359 + [[package]] 5160 5360 name = "sha1" 5161 5361 version = "0.10.6" 5162 5362 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 5220 5420 dependencies = [ 5221 5421 "num-bigint", 5222 5422 "num-traits", 5223 - "thiserror 2.0.16", 5423 + "thiserror 2.0.17", 5224 5424 "time", 5225 5425 ] 5226 5426 ··· 5262 5462 "rustls 0.23.31", 5263 5463 "serde", 5264 5464 "serde_json", 5265 - "thiserror 2.0.16", 5465 + "thiserror 2.0.17", 5266 5466 "time", 5267 5467 "tokio", 5268 5468 "tokio-util", ··· 5355 5555 name = "spacedust" 5356 5556 version = "0.1.0" 5357 5557 dependencies = [ 5558 + "anyhow", 5559 + "async-channel", 5358 5560 "async-trait", 5359 5561 "clap", 5360 5562 "ctrlc", 5563 + "dasl", 5361 5564 "dropshot", 5362 5565 "env_logger", 5566 + "fjall 3.0.0-pre.0", 5363 5567 "futures", 5364 5568 "http", 5569 + "ipld-core", 5365 5570 "jetstream", 5366 5571 "links", 5367 5572 "log", 5368 5573 "metrics", 5369 5574 "metrics-exporter-prometheus 0.17.2", 5370 5575 "rand 0.9.1", 5576 + "repo-stream", 5577 + "reqwest", 5371 5578 "schemars", 5372 5579 "semver", 5373 5580 "serde", 5581 + "serde_ipld_dagcbor", 5374 5582 "serde_json", 5375 5583 "serde_qs", 5376 - "thiserror 2.0.16", 5584 + "thiserror 2.0.17", 5377 5585 "tinyjson", 5378 5586 "tokio", 5379 5587 "tokio-tungstenite 0.27.0", ··· 5506 5714 5507 5715 [[package]] 5508 5716 name = "tempfile" 5509 - version = "3.19.1" 5717 + version = "3.23.0" 5510 5718 source = "registry+https://github.com/rust-lang/crates.io-index" 5511 - checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" 5719 + checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" 5512 5720 dependencies = [ 5513 5721 "fastrand", 5514 5722 "getrandom 0.3.3", ··· 5539 5747 5540 5748 [[package]] 5541 5749 name = "thiserror" 5542 - version = "2.0.16" 5750 + version = "2.0.17" 5543 5751 source = "registry+https://github.com/rust-lang/crates.io-index" 5544 - checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" 5752 + checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" 5545 5753 dependencies = [ 5546 - "thiserror-impl 2.0.16", 5754 + "thiserror-impl 2.0.17", 5547 5755 ] 5548 5756 5549 5757 [[package]] ··· 5559 5767 5560 5768 [[package]] 5561 5769 name = "thiserror-impl" 5562 - version = "2.0.16" 5770 + version = "2.0.17" 5563 5771 source = "registry+https://github.com/rust-lang/crates.io-index" 5564 - checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" 5772 + checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" 5565 5773 dependencies = [ 5566 5774 "proc-macro2", 5567 5775 "quote", ··· 5993 6201 "native-tls", 5994 6202 "rand 0.9.1", 5995 6203 "sha1", 5996 - "thiserror 2.0.16", 6204 + "thiserror 2.0.17", 5997 6205 "url", 5998 6206 "utf-8", 5999 6207 ] ··· 6011 6219 "log", 6012 6220 "rand 0.9.1", 6013 6221 "sha1", 6014 - "thiserror 2.0.16", 6222 + "thiserror 2.0.17", 6015 6223 "utf-8", 6016 6224 ] 6017 6225 ··· 6054 6262 "http", 6055 6263 "jetstream", 6056 6264 "log", 6057 - "lsm-tree", 6265 + "lsm-tree 2.10.4", 6058 6266 "metrics", 6059 6267 "metrics-exporter-prometheus 0.17.2", 6060 6268 "schemars", ··· 6064 6272 "serde_qs", 6065 6273 "sha2", 6066 6274 "tempfile", 6067 - "thiserror 2.0.16", 6275 + "thiserror 2.0.17", 6068 6276 "tikv-jemallocator", 6069 6277 "tokio", 6070 6278 "tokio-util", ··· 6117 6325 6118 6326 [[package]] 6119 6327 name = "unsigned-varint" 6328 + version = "0.7.2" 6329 + source = "registry+https://github.com/rust-lang/crates.io-index" 6330 + checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105" 6331 + 6332 + [[package]] 6333 + name = "unsigned-varint" 6120 6334 version = "0.8.0" 6121 6335 source = "registry+https://github.com/rust-lang/crates.io-index" 6122 6336 checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06" ··· 6193 6407 checksum = "62fc7c4ce161f049607ecea654dca3f2d727da5371ae85e2e4f14ce2b98ed67c" 6194 6408 dependencies = [ 6195 6409 "byteorder", 6196 - "byteview", 6410 + "byteview 0.6.1", 6197 6411 "interval-heap", 6198 6412 "log", 6199 6413 "path-absolutize", ··· 6342 6556 ] 6343 6557 6344 6558 [[package]] 6559 + name = "wasm-streams" 6560 + version = "0.4.2" 6561 + source = "registry+https://github.com/rust-lang/crates.io-index" 6562 + checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" 6563 + dependencies = [ 6564 + "futures-util", 6565 + "js-sys", 6566 + "wasm-bindgen", 6567 + "wasm-bindgen-futures", 6568 + "web-sys", 6569 + ] 6570 + 6571 + [[package]] 6345 6572 name = "web-sys" 6346 6573 version = "0.3.77" 6347 6574 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 6400 6627 "reqwest", 6401 6628 "serde", 6402 6629 "serde_json", 6403 - "thiserror 2.0.16", 6630 + "thiserror 2.0.17", 6404 6631 "tokio", 6405 6632 "tokio-util", 6406 6633 "url", ··· 6758 6985 "nom", 6759 6986 "oid-registry", 6760 6987 "rusticata-macros", 6761 - "thiserror 2.0.16", 6988 + "thiserror 2.0.17", 6762 6989 "time", 6763 6990 ] 6764 6991
+6 -13
constellation/src/consumer/jetstream.rs
··· 226 226 println!("jetstream closed the websocket cleanly."); 227 227 break; 228 228 } 229 - Err(_) => { 230 - counter!("jetstream_read_fail", "url" => stream.clone(), "reason" => "dirty close").increment(1); 231 - println!("jetstream failed to close the websocket cleanly."); 232 - break; 233 - } 234 - Ok(r) => { 235 - eprintln!("jetstream: close result after error: {r:?}"); 236 - counter!("jetstream_read_fail", "url" => stream.clone(), "reason" => "read error") 237 - .increment(1); 238 - // if we didn't immediately get ConnectionClosed, we should keep polling read 239 - // until we get it. 240 - continue; 241 - } 229 + r => eprintln!("jetstream: close result after error: {r:?}"), 242 230 } 231 + counter!("jetstream_read_fail", "url" => stream.clone(), "reason" => "read error") 232 + .increment(1); 233 + // if we didn't immediately get ConnectionClosed, we should keep polling read 234 + // until we get it. 235 + continue; 243 236 } 244 237 }; 245 238
+2
links/Cargo.toml
··· 5 5 6 6 [dependencies] 7 7 anyhow = "1.0.95" 8 + dasl = "0.2.0" 8 9 fluent-uri = "0.3.2" 9 10 nom = "7.1.3" 11 + serde = { version = "1.0.228", features = ["derive"] } 10 12 thiserror = "2.0.9" 11 13 tinyjson = "2.5.1"
+3 -2
links/src/lib.rs
··· 1 1 use fluent_uri::Uri; 2 + use serde::{Deserialize, Serialize}; 2 3 3 4 pub mod at_uri; 4 5 pub mod did; ··· 6 7 7 8 pub use record::collect_links; 8 9 9 - #[derive(Debug, Clone, Ord, Eq, PartialOrd, PartialEq)] 10 + #[derive(Debug, Clone, Ord, Eq, PartialOrd, PartialEq, Serialize, Deserialize)] 10 11 pub enum Link { 11 12 AtUri(String), 12 13 Uri(String), ··· 59 60 } 60 61 } 61 62 62 - #[derive(Debug, PartialEq)] 63 + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] 63 64 pub struct CollectedLink { 64 65 pub path: String, 65 66 pub target: Link,
+41
links/src/record.rs
··· 1 + use dasl::drisl::Value as DrislValue; 1 2 use tinyjson::JsonValue; 2 3 3 4 use crate::{parse_any_link, CollectedLink}; ··· 36 37 } 37 38 } 38 39 40 + pub fn walk_drisl(path: &str, v: &DrislValue, found: &mut Vec<CollectedLink>) { 41 + match v { 42 + DrislValue::Map(o) => { 43 + for (key, child) in o { 44 + walk_drisl(&format!("{path}.{key}"), child, found) 45 + } 46 + } 47 + DrislValue::Array(a) => { 48 + for child in a { 49 + let child_p = match child { 50 + DrislValue::Map(o) => { 51 + if let Some(DrislValue::Text(t)) = o.get("$type") { 52 + format!("{path}[{t}]") 53 + } else { 54 + format!("{path}[]") 55 + } 56 + } 57 + _ => format!("{path}[]"), 58 + }; 59 + walk_drisl(&child_p, child, found) 60 + } 61 + } 62 + DrislValue::Text(s) => { 63 + if let Some(link) = parse_any_link(s) { 64 + found.push(CollectedLink { 65 + path: path.to_string(), 66 + target: link, 67 + }); 68 + } 69 + } 70 + _ => {} 71 + } 72 + } 73 + 39 74 pub fn collect_links(v: &JsonValue) -> Vec<CollectedLink> { 40 75 let mut found = vec![]; 41 76 walk_record("", v, &mut found); 77 + found 78 + } 79 + 80 + pub fn collect_links_drisl(v: &DrislValue) -> Vec<CollectedLink> { 81 + let mut found = vec![]; 82 + walk_drisl("", v, &mut found); 42 83 found 43 84 } 44 85
+2 -4
slingshot/src/firehose_cache.rs
··· 4 4 5 5 pub async fn firehose_cache( 6 6 cache_dir: impl AsRef<Path>, 7 - memory_mb: usize, 8 - disk_gb: usize, 9 7 ) -> Result<HybridCache<String, CachedRecord>, String> { 10 8 let cache = HybridCacheBuilder::new() 11 9 .with_name("firehose") 12 - .memory(memory_mb * 2_usize.pow(20)) 10 + .memory(64 * 2_usize.pow(20)) 13 11 .with_weighter(|k: &String, v| k.len() + std::mem::size_of_val(v)) 14 12 .storage(Engine::large()) 15 13 .with_device_options( 16 14 DirectFsDeviceOptions::new(cache_dir) 17 - .with_capacity(disk_gb * 2_usize.pow(30)) 15 + .with_capacity(2_usize.pow(30)) // TODO: configurable (1GB to have something) 18 16 .with_file_size(16 * 2_usize.pow(20)), // note: this does limit the max cached item size, warning jumbo records 19 17 ) 20 18 .build()
+5 -26
slingshot/src/main.rs
··· 25 25 /// where to keep disk caches 26 26 #[arg(long)] 27 27 cache_dir: PathBuf, 28 - /// memory cache size in MB 29 - #[arg(long, default_value_t = 64)] 30 - cache_memory_mb: usize, 31 - /// disk cache size in GB 32 - #[arg(long, default_value_t = 1)] 33 - cache_disk_gb: usize, 34 - /// host for HTTP server (when not using --domain) 35 - #[arg(long, default_value = "127.0.0.1")] 36 - host: String, 37 - /// port for HTTP server (when not using --domain) 38 - #[arg(long, default_value_t = 3000)] 39 - port: u16, 40 - /// port for metrics/prometheus server 41 - #[arg(long, default_value_t = 8765)] 42 - metrics_port: u16, 43 28 /// the domain pointing to this server 44 29 /// 45 30 /// if present: ··· 77 62 78 63 let args = Args::parse(); 79 64 80 - if let Err(e) = install_metrics_server(args.metrics_port) { 65 + if let Err(e) = install_metrics_server() { 81 66 log::error!("failed to install metrics server: {e:?}"); 82 67 } else { 83 - log::info!("metrics listening at http://0.0.0.0:{}", args.metrics_port); 68 + log::info!("metrics listening at http://0.0.0.0:8765"); 84 69 } 85 70 86 71 std::fs::create_dir_all(&args.cache_dir).map_err(|e| { ··· 98 83 log::info!("cache dir ready at at {cache_dir:?}."); 99 84 100 85 log::info!("setting up firehose cache..."); 101 - let cache = firehose_cache( 102 - cache_dir.join("./firehose"), 103 - args.cache_memory_mb, 104 - args.cache_disk_gb, 105 - ) 106 - .await?; 86 + let cache = firehose_cache(cache_dir.join("./firehose")).await?; 107 87 log::info!("firehose cache ready."); 108 88 109 89 let mut tasks: tokio::task::JoinSet<Result<(), MainTaskError>> = tokio::task::JoinSet::new(); ··· 132 112 args.domain, 133 113 args.acme_contact, 134 114 args.certs, 135 - args.host, 136 - args.port, 137 115 server_shutdown, 138 116 ) 139 117 .await?; ··· 194 172 Ok(()) 195 173 } 196 174 197 - fn install_metrics_server(port: u16) -> Result<(), metrics_exporter_prometheus::BuildError> { 175 + fn install_metrics_server() -> Result<(), metrics_exporter_prometheus::BuildError> { 198 176 log::info!("installing metrics server..."); 199 177 let host = [0, 0, 0, 0]; 178 + let port = 8765; 200 179 PrometheusBuilder::new() 201 180 .set_quantiles(&[0.5, 0.9, 0.99, 1.0])? 202 181 .set_bucket_duration(std::time::Duration::from_secs(300))?
+12 -19
slingshot/src/server.rs
··· 437 437 Ok(did) => did, 438 438 Err(_) => { 439 439 let Ok(alleged_handle) = Handle::new(identifier) else { 440 - return invalid("Identifier was not a valid DID or handle"); 440 + return invalid("identifier was not a valid DID or handle"); 441 441 }; 442 442 443 443 match self.identity.handle_to_did(alleged_handle.clone()).await { ··· 453 453 Err(e) => { 454 454 log::debug!("failed to resolve handle: {e}"); 455 455 // TODO: ServerError not BadRequest 456 - return invalid("Errored while trying to resolve handle to DID"); 456 + return invalid("errored while trying to resolve handle to DID"); 457 457 } 458 458 } 459 459 } 460 460 }; 461 461 let Ok(partial_doc) = self.identity.did_to_partial_mini_doc(&did).await else { 462 - return invalid("Failed to get DID doc"); 462 + return invalid("failed to get DID doc"); 463 463 }; 464 464 let Some(partial_doc) = partial_doc else { 465 - return invalid("Failed to find DID doc"); 465 + return invalid("failed to find DID doc"); 466 466 }; 467 467 468 468 // ok so here's where we're at: ··· 483 483 .handle_to_did(partial_doc.unverified_handle.clone()) 484 484 .await 485 485 else { 486 - return invalid("Failed to get DID doc's handle"); 486 + return invalid("failed to get did doc's handle"); 487 487 }; 488 488 let Some(handle_did) = handle_did else { 489 - return invalid("Failed to resolve DID doc's handle"); 489 + return invalid("failed to resolve did doc's handle"); 490 490 }; 491 491 if handle_did == did { 492 492 partial_doc.unverified_handle.to_string() ··· 516 516 let Ok(handle) = Handle::new(repo) else { 517 517 return GetRecordResponse::BadRequest(xrpc_error( 518 518 "InvalidRequest", 519 - "Repo was not a valid DID or handle", 519 + "repo was not a valid DID or handle", 520 520 )); 521 521 }; 522 522 match self.identity.handle_to_did(handle).await { ··· 534 534 log::debug!("handle resolution failed: {e}"); 535 535 return GetRecordResponse::ServerError(xrpc_error( 536 536 "ResolutionFailed", 537 - "Errored while trying to resolve handle to DID", 537 + "errored while trying to resolve handle to DID", 538 538 )); 539 539 } 540 540 } ··· 544 544 let Ok(collection) = Nsid::new(collection) else { 545 545 return GetRecordResponse::BadRequest(xrpc_error( 546 546 "InvalidRequest", 547 - "Invalid NSID for collection", 547 + "invalid NSID for collection", 548 548 )); 549 549 }; 550 550 551 551 let Ok(rkey) = RecordKey::new(rkey) else { 552 - return GetRecordResponse::BadRequest(xrpc_error("InvalidRequest", "Invalid rkey")); 552 + return GetRecordResponse::BadRequest(xrpc_error("InvalidRequest", "invalid rkey")); 553 553 }; 554 554 555 555 let cid: Option<Cid> = if let Some(cid) = cid { 556 556 let Ok(cid) = Cid::from_str(&cid) else { 557 - return GetRecordResponse::BadRequest(xrpc_error("InvalidRequest", "Invalid CID")); 557 + return GetRecordResponse::BadRequest(xrpc_error("InvalidRequest", "invalid CID")); 558 558 }; 559 559 Some(cid) 560 560 } else { ··· 694 694 domain: Option<String>, 695 695 acme_contact: Option<String>, 696 696 certs: Option<PathBuf>, 697 - host: String, 698 - port: u16, 699 697 shutdown: CancellationToken, 700 698 ) -> Result<(), ServerError> { 701 699 let repo = Arc::new(repo); ··· 754 752 ) 755 753 .await 756 754 } else { 757 - run( 758 - TcpListener::bind(format!("{host}:{port}")), 759 - app, 760 - shutdown, 761 - ) 762 - .await 755 + run(TcpListener::bind("127.0.0.1:3000"), app, shutdown).await 763 756 } 764 757 } 765 758
+8
spacedust/Cargo.toml
··· 4 4 edition = "2024" 5 5 6 6 [dependencies] 7 + anyhow = "1.0.100" 8 + async-channel = "2.5.0" 7 9 async-trait = "0.1.88" 8 10 clap = { version = "4.5.40", features = ["derive"] } 9 11 ctrlc = "3.4.7" 12 + dasl = "0.2.0" 10 13 dropshot = "0.16.2" 11 14 env_logger = "0.11.8" 15 + fjall = "3.0.0-pre.0" 12 16 futures = "0.3.31" 13 17 http = "1.3.1" 18 + ipld-core = { version = "0.4.2", features = ["serde"] } 14 19 jetstream = { path = "../jetstream", features = ["metrics"] } 15 20 links = { path = "../links" } 16 21 log = "0.4.27" 17 22 metrics = "0.24.2" 18 23 metrics-exporter-prometheus = { version = "0.17.1", features = ["http-listener"] } 19 24 rand = "0.9.1" 25 + repo-stream = "0.2.2" 26 + reqwest = { version = "0.12.24", features = ["json", "stream"] } 20 27 schemars = "0.8.22" 21 28 semver = "1.0.26" 22 29 serde = { version = "1.0.219", features = ["derive"] } 30 + serde_ipld_dagcbor = "0.6.4" 23 31 serde_json = "1.0.140" 24 32 serde_qs = "1.0.0-rc.3" 25 33 thiserror = "2.0.12"
+21
spacedust/src/bin/import_car_file.rs
··· 1 + use clap::Parser; 2 + use std::path::PathBuf; 3 + 4 + type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; 5 + 6 + #[derive(Debug, Parser)] 7 + struct Args { 8 + #[arg()] 9 + file: PathBuf, 10 + } 11 + 12 + #[tokio::main] 13 + async fn main() -> Result<()> { 14 + env_logger::init(); 15 + 16 + let Args { file } = Args::parse(); 17 + 18 + let _reader = tokio::fs::File::open(file).await?; 19 + 20 + Ok(()) 21 + }
+258
spacedust/src/bin/import_scraped.rs
··· 1 + use clap::Parser; 2 + use links::CollectedLink; 3 + use repo_stream::{ 4 + DiskBuilder, DiskStore, Driver, DriverBuilder, Processable, drive::DriverBuilderWithProcessor, 5 + drive::NeedDisk, 6 + }; 7 + use std::path::PathBuf; 8 + use std::sync::{ 9 + Arc, 10 + atomic::{AtomicUsize, Ordering}, 11 + }; 12 + use tokio::{io::AsyncRead, task::JoinSet}; 13 + 14 + type Result<T> = anyhow::Result<T>; //std::result::Result<T, Box<dyn std::error::Error>>; 15 + 16 + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] 17 + struct CollectedProcessed(CollectedLink); 18 + 19 + impl Processable for CollectedProcessed { 20 + fn get_size(&self) -> usize { 21 + self.0.path.capacity() + self.0.target.as_str().len() 22 + } 23 + } 24 + 25 + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] 26 + struct ErrString(String); 27 + 28 + impl Processable for ErrString { 29 + fn get_size(&self) -> usize { 30 + self.0.capacity() 31 + } 32 + } 33 + 34 + type Processed = std::result::Result<Vec<CollectedProcessed>, ErrString>; 35 + 36 + /// hacky for now: put errors in strings ๐Ÿคทโ€โ™€๏ธ 37 + fn process(block: Vec<u8>) -> Processed { 38 + let value: dasl::drisl::Value = dasl::drisl::from_slice(&block) 39 + .map_err(|e| ErrString(format!("failed to parse block with drisl: {e:?}")))?; 40 + let links = links::record::collect_links_drisl(&value) 41 + .into_iter() 42 + .map(CollectedProcessed) 43 + .collect(); 44 + Ok(links) 45 + } 46 + 47 + #[derive(Debug, Parser)] 48 + struct Args { 49 + #[arg(long)] 50 + cars_folder: PathBuf, 51 + #[arg(long)] 52 + mem_workers: usize, 53 + #[arg(long)] 54 + disk_workers: usize, 55 + #[arg(long)] 56 + disk_folder: PathBuf, 57 + } 58 + 59 + async fn get_cars( 60 + cars_folder: PathBuf, 61 + tx: async_channel::Sender<tokio::io::BufReader<tokio::fs::File>>, 62 + ) -> Result<()> { 63 + let mut dir = tokio::fs::read_dir(cars_folder).await?; 64 + while let Some(entry) = dir.next_entry().await? { 65 + if !entry.file_type().await?.is_file() { 66 + continue; 67 + } 68 + let reader = tokio::fs::File::open(&entry.path()).await?; 69 + let reader = tokio::io::BufReader::new(reader); 70 + tx.send(reader).await?; 71 + } 72 + Ok(()) 73 + } 74 + 75 + async fn drive_mem<R: AsyncRead + Unpin + Send + Sync + 'static>( 76 + f: R, 77 + builder: &DriverBuilderWithProcessor<Processed>, 78 + disk_tx: &async_channel::Sender<NeedDisk<R, Processed>>, 79 + ) -> Result<Option<(usize, usize)>> { 80 + let mut n = 0; 81 + let mut n_records = 0; 82 + match builder.load_car(f).await? { 83 + Driver::Memory(_commit, mut driver) => { 84 + while let Some(chunk) = driver.next_chunk(512).await? { 85 + n_records += chunk.len(); 86 + for (_key, links) in chunk { 87 + match links { 88 + Ok(links) => n += links.len(), 89 + Err(e) => eprintln!("wat: {e:?}"), 90 + } 91 + } 92 + } 93 + Ok(Some((n, n_records))) 94 + } 95 + Driver::Disk(need_disk) => { 96 + disk_tx.send(need_disk).await?; 97 + Ok(None) 98 + } 99 + } 100 + } 101 + 102 + async fn mem_worker<R: AsyncRead + Unpin + Send + Sync + 'static>( 103 + car_rx: async_channel::Receiver<R>, 104 + disk_tx: async_channel::Sender<NeedDisk<R, Processed>>, 105 + n: Arc<AtomicUsize>, 106 + n_records: Arc<AtomicUsize>, 107 + ) -> Result<()> { 108 + let builder = DriverBuilder::new() 109 + .with_block_processor(process) // don't care just counting records 110 + .with_mem_limit_mb(128); 111 + while let Ok(f) = car_rx.recv().await { 112 + let driven = match drive_mem(f, &builder, &disk_tx).await { 113 + Ok(d) => d, 114 + Err(e) => { 115 + eprintln!("failed to drive mem: {e:?}. skipping..."); 116 + continue; 117 + } 118 + }; 119 + if let Some((drove, recs)) = driven { 120 + n.fetch_add(drove, Ordering::Relaxed); 121 + n_records.fetch_add(recs, Ordering::Relaxed); 122 + } 123 + } 124 + Ok(()) 125 + } 126 + 127 + async fn drive_disk<R: AsyncRead + Unpin>( 128 + needed: NeedDisk<R, Processed>, 129 + store: DiskStore, 130 + ) -> Result<(usize, usize, DiskStore)> { 131 + let (_commit, mut driver) = needed.finish_loading(store).await?; 132 + let mut n = 0; 133 + let mut n_records = 0; 134 + while let Some(chunk) = driver.next_chunk(512).await? { 135 + n_records += chunk.len(); 136 + for (_key, links) in chunk { 137 + match links { 138 + Ok(links) => n += links.len(), 139 + Err(e) => eprintln!("wat: {e:?}"), 140 + } 141 + } 142 + } 143 + let store = driver.reset_store().await?; 144 + Ok((n, n_records, store)) 145 + } 146 + 147 + async fn disk_worker<R: AsyncRead + Unpin>( 148 + worker_id: usize, 149 + disk_rx: async_channel::Receiver<NeedDisk<R, Processed>>, 150 + folder: PathBuf, 151 + n: Arc<AtomicUsize>, 152 + n_records: Arc<AtomicUsize>, 153 + disk_workers_active: Arc<AtomicUsize>, 154 + ) -> Result<()> { 155 + let mut file = folder; 156 + file.push(format!("disk-worker-{worker_id}.sqlite")); 157 + let builder = DiskBuilder::new().with_cache_size_mb(128); 158 + let mut store = builder.open(file.clone()).await?; 159 + while let Ok(needed) = disk_rx.recv().await { 160 + let active = disk_workers_active.fetch_add(1, Ordering::AcqRel); 161 + println!("-> disk workers active: {}", active + 1); 162 + let (drove, records) = match drive_disk(needed, store).await { 163 + Ok((d, r, s)) => { 164 + store = s; 165 + (d, r) 166 + } 167 + Err(e) => { 168 + eprintln!("failed to drive disk: {e:?}. skipping..."); 169 + store = builder.open(file.clone()).await?; 170 + continue; 171 + } 172 + }; 173 + n.fetch_add(drove, Ordering::Relaxed); 174 + n_records.fetch_add(records, Ordering::Relaxed); 175 + let were_active = disk_workers_active.fetch_sub(1, Ordering::AcqRel); 176 + println!("<- disk workers active: {}", were_active - 1); 177 + } 178 + Ok(()) 179 + } 180 + 181 + #[tokio::main] 182 + async fn main() -> Result<()> { 183 + env_logger::init(); 184 + 185 + let Args { 186 + cars_folder, 187 + disk_folder, 188 + disk_workers, 189 + mem_workers, 190 + } = Args::parse(); 191 + 192 + let mut set = JoinSet::<Result<()>>::new(); 193 + 194 + let (cars_tx, cars_rx) = async_channel::bounded(2); 195 + set.spawn(get_cars(cars_folder, cars_tx)); 196 + 197 + let n: Arc<AtomicUsize> = Arc::new(0.into()); 198 + let n_records: Arc<AtomicUsize> = Arc::new(0.into()); 199 + let disk_workers_active: Arc<AtomicUsize> = Arc::new(0.into()); 200 + 201 + set.spawn({ 202 + let n = n.clone(); 203 + let n_records = n_records.clone(); 204 + let mut interval = tokio::time::interval(std::time::Duration::from_secs(10)); 205 + async move { 206 + let mut last_n = n.load(Ordering::Relaxed); 207 + let mut last_n_records = n.load(Ordering::Relaxed); 208 + loop { 209 + interval.tick().await; 210 + let n = n.load(Ordering::Relaxed); 211 + let n_records = n_records.load(Ordering::Relaxed); 212 + let diff_n = n - last_n; 213 + let diff_records = n_records - last_n_records; 214 + println!("rate: {} rec/sec; {} n/sec", diff_records / 10, diff_n / 10); 215 + if n_records > 0 && diff_records == 0 { 216 + println!("zero encountered, stopping rate calculation polling."); 217 + break Ok(()); 218 + } 219 + last_n = n; 220 + last_n_records = n_records; 221 + } 222 + } 223 + }); 224 + 225 + let (needs_disk_tx, needs_disk_rx) = async_channel::bounded(disk_workers); 226 + 227 + for _ in 0..mem_workers { 228 + set.spawn(mem_worker( 229 + cars_rx.clone(), 230 + needs_disk_tx.clone(), 231 + n.clone(), 232 + n_records.clone(), 233 + )); 234 + } 235 + drop(cars_rx); 236 + drop(needs_disk_tx); 237 + 238 + tokio::fs::create_dir_all(disk_folder.clone()).await?; 239 + for id in 0..disk_workers { 240 + set.spawn(disk_worker( 241 + id, 242 + needs_disk_rx.clone(), 243 + disk_folder.clone(), 244 + n.clone(), 245 + n_records.clone(), 246 + disk_workers_active.clone(), 247 + )); 248 + } 249 + drop(needs_disk_rx); 250 + 251 + while let Some(res) = set.join_next().await { 252 + println!("task from set joined: {res:?}"); 253 + } 254 + 255 + eprintln!("total records processed: {n_records:?}; total n: {n:?}"); 256 + 257 + Ok(()) 258 + }
+137
spacedust/src/bin/scrape_pds.rs
··· 1 + use clap::Parser; 2 + use reqwest::Url; 3 + use serde::Deserialize; 4 + use std::path::PathBuf; 5 + use tokio::io::AsyncWriteExt; 6 + use tokio::{sync::mpsc, time}; 7 + 8 + type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; 9 + 10 + use futures::StreamExt; 11 + 12 + #[derive(Debug, Parser)] 13 + struct Args { 14 + #[arg(long)] 15 + pds: Url, 16 + #[arg(long)] 17 + throttle_ms: u64, // 100ms per pds? 18 + #[arg(long)] 19 + folder: PathBuf, 20 + } 21 + 22 + async fn download_repo( 23 + client: &reqwest::Client, 24 + mut pds: Url, 25 + did: String, 26 + mut path: PathBuf, 27 + ) -> Result<()> { 28 + path.push(format!("{did}.car")); 29 + let f = tokio::fs::File::create(path).await?; 30 + let mut w = tokio::io::BufWriter::new(f); 31 + 32 + pds.set_path("/xrpc/com.atproto.sync.getRepo"); 33 + pds.set_query(Some(&format!("did={did}"))); 34 + let mut byte_stream = client.get(pds).send().await?.bytes_stream(); 35 + 36 + while let Some(stuff) = byte_stream.next().await { 37 + tokio::io::copy(&mut stuff?.as_ref(), &mut w).await?; 38 + } 39 + w.flush().await?; 40 + 41 + Ok(()) 42 + } 43 + 44 + #[derive(Debug, Deserialize)] 45 + struct RepoInfo { 46 + did: String, 47 + active: bool, 48 + } 49 + 50 + #[derive(Debug, Deserialize)] 51 + struct ListReposResponse { 52 + cursor: Option<String>, 53 + repos: Vec<RepoInfo>, 54 + } 55 + 56 + fn get_pds_dids(client: reqwest::Client, mut pds: Url) -> mpsc::Receiver<String> { 57 + let (tx, rx) = mpsc::channel(2); 58 + tokio::task::spawn(async move { 59 + pds.set_path("/xrpc/com.atproto.sync.listRepos"); 60 + let mut cursor = None; 61 + 62 + loop { 63 + if let Some(c) = cursor { 64 + pds.set_query(Some(&format!("cursor={c}"))); 65 + } 66 + let res: ListReposResponse = client 67 + .get(pds.clone()) 68 + .send() 69 + .await 70 + .expect("to send request") 71 + .error_for_status() 72 + .expect("to be ok") 73 + .json() 74 + .await 75 + .expect("json response"); 76 + for repo in res.repos { 77 + if repo.active { 78 + tx.send(repo.did) 79 + .await 80 + .expect("to be able to send on the channel"); 81 + } 82 + } 83 + cursor = res.cursor; 84 + if cursor.is_none() { 85 + break; 86 + } 87 + } 88 + }); 89 + rx 90 + } 91 + 92 + #[tokio::main] 93 + async fn main() -> Result<()> { 94 + env_logger::init(); 95 + 96 + let Args { 97 + pds, 98 + throttle_ms, 99 + folder, 100 + } = Args::parse(); 101 + 102 + tokio::fs::create_dir_all(folder.clone()).await?; 103 + 104 + let client = reqwest::Client::builder() 105 + .user_agent("microcosm/spacedust-testing") 106 + .build()?; 107 + 108 + let mut dids = get_pds_dids(client.clone(), pds.clone()); 109 + 110 + let mut interval = time::interval(time::Duration::from_millis(throttle_ms)); 111 + let mut oks = 0; 112 + let mut single_fails = 0; 113 + let mut double_fails = 0; 114 + 115 + while let Some(did) = dids.recv().await { 116 + interval.tick().await; 117 + println!("did: {did:?}"); 118 + if let Err(e) = download_repo(&client, pds.clone(), did.clone(), folder.clone()).await { 119 + single_fails += 1; 120 + eprintln!("failed to download repo for did: {did:?}: {e:?}. retrying in a moment..."); 121 + tokio::time::sleep(time::Duration::from_secs(3)).await; 122 + interval.reset(); 123 + if let Err(e) = download_repo(&client, pds.clone(), did.clone(), folder.clone()).await { 124 + double_fails += 1; 125 + eprintln!("failed again: {e:?}. moving on in a moment..."); 126 + tokio::time::sleep(time::Duration::from_secs(1)).await; 127 + continue; 128 + } 129 + } 130 + oks += 1; 131 + println!(" -> done. did: {did:?}"); 132 + } 133 + 134 + eprintln!("got {oks} repos. single fails: {single_fails}; doubles: {double_fails}."); 135 + 136 + Ok(()) 137 + }
+1
spacedust/src/lib.rs
··· 3 3 pub mod error; 4 4 pub mod removable_delay_queue; 5 5 pub mod server; 6 + pub mod storage; 6 7 pub mod subscriber; 7 8 8 9 use jetstream::events::CommitEvent;
spacedust/src/storage/car/drive.rs

This is a binary file and will not be displayed.

+1
spacedust/src/storage/car/mod.rs
··· 1 +
spacedust/src/storage/car/walk.rs

This is a binary file and will not be displayed.

+9
spacedust/src/storage/fjall/mod.rs
··· 1 + use crate::storage::Storage; 2 + 3 + pub struct FjallStorage {} 4 + 5 + impl Storage for FjallStorage { 6 + fn import_car() { 7 + todo!() 8 + } 9 + }
+6
spacedust/src/storage/mod.rs
··· 1 + pub mod car; 2 + pub mod fjall; 3 + 4 + pub trait Storage { 5 + fn import_car() {} 6 + }