Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm


+300 -73
Cargo.lock
··· 112 113 [[package]] 114 name = "anyhow" 115 - version = "1.0.97" 116 source = "registry+https://github.com/rust-lang/crates.io-index" 117 - checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" 118 119 [[package]] 120 name = "arbitrary" ··· 127 version = "1.7.1" 128 source = "registry+https://github.com/rust-lang/crates.io-index" 129 checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" 130 131 [[package]] 132 name = "arrayvec" ··· 192 "nom", 193 "num-traits", 194 "rusticata-macros", 195 - "thiserror 2.0.16", 196 "time", 197 ] 198 ··· 644 "axum", 645 "handlebars", 646 "serde", 647 - "thiserror 2.0.16", 648 ] 649 650 [[package]] ··· 673 version = "0.2.0" 674 source = "registry+https://github.com/rust-lang/crates.io-index" 675 checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" 676 677 [[package]] 678 name = "base64" ··· 812 checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" 813 814 [[package]] 815 name = "block-buffer" 816 version = "0.10.4" 817 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 839 checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 840 841 [[package]] 842 name = "bytes" 843 version = "1.10.1" 844 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 851 checksum = "6236364b88b9b6d0bc181ba374cf1ab55ba3ef97a1cb6f8cddad48a273767fb5" 852 853 [[package]] 854 name = "bzip2-sys" 855 version = "0.1.13+1.0.8" 856 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 890 "enum_dispatch", 891 "serde", 892 ] 893 894 [[package]] 895 name = "cc" ··· 976 "multihash", 977 "serde", 978 "serde_bytes", 979 - "unsigned-varint", 980 ] 981 982 [[package]] ··· 992 993 [[package]] 994 name = "clap" 995 - version = "4.5.47" 996 source = "registry+https://github.com/rust-lang/crates.io-index" 997 - checksum = "7eac00902d9d136acd712710d71823fb8ac8004ca445a89e73a41d45aa712931" 998 dependencies = [ 999 "clap_builder", 1000 "clap_derive", ··· 1002 1003 [[package]] 1004 name = "clap_builder" 1005 - version = "4.5.47" 1006 source = "registry+https://github.com/rust-lang/crates.io-index" 1007 - checksum = "2ad9bbf750e73b5884fb8a211a9424a1906c1e156724260fdae972f31d70e1d6" 1008 dependencies = [ 1009 "anstream", 1010 "anstyle", ··· 1085 version = "0.9.6" 1086 source = "registry+https://github.com/rust-lang/crates.io-index" 1087 checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" 1088 1089 [[package]] 1090 name = "constellation" ··· 1353 ] 1354 1355 [[package]] 1356 name = "data-encoding" 1357 - version = "2.8.0" 1358 source = "registry+https://github.com/rust-lang/crates.io-index" 1359 - checksum = "575f75dfd25738df5b91b8e43e14d44bda14637a58fae779fd2b064f8bf3e010" 1360 1361 [[package]] 1362 name = "data-encoding-macro" 1363 - version = "0.1.17" 1364 source = "registry+https://github.com/rust-lang/crates.io-index" 1365 - checksum = "9f9724adfcf41f45bf652b3995837669d73c4d49a1b5ac1ff82905ac7d9b5558" 1366 dependencies = [ 1367 "data-encoding", 1368 "data-encoding-macro-internal", ··· 1370 1371 [[package]] 1372 name = "data-encoding-macro-internal" 1373 - version = "0.1.15" 1374 source = "registry+https://github.com/rust-lang/crates.io-index" 1375 - checksum = "18e4fdb82bd54a12e42fb58a800dcae6b9e13982238ce2296dc3570b92148e1f" 1376 dependencies = [ 1377 "data-encoding", 1378 - "syn 1.0.109", 1379 ] 1380 1381 [[package]] ··· 1579 "slog-bunyan", 1580 "slog-json", 1581 "slog-term", 1582 - "thiserror 2.0.16", 1583 
"tokio", 1584 "tokio-rustls 0.25.0", 1585 "toml 0.9.7", ··· 1783 checksum = "0b25ad44cd4360a0448a9b5a0a6f1c7a621101cca4578706d43c9a821418aebc" 1784 dependencies = [ 1785 "byteorder", 1786 - "byteview", 1787 "dashmap", 1788 "log", 1789 - "lsm-tree", 1790 "path-absolutize", 1791 "std-semaphore", 1792 "tempfile", ··· 1799 source = "git+https://github.com/fjall-rs/fjall.git#42d811f7c8cc9004407d520d37d2a1d8d246c03d" 1800 dependencies = [ 1801 "byteorder", 1802 - "byteview", 1803 "dashmap", 1804 "log", 1805 - "lsm-tree", 1806 "path-absolutize", 1807 "std-semaphore", 1808 "tempfile", ··· 1810 ] 1811 1812 [[package]] 1813 name = "flate2" 1814 version = "1.1.2" 1815 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1891 "mixtrics", 1892 "pin-project", 1893 "serde", 1894 - "thiserror 2.0.16", 1895 "tokio", 1896 "tracing", 1897 ] ··· 1911 "parking_lot", 1912 "pin-project", 1913 "serde", 1914 - "thiserror 2.0.16", 1915 "tokio", 1916 "twox-hash", 1917 ] ··· 1944 "parking_lot", 1945 "pin-project", 1946 "serde", 1947 - "thiserror 2.0.16", 1948 "tokio", 1949 "tracing", 1950 ] ··· 1976 "pin-project", 1977 "rand 0.9.1", 1978 "serde", 1979 - "thiserror 2.0.16", 1980 "tokio", 1981 "tracing", 1982 "twox-hash", ··· 2220 "pest_derive", 2221 "serde", 2222 "serde_json", 2223 - "thiserror 2.0.16", 2224 "walkdir", 2225 ] 2226 ··· 2345 "once_cell", 2346 "rand 0.9.1", 2347 "ring", 2348 - "thiserror 2.0.16", 2349 "tinyvec", 2350 "tokio", 2351 "tracing", ··· 2368 "rand 0.9.1", 2369 "resolv-conf", 2370 "smallvec", 2371 - "thiserror 2.0.16", 2372 "tokio", 2373 "tracing", 2374 ] ··· 2800 ] 2801 2802 [[package]] 2803 name = "is-terminal" 2804 version = "0.4.16" 2805 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2863 "metrics", 2864 "serde", 2865 "serde_json", 2866 - "thiserror 2.0.16", 2867 "tokio", 2868 "tokio-tungstenite 0.26.2", 2869 "url", ··· 3045 checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" 3046 dependencies = [ 3047 "cfg-if", 3048 - "windows-targets 0.48.5", 3049 ] 3050 3051 [[package]] ··· 3116 version = "0.1.0" 3117 dependencies = [ 3118 "anyhow", 3119 "fluent-uri", 3120 "nom", 3121 - "thiserror 2.0.16", 3122 "tinyjson", 3123 ] 3124 ··· 3186 3187 [[package]] 3188 name = "lsm-tree" 3189 - version = "2.10.2" 3190 source = "registry+https://github.com/rust-lang/crates.io-index" 3191 - checksum = "55b6d7475a8dd22e749186968daacf8e2a77932b061b1bd263157987bbfc0c6c" 3192 dependencies = [ 3193 "byteorder", 3194 "crossbeam-skiplist", ··· 3209 ] 3210 3211 [[package]] 3212 name = "lz4" 3213 version = "1.28.1" 3214 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3229 3230 [[package]] 3231 name = "lz4_flex" 3232 - version = "0.11.3" 3233 source = "registry+https://github.com/rust-lang/crates.io-index" 3234 - checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" 3235 3236 [[package]] 3237 name = "mach2" ··· 3297 ] 3298 3299 [[package]] 3300 name = "match_cfg" 3301 version = "0.1.0" 3302 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3384 "metrics", 3385 "metrics-util 0.20.0", 3386 "quanta", 3387 - "thiserror 2.0.16", 3388 "tokio", 3389 "tracing", 3390 ] ··· 3531 3532 [[package]] 3533 name = "multibase" 3534 - version = "0.9.1" 3535 source = "registry+https://github.com/rust-lang/crates.io-index" 3536 - checksum = "9b3539ec3c1f04ac9748a260728e855f261b4977f5c3406612c884564f329404" 3537 dependencies = [ 3538 "base-x", 3539 "data-encoding", 3540 "data-encoding-macro", 3541 ] ··· 3548 
dependencies = [ 3549 "core2", 3550 "serde", 3551 - "unsigned-varint", 3552 ] 3553 3554 [[package]] ··· 3926 checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323" 3927 dependencies = [ 3928 "memchr", 3929 - "thiserror 2.0.16", 3930 "ucd-trie", 3931 ] 3932 ··· 4036 "rusqlite", 4037 "serde", 4038 "serde_json", 4039 - "thiserror 2.0.16", 4040 "tokio", 4041 "tracing-subscriber", 4042 ] ··· 4079 "smallvec", 4080 "sync_wrapper", 4081 "tempfile", 4082 - "thiserror 2.0.16", 4083 "tokio", 4084 "tokio-rustls 0.26.2", 4085 "tokio-stream", ··· 4123 "serde_json", 4124 "serde_urlencoded", 4125 "serde_yaml", 4126 - "thiserror 2.0.16", 4127 "tokio", 4128 ] 4129 ··· 4142 "quote", 4143 "regex", 4144 "syn 2.0.106", 4145 - "thiserror 2.0.16", 4146 ] 4147 4148 [[package]] ··· 4269 4270 [[package]] 4271 name = "quick_cache" 4272 - version = "0.6.12" 4273 source = "registry+https://github.com/rust-lang/crates.io-index" 4274 - checksum = "8f8ed0655cbaf18a26966142ad23b95d8ab47221c50c4f73a1db7d0d2d6e3da8" 4275 dependencies = [ 4276 "equivalent", 4277 "hashbrown 0.15.2", ··· 4291 "rustc-hash 2.1.1", 4292 "rustls 0.23.31", 4293 "socket2 0.5.9", 4294 - "thiserror 2.0.16", 4295 "tokio", 4296 "tracing", 4297 "web-time", ··· 4312 "rustls 0.23.31", 4313 "rustls-pki-types", 4314 "slab", 4315 - "thiserror 2.0.16", 4316 "tinyvec", 4317 "tracing", 4318 "web-time", ··· 4538 checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 4539 4540 [[package]] 4541 name = "reqwest" 4542 - version = "0.12.22" 4543 source = "registry+https://github.com/rust-lang/crates.io-index" 4544 - checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531" 4545 dependencies = [ 4546 "async-compression", 4547 "base64 0.22.1", ··· 4581 "url", 4582 "wasm-bindgen", 4583 "wasm-bindgen-futures", 4584 "web-sys", 4585 ] 4586 ··· 4962 4963 [[package]] 4964 name = "self_cell" 4965 - version = "1.1.0" 4966 source = "registry+https://github.com/rust-lang/crates.io-index" 4967 - checksum = "c2fdfc24bc566f839a2da4c4295b82db7d25a24253867d5c64355abb5799bdbe" 4968 4969 [[package]] 4970 name = "semver" ··· 4984 4985 [[package]] 4986 name = "serde_bytes" 4987 - version = "0.11.17" 4988 source = "registry+https://github.com/rust-lang/crates.io-index" 4989 - checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96" 4990 dependencies = [ 4991 "serde", 4992 ] 4993 4994 [[package]] ··· 5036 ] 5037 5038 [[package]] 5039 name = "serde_json" 5040 version = "1.0.145" 5041 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 5068 "percent-encoding", 5069 "ryu", 5070 "serde", 5071 - "thiserror 2.0.16", 5072 ] 5073 5074 [[package]] ··· 5157 ] 5158 5159 [[package]] 5160 name = "sha1" 5161 version = "0.10.6" 5162 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 5220 dependencies = [ 5221 "num-bigint", 5222 "num-traits", 5223 - "thiserror 2.0.16", 5224 "time", 5225 ] 5226 ··· 5262 "rustls 0.23.31", 5263 "serde", 5264 "serde_json", 5265 - "thiserror 2.0.16", 5266 "time", 5267 "tokio", 5268 "tokio-util", ··· 5355 name = "spacedust" 5356 version = "0.1.0" 5357 dependencies = [ 5358 "async-trait", 5359 "clap", 5360 "ctrlc", 5361 "dropshot", 5362 "env_logger", 5363 "futures", 5364 "http", 5365 "jetstream", 5366 "links", 5367 "log", 5368 "metrics", 5369 "metrics-exporter-prometheus 0.17.2", 5370 "rand 0.9.1", 5371 "schemars", 5372 "semver", 5373 "serde", 5374 "serde_json", 5375 "serde_qs", 5376 - "thiserror 2.0.16", 5377 "tinyjson", 5378 "tokio", 5379 "tokio-tungstenite 
0.27.0", ··· 5506 5507 [[package]] 5508 name = "tempfile" 5509 - version = "3.19.1" 5510 source = "registry+https://github.com/rust-lang/crates.io-index" 5511 - checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" 5512 dependencies = [ 5513 "fastrand", 5514 "getrandom 0.3.3", ··· 5539 5540 [[package]] 5541 name = "thiserror" 5542 - version = "2.0.16" 5543 source = "registry+https://github.com/rust-lang/crates.io-index" 5544 - checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" 5545 dependencies = [ 5546 - "thiserror-impl 2.0.16", 5547 ] 5548 5549 [[package]] ··· 5559 5560 [[package]] 5561 name = "thiserror-impl" 5562 - version = "2.0.16" 5563 source = "registry+https://github.com/rust-lang/crates.io-index" 5564 - checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" 5565 dependencies = [ 5566 "proc-macro2", 5567 "quote", ··· 5993 "native-tls", 5994 "rand 0.9.1", 5995 "sha1", 5996 - "thiserror 2.0.16", 5997 "url", 5998 "utf-8", 5999 ] ··· 6011 "log", 6012 "rand 0.9.1", 6013 "sha1", 6014 - "thiserror 2.0.16", 6015 "utf-8", 6016 ] 6017 ··· 6054 "http", 6055 "jetstream", 6056 "log", 6057 - "lsm-tree", 6058 "metrics", 6059 "metrics-exporter-prometheus 0.17.2", 6060 "schemars", ··· 6064 "serde_qs", 6065 "sha2", 6066 "tempfile", 6067 - "thiserror 2.0.16", 6068 "tikv-jemallocator", 6069 "tokio", 6070 "tokio-util", ··· 6117 6118 [[package]] 6119 name = "unsigned-varint" 6120 version = "0.8.0" 6121 source = "registry+https://github.com/rust-lang/crates.io-index" 6122 checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06" ··· 6193 checksum = "62fc7c4ce161f049607ecea654dca3f2d727da5371ae85e2e4f14ce2b98ed67c" 6194 dependencies = [ 6195 "byteorder", 6196 - "byteview", 6197 "interval-heap", 6198 "log", 6199 "path-absolutize", ··· 6342 ] 6343 6344 [[package]] 6345 name = "web-sys" 6346 version = "0.3.77" 6347 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 6400 "reqwest", 6401 "serde", 6402 "serde_json", 6403 - "thiserror 2.0.16", 6404 "tokio", 6405 "tokio-util", 6406 "url", ··· 6440 source = "registry+https://github.com/rust-lang/crates.io-index" 6441 checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" 6442 dependencies = [ 6443 - "windows-sys 0.48.0", 6444 ] 6445 6446 [[package]] ··· 6758 "nom", 6759 "oid-registry", 6760 "rusticata-macros", 6761 - "thiserror 2.0.16", 6762 "time", 6763 ] 6764
··· 112 113 [[package]] 114 name = "anyhow" 115 + version = "1.0.100" 116 source = "registry+https://github.com/rust-lang/crates.io-index" 117 + checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" 118 119 [[package]] 120 name = "arbitrary" ··· 127 version = "1.7.1" 128 source = "registry+https://github.com/rust-lang/crates.io-index" 129 checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" 130 + 131 + [[package]] 132 + name = "arrayref" 133 + version = "0.3.9" 134 + source = "registry+https://github.com/rust-lang/crates.io-index" 135 + checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" 136 137 [[package]] 138 name = "arrayvec" ··· 198 "nom", 199 "num-traits", 200 "rusticata-macros", 201 + "thiserror 2.0.17", 202 "time", 203 ] 204 ··· 650 "axum", 651 "handlebars", 652 "serde", 653 + "thiserror 2.0.17", 654 ] 655 656 [[package]] ··· 679 version = "0.2.0" 680 source = "registry+https://github.com/rust-lang/crates.io-index" 681 checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" 682 + 683 + [[package]] 684 + name = "base256emoji" 685 + version = "1.0.2" 686 + source = "registry+https://github.com/rust-lang/crates.io-index" 687 + checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c" 688 + dependencies = [ 689 + "const-str", 690 + "match-lookup", 691 + ] 692 693 [[package]] 694 name = "base64" ··· 828 checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" 829 830 [[package]] 831 + name = "blake3" 832 + version = "1.8.2" 833 + source = "registry+https://github.com/rust-lang/crates.io-index" 834 + checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" 835 + dependencies = [ 836 + "arrayref", 837 + "arrayvec", 838 + "cc", 839 + "cfg-if", 840 + "constant_time_eq", 841 + ] 842 + 843 + [[package]] 844 name = "block-buffer" 845 version = "0.10.4" 846 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 868 checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 869 870 [[package]] 871 + name = "byteorder-lite" 872 + version = "0.1.0" 873 + source = "registry+https://github.com/rust-lang/crates.io-index" 874 + checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" 875 + 876 + [[package]] 877 name = "bytes" 878 version = "1.10.1" 879 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 886 checksum = "6236364b88b9b6d0bc181ba374cf1ab55ba3ef97a1cb6f8cddad48a273767fb5" 887 888 [[package]] 889 + name = "byteview" 890 + version = "0.8.0" 891 + source = "registry+https://github.com/rust-lang/crates.io-index" 892 + checksum = "1e6b0e42e210b794e14b152c6fe1a55831e30ef4a0f5dc39d73d714fb5f1906c" 893 + 894 + [[package]] 895 name = "bzip2-sys" 896 version = "0.1.13+1.0.8" 897 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 931 "enum_dispatch", 932 "serde", 933 ] 934 + 935 + [[package]] 936 + name = "cbor4ii" 937 + version = "0.2.14" 938 + source = "registry+https://github.com/rust-lang/crates.io-index" 939 + checksum = "b544cf8c89359205f4f990d0e6f3828db42df85b5dac95d09157a250eb0749c4" 940 + dependencies = [ 941 + "serde", 942 + ] 943 + 944 + [[package]] 945 + name = "cbor4ii" 946 + version = "1.2.0" 947 + source = "registry+https://github.com/rust-lang/crates.io-index" 948 + checksum = "b28d2802395e3bccd95cc4ae984bff7444b6c1f5981da46a41360c42a2c7e2d9" 949 950 [[package]] 951 name = "cc" ··· 1032 "multihash", 1033 "serde", 1034 "serde_bytes", 1035 + 
"unsigned-varint 0.8.0", 1036 ] 1037 1038 [[package]] ··· 1048 1049 [[package]] 1050 name = "clap" 1051 + version = "4.5.48" 1052 source = "registry+https://github.com/rust-lang/crates.io-index" 1053 + checksum = "e2134bb3ea021b78629caa971416385309e0131b351b25e01dc16fb54e1b5fae" 1054 dependencies = [ 1055 "clap_builder", 1056 "clap_derive", ··· 1058 1059 [[package]] 1060 name = "clap_builder" 1061 + version = "4.5.48" 1062 source = "registry+https://github.com/rust-lang/crates.io-index" 1063 + checksum = "c2ba64afa3c0a6df7fa517765e31314e983f51dda798ffba27b988194fb65dc9" 1064 dependencies = [ 1065 "anstream", 1066 "anstyle", ··· 1141 version = "0.9.6" 1142 source = "registry+https://github.com/rust-lang/crates.io-index" 1143 checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" 1144 + 1145 + [[package]] 1146 + name = "const-str" 1147 + version = "0.4.3" 1148 + source = "registry+https://github.com/rust-lang/crates.io-index" 1149 + checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3" 1150 + 1151 + [[package]] 1152 + name = "constant_time_eq" 1153 + version = "0.3.1" 1154 + source = "registry+https://github.com/rust-lang/crates.io-index" 1155 + checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" 1156 1157 [[package]] 1158 name = "constellation" ··· 1421 ] 1422 1423 [[package]] 1424 + name = "dasl" 1425 + version = "0.2.0" 1426 + source = "registry+https://github.com/rust-lang/crates.io-index" 1427 + checksum = "b59666035a4386b0fd272bd78da4cbc3ccb558941e97579ab00f0eb4639f2a49" 1428 + dependencies = [ 1429 + "blake3", 1430 + "cbor4ii 1.2.0", 1431 + "data-encoding", 1432 + "data-encoding-macro", 1433 + "scopeguard", 1434 + "serde", 1435 + "serde_bytes", 1436 + "sha2", 1437 + "thiserror 2.0.17", 1438 + ] 1439 + 1440 + [[package]] 1441 name = "data-encoding" 1442 + version = "2.9.0" 1443 source = "registry+https://github.com/rust-lang/crates.io-index" 1444 + checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" 1445 1446 [[package]] 1447 name = "data-encoding-macro" 1448 + version = "0.1.18" 1449 source = "registry+https://github.com/rust-lang/crates.io-index" 1450 + checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d" 1451 dependencies = [ 1452 "data-encoding", 1453 "data-encoding-macro-internal", ··· 1455 1456 [[package]] 1457 name = "data-encoding-macro-internal" 1458 + version = "0.1.16" 1459 source = "registry+https://github.com/rust-lang/crates.io-index" 1460 + checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976" 1461 dependencies = [ 1462 "data-encoding", 1463 + "syn 2.0.106", 1464 ] 1465 1466 [[package]] ··· 1664 "slog-bunyan", 1665 "slog-json", 1666 "slog-term", 1667 + "thiserror 2.0.17", 1668 "tokio", 1669 "tokio-rustls 0.25.0", 1670 "toml 0.9.7", ··· 1868 checksum = "0b25ad44cd4360a0448a9b5a0a6f1c7a621101cca4578706d43c9a821418aebc" 1869 dependencies = [ 1870 "byteorder", 1871 + "byteview 0.6.1", 1872 "dashmap", 1873 "log", 1874 + "lsm-tree 2.10.4", 1875 "path-absolutize", 1876 "std-semaphore", 1877 "tempfile", ··· 1884 source = "git+https://github.com/fjall-rs/fjall.git#42d811f7c8cc9004407d520d37d2a1d8d246c03d" 1885 dependencies = [ 1886 "byteorder", 1887 + "byteview 0.6.1", 1888 "dashmap", 1889 "log", 1890 + "lsm-tree 2.10.4", 1891 "path-absolutize", 1892 "std-semaphore", 1893 "tempfile", ··· 1895 ] 1896 1897 [[package]] 1898 + name = "fjall" 1899 + version = "3.0.0-pre.0" 1900 + source = 
"registry+https://github.com/rust-lang/crates.io-index" 1901 + checksum = "467588c1f15d1cfa9e43f02a45cf55d82fa1f12a6ae961b848c520458525600c" 1902 + dependencies = [ 1903 + "byteorder-lite", 1904 + "byteview 0.8.0", 1905 + "dashmap", 1906 + "log", 1907 + "lsm-tree 3.0.0-pre.0", 1908 + "std-semaphore", 1909 + "tempfile", 1910 + "xxhash-rust", 1911 + ] 1912 + 1913 + [[package]] 1914 name = "flate2" 1915 version = "1.1.2" 1916 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 1992 "mixtrics", 1993 "pin-project", 1994 "serde", 1995 + "thiserror 2.0.17", 1996 "tokio", 1997 "tracing", 1998 ] ··· 2012 "parking_lot", 2013 "pin-project", 2014 "serde", 2015 + "thiserror 2.0.17", 2016 "tokio", 2017 "twox-hash", 2018 ] ··· 2045 "parking_lot", 2046 "pin-project", 2047 "serde", 2048 + "thiserror 2.0.17", 2049 "tokio", 2050 "tracing", 2051 ] ··· 2077 "pin-project", 2078 "rand 0.9.1", 2079 "serde", 2080 + "thiserror 2.0.17", 2081 "tokio", 2082 "tracing", 2083 "twox-hash", ··· 2321 "pest_derive", 2322 "serde", 2323 "serde_json", 2324 + "thiserror 2.0.17", 2325 "walkdir", 2326 ] 2327 ··· 2446 "once_cell", 2447 "rand 0.9.1", 2448 "ring", 2449 + "thiserror 2.0.17", 2450 "tinyvec", 2451 "tokio", 2452 "tracing", ··· 2469 "rand 0.9.1", 2470 "resolv-conf", 2471 "smallvec", 2472 + "thiserror 2.0.17", 2473 "tokio", 2474 "tracing", 2475 ] ··· 2901 ] 2902 2903 [[package]] 2904 + name = "iroh-car" 2905 + version = "0.5.1" 2906 + source = "registry+https://github.com/rust-lang/crates.io-index" 2907 + checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a" 2908 + dependencies = [ 2909 + "anyhow", 2910 + "cid", 2911 + "futures", 2912 + "serde", 2913 + "serde_ipld_dagcbor", 2914 + "thiserror 1.0.69", 2915 + "tokio", 2916 + "unsigned-varint 0.7.2", 2917 + ] 2918 + 2919 + [[package]] 2920 name = "is-terminal" 2921 version = "0.4.16" 2922 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 2980 "metrics", 2981 "serde", 2982 "serde_json", 2983 + "thiserror 2.0.17", 2984 "tokio", 2985 "tokio-tungstenite 0.26.2", 2986 "url", ··· 3162 checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" 3163 dependencies = [ 3164 "cfg-if", 3165 + "windows-targets 0.52.6", 3166 ] 3167 3168 [[package]] ··· 3233 version = "0.1.0" 3234 dependencies = [ 3235 "anyhow", 3236 + "dasl", 3237 "fluent-uri", 3238 "nom", 3239 + "serde", 3240 + "thiserror 2.0.17", 3241 "tinyjson", 3242 ] 3243 ··· 3305 3306 [[package]] 3307 name = "lsm-tree" 3308 + version = "2.10.4" 3309 source = "registry+https://github.com/rust-lang/crates.io-index" 3310 + checksum = "799399117a2bfb37660e08be33f470958babb98386b04185288d829df362ea15" 3311 dependencies = [ 3312 "byteorder", 3313 "crossbeam-skiplist", ··· 3328 ] 3329 3330 [[package]] 3331 + name = "lsm-tree" 3332 + version = "3.0.0-pre.0" 3333 + source = "registry+https://github.com/rust-lang/crates.io-index" 3334 + checksum = "be375d45e348328e78582dffbda4f1709dd52fca27c1a81c7bf6ca134e6335f7" 3335 + dependencies = [ 3336 + "byteorder-lite", 3337 + "byteview 0.8.0", 3338 + "crossbeam-skiplist", 3339 + "enum_dispatch", 3340 + "interval-heap", 3341 + "log", 3342 + "lz4_flex", 3343 + "quick_cache", 3344 + "rustc-hash 2.1.1", 3345 + "self_cell", 3346 + "sfa", 3347 + "tempfile", 3348 + "varint-rs", 3349 + "xxhash-rust", 3350 + ] 3351 + 3352 + [[package]] 3353 name = "lz4" 3354 version = "1.28.1" 3355 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3370 3371 [[package]] 3372 name = "lz4_flex" 3373 + version = "0.11.5" 3374 source = 
"registry+https://github.com/rust-lang/crates.io-index" 3375 + checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" 3376 3377 [[package]] 3378 name = "mach2" ··· 3438 ] 3439 3440 [[package]] 3441 + name = "match-lookup" 3442 + version = "0.1.1" 3443 + source = "registry+https://github.com/rust-lang/crates.io-index" 3444 + checksum = "1265724d8cb29dbbc2b0f06fffb8bf1a8c0cf73a78eede9ba73a4a66c52a981e" 3445 + dependencies = [ 3446 + "proc-macro2", 3447 + "quote", 3448 + "syn 1.0.109", 3449 + ] 3450 + 3451 + [[package]] 3452 name = "match_cfg" 3453 version = "0.1.0" 3454 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 3536 "metrics", 3537 "metrics-util 0.20.0", 3538 "quanta", 3539 + "thiserror 2.0.17", 3540 "tokio", 3541 "tracing", 3542 ] ··· 3683 3684 [[package]] 3685 name = "multibase" 3686 + version = "0.9.2" 3687 source = "registry+https://github.com/rust-lang/crates.io-index" 3688 + checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77" 3689 dependencies = [ 3690 "base-x", 3691 + "base256emoji", 3692 "data-encoding", 3693 "data-encoding-macro", 3694 ] ··· 3701 dependencies = [ 3702 "core2", 3703 "serde", 3704 + "unsigned-varint 0.8.0", 3705 ] 3706 3707 [[package]] ··· 4079 checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323" 4080 dependencies = [ 4081 "memchr", 4082 + "thiserror 2.0.17", 4083 "ucd-trie", 4084 ] 4085 ··· 4189 "rusqlite", 4190 "serde", 4191 "serde_json", 4192 + "thiserror 2.0.17", 4193 "tokio", 4194 "tracing-subscriber", 4195 ] ··· 4232 "smallvec", 4233 "sync_wrapper", 4234 "tempfile", 4235 + "thiserror 2.0.17", 4236 "tokio", 4237 "tokio-rustls 0.26.2", 4238 "tokio-stream", ··· 4276 "serde_json", 4277 "serde_urlencoded", 4278 "serde_yaml", 4279 + "thiserror 2.0.17", 4280 "tokio", 4281 ] 4282 ··· 4295 "quote", 4296 "regex", 4297 "syn 2.0.106", 4298 + "thiserror 2.0.17", 4299 ] 4300 4301 [[package]] ··· 4422 4423 [[package]] 4424 name = "quick_cache" 4425 + version = "0.6.16" 4426 source = "registry+https://github.com/rust-lang/crates.io-index" 4427 + checksum = "9ad6644cb07b7f3488b9f3d2fde3b4c0a7fa367cafefb39dff93a659f76eb786" 4428 dependencies = [ 4429 "equivalent", 4430 "hashbrown 0.15.2", ··· 4444 "rustc-hash 2.1.1", 4445 "rustls 0.23.31", 4446 "socket2 0.5.9", 4447 + "thiserror 2.0.17", 4448 "tokio", 4449 "tracing", 4450 "web-time", ··· 4465 "rustls 0.23.31", 4466 "rustls-pki-types", 4467 "slab", 4468 + "thiserror 2.0.17", 4469 "tinyvec", 4470 "tracing", 4471 "web-time", ··· 4691 checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 4692 4693 [[package]] 4694 + name = "repo-stream" 4695 + version = "0.2.2" 4696 + source = "registry+https://github.com/rust-lang/crates.io-index" 4697 + checksum = "093b48e604c138949bf3d4a1a9bc1165feb1db28a73af0101c84eb703d279f43" 4698 + dependencies = [ 4699 + "bincode 2.0.1", 4700 + "futures", 4701 + "futures-core", 4702 + "ipld-core", 4703 + "iroh-car", 4704 + "log", 4705 + "multibase", 4706 + "rusqlite", 4707 + "serde", 4708 + "serde_bytes", 4709 + "serde_ipld_dagcbor", 4710 + "sha2", 4711 + "thiserror 2.0.17", 4712 + "tokio", 4713 + ] 4714 + 4715 + [[package]] 4716 name = "reqwest" 4717 + version = "0.12.24" 4718 source = "registry+https://github.com/rust-lang/crates.io-index" 4719 + checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" 4720 dependencies = [ 4721 "async-compression", 4722 "base64 0.22.1", ··· 4756 "url", 4757 "wasm-bindgen", 4758 "wasm-bindgen-futures", 4759 + "wasm-streams", 4760 
"web-sys", 4761 ] 4762 ··· 5138 5139 [[package]] 5140 name = "self_cell" 5141 + version = "1.2.0" 5142 source = "registry+https://github.com/rust-lang/crates.io-index" 5143 + checksum = "0f7d95a54511e0c7be3f51e8867aa8cf35148d7b9445d44de2f943e2b206e749" 5144 5145 [[package]] 5146 name = "semver" ··· 5160 5161 [[package]] 5162 name = "serde_bytes" 5163 + version = "0.11.19" 5164 source = "registry+https://github.com/rust-lang/crates.io-index" 5165 + checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" 5166 dependencies = [ 5167 "serde", 5168 + "serde_core", 5169 ] 5170 5171 [[package]] ··· 5213 ] 5214 5215 [[package]] 5216 + name = "serde_ipld_dagcbor" 5217 + version = "0.6.4" 5218 + source = "registry+https://github.com/rust-lang/crates.io-index" 5219 + checksum = "46182f4f08349a02b45c998ba3215d3f9de826246ba02bb9dddfe9a2a2100778" 5220 + dependencies = [ 5221 + "cbor4ii 0.2.14", 5222 + "ipld-core", 5223 + "scopeguard", 5224 + "serde", 5225 + ] 5226 + 5227 + [[package]] 5228 name = "serde_json" 5229 version = "1.0.145" 5230 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 5257 "percent-encoding", 5258 "ryu", 5259 "serde", 5260 + "thiserror 2.0.17", 5261 ] 5262 5263 [[package]] ··· 5346 ] 5347 5348 [[package]] 5349 + name = "sfa" 5350 + version = "0.0.1" 5351 + source = "registry+https://github.com/rust-lang/crates.io-index" 5352 + checksum = "e5f5f9dc21f55409f15103d5a7e7601b804935923c7fe4746dc806c3a422a038" 5353 + dependencies = [ 5354 + "byteorder-lite", 5355 + "log", 5356 + "xxhash-rust", 5357 + ] 5358 + 5359 + [[package]] 5360 name = "sha1" 5361 version = "0.10.6" 5362 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 5420 dependencies = [ 5421 "num-bigint", 5422 "num-traits", 5423 + "thiserror 2.0.17", 5424 "time", 5425 ] 5426 ··· 5462 "rustls 0.23.31", 5463 "serde", 5464 "serde_json", 5465 + "thiserror 2.0.17", 5466 "time", 5467 "tokio", 5468 "tokio-util", ··· 5555 name = "spacedust" 5556 version = "0.1.0" 5557 dependencies = [ 5558 + "anyhow", 5559 + "async-channel", 5560 "async-trait", 5561 "clap", 5562 "ctrlc", 5563 + "dasl", 5564 "dropshot", 5565 "env_logger", 5566 + "fjall 3.0.0-pre.0", 5567 "futures", 5568 "http", 5569 + "ipld-core", 5570 "jetstream", 5571 "links", 5572 "log", 5573 "metrics", 5574 "metrics-exporter-prometheus 0.17.2", 5575 "rand 0.9.1", 5576 + "repo-stream", 5577 + "reqwest", 5578 "schemars", 5579 "semver", 5580 "serde", 5581 + "serde_ipld_dagcbor", 5582 "serde_json", 5583 "serde_qs", 5584 + "thiserror 2.0.17", 5585 "tinyjson", 5586 "tokio", 5587 "tokio-tungstenite 0.27.0", ··· 5714 5715 [[package]] 5716 name = "tempfile" 5717 + version = "3.23.0" 5718 source = "registry+https://github.com/rust-lang/crates.io-index" 5719 + checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" 5720 dependencies = [ 5721 "fastrand", 5722 "getrandom 0.3.3", ··· 5747 5748 [[package]] 5749 name = "thiserror" 5750 + version = "2.0.17" 5751 source = "registry+https://github.com/rust-lang/crates.io-index" 5752 + checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" 5753 dependencies = [ 5754 + "thiserror-impl 2.0.17", 5755 ] 5756 5757 [[package]] ··· 5767 5768 [[package]] 5769 name = "thiserror-impl" 5770 + version = "2.0.17" 5771 source = "registry+https://github.com/rust-lang/crates.io-index" 5772 + checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" 5773 dependencies = [ 5774 "proc-macro2", 5775 "quote", ··· 6201 "native-tls", 6202 "rand 0.9.1", 6203 
"sha1", 6204 + "thiserror 2.0.17", 6205 "url", 6206 "utf-8", 6207 ] ··· 6219 "log", 6220 "rand 0.9.1", 6221 "sha1", 6222 + "thiserror 2.0.17", 6223 "utf-8", 6224 ] 6225 ··· 6262 "http", 6263 "jetstream", 6264 "log", 6265 + "lsm-tree 2.10.4", 6266 "metrics", 6267 "metrics-exporter-prometheus 0.17.2", 6268 "schemars", ··· 6272 "serde_qs", 6273 "sha2", 6274 "tempfile", 6275 + "thiserror 2.0.17", 6276 "tikv-jemallocator", 6277 "tokio", 6278 "tokio-util", ··· 6325 6326 [[package]] 6327 name = "unsigned-varint" 6328 + version = "0.7.2" 6329 + source = "registry+https://github.com/rust-lang/crates.io-index" 6330 + checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105" 6331 + 6332 + [[package]] 6333 + name = "unsigned-varint" 6334 version = "0.8.0" 6335 source = "registry+https://github.com/rust-lang/crates.io-index" 6336 checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06" ··· 6407 checksum = "62fc7c4ce161f049607ecea654dca3f2d727da5371ae85e2e4f14ce2b98ed67c" 6408 dependencies = [ 6409 "byteorder", 6410 + "byteview 0.6.1", 6411 "interval-heap", 6412 "log", 6413 "path-absolutize", ··· 6556 ] 6557 6558 [[package]] 6559 + name = "wasm-streams" 6560 + version = "0.4.2" 6561 + source = "registry+https://github.com/rust-lang/crates.io-index" 6562 + checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" 6563 + dependencies = [ 6564 + "futures-util", 6565 + "js-sys", 6566 + "wasm-bindgen", 6567 + "wasm-bindgen-futures", 6568 + "web-sys", 6569 + ] 6570 + 6571 + [[package]] 6572 name = "web-sys" 6573 version = "0.3.77" 6574 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 6627 "reqwest", 6628 "serde", 6629 "serde_json", 6630 + "thiserror 2.0.17", 6631 "tokio", 6632 "tokio-util", 6633 "url", ··· 6667 source = "registry+https://github.com/rust-lang/crates.io-index" 6668 checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" 6669 dependencies = [ 6670 + "windows-sys 0.59.0", 6671 ] 6672 6673 [[package]] ··· 6985 "nom", 6986 "oid-registry", 6987 "rusticata-macros", 6988 + "thiserror 2.0.17", 6989 "time", 6990 ] 6991
+29 -10
constellation/src/bin/main.rs
··· 54 /// Saved jsonl from jetstream to use instead of a live subscription 55 #[arg(short, long)] 56 fixture: Option<PathBuf>, 57 } 58 59 #[derive(Debug, Clone, ValueEnum)] ··· 115 rocks.start_backup(backup_dir, auto_backup, stay_alive.clone())?; 116 } 117 println!("rocks ready."); 118 - run( 119 - rocks, 120 - fixture, 121 - args.data, 122 - stream, 123 - bind, 124 - metrics_bind, 125 - stay_alive, 126 - ) 127 } 128 } 129 } ··· 213 214 'monitor: loop { 215 match readable.get_stats() { 216 - Ok(StorageStats { dids, targetables, linking_records }) => { 217 metrics::gauge!("storage.stats.dids").set(dids as f64); 218 metrics::gauge!("storage.stats.targetables").set(targetables as f64); 219 metrics::gauge!("storage.stats.linking_records").set(linking_records as f64);
··· 54 /// Saved jsonl from jetstream to use instead of a live subscription 55 #[arg(short, long)] 56 fixture: Option<PathBuf>, 57 + /// run a scan across the target id table and write all key -> ids to id -> keys 58 + #[arg(long, action)] 59 + repair_target_ids: bool, 60 } 61 62 #[derive(Debug, Clone, ValueEnum)] ··· 118 rocks.start_backup(backup_dir, auto_backup, stay_alive.clone())?; 119 } 120 println!("rocks ready."); 121 + std::thread::scope(|s| { 122 + if args.repair_target_ids { 123 + let rocks = rocks.clone(); 124 + let stay_alive = stay_alive.clone(); 125 + s.spawn(move || { 126 + let rep = rocks.run_repair(time::Duration::from_millis(0), stay_alive); 127 + eprintln!("repair finished: {rep:?}"); 128 + rep 129 + }); 130 + } 131 + s.spawn(|| { 132 + let r = run( 133 + rocks, 134 + fixture, 135 + args.data, 136 + stream, 137 + bind, 138 + metrics_bind, 139 + stay_alive, 140 + ); 141 + eprintln!("run finished: {r:?}"); 142 + r 143 + }); 144 + }); 145 + Ok(()) 146 } 147 } 148 } ··· 232 233 'monitor: loop { 234 match readable.get_stats() { 235 + Ok(StorageStats { dids, targetables, linking_records, .. }) => { 236 metrics::gauge!("storage.stats.dids").set(dids as f64); 237 metrics::gauge!("storage.stats.targetables").set(targetables as f64); 238 metrics::gauge!("storage.stats.linking_records").set(linking_records as f64);
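The new `--repair-target-ids` flag runs a one-off repair pass concurrently with the normal indexing loop, using `std::thread::scope` so both threads are joined before `main` returns. Below is a minimal sketch of that pattern only; the closures are placeholders standing in for the real `rocks.run_repair(...)` and `run(...)` calls.

```rust
use std::thread;

fn main() {
    // Stand-in for args.repair_target_ids; in the real binary this comes from clap.
    let repair_requested = true;

    // scope() joins every spawned thread before it returns, so a one-off
    // repair and the main event loop can run side by side and share
    // borrowed state safely.
    thread::scope(|s| {
        if repair_requested {
            s.spawn(|| {
                // placeholder for the target-id repair work
                eprintln!("repair finished");
            });
        }
        s.spawn(|| {
            // placeholder for the main run(...) loop
            eprintln!("run finished");
        });
    });
    // both spawned threads are guaranteed to have completed here
}
```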
+8 -6
constellation/src/server/filters.rs
··· 5 Ok({ 6 if let Some(link) = parse_any_link(s) { 7 match link { 8 - Link::AtUri(at_uri) => at_uri.strip_prefix("at://").map(|noproto| { 9 - format!("https://atproto-browser-plus-links.vercel.app/at/{noproto}") 10 - }), 11 - Link::Did(did) => Some(format!( 12 - "https://atproto-browser-plus-links.vercel.app/at/{did}" 13 - )), 14 Link::Uri(uri) => Some(uri), 15 } 16 } else { ··· 22 pub fn human_number(n: &u64) -> askama::Result<String> { 23 Ok(n.to_formatted_string(&Locale::en)) 24 }
··· 5 Ok({ 6 if let Some(link) = parse_any_link(s) { 7 match link { 8 + Link::AtUri(at_uri) => at_uri 9 + .strip_prefix("at://") 10 + .map(|noproto| format!("https://pdsls.dev/at://{noproto}")), 11 + Link::Did(did) => Some(format!("https://pdsls.dev/at://{did}")), 12 Link::Uri(uri) => Some(uri), 13 } 14 } else { ··· 20 pub fn human_number(n: &u64) -> askama::Result<String> { 21 Ok(n.to_formatted_string(&Locale::en)) 22 } 23 + 24 + pub fn to_u64(n: usize) -> askama::Result<u64> { 25 + Ok(n as u64) 26 + }
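The filter now links out to pdsls.dev instead of the previous atproto-browser deployment. A standalone sketch of the URL mapping follows, using a hypothetical `link_url` helper in place of the crate's `parse_any_link`/`Link` machinery:

```rust
/// Hypothetical standalone version of the mapping done by the template filter.
fn link_url(s: &str) -> Option<String> {
    if let Some(noproto) = s.strip_prefix("at://") {
        // AT-URIs: drop the scheme and point at the pdsls record view
        Some(format!("https://pdsls.dev/at://{noproto}"))
    } else if s.starts_with("did:") {
        // bare DIDs: pdsls repo view
        Some(format!("https://pdsls.dev/at://{s}"))
    } else {
        // plain URIs pass through unchanged
        Some(s.to_string())
    }
}

fn main() {
    assert_eq!(
        link_url("at://did:plc:abc/app.bsky.feed.post/123").as_deref(),
        Some("https://pdsls.dev/at://did:plc:abc/app.bsky.feed.post/123")
    );
    assert_eq!(
        link_url("did:plc:abc").as_deref(),
        Some("https://pdsls.dev/at://did:plc:abc")
    );
}
```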
+289 -18
constellation/src/server/mod.rs
··· 14 use std::collections::{HashMap, HashSet}; 15 use std::time::{Duration, UNIX_EPOCH}; 16 use tokio::net::{TcpListener, ToSocketAddrs}; 17 - use tokio::task::block_in_place; 18 use tokio_util::sync::CancellationToken; 19 20 use crate::storage::{LinkReader, StorageStats}; ··· 28 const DEFAULT_CURSOR_LIMIT: u64 = 16; 29 const DEFAULT_CURSOR_LIMIT_MAX: u64 = 100; 30 31 - const INDEX_BEGAN_AT_TS: u64 = 1738083600; // TODO: not this 32 33 pub async fn serve<S, A>(store: S, addr: A, stay_alive: CancellationToken) -> anyhow::Result<()> 34 where ··· 41 "/", 42 get({ 43 let store = store.clone(); 44 - move |accept| async { block_in_place(|| hello(accept, store)) } 45 }), 46 ) 47 .route( 48 "/links/count", 49 get({ 50 let store = store.clone(); 51 - move |accept, query| async { block_in_place(|| count_links(accept, query, store)) } 52 }), 53 ) 54 .route( ··· 56 get({ 57 let store = store.clone(); 58 move |accept, query| async { 59 - block_in_place(|| count_distinct_dids(accept, query, store)) 60 } 61 }), 62 ) ··· 64 "/links", 65 get({ 66 let store = store.clone(); 67 - move |accept, query| async { block_in_place(|| get_links(accept, query, store)) } 68 }), 69 ) 70 .route( ··· 72 get({ 73 let store = store.clone(); 74 move |accept, query| async { 75 - block_in_place(|| get_distinct_dids(accept, query, store)) 76 } 77 }), 78 ) ··· 82 get({ 83 let store = store.clone(); 84 move |accept, query| async { 85 - block_in_place(|| count_all_links(accept, query, store)) 86 } 87 }), 88 ) ··· 91 get({ 92 let store = store.clone(); 93 move |accept, query| async { 94 - block_in_place(|| explore_links(accept, query, store)) 95 } 96 }), 97 ) ··· 150 #[template(path = "hello.html.j2")] 151 struct HelloReponse { 152 help: &'static str, 153 - days_indexed: u64, 154 stats: StorageStats, 155 } 156 fn hello( ··· 160 let stats = store 161 .get_stats() 162 .map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?; 163 - let days_indexed = (UNIX_EPOCH + Duration::from_secs(INDEX_BEGAN_AT_TS)) 164 - .elapsed() 165 .map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)? 166 - .as_secs() 167 - / 86400; 168 Ok(acceptable(accept, HelloReponse { 169 help: "open this URL in a web browser (or request with Accept: text/html) for information about this API.", 170 days_indexed, ··· 173 } 174 175 #[derive(Clone, Deserialize)] 176 struct GetLinksCountQuery { 177 target: String, 178 collection: String, ··· 233 } 234 235 #[derive(Clone, Deserialize)] 236 struct GetLinkItemsQuery { 237 target: String, 238 collection: String, ··· 251 /// 252 /// deprecated: use `did`, which can be repeated multiple times 253 from_dids: Option<String>, // comma separated: gross 254 - #[serde(default = "get_default_limit")] 255 limit: u64, 256 // TODO: allow reverse (er, forward) order as well 257 - } 258 - fn get_default_limit() -> u64 { 259 - DEFAULT_CURSOR_LIMIT 260 } 261 #[derive(Template, Serialize)] 262 #[template(path = "links.html.j2")] ··· 475 OpaqueApiCursor(bincode::DefaultOptions::new().serialize(&item).unwrap()) 476 } 477 }
··· 14 use std::collections::{HashMap, HashSet}; 15 use std::time::{Duration, UNIX_EPOCH}; 16 use tokio::net::{TcpListener, ToSocketAddrs}; 17 + use tokio::task::spawn_blocking; 18 use tokio_util::sync::CancellationToken; 19 20 use crate::storage::{LinkReader, StorageStats}; ··· 28 const DEFAULT_CURSOR_LIMIT: u64 = 16; 29 const DEFAULT_CURSOR_LIMIT_MAX: u64 = 100; 30 31 + fn get_default_cursor_limit() -> u64 { 32 + DEFAULT_CURSOR_LIMIT 33 + } 34 + 35 + fn to500(e: tokio::task::JoinError) -> http::StatusCode { 36 + eprintln!("handler error: {e}"); 37 + http::StatusCode::INTERNAL_SERVER_ERROR 38 + } 39 40 pub async fn serve<S, A>(store: S, addr: A, stay_alive: CancellationToken) -> anyhow::Result<()> 41 where ··· 48 "/", 49 get({ 50 let store = store.clone(); 51 + move |accept| async { 52 + spawn_blocking(|| hello(accept, store)) 53 + .await 54 + .map_err(to500)? 55 + } 56 + }), 57 + ) 58 + .route( 59 + "/xrpc/blue.microcosm.links.getManyToManyCounts", 60 + get({ 61 + let store = store.clone(); 62 + move |accept, query| async { 63 + spawn_blocking(|| get_many_to_many_counts(accept, query, store)) 64 + .await 65 + .map_err(to500)? 66 + } 67 }), 68 ) 69 .route( 70 "/links/count", 71 get({ 72 let store = store.clone(); 73 + move |accept, query| async { 74 + spawn_blocking(|| count_links(accept, query, store)) 75 + .await 76 + .map_err(to500)? 77 + } 78 }), 79 ) 80 .route( ··· 82 get({ 83 let store = store.clone(); 84 move |accept, query| async { 85 + spawn_blocking(|| count_distinct_dids(accept, query, store)) 86 + .await 87 + .map_err(to500)? 88 + } 89 + }), 90 + ) 91 + .route( 92 + "/xrpc/blue.microcosm.links.getBacklinks", 93 + get({ 94 + let store = store.clone(); 95 + move |accept, query| async { 96 + spawn_blocking(|| get_backlinks(accept, query, store)) 97 + .await 98 + .map_err(to500)? 99 } 100 }), 101 ) ··· 103 "/links", 104 get({ 105 let store = store.clone(); 106 + move |accept, query| async { 107 + spawn_blocking(|| get_links(accept, query, store)) 108 + .await 109 + .map_err(to500)? 110 + } 111 }), 112 ) 113 .route( ··· 115 get({ 116 let store = store.clone(); 117 move |accept, query| async { 118 + spawn_blocking(|| get_distinct_dids(accept, query, store)) 119 + .await 120 + .map_err(to500)? 121 } 122 }), 123 ) ··· 127 get({ 128 let store = store.clone(); 129 move |accept, query| async { 130 + spawn_blocking(|| count_all_links(accept, query, store)) 131 + .await 132 + .map_err(to500)? 133 } 134 }), 135 ) ··· 138 get({ 139 let store = store.clone(); 140 move |accept, query| async { 141 + spawn_blocking(|| explore_links(accept, query, store)) 142 + .await 143 + .map_err(to500)? 144 } 145 }), 146 ) ··· 199 #[template(path = "hello.html.j2")] 200 struct HelloReponse { 201 help: &'static str, 202 + days_indexed: Option<u64>, 203 stats: StorageStats, 204 } 205 fn hello( ··· 209 let stats = store 210 .get_stats() 211 .map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?; 212 + let days_indexed = stats 213 + .started_at 214 + .map(|c| (UNIX_EPOCH + Duration::from_micros(c)).elapsed()) 215 + .transpose() 216 .map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)? 
217 + .map(|d| d.as_secs() / 86_400); 218 Ok(acceptable(accept, HelloReponse { 219 help: "open this URL in a web browser (or request with Accept: text/html) for information about this API.", 220 days_indexed, ··· 223 } 224 225 #[derive(Clone, Deserialize)] 226 + #[serde(rename_all = "camelCase")] 227 + struct GetManyToManyCountsQuery { 228 + subject: String, 229 + source: String, 230 + /// path to the secondary link in the linking record 231 + path_to_other: String, 232 + /// filter to linking records (join of the m2m) by these DIDs 233 + #[serde(default)] 234 + did: Vec<String>, 235 + /// filter to specific secondary records 236 + #[serde(default)] 237 + other_subject: Vec<String>, 238 + cursor: Option<OpaqueApiCursor>, 239 + /// Set the max number of links to return per page of results 240 + #[serde(default = "get_default_cursor_limit")] 241 + limit: u64, 242 + } 243 + #[derive(Serialize)] 244 + struct OtherSubjectCount { 245 + subject: String, 246 + total: u64, 247 + distinct: u64, 248 + } 249 + #[derive(Template, Serialize)] 250 + #[template(path = "get-many-to-many-counts.html.j2")] 251 + struct GetManyToManyCountsResponse { 252 + counts_by_other_subject: Vec<OtherSubjectCount>, 253 + cursor: Option<OpaqueApiCursor>, 254 + #[serde(skip_serializing)] 255 + query: GetManyToManyCountsQuery, 256 + } 257 + fn get_many_to_many_counts( 258 + accept: ExtractAccept, 259 + query: axum_extra::extract::Query<GetManyToManyCountsQuery>, 260 + store: impl LinkReader, 261 + ) -> Result<impl IntoResponse, http::StatusCode> { 262 + let cursor_key = query 263 + .cursor 264 + .clone() 265 + .map(|oc| ApiKeyedCursor::try_from(oc).map_err(|_| http::StatusCode::BAD_REQUEST)) 266 + .transpose()? 267 + .map(|c| c.next); 268 + 269 + let limit = query.limit; 270 + if limit > DEFAULT_CURSOR_LIMIT_MAX { 271 + return Err(http::StatusCode::BAD_REQUEST); 272 + } 273 + 274 + let filter_dids: HashSet<Did> = HashSet::from_iter( 275 + query 276 + .did 277 + .iter() 278 + .map(|d| d.trim()) 279 + .filter(|d| !d.is_empty()) 280 + .map(|d| Did(d.to_string())), 281 + ); 282 + 283 + let filter_other_subjects: HashSet<String> = HashSet::from_iter( 284 + query 285 + .other_subject 286 + .iter() 287 + .map(|s| s.trim().to_string()) 288 + .filter(|s| !s.is_empty()), 289 + ); 290 + 291 + let Some((collection, path)) = query.source.split_once(':') else { 292 + return Err(http::StatusCode::BAD_REQUEST); 293 + }; 294 + let path = format!(".{path}"); 295 + 296 + let path_to_other = format!(".{}", query.path_to_other); 297 + 298 + let paged = store 299 + .get_many_to_many_counts( 300 + &query.subject, 301 + collection, 302 + &path, 303 + &path_to_other, 304 + limit, 305 + cursor_key, 306 + &filter_dids, 307 + &filter_other_subjects, 308 + ) 309 + .map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?; 310 + 311 + let cursor = paged.next.map(|next| ApiKeyedCursor { next }.into()); 312 + 313 + let items = paged 314 + .items 315 + .into_iter() 316 + .map(|(subject, total, distinct)| OtherSubjectCount { 317 + subject, 318 + total, 319 + distinct, 320 + }) 321 + .collect(); 322 + 323 + Ok(acceptable( 324 + accept, 325 + GetManyToManyCountsResponse { 326 + counts_by_other_subject: items, 327 + cursor, 328 + query: (*query).clone(), 329 + }, 330 + )) 331 + } 332 + 333 + #[derive(Clone, Deserialize)] 334 struct GetLinksCountQuery { 335 target: String, 336 collection: String, ··· 391 } 392 393 #[derive(Clone, Deserialize)] 394 + struct GetBacklinksQuery { 395 + /// The link target 396 + /// 397 + /// can be an AT-URI, plain DID, or regular URI 
398 + subject: String, 399 + /// Filter links only from this link source 400 + /// 401 + /// eg.: `app.bsky.feed.like:subject.uri` 402 + source: String, 403 + cursor: Option<OpaqueApiCursor>, 404 + /// Filter links only from these DIDs 405 + /// 406 + /// include multiple times to filter by multiple source DIDs 407 + #[serde(default)] 408 + did: Vec<String>, 409 + /// Set the max number of links to return per page of results 410 + #[serde(default = "get_default_cursor_limit")] 411 + limit: u64, 412 + // TODO: allow reverse (er, forward) order as well 413 + } 414 + #[derive(Template, Serialize)] 415 + #[template(path = "get-backlinks.html.j2")] 416 + struct GetBacklinksResponse { 417 + total: u64, 418 + records: Vec<RecordId>, 419 + cursor: Option<OpaqueApiCursor>, 420 + #[serde(skip_serializing)] 421 + query: GetBacklinksQuery, 422 + #[serde(skip_serializing)] 423 + collection: String, 424 + #[serde(skip_serializing)] 425 + path: String, 426 + } 427 + fn get_backlinks( 428 + accept: ExtractAccept, 429 + query: axum_extra::extract::Query<GetBacklinksQuery>, // supports multiple param occurrences 430 + store: impl LinkReader, 431 + ) -> Result<impl IntoResponse, http::StatusCode> { 432 + let until = query 433 + .cursor 434 + .clone() 435 + .map(|oc| ApiCursor::try_from(oc).map_err(|_| http::StatusCode::BAD_REQUEST)) 436 + .transpose()? 437 + .map(|c| c.next); 438 + 439 + let limit = query.limit; 440 + if limit > DEFAULT_CURSOR_LIMIT_MAX { 441 + return Err(http::StatusCode::BAD_REQUEST); 442 + } 443 + 444 + let filter_dids: HashSet<Did> = HashSet::from_iter( 445 + query 446 + .did 447 + .iter() 448 + .map(|d| d.trim()) 449 + .filter(|d| !d.is_empty()) 450 + .map(|d| Did(d.to_string())), 451 + ); 452 + 453 + let Some((collection, path)) = query.source.split_once(':') else { 454 + return Err(http::StatusCode::BAD_REQUEST); 455 + }; 456 + let path = format!(".{path}"); 457 + 458 + let paged = store 459 + .get_links( 460 + &query.subject, 461 + collection, 462 + &path, 463 + limit, 464 + until, 465 + &filter_dids, 466 + ) 467 + .map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?; 468 + 469 + let cursor = paged.next.map(|next| { 470 + ApiCursor { 471 + version: paged.version, 472 + next, 473 + } 474 + .into() 475 + }); 476 + 477 + Ok(acceptable( 478 + accept, 479 + GetBacklinksResponse { 480 + total: paged.total, 481 + records: paged.items, 482 + cursor, 483 + query: (*query).clone(), 484 + collection: collection.to_string(), 485 + path, 486 + }, 487 + )) 488 + } 489 + 490 + #[derive(Clone, Deserialize)] 491 struct GetLinkItemsQuery { 492 target: String, 493 collection: String, ··· 506 /// 507 /// deprecated: use `did`, which can be repeated multiple times 508 from_dids: Option<String>, // comma separated: gross 509 + #[serde(default = "get_default_cursor_limit")] 510 limit: u64, 511 // TODO: allow reverse (er, forward) order as well 512 } 513 #[derive(Template, Serialize)] 514 #[template(path = "links.html.j2")] ··· 727 OpaqueApiCursor(bincode::DefaultOptions::new().serialize(&item).unwrap()) 728 } 729 } 730 + 731 + #[derive(Serialize, Deserialize)] // for bincode 732 + struct ApiKeyedCursor { 733 + next: String, // the key 734 + } 735 + 736 + impl TryFrom<OpaqueApiCursor> for ApiKeyedCursor { 737 + type Error = bincode::Error; 738 + 739 + fn try_from(item: OpaqueApiCursor) -> Result<Self, Self::Error> { 740 + bincode::DefaultOptions::new().deserialize(&item.0) 741 + } 742 + } 743 + 744 + impl From<ApiKeyedCursor> for OpaqueApiCursor { 745 + fn from(item: ApiKeyedCursor) -> Self { 746 + 
OpaqueApiCursor(bincode::DefaultOptions::new().serialize(&item).unwrap()) 747 + } 748 + }
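The handlers switch from `block_in_place` to `spawn_blocking`, moving blocking storage reads onto tokio's blocking pool and mapping a `JoinError` to a 500 via `to500`. A minimal sketch of that handler shape, assuming axum 0.7+ and tokio; the route path, `blocking_lookup`, and the returned value are illustrative stand-ins for the real store calls:

```rust
use axum::{http::StatusCode, routing::get, Router};
use tokio::task::spawn_blocking;

// Stand-in for a blocking storage read (e.g. a RocksDB lookup).
fn blocking_lookup() -> Result<String, StatusCode> {
    Ok("42".to_string())
}

// Same shape as the rewritten handlers: run the blocking work on the
// blocking pool instead of stalling a runtime worker with block_in_place,
// and turn a JoinError (panicked or cancelled task) into a 500.
async fn count_links() -> Result<String, StatusCode> {
    spawn_blocking(blocking_lookup)
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
}

#[tokio::main]
async fn main() {
    let app = Router::new().route("/links/count", get(count_links));
    let listener = tokio::net::TcpListener::bind("127.0.0.1:3000").await.unwrap();
    axum::serve(listener, app).await.unwrap();
}
```

The two new XRPC routes in this diff, `/xrpc/blue.microcosm.links.getManyToManyCounts` and `/xrpc/blue.microcosm.links.getBacklinks`, are wired through the same `spawn_blocking` + `to500` wrapper.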
+78 -1
constellation/src/storage/mem_store.rs
··· 1 - use super::{LinkReader, LinkStorage, PagedAppendingCollection, StorageStats}; 2 use crate::{ActionableEvent, CountsByCount, Did, RecordId}; 3 use anyhow::Result; 4 use links::CollectedLink; ··· 132 } 133 134 impl LinkReader for MemStorage { 135 fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64> { 136 let data = self.0.lock().unwrap(); 137 let Some(paths) = data.targets.get(&Target::new(target)) else { ··· 353 dids, 354 targetables, 355 linking_records, 356 }) 357 } 358 }
··· 1 + use super::{ 2 + LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection, StorageStats, 3 + }; 4 use crate::{ActionableEvent, CountsByCount, Did, RecordId}; 5 use anyhow::Result; 6 use links::CollectedLink; ··· 134 } 135 136 impl LinkReader for MemStorage { 137 + fn get_many_to_many_counts( 138 + &self, 139 + target: &str, 140 + collection: &str, 141 + path: &str, 142 + path_to_other: &str, 143 + limit: u64, 144 + after: Option<String>, 145 + filter_dids: &HashSet<Did>, 146 + filter_to_targets: &HashSet<String>, 147 + ) -> Result<PagedOrderedCollection<(String, u64, u64), String>> { 148 + let data = self.0.lock().unwrap(); 149 + let Some(paths) = data.targets.get(&Target::new(target)) else { 150 + return Ok(PagedOrderedCollection::default()); 151 + }; 152 + let Some(linkers) = paths.get(&Source::new(collection, path)) else { 153 + return Ok(PagedOrderedCollection::default()); 154 + }; 155 + 156 + let path_to_other = RecordPath::new(path_to_other); 157 + let filter_to_targets: HashSet<Target> = 158 + HashSet::from_iter(filter_to_targets.iter().map(|s| Target::new(s))); 159 + 160 + let mut grouped_counts: HashMap<Target, (u64, HashSet<Did>)> = HashMap::new(); 161 + for (did, rkey) in linkers.iter().flatten().cloned() { 162 + if !filter_dids.is_empty() && !filter_dids.contains(&did) { 163 + continue; 164 + } 165 + if let Some(fwd_target) = data 166 + .links 167 + .get(&did) 168 + .unwrap_or(&HashMap::new()) 169 + .get(&RepoId { 170 + collection: collection.to_string(), 171 + rkey, 172 + }) 173 + .unwrap_or(&Vec::new()) 174 + .iter() 175 + .filter_map(|(path, target)| { 176 + if *path == path_to_other 177 + && (filter_to_targets.is_empty() || filter_to_targets.contains(target)) 178 + { 179 + Some(target) 180 + } else { 181 + None 182 + } 183 + }) 184 + .take(1) 185 + .next() 186 + { 187 + let e = grouped_counts.entry(fwd_target.clone()).or_default(); 188 + e.0 += 1; 189 + e.1.insert(did.clone()); 190 + } 191 + } 192 + let mut items: Vec<(String, u64, u64)> = grouped_counts 193 + .iter() 194 + .map(|(k, (n, u))| (k.0.clone(), *n, u.len() as u64)) 195 + .collect(); 196 + items.sort(); 197 + items = items 198 + .into_iter() 199 + .skip_while(|(t, _, _)| after.as_ref().map(|a| t <= a).unwrap_or(false)) 200 + .take(limit as usize) 201 + .collect(); 202 + let next = if items.len() as u64 >= limit { 203 + items.last().map(|(t, _, _)| t.clone()) 204 + } else { 205 + None 206 + }; 207 + Ok(PagedOrderedCollection { items, next }) 208 + } 209 + 210 fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64> { 211 let data = self.0.lock().unwrap(); 212 let Some(paths) = data.targets.get(&Target::new(target)) else { ··· 428 dids, 429 targetables, 430 linking_records, 431 + started_at: None, 432 + other_data: Default::default(), 433 }) 434 } 435 }
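The in-memory `get_many_to_many_counts` groups matching linking records by the secondary target found at `path_to_other`, keeping a running total plus a set of distinct DIDs per target. A minimal standalone sketch of that aggregation (a hypothetical `aggregate` helper, detached from the crate's `Target`/`Did`/`RepoId` types):

```rust
use std::collections::{HashMap, HashSet};

/// For each matching linking record we know (author DID, secondary target);
/// produce per-target (total records, distinct DIDs), sorted by target.
fn aggregate(pairs: &[(&str, &str)]) -> Vec<(String, u64, u64)> {
    let mut grouped: HashMap<&str, (u64, HashSet<&str>)> = HashMap::new();
    for &(did, other_target) in pairs {
        let entry = grouped.entry(other_target).or_default();
        entry.0 += 1; // total linking records pointing at this secondary target
        entry.1.insert(did); // distinct linking DIDs
    }
    let mut items: Vec<(String, u64, u64)> = grouped
        .into_iter()
        .map(|(target, (total, dids))| (target.to_string(), total, dids.len() as u64))
        .collect();
    items.sort(); // sorted by key so a keyset cursor can resume from the last key
    items
}

fn main() {
    // two records forward-link to b.com (two different DIDs),
    // two records forward-link to c.com (same DID twice)
    let pairs = [
        ("did:plc:asdf", "b.com"),
        ("did:plc:asdfasdf", "b.com"),
        ("did:plc:fdsa", "c.com"),
        ("did:plc:fdsa", "c.com"),
    ];
    assert_eq!(
        aggregate(&pairs),
        vec![("b.com".to_string(), 2, 2), ("c.com".to_string(), 2, 1)]
    );
}
```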
+225
constellation/src/storage/mod.rs
··· 19 pub total: u64, 20 } 21 22 #[derive(Debug, Deserialize, Serialize, PartialEq)] 23 pub struct StorageStats { 24 /// estimate of how many accounts we've seen create links. the _subjects_ of any links are not represented here. ··· 33 /// records with multiple links are single-counted. 34 /// for LSM stores, deleted links don't decrement this, and updated records with any links will likely increment it. 35 pub linking_records: u64, 36 } 37 38 pub trait LinkStorage: Send + Sync { ··· 48 } 49 50 pub trait LinkReader: Clone + Send + Sync + 'static { 51 fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64>; 52 53 fn get_distinct_did_count(&self, target: &str, collection: &str, path: &str) -> Result<u64>; ··· 1326 counts 1327 }); 1328 assert_stats(storage.get_stats()?, 1..=1, 2..=2, 1..=1); 1329 }); 1330 }
··· 19 pub total: u64, 20 } 21 22 + /// A paged collection whose keys are sorted instead of indexed 23 + /// 24 + /// this has weaker guarantees than PagedAppendingCollection: it might 25 + /// return a totally consistent snapshot. but it should avoid duplicates 26 + /// and each page should at least be internally consistent. 27 + #[derive(Debug, PartialEq, Default)] 28 + pub struct PagedOrderedCollection<T, K: Ord> { 29 + pub items: Vec<T>, 30 + pub next: Option<K>, 31 + } 32 + 33 #[derive(Debug, Deserialize, Serialize, PartialEq)] 34 pub struct StorageStats { 35 /// estimate of how many accounts we've seen create links. the _subjects_ of any links are not represented here. ··· 44 /// records with multiple links are single-counted. 45 /// for LSM stores, deleted links don't decrement this, and updated records with any links will likely increment it. 46 pub linking_records: u64, 47 + 48 + /// first jetstream cursor when this instance first started 49 + pub started_at: Option<u64>, 50 + 51 + /// anything else we want to throw in 52 + pub other_data: HashMap<String, u64>, 53 } 54 55 pub trait LinkStorage: Send + Sync { ··· 65 } 66 67 pub trait LinkReader: Clone + Send + Sync + 'static { 68 + #[allow(clippy::too_many_arguments)] 69 + fn get_many_to_many_counts( 70 + &self, 71 + target: &str, 72 + collection: &str, 73 + path: &str, 74 + path_to_other: &str, 75 + limit: u64, 76 + after: Option<String>, 77 + filter_dids: &HashSet<Did>, 78 + filter_to_targets: &HashSet<String>, 79 + ) -> Result<PagedOrderedCollection<(String, u64, u64), String>>; 80 + 81 fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64>; 82 83 fn get_distinct_did_count(&self, target: &str, collection: &str, path: &str) -> Result<u64>; ··· 1356 counts 1357 }); 1358 assert_stats(storage.get_stats()?, 1..=1, 2..=2, 1..=1); 1359 + }); 1360 + 1361 + //////// many-to-many ///////// 1362 + 1363 + test_each_storage!(get_m2m_counts_empty, |storage| { 1364 + assert_eq!( 1365 + storage.get_many_to_many_counts( 1366 + "a.com", 1367 + "a.b.c", 1368 + ".d.e", 1369 + ".f.g", 1370 + 10, 1371 + None, 1372 + &HashSet::new(), 1373 + &HashSet::new(), 1374 + )?, 1375 + PagedOrderedCollection { 1376 + items: vec![], 1377 + next: None, 1378 + } 1379 + ); 1380 + }); 1381 + 1382 + test_each_storage!(get_m2m_counts_single, |storage| { 1383 + storage.push( 1384 + &ActionableEvent::CreateLinks { 1385 + record_id: RecordId { 1386 + did: "did:plc:asdf".into(), 1387 + collection: "app.t.c".into(), 1388 + rkey: "asdf".into(), 1389 + }, 1390 + links: vec![ 1391 + CollectedLink { 1392 + target: Link::Uri("a.com".into()), 1393 + path: ".abc.uri".into(), 1394 + }, 1395 + CollectedLink { 1396 + target: Link::Uri("b.com".into()), 1397 + path: ".def.uri".into(), 1398 + }, 1399 + CollectedLink { 1400 + target: Link::Uri("b.com".into()), 1401 + path: ".ghi.uri".into(), 1402 + }, 1403 + ], 1404 + }, 1405 + 0, 1406 + )?; 1407 + assert_eq!( 1408 + storage.get_many_to_many_counts( 1409 + "a.com", 1410 + "app.t.c", 1411 + ".abc.uri", 1412 + ".def.uri", 1413 + 10, 1414 + None, 1415 + &HashSet::new(), 1416 + &HashSet::new(), 1417 + )?, 1418 + PagedOrderedCollection { 1419 + items: vec![("b.com".to_string(), 1, 1)], 1420 + next: None, 1421 + } 1422 + ); 1423 + }); 1424 + 1425 + test_each_storage!(get_m2m_counts_filters, |storage| { 1426 + storage.push( 1427 + &ActionableEvent::CreateLinks { 1428 + record_id: RecordId { 1429 + did: "did:plc:asdf".into(), 1430 + collection: "app.t.c".into(), 1431 + rkey: "asdf".into(), 1432 + }, 1433 + links: vec![ 
1434 + CollectedLink { 1435 + target: Link::Uri("a.com".into()), 1436 + path: ".abc.uri".into(), 1437 + }, 1438 + CollectedLink { 1439 + target: Link::Uri("b.com".into()), 1440 + path: ".def.uri".into(), 1441 + }, 1442 + ], 1443 + }, 1444 + 0, 1445 + )?; 1446 + storage.push( 1447 + &ActionableEvent::CreateLinks { 1448 + record_id: RecordId { 1449 + did: "did:plc:asdfasdf".into(), 1450 + collection: "app.t.c".into(), 1451 + rkey: "asdf".into(), 1452 + }, 1453 + links: vec![ 1454 + CollectedLink { 1455 + target: Link::Uri("a.com".into()), 1456 + path: ".abc.uri".into(), 1457 + }, 1458 + CollectedLink { 1459 + target: Link::Uri("b.com".into()), 1460 + path: ".def.uri".into(), 1461 + }, 1462 + ], 1463 + }, 1464 + 1, 1465 + )?; 1466 + storage.push( 1467 + &ActionableEvent::CreateLinks { 1468 + record_id: RecordId { 1469 + did: "did:plc:fdsa".into(), 1470 + collection: "app.t.c".into(), 1471 + rkey: "asdf".into(), 1472 + }, 1473 + links: vec![ 1474 + CollectedLink { 1475 + target: Link::Uri("a.com".into()), 1476 + path: ".abc.uri".into(), 1477 + }, 1478 + CollectedLink { 1479 + target: Link::Uri("c.com".into()), 1480 + path: ".def.uri".into(), 1481 + }, 1482 + ], 1483 + }, 1484 + 2, 1485 + )?; 1486 + storage.push( 1487 + &ActionableEvent::CreateLinks { 1488 + record_id: RecordId { 1489 + did: "did:plc:fdsa".into(), 1490 + collection: "app.t.c".into(), 1491 + rkey: "asdf2".into(), 1492 + }, 1493 + links: vec![ 1494 + CollectedLink { 1495 + target: Link::Uri("a.com".into()), 1496 + path: ".abc.uri".into(), 1497 + }, 1498 + CollectedLink { 1499 + target: Link::Uri("c.com".into()), 1500 + path: ".def.uri".into(), 1501 + }, 1502 + ], 1503 + }, 1504 + 3, 1505 + )?; 1506 + assert_eq!( 1507 + storage.get_many_to_many_counts( 1508 + "a.com", 1509 + "app.t.c", 1510 + ".abc.uri", 1511 + ".def.uri", 1512 + 10, 1513 + None, 1514 + &HashSet::new(), 1515 + &HashSet::new(), 1516 + )?, 1517 + PagedOrderedCollection { 1518 + items: vec![("b.com".to_string(), 2, 2), ("c.com".to_string(), 2, 1),], 1519 + next: None, 1520 + } 1521 + ); 1522 + assert_eq!( 1523 + storage.get_many_to_many_counts( 1524 + "a.com", 1525 + "app.t.c", 1526 + ".abc.uri", 1527 + ".def.uri", 1528 + 10, 1529 + None, 1530 + &HashSet::from_iter([Did("did:plc:fdsa".to_string())]), 1531 + &HashSet::new(), 1532 + )?, 1533 + PagedOrderedCollection { 1534 + items: vec![("c.com".to_string(), 2, 1),], 1535 + next: None, 1536 + } 1537 + ); 1538 + assert_eq!( 1539 + storage.get_many_to_many_counts( 1540 + "a.com", 1541 + "app.t.c", 1542 + ".abc.uri", 1543 + ".def.uri", 1544 + 10, 1545 + None, 1546 + &HashSet::new(), 1547 + &HashSet::from_iter(["b.com".to_string()]), 1548 + )?, 1549 + PagedOrderedCollection { 1550 + items: vec![("b.com".to_string(), 2, 2),], 1551 + next: None, 1552 + } 1553 + ); 1554 }); 1555 }
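The new get_many_to_many_counts trait method pages by the joined subject's key rather than by position: each PagedOrderedCollection carries one page plus an opaque next cursor to pass back as after. A minimal consumer sketch (not part of this change), assuming reader is any LinkReader implementation such as a read-only RocksStorage, with placeholder subject/collection/path values in the style of the tests above:

    use std::collections::HashSet;

    // walk every page of joined-subject counts for one target.
    // the string literals below are placeholders, not real data.
    fn all_m2m_counts(reader: &impl LinkReader) -> anyhow::Result<Vec<(String, u64, u64)>> {
        let mut items = Vec::new();
        let mut after: Option<String> = None;
        loop {
            let page = reader.get_many_to_many_counts(
                "a.com",    // primary target
                "app.t.c",  // source collection
                ".abc.uri", // path to the primary target in each record
                ".def.uri", // path to the joined target
                100,        // page size
                after.take(),
                &HashSet::new(), // no did filter
                &HashSet::new(), // no joined-target filter
            )?;
            items.extend(page.items);
            match page.next {
                Some(cursor) => after = Some(cursor),
                None => break,
            }
        }
        Ok(items)
    }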
+342 -40
constellation/src/storage/rocks_store.rs
··· 1 - use super::{ActionableEvent, LinkReader, LinkStorage, PagedAppendingCollection, StorageStats}; 2 use crate::{CountsByCount, Did, RecordId}; 3 use anyhow::{bail, Result}; 4 use bincode::Options as BincodeOptions; ··· 11 MultiThreaded, Options, PrefixRange, ReadOptions, WriteBatch, 12 }; 13 use serde::{Deserialize, Serialize}; 14 - use std::collections::{HashMap, HashSet}; 15 use std::io::Read; 16 use std::marker::PhantomData; 17 use std::path::{Path, PathBuf}; ··· 20 Arc, 21 }; 22 use std::thread; 23 - use std::time::{Duration, Instant}; 24 use tokio_util::sync::CancellationToken; 25 26 static DID_IDS_CF: &str = "did_ids"; ··· 29 static LINK_TARGETS_CF: &str = "link_targets"; 30 31 static JETSTREAM_CURSOR_KEY: &str = "jetstream_cursor"; 32 33 // todo: actually understand and set these options probably better 34 fn rocks_opts_base() -> Options { ··· 56 #[derive(Debug, Clone)] 57 pub struct RocksStorage { 58 pub db: Arc<DBWithThreadMode<MultiThreaded>>, // TODO: mov seqs here (concat merge op will be fun) 59 - did_id_table: IdTable<Did, DidIdValue, true>, 60 - target_id_table: IdTable<TargetKey, TargetId, false>, 61 is_writer: bool, 62 backup_task: Arc<Option<thread::JoinHandle<Result<()>>>>, 63 } ··· 85 fn cf_descriptor(&self) -> ColumnFamilyDescriptor { 86 ColumnFamilyDescriptor::new(&self.name, rocks_opts_base()) 87 } 88 - fn init<const WITH_REVERSE: bool>( 89 - self, 90 - db: &DBWithThreadMode<MultiThreaded>, 91 - ) -> Result<IdTable<Orig, IdVal, WITH_REVERSE>> { 92 if db.cf_handle(&self.name).is_none() { 93 bail!("failed to get cf handle from db -- was the db open with our .cf_descriptor()?"); 94 } ··· 119 } 120 } 121 #[derive(Debug, Clone)] 122 - struct IdTable<Orig, IdVal: IdTableValue, const WITH_REVERSE: bool> 123 where 124 Orig: KeyFromRocks, 125 for<'a> &'a Orig: AsRocksKey, ··· 127 base: IdTableBase<Orig, IdVal>, 128 priv_id_seq: u64, 129 } 130 - impl<Orig: Clone, IdVal: IdTableValue, const WITH_REVERSE: bool> IdTable<Orig, IdVal, WITH_REVERSE> 131 where 132 Orig: KeyFromRocks, 133 for<'v> &'v IdVal: AsRocksValue, ··· 139 _key_marker: PhantomData, 140 _val_marker: PhantomData, 141 name: name.into(), 142 - id_seq: Arc::new(AtomicU64::new(0)), // zero is "uninint", first seq num will be 1 143 } 144 } 145 fn get_id_val( ··· 178 id_value 179 })) 180 } 181 fn estimate_count(&self) -> u64 { 182 self.base.id_seq.load(Ordering::SeqCst) - 1 // -1 because seq zero is reserved 183 } 184 - } 185 - impl<Orig: Clone, IdVal: IdTableValue> IdTable<Orig, IdVal, true> 186 - where 187 - Orig: KeyFromRocks, 188 - for<'v> &'v IdVal: AsRocksValue, 189 - for<'k> &'k Orig: AsRocksKey, 190 - { 191 fn get_or_create_id_val( 192 &mut self, 193 db: &DBWithThreadMode<MultiThreaded>, ··· 215 } 216 } 217 } 218 - impl<Orig: Clone, IdVal: IdTableValue> IdTable<Orig, IdVal, false> 219 - where 220 - Orig: KeyFromRocks, 221 - for<'v> &'v IdVal: AsRocksValue, 222 - for<'k> &'k Orig: AsRocksKey, 223 - { 224 - fn get_or_create_id_val( 225 - &mut self, 226 - db: &DBWithThreadMode<MultiThreaded>, 227 - batch: &mut WriteBatch, 228 - orig: &Orig, 229 - ) -> Result<IdVal> { 230 - let cf = db.cf_handle(&self.base.name).unwrap(); 231 - self.__get_or_create_id_val(&cf, db, batch, orig) 232 - } 233 - } 234 235 impl IdTableValue for DidIdValue { 236 fn new(v: u64) -> Self { ··· 249 } 250 } 251 252 impl RocksStorage { 253 pub fn new(path: impl AsRef<Path>) -> Result<Self> { 254 Self::describe_metrics(); 255 - RocksStorage::open_readmode(path, false) 256 } 257 258 pub fn open_readonly(path: impl AsRef<Path>) -> Result<Self> 
{ ··· 260 } 261 262 fn open_readmode(path: impl AsRef<Path>, readonly: bool) -> Result<Self> { 263 - let did_id_table = IdTable::<_, _, true>::setup(DID_IDS_CF); 264 - let target_id_table = IdTable::<_, _, false>::setup(TARGET_IDS_CF); 265 266 let cfs = vec![ 267 // id reference tables 268 did_id_table.cf_descriptor(), ··· 296 is_writer: !readonly, 297 backup_task: None.into(), 298 }) 299 } 300 301 pub fn start_backup( ··· 826 } 827 828 impl LinkReader for RocksStorage { 829 fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64> { 830 let target_key = TargetKey( 831 Target(target.to_string()), ··· 1042 .map(|s| s.parse::<u64>()) 1043 .transpose()? 1044 .unwrap_or(0); 1045 Ok(StorageStats { 1046 dids, 1047 targetables, 1048 linking_records, 1049 }) 1050 } 1051 } ··· 1071 impl AsRocksValue for &TargetId {} 1072 impl KeyFromRocks for TargetKey {} 1073 impl ValueFromRocks for TargetId {} 1074 1075 // target_links table 1076 impl AsRocksKey for &TargetId {} ··· 1142 } 1143 1144 // target ids 1145 - #[derive(Debug, Clone, Serialize, Deserialize)] 1146 struct TargetId(u64); // key 1147 1148 - #[derive(Debug, Clone, Serialize, Deserialize)] 1149 pub struct Target(pub String); // the actual target/uri 1150 1151 // targets (uris, dids, etc.): the reverse index
··· 1 + use super::{ 2 + ActionableEvent, LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection, 3 + StorageStats, 4 + }; 5 use crate::{CountsByCount, Did, RecordId}; 6 use anyhow::{bail, Result}; 7 use bincode::Options as BincodeOptions; ··· 14 MultiThreaded, Options, PrefixRange, ReadOptions, WriteBatch, 15 }; 16 use serde::{Deserialize, Serialize}; 17 + use std::collections::{BTreeMap, HashMap, HashSet}; 18 use std::io::Read; 19 use std::marker::PhantomData; 20 use std::path::{Path, PathBuf}; ··· 23 Arc, 24 }; 25 use std::thread; 26 + use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; 27 use tokio_util::sync::CancellationToken; 28 29 static DID_IDS_CF: &str = "did_ids"; ··· 32 static LINK_TARGETS_CF: &str = "link_targets"; 33 34 static JETSTREAM_CURSOR_KEY: &str = "jetstream_cursor"; 35 + static STARTED_AT_KEY: &str = "jetstream_first_cursor"; 36 + // add reverse mappings for targets if this db was running before that was a thing 37 + static TARGET_ID_REPAIR_STATE_KEY: &str = "target_id_table_repair_state"; 38 + 39 + static COZY_FIRST_CURSOR: u64 = 1_738_083_600_000_000; // constellation.microcosm.blue started 40 + 41 + #[derive(Debug, Clone, Serialize, Deserialize)] 42 + struct TargetIdRepairState { 43 + /// start time for repair, microseconds timestamp 44 + current_us_started_at: u64, 45 + /// id table's latest id when repair started 46 + id_when_started: u64, 47 + /// id table id 48 + latest_repaired_i: u64, 49 + } 50 + impl AsRocksValue for TargetIdRepairState {} 51 + impl ValueFromRocks for TargetIdRepairState {} 52 53 // todo: actually understand and set these options probably better 54 fn rocks_opts_base() -> Options { ··· 76 #[derive(Debug, Clone)] 77 pub struct RocksStorage { 78 pub db: Arc<DBWithThreadMode<MultiThreaded>>, // TODO: mov seqs here (concat merge op will be fun) 79 + did_id_table: IdTable<Did, DidIdValue>, 80 + target_id_table: IdTable<TargetKey, TargetId>, 81 is_writer: bool, 82 backup_task: Arc<Option<thread::JoinHandle<Result<()>>>>, 83 } ··· 105 fn cf_descriptor(&self) -> ColumnFamilyDescriptor { 106 ColumnFamilyDescriptor::new(&self.name, rocks_opts_base()) 107 } 108 + fn init(self, db: &DBWithThreadMode<MultiThreaded>) -> Result<IdTable<Orig, IdVal>> { 109 if db.cf_handle(&self.name).is_none() { 110 bail!("failed to get cf handle from db -- was the db open with our .cf_descriptor()?"); 111 } ··· 136 } 137 } 138 #[derive(Debug, Clone)] 139 + struct IdTable<Orig, IdVal: IdTableValue> 140 where 141 Orig: KeyFromRocks, 142 for<'a> &'a Orig: AsRocksKey, ··· 144 base: IdTableBase<Orig, IdVal>, 145 priv_id_seq: u64, 146 } 147 + impl<Orig: Clone, IdVal: IdTableValue> IdTable<Orig, IdVal> 148 where 149 Orig: KeyFromRocks, 150 for<'v> &'v IdVal: AsRocksValue, ··· 156 _key_marker: PhantomData, 157 _val_marker: PhantomData, 158 name: name.into(), 159 + id_seq: Arc::new(AtomicU64::new(0)), // zero is "uninit", first seq num will be 1 160 } 161 } 162 fn get_id_val( ··· 195 id_value 196 })) 197 } 198 + 199 fn estimate_count(&self) -> u64 { 200 self.base.id_seq.load(Ordering::SeqCst) - 1 // -1 because seq zero is reserved 201 } 202 + 203 fn get_or_create_id_val( 204 &mut self, 205 db: &DBWithThreadMode<MultiThreaded>, ··· 227 } 228 } 229 } 230 231 impl IdTableValue for DidIdValue { 232 fn new(v: u64) -> Self { ··· 245 } 246 } 247 248 + fn now() -> u64 { 249 + SystemTime::now() 250 + .duration_since(UNIX_EPOCH) 251 + .unwrap() 252 + .as_micros() as u64 253 + } 254 + 255 impl RocksStorage { 256 pub fn new(path: impl AsRef<Path>) -> Result<Self> { 
257 Self::describe_metrics();
258 + let me = RocksStorage::open_readmode(path, false)?;
259 + me.global_init()?;
260 + Ok(me)
261 }
262
263 pub fn open_readonly(path: impl AsRef<Path>) -> Result<Self> {
··· 265 }
266
267 fn open_readmode(path: impl AsRef<Path>, readonly: bool) -> Result<Self> {
268 + let did_id_table = IdTable::setup(DID_IDS_CF);
269 + let target_id_table = IdTable::setup(TARGET_IDS_CF);
270
271 + // note: global stuff like jetstream cursor goes in the default cf
272 + // these are bonus extra cfs
273 let cfs = vec![
274 // id reference tables
275 did_id_table.cf_descriptor(),
··· 303 is_writer: !readonly,
304 backup_task: None.into(),
305 })
306 + }
307 +
308 + fn global_init(&self) -> Result<()> {
309 + let first_run = self.db.get(JETSTREAM_CURSOR_KEY)?.is_none(); // no cursor stored yet: this is a brand-new db
310 + if first_run {
311 + self.db.put(STARTED_AT_KEY, _rv(now()))?;
312 +
313 + // hack / temporary: if we're a new db, put in a completed repair
314 + // state so we don't run repairs (repairs are for old-code dbs)
315 + let completed = TargetIdRepairState {
316 + id_when_started: 0,
317 + current_us_started_at: 0,
318 + latest_repaired_i: 0,
319 + };
320 + self.db.put(TARGET_ID_REPAIR_STATE_KEY, _rv(completed))?;
321 + }
322 + Ok(())
323 + }
324 +
325 + pub fn run_repair(&self, breather: Duration, stay_alive: CancellationToken) -> Result<bool> {
326 + let mut state = match self
327 + .db
328 + .get(TARGET_ID_REPAIR_STATE_KEY)?
329 + .map(|s| _vr(&s))
330 + .transpose()?
331 + {
332 + Some(s) => s,
333 + None => TargetIdRepairState {
334 + id_when_started: self.did_id_table.priv_id_seq,
335 + current_us_started_at: now(),
336 + latest_repaired_i: 0,
337 + },
338 + };
339 +
340 + eprintln!("initial repair state: {state:?}");
341 +
342 + let cf = self.db.cf_handle(TARGET_IDS_CF).unwrap();
343 +
344 + let mut iter = self.db.raw_iterator_cf(&cf);
345 + iter.seek_to_first();
346 +
347 + eprintln!("repair iterator sent to first key");
348 +
349 + // skip ahead if we've already done some, or take a single first step
350 + for _ in 0..state.latest_repaired_i {
351 + iter.next();
352 + }
353 +
354 + eprintln!(
355 + "repair iterator skipped to {}th key",
356 + state.latest_repaired_i
357 + );
358 +
359 + let mut maybe_done = false;
360 +
361 + let mut write_fast = rocksdb::WriteOptions::default();
362 + write_fast.set_sync(false);
363 + write_fast.disable_wal(true);
364 +
365 + while !stay_alive.is_cancelled() && !maybe_done {
366 + // let mut batch = WriteBatch::default();
367 +
368 + let mut any_written = false;
369 +
370 + for _ in 0..1000 {
371 + if state.latest_repaired_i % 1_000_000 == 0 {
372 + eprintln!("target iter at {}", state.latest_repaired_i);
373 + }
374 + state.latest_repaired_i += 1;
375 +
376 + if !iter.valid() {
377 + eprintln!("invalid iter, are we done repairing?");
378 + maybe_done = true;
379 + break;
380 + };
381 +
382 + // eprintln!("iterator seems to be valid! 
getting the key..."); 383 + let raw_key = iter.key().unwrap(); 384 + if raw_key.len() == 8 { 385 + // eprintln!("found an 8-byte key, skipping it since it's probably an id..."); 386 + iter.next(); 387 + continue; 388 + } 389 + let target: TargetKey = _kr::<TargetKey>(raw_key)?; 390 + let target_id: TargetId = _vr(iter.value().unwrap())?; 391 + 392 + self.db 393 + .put_cf_opt(&cf, target_id.id().to_be_bytes(), _rv(&target), &write_fast)?; 394 + any_written = true; 395 + iter.next(); 396 + } 397 + 398 + if any_written { 399 + self.db 400 + .put(TARGET_ID_REPAIR_STATE_KEY, _rv(state.clone()))?; 401 + std::thread::sleep(breather); 402 + } 403 + } 404 + 405 + eprintln!("repair iterator done."); 406 + 407 + Ok(false) 408 } 409 410 pub fn start_backup( ··· 935 } 936 937 impl LinkReader for RocksStorage { 938 + fn get_many_to_many_counts( 939 + &self, 940 + target: &str, 941 + collection: &str, 942 + path: &str, 943 + path_to_other: &str, 944 + limit: u64, 945 + after: Option<String>, 946 + filter_dids: &HashSet<Did>, 947 + filter_to_targets: &HashSet<String>, 948 + ) -> Result<PagedOrderedCollection<(String, u64, u64), String>> { 949 + let collection = Collection(collection.to_string()); 950 + let path = RPath(path.to_string()); 951 + 952 + let target_key = TargetKey(Target(target.to_string()), collection.clone(), path.clone()); 953 + 954 + // unfortunately the cursor is a, uh, stringified number. 955 + // this was easier for the memstore (plain target, not target id), and 956 + // making it generic is a bit awful. 957 + // so... parse the number out of a string here :( 958 + // TODO: this should bubble up to a BAD_REQUEST response 959 + let after = after.map(|s| s.parse::<u64>().map(TargetId)).transpose()?; 960 + 961 + let Some(target_id) = self.target_id_table.get_id_val(&self.db, &target_key)? else { 962 + eprintln!("nothin doin for this target, {target_key:?}"); 963 + return Ok(Default::default()); 964 + }; 965 + 966 + let filter_did_ids: HashMap<DidId, bool> = filter_dids 967 + .iter() 968 + .filter_map(|did| self.did_id_table.get_id_val(&self.db, did).transpose()) 969 + .collect::<Result<Vec<DidIdValue>>>()? 970 + .into_iter() 971 + .map(|DidIdValue(id, active)| (id, active)) 972 + .collect(); 973 + 974 + // stored targets are keyed by triples of (target, collection, path). 975 + // target filtering only consideres the target itself, so we actually 976 + // need to do a prefix iteration of all target ids for this target and 977 + // keep them all. 978 + // i *think* the number of keys at a target prefix should usually be 979 + // pretty small, so this is hopefully fine. but if it turns out to be 980 + // large, we can push this filtering back into the main links loop and 981 + // do forward db queries per backlink to get the raw target back out. 
982 + let mut filter_to_target_ids: HashSet<TargetId> = HashSet::new(); 983 + for t in filter_to_targets { 984 + for (_, target_id) in self.iter_targets_for_target(&Target(t.to_string())) { 985 + filter_to_target_ids.insert(target_id); 986 + } 987 + } 988 + 989 + let linkers = self.get_target_linkers(&target_id)?; 990 + 991 + let mut grouped_counts: BTreeMap<TargetId, (u64, HashSet<DidId>)> = BTreeMap::new(); 992 + 993 + for (did_id, rkey) in linkers.0 { 994 + if did_id.is_empty() { 995 + continue; 996 + } 997 + 998 + if !filter_did_ids.is_empty() && filter_did_ids.get(&did_id) != Some(&true) { 999 + continue; 1000 + } 1001 + 1002 + let record_link_key = RecordLinkKey(did_id, collection.clone(), rkey); 1003 + let Some(targets) = self.get_record_link_targets(&record_link_key)? else { 1004 + continue; 1005 + }; 1006 + 1007 + let Some(fwd_target) = targets 1008 + .0 1009 + .into_iter() 1010 + .filter_map(|RecordLinkTarget(rpath, target_id)| { 1011 + if rpath.0 == path_to_other 1012 + && (filter_to_target_ids.is_empty() 1013 + || filter_to_target_ids.contains(&target_id)) 1014 + { 1015 + Some(target_id) 1016 + } else { 1017 + None 1018 + } 1019 + }) 1020 + .take(1) 1021 + .next() 1022 + else { 1023 + eprintln!("no forward match"); 1024 + continue; 1025 + }; 1026 + 1027 + // small relief: we page over target ids, so we can already bail 1028 + // reprocessing previous pages here 1029 + if after.as_ref().map(|a| fwd_target <= *a).unwrap_or(false) { 1030 + continue; 1031 + } 1032 + 1033 + // aand we can skip target ids that must be on future pages 1034 + // (this check continues after the did-lookup, which we have to do) 1035 + let page_is_full = grouped_counts.len() as u64 >= limit; 1036 + if page_is_full { 1037 + let current_max = grouped_counts.keys().next_back().unwrap(); // limit should be non-zero bleh 1038 + if fwd_target > *current_max { 1039 + continue; 1040 + } 1041 + } 1042 + 1043 + // bit painful: 2-step lookup to make sure this did is active 1044 + let Some(did) = self.did_id_table.get_val_from_id(&self.db, did_id.0)? else { 1045 + eprintln!("failed to look up did from did_id {did_id:?}"); 1046 + continue; 1047 + }; 1048 + let Some(DidIdValue(_, active)) = self.did_id_table.get_id_val(&self.db, &did)? else { 1049 + eprintln!("failed to look up did_value from did_id {did_id:?}: {did:?}: data consistency bug?"); 1050 + continue; 1051 + }; 1052 + if !active { 1053 + continue; 1054 + } 1055 + 1056 + // page-management, continued 1057 + // if we have a full page, and we're inserting a *new* key less than 1058 + // the current max, then we can evict the current max 1059 + let mut should_evict = false; 1060 + let entry = grouped_counts.entry(fwd_target.clone()).or_insert_with(|| { 1061 + // this is a *new* key, so kick the max if we're full 1062 + should_evict = page_is_full; 1063 + Default::default() 1064 + }); 1065 + entry.0 += 1; 1066 + entry.1.insert(did_id); 1067 + 1068 + if should_evict { 1069 + grouped_counts.pop_last(); 1070 + } 1071 + } 1072 + 1073 + let mut items: Vec<(String, u64, u64)> = Vec::with_capacity(grouped_counts.len()); 1074 + for (target_id, (n, dids)) in &grouped_counts { 1075 + let Some(target) = self 1076 + .target_id_table 1077 + .get_val_from_id(&self.db, target_id.0)? 1078 + else { 1079 + eprintln!("failed to look up target from target_id {target_id:?}"); 1080 + continue; 1081 + }; 1082 + items.push((target.0 .0, *n, dids.len() as u64)); 1083 + } 1084 + 1085 + let next = if grouped_counts.len() as u64 >= limit { 1086 + // yeah.... 
it's a number saved as a string......sorry 1087 + grouped_counts 1088 + .keys() 1089 + .next_back() 1090 + .map(|k| format!("{}", k.0)) 1091 + } else { 1092 + None 1093 + }; 1094 + 1095 + Ok(PagedOrderedCollection { items, next }) 1096 + } 1097 + 1098 fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64> { 1099 let target_key = TargetKey( 1100 Target(target.to_string()), ··· 1311 .map(|s| s.parse::<u64>()) 1312 .transpose()? 1313 .unwrap_or(0); 1314 + let started_at = self 1315 + .db 1316 + .get(STARTED_AT_KEY)? 1317 + .map(|c| _vr(&c)) 1318 + .transpose()? 1319 + .unwrap_or(COZY_FIRST_CURSOR); 1320 + 1321 + let other_data = self 1322 + .db 1323 + .get(TARGET_ID_REPAIR_STATE_KEY)? 1324 + .map(|s| _vr(&s)) 1325 + .transpose()? 1326 + .map( 1327 + |TargetIdRepairState { 1328 + current_us_started_at, 1329 + id_when_started, 1330 + latest_repaired_i, 1331 + }| { 1332 + HashMap::from([ 1333 + ("current_us_started_at".to_string(), current_us_started_at), 1334 + ("id_when_started".to_string(), id_when_started), 1335 + ("latest_repaired_i".to_string(), latest_repaired_i), 1336 + ]) 1337 + }, 1338 + ) 1339 + .unwrap_or(HashMap::default()); 1340 + 1341 Ok(StorageStats { 1342 dids, 1343 targetables, 1344 linking_records, 1345 + started_at: Some(started_at), 1346 + other_data, 1347 }) 1348 } 1349 } ··· 1369 impl AsRocksValue for &TargetId {} 1370 impl KeyFromRocks for TargetKey {} 1371 impl ValueFromRocks for TargetId {} 1372 + 1373 + // temp? 1374 + impl KeyFromRocks for TargetId {} 1375 + impl AsRocksValue for &TargetKey {} 1376 1377 // target_links table 1378 impl AsRocksKey for &TargetId {} ··· 1444 } 1445 1446 // target ids 1447 + #[derive(Debug, Clone, Serialize, Deserialize, PartialOrd, Ord, PartialEq, Eq, Hash)] 1448 struct TargetId(u64); // key 1449 1450 + #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] 1451 pub struct Target(pub String); // the actual target/uri 1452 1453 // targets (uris, dids, etc.): the reverse index
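Inside the rocks implementation, the page is built by scanning every linker of the primary target and keeping only the limit smallest joined target ids in a BTreeMap, evicting the current maximum whenever a smaller new key shows up; the next cursor is then just the largest key kept. That bounding trick in isolation, as a simplified sketch (plain u64 keys and bare counts stand in for TargetId and the per-target tallies; not part of this change):

    use std::collections::BTreeMap;

    // keep at most `limit` distinct keys, preferring the smallest ones,
    // while still counting repeat hits on keys already in the page.
    fn bounded_count(page: &mut BTreeMap<u64, u64>, key: u64, limit: usize) {
        if page.len() >= limit && !page.contains_key(&key) {
            let &max = page.keys().next_back().expect("limit must be non-zero");
            if key > max {
                return; // belongs on a later page
            }
            page.pop_last(); // evict the current max to make room
        }
        *page.entry(key).or_insert(0) += 1;
    }

Because the page lives in an ordered map, anything greater than the final largest key is guaranteed to belong to a later page, which is what makes the stringified-id cursor safe to resume from.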
+1 -1
constellation/templates/dids.html.j2
··· 27 {% for did in linking_dids %} 28 <pre style="display: block; margin: 1em 2em" class="code"><strong>DID</strong>: {{ did.0 }} 29 -> see <a href="/links/all?target={{ did.0|urlencode }}">links to this DID</a> 30 - -> browse <a href="https://atproto-browser-plus-links.vercel.app/at/{{ did.0|urlencode }}">this DID record</a></pre> 31 {% endfor %} 32 33 {% if let Some(c) = cursor %}
··· 27 {% for did in linking_dids %} 28 <pre style="display: block; margin: 1em 2em" class="code"><strong>DID</strong>: {{ did.0 }} 29 -> see <a href="/links/all?target={{ did.0|urlencode }}">links to this DID</a> 30 + -> browse <a href="https://pdsls.dev/at://{{ did.0|urlencode }}">this DID record</a></pre> 31 {% endfor %} 32 33 {% if let Some(c) = cursor %}
+54
constellation/templates/get-backlinks.html.j2
···
··· 1 + {% extends "base.html.j2" %} 2 + {% import "try-it-macros.html.j2" as try_it %} 3 + 4 + {% block title %}Backlinks{% endblock %} 5 + {% block description %}All {{ query.source }} records with links to {{ query.subject }}{% endblock %} 6 + 7 + {% block content %} 8 + 9 + {% call try_it::get_backlinks(query.subject, query.source, query.did, query.limit) %} 10 + 11 + <h2> 12 + Links to <code>{{ query.subject }}</code> 13 + {% if let Some(browseable_uri) = query.subject|to_browseable %} 14 + <small style="font-weight: normal; font-size: 1rem"><a href="{{ browseable_uri }}">browse record</a></small> 15 + {% endif %} 16 + </h2> 17 + 18 + <p><strong>{{ total|human_number }} links</strong> from <code>{{ query.source }}</code>.</p> 19 + 20 + <ul> 21 + <li>See distinct linking DIDs at <code>/links/distinct-dids</code>: <a href="/links/distinct-dids?target={{ query.subject|urlencode }}&collection={{ collection|urlencode }}&path={{ path|urlencode }}">/links/distinct-dids?target={{ query.subject }}&collection={{ collection }}&path={{ path }}</a></li> 22 + <li>See all links to this target at <code>/links/all</code>: <a href="/links/all?target={{ query.subject|urlencode }}">/links/all?target={{ query.subject }}</a></li> 23 + </ul> 24 + 25 + <h3>Links, most recent first:</h3> 26 + 27 + {% for record in records %} 28 + <pre style="display: block; margin: 1em 2em" class="code"><strong>DID</strong>: {{ record.did().0 }} (<a href="/links/all?target={{ record.did().0|urlencode }}">DID links</a>) 29 + <strong>Collection</strong>: {{ record.collection }} 30 + <strong>RKey</strong>: {{ record.rkey }} 31 + -> <a href="https://pdsls.dev/at://{{ record.did().0 }}/{{ record.collection }}/{{ record.rkey }}">browse record</a></pre> 32 + {% endfor %} 33 + 34 + {% if let Some(c) = cursor %} 35 + <form method="get" action="/xrpc/blue.microcosm.links.getBacklinks"> 36 + <input type="hidden" name="subject" value="{{ query.subject }}" /> 37 + <input type="hidden" name="source" value="{{ query.source }}" /> 38 + <input type="hidden" name="limit" value="{{ query.limit }}" /> 39 + {% for did in query.did %} 40 + <input type="hidden" name="did" value="{{ did }}" /> 41 + {% endfor %} 42 + <input type="hidden" name="cursor" value={{ c|json|safe }} /> 43 + <button type="submit">next page&hellip;</button> 44 + </form> 45 + {% else %} 46 + <button disabled><em>end of results</em></button> 47 + {% endif %} 48 + 49 + <details> 50 + <summary>Raw JSON response</summary> 51 + <pre class="code">{{ self|tojson }}</pre> 52 + </details> 53 + 54 + {% endblock %}
+67
constellation/templates/get-many-to-many-counts.html.j2
···
··· 1 + {% extends "base.html.j2" %}
2 + {% import "try-it-macros.html.j2" as try_it %}
3 +
4 + {% block title %}Many to Many counts{% endblock %}
5 + {% block description %}Counts of many-to-many {{ query.source }} join records with links to {{ query.subject }} and a secondary target at {{ query.path_to_other }}{% endblock %}
6 +
7 + {% block content %}
8 +
9 + {% call try_it::get_many_to_many_counts(
10 + query.subject,
11 + query.source,
12 + query.path_to_other,
13 + query.did,
14 + query.other_subject,
15 + query.limit,
16 + ) %}
17 +
18 + <h2>
19 + Many-to-many links to <code>{{ query.subject }}</code> joining through <code>{{ query.path_to_other }}</code>
20 + {% if let Some(browseable_uri) = query.subject|to_browseable %}
21 + <small style="font-weight: normal; font-size: 1rem"><a href="{{ browseable_uri }}">browse record</a></small>
22 + {% endif %}
23 + </h2>
24 +
25 + <p><strong>{% if cursor.is_some() || query.cursor.is_some() %}more than {% endif %}{{ counts_by_other_subject.len()|to_u64|human_number }} joins</strong> <code>{{ query.source }}→{{ query.path_to_other }}</code></p>
26 +
27 + <ul>
28 + <li>See direct backlinks at <code>/xrpc/blue.microcosm.links.getBacklinks</code>: <a href="/xrpc/blue.microcosm.links.getBacklinks?subject={{ query.subject|urlencode }}&source={{ query.source|urlencode }}">/xrpc/blue.microcosm.links.getBacklinks?subject={{ query.subject }}&source={{ query.source }}</a></li>
29 + <li>See all links to this target at <code>/links/all</code>: <a href="/links/all?target={{ query.subject|urlencode }}">/links/all?target={{ query.subject }}</a></li>
30 + </ul>
31 +
32 + <h3>Counts by other subject:</h3>
33 +
34 + {% for counts in counts_by_other_subject %}
35 + <pre style="display: block; margin: 1em 2em" class="code"><strong>Joined subject</strong>: {{ counts.subject }}
36 + <strong>Joining records</strong>: {{ counts.total }}
37 + <strong>Unique joiner ids</strong>: {{ counts.distinct }}
38 + -> {% if let Some(browseable_uri) = counts.subject|to_browseable -%}
39 + <a href="{{ browseable_uri }}">browse record</a>
40 + {%- endif %}</pre>
41 + {% endfor %}
42 +
43 + {% if let Some(c) = cursor %}
44 + <form method="get" action="/xrpc/blue.microcosm.links.getManyToManyCounts">
45 + <input type="hidden" name="subject" value="{{ query.subject }}" />
46 + <input type="hidden" name="source" value="{{ query.source }}" />
47 + <input type="hidden" name="pathToOther" value="{{ query.path_to_other }}" />
48 + {% for did in query.did %}
49 + <input type="hidden" name="did" value="{{ did }}" />
50 + {% endfor %}
51 + {% for otherSubject in query.other_subject %}
52 + <input type="hidden" name="otherSubject" value="{{ otherSubject }}" />
53 + {% endfor %}
54 + <input type="hidden" name="limit" value="{{ query.limit }}" />
55 + <input type="hidden" name="cursor" value={{ c|json|safe }} />
56 + <button type="submit">next page&hellip;</button>
57 + </form>
58 + {% else %}
59 + <button disabled><em>end of results</em></button>
60 + {% endif %}
61 +
62 + <details>
63 + <summary>Raw JSON response</summary>
64 + <pre class="code">{{ self|tojson }}</pre>
65 + </details>
66 +
67 + {% endblock %}
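The Raw JSON response block exposes the same data this page renders. A hedged guess (not part of this change) at a client-side shape for deserializing it, inferred only from the fields the template reads; the actual response's field naming is not confirmed here:

    use serde::Deserialize;

    // field names inferred from the template above, not from a published lexicon;
    // adjust once the real JSON is confirmed.
    #[derive(Debug, Deserialize)]
    struct JoinCount {
        subject: String,
        total: u64,    // joining records
        distinct: u64, // distinct joining identities
    }

    #[derive(Debug, Deserialize)]
    struct ManyToManyCountsResponse {
        counts_by_other_subject: Vec<JoinCount>,
        cursor: Option<String>,
    }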
+57 -2
constellation/templates/hello.html.j2
··· 19 <p>It works by recursively walking <em>all</em> records coming through the firehose, searching for anything that looks like a link. Links are indexed by the target they point at, the collection the record came from, and the JSON path to the link in that record.</p> 20 21 <p> 22 - This server has indexed <span class="stat">{{ stats.linking_records|human_number }}</span> links between <span class="stat">{{ stats.targetables|human_number }}</span> targets and sources from <span class="stat">{{ stats.dids|human_number }}</span> identities over <span class="stat">{{ days_indexed|human_number }}</span> days.<br/> 23 <small>(indexing new records in real time, backfill coming soon!)</small> 24 </p> 25 26 - <p>But feel free to use it! If you want to be nice, put your project name and bsky username (or email) in your user-agent header for api requests.</p> 27 28 29 <h2>API Endpoints</h2> 30 31 <h3 class="route"><code>GET /links</code></h3> 32 33 <p>A list of records linking to a target.</p> 34 35 <h4>Query parameters:</h4> 36
··· 19 <p>It works by recursively walking <em>all</em> records coming through the firehose, searching for anything that looks like a link. Links are indexed by the target they point at, the collection the record came from, and the JSON path to the link in that record.</p>
20
21 <p>
22 + This server has indexed <span class="stat">{{ stats.linking_records|human_number }}</span> links between <span class="stat">{{ stats.targetables|human_number }}</span> targets and sources from <span class="stat">{{ stats.dids|human_number }}</span> identities over <span class="stat">
23 + {%- if let Some(days) = days_indexed %}
24 + {{ days|human_number }}
25 + {% else %}
26 + ???
27 + {% endif -%}
28 + </span> days.<br/>
29 <small>(indexing new records in real time, backfill coming soon!)</small>
30 </p>
31
32 + {# {% for k, v in stats.other_data.iter() %}
33 + <p><strong>{{ k }}</strong>: {{ v }}</p>
34 + {% endfor %} #}
35 +
36 + <p>You're welcome to use this public instance! Please do not build the torment nexus. If you want to be nice, put your project name and bsky username (or email) in your user-agent header for api requests.</p>
37
38
39 <h2>API Endpoints</h2>
40
41 + <h3 class="route"><code>GET /xrpc/blue.microcosm.links.getBacklinks</code></h3>
42 +
43 + <p>A list of records linking to any record, identity, or uri.</p>
44 +
45 + <h4>Query parameters:</h4>
46 +
47 + <ul>
48 + <li><p><code>subject</code>: required, must url-encode. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li>
49 + <li><p><code>source</code>: required. Example: <code>app.bsky.feed.like:subject.uri</code></p></li>
50 + <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li>
51 + <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li>
52 + </ul>
53 +
54 + <p style="margin-bottom: 0"><strong>Try it:</strong></p>
55 + {% call try_it::get_backlinks("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like:subject.uri", [""], 16) %}
56 +
57 +
58 + <h3 class="route"><code>GET /xrpc/blue.microcosm.links.getManyToManyCounts</code></h3>
59 +
60 + <p>Counts of many-to-many <em>join</em> records from a source collection that link to a subject, grouped by the secondary subject at <code>pathToOther</code> in each record.</p>
61 +
62 + <h4>Query parameters:</h4>
63 +
64 + <ul>
65 + <li><p><code>subject</code>: required, must url-encode. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li>
66 + <li><p><code>source</code>: required. Example: <code>app.bsky.feed.like:subject.uri</code></p></li>
67 + <li><p><code>pathToOther</code>: required. Path to the secondary link in the many-to-many record. Example: <code>otherThing.uri</code></p></li>
68 + <li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li>
69 + <li><p><code>otherSubject</code>: optional, filter secondary links to specific subjects. Include multiple times to filter by multiple subjects. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li>
70 + <li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li>
71 + </ul>
72 +
73 + <p style="margin-bottom: 0"><strong>Try it:</strong></p>
74 + {% call try_it::get_many_to_many_counts(
75 + "at://did:plc:wshs7t2adsemcrrd4snkeqli/sh.tangled.label.definition/good-first-issue",
76 + "sh.tangled.label.op:add[].key",
77 + "subject",
78 + [""],
79 + [""],
80 + 25,
81 + ) %}
82 +
83 +
84 <h3 class="route"><code>GET /links</code></h3>
85
86 <p>A list of records linking to a target.</p>
87 +
88 + <p>[DEPRECATED]: new apps should use <code>GET /xrpc/blue.microcosm.links.getBacklinks</code> instead, but this endpoint <strong>will</strong> remain supported for the foreseeable future.</p>
89
90 <h4>Query parameters:</h4>
91
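The parameter lists above map directly onto a query string, so a client call is mostly URL construction. A hypothetical sketch using reqwest (not part of this change; the instance URL and subject are placeholders, and the response is kept as untyped JSON rather than guessing at a schema):

    // assumes reqwest with the "json" feature plus serde_json and anyhow.
    async fn fetch_backlinks(client: &reqwest::Client) -> anyhow::Result<serde_json::Value> {
        let res = client
            .get("https://constellation.example/xrpc/blue.microcosm.links.getBacklinks")
            .query(&[
                ("subject", "at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r"),
                ("source", "app.bsky.feed.like:subject.uri"),
                ("limit", "16"),
            ])
            // be nice: identify your project in the user-agent
            .header("user-agent", "my-project (@me.example.com)")
            .send()
            .await?
            .error_for_status()?;
        Ok(res.json::<serde_json::Value>().await?)
    }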
+1 -1
constellation/templates/links.html.j2
··· 28 <pre style="display: block; margin: 1em 2em" class="code"><strong>DID</strong>: {{ record.did().0 }} (<a href="/links/all?target={{ record.did().0|urlencode }}">DID links</a>) 29 <strong>Collection</strong>: {{ record.collection }} 30 <strong>RKey</strong>: {{ record.rkey }} 31 - -> <a href="https://atproto-browser-plus-links.vercel.app/at/{{ record.did().0|urlencode }}/{{ record.collection }}/{{ record.rkey }}">browse record</a></pre> 32 {% endfor %} 33 34 {% if let Some(c) = cursor %}
··· 28 <pre style="display: block; margin: 1em 2em" class="code"><strong>DID</strong>: {{ record.did().0 }} (<a href="/links/all?target={{ record.did().0|urlencode }}">DID links</a>) 29 <strong>Collection</strong>: {{ record.collection }} 30 <strong>RKey</strong>: {{ record.rkey }} 31 + -> <a href="https://pdsls.dev/at://{{ record.did().0 }}/{{ record.collection }}/{{ record.rkey }}">browse record</a></pre> 32 {% endfor %} 33 34 {% if let Some(c) = cursor %}
+68 -1
constellation/templates/try-it-macros.html.j2
··· 1 {% macro links(target, collection, path, dids, limit) %} 2 <form method="get" action="/links"> 3 <pre class="code"><strong>GET</strong> /links ··· 24 }); 25 </script> 26 {% endmacro %} 27 - 28 29 {% macro dids(target, collection, path) %} 30 <form method="get" action="/links/distinct-dids">
··· 1 + {% macro get_backlinks(subject, source, dids, limit) %}
2 + <form method="get" action="/xrpc/blue.microcosm.links.getBacklinks">
3 + <pre class="code"><strong>GET</strong> /xrpc/blue.microcosm.links.getBacklinks
4 + ?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." />
5 + &source= <input type="text" name="source" value="{{ source }}" placeholder="app.bsky.feed.like:subject.uri" />
6 + {%- for did in dids %}{% if !did.is_empty() %}
7 + &did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %}
8 + <span id="did-placeholder"></span> <button id="add-did">+ did filter</button>
9 + &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre>
10 + </form>
11 + <script>
12 + const addDidButton = document.getElementById('add-did');
13 + const didPlaceholder = document.getElementById('did-placeholder');
14 + addDidButton.addEventListener('click', e => {
15 + e.preventDefault();
16 + const i = document.createElement('input');
17 + i.placeholder = 'did:plc:...';
18 + i.name = "did"
19 + const p = addDidButton.parentNode;
20 + p.insertBefore(document.createTextNode('&did= '), didPlaceholder);
21 + p.insertBefore(i, didPlaceholder);
22 + p.insertBefore(document.createTextNode('\n '), didPlaceholder);
23 + });
24 + </script>
25 + {% endmacro %}
26 +
27 + {% macro get_many_to_many_counts(subject, source, pathToOther, dids, otherSubjects, limit) %}
28 + <form method="get" action="/xrpc/blue.microcosm.links.getManyToManyCounts">
29 + <pre class="code"><strong>GET</strong> /xrpc/blue.microcosm.links.getManyToManyCounts
30 + ?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." />
31 + &source= <input type="text" name="source" value="{{ source }}" placeholder="app.bsky.feed.like:subject.uri" />
32 + &pathToOther= <input type="text" name="pathToOther" value="{{ pathToOther }}" placeholder="otherThing.uri" />
33 + {%- for did in dids %}{% if !did.is_empty() %}
34 + &did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %}
35 + <span id="m2m-subject-placeholder"></span> <button id="m2m-add-subject">+ other subject filter</button>
36 + {%- for otherSubject in otherSubjects %}{% if !otherSubject.is_empty() %}
37 + &otherSubject= <input type="text" name="otherSubject" value="{{ otherSubject }}" placeholder="at-uri, did, uri..." 
/>{% endif %}{% endfor %} 38 + <span id="m2m-did-placeholder"></span> <button id="m2m-add-did">+ did filter</button> 39 + &limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre> 40 + </form> 41 + <script> 42 + const m2mAddDidButton = document.getElementById('m2m-add-did'); 43 + const m2mDidPlaceholder = document.getElementById('m2m-did-placeholder'); 44 + m2mAddDidButton.addEventListener('click', e => { 45 + e.preventDefault(); 46 + const i = document.createElement('input'); 47 + i.placeholder = 'did:plc:...'; 48 + i.name = "did" 49 + const p = m2mAddDidButton.parentNode; 50 + p.insertBefore(document.createTextNode('&did= '), m2mDidPlaceholder); 51 + p.insertBefore(i, m2mDidPlaceholder); 52 + p.insertBefore(document.createTextNode('\n '), m2mDidPlaceholder); 53 + }); 54 + const m2mAddSubjectButton = document.getElementById('m2m-add-subject'); 55 + const m2mSubjectPlaceholder = document.getElementById('m2m-subject-placeholder'); 56 + m2mAddSubjectButton.addEventListener('click', e => { 57 + e.preventDefault(); 58 + const i = document.createElement('input'); 59 + i.placeholder = 'at-uri, did, uri...'; 60 + i.name = "otherSubject" 61 + const p = m2mAddSubjectButton.parentNode; 62 + p.insertBefore(document.createTextNode('&otherSubject= '), m2mSubjectPlaceholder); 63 + p.insertBefore(i, m2mSubjectPlaceholder); 64 + p.insertBefore(document.createTextNode('\n '), m2mSubjectPlaceholder); 65 + }); 66 + </script> 67 + {% endmacro %} 68 + 69 {% macro links(target, collection, path, dids, limit) %} 70 <form method="get" action="/links"> 71 <pre class="code"><strong>GET</strong> /links ··· 92 }); 93 </script> 94 {% endmacro %} 95 96 {% macro dids(target, collection, path) %} 97 <form method="get" action="/links/distinct-dids">
+2
links/Cargo.toml
··· 5 6 [dependencies] 7 anyhow = "1.0.95" 8 fluent-uri = "0.3.2" 9 nom = "7.1.3" 10 thiserror = "2.0.9" 11 tinyjson = "2.5.1"
··· 5 6 [dependencies] 7 anyhow = "1.0.95" 8 + dasl = "0.2.0" 9 fluent-uri = "0.3.2" 10 nom = "7.1.3" 11 + serde = { version = "1.0.228", features = ["derive"] } 12 thiserror = "2.0.9" 13 tinyjson = "2.5.1"
+3 -2
links/src/lib.rs
··· 1 use fluent_uri::Uri; 2 3 pub mod at_uri; 4 pub mod did; ··· 6 7 pub use record::collect_links; 8 9 - #[derive(Debug, Clone, Ord, Eq, PartialOrd, PartialEq)] 10 pub enum Link { 11 AtUri(String), 12 Uri(String), ··· 59 } 60 } 61 62 - #[derive(Debug, PartialEq)] 63 pub struct CollectedLink { 64 pub path: String, 65 pub target: Link,
··· 1 use fluent_uri::Uri; 2 + use serde::{Deserialize, Serialize}; 3 4 pub mod at_uri; 5 pub mod did; ··· 7 8 pub use record::collect_links; 9 10 + #[derive(Debug, Clone, Ord, Eq, PartialOrd, PartialEq, Serialize, Deserialize)] 11 pub enum Link { 12 AtUri(String), 13 Uri(String), ··· 60 } 61 } 62 63 + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] 64 pub struct CollectedLink { 65 pub path: String, 66 pub target: Link,
+41
links/src/record.rs
··· 1 use tinyjson::JsonValue; 2 3 use crate::{parse_any_link, CollectedLink}; ··· 36 } 37 } 38 39 pub fn collect_links(v: &JsonValue) -> Vec<CollectedLink> { 40 let mut found = vec![]; 41 walk_record("", v, &mut found); 42 found 43 } 44
··· 1 + use dasl::drisl::Value as DrislValue; 2 use tinyjson::JsonValue; 3 4 use crate::{parse_any_link, CollectedLink}; ··· 37 } 38 } 39 40 + pub fn walk_drisl(path: &str, v: &DrislValue, found: &mut Vec<CollectedLink>) { 41 + match v { 42 + DrislValue::Map(o) => { 43 + for (key, child) in o { 44 + walk_drisl(&format!("{path}.{key}"), child, found) 45 + } 46 + } 47 + DrislValue::Array(a) => { 48 + for child in a { 49 + let child_p = match child { 50 + DrislValue::Map(o) => { 51 + if let Some(DrislValue::Text(t)) = o.get("$type") { 52 + format!("{path}[{t}]") 53 + } else { 54 + format!("{path}[]") 55 + } 56 + } 57 + _ => format!("{path}[]"), 58 + }; 59 + walk_drisl(&child_p, child, found) 60 + } 61 + } 62 + DrislValue::Text(s) => { 63 + if let Some(link) = parse_any_link(s) { 64 + found.push(CollectedLink { 65 + path: path.to_string(), 66 + target: link, 67 + }); 68 + } 69 + } 70 + _ => {} 71 + } 72 + } 73 + 74 pub fn collect_links(v: &JsonValue) -> Vec<CollectedLink> { 75 let mut found = vec![]; 76 walk_record("", v, &mut found); 77 + found 78 + } 79 + 80 + pub fn collect_links_drisl(v: &DrislValue) -> Vec<CollectedLink> { 81 + let mut found = vec![]; 82 + walk_drisl("", v, &mut found); 83 found 84 } 85
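collect_links_drisl is the DAG-CBOR counterpart of collect_links, meant for blocks read straight out of CAR files. A small usage sketch (not part of this change), with string errors in the same style as the spacedust importer since nothing here assumes the decoder's error type:

    use links::{record::collect_links_drisl, CollectedLink};

    // decode one DAG-CBOR block and collect any at-uris / dids / uris found in it.
    fn links_in_block(block: &[u8]) -> Result<Vec<CollectedLink>, String> {
        let value: dasl::drisl::Value = dasl::drisl::from_slice(block)
            .map_err(|e| format!("failed to parse block with drisl: {e:?}"))?;
        Ok(collect_links_drisl(&value))
    }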
+8
spacedust/Cargo.toml
··· 4 edition = "2024" 5 6 [dependencies] 7 async-trait = "0.1.88" 8 clap = { version = "4.5.40", features = ["derive"] } 9 ctrlc = "3.4.7" 10 dropshot = "0.16.2" 11 env_logger = "0.11.8" 12 futures = "0.3.31" 13 http = "1.3.1" 14 jetstream = { path = "../jetstream", features = ["metrics"] } 15 links = { path = "../links" } 16 log = "0.4.27" 17 metrics = "0.24.2" 18 metrics-exporter-prometheus = { version = "0.17.1", features = ["http-listener"] } 19 rand = "0.9.1" 20 schemars = "0.8.22" 21 semver = "1.0.26" 22 serde = { version = "1.0.219", features = ["derive"] } 23 serde_json = "1.0.140" 24 serde_qs = "1.0.0-rc.3" 25 thiserror = "2.0.12"
··· 4 edition = "2024" 5 6 [dependencies] 7 + anyhow = "1.0.100" 8 + async-channel = "2.5.0" 9 async-trait = "0.1.88" 10 clap = { version = "4.5.40", features = ["derive"] } 11 ctrlc = "3.4.7" 12 + dasl = "0.2.0" 13 dropshot = "0.16.2" 14 env_logger = "0.11.8" 15 + fjall = "3.0.0-pre.0" 16 futures = "0.3.31" 17 http = "1.3.1" 18 + ipld-core = { version = "0.4.2", features = ["serde"] } 19 jetstream = { path = "../jetstream", features = ["metrics"] } 20 links = { path = "../links" } 21 log = "0.4.27" 22 metrics = "0.24.2" 23 metrics-exporter-prometheus = { version = "0.17.1", features = ["http-listener"] } 24 rand = "0.9.1" 25 + repo-stream = "0.2.2" 26 + reqwest = { version = "0.12.24", features = ["json", "stream"] } 27 schemars = "0.8.22" 28 semver = "1.0.26" 29 serde = { version = "1.0.219", features = ["derive"] } 30 + serde_ipld_dagcbor = "0.6.4" 31 serde_json = "1.0.140" 32 serde_qs = "1.0.0-rc.3" 33 thiserror = "2.0.12"
+21
spacedust/src/bin/import_car_file.rs
···
··· 1 + use clap::Parser; 2 + use std::path::PathBuf; 3 + 4 + type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; 5 + 6 + #[derive(Debug, Parser)] 7 + struct Args { 8 + #[arg()] 9 + file: PathBuf, 10 + } 11 + 12 + #[tokio::main] 13 + async fn main() -> Result<()> { 14 + env_logger::init(); 15 + 16 + let Args { file } = Args::parse(); 17 + 18 + let _reader = tokio::fs::File::open(file).await?; 19 + 20 + Ok(()) 21 + }
+258
spacedust/src/bin/import_scraped.rs
···
··· 1 + use clap::Parser; 2 + use links::CollectedLink; 3 + use repo_stream::{ 4 + DiskBuilder, DiskStore, Driver, DriverBuilder, Processable, drive::DriverBuilderWithProcessor, 5 + drive::NeedDisk, 6 + }; 7 + use std::path::PathBuf; 8 + use std::sync::{ 9 + Arc, 10 + atomic::{AtomicUsize, Ordering}, 11 + }; 12 + use tokio::{io::AsyncRead, task::JoinSet}; 13 + 14 + type Result<T> = anyhow::Result<T>; //std::result::Result<T, Box<dyn std::error::Error>>; 15 + 16 + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] 17 + struct CollectedProcessed(CollectedLink); 18 + 19 + impl Processable for CollectedProcessed { 20 + fn get_size(&self) -> usize { 21 + self.0.path.capacity() + self.0.target.as_str().len() 22 + } 23 + } 24 + 25 + #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] 26 + struct ErrString(String); 27 + 28 + impl Processable for ErrString { 29 + fn get_size(&self) -> usize { 30 + self.0.capacity() 31 + } 32 + } 33 + 34 + type Processed = std::result::Result<Vec<CollectedProcessed>, ErrString>; 35 + 36 + /// hacky for now: put errors in strings ๐Ÿคทโ€โ™€๏ธ 37 + fn process(block: Vec<u8>) -> Processed { 38 + let value: dasl::drisl::Value = dasl::drisl::from_slice(&block) 39 + .map_err(|e| ErrString(format!("failed to parse block with drisl: {e:?}")))?; 40 + let links = links::record::collect_links_drisl(&value) 41 + .into_iter() 42 + .map(CollectedProcessed) 43 + .collect(); 44 + Ok(links) 45 + } 46 + 47 + #[derive(Debug, Parser)] 48 + struct Args { 49 + #[arg(long)] 50 + cars_folder: PathBuf, 51 + #[arg(long)] 52 + mem_workers: usize, 53 + #[arg(long)] 54 + disk_workers: usize, 55 + #[arg(long)] 56 + disk_folder: PathBuf, 57 + } 58 + 59 + async fn get_cars( 60 + cars_folder: PathBuf, 61 + tx: async_channel::Sender<tokio::io::BufReader<tokio::fs::File>>, 62 + ) -> Result<()> { 63 + let mut dir = tokio::fs::read_dir(cars_folder).await?; 64 + while let Some(entry) = dir.next_entry().await? { 65 + if !entry.file_type().await?.is_file() { 66 + continue; 67 + } 68 + let reader = tokio::fs::File::open(&entry.path()).await?; 69 + let reader = tokio::io::BufReader::new(reader); 70 + tx.send(reader).await?; 71 + } 72 + Ok(()) 73 + } 74 + 75 + async fn drive_mem<R: AsyncRead + Unpin + Send + Sync + 'static>( 76 + f: R, 77 + builder: &DriverBuilderWithProcessor<Processed>, 78 + disk_tx: &async_channel::Sender<NeedDisk<R, Processed>>, 79 + ) -> Result<Option<(usize, usize)>> { 80 + let mut n = 0; 81 + let mut n_records = 0; 82 + match builder.load_car(f).await? { 83 + Driver::Memory(_commit, mut driver) => { 84 + while let Some(chunk) = driver.next_chunk(512).await? 
{ 85 + n_records += chunk.len(); 86 + for (_key, links) in chunk { 87 + match links { 88 + Ok(links) => n += links.len(), 89 + Err(e) => eprintln!("wat: {e:?}"), 90 + } 91 + } 92 + } 93 + Ok(Some((n, n_records))) 94 + } 95 + Driver::Disk(need_disk) => { 96 + disk_tx.send(need_disk).await?; 97 + Ok(None) 98 + } 99 + } 100 + } 101 + 102 + async fn mem_worker<R: AsyncRead + Unpin + Send + Sync + 'static>( 103 + car_rx: async_channel::Receiver<R>, 104 + disk_tx: async_channel::Sender<NeedDisk<R, Processed>>, 105 + n: Arc<AtomicUsize>, 106 + n_records: Arc<AtomicUsize>, 107 + ) -> Result<()> { 108 + let builder = DriverBuilder::new() 109 + .with_block_processor(process) // don't care just counting records 110 + .with_mem_limit_mb(128); 111 + while let Ok(f) = car_rx.recv().await { 112 + let driven = match drive_mem(f, &builder, &disk_tx).await { 113 + Ok(d) => d, 114 + Err(e) => { 115 + eprintln!("failed to drive mem: {e:?}. skipping..."); 116 + continue; 117 + } 118 + }; 119 + if let Some((drove, recs)) = driven { 120 + n.fetch_add(drove, Ordering::Relaxed); 121 + n_records.fetch_add(recs, Ordering::Relaxed); 122 + } 123 + } 124 + Ok(()) 125 + } 126 + 127 + async fn drive_disk<R: AsyncRead + Unpin>( 128 + needed: NeedDisk<R, Processed>, 129 + store: DiskStore, 130 + ) -> Result<(usize, usize, DiskStore)> { 131 + let (_commit, mut driver) = needed.finish_loading(store).await?; 132 + let mut n = 0; 133 + let mut n_records = 0; 134 + while let Some(chunk) = driver.next_chunk(512).await? { 135 + n_records += chunk.len(); 136 + for (_key, links) in chunk { 137 + match links { 138 + Ok(links) => n += links.len(), 139 + Err(e) => eprintln!("wat: {e:?}"), 140 + } 141 + } 142 + } 143 + let store = driver.reset_store().await?; 144 + Ok((n, n_records, store)) 145 + } 146 + 147 + async fn disk_worker<R: AsyncRead + Unpin>( 148 + worker_id: usize, 149 + disk_rx: async_channel::Receiver<NeedDisk<R, Processed>>, 150 + folder: PathBuf, 151 + n: Arc<AtomicUsize>, 152 + n_records: Arc<AtomicUsize>, 153 + disk_workers_active: Arc<AtomicUsize>, 154 + ) -> Result<()> { 155 + let mut file = folder; 156 + file.push(format!("disk-worker-{worker_id}.sqlite")); 157 + let builder = DiskBuilder::new().with_cache_size_mb(128); 158 + let mut store = builder.open(file.clone()).await?; 159 + while let Ok(needed) = disk_rx.recv().await { 160 + let active = disk_workers_active.fetch_add(1, Ordering::AcqRel); 161 + println!("-> disk workers active: {}", active + 1); 162 + let (drove, records) = match drive_disk(needed, store).await { 163 + Ok((d, r, s)) => { 164 + store = s; 165 + (d, r) 166 + } 167 + Err(e) => { 168 + eprintln!("failed to drive disk: {e:?}. 
skipping..."); 169 + store = builder.open(file.clone()).await?; 170 + continue; 171 + } 172 + }; 173 + n.fetch_add(drove, Ordering::Relaxed); 174 + n_records.fetch_add(records, Ordering::Relaxed); 175 + let were_active = disk_workers_active.fetch_sub(1, Ordering::AcqRel); 176 + println!("<- disk workers active: {}", were_active - 1); 177 + } 178 + Ok(()) 179 + } 180 + 181 + #[tokio::main] 182 + async fn main() -> Result<()> { 183 + env_logger::init(); 184 + 185 + let Args { 186 + cars_folder, 187 + disk_folder, 188 + disk_workers, 189 + mem_workers, 190 + } = Args::parse(); 191 + 192 + let mut set = JoinSet::<Result<()>>::new(); 193 + 194 + let (cars_tx, cars_rx) = async_channel::bounded(2); 195 + set.spawn(get_cars(cars_folder, cars_tx)); 196 + 197 + let n: Arc<AtomicUsize> = Arc::new(0.into()); 198 + let n_records: Arc<AtomicUsize> = Arc::new(0.into()); 199 + let disk_workers_active: Arc<AtomicUsize> = Arc::new(0.into()); 200 + 201 + set.spawn({ 202 + let n = n.clone(); 203 + let n_records = n_records.clone(); 204 + let mut interval = tokio::time::interval(std::time::Duration::from_secs(10)); 205 + async move { 206 + let mut last_n = n.load(Ordering::Relaxed); 207 + let mut last_n_records = n.load(Ordering::Relaxed); 208 + loop { 209 + interval.tick().await; 210 + let n = n.load(Ordering::Relaxed); 211 + let n_records = n_records.load(Ordering::Relaxed); 212 + let diff_n = n - last_n; 213 + let diff_records = n_records - last_n_records; 214 + println!("rate: {} rec/sec; {} n/sec", diff_records / 10, diff_n / 10); 215 + if n_records > 0 && diff_records == 0 { 216 + println!("zero encountered, stopping rate calculation polling."); 217 + break Ok(()); 218 + } 219 + last_n = n; 220 + last_n_records = n_records; 221 + } 222 + } 223 + }); 224 + 225 + let (needs_disk_tx, needs_disk_rx) = async_channel::bounded(disk_workers); 226 + 227 + for _ in 0..mem_workers { 228 + set.spawn(mem_worker( 229 + cars_rx.clone(), 230 + needs_disk_tx.clone(), 231 + n.clone(), 232 + n_records.clone(), 233 + )); 234 + } 235 + drop(cars_rx); 236 + drop(needs_disk_tx); 237 + 238 + tokio::fs::create_dir_all(disk_folder.clone()).await?; 239 + for id in 0..disk_workers { 240 + set.spawn(disk_worker( 241 + id, 242 + needs_disk_rx.clone(), 243 + disk_folder.clone(), 244 + n.clone(), 245 + n_records.clone(), 246 + disk_workers_active.clone(), 247 + )); 248 + } 249 + drop(needs_disk_rx); 250 + 251 + while let Some(res) = set.join_next().await { 252 + println!("task from set joined: {res:?}"); 253 + } 254 + 255 + eprintln!("total records processed: {n_records:?}; total n: {n:?}"); 256 + 257 + Ok(()) 258 + }
+137
spacedust/src/bin/scrape_pds.rs
···
··· 1 + use clap::Parser; 2 + use reqwest::Url; 3 + use serde::Deserialize; 4 + use std::path::PathBuf; 5 + use tokio::io::AsyncWriteExt; 6 + use tokio::{sync::mpsc, time}; 7 + 8 + type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; 9 + 10 + use futures::StreamExt; 11 + 12 + #[derive(Debug, Parser)] 13 + struct Args { 14 + #[arg(long)] 15 + pds: Url, 16 + #[arg(long)] 17 + throttle_ms: u64, // 100ms per pds? 18 + #[arg(long)] 19 + folder: PathBuf, 20 + } 21 + 22 + async fn download_repo( 23 + client: &reqwest::Client, 24 + mut pds: Url, 25 + did: String, 26 + mut path: PathBuf, 27 + ) -> Result<()> { 28 + path.push(format!("{did}.car")); 29 + let f = tokio::fs::File::create(path).await?; 30 + let mut w = tokio::io::BufWriter::new(f); 31 + 32 + pds.set_path("/xrpc/com.atproto.sync.getRepo"); 33 + pds.set_query(Some(&format!("did={did}"))); 34 + let mut byte_stream = client.get(pds).send().await?.bytes_stream(); 35 + 36 + while let Some(stuff) = byte_stream.next().await { 37 + tokio::io::copy(&mut stuff?.as_ref(), &mut w).await?; 38 + } 39 + w.flush().await?; 40 + 41 + Ok(()) 42 + } 43 + 44 + #[derive(Debug, Deserialize)] 45 + struct RepoInfo { 46 + did: String, 47 + active: bool, 48 + } 49 + 50 + #[derive(Debug, Deserialize)] 51 + struct ListReposResponse { 52 + cursor: Option<String>, 53 + repos: Vec<RepoInfo>, 54 + } 55 + 56 + fn get_pds_dids(client: reqwest::Client, mut pds: Url) -> mpsc::Receiver<String> { 57 + let (tx, rx) = mpsc::channel(2); 58 + tokio::task::spawn(async move { 59 + pds.set_path("/xrpc/com.atproto.sync.listRepos"); 60 + let mut cursor = None; 61 + 62 + loop { 63 + if let Some(c) = cursor { 64 + pds.set_query(Some(&format!("cursor={c}"))); 65 + } 66 + let res: ListReposResponse = client 67 + .get(pds.clone()) 68 + .send() 69 + .await 70 + .expect("to send request") 71 + .error_for_status() 72 + .expect("to be ok") 73 + .json() 74 + .await 75 + .expect("json response"); 76 + for repo in res.repos { 77 + if repo.active { 78 + tx.send(repo.did) 79 + .await 80 + .expect("to be able to send on the channel"); 81 + } 82 + } 83 + cursor = res.cursor; 84 + if cursor.is_none() { 85 + break; 86 + } 87 + } 88 + }); 89 + rx 90 + } 91 + 92 + #[tokio::main] 93 + async fn main() -> Result<()> { 94 + env_logger::init(); 95 + 96 + let Args { 97 + pds, 98 + throttle_ms, 99 + folder, 100 + } = Args::parse(); 101 + 102 + tokio::fs::create_dir_all(folder.clone()).await?; 103 + 104 + let client = reqwest::Client::builder() 105 + .user_agent("microcosm/spacedust-testing") 106 + .build()?; 107 + 108 + let mut dids = get_pds_dids(client.clone(), pds.clone()); 109 + 110 + let mut interval = time::interval(time::Duration::from_millis(throttle_ms)); 111 + let mut oks = 0; 112 + let mut single_fails = 0; 113 + let mut double_fails = 0; 114 + 115 + while let Some(did) = dids.recv().await { 116 + interval.tick().await; 117 + println!("did: {did:?}"); 118 + if let Err(e) = download_repo(&client, pds.clone(), did.clone(), folder.clone()).await { 119 + single_fails += 1; 120 + eprintln!("failed to download repo for did: {did:?}: {e:?}. retrying in a moment..."); 121 + tokio::time::sleep(time::Duration::from_secs(3)).await; 122 + interval.reset(); 123 + if let Err(e) = download_repo(&client, pds.clone(), did.clone(), folder.clone()).await { 124 + double_fails += 1; 125 + eprintln!("failed again: {e:?}. moving on in a moment..."); 126 + tokio::time::sleep(time::Duration::from_secs(1)).await; 127 + continue; 128 + } 129 + } 130 + oks += 1; 131 + println!(" -> done. 
did: {did:?}"); 132 + } 133 + 134 + eprintln!("got {oks} repos. single fails: {single_fails}; doubles: {double_fails}."); 135 + 136 + Ok(()) 137 + }
+1
spacedust/src/lib.rs
··· 3 pub mod error; 4 pub mod removable_delay_queue; 5 pub mod server; 6 pub mod subscriber; 7 8 use jetstream::events::CommitEvent;
··· 3 pub mod error; 4 pub mod removable_delay_queue; 5 pub mod server; 6 + pub mod storage; 7 pub mod subscriber; 8 9 use jetstream::events::CommitEvent;
spacedust/src/storage/car/drive.rs

This is a binary file and will not be displayed.

+1
spacedust/src/storage/car/mod.rs
···
··· 1 +
spacedust/src/storage/car/walk.rs

This is a binary file and will not be displayed.

+9
spacedust/src/storage/fjall/mod.rs
···
··· 1 + use crate::storage::Storage; 2 + 3 + pub struct FjallStorage {} 4 + 5 + impl Storage for FjallStorage { 6 + fn import_car() { 7 + todo!() 8 + } 9 + }
+6
spacedust/src/storage/mod.rs
···
··· 1 + pub mod car; 2 + pub mod fjall; 3 + 4 + pub trait Storage { 5 + fn import_car() {} 6 + }