Cargo.lock  +300 -73
···
112
113
[[package]]
114
name = "anyhow"
115
-
version = "1.0.97"
116
source = "registry+https://github.com/rust-lang/crates.io-index"
117
-
checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f"
118
119
[[package]]
120
name = "arbitrary"
···
127
version = "1.7.1"
128
source = "registry+https://github.com/rust-lang/crates.io-index"
129
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
130
131
[[package]]
132
name = "arrayvec"
···
192
"nom",
193
"num-traits",
194
"rusticata-macros",
195
-
"thiserror 2.0.16",
196
"time",
197
]
198
···
644
"axum",
645
"handlebars",
646
"serde",
647
-
"thiserror 2.0.16",
648
]
649
650
[[package]]
···
673
version = "0.2.0"
674
source = "registry+https://github.com/rust-lang/crates.io-index"
675
checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
676
677
[[package]]
678
name = "base64"
···
812
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
813
814
[[package]]
815
name = "block-buffer"
816
version = "0.10.4"
817
source = "registry+https://github.com/rust-lang/crates.io-index"
···
839
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
840
841
[[package]]
842
name = "bytes"
843
version = "1.10.1"
844
source = "registry+https://github.com/rust-lang/crates.io-index"
···
851
checksum = "6236364b88b9b6d0bc181ba374cf1ab55ba3ef97a1cb6f8cddad48a273767fb5"
852
853
[[package]]
854
name = "bzip2-sys"
855
version = "0.1.13+1.0.8"
856
source = "registry+https://github.com/rust-lang/crates.io-index"
···
890
"enum_dispatch",
891
"serde",
892
]
893
894
[[package]]
895
name = "cc"
···
976
"multihash",
977
"serde",
978
"serde_bytes",
979
-
"unsigned-varint",
980
]
981
982
[[package]]
···
992
993
[[package]]
994
name = "clap"
995
-
version = "4.5.47"
996
source = "registry+https://github.com/rust-lang/crates.io-index"
997
-
checksum = "7eac00902d9d136acd712710d71823fb8ac8004ca445a89e73a41d45aa712931"
998
dependencies = [
999
"clap_builder",
1000
"clap_derive",
···
1002
1003
[[package]]
1004
name = "clap_builder"
1005
-
version = "4.5.47"
1006
source = "registry+https://github.com/rust-lang/crates.io-index"
1007
-
checksum = "2ad9bbf750e73b5884fb8a211a9424a1906c1e156724260fdae972f31d70e1d6"
1008
dependencies = [
1009
"anstream",
1010
"anstyle",
···
1085
version = "0.9.6"
1086
source = "registry+https://github.com/rust-lang/crates.io-index"
1087
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
1088
1089
[[package]]
1090
name = "constellation"
···
1353
]
1354
1355
[[package]]
1356
name = "data-encoding"
1357
-
version = "2.8.0"
1358
source = "registry+https://github.com/rust-lang/crates.io-index"
1359
-
checksum = "575f75dfd25738df5b91b8e43e14d44bda14637a58fae779fd2b064f8bf3e010"
1360
1361
[[package]]
1362
name = "data-encoding-macro"
1363
-
version = "0.1.17"
1364
source = "registry+https://github.com/rust-lang/crates.io-index"
1365
-
checksum = "9f9724adfcf41f45bf652b3995837669d73c4d49a1b5ac1ff82905ac7d9b5558"
1366
dependencies = [
1367
"data-encoding",
1368
"data-encoding-macro-internal",
···
1370
1371
[[package]]
1372
name = "data-encoding-macro-internal"
1373
-
version = "0.1.15"
1374
source = "registry+https://github.com/rust-lang/crates.io-index"
1375
-
checksum = "18e4fdb82bd54a12e42fb58a800dcae6b9e13982238ce2296dc3570b92148e1f"
1376
dependencies = [
1377
"data-encoding",
1378
-
"syn 1.0.109",
1379
]
1380
1381
[[package]]
···
1579
"slog-bunyan",
1580
"slog-json",
1581
"slog-term",
1582
-
"thiserror 2.0.16",
1583
"tokio",
1584
"tokio-rustls 0.25.0",
1585
"toml 0.9.7",
···
1783
checksum = "0b25ad44cd4360a0448a9b5a0a6f1c7a621101cca4578706d43c9a821418aebc"
1784
dependencies = [
1785
"byteorder",
1786
-
"byteview",
1787
"dashmap",
1788
"log",
1789
-
"lsm-tree",
1790
"path-absolutize",
1791
"std-semaphore",
1792
"tempfile",
···
1799
source = "git+https://github.com/fjall-rs/fjall.git#42d811f7c8cc9004407d520d37d2a1d8d246c03d"
1800
dependencies = [
1801
"byteorder",
1802
-
"byteview",
1803
"dashmap",
1804
"log",
1805
-
"lsm-tree",
1806
"path-absolutize",
1807
"std-semaphore",
1808
"tempfile",
···
1810
]
1811
1812
[[package]]
1813
name = "flate2"
1814
version = "1.1.2"
1815
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1891
"mixtrics",
1892
"pin-project",
1893
"serde",
1894
-
"thiserror 2.0.16",
1895
"tokio",
1896
"tracing",
1897
]
···
1911
"parking_lot",
1912
"pin-project",
1913
"serde",
1914
-
"thiserror 2.0.16",
1915
"tokio",
1916
"twox-hash",
1917
]
···
1944
"parking_lot",
1945
"pin-project",
1946
"serde",
1947
-
"thiserror 2.0.16",
1948
"tokio",
1949
"tracing",
1950
]
···
1976
"pin-project",
1977
"rand 0.9.1",
1978
"serde",
1979
-
"thiserror 2.0.16",
1980
"tokio",
1981
"tracing",
1982
"twox-hash",
···
2220
"pest_derive",
2221
"serde",
2222
"serde_json",
2223
-
"thiserror 2.0.16",
2224
"walkdir",
2225
]
2226
···
2345
"once_cell",
2346
"rand 0.9.1",
2347
"ring",
2348
-
"thiserror 2.0.16",
2349
"tinyvec",
2350
"tokio",
2351
"tracing",
···
2368
"rand 0.9.1",
2369
"resolv-conf",
2370
"smallvec",
2371
-
"thiserror 2.0.16",
2372
"tokio",
2373
"tracing",
2374
]
···
2800
]
2801
2802
[[package]]
2803
name = "is-terminal"
2804
version = "0.4.16"
2805
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2863
"metrics",
2864
"serde",
2865
"serde_json",
2866
-
"thiserror 2.0.16",
2867
"tokio",
2868
"tokio-tungstenite 0.26.2",
2869
"url",
···
3045
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
3046
dependencies = [
3047
"cfg-if",
3048
-
"windows-targets 0.48.5",
3049
]
3050
3051
[[package]]
···
3116
version = "0.1.0"
3117
dependencies = [
3118
"anyhow",
3119
"fluent-uri",
3120
"nom",
3121
-
"thiserror 2.0.16",
3122
"tinyjson",
3123
]
3124
···
3186
3187
[[package]]
3188
name = "lsm-tree"
3189
-
version = "2.10.2"
3190
source = "registry+https://github.com/rust-lang/crates.io-index"
3191
-
checksum = "55b6d7475a8dd22e749186968daacf8e2a77932b061b1bd263157987bbfc0c6c"
3192
dependencies = [
3193
"byteorder",
3194
"crossbeam-skiplist",
···
3209
]
3210
3211
[[package]]
3212
name = "lz4"
3213
version = "1.28.1"
3214
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3229
3230
[[package]]
3231
name = "lz4_flex"
3232
-
version = "0.11.3"
3233
source = "registry+https://github.com/rust-lang/crates.io-index"
3234
-
checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5"
3235
3236
[[package]]
3237
name = "mach2"
···
3297
]
3298
3299
[[package]]
3300
name = "match_cfg"
3301
version = "0.1.0"
3302
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3384
"metrics",
3385
"metrics-util 0.20.0",
3386
"quanta",
3387
-
"thiserror 2.0.16",
3388
"tokio",
3389
"tracing",
3390
]
···
3531
3532
[[package]]
3533
name = "multibase"
3534
-
version = "0.9.1"
3535
source = "registry+https://github.com/rust-lang/crates.io-index"
3536
-
checksum = "9b3539ec3c1f04ac9748a260728e855f261b4977f5c3406612c884564f329404"
3537
dependencies = [
3538
"base-x",
3539
"data-encoding",
3540
"data-encoding-macro",
3541
]
···
3548
dependencies = [
3549
"core2",
3550
"serde",
3551
-
"unsigned-varint",
3552
]
3553
3554
[[package]]
···
3926
checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323"
3927
dependencies = [
3928
"memchr",
3929
-
"thiserror 2.0.16",
3930
"ucd-trie",
3931
]
3932
···
4036
"rusqlite",
4037
"serde",
4038
"serde_json",
4039
-
"thiserror 2.0.16",
4040
"tokio",
4041
"tracing-subscriber",
4042
]
···
4079
"smallvec",
4080
"sync_wrapper",
4081
"tempfile",
4082
-
"thiserror 2.0.16",
4083
"tokio",
4084
"tokio-rustls 0.26.2",
4085
"tokio-stream",
···
4123
"serde_json",
4124
"serde_urlencoded",
4125
"serde_yaml",
4126
-
"thiserror 2.0.16",
4127
"tokio",
4128
]
4129
···
4142
"quote",
4143
"regex",
4144
"syn 2.0.106",
4145
-
"thiserror 2.0.16",
4146
]
4147
4148
[[package]]
···
4269
4270
[[package]]
4271
name = "quick_cache"
4272
-
version = "0.6.12"
4273
source = "registry+https://github.com/rust-lang/crates.io-index"
4274
-
checksum = "8f8ed0655cbaf18a26966142ad23b95d8ab47221c50c4f73a1db7d0d2d6e3da8"
4275
dependencies = [
4276
"equivalent",
4277
"hashbrown 0.15.2",
···
4291
"rustc-hash 2.1.1",
4292
"rustls 0.23.31",
4293
"socket2 0.5.9",
4294
-
"thiserror 2.0.16",
4295
"tokio",
4296
"tracing",
4297
"web-time",
···
4312
"rustls 0.23.31",
4313
"rustls-pki-types",
4314
"slab",
4315
-
"thiserror 2.0.16",
4316
"tinyvec",
4317
"tracing",
4318
"web-time",
···
4538
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
4539
4540
[[package]]
4541
name = "reqwest"
4542
-
version = "0.12.22"
4543
source = "registry+https://github.com/rust-lang/crates.io-index"
4544
-
checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531"
4545
dependencies = [
4546
"async-compression",
4547
"base64 0.22.1",
···
4581
"url",
4582
"wasm-bindgen",
4583
"wasm-bindgen-futures",
4584
"web-sys",
4585
]
4586
···
4962
4963
[[package]]
4964
name = "self_cell"
4965
-
version = "1.1.0"
4966
source = "registry+https://github.com/rust-lang/crates.io-index"
4967
-
checksum = "c2fdfc24bc566f839a2da4c4295b82db7d25a24253867d5c64355abb5799bdbe"
4968
4969
[[package]]
4970
name = "semver"
···
4984
4985
[[package]]
4986
name = "serde_bytes"
4987
-
version = "0.11.17"
4988
source = "registry+https://github.com/rust-lang/crates.io-index"
4989
-
checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96"
4990
dependencies = [
4991
"serde",
4992
]
4993
4994
[[package]]
···
5036
]
5037
5038
[[package]]
5039
name = "serde_json"
5040
version = "1.0.145"
5041
source = "registry+https://github.com/rust-lang/crates.io-index"
···
5068
"percent-encoding",
5069
"ryu",
5070
"serde",
5071
-
"thiserror 2.0.16",
5072
]
5073
5074
[[package]]
···
5157
]
5158
5159
[[package]]
5160
name = "sha1"
5161
version = "0.10.6"
5162
source = "registry+https://github.com/rust-lang/crates.io-index"
···
5220
dependencies = [
5221
"num-bigint",
5222
"num-traits",
5223
-
"thiserror 2.0.16",
5224
"time",
5225
]
5226
···
5262
"rustls 0.23.31",
5263
"serde",
5264
"serde_json",
5265
-
"thiserror 2.0.16",
5266
"time",
5267
"tokio",
5268
"tokio-util",
···
5355
name = "spacedust"
5356
version = "0.1.0"
5357
dependencies = [
5358
"async-trait",
5359
"clap",
5360
"ctrlc",
5361
"dropshot",
5362
"env_logger",
5363
"futures",
5364
"http",
5365
"jetstream",
5366
"links",
5367
"log",
5368
"metrics",
5369
"metrics-exporter-prometheus 0.17.2",
5370
"rand 0.9.1",
5371
"schemars",
5372
"semver",
5373
"serde",
5374
"serde_json",
5375
"serde_qs",
5376
-
"thiserror 2.0.16",
5377
"tinyjson",
5378
"tokio",
5379
"tokio-tungstenite 0.27.0",
···
5506
5507
[[package]]
5508
name = "tempfile"
5509
-
version = "3.19.1"
5510
source = "registry+https://github.com/rust-lang/crates.io-index"
5511
-
checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf"
5512
dependencies = [
5513
"fastrand",
5514
"getrandom 0.3.3",
···
5539
5540
[[package]]
5541
name = "thiserror"
5542
-
version = "2.0.16"
5543
source = "registry+https://github.com/rust-lang/crates.io-index"
5544
-
checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0"
5545
dependencies = [
5546
-
"thiserror-impl 2.0.16",
5547
]
5548
5549
[[package]]
···
5559
5560
[[package]]
5561
name = "thiserror-impl"
5562
-
version = "2.0.16"
5563
source = "registry+https://github.com/rust-lang/crates.io-index"
5564
-
checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960"
5565
dependencies = [
5566
"proc-macro2",
5567
"quote",
···
5993
"native-tls",
5994
"rand 0.9.1",
5995
"sha1",
5996
-
"thiserror 2.0.16",
5997
"url",
5998
"utf-8",
5999
]
···
6011
"log",
6012
"rand 0.9.1",
6013
"sha1",
6014
-
"thiserror 2.0.16",
6015
"utf-8",
6016
]
6017
···
6054
"http",
6055
"jetstream",
6056
"log",
6057
-
"lsm-tree",
6058
"metrics",
6059
"metrics-exporter-prometheus 0.17.2",
6060
"schemars",
···
6064
"serde_qs",
6065
"sha2",
6066
"tempfile",
6067
-
"thiserror 2.0.16",
6068
"tikv-jemallocator",
6069
"tokio",
6070
"tokio-util",
···
6117
6118
[[package]]
6119
name = "unsigned-varint"
6120
version = "0.8.0"
6121
source = "registry+https://github.com/rust-lang/crates.io-index"
6122
checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06"
···
6193
checksum = "62fc7c4ce161f049607ecea654dca3f2d727da5371ae85e2e4f14ce2b98ed67c"
6194
dependencies = [
6195
"byteorder",
6196
-
"byteview",
6197
"interval-heap",
6198
"log",
6199
"path-absolutize",
···
6342
]
6343
6344
[[package]]
6345
name = "web-sys"
6346
version = "0.3.77"
6347
source = "registry+https://github.com/rust-lang/crates.io-index"
···
6400
"reqwest",
6401
"serde",
6402
"serde_json",
6403
-
"thiserror 2.0.16",
6404
"tokio",
6405
"tokio-util",
6406
"url",
···
6440
source = "registry+https://github.com/rust-lang/crates.io-index"
6441
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
6442
dependencies = [
6443
-
"windows-sys 0.48.0",
6444
]
6445
6446
[[package]]
···
6758
"nom",
6759
"oid-registry",
6760
"rusticata-macros",
6761
-
"thiserror 2.0.16",
6762
"time",
6763
]
6764
···
112
113
[[package]]
114
name = "anyhow"
115
+
version = "1.0.100"
116
source = "registry+https://github.com/rust-lang/crates.io-index"
117
+
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
118
119
[[package]]
120
name = "arbitrary"
···
127
version = "1.7.1"
128
source = "registry+https://github.com/rust-lang/crates.io-index"
129
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
130
+
131
+
[[package]]
132
+
name = "arrayref"
133
+
version = "0.3.9"
134
+
source = "registry+https://github.com/rust-lang/crates.io-index"
135
+
checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
136
137
[[package]]
138
name = "arrayvec"
···
198
"nom",
199
"num-traits",
200
"rusticata-macros",
201
+
"thiserror 2.0.17",
202
"time",
203
]
204
···
650
"axum",
651
"handlebars",
652
"serde",
653
+
"thiserror 2.0.17",
654
]
655
656
[[package]]
···
679
version = "0.2.0"
680
source = "registry+https://github.com/rust-lang/crates.io-index"
681
checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
682
+
683
+
[[package]]
684
+
name = "base256emoji"
685
+
version = "1.0.2"
686
+
source = "registry+https://github.com/rust-lang/crates.io-index"
687
+
checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c"
688
+
dependencies = [
689
+
"const-str",
690
+
"match-lookup",
691
+
]
692
693
[[package]]
694
name = "base64"
···
828
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
829
830
[[package]]
831
+
name = "blake3"
832
+
version = "1.8.2"
833
+
source = "registry+https://github.com/rust-lang/crates.io-index"
834
+
checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0"
835
+
dependencies = [
836
+
"arrayref",
837
+
"arrayvec",
838
+
"cc",
839
+
"cfg-if",
840
+
"constant_time_eq",
841
+
]
842
+
843
+
[[package]]
844
name = "block-buffer"
845
version = "0.10.4"
846
source = "registry+https://github.com/rust-lang/crates.io-index"
···
868
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
869
870
[[package]]
871
+
name = "byteorder-lite"
872
+
version = "0.1.0"
873
+
source = "registry+https://github.com/rust-lang/crates.io-index"
874
+
checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
875
+
876
+
[[package]]
877
name = "bytes"
878
version = "1.10.1"
879
source = "registry+https://github.com/rust-lang/crates.io-index"
···
886
checksum = "6236364b88b9b6d0bc181ba374cf1ab55ba3ef97a1cb6f8cddad48a273767fb5"
887
888
[[package]]
889
+
name = "byteview"
890
+
version = "0.8.0"
891
+
source = "registry+https://github.com/rust-lang/crates.io-index"
892
+
checksum = "1e6b0e42e210b794e14b152c6fe1a55831e30ef4a0f5dc39d73d714fb5f1906c"
893
+
894
+
[[package]]
895
name = "bzip2-sys"
896
version = "0.1.13+1.0.8"
897
source = "registry+https://github.com/rust-lang/crates.io-index"
···
931
"enum_dispatch",
932
"serde",
933
]
934
+
935
+
[[package]]
936
+
name = "cbor4ii"
937
+
version = "0.2.14"
938
+
source = "registry+https://github.com/rust-lang/crates.io-index"
939
+
checksum = "b544cf8c89359205f4f990d0e6f3828db42df85b5dac95d09157a250eb0749c4"
940
+
dependencies = [
941
+
"serde",
942
+
]
943
+
944
+
[[package]]
945
+
name = "cbor4ii"
946
+
version = "1.2.0"
947
+
source = "registry+https://github.com/rust-lang/crates.io-index"
948
+
checksum = "b28d2802395e3bccd95cc4ae984bff7444b6c1f5981da46a41360c42a2c7e2d9"
949
950
[[package]]
951
name = "cc"
···
1032
"multihash",
1033
"serde",
1034
"serde_bytes",
1035
+
"unsigned-varint 0.8.0",
1036
]
1037
1038
[[package]]
···
1048
1049
[[package]]
1050
name = "clap"
1051
+
version = "4.5.48"
1052
source = "registry+https://github.com/rust-lang/crates.io-index"
1053
+
checksum = "e2134bb3ea021b78629caa971416385309e0131b351b25e01dc16fb54e1b5fae"
1054
dependencies = [
1055
"clap_builder",
1056
"clap_derive",
···
1058
1059
[[package]]
1060
name = "clap_builder"
1061
+
version = "4.5.48"
1062
source = "registry+https://github.com/rust-lang/crates.io-index"
1063
+
checksum = "c2ba64afa3c0a6df7fa517765e31314e983f51dda798ffba27b988194fb65dc9"
1064
dependencies = [
1065
"anstream",
1066
"anstyle",
···
1141
version = "0.9.6"
1142
source = "registry+https://github.com/rust-lang/crates.io-index"
1143
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
1144
+
1145
+
[[package]]
1146
+
name = "const-str"
1147
+
version = "0.4.3"
1148
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1149
+
checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3"
1150
+
1151
+
[[package]]
1152
+
name = "constant_time_eq"
1153
+
version = "0.3.1"
1154
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1155
+
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
1156
1157
[[package]]
1158
name = "constellation"
···
1421
]
1422
1423
[[package]]
1424
+
name = "dasl"
1425
+
version = "0.2.0"
1426
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1427
+
checksum = "b59666035a4386b0fd272bd78da4cbc3ccb558941e97579ab00f0eb4639f2a49"
1428
+
dependencies = [
1429
+
"blake3",
1430
+
"cbor4ii 1.2.0",
1431
+
"data-encoding",
1432
+
"data-encoding-macro",
1433
+
"scopeguard",
1434
+
"serde",
1435
+
"serde_bytes",
1436
+
"sha2",
1437
+
"thiserror 2.0.17",
1438
+
]
1439
+
1440
+
[[package]]
1441
name = "data-encoding"
1442
+
version = "2.9.0"
1443
source = "registry+https://github.com/rust-lang/crates.io-index"
1444
+
checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476"
1445
1446
[[package]]
1447
name = "data-encoding-macro"
1448
+
version = "0.1.18"
1449
source = "registry+https://github.com/rust-lang/crates.io-index"
1450
+
checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d"
1451
dependencies = [
1452
"data-encoding",
1453
"data-encoding-macro-internal",
···
1455
1456
[[package]]
1457
name = "data-encoding-macro-internal"
1458
+
version = "0.1.16"
1459
source = "registry+https://github.com/rust-lang/crates.io-index"
1460
+
checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976"
1461
dependencies = [
1462
"data-encoding",
1463
+
"syn 2.0.106",
1464
]
1465
1466
[[package]]
···
1664
"slog-bunyan",
1665
"slog-json",
1666
"slog-term",
1667
+
"thiserror 2.0.17",
1668
"tokio",
1669
"tokio-rustls 0.25.0",
1670
"toml 0.9.7",
···
1868
checksum = "0b25ad44cd4360a0448a9b5a0a6f1c7a621101cca4578706d43c9a821418aebc"
1869
dependencies = [
1870
"byteorder",
1871
+
"byteview 0.6.1",
1872
"dashmap",
1873
"log",
1874
+
"lsm-tree 2.10.4",
1875
"path-absolutize",
1876
"std-semaphore",
1877
"tempfile",
···
1884
source = "git+https://github.com/fjall-rs/fjall.git#42d811f7c8cc9004407d520d37d2a1d8d246c03d"
1885
dependencies = [
1886
"byteorder",
1887
+
"byteview 0.6.1",
1888
"dashmap",
1889
"log",
1890
+
"lsm-tree 2.10.4",
1891
"path-absolutize",
1892
"std-semaphore",
1893
"tempfile",
···
1895
]
1896
1897
[[package]]
1898
+
name = "fjall"
1899
+
version = "3.0.0-pre.0"
1900
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1901
+
checksum = "467588c1f15d1cfa9e43f02a45cf55d82fa1f12a6ae961b848c520458525600c"
1902
+
dependencies = [
1903
+
"byteorder-lite",
1904
+
"byteview 0.8.0",
1905
+
"dashmap",
1906
+
"log",
1907
+
"lsm-tree 3.0.0-pre.0",
1908
+
"std-semaphore",
1909
+
"tempfile",
1910
+
"xxhash-rust",
1911
+
]
1912
+
1913
+
[[package]]
1914
name = "flate2"
1915
version = "1.1.2"
1916
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1992
"mixtrics",
1993
"pin-project",
1994
"serde",
1995
+
"thiserror 2.0.17",
1996
"tokio",
1997
"tracing",
1998
]
···
2012
"parking_lot",
2013
"pin-project",
2014
"serde",
2015
+
"thiserror 2.0.17",
2016
"tokio",
2017
"twox-hash",
2018
]
···
2045
"parking_lot",
2046
"pin-project",
2047
"serde",
2048
+
"thiserror 2.0.17",
2049
"tokio",
2050
"tracing",
2051
]
···
2077
"pin-project",
2078
"rand 0.9.1",
2079
"serde",
2080
+
"thiserror 2.0.17",
2081
"tokio",
2082
"tracing",
2083
"twox-hash",
···
2321
"pest_derive",
2322
"serde",
2323
"serde_json",
2324
+
"thiserror 2.0.17",
2325
"walkdir",
2326
]
2327
···
2446
"once_cell",
2447
"rand 0.9.1",
2448
"ring",
2449
+
"thiserror 2.0.17",
2450
"tinyvec",
2451
"tokio",
2452
"tracing",
···
2469
"rand 0.9.1",
2470
"resolv-conf",
2471
"smallvec",
2472
+
"thiserror 2.0.17",
2473
"tokio",
2474
"tracing",
2475
]
···
2901
]
2902
2903
[[package]]
2904
+
name = "iroh-car"
2905
+
version = "0.5.1"
2906
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2907
+
checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a"
2908
+
dependencies = [
2909
+
"anyhow",
2910
+
"cid",
2911
+
"futures",
2912
+
"serde",
2913
+
"serde_ipld_dagcbor",
2914
+
"thiserror 1.0.69",
2915
+
"tokio",
2916
+
"unsigned-varint 0.7.2",
2917
+
]
2918
+
2919
+
[[package]]
2920
name = "is-terminal"
2921
version = "0.4.16"
2922
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2980
"metrics",
2981
"serde",
2982
"serde_json",
2983
+
"thiserror 2.0.17",
2984
"tokio",
2985
"tokio-tungstenite 0.26.2",
2986
"url",
···
3162
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
3163
dependencies = [
3164
"cfg-if",
3165
+
"windows-targets 0.52.6",
3166
]
3167
3168
[[package]]
···
3233
version = "0.1.0"
3234
dependencies = [
3235
"anyhow",
3236
+
"dasl",
3237
"fluent-uri",
3238
"nom",
3239
+
"serde",
3240
+
"thiserror 2.0.17",
3241
"tinyjson",
3242
]
3243
···
3305
3306
[[package]]
3307
name = "lsm-tree"
3308
+
version = "2.10.4"
3309
source = "registry+https://github.com/rust-lang/crates.io-index"
3310
+
checksum = "799399117a2bfb37660e08be33f470958babb98386b04185288d829df362ea15"
3311
dependencies = [
3312
"byteorder",
3313
"crossbeam-skiplist",
···
3328
]
3329
3330
[[package]]
3331
+
name = "lsm-tree"
3332
+
version = "3.0.0-pre.0"
3333
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3334
+
checksum = "be375d45e348328e78582dffbda4f1709dd52fca27c1a81c7bf6ca134e6335f7"
3335
+
dependencies = [
3336
+
"byteorder-lite",
3337
+
"byteview 0.8.0",
3338
+
"crossbeam-skiplist",
3339
+
"enum_dispatch",
3340
+
"interval-heap",
3341
+
"log",
3342
+
"lz4_flex",
3343
+
"quick_cache",
3344
+
"rustc-hash 2.1.1",
3345
+
"self_cell",
3346
+
"sfa",
3347
+
"tempfile",
3348
+
"varint-rs",
3349
+
"xxhash-rust",
3350
+
]
3351
+
3352
+
[[package]]
3353
name = "lz4"
3354
version = "1.28.1"
3355
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3370
3371
[[package]]
3372
name = "lz4_flex"
3373
+
version = "0.11.5"
3374
source = "registry+https://github.com/rust-lang/crates.io-index"
3375
+
checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a"
3376
3377
[[package]]
3378
name = "mach2"
···
3438
]
3439
3440
[[package]]
3441
+
name = "match-lookup"
3442
+
version = "0.1.1"
3443
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3444
+
checksum = "1265724d8cb29dbbc2b0f06fffb8bf1a8c0cf73a78eede9ba73a4a66c52a981e"
3445
+
dependencies = [
3446
+
"proc-macro2",
3447
+
"quote",
3448
+
"syn 1.0.109",
3449
+
]
3450
+
3451
+
[[package]]
3452
name = "match_cfg"
3453
version = "0.1.0"
3454
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3536
"metrics",
3537
"metrics-util 0.20.0",
3538
"quanta",
3539
+
"thiserror 2.0.17",
3540
"tokio",
3541
"tracing",
3542
]
···
3683
3684
[[package]]
3685
name = "multibase"
3686
+
version = "0.9.2"
3687
source = "registry+https://github.com/rust-lang/crates.io-index"
3688
+
checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77"
3689
dependencies = [
3690
"base-x",
3691
+
"base256emoji",
3692
"data-encoding",
3693
"data-encoding-macro",
3694
]
···
3701
dependencies = [
3702
"core2",
3703
"serde",
3704
+
"unsigned-varint 0.8.0",
3705
]
3706
3707
[[package]]
···
4079
checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323"
4080
dependencies = [
4081
"memchr",
4082
+
"thiserror 2.0.17",
4083
"ucd-trie",
4084
]
4085
···
4189
"rusqlite",
4190
"serde",
4191
"serde_json",
4192
+
"thiserror 2.0.17",
4193
"tokio",
4194
"tracing-subscriber",
4195
]
···
4232
"smallvec",
4233
"sync_wrapper",
4234
"tempfile",
4235
+
"thiserror 2.0.17",
4236
"tokio",
4237
"tokio-rustls 0.26.2",
4238
"tokio-stream",
···
4276
"serde_json",
4277
"serde_urlencoded",
4278
"serde_yaml",
4279
+
"thiserror 2.0.17",
4280
"tokio",
4281
]
4282
···
4295
"quote",
4296
"regex",
4297
"syn 2.0.106",
4298
+
"thiserror 2.0.17",
4299
]
4300
4301
[[package]]
···
4422
4423
[[package]]
4424
name = "quick_cache"
4425
+
version = "0.6.16"
4426
source = "registry+https://github.com/rust-lang/crates.io-index"
4427
+
checksum = "9ad6644cb07b7f3488b9f3d2fde3b4c0a7fa367cafefb39dff93a659f76eb786"
4428
dependencies = [
4429
"equivalent",
4430
"hashbrown 0.15.2",
···
4444
"rustc-hash 2.1.1",
4445
"rustls 0.23.31",
4446
"socket2 0.5.9",
4447
+
"thiserror 2.0.17",
4448
"tokio",
4449
"tracing",
4450
"web-time",
···
4465
"rustls 0.23.31",
4466
"rustls-pki-types",
4467
"slab",
4468
+
"thiserror 2.0.17",
4469
"tinyvec",
4470
"tracing",
4471
"web-time",
···
4691
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
4692
4693
[[package]]
4694
+
name = "repo-stream"
4695
+
version = "0.2.2"
4696
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4697
+
checksum = "093b48e604c138949bf3d4a1a9bc1165feb1db28a73af0101c84eb703d279f43"
4698
+
dependencies = [
4699
+
"bincode 2.0.1",
4700
+
"futures",
4701
+
"futures-core",
4702
+
"ipld-core",
4703
+
"iroh-car",
4704
+
"log",
4705
+
"multibase",
4706
+
"rusqlite",
4707
+
"serde",
4708
+
"serde_bytes",
4709
+
"serde_ipld_dagcbor",
4710
+
"sha2",
4711
+
"thiserror 2.0.17",
4712
+
"tokio",
4713
+
]
4714
+
4715
+
[[package]]
4716
name = "reqwest"
4717
+
version = "0.12.24"
4718
source = "registry+https://github.com/rust-lang/crates.io-index"
4719
+
checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f"
4720
dependencies = [
4721
"async-compression",
4722
"base64 0.22.1",
···
4756
"url",
4757
"wasm-bindgen",
4758
"wasm-bindgen-futures",
4759
+
"wasm-streams",
4760
"web-sys",
4761
]
4762
···
5138
5139
[[package]]
5140
name = "self_cell"
5141
+
version = "1.2.0"
5142
source = "registry+https://github.com/rust-lang/crates.io-index"
5143
+
checksum = "0f7d95a54511e0c7be3f51e8867aa8cf35148d7b9445d44de2f943e2b206e749"
5144
5145
[[package]]
5146
name = "semver"
···
5160
5161
[[package]]
5162
name = "serde_bytes"
5163
+
version = "0.11.19"
5164
source = "registry+https://github.com/rust-lang/crates.io-index"
5165
+
checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8"
5166
dependencies = [
5167
"serde",
5168
+
"serde_core",
5169
]
5170
5171
[[package]]
···
5213
]
5214
5215
[[package]]
5216
+
name = "serde_ipld_dagcbor"
5217
+
version = "0.6.4"
5218
+
source = "registry+https://github.com/rust-lang/crates.io-index"
5219
+
checksum = "46182f4f08349a02b45c998ba3215d3f9de826246ba02bb9dddfe9a2a2100778"
5220
+
dependencies = [
5221
+
"cbor4ii 0.2.14",
5222
+
"ipld-core",
5223
+
"scopeguard",
5224
+
"serde",
5225
+
]
5226
+
5227
+
[[package]]
5228
name = "serde_json"
5229
version = "1.0.145"
5230
source = "registry+https://github.com/rust-lang/crates.io-index"
···
5257
"percent-encoding",
5258
"ryu",
5259
"serde",
5260
+
"thiserror 2.0.17",
5261
]
5262
5263
[[package]]
···
5346
]
5347
5348
[[package]]
5349
+
name = "sfa"
5350
+
version = "0.0.1"
5351
+
source = "registry+https://github.com/rust-lang/crates.io-index"
5352
+
checksum = "e5f5f9dc21f55409f15103d5a7e7601b804935923c7fe4746dc806c3a422a038"
5353
+
dependencies = [
5354
+
"byteorder-lite",
5355
+
"log",
5356
+
"xxhash-rust",
5357
+
]
5358
+
5359
+
[[package]]
5360
name = "sha1"
5361
version = "0.10.6"
5362
source = "registry+https://github.com/rust-lang/crates.io-index"
···
5420
dependencies = [
5421
"num-bigint",
5422
"num-traits",
5423
+
"thiserror 2.0.17",
5424
"time",
5425
]
5426
···
5462
"rustls 0.23.31",
5463
"serde",
5464
"serde_json",
5465
+
"thiserror 2.0.17",
5466
"time",
5467
"tokio",
5468
"tokio-util",
···
5555
name = "spacedust"
5556
version = "0.1.0"
5557
dependencies = [
5558
+
"anyhow",
5559
+
"async-channel",
5560
"async-trait",
5561
"clap",
5562
"ctrlc",
5563
+
"dasl",
5564
"dropshot",
5565
"env_logger",
5566
+
"fjall 3.0.0-pre.0",
5567
"futures",
5568
"http",
5569
+
"ipld-core",
5570
"jetstream",
5571
"links",
5572
"log",
5573
"metrics",
5574
"metrics-exporter-prometheus 0.17.2",
5575
"rand 0.9.1",
5576
+
"repo-stream",
5577
+
"reqwest",
5578
"schemars",
5579
"semver",
5580
"serde",
5581
+
"serde_ipld_dagcbor",
5582
"serde_json",
5583
"serde_qs",
5584
+
"thiserror 2.0.17",
5585
"tinyjson",
5586
"tokio",
5587
"tokio-tungstenite 0.27.0",
···
5714
5715
[[package]]
5716
name = "tempfile"
5717
+
version = "3.23.0"
5718
source = "registry+https://github.com/rust-lang/crates.io-index"
5719
+
checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
5720
dependencies = [
5721
"fastrand",
5722
"getrandom 0.3.3",
···
5747
5748
[[package]]
5749
name = "thiserror"
5750
+
version = "2.0.17"
5751
source = "registry+https://github.com/rust-lang/crates.io-index"
5752
+
checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
5753
dependencies = [
5754
+
"thiserror-impl 2.0.17",
5755
]
5756
5757
[[package]]
···
5767
5768
[[package]]
5769
name = "thiserror-impl"
5770
+
version = "2.0.17"
5771
source = "registry+https://github.com/rust-lang/crates.io-index"
5772
+
checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
5773
dependencies = [
5774
"proc-macro2",
5775
"quote",
···
6201
"native-tls",
6202
"rand 0.9.1",
6203
"sha1",
6204
+
"thiserror 2.0.17",
6205
"url",
6206
"utf-8",
6207
]
···
6219
"log",
6220
"rand 0.9.1",
6221
"sha1",
6222
+
"thiserror 2.0.17",
6223
"utf-8",
6224
]
6225
···
6262
"http",
6263
"jetstream",
6264
"log",
6265
+
"lsm-tree 2.10.4",
6266
"metrics",
6267
"metrics-exporter-prometheus 0.17.2",
6268
"schemars",
···
6272
"serde_qs",
6273
"sha2",
6274
"tempfile",
6275
+
"thiserror 2.0.17",
6276
"tikv-jemallocator",
6277
"tokio",
6278
"tokio-util",
···
6325
6326
[[package]]
6327
name = "unsigned-varint"
6328
+
version = "0.7.2"
6329
+
source = "registry+https://github.com/rust-lang/crates.io-index"
6330
+
checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105"
6331
+
6332
+
[[package]]
6333
+
name = "unsigned-varint"
6334
version = "0.8.0"
6335
source = "registry+https://github.com/rust-lang/crates.io-index"
6336
checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06"
···
6407
checksum = "62fc7c4ce161f049607ecea654dca3f2d727da5371ae85e2e4f14ce2b98ed67c"
6408
dependencies = [
6409
"byteorder",
6410
+
"byteview 0.6.1",
6411
"interval-heap",
6412
"log",
6413
"path-absolutize",
···
6556
]
6557
6558
[[package]]
6559
+
name = "wasm-streams"
6560
+
version = "0.4.2"
6561
+
source = "registry+https://github.com/rust-lang/crates.io-index"
6562
+
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
6563
+
dependencies = [
6564
+
"futures-util",
6565
+
"js-sys",
6566
+
"wasm-bindgen",
6567
+
"wasm-bindgen-futures",
6568
+
"web-sys",
6569
+
]
6570
+
6571
+
[[package]]
6572
name = "web-sys"
6573
version = "0.3.77"
6574
source = "registry+https://github.com/rust-lang/crates.io-index"
···
6627
"reqwest",
6628
"serde",
6629
"serde_json",
6630
+
"thiserror 2.0.17",
6631
"tokio",
6632
"tokio-util",
6633
"url",
···
6667
source = "registry+https://github.com/rust-lang/crates.io-index"
6668
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
6669
dependencies = [
6670
+
"windows-sys 0.59.0",
6671
]
6672
6673
[[package]]
···
6985
"nom",
6986
"oid-registry",
6987
"rusticata-macros",
6988
+
"thiserror 2.0.17",
6989
"time",
6990
]
6991
constellation/src/bin/main.rs  +29 -10

@@ -54,6 +54,9 @@
     /// Saved jsonl from jetstream to use instead of a live subscription
     #[arg(short, long)]
     fixture: Option<PathBuf>,
+    /// run a scan across the target id table and write all key -> ids to id -> keys
+    #[arg(long, action)]
+    repair_target_ids: bool,
 }

 #[derive(Debug, Clone, ValueEnum)]
@@ -115,15 +118,31 @@
                 rocks.start_backup(backup_dir, auto_backup, stay_alive.clone())?;
             }
             println!("rocks ready.");
-            run(
-                rocks,
-                fixture,
-                args.data,
-                stream,
-                bind,
-                metrics_bind,
-                stay_alive,
-            )
+            std::thread::scope(|s| {
+                if args.repair_target_ids {
+                    let rocks = rocks.clone();
+                    let stay_alive = stay_alive.clone();
+                    s.spawn(move || {
+                        let rep = rocks.run_repair(time::Duration::from_millis(0), stay_alive);
+                        eprintln!("repair finished: {rep:?}");
+                        rep
+                    });
+                }
+                s.spawn(|| {
+                    let r = run(
+                        rocks,
+                        fixture,
+                        args.data,
+                        stream,
+                        bind,
+                        metrics_bind,
+                        stay_alive,
+                    );
+                    eprintln!("run finished: {r:?}");
+                    r
+                });
+            });
+            Ok(())
         }
     }
 }
@@ -213,7 +232,7 @@

     'monitor: loop {
         match readable.get_stats() {
-            Ok(StorageStats { dids, targetables, linking_records }) => {
+            Ok(StorageStats { dids, targetables, linking_records, .. }) => {
                 metrics::gauge!("storage.stats.dids").set(dids as f64);
                 metrics::gauge!("storage.stats.targetables").set(targetables as f64);
                 metrics::gauge!("storage.stats.linking_records").set(linking_records as f64);
constellation/src/server/filters.rs  +8 -6

@@ -5,12 +5,10 @@
     Ok({
         if let Some(link) = parse_any_link(s) {
             match link {
-                Link::AtUri(at_uri) => at_uri.strip_prefix("at://").map(|noproto| {
-                    format!("https://atproto-browser-plus-links.vercel.app/at/{noproto}")
-                }),
-                Link::Did(did) => Some(format!(
-                    "https://atproto-browser-plus-links.vercel.app/at/{did}"
-                )),
+                Link::AtUri(at_uri) => at_uri
+                    .strip_prefix("at://")
+                    .map(|noproto| format!("https://pdsls.dev/at://{noproto}")),
+                Link::Did(did) => Some(format!("https://pdsls.dev/at://{did}")),
                 Link::Uri(uri) => Some(uri),
             }
         } else {
@@ -22,3 +20,7 @@
 pub fn human_number(n: &u64) -> askama::Result<String> {
     Ok(n.to_formatted_string(&Locale::en))
 }
+
+pub fn to_u64(n: usize) -> askama::Result<u64> {
+    Ok(n as u64)
+}
constellation/src/server/mod.rs  +289 -18
···
14
use std::collections::{HashMap, HashSet};
15
use std::time::{Duration, UNIX_EPOCH};
16
use tokio::net::{TcpListener, ToSocketAddrs};
17
-
use tokio::task::block_in_place;
18
use tokio_util::sync::CancellationToken;
19
20
use crate::storage::{LinkReader, StorageStats};
···
28
const DEFAULT_CURSOR_LIMIT: u64 = 16;
29
const DEFAULT_CURSOR_LIMIT_MAX: u64 = 100;
30
31
-
const INDEX_BEGAN_AT_TS: u64 = 1738083600; // TODO: not this
32
33
pub async fn serve<S, A>(store: S, addr: A, stay_alive: CancellationToken) -> anyhow::Result<()>
34
where
···
41
"/",
42
get({
43
let store = store.clone();
44
-
move |accept| async { block_in_place(|| hello(accept, store)) }
45
}),
46
)
47
.route(
48
"/links/count",
49
get({
50
let store = store.clone();
51
-
move |accept, query| async { block_in_place(|| count_links(accept, query, store)) }
52
}),
53
)
54
.route(
···
56
get({
57
let store = store.clone();
58
move |accept, query| async {
59
-
block_in_place(|| count_distinct_dids(accept, query, store))
60
}
61
}),
62
)
···
64
"/links",
65
get({
66
let store = store.clone();
67
-
move |accept, query| async { block_in_place(|| get_links(accept, query, store)) }
68
}),
69
)
70
.route(
···
72
get({
73
let store = store.clone();
74
move |accept, query| async {
75
-
block_in_place(|| get_distinct_dids(accept, query, store))
76
}
77
}),
78
)
···
82
get({
83
let store = store.clone();
84
move |accept, query| async {
85
-
block_in_place(|| count_all_links(accept, query, store))
86
}
87
}),
88
)
···
91
get({
92
let store = store.clone();
93
move |accept, query| async {
94
-
block_in_place(|| explore_links(accept, query, store))
95
}
96
}),
97
)
···
150
#[template(path = "hello.html.j2")]
151
struct HelloReponse {
152
help: &'static str,
153
-
days_indexed: u64,
154
stats: StorageStats,
155
}
156
fn hello(
···
160
let stats = store
161
.get_stats()
162
.map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?;
163
-
let days_indexed = (UNIX_EPOCH + Duration::from_secs(INDEX_BEGAN_AT_TS))
164
-
.elapsed()
165
.map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?
166
-
.as_secs()
167
-
/ 86400;
168
Ok(acceptable(accept, HelloReponse {
169
help: "open this URL in a web browser (or request with Accept: text/html) for information about this API.",
170
days_indexed,
···
173
}
174
175
#[derive(Clone, Deserialize)]
176
struct GetLinksCountQuery {
177
target: String,
178
collection: String,
···
233
}
234
235
#[derive(Clone, Deserialize)]
236
struct GetLinkItemsQuery {
237
target: String,
238
collection: String,
···
251
///
252
/// deprecated: use `did`, which can be repeated multiple times
253
from_dids: Option<String>, // comma separated: gross
254
-
#[serde(default = "get_default_limit")]
255
limit: u64,
256
// TODO: allow reverse (er, forward) order as well
257
-
}
258
-
fn get_default_limit() -> u64 {
259
-
DEFAULT_CURSOR_LIMIT
260
}
261
#[derive(Template, Serialize)]
262
#[template(path = "links.html.j2")]
···
475
OpaqueApiCursor(bincode::DefaultOptions::new().serialize(&item).unwrap())
476
}
477
}
···
14
use std::collections::{HashMap, HashSet};
15
use std::time::{Duration, UNIX_EPOCH};
16
use tokio::net::{TcpListener, ToSocketAddrs};
17
+
use tokio::task::spawn_blocking;
18
use tokio_util::sync::CancellationToken;
19
20
use crate::storage::{LinkReader, StorageStats};
···
28
const DEFAULT_CURSOR_LIMIT: u64 = 16;
29
const DEFAULT_CURSOR_LIMIT_MAX: u64 = 100;
30
31
+
fn get_default_cursor_limit() -> u64 {
32
+
DEFAULT_CURSOR_LIMIT
33
+
}
34
+
35
+
fn to500(e: tokio::task::JoinError) -> http::StatusCode {
36
+
eprintln!("handler error: {e}");
37
+
http::StatusCode::INTERNAL_SERVER_ERROR
38
+
}
39
40
pub async fn serve<S, A>(store: S, addr: A, stay_alive: CancellationToken) -> anyhow::Result<()>
41
where
···
48
"/",
49
get({
50
let store = store.clone();
51
+
move |accept| async {
52
+
spawn_blocking(|| hello(accept, store))
53
+
.await
54
+
.map_err(to500)?
55
+
}
56
+
}),
57
+
)
58
+
.route(
59
+
"/xrpc/blue.microcosm.links.getManyToManyCounts",
60
+
get({
61
+
let store = store.clone();
62
+
move |accept, query| async {
63
+
spawn_blocking(|| get_many_to_many_counts(accept, query, store))
64
+
.await
65
+
.map_err(to500)?
66
+
}
67
}),
68
)
69
.route(
70
"/links/count",
71
get({
72
let store = store.clone();
73
+
move |accept, query| async {
74
+
spawn_blocking(|| count_links(accept, query, store))
75
+
.await
76
+
.map_err(to500)?
77
+
}
78
}),
79
)
80
.route(
···
82
get({
83
let store = store.clone();
84
move |accept, query| async {
85
+
spawn_blocking(|| count_distinct_dids(accept, query, store))
86
+
.await
87
+
.map_err(to500)?
88
+
}
89
+
}),
90
+
)
91
+
.route(
92
+
"/xrpc/blue.microcosm.links.getBacklinks",
93
+
get({
94
+
let store = store.clone();
95
+
move |accept, query| async {
96
+
spawn_blocking(|| get_backlinks(accept, query, store))
97
+
.await
98
+
.map_err(to500)?
99
}
100
}),
101
)
···
103
"/links",
104
get({
105
let store = store.clone();
106
+
move |accept, query| async {
107
+
spawn_blocking(|| get_links(accept, query, store))
108
+
.await
109
+
.map_err(to500)?
110
+
}
111
}),
112
)
113
.route(
···
115
get({
116
let store = store.clone();
117
move |accept, query| async {
118
+
spawn_blocking(|| get_distinct_dids(accept, query, store))
119
+
.await
120
+
.map_err(to500)?
121
}
122
}),
123
)
···
127
get({
128
let store = store.clone();
129
move |accept, query| async {
130
+
spawn_blocking(|| count_all_links(accept, query, store))
131
+
.await
132
+
.map_err(to500)?
133
}
134
}),
135
)
···
138
get({
139
let store = store.clone();
140
move |accept, query| async {
141
+
spawn_blocking(|| explore_links(accept, query, store))
142
+
.await
143
+
.map_err(to500)?
144
}
145
}),
146
)
···
199
#[template(path = "hello.html.j2")]
200
struct HelloReponse {
201
help: &'static str,
202
+
days_indexed: Option<u64>,
203
stats: StorageStats,
204
}
205
fn hello(
···
209
let stats = store
210
.get_stats()
211
.map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?;
212
+
let days_indexed = stats
213
+
.started_at
214
+
.map(|c| (UNIX_EPOCH + Duration::from_micros(c)).elapsed())
215
+
.transpose()
216
.map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?
217
+
.map(|d| d.as_secs() / 86_400);
218
Ok(acceptable(accept, HelloReponse {
219
help: "open this URL in a web browser (or request with Accept: text/html) for information about this API.",
220
days_indexed,
···
223
}
224
225
#[derive(Clone, Deserialize)]
226
+
#[serde(rename_all = "camelCase")]
227
+
struct GetManyToManyCountsQuery {
228
+
subject: String,
229
+
source: String,
230
+
/// path to the secondary link in the linking record
231
+
path_to_other: String,
232
+
/// filter to linking records (join of the m2m) by these DIDs
233
+
#[serde(default)]
234
+
did: Vec<String>,
235
+
/// filter to specific secondary records
236
+
#[serde(default)]
237
+
other_subject: Vec<String>,
238
+
cursor: Option<OpaqueApiCursor>,
239
+
/// Set the max number of links to return per page of results
240
+
#[serde(default = "get_default_cursor_limit")]
241
+
limit: u64,
242
+
}
243
+
#[derive(Serialize)]
244
+
struct OtherSubjectCount {
245
+
subject: String,
246
+
total: u64,
247
+
distinct: u64,
248
+
}
249
+
#[derive(Template, Serialize)]
250
+
#[template(path = "get-many-to-many-counts.html.j2")]
251
+
struct GetManyToManyCountsResponse {
252
+
counts_by_other_subject: Vec<OtherSubjectCount>,
253
+
cursor: Option<OpaqueApiCursor>,
254
+
#[serde(skip_serializing)]
255
+
query: GetManyToManyCountsQuery,
256
+
}
257
+
fn get_many_to_many_counts(
258
+
accept: ExtractAccept,
259
+
query: axum_extra::extract::Query<GetManyToManyCountsQuery>,
260
+
store: impl LinkReader,
261
+
) -> Result<impl IntoResponse, http::StatusCode> {
262
+
let cursor_key = query
263
+
.cursor
264
+
.clone()
265
+
.map(|oc| ApiKeyedCursor::try_from(oc).map_err(|_| http::StatusCode::BAD_REQUEST))
266
+
.transpose()?
267
+
.map(|c| c.next);
268
+
269
+
let limit = query.limit;
270
+
if limit > DEFAULT_CURSOR_LIMIT_MAX {
271
+
return Err(http::StatusCode::BAD_REQUEST);
272
+
}
273
+
274
+
let filter_dids: HashSet<Did> = HashSet::from_iter(
275
+
query
276
+
.did
277
+
.iter()
278
+
.map(|d| d.trim())
279
+
.filter(|d| !d.is_empty())
280
+
.map(|d| Did(d.to_string())),
281
+
);
282
+
283
+
let filter_other_subjects: HashSet<String> = HashSet::from_iter(
284
+
query
285
+
.other_subject
286
+
.iter()
287
+
.map(|s| s.trim().to_string())
288
+
.filter(|s| !s.is_empty()),
289
+
);
290
+
291
+
let Some((collection, path)) = query.source.split_once(':') else {
292
+
return Err(http::StatusCode::BAD_REQUEST);
293
+
};
294
+
let path = format!(".{path}");
295
+
296
+
let path_to_other = format!(".{}", query.path_to_other);
297
+
298
+
let paged = store
299
+
.get_many_to_many_counts(
300
+
&query.subject,
301
+
collection,
302
+
&path,
303
+
&path_to_other,
304
+
limit,
305
+
cursor_key,
306
+
&filter_dids,
307
+
&filter_other_subjects,
308
+
)
309
+
.map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?;
310
+
311
+
let cursor = paged.next.map(|next| ApiKeyedCursor { next }.into());
312
+
313
+
let items = paged
314
+
.items
315
+
.into_iter()
316
+
.map(|(subject, total, distinct)| OtherSubjectCount {
317
+
subject,
318
+
total,
319
+
distinct,
320
+
})
321
+
.collect();
322
+
323
+
Ok(acceptable(
324
+
accept,
325
+
GetManyToManyCountsResponse {
326
+
counts_by_other_subject: items,
327
+
cursor,
328
+
query: (*query).clone(),
329
+
},
330
+
))
331
+
}
332
+
333
+
#[derive(Clone, Deserialize)]
334
struct GetLinksCountQuery {
335
target: String,
336
collection: String,
···
391
}
392
393
#[derive(Clone, Deserialize)]
394
+
struct GetBacklinksQuery {
395
+
/// The link target
396
+
///
397
+
/// can be an AT-URI, plain DID, or regular URI
398
+
subject: String,
399
+
/// Filter links only from this link source
400
+
///
401
+
/// eg.: `app.bsky.feed.like:subject.uri`
402
+
source: String,
403
+
cursor: Option<OpaqueApiCursor>,
404
+
/// Filter links only from these DIDs
405
+
///
406
+
/// include multiple times to filter by multiple source DIDs
407
+
#[serde(default)]
408
+
did: Vec<String>,
409
+
/// Set the max number of links to return per page of results
410
+
#[serde(default = "get_default_cursor_limit")]
411
+
limit: u64,
412
+
// TODO: allow reverse (er, forward) order as well
413
+
}
414
+
#[derive(Template, Serialize)]
415
+
#[template(path = "get-backlinks.html.j2")]
416
+
struct GetBacklinksResponse {
417
+
total: u64,
418
+
records: Vec<RecordId>,
419
+
cursor: Option<OpaqueApiCursor>,
420
+
#[serde(skip_serializing)]
421
+
query: GetBacklinksQuery,
422
+
#[serde(skip_serializing)]
423
+
collection: String,
424
+
#[serde(skip_serializing)]
425
+
path: String,
426
+
}
427
+
fn get_backlinks(
428
+
accept: ExtractAccept,
429
+
query: axum_extra::extract::Query<GetBacklinksQuery>, // supports multiple param occurrences
430
+
store: impl LinkReader,
431
+
) -> Result<impl IntoResponse, http::StatusCode> {
432
+
let until = query
433
+
.cursor
434
+
.clone()
435
+
.map(|oc| ApiCursor::try_from(oc).map_err(|_| http::StatusCode::BAD_REQUEST))
436
+
.transpose()?
437
+
.map(|c| c.next);
438
+
439
+
let limit = query.limit;
440
+
if limit > DEFAULT_CURSOR_LIMIT_MAX {
441
+
return Err(http::StatusCode::BAD_REQUEST);
442
+
}
443
+
444
+
let filter_dids: HashSet<Did> = HashSet::from_iter(
445
+
query
446
+
.did
447
+
.iter()
448
+
.map(|d| d.trim())
449
+
.filter(|d| !d.is_empty())
450
+
.map(|d| Did(d.to_string())),
451
+
);
452
+
453
+
let Some((collection, path)) = query.source.split_once(':') else {
454
+
return Err(http::StatusCode::BAD_REQUEST);
455
+
};
456
+
let path = format!(".{path}");
457
+
458
+
let paged = store
459
+
.get_links(
460
+
&query.subject,
461
+
collection,
462
+
&path,
463
+
limit,
464
+
until,
465
+
&filter_dids,
466
+
)
467
+
.map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?;
468
+
469
+
let cursor = paged.next.map(|next| {
470
+
ApiCursor {
471
+
version: paged.version,
472
+
next,
473
+
}
474
+
.into()
475
+
});
476
+
477
+
Ok(acceptable(
478
+
accept,
479
+
GetBacklinksResponse {
480
+
total: paged.total,
481
+
records: paged.items,
482
+
cursor,
483
+
query: (*query).clone(),
484
+
collection: collection.to_string(),
485
+
path,
486
+
},
487
+
))
488
+
}
489
+
490
+
#[derive(Clone, Deserialize)]
491
struct GetLinkItemsQuery {
492
target: String,
493
collection: String,
···
506
///
507
/// deprecated: use `did`, which can be repeated multiple times
508
from_dids: Option<String>, // comma separated: gross
509
+
#[serde(default = "get_default_cursor_limit")]
510
limit: u64,
511
// TODO: allow reverse (er, forward) order as well
512
}
513
#[derive(Template, Serialize)]
514
#[template(path = "links.html.j2")]
···
727
OpaqueApiCursor(bincode::DefaultOptions::new().serialize(&item).unwrap())
728
}
729
}
730
+
731
+
#[derive(Serialize, Deserialize)] // for bincode
732
+
struct ApiKeyedCursor {
733
+
next: String, // the key
734
+
}
735
+
736
+
impl TryFrom<OpaqueApiCursor> for ApiKeyedCursor {
737
+
type Error = bincode::Error;
738
+
739
+
fn try_from(item: OpaqueApiCursor) -> Result<Self, Self::Error> {
740
+
bincode::DefaultOptions::new().deserialize(&item.0)
741
+
}
742
+
}
743
+
744
+
impl From<ApiKeyedCursor> for OpaqueApiCursor {
745
+
fn from(item: ApiKeyedCursor) -> Self {
746
+
OpaqueApiCursor(bincode::DefaultOptions::new().serialize(&item).unwrap())
747
+
}
748
+
}
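For reference, a minimal client-side sketch of querying the new blue.microcosm.links.getBacklinks route added in server/mod.rs above. The endpoint path and the subject/source/limit query parameters come from GetBacklinksQuery in the diff; the host and port, the example subject value, and the use of reqwest as a client here are illustrative assumptions, not part of the PR.

// Illustrative only: call the new backlinks route with a JSON Accept header.
// Host/port and the subject value are assumptions; parameter names come from
// GetBacklinksQuery (subject, source, did, limit, cursor).
async fn fetch_backlinks() -> Result<(), reqwest::Error> {
    let resp = reqwest::Client::new()
        .get("http://localhost:8080/xrpc/blue.microcosm.links.getBacklinks")
        .query(&[
            ("subject", "at://did:plc:example/app.bsky.feed.post/abc123"),
            ("source", "app.bsky.feed.like:subject.uri"),
            ("limit", "16"),
        ])
        .header("accept", "application/json")
        .send()
        .await?;
    println!("{}", resp.text().await?);
    Ok(())
}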
constellation/src/storage/mem_store.rs  +78 -1

@@ -1,4 +1,6 @@
-use super::{LinkReader, LinkStorage, PagedAppendingCollection, StorageStats};
+use super::{
+    LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection, StorageStats,
+};
 use crate::{ActionableEvent, CountsByCount, Did, RecordId};
 use anyhow::Result;
 use links::CollectedLink;
@@ -132,6 +134,79 @@
 }

 impl LinkReader for MemStorage {
+    fn get_many_to_many_counts(
+        &self,
+        target: &str,
+        collection: &str,
+        path: &str,
+        path_to_other: &str,
+        limit: u64,
+        after: Option<String>,
+        filter_dids: &HashSet<Did>,
+        filter_to_targets: &HashSet<String>,
+    ) -> Result<PagedOrderedCollection<(String, u64, u64), String>> {
+        let data = self.0.lock().unwrap();
+        let Some(paths) = data.targets.get(&Target::new(target)) else {
+            return Ok(PagedOrderedCollection::default());
+        };
+        let Some(linkers) = paths.get(&Source::new(collection, path)) else {
+            return Ok(PagedOrderedCollection::default());
+        };
+
+        let path_to_other = RecordPath::new(path_to_other);
+        let filter_to_targets: HashSet<Target> =
+            HashSet::from_iter(filter_to_targets.iter().map(|s| Target::new(s)));
+
+        let mut grouped_counts: HashMap<Target, (u64, HashSet<Did>)> = HashMap::new();
+        for (did, rkey) in linkers.iter().flatten().cloned() {
+            if !filter_dids.is_empty() && !filter_dids.contains(&did) {
+                continue;
+            }
+            if let Some(fwd_target) = data
+                .links
+                .get(&did)
+                .unwrap_or(&HashMap::new())
+                .get(&RepoId {
+                    collection: collection.to_string(),
+                    rkey,
+                })
+                .unwrap_or(&Vec::new())
+                .iter()
+                .filter_map(|(path, target)| {
+                    if *path == path_to_other
+                        && (filter_to_targets.is_empty() || filter_to_targets.contains(target))
+                    {
+                        Some(target)
+                    } else {
+                        None
+                    }
+                })
+                .take(1)
+                .next()
+            {
+                let e = grouped_counts.entry(fwd_target.clone()).or_default();
+                e.0 += 1;
+                e.1.insert(did.clone());
+            }
+        }
+        let mut items: Vec<(String, u64, u64)> = grouped_counts
+            .iter()
+            .map(|(k, (n, u))| (k.0.clone(), *n, u.len() as u64))
+            .collect();
+        items.sort();
+        items = items
+            .into_iter()
+            .skip_while(|(t, _, _)| after.as_ref().map(|a| t <= a).unwrap_or(false))
+            .take(limit as usize)
+            .collect();
+        let next = if items.len() as u64 >= limit {
+            items.last().map(|(t, _, _)| t.clone())
+        } else {
+            None
+        };
+        Ok(PagedOrderedCollection { items, next })
+    }
+
     fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64> {
         let data = self.0.lock().unwrap();
         let Some(paths) = data.targets.get(&Target::new(target)) else {
@@ -353,6 +428,8 @@
             dids,
             targetables,
             linking_records,
+            started_at: None,
+            other_data: Default::default(),
         })
     }
 }
constellation/src/storage/mod.rs  +225
···
19
pub total: u64,
20
}
21
22
#[derive(Debug, Deserialize, Serialize, PartialEq)]
23
pub struct StorageStats {
24
/// estimate of how many accounts we've seen create links. the _subjects_ of any links are not represented here.
···
33
/// records with multiple links are single-counted.
34
/// for LSM stores, deleted links don't decrement this, and updated records with any links will likely increment it.
35
pub linking_records: u64,
36
}
37
38
pub trait LinkStorage: Send + Sync {
···
48
}
49
50
pub trait LinkReader: Clone + Send + Sync + 'static {
51
fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64>;
52
53
fn get_distinct_did_count(&self, target: &str, collection: &str, path: &str) -> Result<u64>;
···
1326
counts
1327
});
1328
assert_stats(storage.get_stats()?, 1..=1, 2..=2, 1..=1);
1329
});
1330
}
···
19
pub total: u64,
20
}
21
22
+
/// A paged collection whose keys are sorted instead of indexed
23
+
///
24
+
/// this has weaker guarantees than PagedAppendingCollection: it might
25
+
/// return a totally consistent snapshot. but it should avoid duplicates
26
+
/// and each page should at least be internally consistent.
27
+
#[derive(Debug, PartialEq, Default)]
28
+
pub struct PagedOrderedCollection<T, K: Ord> {
29
+
pub items: Vec<T>,
30
+
pub next: Option<K>,
31
+
}
32
+
33
#[derive(Debug, Deserialize, Serialize, PartialEq)]
34
pub struct StorageStats {
35
/// estimate of how many accounts we've seen create links. the _subjects_ of any links are not represented here.
···
44
/// records with multiple links are single-counted.
45
/// for LSM stores, deleted links don't decrement this, and updated records with any links will likely increment it.
46
pub linking_records: u64,
47
+
48
+
/// first jetstream cursor when this instance first started
49
+
pub started_at: Option<u64>,
50
+
51
+
/// anything else we want to throw in
52
+
pub other_data: HashMap<String, u64>,
53
}
54
55
pub trait LinkStorage: Send + Sync {
···
65
}
66
67
pub trait LinkReader: Clone + Send + Sync + 'static {
68
+
#[allow(clippy::too_many_arguments)]
69
+
fn get_many_to_many_counts(
70
+
&self,
71
+
target: &str,
72
+
collection: &str,
73
+
path: &str,
74
+
path_to_other: &str,
75
+
limit: u64,
76
+
after: Option<String>,
77
+
filter_dids: &HashSet<Did>,
78
+
filter_to_targets: &HashSet<String>,
79
+
) -> Result<PagedOrderedCollection<(String, u64, u64), String>>;
80
+
81
fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64>;
82
83
fn get_distinct_did_count(&self, target: &str, collection: &str, path: &str) -> Result<u64>;
···
1356
counts
1357
});
1358
assert_stats(storage.get_stats()?, 1..=1, 2..=2, 1..=1);
1359
+
});
1360
+
1361
+
//////// many-to-many /////////
1362
+
1363
+
test_each_storage!(get_m2m_counts_empty, |storage| {
1364
+
assert_eq!(
1365
+
storage.get_many_to_many_counts(
1366
+
"a.com",
1367
+
"a.b.c",
1368
+
".d.e",
1369
+
".f.g",
1370
+
10,
1371
+
None,
1372
+
&HashSet::new(),
1373
+
&HashSet::new(),
1374
+
)?,
1375
+
PagedOrderedCollection {
1376
+
items: vec![],
1377
+
next: None,
1378
+
}
1379
+
);
1380
+
});
1381
+
1382
+
test_each_storage!(get_m2m_counts_single, |storage| {
1383
+
storage.push(
1384
+
&ActionableEvent::CreateLinks {
1385
+
record_id: RecordId {
1386
+
did: "did:plc:asdf".into(),
1387
+
collection: "app.t.c".into(),
1388
+
rkey: "asdf".into(),
1389
+
},
1390
+
links: vec![
1391
+
CollectedLink {
1392
+
target: Link::Uri("a.com".into()),
1393
+
path: ".abc.uri".into(),
1394
+
},
1395
+
CollectedLink {
1396
+
target: Link::Uri("b.com".into()),
1397
+
path: ".def.uri".into(),
1398
+
},
1399
+
CollectedLink {
1400
+
target: Link::Uri("b.com".into()),
1401
+
path: ".ghi.uri".into(),
1402
+
},
1403
+
],
1404
+
},
1405
+
0,
1406
+
)?;
1407
+
assert_eq!(
1408
+
storage.get_many_to_many_counts(
1409
+
"a.com",
1410
+
"app.t.c",
1411
+
".abc.uri",
1412
+
".def.uri",
1413
+
10,
1414
+
None,
1415
+
&HashSet::new(),
1416
+
&HashSet::new(),
1417
+
)?,
1418
+
PagedOrderedCollection {
1419
+
items: vec![("b.com".to_string(), 1, 1)],
1420
+
next: None,
1421
+
}
1422
+
);
1423
+
});
1424
+
1425
+
test_each_storage!(get_m2m_counts_filters, |storage| {
1426
+
storage.push(
1427
+
&ActionableEvent::CreateLinks {
1428
+
record_id: RecordId {
1429
+
did: "did:plc:asdf".into(),
1430
+
collection: "app.t.c".into(),
1431
+
rkey: "asdf".into(),
1432
+
},
1433
+
links: vec![
1434
+
CollectedLink {
1435
+
target: Link::Uri("a.com".into()),
1436
+
path: ".abc.uri".into(),
1437
+
},
1438
+
CollectedLink {
1439
+
target: Link::Uri("b.com".into()),
1440
+
path: ".def.uri".into(),
1441
+
},
1442
+
],
1443
+
},
1444
+
0,
1445
+
)?;
1446
+
storage.push(
1447
+
&ActionableEvent::CreateLinks {
1448
+
record_id: RecordId {
1449
+
did: "did:plc:asdfasdf".into(),
1450
+
collection: "app.t.c".into(),
1451
+
rkey: "asdf".into(),
1452
+
},
1453
+
links: vec![
1454
+
CollectedLink {
1455
+
target: Link::Uri("a.com".into()),
1456
+
path: ".abc.uri".into(),
1457
+
},
1458
+
CollectedLink {
1459
+
target: Link::Uri("b.com".into()),
1460
+
path: ".def.uri".into(),
1461
+
},
1462
+
],
1463
+
},
1464
+
1,
1465
+
)?;
1466
+
storage.push(
1467
+
&ActionableEvent::CreateLinks {
1468
+
record_id: RecordId {
1469
+
did: "did:plc:fdsa".into(),
1470
+
collection: "app.t.c".into(),
1471
+
rkey: "asdf".into(),
1472
+
},
1473
+
links: vec![
1474
+
CollectedLink {
1475
+
target: Link::Uri("a.com".into()),
1476
+
path: ".abc.uri".into(),
1477
+
},
1478
+
CollectedLink {
1479
+
target: Link::Uri("c.com".into()),
1480
+
path: ".def.uri".into(),
1481
+
},
1482
+
],
1483
+
},
1484
+
2,
1485
+
)?;
1486
+
storage.push(
1487
+
&ActionableEvent::CreateLinks {
1488
+
record_id: RecordId {
1489
+
did: "did:plc:fdsa".into(),
1490
+
collection: "app.t.c".into(),
1491
+
rkey: "asdf2".into(),
1492
+
},
1493
+
links: vec![
1494
+
CollectedLink {
1495
+
target: Link::Uri("a.com".into()),
1496
+
path: ".abc.uri".into(),
1497
+
},
1498
+
CollectedLink {
1499
+
target: Link::Uri("c.com".into()),
1500
+
path: ".def.uri".into(),
1501
+
},
1502
+
],
1503
+
},
1504
+
3,
1505
+
)?;
1506
+
assert_eq!(
1507
+
storage.get_many_to_many_counts(
1508
+
"a.com",
1509
+
"app.t.c",
1510
+
".abc.uri",
1511
+
".def.uri",
1512
+
10,
1513
+
None,
1514
+
&HashSet::new(),
1515
+
&HashSet::new(),
1516
+
)?,
1517
+
PagedOrderedCollection {
1518
+
items: vec![("b.com".to_string(), 2, 2), ("c.com".to_string(), 2, 1),],
1519
+
next: None,
1520
+
}
1521
+
);
1522
+
assert_eq!(
1523
+
storage.get_many_to_many_counts(
1524
+
"a.com",
1525
+
"app.t.c",
1526
+
".abc.uri",
1527
+
".def.uri",
1528
+
10,
1529
+
None,
1530
+
&HashSet::from_iter([Did("did:plc:fdsa".to_string())]),
1531
+
&HashSet::new(),
1532
+
)?,
1533
+
PagedOrderedCollection {
1534
+
items: vec![("c.com".to_string(), 2, 1),],
1535
+
next: None,
1536
+
}
1537
+
);
1538
+
assert_eq!(
1539
+
storage.get_many_to_many_counts(
1540
+
"a.com",
1541
+
"app.t.c",
1542
+
".abc.uri",
1543
+
".def.uri",
1544
+
10,
1545
+
None,
1546
+
&HashSet::new(),
1547
+
&HashSet::from_iter(["b.com".to_string()]),
1548
+
)?,
1549
+
PagedOrderedCollection {
1550
+
items: vec![("b.com".to_string(), 2, 2),],
1551
+
next: None,
1552
+
}
1553
+
);
1554
});
1555
}
+342
-40
constellation/src/storage/rocks_store.rs
+342
-40
constellation/src/storage/rocks_store.rs
···
1
-
use super::{ActionableEvent, LinkReader, LinkStorage, PagedAppendingCollection, StorageStats};
2
use crate::{CountsByCount, Did, RecordId};
3
use anyhow::{bail, Result};
4
use bincode::Options as BincodeOptions;
···
11
MultiThreaded, Options, PrefixRange, ReadOptions, WriteBatch,
12
};
13
use serde::{Deserialize, Serialize};
14
-
use std::collections::{HashMap, HashSet};
15
use std::io::Read;
16
use std::marker::PhantomData;
17
use std::path::{Path, PathBuf};
···
20
Arc,
21
};
22
use std::thread;
23
-
use std::time::{Duration, Instant};
24
use tokio_util::sync::CancellationToken;
25
26
static DID_IDS_CF: &str = "did_ids";
···
29
static LINK_TARGETS_CF: &str = "link_targets";
30
31
static JETSTREAM_CURSOR_KEY: &str = "jetstream_cursor";
32
33
// todo: actually understand and set these options probably better
34
fn rocks_opts_base() -> Options {
···
56
#[derive(Debug, Clone)]
57
pub struct RocksStorage {
58
pub db: Arc<DBWithThreadMode<MultiThreaded>>, // TODO: mov seqs here (concat merge op will be fun)
59
-
did_id_table: IdTable<Did, DidIdValue, true>,
60
-
target_id_table: IdTable<TargetKey, TargetId, false>,
61
is_writer: bool,
62
backup_task: Arc<Option<thread::JoinHandle<Result<()>>>>,
63
}
···
85
fn cf_descriptor(&self) -> ColumnFamilyDescriptor {
86
ColumnFamilyDescriptor::new(&self.name, rocks_opts_base())
87
}
88
-
fn init<const WITH_REVERSE: bool>(
89
-
self,
90
-
db: &DBWithThreadMode<MultiThreaded>,
91
-
) -> Result<IdTable<Orig, IdVal, WITH_REVERSE>> {
92
if db.cf_handle(&self.name).is_none() {
93
bail!("failed to get cf handle from db -- was the db open with our .cf_descriptor()?");
94
}
···
119
}
120
}
121
#[derive(Debug, Clone)]
122
-
struct IdTable<Orig, IdVal: IdTableValue, const WITH_REVERSE: bool>
123
where
124
Orig: KeyFromRocks,
125
for<'a> &'a Orig: AsRocksKey,
···
127
base: IdTableBase<Orig, IdVal>,
128
priv_id_seq: u64,
129
}
130
-
impl<Orig: Clone, IdVal: IdTableValue, const WITH_REVERSE: bool> IdTable<Orig, IdVal, WITH_REVERSE>
131
where
132
Orig: KeyFromRocks,
133
for<'v> &'v IdVal: AsRocksValue,
···
139
_key_marker: PhantomData,
140
_val_marker: PhantomData,
141
name: name.into(),
142
-
id_seq: Arc::new(AtomicU64::new(0)), // zero is "uninint", first seq num will be 1
143
}
144
}
145
fn get_id_val(
···
178
id_value
179
}))
180
}
181
fn estimate_count(&self) -> u64 {
182
self.base.id_seq.load(Ordering::SeqCst) - 1 // -1 because seq zero is reserved
183
}
184
-
}
185
-
impl<Orig: Clone, IdVal: IdTableValue> IdTable<Orig, IdVal, true>
186
-
where
187
-
Orig: KeyFromRocks,
188
-
for<'v> &'v IdVal: AsRocksValue,
189
-
for<'k> &'k Orig: AsRocksKey,
190
-
{
191
fn get_or_create_id_val(
192
&mut self,
193
db: &DBWithThreadMode<MultiThreaded>,
···
215
}
216
}
217
}
218
-
impl<Orig: Clone, IdVal: IdTableValue> IdTable<Orig, IdVal, false>
219
-
where
220
-
Orig: KeyFromRocks,
221
-
for<'v> &'v IdVal: AsRocksValue,
222
-
for<'k> &'k Orig: AsRocksKey,
223
-
{
224
-
fn get_or_create_id_val(
225
-
&mut self,
226
-
db: &DBWithThreadMode<MultiThreaded>,
227
-
batch: &mut WriteBatch,
228
-
orig: &Orig,
229
-
) -> Result<IdVal> {
230
-
let cf = db.cf_handle(&self.base.name).unwrap();
231
-
self.__get_or_create_id_val(&cf, db, batch, orig)
232
-
}
233
-
}
234
235
impl IdTableValue for DidIdValue {
236
fn new(v: u64) -> Self {
···
249
}
250
}
251
252
impl RocksStorage {
253
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
254
Self::describe_metrics();
255
-
RocksStorage::open_readmode(path, false)
256
}
257
258
pub fn open_readonly(path: impl AsRef<Path>) -> Result<Self> {
···
260
}
261
262
fn open_readmode(path: impl AsRef<Path>, readonly: bool) -> Result<Self> {
263
-
let did_id_table = IdTable::<_, _, true>::setup(DID_IDS_CF);
264
-
let target_id_table = IdTable::<_, _, false>::setup(TARGET_IDS_CF);
265
266
let cfs = vec![
267
// id reference tables
268
did_id_table.cf_descriptor(),
···
296
is_writer: !readonly,
297
backup_task: None.into(),
298
})
299
}
300
301
pub fn start_backup(
···
826
}
827
828
impl LinkReader for RocksStorage {
829
fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64> {
830
let target_key = TargetKey(
831
Target(target.to_string()),
···
1042
.map(|s| s.parse::<u64>())
1043
.transpose()?
1044
.unwrap_or(0);
1045
Ok(StorageStats {
1046
dids,
1047
targetables,
1048
linking_records,
1049
})
1050
}
1051
}
···
1071
impl AsRocksValue for &TargetId {}
1072
impl KeyFromRocks for TargetKey {}
1073
impl ValueFromRocks for TargetId {}
1074
1075
// target_links table
1076
impl AsRocksKey for &TargetId {}
···
1142
}
1143
1144
// target ids
1145
-
#[derive(Debug, Clone, Serialize, Deserialize)]
1146
struct TargetId(u64); // key
1147
1148
-
#[derive(Debug, Clone, Serialize, Deserialize)]
1149
pub struct Target(pub String); // the actual target/uri
1150
1151
// targets (uris, dids, etc.): the reverse index
···
1
+
use super::{
2
+
ActionableEvent, LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection,
3
+
StorageStats,
4
+
};
5
use crate::{CountsByCount, Did, RecordId};
6
use anyhow::{bail, Result};
7
use bincode::Options as BincodeOptions;
···
14
MultiThreaded, Options, PrefixRange, ReadOptions, WriteBatch,
15
};
16
use serde::{Deserialize, Serialize};
17
+
use std::collections::{BTreeMap, HashMap, HashSet};
18
use std::io::Read;
19
use std::marker::PhantomData;
20
use std::path::{Path, PathBuf};
···
23
Arc,
24
};
25
use std::thread;
26
+
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
27
use tokio_util::sync::CancellationToken;
28
29
static DID_IDS_CF: &str = "did_ids";
···
32
static LINK_TARGETS_CF: &str = "link_targets";
33
34
static JETSTREAM_CURSOR_KEY: &str = "jetstream_cursor";
35
+
static STARTED_AT_KEY: &str = "jetstream_first_cursor";
36
+
// add reverse mappings for targets if this db was running before that was a thing
37
+
static TARGET_ID_REPAIR_STATE_KEY: &str = "target_id_table_repair_state";
38
+
39
+
static COZY_FIRST_CURSOR: u64 = 1_738_083_600_000_000; // constellation.microcosm.blue started
40
+
41
+
#[derive(Debug, Clone, Serialize, Deserialize)]
42
+
struct TargetIdRepairState {
43
+
/// start time for repair, microseconds timestamp
44
+
current_us_started_at: u64,
45
+
/// id table's latest id when repair started
46
+
id_when_started: u64,
47
+
/// iteration progress: how many id-table keys have been visited/repaired so far
48
+
latest_repaired_i: u64,
49
+
}
50
+
impl AsRocksValue for TargetIdRepairState {}
51
+
impl ValueFromRocks for TargetIdRepairState {}
52
53
// todo: actually understand and set these options probably better
54
fn rocks_opts_base() -> Options {
···
76
#[derive(Debug, Clone)]
77
pub struct RocksStorage {
78
pub db: Arc<DBWithThreadMode<MultiThreaded>>, // TODO: mov seqs here (concat merge op will be fun)
79
+
did_id_table: IdTable<Did, DidIdValue>,
80
+
target_id_table: IdTable<TargetKey, TargetId>,
81
is_writer: bool,
82
backup_task: Arc<Option<thread::JoinHandle<Result<()>>>>,
83
}
···
105
fn cf_descriptor(&self) -> ColumnFamilyDescriptor {
106
ColumnFamilyDescriptor::new(&self.name, rocks_opts_base())
107
}
108
+
fn init(self, db: &DBWithThreadMode<MultiThreaded>) -> Result<IdTable<Orig, IdVal>> {
109
if db.cf_handle(&self.name).is_none() {
110
bail!("failed to get cf handle from db -- was the db open with our .cf_descriptor()?");
111
}
···
136
}
137
}
138
#[derive(Debug, Clone)]
139
+
struct IdTable<Orig, IdVal: IdTableValue>
140
where
141
Orig: KeyFromRocks,
142
for<'a> &'a Orig: AsRocksKey,
···
144
base: IdTableBase<Orig, IdVal>,
145
priv_id_seq: u64,
146
}
147
+
impl<Orig: Clone, IdVal: IdTableValue> IdTable<Orig, IdVal>
148
where
149
Orig: KeyFromRocks,
150
for<'v> &'v IdVal: AsRocksValue,
···
156
_key_marker: PhantomData,
157
_val_marker: PhantomData,
158
name: name.into(),
159
+
id_seq: Arc::new(AtomicU64::new(0)), // zero is "uninit", first seq num will be 1
160
}
161
}
162
fn get_id_val(
···
195
id_value
196
}))
197
}
198
+
199
fn estimate_count(&self) -> u64 {
200
self.base.id_seq.load(Ordering::SeqCst) - 1 // -1 because seq zero is reserved
201
}
202
+
203
fn get_or_create_id_val(
204
&mut self,
205
db: &DBWithThreadMode<MultiThreaded>,
···
227
}
228
}
229
}
230
231
impl IdTableValue for DidIdValue {
232
fn new(v: u64) -> Self {
···
245
}
246
}
247
248
+
fn now() -> u64 {
249
+
SystemTime::now()
250
+
.duration_since(UNIX_EPOCH)
251
+
.unwrap()
252
+
.as_micros() as u64
253
+
}
254
+
255
impl RocksStorage {
256
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
257
Self::describe_metrics();
258
+
let me = RocksStorage::open_readmode(path, false)?;
259
+
me.global_init()?;
260
+
Ok(me)
261
}
262
263
pub fn open_readonly(path: impl AsRef<Path>) -> Result<Self> {
···
265
}
266
267
fn open_readmode(path: impl AsRef<Path>, readonly: bool) -> Result<Self> {
268
+
let did_id_table = IdTable::setup(DID_IDS_CF);
269
+
let target_id_table = IdTable::setup(TARGET_IDS_CF);
270
271
+
// note: global stuff like jetstream cursor goes in the default cf
272
+
// these are bonus extra cfs
273
let cfs = vec![
274
// id reference tables
275
did_id_table.cf_descriptor(),
···
303
is_writer: !readonly,
304
backup_task: None.into(),
305
})
306
+
}
307
+
308
+
fn global_init(&self) -> Result<()> {
309
+
let first_run = self.db.get(JETSTREAM_CURSOR_KEY)?.is_none();
310
+
if first_run {
311
+
self.db.put(STARTED_AT_KEY, _rv(now()))?;
312
+
313
+
// hack / temporary: if we're a new db, put in a completed repair
314
+
// state so we don't run repairs (repairs are for old-code dbs)
315
+
let completed = TargetIdRepairState {
316
+
id_when_started: 0,
317
+
current_us_started_at: 0,
318
+
latest_repaired_i: 0,
319
+
};
320
+
self.db.put(TARGET_ID_REPAIR_STATE_KEY, _rv(completed))?;
321
+
}
322
+
Ok(())
323
+
}
324
+
325
+
pub fn run_repair(&self, breather: Duration, stay_alive: CancellationToken) -> Result<bool> {
326
+
let mut state = match self
327
+
.db
328
+
.get(TARGET_ID_REPAIR_STATE_KEY)?
329
+
.map(|s| _vr(&s))
330
+
.transpose()?
331
+
{
332
+
Some(s) => s,
333
+
None => TargetIdRepairState {
334
+
id_when_started: self.target_id_table.priv_id_seq,
335
+
current_us_started_at: now(),
336
+
latest_repaired_i: 0,
337
+
},
338
+
};
339
+
340
+
eprintln!("initial repair state: {state:?}");
341
+
342
+
let cf = self.db.cf_handle(TARGET_IDS_CF).unwrap();
343
+
344
+
let mut iter = self.db.raw_iterator_cf(&cf);
345
+
iter.seek_to_first();
346
+
347
+
eprintln!("repair iterator sent to first key");
348
+
349
+
// skip ahead if we've already done some, or take a single first step
350
+
for _ in 0..state.latest_repaired_i {
351
+
iter.next();
352
+
}
353
+
354
+
eprintln!(
355
+
"repair iterator skipped to {}th key",
356
+
state.latest_repaired_i
357
+
);
358
+
359
+
let mut maybe_done = false;
360
+
361
+
let mut write_fast = rocksdb::WriteOptions::default();
362
+
write_fast.set_sync(false);
363
+
write_fast.disable_wal(true);
364
+
365
+
while !stay_alive.is_cancelled() && !maybe_done {
366
+
// let mut batch = WriteBatch::default();
367
+
368
+
let mut any_written = false;
369
+
370
+
for _ in 0..1000 {
371
+
if state.latest_repaired_i % 1_000_000 == 0 {
372
+
eprintln!("target iter at {}", state.latest_repaired_i);
373
+
}
374
+
state.latest_repaired_i += 1;
375
+
376
+
if !iter.valid() {
377
+
eprintln!("invalid iter, are we done repairing?");
378
+
maybe_done = true;
379
+
break;
380
+
};
381
+
382
+
// eprintln!("iterator seems to be valid! getting the key...");
383
+
let raw_key = iter.key().unwrap();
384
+
if raw_key.len() == 8 {
385
+
// eprintln!("found an 8-byte key, skipping it since it's probably an id...");
386
+
iter.next();
387
+
continue;
388
+
}
389
+
let target: TargetKey = _kr::<TargetKey>(raw_key)?;
390
+
let target_id: TargetId = _vr(iter.value().unwrap())?;
391
+
392
+
self.db
393
+
.put_cf_opt(&cf, target_id.id().to_be_bytes(), _rv(&target), &write_fast)?;
394
+
any_written = true;
395
+
iter.next();
396
+
}
397
+
398
+
if any_written {
399
+
self.db
400
+
.put(TARGET_ID_REPAIR_STATE_KEY, _rv(state.clone()))?;
401
+
std::thread::sleep(breather);
402
+
}
403
+
}
404
+
405
+
eprintln!("repair iterator done.");
406
+
407
+
Ok(false)
408
}
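    // illustrative sketch (not part of this change): run_repair persists its progress and
    // honors the CancellationToken, so a caller could run it off the hot path on a
    // background thread. names and the breather value here are made up:
    //
    //     let stay_alive = CancellationToken::new();
    //     let repair_handle = {
    //         let storage = storage.clone(); // RocksStorage is Clone
    //         let token = stay_alive.clone();
    //         std::thread::spawn(move || storage.run_repair(Duration::from_millis(20), token))
    //     };
    //     // ... later, on shutdown:
    //     stay_alive.cancel();
    //     match repair_handle.join().expect("repair thread panicked") {
    //         Ok(done) => eprintln!("repair pass finished, done = {done}"),
    //         Err(e) => eprintln!("repair pass failed: {e:?}"),
    //     }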
409
410
pub fn start_backup(
···
935
}
936
937
impl LinkReader for RocksStorage {
938
+
fn get_many_to_many_counts(
939
+
&self,
940
+
target: &str,
941
+
collection: &str,
942
+
path: &str,
943
+
path_to_other: &str,
944
+
limit: u64,
945
+
after: Option<String>,
946
+
filter_dids: &HashSet<Did>,
947
+
filter_to_targets: &HashSet<String>,
948
+
) -> Result<PagedOrderedCollection<(String, u64, u64), String>> {
949
+
let collection = Collection(collection.to_string());
950
+
let path = RPath(path.to_string());
951
+
952
+
let target_key = TargetKey(Target(target.to_string()), collection.clone(), path.clone());
953
+
954
+
// unfortunately the cursor is a, uh, stringified number.
955
+
// this was easier for the memstore (plain target, not target id), and
956
+
// making it generic is a bit awful.
957
+
// so... parse the number out of a string here :(
958
+
// TODO: this should bubble up to a BAD_REQUEST response
959
+
let after = after.map(|s| s.parse::<u64>().map(TargetId)).transpose()?;
960
+
961
+
let Some(target_id) = self.target_id_table.get_id_val(&self.db, &target_key)? else {
962
+
eprintln!("nothin doin for this target, {target_key:?}");
963
+
return Ok(Default::default());
964
+
};
965
+
966
+
let filter_did_ids: HashMap<DidId, bool> = filter_dids
967
+
.iter()
968
+
.filter_map(|did| self.did_id_table.get_id_val(&self.db, did).transpose())
969
+
.collect::<Result<Vec<DidIdValue>>>()?
970
+
.into_iter()
971
+
.map(|DidIdValue(id, active)| (id, active))
972
+
.collect();
973
+
974
+
// stored targets are keyed by triples of (target, collection, path).
975
+
// target filtering only considers the target itself, so we actually
976
+
// need to do a prefix iteration of all target ids for this target and
977
+
// keep them all.
978
+
// i *think* the number of keys at a target prefix should usually be
979
+
// pretty small, so this is hopefully fine. but if it turns out to be
980
+
// large, we can push this filtering back into the main links loop and
981
+
// do forward db queries per backlink to get the raw target back out.
982
+
let mut filter_to_target_ids: HashSet<TargetId> = HashSet::new();
983
+
for t in filter_to_targets {
984
+
for (_, target_id) in self.iter_targets_for_target(&Target(t.to_string())) {
985
+
filter_to_target_ids.insert(target_id);
986
+
}
987
+
}
988
+
989
+
let linkers = self.get_target_linkers(&target_id)?;
990
+
991
+
let mut grouped_counts: BTreeMap<TargetId, (u64, HashSet<DidId>)> = BTreeMap::new();
992
+
993
+
for (did_id, rkey) in linkers.0 {
994
+
if did_id.is_empty() {
995
+
continue;
996
+
}
997
+
998
+
if !filter_did_ids.is_empty() && filter_did_ids.get(&did_id) != Some(&true) {
999
+
continue;
1000
+
}
1001
+
1002
+
let record_link_key = RecordLinkKey(did_id, collection.clone(), rkey);
1003
+
let Some(targets) = self.get_record_link_targets(&record_link_key)? else {
1004
+
continue;
1005
+
};
1006
+
1007
+
let Some(fwd_target) = targets
1008
+
.0
1009
+
.into_iter()
1010
+
.filter_map(|RecordLinkTarget(rpath, target_id)| {
1011
+
if rpath.0 == path_to_other
1012
+
&& (filter_to_target_ids.is_empty()
1013
+
|| filter_to_target_ids.contains(&target_id))
1014
+
{
1015
+
Some(target_id)
1016
+
} else {
1017
+
None
1018
+
}
1019
+
})
1020
+
.take(1)
1021
+
.next()
1022
+
else {
1023
+
eprintln!("no forward match");
1024
+
continue;
1025
+
};
1026
+
1027
+
// small relief: we page over target ids, so we can already skip
1028
+
// reprocessing previous pages here
1029
+
if after.as_ref().map(|a| fwd_target <= *a).unwrap_or(false) {
1030
+
continue;
1031
+
}
1032
+
1033
+
// and we can skip target ids that must be on future pages
1034
+
// (this check continues after the did-lookup, which we have to do)
1035
+
let page_is_full = grouped_counts.len() as u64 >= limit;
1036
+
if page_is_full {
1037
+
let current_max = grouped_counts.keys().next_back().unwrap(); // limit should be non-zero bleh
1038
+
if fwd_target > *current_max {
1039
+
continue;
1040
+
}
1041
+
}
1042
+
1043
+
// bit painful: 2-step lookup to make sure this did is active
1044
+
let Some(did) = self.did_id_table.get_val_from_id(&self.db, did_id.0)? else {
1045
+
eprintln!("failed to look up did from did_id {did_id:?}");
1046
+
continue;
1047
+
};
1048
+
let Some(DidIdValue(_, active)) = self.did_id_table.get_id_val(&self.db, &did)? else {
1049
+
eprintln!("failed to look up did_value from did_id {did_id:?}: {did:?}: data consistency bug?");
1050
+
continue;
1051
+
};
1052
+
if !active {
1053
+
continue;
1054
+
}
1055
+
1056
+
// page-management, continued
1057
+
// if we have a full page, and we're inserting a *new* key less than
1058
+
// the current max, then we can evict the current max
1059
+
let mut should_evict = false;
1060
+
let entry = grouped_counts.entry(fwd_target.clone()).or_insert_with(|| {
1061
+
// this is a *new* key, so kick the max if we're full
1062
+
should_evict = page_is_full;
1063
+
Default::default()
1064
+
});
1065
+
entry.0 += 1;
1066
+
entry.1.insert(did_id);
1067
+
1068
+
if should_evict {
1069
+
grouped_counts.pop_last();
1070
+
}
1071
+
}
1072
+
1073
+
let mut items: Vec<(String, u64, u64)> = Vec::with_capacity(grouped_counts.len());
1074
+
for (target_id, (n, dids)) in &grouped_counts {
1075
+
let Some(target) = self
1076
+
.target_id_table
1077
+
.get_val_from_id(&self.db, target_id.0)?
1078
+
else {
1079
+
eprintln!("failed to look up target from target_id {target_id:?}");
1080
+
continue;
1081
+
};
1082
+
items.push((target.0 .0, *n, dids.len() as u64));
1083
+
}
1084
+
1085
+
let next = if grouped_counts.len() as u64 >= limit {
1086
+
// yeah.... it's a number saved as a string......sorry
1087
+
grouped_counts
1088
+
.keys()
1089
+
.next_back()
1090
+
.map(|k| format!("{}", k.0))
1091
+
} else {
1092
+
None
1093
+
};
1094
+
1095
+
Ok(PagedOrderedCollection { items, next })
1096
+
}
1097
+
1098
fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64> {
1099
let target_key = TargetKey(
1100
Target(target.to_string()),
···
1311
.map(|s| s.parse::<u64>())
1312
.transpose()?
1313
.unwrap_or(0);
1314
+
let started_at = self
1315
+
.db
1316
+
.get(STARTED_AT_KEY)?
1317
+
.map(|c| _vr(&c))
1318
+
.transpose()?
1319
+
.unwrap_or(COZY_FIRST_CURSOR);
1320
+
1321
+
let other_data = self
1322
+
.db
1323
+
.get(TARGET_ID_REPAIR_STATE_KEY)?
1324
+
.map(|s| _vr(&s))
1325
+
.transpose()?
1326
+
.map(
1327
+
|TargetIdRepairState {
1328
+
current_us_started_at,
1329
+
id_when_started,
1330
+
latest_repaired_i,
1331
+
}| {
1332
+
HashMap::from([
1333
+
("current_us_started_at".to_string(), current_us_started_at),
1334
+
("id_when_started".to_string(), id_when_started),
1335
+
("latest_repaired_i".to_string(), latest_repaired_i),
1336
+
])
1337
+
},
1338
+
)
1339
+
.unwrap_or(HashMap::default());
1340
+
1341
Ok(StorageStats {
1342
dids,
1343
targetables,
1344
linking_records,
1345
+
started_at: Some(started_at),
1346
+
other_data,
1347
})
1348
}
1349
}
···
1369
impl AsRocksValue for &TargetId {}
1370
impl KeyFromRocks for TargetKey {}
1371
impl ValueFromRocks for TargetId {}
1372
+
1373
+
// temp?
1374
+
impl KeyFromRocks for TargetId {}
1375
+
impl AsRocksValue for &TargetKey {}
1376
1377
// target_links table
1378
impl AsRocksKey for &TargetId {}
···
1444
}
1445
1446
// target ids
1447
+
#[derive(Debug, Clone, Serialize, Deserialize, PartialOrd, Ord, PartialEq, Eq, Hash)]
1448
struct TargetId(u64); // key
1449
1450
+
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
1451
pub struct Target(pub String); // the actual target/uri
1452
1453
// targets (uris, dids, etc.): the reverse index
+1
-1
constellation/templates/dids.html.j2
+1
-1
constellation/templates/dids.html.j2
···
27
{% for did in linking_dids %}
28
<pre style="display: block; margin: 1em 2em" class="code"><strong>DID</strong>: {{ did.0 }}
29
-> see <a href="/links/all?target={{ did.0|urlencode }}">links to this DID</a>
30
-
-> browse <a href="https://atproto-browser-plus-links.vercel.app/at/{{ did.0|urlencode }}">this DID record</a></pre>
31
{% endfor %}
32
33
{% if let Some(c) = cursor %}
···
27
{% for did in linking_dids %}
28
<pre style="display: block; margin: 1em 2em" class="code"><strong>DID</strong>: {{ did.0 }}
29
-> see <a href="/links/all?target={{ did.0|urlencode }}">links to this DID</a>
30
+
-> browse <a href="https://pdsls.dev/at://{{ did.0|urlencode }}">this DID record</a></pre>
31
{% endfor %}
32
33
{% if let Some(c) = cursor %}
+54
constellation/templates/get-backlinks.html.j2
+54
constellation/templates/get-backlinks.html.j2
···
···
1
+
{% extends "base.html.j2" %}
2
+
{% import "try-it-macros.html.j2" as try_it %}
3
+
4
+
{% block title %}Backlinks{% endblock %}
5
+
{% block description %}All {{ query.source }} records with links to {{ query.subject }}{% endblock %}
6
+
7
+
{% block content %}
8
+
9
+
{% call try_it::get_backlinks(query.subject, query.source, query.did, query.limit) %}
10
+
11
+
<h2>
12
+
Links to <code>{{ query.subject }}</code>
13
+
{% if let Some(browseable_uri) = query.subject|to_browseable %}
14
+
<small style="font-weight: normal; font-size: 1rem"><a href="{{ browseable_uri }}">browse record</a></small>
15
+
{% endif %}
16
+
</h2>
17
+
18
+
<p><strong>{{ total|human_number }} links</strong> from <code>{{ query.source }}</code>.</p>
19
+
20
+
<ul>
21
+
<li>See distinct linking DIDs at <code>/links/distinct-dids</code>: <a href="/links/distinct-dids?target={{ query.subject|urlencode }}&collection={{ collection|urlencode }}&path={{ path|urlencode }}">/links/distinct-dids?target={{ query.subject }}&collection={{ collection }}&path={{ path }}</a></li>
22
+
<li>See all links to this target at <code>/links/all</code>: <a href="/links/all?target={{ query.subject|urlencode }}">/links/all?target={{ query.subject }}</a></li>
23
+
</ul>
24
+
25
+
<h3>Links, most recent first:</h3>
26
+
27
+
{% for record in records %}
28
+
<pre style="display: block; margin: 1em 2em" class="code"><strong>DID</strong>: {{ record.did().0 }} (<a href="/links/all?target={{ record.did().0|urlencode }}">DID links</a>)
29
+
<strong>Collection</strong>: {{ record.collection }}
30
+
<strong>RKey</strong>: {{ record.rkey }}
31
+
-> <a href="https://pdsls.dev/at://{{ record.did().0 }}/{{ record.collection }}/{{ record.rkey }}">browse record</a></pre>
32
+
{% endfor %}
33
+
34
+
{% if let Some(c) = cursor %}
35
+
<form method="get" action="/xrpc/blue.microcosm.links.getBacklinks">
36
+
<input type="hidden" name="subject" value="{{ query.subject }}" />
37
+
<input type="hidden" name="source" value="{{ query.source }}" />
38
+
<input type="hidden" name="limit" value="{{ query.limit }}" />
39
+
{% for did in query.did %}
40
+
<input type="hidden" name="did" value="{{ did }}" />
41
+
{% endfor %}
42
+
<input type="hidden" name="cursor" value={{ c|json|safe }} />
43
+
<button type="submit">next page…</button>
44
+
</form>
45
+
{% else %}
46
+
<button disabled><em>end of results</em></button>
47
+
{% endif %}
48
+
49
+
<details>
50
+
<summary>Raw JSON response</summary>
51
+
<pre class="code">{{ self|tojson }}</pre>
52
+
</details>
53
+
54
+
{% endblock %}
+67
constellation/templates/get-many-to-many-counts.html.j2
+67
constellation/templates/get-many-to-many-counts.html.j2
···
···
1
+
{% extends "base.html.j2" %}
2
+
{% import "try-it-macros.html.j2" as try_it %}
3
+
4
+
{% block title %}Many to Many counts{% endblock %}
5
+
{% block description %}Counts of many-to-many {{ query.source }} join records with links to {{ query.subject }} and a secondary target at {{ query.path_to_other }}{% endblock %}
6
+
7
+
{% block content %}
8
+
9
+
{% call try_it::get_many_to_many_counts(
10
+
query.subject,
11
+
query.source,
12
+
query.path_to_other,
13
+
query.did,
14
+
query.other_subject,
15
+
query.limit,
16
+
) %}
17
+
18
+
<h2>
19
+
Many-to-many links to <code>{{ query.subject }}</code> joining through <code>{{ query.path_to_other }}</code>
20
+
{% if let Some(browseable_uri) = query.subject|to_browseable %}
21
+
<small style="font-weight: normal; font-size: 1rem"><a href="{{ browseable_uri }}">browse record</a></small>
22
+
{% endif %}
23
+
</h2>
24
+
25
+
<p><strong>{% if cursor.is_some() || query.cursor.is_some() %}more than {% endif %}{{ counts_by_other_subject.len()|to_u64|human_number }} joins</strong> <code>{{ query.source }}→{{ query.path_to_other }}</code></p>
26
+
27
+
<ul>
28
+
<li>See direct backlinks at <code>/xrpc/blue.microcosm.links.getBacklinks</code>: <a href="/xrpc/blue.microcosm.links.getBacklinks?subject={{ query.subject|urlencode }}&source={{ query.source|urlencode }}">/xrpc/blue.microcosm.links.getBacklinks?subject={{ query.subject }}&source={{ query.source }}</a></li>
29
+
<li>See all links to this target at <code>/links/all</code>: <a href="/links/all?target={{ query.subject|urlencode }}">/links/all?target={{ query.subject }}</a></li>
30
+
</ul>
31
+
32
+
<h3>Counts by other subject:</h3>
33
+
34
+
{% for counts in counts_by_other_subject %}
35
+
<pre style="display: block; margin: 1em 2em" class="code"><strong>Joined subject</strong>: {{ counts.subject }}
36
+
<strong>Joining records</strong>: {{ counts.total }}
37
+
<strong>Unique joiner ids</strong>: {{ counts.distinct }}
38
+
-> {% if let Some(browseable_uri) = counts.subject|to_browseable -%}
39
+
<a href="{{ browseable_uri }}">browse record</a>
40
+
{%- endif %}</pre>
41
+
{% endfor %}
42
+
43
+
{% if let Some(c) = cursor %}
44
+
<form method="get" action="/xrpc/blue.microcosm.links.getManyToManyCounts">
45
+
<input type="hidden" name="subject" value="{{ query.subject }}" />
46
+
<input type="hidden" name="source" value="{{ query.source }}" />
47
+
<input type="hidden" name="pathToOther" value="{{ query.path_to_other }}" />
48
+
{% for did in query.did %}
49
+
<input type="hidden" name="did" value="{{ did }}" />
50
+
{% endfor %}
51
+
{% for otherSubject in query.other_subject %}
52
+
<input type="hidden" name="otherSubject" value="{{ otherSubject }}" />
53
+
{% endfor %}
54
+
<input type="hidden" name="limit" value="{{ query.limit }}" />
55
+
<input type="hidden" name="cursor" value={{ c|json|safe }} />
56
+
<button type="submit">next page…</button>
57
+
</form>
58
+
{% else %}
59
+
<button disabled><em>end of results</em></button>
60
+
{% endif %}
61
+
62
+
<details>
63
+
<summary>Raw JSON response</summary>
64
+
<pre class="code">{{ self|tojson }}</pre>
65
+
</details>
66
+
67
+
{% endblock %}
+57
-2
constellation/templates/hello.html.j2
+57
-2
constellation/templates/hello.html.j2
···
19
<p>It works by recursively walking <em>all</em> records coming through the firehose, searching for anything that looks like a link. Links are indexed by the target they point at, the collection the record came from, and the JSON path to the link in that record.</p>
20
21
<p>
22
-
This server has indexed <span class="stat">{{ stats.linking_records|human_number }}</span> links between <span class="stat">{{ stats.targetables|human_number }}</span> targets and sources from <span class="stat">{{ stats.dids|human_number }}</span> identities over <span class="stat">{{ days_indexed|human_number }}</span> days.<br/>
23
<small>(indexing new records in real time, backfill coming soon!)</small>
24
</p>
25
26
-
<p>But feel free to use it! If you want to be nice, put your project name and bsky username (or email) in your user-agent header for api requests.</p>
27
28
29
<h2>API Endpoints</h2>
30
31
<h3 class="route"><code>GET /links</code></h3>
32
33
<p>A list of records linking to a target.</p>
34
35
<h4>Query parameters:</h4>
36
···
19
<p>It works by recursively walking <em>all</em> records coming through the firehose, searching for anything that looks like a link. Links are indexed by the target they point at, the collection the record came from, and the JSON path to the link in that record.</p>
20
21
<p>
22
+
This server has indexed <span class="stat">{{ stats.linking_records|human_number }}</span> links between <span class="stat">{{ stats.targetables|human_number }}</span> targets and sources from <span class="stat">{{ stats.dids|human_number }}</span> identities over <span class="stat">
23
+
{%- if let Some(days) = days_indexed %}
24
+
{{ days|human_number }}
25
+
{% else %}
26
+
???
27
+
{% endif -%}
28
+
</span> days.<br/>
29
<small>(indexing new records in real time, backfill coming soon!)</small>
30
</p>
31
32
+
{# {% for k, v in stats.other_data.iter() %}
33
+
<p><strong>{{ k }}</strong>: {{ v }}</p>
34
+
{% endfor %} #}
35
+
36
+
<p>You're welcome to use this public instance! Please do not build the torment nexus. If you want to be nice, put your project name and bsky username (or email) in your user-agent header for api requests.</p>
37
38
39
<h2>API Endpoints</h2>
40
41
+
<h3 class="route"><code>GET /xrpc/blue.microcosm.links.getBacklinks</code></h3>
42
+
43
+
<p>A list of records linking to any record, identity, or uri.</p>
44
+
45
+
<h4>Query parameters:</h4>
46
+
47
+
<ul>
48
+
<li><p><code>subject</code>: required, must url-encode. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li>
49
+
<li><p><code>source</code>: required. Example: <code>app.bsky.feed.like:subject.uri</code></p></li>
50
+
<li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li>
51
+
<li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li>
52
+
</ul>
53
+
54
+
<p style="margin-bottom: 0"><strong>Try it:</strong></p>
55
+
{% call try_it::get_backlinks("at://did:plc:a4pqq234yw7fqbddawjo7y35/app.bsky.feed.post/3m237ilwc372e", "app.bsky.feed.like:subject.uri", [""], 16) %}
56
+
57
+
58
+
<h3 class="route"><code>GET /xrpc/blue.microcosm.links.getManyToManyCounts</code></h3>
59
+
60
+
<p>Counts of join records that link to a subject, grouped by the secondary link target found at <code>pathToOther</code> in the same record.</p>
61
+
62
+
<h4>Query parameters:</h4>
63
+
64
+
<ul>
65
+
<li><p><code>subject</code>: required, must url-encode. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li>
66
+
<li><p><code>source</code>: required. Example: <code>app.bsky.feed.like:subject.uri</code></p></li>
67
+
<li><p><code>pathToOther</code>: required. Path to the secondary link in the many-to-many record. Example: <code>otherThing.uri</code></p></li>
68
+
<li><p><code>did</code>: optional, filter links to those from specific users. Include multiple times to filter by multiple users. Example: <code>did=did:plc:vc7f4oafdgxsihk4cry2xpze&did=did:plc:vc7f4oafdgxsihk4cry2xpze</code></p></li>
69
+
<li><p><code>otherSubject</code>: optional, filter secondary links to specific subjects. Include multiple times to filter by multiple subjects. Example: <code>at://did:plc:vc7f4oafdgxsihk4cry2xpze/app.bsky.feed.post/3lgwdn7vd722r</code></p></li>
70
+
<li><p><code>limit</code>: optional. Default: <code>16</code>. Maximum: <code>100</code></p></li>
71
+
</ul>
72
+
73
+
<p style="margin-bottom: 0"><strong>Try it:</strong></p>
74
+
{% call try_it::get_many_to_many_counts(
75
+
"at://did:plc:wshs7t2adsemcrrd4snkeqli/sh.tangled.label.definition/good-first-issue",
76
+
"sh.tangled.label.op:add[].key",
77
+
"subject",
78
+
[""],
79
+
[""],
80
+
25,
81
+
) %}
82
+
83
+
84
<h3 class="route"><code>GET /links</code></h3>
85
86
<p>A list of records linking to a target.</p>
87
+
88
+
<p>[DEPRECATED]: use <code>GET /xrpc/blue.microcosm.links.getBacklinks</code>. New apps should avoid it, but this endpoint <strong>will</strong> remain supported for the foreseeable future.</p>
89
90
<h4>Query parameters:</h4>
91
+1
-1
constellation/templates/links.html.j2
+1
-1
constellation/templates/links.html.j2
···
28
<pre style="display: block; margin: 1em 2em" class="code"><strong>DID</strong>: {{ record.did().0 }} (<a href="/links/all?target={{ record.did().0|urlencode }}">DID links</a>)
29
<strong>Collection</strong>: {{ record.collection }}
30
<strong>RKey</strong>: {{ record.rkey }}
31
-
-> <a href="https://atproto-browser-plus-links.vercel.app/at/{{ record.did().0|urlencode }}/{{ record.collection }}/{{ record.rkey }}">browse record</a></pre>
32
{% endfor %}
33
34
{% if let Some(c) = cursor %}
···
28
<pre style="display: block; margin: 1em 2em" class="code"><strong>DID</strong>: {{ record.did().0 }} (<a href="/links/all?target={{ record.did().0|urlencode }}">DID links</a>)
29
<strong>Collection</strong>: {{ record.collection }}
30
<strong>RKey</strong>: {{ record.rkey }}
31
+
-> <a href="https://pdsls.dev/at://{{ record.did().0 }}/{{ record.collection }}/{{ record.rkey }}">browse record</a></pre>
32
{% endfor %}
33
34
{% if let Some(c) = cursor %}
+68
-1
constellation/templates/try-it-macros.html.j2
+68
-1
constellation/templates/try-it-macros.html.j2
···
1
+
{% macro get_backlinks(subject, source, dids, limit) %}
2
+
<form method="get" action="/xrpc/blue.microcosm.links.getBacklinks">
3
+
<pre class="code"><strong>GET</strong> /xrpc/blue.microcosm.links.getBacklinks
4
+
?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." />
5
+
&source= <input type="text" name="source" value="{{ source }}" placeholder="app.bsky.feed.like:subject.uri" />
6
+
{%- for did in dids %}{% if !did.is_empty() %}
7
+
&did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %}
8
+
<span id="did-placeholder"></span> <button id="add-did">+ did filter</button>
9
+
&limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre>
10
+
</form>
11
+
<script>
12
+
const addDidButton = document.getElementById('add-did');
13
+
const didPlaceholder = document.getElementById('did-placeholder');
14
+
addDidButton.addEventListener('click', e => {
15
+
e.preventDefault();
16
+
const i = document.createElement('input');
17
+
i.placeholder = 'did:plc:...';
18
+
i.name = "did"
19
+
const p = addDidButton.parentNode;
20
+
p.insertBefore(document.createTextNode('&did= '), didPlaceholder);
21
+
p.insertBefore(i, didPlaceholder);
22
+
p.insertBefore(document.createTextNode('\n '), didPlaceholder);
23
+
});
24
+
</script>
25
+
{% endmacro %}
26
+
27
+
{% macro get_many_to_many_counts(subject, source, pathToOther, dids, otherSubjects, limit) %}
28
+
<form method="get" action="/xrpc/blue.microcosm.links.getManyToManyCounts">
29
+
<pre class="code"><strong>GET</strong> /xrpc/blue.microcosm.links.getManyToManyCounts
30
+
?subject= <input type="text" name="subject" value="{{ subject }}" placeholder="at-uri, did, uri..." />
31
+
&source= <input type="text" name="source" value="{{ source }}" placeholder="app.bsky.feed.like:subject.uri" />
32
+
&pathToOther= <input type="text" name="pathToOther" value="{{ pathToOther }}" placeholder="otherThing.uri" />
33
+
{%- for did in dids %}{% if !did.is_empty() %}
34
+
&did= <input type="text" name="did" value="{{ did }}" placeholder="did:plc:..." />{% endif %}{% endfor %}
35
+
<span id="m2m-subject-placeholder"></span> <button id="m2m-add-subject">+ other subject filter</button>
36
+
{%- for otherSubject in otherSubjects %}{% if !otherSubject.is_empty() %}
37
+
&otherSubject= <input type="text" name="otherSubject" value="{{ otherSubject }}" placeholder="at-uri, did, uri..." />{% endif %}{% endfor %}
38
+
<span id="m2m-did-placeholder"></span> <button id="m2m-add-did">+ did filter</button>
39
+
&limit= <input type="number" name="limit" value="{{ limit }}" max="100" placeholder="100" /> <button type="submit">get links</button></pre>
40
+
</form>
41
+
<script>
42
+
const m2mAddDidButton = document.getElementById('m2m-add-did');
43
+
const m2mDidPlaceholder = document.getElementById('m2m-did-placeholder');
44
+
m2mAddDidButton.addEventListener('click', e => {
45
+
e.preventDefault();
46
+
const i = document.createElement('input');
47
+
i.placeholder = 'did:plc:...';
48
+
i.name = "did"
49
+
const p = m2mAddDidButton.parentNode;
50
+
p.insertBefore(document.createTextNode('&did= '), m2mDidPlaceholder);
51
+
p.insertBefore(i, m2mDidPlaceholder);
52
+
p.insertBefore(document.createTextNode('\n '), m2mDidPlaceholder);
53
+
});
54
+
const m2mAddSubjectButton = document.getElementById('m2m-add-subject');
55
+
const m2mSubjectPlaceholder = document.getElementById('m2m-subject-placeholder');
56
+
m2mAddSubjectButton.addEventListener('click', e => {
57
+
e.preventDefault();
58
+
const i = document.createElement('input');
59
+
i.placeholder = 'at-uri, did, uri...';
60
+
i.name = "otherSubject"
61
+
const p = m2mAddSubjectButton.parentNode;
62
+
p.insertBefore(document.createTextNode('&otherSubject= '), m2mSubjectPlaceholder);
63
+
p.insertBefore(i, m2mSubjectPlaceholder);
64
+
p.insertBefore(document.createTextNode('\n '), m2mSubjectPlaceholder);
65
+
});
66
+
</script>
67
+
{% endmacro %}
68
+
69
{% macro links(target, collection, path, dids, limit) %}
70
<form method="get" action="/links">
71
<pre class="code"><strong>GET</strong> /links
···
92
});
93
</script>
94
{% endmacro %}
95
96
{% macro dids(target, collection, path) %}
97
<form method="get" action="/links/distinct-dids">
+2
links/Cargo.toml
+2
links/Cargo.toml
+3
-2
links/src/lib.rs
+3
-2
links/src/lib.rs
···
1
use fluent_uri::Uri;
2
3
pub mod at_uri;
4
pub mod did;
···
6
7
pub use record::collect_links;
8
9
-
#[derive(Debug, Clone, Ord, Eq, PartialOrd, PartialEq)]
10
pub enum Link {
11
AtUri(String),
12
Uri(String),
···
59
}
60
}
61
62
-
#[derive(Debug, PartialEq)]
63
pub struct CollectedLink {
64
pub path: String,
65
pub target: Link,
···
1
use fluent_uri::Uri;
2
+
use serde::{Deserialize, Serialize};
3
4
pub mod at_uri;
5
pub mod did;
···
7
8
pub use record::collect_links;
9
10
+
#[derive(Debug, Clone, Ord, Eq, PartialOrd, PartialEq, Serialize, Deserialize)]
11
pub enum Link {
12
AtUri(String),
13
Uri(String),
···
60
}
61
}
62
63
+
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
64
pub struct CollectedLink {
65
pub path: String,
66
pub target: Link,
+41
links/src/record.rs
+41
links/src/record.rs
···
1
+
use dasl::drisl::Value as DrislValue;
2
use tinyjson::JsonValue;
3
4
use crate::{parse_any_link, CollectedLink};
···
37
}
38
}
39
40
+
pub fn walk_drisl(path: &str, v: &DrislValue, found: &mut Vec<CollectedLink>) {
41
+
match v {
42
+
DrislValue::Map(o) => {
43
+
for (key, child) in o {
44
+
walk_drisl(&format!("{path}.{key}"), child, found)
45
+
}
46
+
}
47
+
DrislValue::Array(a) => {
48
+
for child in a {
49
+
let child_p = match child {
50
+
DrislValue::Map(o) => {
51
+
if let Some(DrislValue::Text(t)) = o.get("$type") {
52
+
format!("{path}[{t}]")
53
+
} else {
54
+
format!("{path}[]")
55
+
}
56
+
}
57
+
_ => format!("{path}[]"),
58
+
};
59
+
walk_drisl(&child_p, child, found)
60
+
}
61
+
}
62
+
DrislValue::Text(s) => {
63
+
if let Some(link) = parse_any_link(s) {
64
+
found.push(CollectedLink {
65
+
path: path.to_string(),
66
+
target: link,
67
+
});
68
+
}
69
+
}
70
+
_ => {}
71
+
}
72
+
}
73
+
74
pub fn collect_links(v: &JsonValue) -> Vec<CollectedLink> {
75
let mut found = vec![];
76
walk_record("", v, &mut found);
77
+
found
78
+
}
79
+
80
+
pub fn collect_links_drisl(v: &DrislValue) -> Vec<CollectedLink> {
81
+
let mut found = vec![];
82
+
walk_drisl("", v, &mut found);
83
found
84
}
85
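// illustrative sketch (not part of this change): mirroring how import_scraped.rs further
// down uses the new walker. `block` is assumed to be one raw DAG-CBOR record block
// pulled out of a CAR file; the function name is made up.
fn print_links_in_block_sketch(block: &[u8]) {
    // parse the block with drisl, then walk it for anything link-shaped
    let value: dasl::drisl::Value =
        dasl::drisl::from_slice(block).expect("valid DAG-CBOR record block");
    for link in collect_links_drisl(&value) {
        println!("{} -> {:?}", link.path, link.target);
    }
}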
+8
spacedust/Cargo.toml
+8
spacedust/Cargo.toml
···
4
edition = "2024"
5
6
[dependencies]
7
async-trait = "0.1.88"
8
clap = { version = "4.5.40", features = ["derive"] }
9
ctrlc = "3.4.7"
10
dropshot = "0.16.2"
11
env_logger = "0.11.8"
12
futures = "0.3.31"
13
http = "1.3.1"
14
jetstream = { path = "../jetstream", features = ["metrics"] }
15
links = { path = "../links" }
16
log = "0.4.27"
17
metrics = "0.24.2"
18
metrics-exporter-prometheus = { version = "0.17.1", features = ["http-listener"] }
19
rand = "0.9.1"
20
schemars = "0.8.22"
21
semver = "1.0.26"
22
serde = { version = "1.0.219", features = ["derive"] }
23
serde_json = "1.0.140"
24
serde_qs = "1.0.0-rc.3"
25
thiserror = "2.0.12"
···
4
edition = "2024"
5
6
[dependencies]
7
+
anyhow = "1.0.100"
8
+
async-channel = "2.5.0"
9
async-trait = "0.1.88"
10
clap = { version = "4.5.40", features = ["derive"] }
11
ctrlc = "3.4.7"
12
+
dasl = "0.2.0"
13
dropshot = "0.16.2"
14
env_logger = "0.11.8"
15
+
fjall = "3.0.0-pre.0"
16
futures = "0.3.31"
17
http = "1.3.1"
18
+
ipld-core = { version = "0.4.2", features = ["serde"] }
19
jetstream = { path = "../jetstream", features = ["metrics"] }
20
links = { path = "../links" }
21
log = "0.4.27"
22
metrics = "0.24.2"
23
metrics-exporter-prometheus = { version = "0.17.1", features = ["http-listener"] }
24
rand = "0.9.1"
25
+
repo-stream = "0.2.2"
26
+
reqwest = { version = "0.12.24", features = ["json", "stream"] }
27
schemars = "0.8.22"
28
semver = "1.0.26"
29
serde = { version = "1.0.219", features = ["derive"] }
30
+
serde_ipld_dagcbor = "0.6.4"
31
serde_json = "1.0.140"
32
serde_qs = "1.0.0-rc.3"
33
thiserror = "2.0.12"
+21
spacedust/src/bin/import_car_file.rs
+21
spacedust/src/bin/import_car_file.rs
···
···
1
+
use clap::Parser;
2
+
use std::path::PathBuf;
3
+
4
+
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
5
+
6
+
#[derive(Debug, Parser)]
7
+
struct Args {
8
+
#[arg()]
9
+
file: PathBuf,
10
+
}
11
+
12
+
#[tokio::main]
13
+
async fn main() -> Result<()> {
14
+
env_logger::init();
15
+
16
+
let Args { file } = Args::parse();
17
+
18
+
let _reader = tokio::fs::File::open(file).await?;
19
+
20
+
Ok(())
21
+
}
+258
spacedust/src/bin/import_scraped.rs
+258
spacedust/src/bin/import_scraped.rs
···
···
1
+
use clap::Parser;
2
+
use links::CollectedLink;
3
+
use repo_stream::{
4
+
DiskBuilder, DiskStore, Driver, DriverBuilder, Processable, drive::DriverBuilderWithProcessor,
5
+
drive::NeedDisk,
6
+
};
7
+
use std::path::PathBuf;
8
+
use std::sync::{
9
+
Arc,
10
+
atomic::{AtomicUsize, Ordering},
11
+
};
12
+
use tokio::{io::AsyncRead, task::JoinSet};
13
+
14
+
type Result<T> = anyhow::Result<T>; //std::result::Result<T, Box<dyn std::error::Error>>;
15
+
16
+
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
17
+
struct CollectedProcessed(CollectedLink);
18
+
19
+
impl Processable for CollectedProcessed {
20
+
fn get_size(&self) -> usize {
21
+
self.0.path.capacity() + self.0.target.as_str().len()
22
+
}
23
+
}
24
+
25
+
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
26
+
struct ErrString(String);
27
+
28
+
impl Processable for ErrString {
29
+
fn get_size(&self) -> usize {
30
+
self.0.capacity()
31
+
}
32
+
}
33
+
34
+
type Processed = std::result::Result<Vec<CollectedProcessed>, ErrString>;
35
+
36
+
/// hacky for now: put errors in strings 🤷‍♂️
37
+
fn process(block: Vec<u8>) -> Processed {
38
+
let value: dasl::drisl::Value = dasl::drisl::from_slice(&block)
39
+
.map_err(|e| ErrString(format!("failed to parse block with drisl: {e:?}")))?;
40
+
let links = links::record::collect_links_drisl(&value)
41
+
.into_iter()
42
+
.map(CollectedProcessed)
43
+
.collect();
44
+
Ok(links)
45
+
}
46
+
47
+
#[derive(Debug, Parser)]
48
+
struct Args {
49
+
#[arg(long)]
50
+
cars_folder: PathBuf,
51
+
#[arg(long)]
52
+
mem_workers: usize,
53
+
#[arg(long)]
54
+
disk_workers: usize,
55
+
#[arg(long)]
56
+
disk_folder: PathBuf,
57
+
}
58
+
59
+
async fn get_cars(
60
+
cars_folder: PathBuf,
61
+
tx: async_channel::Sender<tokio::io::BufReader<tokio::fs::File>>,
62
+
) -> Result<()> {
63
+
let mut dir = tokio::fs::read_dir(cars_folder).await?;
64
+
while let Some(entry) = dir.next_entry().await? {
65
+
if !entry.file_type().await?.is_file() {
66
+
continue;
67
+
}
68
+
let reader = tokio::fs::File::open(&entry.path()).await?;
69
+
let reader = tokio::io::BufReader::new(reader);
70
+
tx.send(reader).await?;
71
+
}
72
+
Ok(())
73
+
}
74
+
75
+
async fn drive_mem<R: AsyncRead + Unpin + Send + Sync + 'static>(
76
+
f: R,
77
+
builder: &DriverBuilderWithProcessor<Processed>,
78
+
disk_tx: &async_channel::Sender<NeedDisk<R, Processed>>,
79
+
) -> Result<Option<(usize, usize)>> {
80
+
let mut n = 0;
81
+
let mut n_records = 0;
82
+
match builder.load_car(f).await? {
83
+
Driver::Memory(_commit, mut driver) => {
84
+
while let Some(chunk) = driver.next_chunk(512).await? {
85
+
n_records += chunk.len();
86
+
for (_key, links) in chunk {
87
+
match links {
88
+
Ok(links) => n += links.len(),
89
+
Err(e) => eprintln!("wat: {e:?}"),
90
+
}
91
+
}
92
+
}
93
+
Ok(Some((n, n_records)))
94
+
}
95
+
Driver::Disk(need_disk) => {
96
+
disk_tx.send(need_disk).await?;
97
+
Ok(None)
98
+
}
99
+
}
100
+
}
101
+
102
+
async fn mem_worker<R: AsyncRead + Unpin + Send + Sync + 'static>(
103
+
car_rx: async_channel::Receiver<R>,
104
+
disk_tx: async_channel::Sender<NeedDisk<R, Processed>>,
105
+
n: Arc<AtomicUsize>,
106
+
n_records: Arc<AtomicUsize>,
107
+
) -> Result<()> {
108
+
let builder = DriverBuilder::new()
109
+
.with_block_processor(process) // don't care just counting records
110
+
.with_mem_limit_mb(128);
111
+
while let Ok(f) = car_rx.recv().await {
112
+
let driven = match drive_mem(f, &builder, &disk_tx).await {
113
+
Ok(d) => d,
114
+
Err(e) => {
115
+
eprintln!("failed to drive mem: {e:?}. skipping...");
116
+
continue;
117
+
}
118
+
};
119
+
if let Some((drove, recs)) = driven {
120
+
n.fetch_add(drove, Ordering::Relaxed);
121
+
n_records.fetch_add(recs, Ordering::Relaxed);
122
+
}
123
+
}
124
+
Ok(())
125
+
}
126
+
127
+
async fn drive_disk<R: AsyncRead + Unpin>(
128
+
needed: NeedDisk<R, Processed>,
129
+
store: DiskStore,
130
+
) -> Result<(usize, usize, DiskStore)> {
131
+
let (_commit, mut driver) = needed.finish_loading(store).await?;
132
+
let mut n = 0;
133
+
let mut n_records = 0;
134
+
while let Some(chunk) = driver.next_chunk(512).await? {
135
+
n_records += chunk.len();
136
+
for (_key, links) in chunk {
137
+
match links {
138
+
Ok(links) => n += links.len(),
139
+
Err(e) => eprintln!("wat: {e:?}"),
140
+
}
141
+
}
142
+
}
143
+
let store = driver.reset_store().await?;
144
+
Ok((n, n_records, store))
145
+
}
146
+
147
+
async fn disk_worker<R: AsyncRead + Unpin>(
148
+
worker_id: usize,
149
+
disk_rx: async_channel::Receiver<NeedDisk<R, Processed>>,
150
+
folder: PathBuf,
151
+
n: Arc<AtomicUsize>,
152
+
n_records: Arc<AtomicUsize>,
153
+
disk_workers_active: Arc<AtomicUsize>,
154
+
) -> Result<()> {
155
+
let mut file = folder;
156
+
file.push(format!("disk-worker-{worker_id}.sqlite"));
157
+
let builder = DiskBuilder::new().with_cache_size_mb(128);
158
+
let mut store = builder.open(file.clone()).await?;
159
+
while let Ok(needed) = disk_rx.recv().await {
160
+
let active = disk_workers_active.fetch_add(1, Ordering::AcqRel);
161
+
println!("-> disk workers active: {}", active + 1);
162
+
let (drove, records) = match drive_disk(needed, store).await {
163
+
Ok((d, r, s)) => {
164
+
store = s;
165
+
(d, r)
166
+
}
167
+
Err(e) => {
168
+
eprintln!("failed to drive disk: {e:?}. skipping...");
169
+
store = builder.open(file.clone()).await?;
170
+
continue;
171
+
}
172
+
};
173
+
n.fetch_add(drove, Ordering::Relaxed);
174
+
n_records.fetch_add(records, Ordering::Relaxed);
175
+
let were_active = disk_workers_active.fetch_sub(1, Ordering::AcqRel);
176
+
println!("<- disk workers active: {}", were_active - 1);
177
+
}
178
+
Ok(())
179
+
}
180
+
181
+
#[tokio::main]
182
+
async fn main() -> Result<()> {
183
+
env_logger::init();
184
+
185
+
let Args {
186
+
cars_folder,
187
+
disk_folder,
188
+
disk_workers,
189
+
mem_workers,
190
+
} = Args::parse();
191
+
192
+
let mut set = JoinSet::<Result<()>>::new();
193
+
194
+
let (cars_tx, cars_rx) = async_channel::bounded(2);
195
+
set.spawn(get_cars(cars_folder, cars_tx));
196
+
197
+
let n: Arc<AtomicUsize> = Arc::new(0.into());
198
+
let n_records: Arc<AtomicUsize> = Arc::new(0.into());
199
+
let disk_workers_active: Arc<AtomicUsize> = Arc::new(0.into());
200
+
201
+
set.spawn({
202
+
let n = n.clone();
203
+
let n_records = n_records.clone();
204
+
let mut interval = tokio::time::interval(std::time::Duration::from_secs(10));
205
+
async move {
206
+
let mut last_n = n.load(Ordering::Relaxed);
207
+
let mut last_n_records = n_records.load(Ordering::Relaxed);
208
+
loop {
209
+
interval.tick().await;
210
+
let n = n.load(Ordering::Relaxed);
211
+
let n_records = n_records.load(Ordering::Relaxed);
212
+
let diff_n = n - last_n;
213
+
let diff_records = n_records - last_n_records;
214
+
println!("rate: {} rec/sec; {} n/sec", diff_records / 10, diff_n / 10);
215
+
if n_records > 0 && diff_records == 0 {
216
+
println!("zero encountered, stopping rate calculation polling.");
217
+
break Ok(());
218
+
}
219
+
last_n = n;
220
+
last_n_records = n_records;
221
+
}
222
+
}
223
+
});
224
+
225
+
let (needs_disk_tx, needs_disk_rx) = async_channel::bounded(disk_workers);
226
+
227
+
for _ in 0..mem_workers {
228
+
set.spawn(mem_worker(
229
+
cars_rx.clone(),
230
+
needs_disk_tx.clone(),
231
+
n.clone(),
232
+
n_records.clone(),
233
+
));
234
+
}
235
+
drop(cars_rx);
236
+
drop(needs_disk_tx);
237
+
238
+
tokio::fs::create_dir_all(disk_folder.clone()).await?;
239
+
for id in 0..disk_workers {
240
+
set.spawn(disk_worker(
241
+
id,
242
+
needs_disk_rx.clone(),
243
+
disk_folder.clone(),
244
+
n.clone(),
245
+
n_records.clone(),
246
+
disk_workers_active.clone(),
247
+
));
248
+
}
249
+
drop(needs_disk_rx);
250
+
251
+
while let Some(res) = set.join_next().await {
252
+
println!("task from set joined: {res:?}");
253
+
}
254
+
255
+
eprintln!("total records processed: {n_records:?}; total n: {n:?}");
256
+
257
+
Ok(())
258
+
}
+137
spacedust/src/bin/scrape_pds.rs
+137
spacedust/src/bin/scrape_pds.rs
···
···
1
+
use clap::Parser;
2
+
use reqwest::Url;
3
+
use serde::Deserialize;
4
+
use std::path::PathBuf;
5
+
use tokio::io::AsyncWriteExt;
6
+
use tokio::{sync::mpsc, time};
7
+
8
+
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
9
+
10
+
use futures::StreamExt;
11
+
12
+
#[derive(Debug, Parser)]
13
+
struct Args {
14
+
#[arg(long)]
15
+
pds: Url,
16
+
#[arg(long)]
17
+
throttle_ms: u64, // 100ms per pds?
18
+
#[arg(long)]
19
+
folder: PathBuf,
20
+
}
21
+
22
+
async fn download_repo(
23
+
client: &reqwest::Client,
24
+
mut pds: Url,
25
+
did: String,
26
+
mut path: PathBuf,
27
+
) -> Result<()> {
28
+
path.push(format!("{did}.car"));
29
+
let f = tokio::fs::File::create(path).await?;
30
+
let mut w = tokio::io::BufWriter::new(f);
31
+
32
+
pds.set_path("/xrpc/com.atproto.sync.getRepo");
33
+
pds.set_query(Some(&format!("did={did}")));
34
+
let mut byte_stream = client.get(pds).send().await?.bytes_stream();
35
+
36
+
while let Some(stuff) = byte_stream.next().await {
37
+
tokio::io::copy(&mut stuff?.as_ref(), &mut w).await?;
38
+
}
39
+
w.flush().await?;
40
+
41
+
Ok(())
42
+
}
43
+
44
+
#[derive(Debug, Deserialize)]
45
+
struct RepoInfo {
46
+
did: String,
47
+
active: bool,
48
+
}
49
+
50
+
#[derive(Debug, Deserialize)]
51
+
struct ListReposResponse {
52
+
cursor: Option<String>,
53
+
repos: Vec<RepoInfo>,
54
+
}
55
+
56
+
fn get_pds_dids(client: reqwest::Client, mut pds: Url) -> mpsc::Receiver<String> {
57
+
let (tx, rx) = mpsc::channel(2);
58
+
tokio::task::spawn(async move {
59
+
pds.set_path("/xrpc/com.atproto.sync.listRepos");
60
+
let mut cursor = None;
61
+
62
+
loop {
63
+
if let Some(c) = cursor {
64
+
pds.set_query(Some(&format!("cursor={c}")));
65
+
}
66
+
let res: ListReposResponse = client
67
+
.get(pds.clone())
68
+
.send()
69
+
.await
70
+
.expect("to send request")
71
+
.error_for_status()
72
+
.expect("to be ok")
73
+
.json()
74
+
.await
75
+
.expect("json response");
76
+
for repo in res.repos {
77
+
if repo.active {
78
+
tx.send(repo.did)
79
+
.await
80
+
.expect("to be able to send on the channel");
81
+
}
82
+
}
83
+
cursor = res.cursor;
84
+
if cursor.is_none() {
85
+
break;
86
+
}
87
+
}
88
+
});
89
+
rx
90
+
}
91
+
92
+
#[tokio::main]
93
+
async fn main() -> Result<()> {
94
+
env_logger::init();
95
+
96
+
let Args {
97
+
pds,
98
+
throttle_ms,
99
+
folder,
100
+
} = Args::parse();
101
+
102
+
tokio::fs::create_dir_all(folder.clone()).await?;
103
+
104
+
let client = reqwest::Client::builder()
105
+
.user_agent("microcosm/spacedust-testing")
106
+
.build()?;
107
+
108
+
let mut dids = get_pds_dids(client.clone(), pds.clone());
109
+
110
+
let mut interval = time::interval(time::Duration::from_millis(throttle_ms));
111
+
let mut oks = 0;
112
+
let mut single_fails = 0;
113
+
let mut double_fails = 0;
114
+
115
+
while let Some(did) = dids.recv().await {
116
+
interval.tick().await;
117
+
println!("did: {did:?}");
118
+
if let Err(e) = download_repo(&client, pds.clone(), did.clone(), folder.clone()).await {
119
+
single_fails += 1;
120
+
eprintln!("failed to download repo for did: {did:?}: {e:?}. retrying in a moment...");
121
+
tokio::time::sleep(time::Duration::from_secs(3)).await;
122
+
interval.reset();
123
+
if let Err(e) = download_repo(&client, pds.clone(), did.clone(), folder.clone()).await {
124
+
double_fails += 1;
125
+
eprintln!("failed again: {e:?}. moving on in a moment...");
126
+
tokio::time::sleep(time::Duration::from_secs(1)).await;
127
+
continue;
128
+
}
129
+
}
130
+
oks += 1;
131
+
println!(" -> done. did: {did:?}");
132
+
}
133
+
134
+
eprintln!("got {oks} repos. single fails: {single_fails}; doubles: {double_fails}.");
135
+
136
+
Ok(())
137
+
}
+1
spacedust/src/lib.rs
+1
spacedust/src/lib.rs
spacedust/src/storage/car/drive.rs
spacedust/src/storage/car/drive.rs
This is a binary file and will not be displayed.
+1
spacedust/src/storage/car/mod.rs
+1
spacedust/src/storage/car/mod.rs
···
···
1
+
spacedust/src/storage/car/walk.rs
spacedust/src/storage/car/walk.rs
This is a binary file and will not be displayed.
+9
spacedust/src/storage/fjall/mod.rs
+9
spacedust/src/storage/fjall/mod.rs