+293
-66
Cargo.lock
+293
-66
Cargo.lock
···
112
112
113
113
[[package]]
114
114
name = "anyhow"
115
-
version = "1.0.97"
115
+
version = "1.0.100"
116
116
source = "registry+https://github.com/rust-lang/crates.io-index"
117
-
checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f"
117
+
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
118
118
119
119
[[package]]
120
120
name = "arbitrary"
···
127
127
version = "1.7.1"
128
128
source = "registry+https://github.com/rust-lang/crates.io-index"
129
129
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
130
+
131
+
[[package]]
132
+
name = "arrayref"
133
+
version = "0.3.9"
134
+
source = "registry+https://github.com/rust-lang/crates.io-index"
135
+
checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
130
136
131
137
[[package]]
132
138
name = "arrayvec"
···
192
198
"nom",
193
199
"num-traits",
194
200
"rusticata-macros",
195
-
"thiserror 2.0.16",
201
+
"thiserror 2.0.17",
196
202
"time",
197
203
]
198
204
···
644
650
"axum",
645
651
"handlebars",
646
652
"serde",
647
-
"thiserror 2.0.16",
653
+
"thiserror 2.0.17",
648
654
]
649
655
650
656
[[package]]
···
673
679
version = "0.2.0"
674
680
source = "registry+https://github.com/rust-lang/crates.io-index"
675
681
checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
682
+
683
+
[[package]]
684
+
name = "base256emoji"
685
+
version = "1.0.2"
686
+
source = "registry+https://github.com/rust-lang/crates.io-index"
687
+
checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c"
688
+
dependencies = [
689
+
"const-str",
690
+
"match-lookup",
691
+
]
676
692
677
693
[[package]]
678
694
name = "base64"
···
812
828
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
813
829
814
830
[[package]]
831
+
name = "blake3"
832
+
version = "1.8.2"
833
+
source = "registry+https://github.com/rust-lang/crates.io-index"
834
+
checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0"
835
+
dependencies = [
836
+
"arrayref",
837
+
"arrayvec",
838
+
"cc",
839
+
"cfg-if",
840
+
"constant_time_eq",
841
+
]
842
+
843
+
[[package]]
815
844
name = "block-buffer"
816
845
version = "0.10.4"
817
846
source = "registry+https://github.com/rust-lang/crates.io-index"
···
839
868
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
840
869
841
870
[[package]]
871
+
name = "byteorder-lite"
872
+
version = "0.1.0"
873
+
source = "registry+https://github.com/rust-lang/crates.io-index"
874
+
checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
875
+
876
+
[[package]]
842
877
name = "bytes"
843
878
version = "1.10.1"
844
879
source = "registry+https://github.com/rust-lang/crates.io-index"
···
851
886
checksum = "6236364b88b9b6d0bc181ba374cf1ab55ba3ef97a1cb6f8cddad48a273767fb5"
852
887
853
888
[[package]]
889
+
name = "byteview"
890
+
version = "0.8.0"
891
+
source = "registry+https://github.com/rust-lang/crates.io-index"
892
+
checksum = "1e6b0e42e210b794e14b152c6fe1a55831e30ef4a0f5dc39d73d714fb5f1906c"
893
+
894
+
[[package]]
854
895
name = "bzip2-sys"
855
896
version = "0.1.13+1.0.8"
856
897
source = "registry+https://github.com/rust-lang/crates.io-index"
···
892
933
]
893
934
894
935
[[package]]
936
+
name = "cbor4ii"
937
+
version = "0.2.14"
938
+
source = "registry+https://github.com/rust-lang/crates.io-index"
939
+
checksum = "b544cf8c89359205f4f990d0e6f3828db42df85b5dac95d09157a250eb0749c4"
940
+
dependencies = [
941
+
"serde",
942
+
]
943
+
944
+
[[package]]
945
+
name = "cbor4ii"
946
+
version = "1.2.0"
947
+
source = "registry+https://github.com/rust-lang/crates.io-index"
948
+
checksum = "b28d2802395e3bccd95cc4ae984bff7444b6c1f5981da46a41360c42a2c7e2d9"
949
+
950
+
[[package]]
895
951
name = "cc"
896
952
version = "1.2.18"
897
953
source = "registry+https://github.com/rust-lang/crates.io-index"
···
976
1032
"multihash",
977
1033
"serde",
978
1034
"serde_bytes",
979
-
"unsigned-varint",
1035
+
"unsigned-varint 0.8.0",
980
1036
]
981
1037
982
1038
[[package]]
···
1087
1143
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
1088
1144
1089
1145
[[package]]
1146
+
name = "const-str"
1147
+
version = "0.4.3"
1148
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1149
+
checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3"
1150
+
1151
+
[[package]]
1152
+
name = "constant_time_eq"
1153
+
version = "0.3.1"
1154
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1155
+
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
1156
+
1157
+
[[package]]
1090
1158
name = "constellation"
1091
1159
version = "0.1.0"
1092
1160
dependencies = [
···
1353
1421
]
1354
1422
1355
1423
[[package]]
1424
+
name = "dasl"
1425
+
version = "0.2.0"
1426
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1427
+
checksum = "b59666035a4386b0fd272bd78da4cbc3ccb558941e97579ab00f0eb4639f2a49"
1428
+
dependencies = [
1429
+
"blake3",
1430
+
"cbor4ii 1.2.0",
1431
+
"data-encoding",
1432
+
"data-encoding-macro",
1433
+
"scopeguard",
1434
+
"serde",
1435
+
"serde_bytes",
1436
+
"sha2",
1437
+
"thiserror 2.0.17",
1438
+
]
1439
+
1440
+
[[package]]
1356
1441
name = "data-encoding"
1357
-
version = "2.8.0"
1442
+
version = "2.9.0"
1358
1443
source = "registry+https://github.com/rust-lang/crates.io-index"
1359
-
checksum = "575f75dfd25738df5b91b8e43e14d44bda14637a58fae779fd2b064f8bf3e010"
1444
+
checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476"
1360
1445
1361
1446
[[package]]
1362
1447
name = "data-encoding-macro"
1363
-
version = "0.1.17"
1448
+
version = "0.1.18"
1364
1449
source = "registry+https://github.com/rust-lang/crates.io-index"
1365
-
checksum = "9f9724adfcf41f45bf652b3995837669d73c4d49a1b5ac1ff82905ac7d9b5558"
1450
+
checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d"
1366
1451
dependencies = [
1367
1452
"data-encoding",
1368
1453
"data-encoding-macro-internal",
···
1370
1455
1371
1456
[[package]]
1372
1457
name = "data-encoding-macro-internal"
1373
-
version = "0.1.15"
1458
+
version = "0.1.16"
1374
1459
source = "registry+https://github.com/rust-lang/crates.io-index"
1375
-
checksum = "18e4fdb82bd54a12e42fb58a800dcae6b9e13982238ce2296dc3570b92148e1f"
1460
+
checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976"
1376
1461
dependencies = [
1377
1462
"data-encoding",
1378
1463
"syn 2.0.106",
···
1579
1664
"slog-bunyan",
1580
1665
"slog-json",
1581
1666
"slog-term",
1582
-
"thiserror 2.0.16",
1667
+
"thiserror 2.0.17",
1583
1668
"tokio",
1584
1669
"tokio-rustls 0.25.0",
1585
1670
"toml 0.9.7",
···
1783
1868
checksum = "0b25ad44cd4360a0448a9b5a0a6f1c7a621101cca4578706d43c9a821418aebc"
1784
1869
dependencies = [
1785
1870
"byteorder",
1786
-
"byteview",
1871
+
"byteview 0.6.1",
1787
1872
"dashmap",
1788
1873
"log",
1789
-
"lsm-tree",
1874
+
"lsm-tree 2.10.4",
1790
1875
"path-absolutize",
1791
1876
"std-semaphore",
1792
1877
"tempfile",
···
1799
1884
source = "git+https://github.com/fjall-rs/fjall.git#42d811f7c8cc9004407d520d37d2a1d8d246c03d"
1800
1885
dependencies = [
1801
1886
"byteorder",
1802
-
"byteview",
1887
+
"byteview 0.6.1",
1803
1888
"dashmap",
1804
1889
"log",
1805
-
"lsm-tree",
1890
+
"lsm-tree 2.10.4",
1806
1891
"path-absolutize",
1892
+
"std-semaphore",
1893
+
"tempfile",
1894
+
"xxhash-rust",
1895
+
]
1896
+
1897
+
[[package]]
1898
+
name = "fjall"
1899
+
version = "3.0.0-pre.0"
1900
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1901
+
checksum = "467588c1f15d1cfa9e43f02a45cf55d82fa1f12a6ae961b848c520458525600c"
1902
+
dependencies = [
1903
+
"byteorder-lite",
1904
+
"byteview 0.8.0",
1905
+
"dashmap",
1906
+
"log",
1907
+
"lsm-tree 3.0.0-pre.0",
1807
1908
"std-semaphore",
1808
1909
"tempfile",
1809
1910
"xxhash-rust",
···
1891
1992
"mixtrics",
1892
1993
"pin-project",
1893
1994
"serde",
1894
-
"thiserror 2.0.16",
1995
+
"thiserror 2.0.17",
1895
1996
"tokio",
1896
1997
"tracing",
1897
1998
]
···
1911
2012
"parking_lot",
1912
2013
"pin-project",
1913
2014
"serde",
1914
-
"thiserror 2.0.16",
2015
+
"thiserror 2.0.17",
1915
2016
"tokio",
1916
2017
"twox-hash",
1917
2018
]
···
1944
2045
"parking_lot",
1945
2046
"pin-project",
1946
2047
"serde",
1947
-
"thiserror 2.0.16",
2048
+
"thiserror 2.0.17",
1948
2049
"tokio",
1949
2050
"tracing",
1950
2051
]
···
1976
2077
"pin-project",
1977
2078
"rand 0.9.1",
1978
2079
"serde",
1979
-
"thiserror 2.0.16",
2080
+
"thiserror 2.0.17",
1980
2081
"tokio",
1981
2082
"tracing",
1982
2083
"twox-hash",
···
2220
2321
"pest_derive",
2221
2322
"serde",
2222
2323
"serde_json",
2223
-
"thiserror 2.0.16",
2324
+
"thiserror 2.0.17",
2224
2325
"walkdir",
2225
2326
]
2226
2327
···
2345
2446
"once_cell",
2346
2447
"rand 0.9.1",
2347
2448
"ring",
2348
-
"thiserror 2.0.16",
2449
+
"thiserror 2.0.17",
2349
2450
"tinyvec",
2350
2451
"tokio",
2351
2452
"tracing",
···
2368
2469
"rand 0.9.1",
2369
2470
"resolv-conf",
2370
2471
"smallvec",
2371
-
"thiserror 2.0.16",
2472
+
"thiserror 2.0.17",
2372
2473
"tokio",
2373
2474
"tracing",
2374
2475
]
···
2800
2901
]
2801
2902
2802
2903
[[package]]
2904
+
name = "iroh-car"
2905
+
version = "0.5.1"
2906
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2907
+
checksum = "cb7f8cd4cb9aa083fba8b52e921764252d0b4dcb1cd6d120b809dbfe1106e81a"
2908
+
dependencies = [
2909
+
"anyhow",
2910
+
"cid",
2911
+
"futures",
2912
+
"serde",
2913
+
"serde_ipld_dagcbor",
2914
+
"thiserror 1.0.69",
2915
+
"tokio",
2916
+
"unsigned-varint 0.7.2",
2917
+
]
2918
+
2919
+
[[package]]
2803
2920
name = "is-terminal"
2804
2921
version = "0.4.16"
2805
2922
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2863
2980
"metrics",
2864
2981
"serde",
2865
2982
"serde_json",
2866
-
"thiserror 2.0.16",
2983
+
"thiserror 2.0.17",
2867
2984
"tokio",
2868
2985
"tokio-tungstenite 0.26.2",
2869
2986
"url",
···
3116
3233
version = "0.1.0"
3117
3234
dependencies = [
3118
3235
"anyhow",
3236
+
"dasl",
3119
3237
"fluent-uri",
3120
3238
"nom",
3121
-
"thiserror 2.0.16",
3239
+
"serde",
3240
+
"thiserror 2.0.17",
3122
3241
"tinyjson",
3123
3242
]
3124
3243
···
3186
3305
3187
3306
[[package]]
3188
3307
name = "lsm-tree"
3189
-
version = "2.10.2"
3308
+
version = "2.10.4"
3190
3309
source = "registry+https://github.com/rust-lang/crates.io-index"
3191
-
checksum = "55b6d7475a8dd22e749186968daacf8e2a77932b061b1bd263157987bbfc0c6c"
3310
+
checksum = "799399117a2bfb37660e08be33f470958babb98386b04185288d829df362ea15"
3192
3311
dependencies = [
3193
3312
"byteorder",
3194
3313
"crossbeam-skiplist",
···
3209
3328
]
3210
3329
3211
3330
[[package]]
3331
+
name = "lsm-tree"
3332
+
version = "3.0.0-pre.0"
3333
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3334
+
checksum = "be375d45e348328e78582dffbda4f1709dd52fca27c1a81c7bf6ca134e6335f7"
3335
+
dependencies = [
3336
+
"byteorder-lite",
3337
+
"byteview 0.8.0",
3338
+
"crossbeam-skiplist",
3339
+
"enum_dispatch",
3340
+
"interval-heap",
3341
+
"log",
3342
+
"lz4_flex",
3343
+
"quick_cache",
3344
+
"rustc-hash 2.1.1",
3345
+
"self_cell",
3346
+
"sfa",
3347
+
"tempfile",
3348
+
"varint-rs",
3349
+
"xxhash-rust",
3350
+
]
3351
+
3352
+
[[package]]
3212
3353
name = "lz4"
3213
3354
version = "1.28.1"
3214
3355
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3229
3370
3230
3371
[[package]]
3231
3372
name = "lz4_flex"
3232
-
version = "0.11.3"
3373
+
version = "0.11.5"
3233
3374
source = "registry+https://github.com/rust-lang/crates.io-index"
3234
-
checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5"
3375
+
checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a"
3235
3376
3236
3377
[[package]]
3237
3378
name = "mach2"
···
3297
3438
]
3298
3439
3299
3440
[[package]]
3441
+
name = "match-lookup"
3442
+
version = "0.1.1"
3443
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3444
+
checksum = "1265724d8cb29dbbc2b0f06fffb8bf1a8c0cf73a78eede9ba73a4a66c52a981e"
3445
+
dependencies = [
3446
+
"proc-macro2",
3447
+
"quote",
3448
+
"syn 1.0.109",
3449
+
]
3450
+
3451
+
[[package]]
3300
3452
name = "match_cfg"
3301
3453
version = "0.1.0"
3302
3454
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3384
3536
"metrics",
3385
3537
"metrics-util 0.20.0",
3386
3538
"quanta",
3387
-
"thiserror 2.0.16",
3539
+
"thiserror 2.0.17",
3388
3540
"tokio",
3389
3541
"tracing",
3390
3542
]
···
3531
3683
3532
3684
[[package]]
3533
3685
name = "multibase"
3534
-
version = "0.9.1"
3686
+
version = "0.9.2"
3535
3687
source = "registry+https://github.com/rust-lang/crates.io-index"
3536
-
checksum = "9b3539ec3c1f04ac9748a260728e855f261b4977f5c3406612c884564f329404"
3688
+
checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77"
3537
3689
dependencies = [
3538
3690
"base-x",
3691
+
"base256emoji",
3539
3692
"data-encoding",
3540
3693
"data-encoding-macro",
3541
3694
]
···
3548
3701
dependencies = [
3549
3702
"core2",
3550
3703
"serde",
3551
-
"unsigned-varint",
3704
+
"unsigned-varint 0.8.0",
3552
3705
]
3553
3706
3554
3707
[[package]]
···
3926
4079
checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323"
3927
4080
dependencies = [
3928
4081
"memchr",
3929
-
"thiserror 2.0.16",
4082
+
"thiserror 2.0.17",
3930
4083
"ucd-trie",
3931
4084
]
3932
4085
···
4036
4189
"rusqlite",
4037
4190
"serde",
4038
4191
"serde_json",
4039
-
"thiserror 2.0.16",
4192
+
"thiserror 2.0.17",
4040
4193
"tokio",
4041
4194
"tracing-subscriber",
4042
4195
]
···
4079
4232
"smallvec",
4080
4233
"sync_wrapper",
4081
4234
"tempfile",
4082
-
"thiserror 2.0.16",
4235
+
"thiserror 2.0.17",
4083
4236
"tokio",
4084
4237
"tokio-rustls 0.26.2",
4085
4238
"tokio-stream",
···
4123
4276
"serde_json",
4124
4277
"serde_urlencoded",
4125
4278
"serde_yaml",
4126
-
"thiserror 2.0.16",
4279
+
"thiserror 2.0.17",
4127
4280
"tokio",
4128
4281
]
4129
4282
···
4142
4295
"quote",
4143
4296
"regex",
4144
4297
"syn 2.0.106",
4145
-
"thiserror 2.0.16",
4298
+
"thiserror 2.0.17",
4146
4299
]
4147
4300
4148
4301
[[package]]
···
4269
4422
4270
4423
[[package]]
4271
4424
name = "quick_cache"
4272
-
version = "0.6.12"
4425
+
version = "0.6.16"
4273
4426
source = "registry+https://github.com/rust-lang/crates.io-index"
4274
-
checksum = "8f8ed0655cbaf18a26966142ad23b95d8ab47221c50c4f73a1db7d0d2d6e3da8"
4427
+
checksum = "9ad6644cb07b7f3488b9f3d2fde3b4c0a7fa367cafefb39dff93a659f76eb786"
4275
4428
dependencies = [
4276
4429
"equivalent",
4277
4430
"hashbrown 0.15.2",
···
4291
4444
"rustc-hash 2.1.1",
4292
4445
"rustls 0.23.31",
4293
4446
"socket2 0.5.9",
4294
-
"thiserror 2.0.16",
4447
+
"thiserror 2.0.17",
4295
4448
"tokio",
4296
4449
"tracing",
4297
4450
"web-time",
···
4312
4465
"rustls 0.23.31",
4313
4466
"rustls-pki-types",
4314
4467
"slab",
4315
-
"thiserror 2.0.16",
4468
+
"thiserror 2.0.17",
4316
4469
"tinyvec",
4317
4470
"tracing",
4318
4471
"web-time",
···
4538
4691
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
4539
4692
4540
4693
[[package]]
4694
+
name = "repo-stream"
4695
+
version = "0.2.2"
4696
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4697
+
checksum = "093b48e604c138949bf3d4a1a9bc1165feb1db28a73af0101c84eb703d279f43"
4698
+
dependencies = [
4699
+
"bincode 2.0.1",
4700
+
"futures",
4701
+
"futures-core",
4702
+
"ipld-core",
4703
+
"iroh-car",
4704
+
"log",
4705
+
"multibase",
4706
+
"rusqlite",
4707
+
"serde",
4708
+
"serde_bytes",
4709
+
"serde_ipld_dagcbor",
4710
+
"sha2",
4711
+
"thiserror 2.0.17",
4712
+
"tokio",
4713
+
]
4714
+
4715
+
[[package]]
4541
4716
name = "reqwest"
4542
-
version = "0.12.23"
4717
+
version = "0.12.24"
4543
4718
source = "registry+https://github.com/rust-lang/crates.io-index"
4544
-
checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb"
4719
+
checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f"
4545
4720
dependencies = [
4546
4721
"async-compression",
4547
4722
"base64 0.22.1",
···
4581
4756
"url",
4582
4757
"wasm-bindgen",
4583
4758
"wasm-bindgen-futures",
4759
+
"wasm-streams",
4584
4760
"web-sys",
4585
4761
]
4586
4762
···
4962
5138
4963
5139
[[package]]
4964
5140
name = "self_cell"
4965
-
version = "1.1.0"
5141
+
version = "1.2.0"
4966
5142
source = "registry+https://github.com/rust-lang/crates.io-index"
4967
-
checksum = "c2fdfc24bc566f839a2da4c4295b82db7d25a24253867d5c64355abb5799bdbe"
5143
+
checksum = "0f7d95a54511e0c7be3f51e8867aa8cf35148d7b9445d44de2f943e2b206e749"
4968
5144
4969
5145
[[package]]
4970
5146
name = "semver"
···
4984
5160
4985
5161
[[package]]
4986
5162
name = "serde_bytes"
4987
-
version = "0.11.17"
5163
+
version = "0.11.19"
4988
5164
source = "registry+https://github.com/rust-lang/crates.io-index"
4989
-
checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96"
5165
+
checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8"
4990
5166
dependencies = [
4991
5167
"serde",
5168
+
"serde_core",
4992
5169
]
4993
5170
4994
5171
[[package]]
···
5036
5213
]
5037
5214
5038
5215
[[package]]
5216
+
name = "serde_ipld_dagcbor"
5217
+
version = "0.6.4"
5218
+
source = "registry+https://github.com/rust-lang/crates.io-index"
5219
+
checksum = "46182f4f08349a02b45c998ba3215d3f9de826246ba02bb9dddfe9a2a2100778"
5220
+
dependencies = [
5221
+
"cbor4ii 0.2.14",
5222
+
"ipld-core",
5223
+
"scopeguard",
5224
+
"serde",
5225
+
]
5226
+
5227
+
[[package]]
5039
5228
name = "serde_json"
5040
5229
version = "1.0.145"
5041
5230
source = "registry+https://github.com/rust-lang/crates.io-index"
···
5068
5257
"percent-encoding",
5069
5258
"ryu",
5070
5259
"serde",
5071
-
"thiserror 2.0.16",
5260
+
"thiserror 2.0.17",
5072
5261
]
5073
5262
5074
5263
[[package]]
···
5157
5346
]
5158
5347
5159
5348
[[package]]
5349
+
name = "sfa"
5350
+
version = "0.0.1"
5351
+
source = "registry+https://github.com/rust-lang/crates.io-index"
5352
+
checksum = "e5f5f9dc21f55409f15103d5a7e7601b804935923c7fe4746dc806c3a422a038"
5353
+
dependencies = [
5354
+
"byteorder-lite",
5355
+
"log",
5356
+
"xxhash-rust",
5357
+
]
5358
+
5359
+
[[package]]
5160
5360
name = "sha1"
5161
5361
version = "0.10.6"
5162
5362
source = "registry+https://github.com/rust-lang/crates.io-index"
···
5220
5420
dependencies = [
5221
5421
"num-bigint",
5222
5422
"num-traits",
5223
-
"thiserror 2.0.16",
5423
+
"thiserror 2.0.17",
5224
5424
"time",
5225
5425
]
5226
5426
···
5262
5462
"rustls 0.23.31",
5263
5463
"serde",
5264
5464
"serde_json",
5265
-
"thiserror 2.0.16",
5465
+
"thiserror 2.0.17",
5266
5466
"time",
5267
5467
"tokio",
5268
5468
"tokio-util",
···
5355
5555
name = "spacedust"
5356
5556
version = "0.1.0"
5357
5557
dependencies = [
5558
+
"anyhow",
5559
+
"async-channel",
5358
5560
"async-trait",
5359
5561
"clap",
5360
5562
"ctrlc",
5563
+
"dasl",
5361
5564
"dropshot",
5362
5565
"env_logger",
5566
+
"fjall 3.0.0-pre.0",
5363
5567
"futures",
5364
5568
"http",
5569
+
"ipld-core",
5365
5570
"jetstream",
5366
5571
"links",
5367
5572
"log",
5368
5573
"metrics",
5369
5574
"metrics-exporter-prometheus 0.17.2",
5370
5575
"rand 0.9.1",
5576
+
"repo-stream",
5577
+
"reqwest",
5371
5578
"schemars",
5372
5579
"semver",
5373
5580
"serde",
5581
+
"serde_ipld_dagcbor",
5374
5582
"serde_json",
5375
5583
"serde_qs",
5376
-
"thiserror 2.0.16",
5584
+
"thiserror 2.0.17",
5377
5585
"tinyjson",
5378
5586
"tokio",
5379
5587
"tokio-tungstenite 0.27.0",
···
5506
5714
5507
5715
[[package]]
5508
5716
name = "tempfile"
5509
-
version = "3.19.1"
5717
+
version = "3.23.0"
5510
5718
source = "registry+https://github.com/rust-lang/crates.io-index"
5511
-
checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf"
5719
+
checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
5512
5720
dependencies = [
5513
5721
"fastrand",
5514
5722
"getrandom 0.3.3",
···
5539
5747
5540
5748
[[package]]
5541
5749
name = "thiserror"
5542
-
version = "2.0.16"
5750
+
version = "2.0.17"
5543
5751
source = "registry+https://github.com/rust-lang/crates.io-index"
5544
-
checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0"
5752
+
checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
5545
5753
dependencies = [
5546
-
"thiserror-impl 2.0.16",
5754
+
"thiserror-impl 2.0.17",
5547
5755
]
5548
5756
5549
5757
[[package]]
···
5559
5767
5560
5768
[[package]]
5561
5769
name = "thiserror-impl"
5562
-
version = "2.0.16"
5770
+
version = "2.0.17"
5563
5771
source = "registry+https://github.com/rust-lang/crates.io-index"
5564
-
checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960"
5772
+
checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
5565
5773
dependencies = [
5566
5774
"proc-macro2",
5567
5775
"quote",
···
5993
6201
"native-tls",
5994
6202
"rand 0.9.1",
5995
6203
"sha1",
5996
-
"thiserror 2.0.16",
6204
+
"thiserror 2.0.17",
5997
6205
"url",
5998
6206
"utf-8",
5999
6207
]
···
6011
6219
"log",
6012
6220
"rand 0.9.1",
6013
6221
"sha1",
6014
-
"thiserror 2.0.16",
6222
+
"thiserror 2.0.17",
6015
6223
"utf-8",
6016
6224
]
6017
6225
···
6054
6262
"http",
6055
6263
"jetstream",
6056
6264
"log",
6057
-
"lsm-tree",
6265
+
"lsm-tree 2.10.4",
6058
6266
"metrics",
6059
6267
"metrics-exporter-prometheus 0.17.2",
6060
6268
"schemars",
···
6064
6272
"serde_qs",
6065
6273
"sha2",
6066
6274
"tempfile",
6067
-
"thiserror 2.0.16",
6275
+
"thiserror 2.0.17",
6068
6276
"tikv-jemallocator",
6069
6277
"tokio",
6070
6278
"tokio-util",
···
6117
6325
6118
6326
[[package]]
6119
6327
name = "unsigned-varint"
6328
+
version = "0.7.2"
6329
+
source = "registry+https://github.com/rust-lang/crates.io-index"
6330
+
checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105"
6331
+
6332
+
[[package]]
6333
+
name = "unsigned-varint"
6120
6334
version = "0.8.0"
6121
6335
source = "registry+https://github.com/rust-lang/crates.io-index"
6122
6336
checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06"
···
6193
6407
checksum = "62fc7c4ce161f049607ecea654dca3f2d727da5371ae85e2e4f14ce2b98ed67c"
6194
6408
dependencies = [
6195
6409
"byteorder",
6196
-
"byteview",
6410
+
"byteview 0.6.1",
6197
6411
"interval-heap",
6198
6412
"log",
6199
6413
"path-absolutize",
···
6342
6556
]
6343
6557
6344
6558
[[package]]
6559
+
name = "wasm-streams"
6560
+
version = "0.4.2"
6561
+
source = "registry+https://github.com/rust-lang/crates.io-index"
6562
+
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
6563
+
dependencies = [
6564
+
"futures-util",
6565
+
"js-sys",
6566
+
"wasm-bindgen",
6567
+
"wasm-bindgen-futures",
6568
+
"web-sys",
6569
+
]
6570
+
6571
+
[[package]]
6345
6572
name = "web-sys"
6346
6573
version = "0.3.77"
6347
6574
source = "registry+https://github.com/rust-lang/crates.io-index"
···
6400
6627
"reqwest",
6401
6628
"serde",
6402
6629
"serde_json",
6403
-
"thiserror 2.0.16",
6630
+
"thiserror 2.0.17",
6404
6631
"tokio",
6405
6632
"tokio-util",
6406
6633
"url",
···
6758
6985
"nom",
6759
6986
"oid-registry",
6760
6987
"rusticata-macros",
6761
-
"thiserror 2.0.16",
6988
+
"thiserror 2.0.17",
6762
6989
"time",
6763
6990
]
6764
6991
+6
-13
constellation/src/consumer/jetstream.rs
+6
-13
constellation/src/consumer/jetstream.rs
···
226
226
println!("jetstream closed the websocket cleanly.");
227
227
break;
228
228
}
229
-
Err(_) => {
230
-
counter!("jetstream_read_fail", "url" => stream.clone(), "reason" => "dirty close").increment(1);
231
-
println!("jetstream failed to close the websocket cleanly.");
232
-
break;
233
-
}
234
-
Ok(r) => {
235
-
eprintln!("jetstream: close result after error: {r:?}");
236
-
counter!("jetstream_read_fail", "url" => stream.clone(), "reason" => "read error")
237
-
.increment(1);
238
-
// if we didn't immediately get ConnectionClosed, we should keep polling read
239
-
// until we get it.
240
-
continue;
241
-
}
229
+
r => eprintln!("jetstream: close result after error: {r:?}"),
242
230
}
231
+
counter!("jetstream_read_fail", "url" => stream.clone(), "reason" => "read error")
232
+
.increment(1);
233
+
// if we didn't immediately get ConnectionClosed, we should keep polling read
234
+
// until we get it.
235
+
continue;
243
236
}
244
237
};
245
238
+2
links/Cargo.toml
+2
links/Cargo.toml
+3
-2
links/src/lib.rs
+3
-2
links/src/lib.rs
···
1
1
use fluent_uri::Uri;
2
+
use serde::{Deserialize, Serialize};
2
3
3
4
pub mod at_uri;
4
5
pub mod did;
···
6
7
7
8
pub use record::collect_links;
8
9
9
-
#[derive(Debug, Clone, Ord, Eq, PartialOrd, PartialEq)]
10
+
#[derive(Debug, Clone, Ord, Eq, PartialOrd, PartialEq, Serialize, Deserialize)]
10
11
pub enum Link {
11
12
AtUri(String),
12
13
Uri(String),
···
59
60
}
60
61
}
61
62
62
-
#[derive(Debug, PartialEq)]
63
+
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
63
64
pub struct CollectedLink {
64
65
pub path: String,
65
66
pub target: Link,
+41
links/src/record.rs
+41
links/src/record.rs
···
1
+
use dasl::drisl::Value as DrislValue;
1
2
use tinyjson::JsonValue;
2
3
3
4
use crate::{parse_any_link, CollectedLink};
···
36
37
}
37
38
}
38
39
40
+
pub fn walk_drisl(path: &str, v: &DrislValue, found: &mut Vec<CollectedLink>) {
41
+
match v {
42
+
DrislValue::Map(o) => {
43
+
for (key, child) in o {
44
+
walk_drisl(&format!("{path}.{key}"), child, found)
45
+
}
46
+
}
47
+
DrislValue::Array(a) => {
48
+
for child in a {
49
+
let child_p = match child {
50
+
DrislValue::Map(o) => {
51
+
if let Some(DrislValue::Text(t)) = o.get("$type") {
52
+
format!("{path}[{t}]")
53
+
} else {
54
+
format!("{path}[]")
55
+
}
56
+
}
57
+
_ => format!("{path}[]"),
58
+
};
59
+
walk_drisl(&child_p, child, found)
60
+
}
61
+
}
62
+
DrislValue::Text(s) => {
63
+
if let Some(link) = parse_any_link(s) {
64
+
found.push(CollectedLink {
65
+
path: path.to_string(),
66
+
target: link,
67
+
});
68
+
}
69
+
}
70
+
_ => {}
71
+
}
72
+
}
73
+
39
74
pub fn collect_links(v: &JsonValue) -> Vec<CollectedLink> {
40
75
let mut found = vec![];
41
76
walk_record("", v, &mut found);
77
+
found
78
+
}
79
+
80
+
pub fn collect_links_drisl(v: &DrislValue) -> Vec<CollectedLink> {
81
+
let mut found = vec![];
82
+
walk_drisl("", v, &mut found);
42
83
found
43
84
}
44
85
+2
-4
slingshot/src/firehose_cache.rs
+2
-4
slingshot/src/firehose_cache.rs
···
4
4
5
5
pub async fn firehose_cache(
6
6
cache_dir: impl AsRef<Path>,
7
-
memory_mb: usize,
8
-
disk_gb: usize,
9
7
) -> Result<HybridCache<String, CachedRecord>, String> {
10
8
let cache = HybridCacheBuilder::new()
11
9
.with_name("firehose")
12
-
.memory(memory_mb * 2_usize.pow(20))
10
+
.memory(64 * 2_usize.pow(20))
13
11
.with_weighter(|k: &String, v| k.len() + std::mem::size_of_val(v))
14
12
.storage(Engine::large())
15
13
.with_device_options(
16
14
DirectFsDeviceOptions::new(cache_dir)
17
-
.with_capacity(disk_gb * 2_usize.pow(30))
15
+
.with_capacity(2_usize.pow(30)) // TODO: configurable (1GB to have something)
18
16
.with_file_size(16 * 2_usize.pow(20)), // note: this does limit the max cached item size, warning jumbo records
19
17
)
20
18
.build()
+5
-26
slingshot/src/main.rs
+5
-26
slingshot/src/main.rs
···
25
25
/// where to keep disk caches
26
26
#[arg(long)]
27
27
cache_dir: PathBuf,
28
-
/// memory cache size in MB
29
-
#[arg(long, default_value_t = 64)]
30
-
cache_memory_mb: usize,
31
-
/// disk cache size in GB
32
-
#[arg(long, default_value_t = 1)]
33
-
cache_disk_gb: usize,
34
-
/// host for HTTP server (when not using --domain)
35
-
#[arg(long, default_value = "127.0.0.1")]
36
-
host: String,
37
-
/// port for HTTP server (when not using --domain)
38
-
#[arg(long, default_value_t = 3000)]
39
-
port: u16,
40
-
/// port for metrics/prometheus server
41
-
#[arg(long, default_value_t = 8765)]
42
-
metrics_port: u16,
43
28
/// the domain pointing to this server
44
29
///
45
30
/// if present:
···
77
62
78
63
let args = Args::parse();
79
64
80
-
if let Err(e) = install_metrics_server(args.metrics_port) {
65
+
if let Err(e) = install_metrics_server() {
81
66
log::error!("failed to install metrics server: {e:?}");
82
67
} else {
83
-
log::info!("metrics listening at http://0.0.0.0:{}", args.metrics_port);
68
+
log::info!("metrics listening at http://0.0.0.0:8765");
84
69
}
85
70
86
71
std::fs::create_dir_all(&args.cache_dir).map_err(|e| {
···
98
83
log::info!("cache dir ready at at {cache_dir:?}.");
99
84
100
85
log::info!("setting up firehose cache...");
101
-
let cache = firehose_cache(
102
-
cache_dir.join("./firehose"),
103
-
args.cache_memory_mb,
104
-
args.cache_disk_gb,
105
-
)
106
-
.await?;
86
+
let cache = firehose_cache(cache_dir.join("./firehose")).await?;
107
87
log::info!("firehose cache ready.");
108
88
109
89
let mut tasks: tokio::task::JoinSet<Result<(), MainTaskError>> = tokio::task::JoinSet::new();
···
132
112
args.domain,
133
113
args.acme_contact,
134
114
args.certs,
135
-
args.host,
136
-
args.port,
137
115
server_shutdown,
138
116
)
139
117
.await?;
···
194
172
Ok(())
195
173
}
196
174
197
-
fn install_metrics_server(port: u16) -> Result<(), metrics_exporter_prometheus::BuildError> {
175
+
fn install_metrics_server() -> Result<(), metrics_exporter_prometheus::BuildError> {
198
176
log::info!("installing metrics server...");
199
177
let host = [0, 0, 0, 0];
178
+
let port = 8765;
200
179
PrometheusBuilder::new()
201
180
.set_quantiles(&[0.5, 0.9, 0.99, 1.0])?
202
181
.set_bucket_duration(std::time::Duration::from_secs(300))?
+12
-19
slingshot/src/server.rs
+12
-19
slingshot/src/server.rs
···
437
437
Ok(did) => did,
438
438
Err(_) => {
439
439
let Ok(alleged_handle) = Handle::new(identifier) else {
440
-
return invalid("Identifier was not a valid DID or handle");
440
+
return invalid("identifier was not a valid DID or handle");
441
441
};
442
442
443
443
match self.identity.handle_to_did(alleged_handle.clone()).await {
···
453
453
Err(e) => {
454
454
log::debug!("failed to resolve handle: {e}");
455
455
// TODO: ServerError not BadRequest
456
-
return invalid("Errored while trying to resolve handle to DID");
456
+
return invalid("errored while trying to resolve handle to DID");
457
457
}
458
458
}
459
459
}
460
460
};
461
461
let Ok(partial_doc) = self.identity.did_to_partial_mini_doc(&did).await else {
462
-
return invalid("Failed to get DID doc");
462
+
return invalid("failed to get DID doc");
463
463
};
464
464
let Some(partial_doc) = partial_doc else {
465
-
return invalid("Failed to find DID doc");
465
+
return invalid("failed to find DID doc");
466
466
};
467
467
468
468
// ok so here's where we're at:
···
483
483
.handle_to_did(partial_doc.unverified_handle.clone())
484
484
.await
485
485
else {
486
-
return invalid("Failed to get DID doc's handle");
486
+
return invalid("failed to get did doc's handle");
487
487
};
488
488
let Some(handle_did) = handle_did else {
489
-
return invalid("Failed to resolve DID doc's handle");
489
+
return invalid("failed to resolve did doc's handle");
490
490
};
491
491
if handle_did == did {
492
492
partial_doc.unverified_handle.to_string()
···
516
516
let Ok(handle) = Handle::new(repo) else {
517
517
return GetRecordResponse::BadRequest(xrpc_error(
518
518
"InvalidRequest",
519
-
"Repo was not a valid DID or handle",
519
+
"repo was not a valid DID or handle",
520
520
));
521
521
};
522
522
match self.identity.handle_to_did(handle).await {
···
534
534
log::debug!("handle resolution failed: {e}");
535
535
return GetRecordResponse::ServerError(xrpc_error(
536
536
"ResolutionFailed",
537
-
"Errored while trying to resolve handle to DID",
537
+
"errored while trying to resolve handle to DID",
538
538
));
539
539
}
540
540
}
···
544
544
let Ok(collection) = Nsid::new(collection) else {
545
545
return GetRecordResponse::BadRequest(xrpc_error(
546
546
"InvalidRequest",
547
-
"Invalid NSID for collection",
547
+
"invalid NSID for collection",
548
548
));
549
549
};
550
550
551
551
let Ok(rkey) = RecordKey::new(rkey) else {
552
-
return GetRecordResponse::BadRequest(xrpc_error("InvalidRequest", "Invalid rkey"));
552
+
return GetRecordResponse::BadRequest(xrpc_error("InvalidRequest", "invalid rkey"));
553
553
};
554
554
555
555
let cid: Option<Cid> = if let Some(cid) = cid {
556
556
let Ok(cid) = Cid::from_str(&cid) else {
557
-
return GetRecordResponse::BadRequest(xrpc_error("InvalidRequest", "Invalid CID"));
557
+
return GetRecordResponse::BadRequest(xrpc_error("InvalidRequest", "invalid CID"));
558
558
};
559
559
Some(cid)
560
560
} else {
···
694
694
domain: Option<String>,
695
695
acme_contact: Option<String>,
696
696
certs: Option<PathBuf>,
697
-
host: String,
698
-
port: u16,
699
697
shutdown: CancellationToken,
700
698
) -> Result<(), ServerError> {
701
699
let repo = Arc::new(repo);
···
754
752
)
755
753
.await
756
754
} else {
757
-
run(
758
-
TcpListener::bind(format!("{host}:{port}")),
759
-
app,
760
-
shutdown,
761
-
)
762
-
.await
755
+
run(TcpListener::bind("127.0.0.1:3000"), app, shutdown).await
763
756
}
764
757
}
765
758
+8
spacedust/Cargo.toml
+8
spacedust/Cargo.toml
···
4
4
edition = "2024"
5
5
6
6
[dependencies]
7
+
anyhow = "1.0.100"
8
+
async-channel = "2.5.0"
7
9
async-trait = "0.1.88"
8
10
clap = { version = "4.5.40", features = ["derive"] }
9
11
ctrlc = "3.4.7"
12
+
dasl = "0.2.0"
10
13
dropshot = "0.16.2"
11
14
env_logger = "0.11.8"
15
+
fjall = "3.0.0-pre.0"
12
16
futures = "0.3.31"
13
17
http = "1.3.1"
18
+
ipld-core = { version = "0.4.2", features = ["serde"] }
14
19
jetstream = { path = "../jetstream", features = ["metrics"] }
15
20
links = { path = "../links" }
16
21
log = "0.4.27"
17
22
metrics = "0.24.2"
18
23
metrics-exporter-prometheus = { version = "0.17.1", features = ["http-listener"] }
19
24
rand = "0.9.1"
25
+
repo-stream = "0.2.2"
26
+
reqwest = { version = "0.12.24", features = ["json", "stream"] }
20
27
schemars = "0.8.22"
21
28
semver = "1.0.26"
22
29
serde = { version = "1.0.219", features = ["derive"] }
30
+
serde_ipld_dagcbor = "0.6.4"
23
31
serde_json = "1.0.140"
24
32
serde_qs = "1.0.0-rc.3"
25
33
thiserror = "2.0.12"
+21
spacedust/src/bin/import_car_file.rs
+21
spacedust/src/bin/import_car_file.rs
···
1
+
use clap::Parser;
2
+
use std::path::PathBuf;
3
+
4
+
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
5
+
6
+
#[derive(Debug, Parser)]
7
+
struct Args {
8
+
#[arg()]
9
+
file: PathBuf,
10
+
}
11
+
12
+
#[tokio::main]
13
+
async fn main() -> Result<()> {
14
+
env_logger::init();
15
+
16
+
let Args { file } = Args::parse();
17
+
18
+
let _reader = tokio::fs::File::open(file).await?;
19
+
20
+
Ok(())
21
+
}
+258
spacedust/src/bin/import_scraped.rs
+258
spacedust/src/bin/import_scraped.rs
···
1
+
use clap::Parser;
2
+
use links::CollectedLink;
3
+
use repo_stream::{
4
+
DiskBuilder, DiskStore, Driver, DriverBuilder, Processable, drive::DriverBuilderWithProcessor,
5
+
drive::NeedDisk,
6
+
};
7
+
use std::path::PathBuf;
8
+
use std::sync::{
9
+
Arc,
10
+
atomic::{AtomicUsize, Ordering},
11
+
};
12
+
use tokio::{io::AsyncRead, task::JoinSet};
13
+
14
+
type Result<T> = anyhow::Result<T>; //std::result::Result<T, Box<dyn std::error::Error>>;
15
+
16
+
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
17
+
struct CollectedProcessed(CollectedLink);
18
+
19
+
impl Processable for CollectedProcessed {
20
+
fn get_size(&self) -> usize {
21
+
self.0.path.capacity() + self.0.target.as_str().len()
22
+
}
23
+
}
24
+
25
+
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
26
+
struct ErrString(String);
27
+
28
+
impl Processable for ErrString {
29
+
fn get_size(&self) -> usize {
30
+
self.0.capacity()
31
+
}
32
+
}
33
+
34
+
type Processed = std::result::Result<Vec<CollectedProcessed>, ErrString>;
35
+
36
+
/// hacky for now: put errors in strings ๐คทโโ๏ธ
37
+
fn process(block: Vec<u8>) -> Processed {
38
+
let value: dasl::drisl::Value = dasl::drisl::from_slice(&block)
39
+
.map_err(|e| ErrString(format!("failed to parse block with drisl: {e:?}")))?;
40
+
let links = links::record::collect_links_drisl(&value)
41
+
.into_iter()
42
+
.map(CollectedProcessed)
43
+
.collect();
44
+
Ok(links)
45
+
}
46
+
47
+
#[derive(Debug, Parser)]
48
+
struct Args {
49
+
#[arg(long)]
50
+
cars_folder: PathBuf,
51
+
#[arg(long)]
52
+
mem_workers: usize,
53
+
#[arg(long)]
54
+
disk_workers: usize,
55
+
#[arg(long)]
56
+
disk_folder: PathBuf,
57
+
}
58
+
59
+
async fn get_cars(
60
+
cars_folder: PathBuf,
61
+
tx: async_channel::Sender<tokio::io::BufReader<tokio::fs::File>>,
62
+
) -> Result<()> {
63
+
let mut dir = tokio::fs::read_dir(cars_folder).await?;
64
+
while let Some(entry) = dir.next_entry().await? {
65
+
if !entry.file_type().await?.is_file() {
66
+
continue;
67
+
}
68
+
let reader = tokio::fs::File::open(&entry.path()).await?;
69
+
let reader = tokio::io::BufReader::new(reader);
70
+
tx.send(reader).await?;
71
+
}
72
+
Ok(())
73
+
}
74
+
75
+
async fn drive_mem<R: AsyncRead + Unpin + Send + Sync + 'static>(
76
+
f: R,
77
+
builder: &DriverBuilderWithProcessor<Processed>,
78
+
disk_tx: &async_channel::Sender<NeedDisk<R, Processed>>,
79
+
) -> Result<Option<(usize, usize)>> {
80
+
let mut n = 0;
81
+
let mut n_records = 0;
82
+
match builder.load_car(f).await? {
83
+
Driver::Memory(_commit, mut driver) => {
84
+
while let Some(chunk) = driver.next_chunk(512).await? {
85
+
n_records += chunk.len();
86
+
for (_key, links) in chunk {
87
+
match links {
88
+
Ok(links) => n += links.len(),
89
+
Err(e) => eprintln!("wat: {e:?}"),
90
+
}
91
+
}
92
+
}
93
+
Ok(Some((n, n_records)))
94
+
}
95
+
Driver::Disk(need_disk) => {
96
+
disk_tx.send(need_disk).await?;
97
+
Ok(None)
98
+
}
99
+
}
100
+
}
101
+
102
+
async fn mem_worker<R: AsyncRead + Unpin + Send + Sync + 'static>(
103
+
car_rx: async_channel::Receiver<R>,
104
+
disk_tx: async_channel::Sender<NeedDisk<R, Processed>>,
105
+
n: Arc<AtomicUsize>,
106
+
n_records: Arc<AtomicUsize>,
107
+
) -> Result<()> {
108
+
let builder = DriverBuilder::new()
109
+
.with_block_processor(process) // don't care just counting records
110
+
.with_mem_limit_mb(128);
111
+
while let Ok(f) = car_rx.recv().await {
112
+
let driven = match drive_mem(f, &builder, &disk_tx).await {
113
+
Ok(d) => d,
114
+
Err(e) => {
115
+
eprintln!("failed to drive mem: {e:?}. skipping...");
116
+
continue;
117
+
}
118
+
};
119
+
if let Some((drove, recs)) = driven {
120
+
n.fetch_add(drove, Ordering::Relaxed);
121
+
n_records.fetch_add(recs, Ordering::Relaxed);
122
+
}
123
+
}
124
+
Ok(())
125
+
}
126
+
127
+
async fn drive_disk<R: AsyncRead + Unpin>(
128
+
needed: NeedDisk<R, Processed>,
129
+
store: DiskStore,
130
+
) -> Result<(usize, usize, DiskStore)> {
131
+
let (_commit, mut driver) = needed.finish_loading(store).await?;
132
+
let mut n = 0;
133
+
let mut n_records = 0;
134
+
while let Some(chunk) = driver.next_chunk(512).await? {
135
+
n_records += chunk.len();
136
+
for (_key, links) in chunk {
137
+
match links {
138
+
Ok(links) => n += links.len(),
139
+
Err(e) => eprintln!("wat: {e:?}"),
140
+
}
141
+
}
142
+
}
143
+
let store = driver.reset_store().await?;
144
+
Ok((n, n_records, store))
145
+
}
146
+
147
+
async fn disk_worker<R: AsyncRead + Unpin>(
148
+
worker_id: usize,
149
+
disk_rx: async_channel::Receiver<NeedDisk<R, Processed>>,
150
+
folder: PathBuf,
151
+
n: Arc<AtomicUsize>,
152
+
n_records: Arc<AtomicUsize>,
153
+
disk_workers_active: Arc<AtomicUsize>,
154
+
) -> Result<()> {
155
+
let mut file = folder;
156
+
file.push(format!("disk-worker-{worker_id}.sqlite"));
157
+
let builder = DiskBuilder::new().with_cache_size_mb(128);
158
+
let mut store = builder.open(file.clone()).await?;
159
+
while let Ok(needed) = disk_rx.recv().await {
160
+
let active = disk_workers_active.fetch_add(1, Ordering::AcqRel);
161
+
println!("-> disk workers active: {}", active + 1);
162
+
let (drove, records) = match drive_disk(needed, store).await {
163
+
Ok((d, r, s)) => {
164
+
store = s;
165
+
(d, r)
166
+
}
167
+
Err(e) => {
168
+
eprintln!("failed to drive disk: {e:?}. skipping...");
169
+
store = builder.open(file.clone()).await?;
170
+
continue;
171
+
}
172
+
};
173
+
n.fetch_add(drove, Ordering::Relaxed);
174
+
n_records.fetch_add(records, Ordering::Relaxed);
175
+
let were_active = disk_workers_active.fetch_sub(1, Ordering::AcqRel);
176
+
println!("<- disk workers active: {}", were_active - 1);
177
+
}
178
+
Ok(())
179
+
}
180
+
181
+
#[tokio::main]
182
+
async fn main() -> Result<()> {
183
+
env_logger::init();
184
+
185
+
let Args {
186
+
cars_folder,
187
+
disk_folder,
188
+
disk_workers,
189
+
mem_workers,
190
+
} = Args::parse();
191
+
192
+
let mut set = JoinSet::<Result<()>>::new();
193
+
194
+
let (cars_tx, cars_rx) = async_channel::bounded(2);
195
+
set.spawn(get_cars(cars_folder, cars_tx));
196
+
197
+
let n: Arc<AtomicUsize> = Arc::new(0.into());
198
+
let n_records: Arc<AtomicUsize> = Arc::new(0.into());
199
+
let disk_workers_active: Arc<AtomicUsize> = Arc::new(0.into());
200
+
201
+
set.spawn({
202
+
let n = n.clone();
203
+
let n_records = n_records.clone();
204
+
let mut interval = tokio::time::interval(std::time::Duration::from_secs(10));
205
+
async move {
206
+
let mut last_n = n.load(Ordering::Relaxed);
207
+
let mut last_n_records = n.load(Ordering::Relaxed);
208
+
loop {
209
+
interval.tick().await;
210
+
let n = n.load(Ordering::Relaxed);
211
+
let n_records = n_records.load(Ordering::Relaxed);
212
+
let diff_n = n - last_n;
213
+
let diff_records = n_records - last_n_records;
214
+
println!("rate: {} rec/sec; {} n/sec", diff_records / 10, diff_n / 10);
215
+
if n_records > 0 && diff_records == 0 {
216
+
println!("zero encountered, stopping rate calculation polling.");
217
+
break Ok(());
218
+
}
219
+
last_n = n;
220
+
last_n_records = n_records;
221
+
}
222
+
}
223
+
});
224
+
225
+
let (needs_disk_tx, needs_disk_rx) = async_channel::bounded(disk_workers);
226
+
227
+
for _ in 0..mem_workers {
228
+
set.spawn(mem_worker(
229
+
cars_rx.clone(),
230
+
needs_disk_tx.clone(),
231
+
n.clone(),
232
+
n_records.clone(),
233
+
));
234
+
}
235
+
drop(cars_rx);
236
+
drop(needs_disk_tx);
237
+
238
+
tokio::fs::create_dir_all(disk_folder.clone()).await?;
239
+
for id in 0..disk_workers {
240
+
set.spawn(disk_worker(
241
+
id,
242
+
needs_disk_rx.clone(),
243
+
disk_folder.clone(),
244
+
n.clone(),
245
+
n_records.clone(),
246
+
disk_workers_active.clone(),
247
+
));
248
+
}
249
+
drop(needs_disk_rx);
250
+
251
+
while let Some(res) = set.join_next().await {
252
+
println!("task from set joined: {res:?}");
253
+
}
254
+
255
+
eprintln!("total records processed: {n_records:?}; total n: {n:?}");
256
+
257
+
Ok(())
258
+
}
+137
spacedust/src/bin/scrape_pds.rs
+137
spacedust/src/bin/scrape_pds.rs
···
1
+
use clap::Parser;
2
+
use reqwest::Url;
3
+
use serde::Deserialize;
4
+
use std::path::PathBuf;
5
+
use tokio::io::AsyncWriteExt;
6
+
use tokio::{sync::mpsc, time};
7
+
8
+
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
9
+
10
+
use futures::StreamExt;
11
+
12
+
#[derive(Debug, Parser)]
13
+
struct Args {
14
+
#[arg(long)]
15
+
pds: Url,
16
+
#[arg(long)]
17
+
throttle_ms: u64, // 100ms per pds?
18
+
#[arg(long)]
19
+
folder: PathBuf,
20
+
}
21
+
22
+
async fn download_repo(
23
+
client: &reqwest::Client,
24
+
mut pds: Url,
25
+
did: String,
26
+
mut path: PathBuf,
27
+
) -> Result<()> {
28
+
path.push(format!("{did}.car"));
29
+
let f = tokio::fs::File::create(path).await?;
30
+
let mut w = tokio::io::BufWriter::new(f);
31
+
32
+
pds.set_path("/xrpc/com.atproto.sync.getRepo");
33
+
pds.set_query(Some(&format!("did={did}")));
34
+
let mut byte_stream = client.get(pds).send().await?.bytes_stream();
35
+
36
+
while let Some(stuff) = byte_stream.next().await {
37
+
tokio::io::copy(&mut stuff?.as_ref(), &mut w).await?;
38
+
}
39
+
w.flush().await?;
40
+
41
+
Ok(())
42
+
}
43
+
44
+
#[derive(Debug, Deserialize)]
45
+
struct RepoInfo {
46
+
did: String,
47
+
active: bool,
48
+
}
49
+
50
+
#[derive(Debug, Deserialize)]
51
+
struct ListReposResponse {
52
+
cursor: Option<String>,
53
+
repos: Vec<RepoInfo>,
54
+
}
55
+
56
+
fn get_pds_dids(client: reqwest::Client, mut pds: Url) -> mpsc::Receiver<String> {
57
+
let (tx, rx) = mpsc::channel(2);
58
+
tokio::task::spawn(async move {
59
+
pds.set_path("/xrpc/com.atproto.sync.listRepos");
60
+
let mut cursor = None;
61
+
62
+
loop {
63
+
if let Some(c) = cursor {
64
+
pds.set_query(Some(&format!("cursor={c}")));
65
+
}
66
+
let res: ListReposResponse = client
67
+
.get(pds.clone())
68
+
.send()
69
+
.await
70
+
.expect("to send request")
71
+
.error_for_status()
72
+
.expect("to be ok")
73
+
.json()
74
+
.await
75
+
.expect("json response");
76
+
for repo in res.repos {
77
+
if repo.active {
78
+
tx.send(repo.did)
79
+
.await
80
+
.expect("to be able to send on the channel");
81
+
}
82
+
}
83
+
cursor = res.cursor;
84
+
if cursor.is_none() {
85
+
break;
86
+
}
87
+
}
88
+
});
89
+
rx
90
+
}
91
+
92
+
#[tokio::main]
93
+
async fn main() -> Result<()> {
94
+
env_logger::init();
95
+
96
+
let Args {
97
+
pds,
98
+
throttle_ms,
99
+
folder,
100
+
} = Args::parse();
101
+
102
+
tokio::fs::create_dir_all(folder.clone()).await?;
103
+
104
+
let client = reqwest::Client::builder()
105
+
.user_agent("microcosm/spacedust-testing")
106
+
.build()?;
107
+
108
+
let mut dids = get_pds_dids(client.clone(), pds.clone());
109
+
110
+
let mut interval = time::interval(time::Duration::from_millis(throttle_ms));
111
+
let mut oks = 0;
112
+
let mut single_fails = 0;
113
+
let mut double_fails = 0;
114
+
115
+
while let Some(did) = dids.recv().await {
116
+
interval.tick().await;
117
+
println!("did: {did:?}");
118
+
if let Err(e) = download_repo(&client, pds.clone(), did.clone(), folder.clone()).await {
119
+
single_fails += 1;
120
+
eprintln!("failed to download repo for did: {did:?}: {e:?}. retrying in a moment...");
121
+
tokio::time::sleep(time::Duration::from_secs(3)).await;
122
+
interval.reset();
123
+
if let Err(e) = download_repo(&client, pds.clone(), did.clone(), folder.clone()).await {
124
+
double_fails += 1;
125
+
eprintln!("failed again: {e:?}. moving on in a moment...");
126
+
tokio::time::sleep(time::Duration::from_secs(1)).await;
127
+
continue;
128
+
}
129
+
}
130
+
oks += 1;
131
+
println!(" -> done. did: {did:?}");
132
+
}
133
+
134
+
eprintln!("got {oks} repos. single fails: {single_fails}; doubles: {double_fails}.");
135
+
136
+
Ok(())
137
+
}
+1
spacedust/src/lib.rs
+1
spacedust/src/lib.rs
spacedust/src/storage/car/drive.rs
spacedust/src/storage/car/drive.rs
This is a binary file and will not be displayed.
+1
spacedust/src/storage/car/mod.rs
+1
spacedust/src/storage/car/mod.rs
···
1
+
spacedust/src/storage/car/walk.rs
spacedust/src/storage/car/walk.rs
This is a binary file and will not be displayed.
+9
spacedust/src/storage/fjall/mod.rs
+9
spacedust/src/storage/fjall/mod.rs