Cargo.lock  +63 -24
···
 [[package]]
 name = "atrium-api"
-version = "0.25.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d4eb9b4787aba546015c8ccda1d3924c157cee13d67848997fba74ac8144a07"
+version = "0.25.3"
+source = "git+https://github.com/uniphil/atrium?branch=fix%2Fnsid-allow-nonleading-name-digits#c4364f318d337bbc3e3e3aaf97c9f971e95f5f7e"
 dependencies = [
  "atrium-common",
  "atrium-xrpc",
···
 [[package]]
 name = "atrium-common"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba30d2f9e1a8b3db8fc97d0a5f91ee5a28f8acdddb771ad74c1b08eda357ca3d"
+version = "0.1.2"
+source = "git+https://github.com/uniphil/atrium?branch=fix%2Fnsid-allow-nonleading-name-digits#c4364f318d337bbc3e3e3aaf97c9f971e95f5f7e"
 dependencies = [
  "dashmap",
  "lru",
···
 [[package]]
 name = "atrium-xrpc"
-version = "0.12.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "18a9e526cb2ed3e0a2ca78c3ce2a943d9041a68e067dadf42923b523771e07df"
+version = "0.12.3"
+source = "git+https://github.com/uniphil/atrium?branch=fix%2Fnsid-allow-nonleading-name-digits#c4364f318d337bbc3e3e3aaf97c9f971e95f5f7e"
 dependencies = [
  "http",
  "serde",
···
 ]

 [[package]]
+name = "cardinality-estimator-safe"
+version = "4.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b41ec0cd313b46ba3b508377544b25aa1d56d05ce9e657e77dfb001d5e726e53"
+dependencies = [
+ "digest",
+ "enum_dispatch",
+ "serde",
+]
+
+[[package]]
 name = "cc"
 version = "1.2.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
 [[package]]
 name = "chrono"
-version = "0.4.40"
+version = "0.4.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c"
+checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d"
 dependencies = [
  "android-tzdata",
  "iana-time-zone",
···
 [[package]]
 name = "getrandom"
-version = "0.3.2"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0"
+checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
 dependencies = [
  "cfg-if",
  "libc",
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a"
 dependencies = [
- "getrandom 0.3.2",
+ "getrandom 0.3.3",
  "libc",
 ]
···
 [[package]]
 name = "rand"
-version = "0.9.0"
+version = "0.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94"
+checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
 dependencies = [
  "rand_chacha 0.9.0",
  "rand_core 0.9.3",
- "zerocopy 0.8.24",
 ]

 [[package]]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
 dependencies = [
- "getrandom 0.3.2",
+ "getrandom 0.3.3",
 ]

 [[package]]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615"
 dependencies = [
+ "chrono",
  "dyn-clone",
  "schemars_derive",
  "serde",
···
 ]

 [[package]]
+name = "serde_qs"
+version = "1.0.0-rc.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cb0b9062a400c31442e67d1f2b1e7746bebd691110ebee1b7d0c7293b04fab1"
+dependencies = [
+ "itoa",
+ "percent-encoding",
+ "ryu",
+ "serde",
+ "thiserror 2.0.12",
+]
+
+[[package]]
 name = "serde_spanned"
 version = "0.6.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
 ]

 [[package]]
+name = "sha2"
+version = "0.10.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
+[[package]]
 name = "sharded-slab"
 version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
···
 checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf"
 dependencies = [
  "fastrand",
- "getrandom 0.3.2",
+ "getrandom 0.3.3",
  "once_cell",
  "rustix 1.0.5",
  "windows-sys 0.59.0",
···
 [[package]]
 name = "tokio-util"
-version = "0.7.14"
+version = "0.7.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b9590b93e6fcc1739458317cccd391ad3955e2bde8913edf6f95f9e65a8f034"
+checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df"
 dependencies = [
  "bytes",
  "futures-core",
···
  "httparse",
  "log",
  "native-tls",
- "rand 0.9.0",
+ "rand 0.9.1",
  "sha1",
  "thiserror 2.0.12",
  "url",
···
 dependencies = [
  "anyhow",
  "async-trait",
+ "base64 0.22.1",
  "bincode 2.0.1",
- "cardinality-estimator",
+ "cardinality-estimator-safe",
+ "chrono",
  "clap",
  "dropshot",
  "env_logger",
  "fjall",
+ "getrandom 0.3.3",
+ "http",
  "jetstream",
  "log",
  "lsm-tree",
···
  "semver",
  "serde",
  "serde_json",
+ "serde_qs",
+ "sha2",
  "tempfile",
  "thiserror 2.0.12",
  "tikv-jemallocator",
  "tokio",
+ "tokio-util",
 ]
···
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9"
 dependencies = [
- "getrandom 0.3.2",
+ "getrandom 0.3.3",
  "serde",
 ]
constellation/src/server/mod.rs  +2 -2
···
         Ok(acceptable(
             accept,
             GetLinkItemsResponse {
-                total: paged.version.0,
+                total: paged.total,
                 linking_records: paged.items,
                 cursor,
                 query: (*query).clone(),
···
         Ok(acceptable(
             accept,
             GetDidItemsResponse {
-                total: paged.version.0,
+                total: paged.total,
                 linking_dids: paged.items,
                 cursor,
                 query: (*query).clone(),
constellation/src/storage/mem_store.rs  +6
···
                 version: (0, 0),
                 items: Vec::new(),
                 next: None,
+                total: 0,
             });
         };
         let Some(did_rkeys) = paths.get(&Source::new(collection, path)) else {
···
                 version: (0, 0),
                 items: Vec::new(),
                 next: None,
+                total: 0,
             });
         };
···
             version: (total as u64, gone as u64),
             items,
             next,
+            total: alive as u64,
         })
     }
···
                 version: (0, 0),
                 items: Vec::new(),
                 next: None,
+                total: 0,
             });
         };
         let Some(did_rkeys) = paths.get(&Source::new(collection, path)) else {
···
                 version: (0, 0),
                 items: Vec::new(),
                 next: None,
+                total: 0,
             });
         };
···
             version: (total as u64, gone as u64),
             items,
             next,
+            total: alive as u64,
         })
     }
constellation/src/storage/mod.rs  +19
···
     pub version: (u64, u64), // (collection length, deleted item count) // TODO: change to (total, active)? since dedups isn't "deleted"
     pub items: Vec<T>,
     pub next: Option<u64>,
+    pub total: u64,
 }

 #[derive(Debug, Deserialize, Serialize, PartialEq)]
···
                 version: (0, 0),
                 items: vec![],
                 next: None,
+                total: 0,
             }
         );
         assert_eq!(
···
                 version: (0, 0),
                 items: vec![],
                 next: None,
+                total: 0,
             }
         );
         assert_eq!(storage.get_all_counts("bad-example.com")?, HashMap::new());
···
                     rkey: "asdf".into(),
                 }],
                 next: None,
+                total: 1,
             }
         );
         assert_eq!(
···
                 version: (1, 0),
                 items: vec!["did:plc:asdf".into()],
                 next: None,
+                total: 1,
             }
         );
         assert_stats(storage.get_stats()?, 1..=1, 1..=1, 1..=1);
···
                     },
                 ],
                 next: Some(3),
+                total: 5,
             }
         );
         assert_eq!(
···
                 version: (5, 0),
                 items: vec!["did:plc:asdf-5".into(), "did:plc:asdf-4".into()],
                 next: Some(3),
+                total: 5,
             }
         );
         let links = storage.get_links(
···
                     },
                 ],
                 next: Some(1),
+                total: 5,
             }
         );
         assert_eq!(
···
                 version: (5, 0),
                 items: vec!["did:plc:asdf-3".into(), "did:plc:asdf-2".into()],
                 next: Some(1),
+                total: 5,
             }
         );
         let links = storage.get_links(
···
                     rkey: "asdf".into(),
                 },],
                 next: None,
+                total: 5,
             }
         );
         assert_eq!(
···
                 version: (5, 0),
                 items: vec!["did:plc:asdf-1".into()],
                 next: None,
+                total: 5,
             }
         );
         assert_stats(storage.get_stats()?, 5..=5, 1..=1, 5..=5);
···
                     },
                 ],
                 next: Some(2),
+                total: 4,
             }
         );
         let links = storage.get_links(
···
                     },
                 ],
                 next: None,
+                total: 4,
             }
         );
         assert_stats(storage.get_stats()?, 4..=4, 1..=1, 4..=4);
···
                     },
                 ],
                 next: Some(2),
+                total: 4,
             }
         );
         storage.push(
···
                     },
                 ],
                 next: None,
+                total: 5,
             }
         );
         assert_stats(storage.get_stats()?, 5..=5, 1..=1, 5..=5);
···
                     },
                 ],
                 next: Some(2),
+                total: 4,
             }
         );
         storage.push(
···
                     rkey: "asdf".into(),
                 },],
                 next: None,
+                total: 3,
             }
         );
         assert_stats(storage.get_stats()?, 4..=4, 1..=1, 3..=3);
···
                     },
                 ],
                 next: Some(2),
+                total: 4,
             }
         );
         storage.push(
···
                     rkey: "asdf".into(),
                 },],
                 next: None,
+                total: 4,
             }
         );
         assert_stats(storage.get_stats()?, 4..=4, 1..=1, 4..=4);
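Why both `version` and `total`? `version.0` is the raw collection length including deleted items, so handlers that reported it as a "total" were over-counting. A minimal sketch of the paged type's new shape (the struct name `Paged` is a stand-in; the extract only shows its fields):

```rust
// Stand-in name: the diff shows only the fields of the paged result type.
pub struct Paged<T> {
    // (collection length, deleted item count) -- still drives paging
    pub version: (u64, u64),
    pub items: Vec<T>,
    pub next: Option<u64>,
    // new: alive items only, i.e. collection length minus deletions
    pub total: u64,
}

// server/mod.rs above switches its response "total" from the raw length
// to the alive count:
fn response_total<T>(paged: &Paged<T>) -> u64 {
    paged.total // was: paged.version.0
}
```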
constellation/src/storage/rocks_store.rs  +4
···
                 version: (0, 0),
                 items: Vec::new(),
                 next: None,
+                total: 0,
             });
         };
···
             version: (total, gone),
             items,
             next,
+            total: alive,
         })
     }
···
                 version: (0, 0),
                 items: Vec::new(),
                 next: None,
+                total: 0,
             });
         };
···
             version: (total, gone),
             items,
             next,
+            total: alive,
         })
     }
jetstream/Cargo.toml  +1 -1
···
 [dependencies]
 async-trait = "0.1.83"
+atrium-api = { git = "https://github.com/uniphil/atrium", branch = "fix/nsid-allow-nonleading-name-digits", default-features = false, features = [
     "namespace-appbsky",
 ] }
 tokio = { version = "1.44.2", features = ["full", "sync", "time"] }
jetstream/src/events.rs  +2 -1
···
     ///
     /// Warning: this exploits the internal implementation detail of jetstream cursors
     /// being ~microsecond timestamps.
-    pub fn at(t: SystemTime) -> Self {
+    pub fn at(t: impl Into<SystemTime>) -> Self {
         let unix_dt = t
+            .into()
             .duration_since(UNIX_EPOCH)
             .expect("cannot set jetstream cursor earlier than unix epoch");
         Self(unix_dt.as_micros() as u64)
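Loosening `Cursor::at` from `SystemTime` to `impl Into<SystemTime>` means callers no longer convert by hand; in particular chrono's `DateTime<Utc>` (added to ufos in this same change) converts via `From`. A small sketch, assuming chrono's default `std` conversions:

```rust
use chrono::{DateTime, Utc};
use jetstream::events::Cursor;
use std::time::SystemTime;

fn main() {
    // DateTime<Utc> implements Into<SystemTime>, so a parsed timestamp can
    // now be handed to Cursor::at directly.
    let parsed: DateTime<Utc> = "2025-01-01T00:00:00Z".parse().unwrap();
    let from_chrono = Cursor::at(parsed);
    // existing callers passing SystemTime still compile unchanged
    let from_std = Cursor::at(SystemTime::now());
    let _ = (from_chrono, from_std);
}
```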
jetstream/src/lib.rs  +74 -81
···
             retry_attempt += 1;
             if let Ok((ws_stream, _)) = connect_async(req).await {
                 let t_connected = Instant::now();
-                log::trace!("jetstream connected. starting websocket task...");
+                log::info!("jetstream connected. starting websocket task...");
                 if let Err(e) =
                     websocket_task(dict, ws_stream, send_channel.clone(), &mut last_cursor)
                         .await
···
                 }
                 log::error!("Jetstream closed after encountering error: {e:?}");
             } else {
-                log::error!("Jetstream connection closed cleanly");
+                log::warn!("Jetstream connection closed cleanly");
             }
             if t_connected.elapsed() > Duration::from_secs(success_threshold_s) {
+                log::warn!("Jetstream: more than {success_threshold_s}s since last reconnect, reconnecting immediately.");
                 retry_attempt = 0;
             }
         }

         if retry_attempt >= max_retries {
-            log::error!("hit max retries, bye");
+            log::error!("jetstream: hit max retries, bye");
             break;
         }
···
         let mut closing_connection = false;
         loop {
             match socket_read.next().await {
-                Some(Ok(message)) => {
-                    match message {
-                        Message::Text(json) => {
-                            let event: JetstreamEvent = match serde_json::from_str(&json) {
-                                Ok(ev) => ev,
-                                Err(e) => {
-                                    log::warn!(
-                                        "failed to parse json: {e:?} (from {})",
-                                        json.get(..24).unwrap_or(&json)
-                                    );
-                                    continue;
-                                }
-                            };
-                            let event_cursor = event.cursor;
-
-                            if let Some(last) = last_cursor {
-                                if event_cursor <= *last {
-                                    log::warn!("event cursor {event_cursor:?} was older than the last one: {last:?}. dropping event.");
-                                    continue;
-                                }
-                            }
-
-                            if send_channel.send(event).await.is_err() {
-                                // We can assume that all receivers have been dropped, so we can close
-                                // the connection and exit the task.
-                                log::info!(
-                                    "All receivers for the Jetstream connection have been dropped, closing connection."
-                                );
-                                socket_write.close().await?;
-                                return Err(JetstreamEventError::ReceiverClosedError);
-                            } else if let Some(last) = last_cursor.as_mut() {
-                                *last = event_cursor;
-                            }
-                        }
-                        Message::Binary(zstd_json) => {
-                            let mut cursor = IoCursor::new(zstd_json);
-                            let decoder = zstd::stream::Decoder::with_prepared_dictionary(
-                                &mut cursor,
-                                &dictionary,
-                            )
-                            .map_err(JetstreamEventError::CompressionDictionaryError)?;
-
-                            let event: JetstreamEvent = match serde_json::from_reader(decoder) {
-                                Ok(ev) => ev,
-                                Err(e) => {
-                                    log::warn!("failed to parse json: {e:?}");
-                                    continue;
-                                }
-                            };
-                            let event_cursor = event.cursor;
-
-                            if let Some(last) = last_cursor {
-                                if event_cursor <= *last {
-                                    log::warn!("event cursor {event_cursor:?} was older than the last one: {last:?}. dropping event.");
-                                    continue;
-                                }
-                            }
-
-                            if send_channel.send(event).await.is_err() {
-                                // We can assume that all receivers have been dropped, so we can close
-                                // the connection and exit the task.
-                                log::info!(
-                                    "All receivers for the Jetstream connection have been dropped, closing connection."
-                                );
-                                socket_write.close().await?;
-                                return Err(JetstreamEventError::ReceiverClosedError);
-                            } else if let Some(last) = last_cursor.as_mut() {
-                                *last = event_cursor;
-                            }
-                        }
-                        Message::Ping(vec) => {
-                            log::trace!("Ping recieved, responding");
-                            socket_write
-                                .send(Message::Pong(vec))
-                                .await
-                                .map_err(JetstreamEventError::PingPongError)?;
-                        }
-                        Message::Close(close_frame) => {
-                            log::trace!("Close recieved. I guess we just log here?");
-                            if let Some(close_frame) = close_frame {
-                                let reason = close_frame.reason;
-                                let code = close_frame.code;
-                                log::trace!("Connection closed. Reason: {reason}, Code: {code}");
-                            }
-                        }
-                        Message::Pong(pong) => {
-                            let pong_payload = String::from_utf8(pong.to_vec())
-                                .unwrap_or("Invalid payload".to_string());
-                            log::trace!("Pong recieved. Payload: {pong_payload}");
-                        }
-                        Message::Frame(_) => (),
-                    }
-                }
+                Some(Ok(message)) => match message {
+                    Message::Text(json) => {
+                        let event: JetstreamEvent = match serde_json::from_str(&json) {
+                            Ok(ev) => ev,
+                            Err(e) => {
+                                log::warn!(
+                                    "failed to parse json: {e:?} (from {})",
+                                    json.get(..24).unwrap_or(&json)
+                                );
+                                continue;
+                            }
+                        };
+                        let event_cursor = event.cursor;
+
+                        if let Some(last) = last_cursor {
+                            if event_cursor <= *last {
+                                log::warn!("event cursor {event_cursor:?} was not newer than the last one: {last:?}. dropping event.");
+                                continue;
+                            }
+                        }
+
+                        if send_channel.send(event).await.is_err() {
+                            log::warn!(
+                                "All receivers for the Jetstream connection have been dropped, closing connection."
+                            );
+                            socket_write.close().await?;
+                            return Err(JetstreamEventError::ReceiverClosedError);
+                        } else if let Some(last) = last_cursor.as_mut() {
+                            *last = event_cursor;
+                        }
+                    }
+                    Message::Binary(zstd_json) => {
+                        let mut cursor = IoCursor::new(zstd_json);
+                        let decoder =
+                            zstd::stream::Decoder::with_prepared_dictionary(&mut cursor, &dictionary)
+                                .map_err(JetstreamEventError::CompressionDictionaryError)?;
+
+                        let event: JetstreamEvent = match serde_json::from_reader(decoder) {
+                            Ok(ev) => ev,
+                            Err(e) => {
+                                log::warn!("failed to parse json: {e:?}");
+                                continue;
+                            }
+                        };
+                        let event_cursor = event.cursor;
+
+                        if let Some(last) = last_cursor {
+                            if event_cursor <= *last {
+                                log::warn!("event cursor {event_cursor:?} was not newer than the last one: {last:?}. dropping event.");
+                                continue;
+                            }
+                        }
+
+                        if send_channel.send(event).await.is_err() {
+                            log::warn!(
+                                "All receivers for the Jetstream connection have been dropped, closing connection."
+                            );
+                            socket_write.close().await?;
+                            return Err(JetstreamEventError::ReceiverClosedError);
+                        } else if let Some(last) = last_cursor.as_mut() {
+                            *last = event_cursor;
+                        }
+                    }
+                    Message::Ping(vec) => {
+                        log::trace!("Ping recieved, responding");
+                        socket_write
+                            .send(Message::Pong(vec))
+                            .await
+                            .map_err(JetstreamEventError::PingPongError)?;
+                    }
+                    Message::Close(close_frame) => {
+                        log::trace!("Close recieved. I guess we just log here?");
+                        if let Some(close_frame) = close_frame {
+                            let reason = close_frame.reason;
+                            let code = close_frame.code;
+                            log::trace!("Connection closed. Reason: {reason}, Code: {code}");
+                        }
+                    }
+                    Message::Pong(pong) => {
+                        let pong_payload =
+                            String::from_utf8(pong.to_vec()).unwrap_or("Invalid payload".to_string());
+                        log::trace!("Pong recieved. Payload: {pong_payload}");
+                    }
+                    Message::Frame(_) => (),
+                },
                 Some(Err(error)) => {
                     log::error!("Web socket error: {error}");
                     closing_connection = true;
ufos/Cargo.toml  +9 -2
···
 [dependencies]
 anyhow = "1.0.97"
 async-trait = "0.1.88"
+base64 = "0.22.1"
 bincode = { version = "2.0.1", features = ["serde"] }
-cardinality-estimator = { version = "1.0.2", features = ["with_serde"] }
+cardinality-estimator-safe = { version = "4.0.1", features = ["with_serde", "with_digest"] }
+chrono = { version = "0.4.41", features = ["serde"] }
 clap = { version = "4.5.31", features = ["derive"] }
 dropshot = "0.16.0"
 env_logger = "0.11.7"
 fjall = { version = "2.8.0", features = ["lz4"] }
+getrandom = "0.3.3"
+http = "1.3.1"
 jetstream = { path = "../jetstream" }
 log = "0.4.26"
 lsm-tree = "2.6.6"
-schemars = { version = "0.8.22", features = ["raw_value"] }
+schemars = { version = "0.8.22", features = ["raw_value", "chrono"] }
 semver = "1.0.26"
 serde = "1.0.219"
 serde_json = "1.0.140"
+serde_qs = "1.0.0-rc.3"
+sha2 = "0.10.9"
 thiserror = "2.0.12"
 tokio = { version = "1.44.2", features = ["full", "sync", "time"] }
+tokio-util = "0.7.15"

 [target.'cfg(not(target_env = "msvc"))'.dependencies]
 tikv-jemallocator = "0.6.0"
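`getrandom` appears here alongside `cardinality-estimator-safe` and `sha2` because batches are now keyed with a per-run sketch secret (`SketchSecretPrefix`, threaded through `consumer.rs` below), presumably so the hash-based cardinality sketches aren't attacker-predictable. A hypothetical sketch of generating such a secret with getrandom 0.3 (the helper name and secret size are illustrative, not from the diff):

```rust
// Hypothetical helper, not from the diff: getrandom 0.3 exposes `fill`
// for filling a buffer with OS randomness.
fn new_sketch_secret<const N: usize>() -> Result<[u8; N], getrandom::Error> {
    let mut secret = [0u8; N];
    getrandom::fill(&mut secret)?;
    Ok(secret)
}
```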
ufos/fuzz/fuzz_targets/counts_value.rs  +1 -1
ufos/readme.md  +13
···
 cargo clean
 ```

+for bonilla but 64-bit? (rp4)
+```bash
+cross build --release --target aarch64-unknown-linux-gnu && scp ../target/aarch64-unknown-linux-gnu/release/ufos pi@bonilla.local:ufos
+# ^^ fails due to linker?
+
+cross build --release --target aarch64-unknown-linux-musl && scp ../target/aarch64-unknown-linux-musl/release/ufos pi@bonilla.local:ufos
+# seems to work
+
+rsync -avhP ufos-bff-rl/ pi@bonilla:/mnt/ufos-db/
+
+RUST_LOG=info ./ufos --jetstream us-west-2 --data /mnt/ufos-db/
+```
+
 nginx forward proxy for websocket (run this on another host):

 ```nginx
ufos/src/consumer.rs  +45 -15
···
+use crate::store_types::SketchSecretPrefix;
 use jetstream::{
     events::{Cursor, EventKind, JetstreamEvent},
     exports::{Did, Nsid},
···
 use std::mem;
 use std::time::Duration;
 use tokio::sync::mpsc::{channel, Receiver, Sender};
+use tokio::time::{timeout, Interval};

 use crate::error::{BatchInsertError, FirehoseEventError};
 use crate::{DeleteAccount, EventBatch, UFOsCommit};
···
 pub const MAX_BATCHED_COLLECTIONS: usize = 64; // hard limit, MAX_BATCHED_RECORDS applies per-collection
 pub const MIN_BATCH_SPAN_SECS: f64 = 2.; // breathe
 pub const MAX_BATCH_SPAN_SECS: f64 = 60.; // hard limit, pause consumer if we're unable to send by now
-pub const SEND_TIMEOUT_S: f64 = 15.; // if the channel is blocked longer than this, something is probably up
-pub const BATCH_QUEUE_SIZE: usize = 1; // nearly-rendez-vous
+pub const SEND_TIMEOUT_S: f64 = 150.; // if the channel is blocked longer than this, something is probably up
+pub const BATCH_QUEUE_SIZE: usize = 64; // used to be 1, but sometimes inserts are just really slow????????

 pub type LimitedBatch = EventBatch<MAX_BATCHED_RECORDS>;
···
     jetstream_receiver: JetstreamReceiver,
     batch_sender: Sender<LimitedBatch>,
     current_batch: CurrentBatch,
+    sketch_secret: SketchSecretPrefix,
+    rate_limit: Interval,
 }

 pub async fn consume(
     jetstream_endpoint: &str,
     cursor: Option<Cursor>,
     no_compress: bool,
+    sketch_secret: SketchSecretPrefix,
 ) -> anyhow::Result<Receiver<LimitedBatch>> {
     let endpoint = DefaultJetstreamEndpoints::endpoint_or_shortcut(jetstream_endpoint);
     if endpoint == jetstream_endpoint {
···
         .connect_cursor(cursor)
         .await?;
     let (batch_sender, batch_reciever) = channel::<LimitedBatch>(BATCH_QUEUE_SIZE);
-    let mut batcher = Batcher::new(jetstream_receiver, batch_sender);
-    tokio::task::spawn(async move { batcher.run().await });
+    let mut batcher = Batcher::new(jetstream_receiver, batch_sender, sketch_secret);
+    tokio::task::spawn(async move {
+        let r = batcher.run().await;
+        log::warn!("batcher ended: {r:?}");
+    });
     Ok(batch_reciever)
 }

 impl Batcher {
-    pub fn new(jetstream_receiver: JetstreamReceiver, batch_sender: Sender<LimitedBatch>) -> Self {
+    pub fn new(
+        jetstream_receiver: JetstreamReceiver,
+        batch_sender: Sender<LimitedBatch>,
+        sketch_secret: SketchSecretPrefix,
+    ) -> Self {
+        let mut rate_limit = tokio::time::interval(std::time::Duration::from_millis(3));
+        rate_limit.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
         Self {
             jetstream_receiver,
             batch_sender,
             current_batch: Default::default(),
+            sketch_secret,
+            rate_limit,
         }
     }

     pub async fn run(&mut self) -> anyhow::Result<()> {
+        // TODO: report errors *from here* probably, since this gets shipped off into a spawned task that might just vanish
         loop {
-            if let Some(event) = self.jetstream_receiver.recv().await {
-                self.handle_event(event).await?
-            } else {
-                anyhow::bail!("channel closed");
+            match timeout(Duration::from_secs_f64(30.), self.jetstream_receiver.recv()).await {
+                Err(_elapsed) => self.no_events_step().await?,
+                Ok(Some(event)) => self.handle_event(event).await?,
+                Ok(None) => anyhow::bail!("channel closed"),
             }
         }
     }

+    async fn no_events_step(&mut self) -> anyhow::Result<()> {
+        let empty = self.current_batch.batch.is_empty();
+        log::info!("no events received, stepping batcher (empty? {empty})");
+        if !empty {
+            self.send_current_batch_now(true, "no events step").await?;
+        }
+        Ok(())
+    }
+
     async fn handle_event(&mut self, event: JetstreamEvent) -> anyhow::Result<()> {
         if let Some(earliest) = &self.current_batch.initial_cursor {
             if event.cursor.duration_since(earliest)? > Duration::from_secs_f64(MAX_BATCH_SPAN_SECS)
             {
-                self.send_current_batch_now(false).await?;
+                self.send_current_batch_now(false, "time since event")
+                    .await?;
             }
         } else {
             self.current_batch.initial_cursor = Some(event.cursor);
···
             if event.cursor.duration_since(earliest)?.as_secs_f64() > MIN_BATCH_SPAN_SECS
                 && self.batch_sender.capacity() == BATCH_QUEUE_SIZE
             {
-                self.send_current_batch_now(true).await?;
+                self.send_current_batch_now(true, "available queue").await?;
             }
         }
         Ok(())
···
             &collection,
             commit,
             MAX_BATCHED_COLLECTIONS,
+            &self.sketch_secret,
         );

         if let Err(BatchInsertError::BatchFull(commit)) = optimistic_res {
-            self.send_current_batch_now(false).await?;
+            self.send_current_batch_now(false, "handle commit").await?;
             self.current_batch.batch.insert_commit_by_nsid(
                 &collection,
                 commit,
                 MAX_BATCHED_COLLECTIONS,
+                &self.sketch_secret,
             )?;
         } else {
             optimistic_res?;
···
     async fn handle_delete_account(&mut self, did: Did, cursor: Cursor) -> anyhow::Result<()> {
         if self.current_batch.batch.account_removes.len() >= MAX_ACCOUNT_REMOVES {
-            self.send_current_batch_now(false).await?;
+            self.send_current_batch_now(false, "delete account").await?;
         }
         self.current_batch
             .batch
···
     // holds up all consumer progress until it can send to the channel
     // use this when the current batch is too full to add more to it
-    async fn send_current_batch_now(&mut self, small: bool) -> anyhow::Result<()> {
+    async fn send_current_batch_now(&mut self, small: bool, referrer: &str) -> anyhow::Result<()> {
         let beginning = match self.current_batch.initial_cursor.map(|c| c.elapsed()) {
             None => "unknown".to_string(),
             Some(Ok(t)) => format!("{:?}", t),
             Some(Err(e)) => format!("+{:?}", e.duration()),
         };
         log::info!(
-            "sending batch now from {beginning}, {}, queue capacity: {}",
+            "sending batch now from {beginning}, {}, queue capacity: {}, referrer: {referrer}",
             if small { "small" } else { "full" },
             self.batch_sender.capacity(),
         );
         let current = mem::take(&mut self.current_batch);
+        self.rate_limit.tick().await;
         self.batch_sender
             .send_timeout(current.batch, Duration::from_secs_f64(SEND_TIMEOUT_S))
             .await?;
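The new `rate_limit` interval (3ms with `MissedTickBehavior::Delay`) puts a floor on the spacing between batch sends without ever bursting: with `Delay`, a late tick reschedules the next one a full period out instead of firing missed ticks back-to-back. A standalone sketch of the same pattern:

```rust
use tokio::time::{interval, Duration, MissedTickBehavior};

#[tokio::main]
async fn main() {
    // same setup as Batcher::new above
    let mut rate_limit = interval(Duration::from_millis(3));
    rate_limit.set_missed_tick_behavior(MissedTickBehavior::Delay);
    for i in 0..5 {
        // the first tick resolves immediately; each later tick waits until
        // at least 3ms after the previous tick actually completed
        rate_limit.tick().await;
        println!("send batch {i}");
    }
}
```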
ufos/src/db_types.rs  +112 -33
···
 pub enum EncodingError {
     #[error("failed to parse Atrium string type: {0}")]
     BadAtriumStringType(&'static str),
+    #[error("Not enough NSID segments for a usable prefix")]
+    NotEnoughNsidSegments,
     #[error("failed to bincode-encode: {0}")]
     BincodeEncodeFailed(#[from] EncodeError),
     #[error("failed to bincode-decode: {0}")]
···
     InvalidTruncated(u64, u64),
 }

+pub type EncodingResult<T> = Result<T, EncodingError>;
+
 fn bincode_conf() -> impl Config {
     standard()
         .with_big_endian()
···
 }

 pub trait DbBytes {
-    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError>;
+    fn to_db_bytes(&self) -> EncodingResult<Vec<u8>>;
     fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError>
     where
         Self: Sized;
+    fn as_prefix_range_end(&self) -> EncodingResult<Vec<u8>> {
+        let bytes = self.to_db_bytes()?;
+        let (_, Bound::Excluded(range_end)) = prefix_to_range(&bytes) else {
+            return Err(EncodingError::BadRangeBound);
+        };
+        Ok(range_end.to_vec())
+    }
+}
+
+pub trait SubPrefixBytes<T> {
+    fn sub_prefix(input: T) -> EncodingResult<Vec<u8>>;
 }

 #[derive(PartialEq)]
···
     pub fn from_pair(prefix: P, suffix: S) -> Self {
         Self { prefix, suffix }
     }
-    pub fn from_prefix_to_db_bytes(prefix: &P) -> Result<Vec<u8>, EncodingError> {
+    pub fn from_prefix_to_db_bytes(prefix: &P) -> EncodingResult<Vec<u8>> {
         prefix.to_db_bytes()
     }
-    pub fn to_prefix_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+    pub fn to_prefix_db_bytes(&self) -> EncodingResult<Vec<u8>> {
         self.prefix.to_db_bytes()
     }
-    pub fn prefix_range_end(prefix: &P) -> Result<Vec<u8>, EncodingError> {
-        let prefix_bytes = prefix.to_db_bytes()?;
-        let (_, Bound::Excluded(range_end)) = prefix_to_range(&prefix_bytes) else {
-            return Err(EncodingError::BadRangeBound);
-        };
-        Ok(range_end.to_vec())
+    pub fn prefix_range_end(prefix: &P) -> EncodingResult<Vec<u8>> {
+        prefix.as_prefix_range_end()
     }
-    pub fn range_end(&self) -> Result<Vec<u8>, EncodingError> {
+    pub fn range_end(&self) -> EncodingResult<Vec<u8>> {
         Self::prefix_range_end(&self.prefix)
     }
     pub fn range(&self) -> Result<Range<Vec<u8>>, EncodingError> {
···
     }
 }

+impl<P: DbBytes + Default, S: DbBytes + Default> Default for DbConcat<P, S> {
+    fn default() -> Self {
+        Self {
+            prefix: Default::default(),
+            suffix: Default::default(),
+        }
+    }
+}
+
 impl<P: DbBytes + std::fmt::Debug, S: DbBytes + std::fmt::Debug> fmt::Debug for DbConcat<P, S> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         write!(f, "DbConcat<{:?} || {:?}>", self.prefix, self.suffix)
···
 }

 impl<P: DbBytes, S: DbBytes> DbBytes for DbConcat<P, S> {
-    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+    fn to_db_bytes(&self) -> EncodingResult<Vec<u8>> {
         let mut combined = self.prefix.to_db_bytes()?;
         combined.append(&mut self.suffix.to_db_bytes()?);
         Ok(combined)
···
 #[derive(Debug, Default, PartialEq)]
 pub struct DbEmpty(());
 impl DbBytes for DbEmpty {
-    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+    fn to_db_bytes(&self) -> EncodingResult<Vec<u8>> {
         Ok(vec![])
     }
     fn from_db_bytes(_: &[u8]) -> Result<(Self, usize), EncodingError> {
···
     }
 }
 impl<S: StaticStr> DbBytes for DbStaticStr<S> {
-    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+    fn to_db_bytes(&self) -> EncodingResult<Vec<u8>> {
         S::static_str().to_string().to_db_bytes()
     }
     fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
···
 where
     T: BincodeEncode + BincodeDecode<()> + UseBincodePlz + Sized + std::fmt::Debug,
 {
-    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+    fn to_db_bytes(&self) -> EncodingResult<Vec<u8>> {
         Ok(encode_to_vec(self, bincode_conf())?)
     }
     fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
···
 /// helper trait: impl on a type to get helpers to implement DbBytes
 pub trait SerdeBytes: serde::Serialize + for<'a> serde::Deserialize<'a> {
-    fn to_bytes(&self) -> Result<Vec<u8>, EncodingError>
+    fn to_bytes(&self) -> EncodingResult<Vec<u8>>
     where
         Self: std::fmt::Debug,
     {
···
 //////

+impl<const N: usize> UseBincodePlz for [u8; N] {}
+
+// bare bytes (NOT prefix-encoded!)
 impl DbBytes for Vec<u8> {
-    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+    fn to_db_bytes(&self) -> EncodingResult<Vec<u8>> {
         Ok(self.to_vec())
     }
+    // greedy, consumes ALL remaining bytes
     fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
         Ok((bytes.to_owned(), bytes.len()))
     }
···
 /// TODO: wrap in another type. it's actually probably not desirable to serialize strings this way
 /// *except* where needed as a prefix.
 impl DbBytes for String {
-    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+    fn to_db_bytes(&self) -> EncodingResult<Vec<u8>> {
         let mut v = self.as_bytes().to_vec();
         if v.contains(&0x00) {
             return Err(EncodingError::StringContainedNull);
···
     }
 }

+impl SubPrefixBytes<&str> for String {
+    fn sub_prefix(input: &str) -> EncodingResult<Vec<u8>> {
+        let v = input.as_bytes();
+        if v.contains(&0x00) {
+            return Err(EncodingError::StringContainedNull);
+        }
+        // NO null terminator!!
+        Ok(v.to_vec())
+    }
+}
+
 impl DbBytes for Did {
     fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
         let (s, n) = decode_from_slice(bytes, bincode_conf())?;
         let me = Self::new(s).map_err(EncodingError::BadAtriumStringType)?;
         Ok((me, n))
     }
-    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+    fn to_db_bytes(&self) -> EncodingResult<Vec<u8>> {
         Ok(encode_to_vec(self.as_ref(), bincode_conf())?)
     }
 }

 impl DbBytes for Nsid {
     fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
-        let (s, n) = decode_from_slice(bytes, bincode_conf())?;
+        let (s, n) = String::from_db_bytes(bytes)?; // null-terminated DbBytes impl!!
         let me = Self::new(s).map_err(EncodingError::BadAtriumStringType)?;
         Ok((me, n))
     }
-    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
-        Ok(encode_to_vec(self.as_ref(), bincode_conf())?)
+    fn to_db_bytes(&self) -> EncodingResult<Vec<u8>> {
+        String::to_db_bytes(&self.to_string()) // null-terminated DbBytes impl!!!!
+    }
+}
+
+impl SubPrefixBytes<&str> for Nsid {
+    fn sub_prefix(input: &str) -> EncodingResult<Vec<u8>> {
+        String::sub_prefix(input)
     }
 }
···
         let me = Self::new(s).map_err(EncodingError::BadAtriumStringType)?;
         Ok((me, n))
     }
-    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+    fn to_db_bytes(&self) -> EncodingResult<Vec<u8>> {
         Ok(encode_to_vec(self.as_ref(), bincode_conf())?)
     }
 }

 impl DbBytes for Cursor {
-    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+    fn to_db_bytes(&self) -> EncodingResult<Vec<u8>> {
         Ok(self.to_raw_u64().to_be_bytes().to_vec())
     }
     fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
···
 }

 impl DbBytes for serde_json::Value {
-    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+    fn to_db_bytes(&self) -> EncodingResult<Vec<u8>> {
         self.to_string().to_db_bytes()
     }
     fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
···
 #[cfg(test)]
 mod test {
-    use super::{Cursor, DbBytes, DbConcat, DbEmpty, DbStaticStr, EncodingError, StaticStr};
+    use super::{
+        Cursor, DbBytes, DbConcat, DbEmpty, DbStaticStr, EncodingResult, Nsid, StaticStr,
+        SubPrefixBytes,
+    };

     #[test]
-    fn test_db_empty() -> Result<(), EncodingError> {
+    fn test_db_empty() -> EncodingResult<()> {
         let original = DbEmpty::default();
         let serialized = original.to_db_bytes()?;
         assert_eq!(serialized.len(), 0);
···
     }

     #[test]
-    fn test_string_roundtrip() -> Result<(), EncodingError> {
+    fn test_string_roundtrip() -> EncodingResult<()> {
         for (case, desc) in [
             ("", "empty string"),
             ("a", "basic string"),
···
     }

     #[test]
-    fn test_string_serialized_lexicographic_sort() -> Result<(), EncodingError> {
+    fn test_string_serialized_lexicographic_sort() -> EncodingResult<()> {
         let aa = "aa".to_string().to_db_bytes()?;
         let b = "b".to_string().to_db_bytes()?;
         assert!(b > aa);
···
     }

     #[test]
-    fn test_string_cursor_prefix_roundtrip() -> Result<(), EncodingError> {
+    fn test_nullstring_can_prefix() -> EncodingResult<()> {
+        for (s, pre, is_pre, desc) in [
+            ("", "", true, "empty strings"),
+            ("", "a", false, "longer prefix"),
+            ("a", "", true, "empty prefix matches"),
+            ("a", "a", true, "whole string matches"),
+            ("a", "b", false, "entirely different"),
+            ("ab", "a", true, "prefix matches"),
+            ("ab", "b", false, "shorter and entirely different"),
+        ] {
+            let serialized = s.to_string().to_db_bytes()?;
+            let prefixed = String::sub_prefix(pre)?;
+            assert_eq!(serialized.starts_with(&prefixed), is_pre, "{}", desc);
+        }
+        Ok(())
+    }
+
+    #[test]
+    fn test_nsid_can_prefix() -> EncodingResult<()> {
+        for (s, pre, is_pre, desc) in [
+            ("ab.cd.ef", "", true, "empty prefix"),
+            ("ab.cd.ef", "a", true, "tiny prefix"),
+            ("ab.cd.ef", "abc", false, "bad prefix"),
+            ("ab.cd.ef", "ab", true, "segment prefix"),
+            ("ab.cd.ef", "ab.cd", true, "multi-segment prefix"),
+            ("ab.cd.ef", "ab.cd.ef", true, "full match"),
+            ("ab.cd.ef", "ab.cd.ef.g", false, "prefix longer"),
+        ] {
+            let serialized = Nsid::new(s.to_string()).unwrap().to_db_bytes()?;
+            let prefixed = Nsid::sub_prefix(pre)?;
+            assert_eq!(serialized.starts_with(&prefixed), is_pre, "{}", desc);
+        }
+        Ok(())
+    }
+
+    #[test]
+    fn test_string_cursor_prefix_roundtrip() -> EncodingResult<()> {
         type TwoThings = DbConcat<String, Cursor>;
         for (lazy_prefix, tired_suffix, desc) in [
             ("", 0, "empty string and cursor"),
···
     }

     #[test]
-    fn test_cursor_string_prefix_roundtrip() -> Result<(), EncodingError> {
+    fn test_cursor_string_prefix_roundtrip() -> EncodingResult<()> {
         type TwoThings = DbConcat<Cursor, String>;
         for (tired_prefix, sad_suffix, desc) in [
             (0, "", "empty string and cursor"),
···
     }

     #[test]
-    fn test_static_str() -> Result<(), EncodingError> {
+    fn test_static_str() -> EncodingResult<()> {
         #[derive(Debug, PartialEq)]
         struct AStaticStr {}
         impl StaticStr for AStaticStr {
···
     }

     #[test]
-    fn test_static_str_empty() -> Result<(), EncodingError> {
+    fn test_static_str_empty() -> EncodingResult<()> {
         #[derive(Debug, PartialEq)]
         struct AnEmptyStr {}
         impl StaticStr for AnEmptyStr {
···
     }

     #[test]
-    fn test_static_prefix() -> Result<(), EncodingError> {
+    fn test_static_prefix() -> EncodingResult<()> {
         #[derive(Debug, PartialEq)]
         struct AStaticPrefix {}
         impl StaticStr for AStaticPrefix {
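The point of `SubPrefixBytes` is that `Nsid`'s `DbBytes` encoding is now the null-terminated `String` encoding, so a scan prefix must not include the terminator; `sub_prefix` produces the raw un-terminated bytes, and the new `as_prefix_range_end` default method turns any encoded prefix into an exclusive upper bound. A sketch of combining them for a namespace scan (the function is illustrative, not from the diff):

```rust
use crate::db_types::{DbBytes, EncodingResult, Nsid, SubPrefixBytes};

// Illustrative only: build the byte range covering every key that starts
// with an NSID namespace prefix, e.g. all of "app.bsky.*".
fn nsid_scan_range(prefix: &str) -> EncodingResult<(Vec<u8>, Vec<u8>)> {
    // un-terminated, so it really is a prefix of the stored encodings
    let start = Nsid::sub_prefix(prefix)?;
    // Vec<u8> implements DbBytes, so the trait's default method applies:
    // an exclusive upper bound just past everything starting with `start`
    let end = start.as_prefix_range_end()?;
    Ok((start, end))
}
```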
ufos/src/error.rs  +4
+57
-11
ufos/src/file_consumer.rs
+57
-11
ufos/src/file_consumer.rs
···
1
use crate::consumer::{Batcher, LimitedBatch, BATCH_QUEUE_SIZE};
2
use anyhow::Result;
3
use jetstream::{error::JetstreamEventError, events::JetstreamEvent};
4
use std::path::PathBuf;
···
8
sync::mpsc::{channel, Receiver, Sender},
9
};
10
11
-
async fn read_jsonl(f: File, sender: Sender<JetstreamEvent>) -> Result<()> {
12
let mut lines = BufReader::new(f).lines();
13
while let Some(line) = lines.next_line().await? {
14
-
let event: JetstreamEvent =
15
-
serde_json::from_str(&line).map_err(JetstreamEventError::ReceivedMalformedJSON)?;
16
-
if sender.send(event).await.is_err() {
17
-
log::warn!("All receivers for the jsonl fixture have been dropped, bye.");
18
-
return Err(JetstreamEventError::ReceiverClosedError.into());
19
}
20
}
21
-
Ok(())
22
}
23
24
-
pub async fn consume(p: PathBuf) -> Result<Receiver<LimitedBatch>> {
25
let f = File::open(p).await?;
26
let (jsonl_sender, jsonl_receiver) = channel::<JetstreamEvent>(16);
27
let (batch_sender, batch_reciever) = channel::<LimitedBatch>(BATCH_QUEUE_SIZE);
28
-
let mut batcher = Batcher::new(jsonl_receiver, batch_sender);
29
-
tokio::task::spawn(async move { read_jsonl(f, jsonl_sender).await });
30
-
tokio::task::spawn(async move { batcher.run().await });
31
Ok(batch_reciever)
32
}
···
 use crate::consumer::{Batcher, LimitedBatch, BATCH_QUEUE_SIZE};
+use crate::store_types::SketchSecretPrefix;
+use crate::Cursor;
 use anyhow::Result;
 use jetstream::{error::JetstreamEventError, events::JetstreamEvent};
 use std::path::PathBuf;
···
     sync::mpsc::{channel, Receiver, Sender},
 };

+async fn read_jsonl(f: File, sender: Sender<JetstreamEvent>, cursor: Option<Cursor>) -> Result<()> {
     let mut lines = BufReader::new(f).lines();
+    if let Some(db_cursor) = cursor {
+        log::info!("jsonl fixture: skipping events before cursor {db_cursor:?}");
+        let mut bad_lines = 0;
+        let mut skipped = 0;
+        while let Some(line) = lines.next_line().await? {
+            let Ok(event) = serde_json::from_str::<JetstreamEvent>(&line) else {
+                bad_lines += 1;
+                continue;
+            };
+            if event.cursor < db_cursor {
+                skipped += 1;
+                continue;
+            }
+            if event.cursor == db_cursor {
+                log::info!("jsonl fixture: found existing db cursor! skipped {skipped} old events and failed parsing {bad_lines} lines");
+                break;
+            }
+            anyhow::bail!("jsonl fixture: did not find existing db cursor, found event cursor {:?} which is newer. bailing.", event.cursor);
+        }
+    } else {
+        log::info!("jsonl fixture: no cursor provided, sending every event");
+    }
+
+    log::info!("jsonl fixture: now sending events");
     while let Some(line) = lines.next_line().await? {
+        match serde_json::from_str::<JetstreamEvent>(&line) {
+            Ok(event) => match sender.send(event).await {
+                Ok(_) => {}
+                Err(e) => {
+                    log::warn!("All receivers for the jsonl fixture have been dropped, bye: {e:?}");
+                    return Err(JetstreamEventError::ReceiverClosedError.into());
+                }
+            },
+            Err(parse_err) => {
+                log::warn!("failed to parse event: {parse_err:?} from event:\n{line}");
+                continue;
+            }
         }
     }
+    log::info!("reached end of jsonl file, looping on noop to keep server alive.");
+    loop {
+        tokio::time::sleep(std::time::Duration::from_secs_f64(10.)).await;
+    }
 }

+pub async fn consume(
+    p: PathBuf,
+    sketch_secret: SketchSecretPrefix,
+    cursor: Option<Cursor>,
+) -> Result<Receiver<LimitedBatch>> {
     let f = File::open(p).await?;
     let (jsonl_sender, jsonl_receiver) = channel::<JetstreamEvent>(16);
     let (batch_sender, batch_reciever) = channel::<LimitedBatch>(BATCH_QUEUE_SIZE);
+    let mut batcher = Batcher::new(jsonl_receiver, batch_sender, sketch_secret);
+    tokio::task::spawn(async move {
+        let r = read_jsonl(f, jsonl_sender, cursor).await;
+        log::warn!("read_jsonl finished: {r:?}");
+    });
+    tokio::task::spawn(async move {
+        let r = batcher.run().await;
+        log::warn!("batcher finished: {r:?}");
+    });
     Ok(batch_reciever)
 }
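
Editor's note: for orientation, a minimal sketch of driving this fixture consumer by hand. It assumes `SketchSecretPrefix` is the `[u8; 16]` array the unit tests elsewhere in this diff pass as `&[0u8; 16]`, and a hypothetical `fixture.jsonl` path; passing `None` for the cursor replays every event in the file.

use std::path::PathBuf;
use ufos::file_consumer;

async fn replay_fixture() -> anyhow::Result<()> {
    let sketch_secret = [0u8; 16]; // assumption: same all-zero secret the tests use
    let mut batches =
        file_consumer::consume(PathBuf::from("fixture.jsonl"), sketch_secret, None).await?;
    while let Some(batch) = batches.recv().await {
        // each LimitedBatch would go to a StoreWriter, as main.rs does via receive_batches
        drop(batch);
    }
    Ok(())
}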

ufos/src/index_html.rs (+51)
···
+pub const INDEX_HTML: &str = r#"<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <title>UFOs API Documentation</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <meta name="description" content="API Documentation for UFOs: Samples and stats for all atproto lexicons." />
+    <style>
+      .custom-header {
+        height: 42px;
+        background-color: var(--scalar-background-1);
+        box-shadow: inset 0 -1px 0 var(--scalar-border-color);
+        color: var(--scalar-color-1);
+        font-size: var(--scalar-font-size-3);
+        font-family: 'Iowan Old Style', 'Palatino Linotype', 'URW Palladio L', P052, serif;
+        padding: 0 18px;
+        justify-content: space-between;
+      }
+      .custom-header,
+      .custom-header nav {
+        display: flex;
+        align-items: center;
+        gap: 18px;
+      }
+      .custom-header a:hover {
+        color: var(--scalar-color-2);
+      }
+    </style>
+  </head>
+  <body>
+    <header class="custom-header scalar-app">
+      <b>a <a href="https://microcosm.blue">microcosm</a> project</b>
+      <nav>
+        <a href="https://bsky.app/profile/microcosm.blue">@microcosm.blue</a>
+        <a href="https://github.com/at-microcosm">github</a>
+      </nav>
+    </header>
+
+    <script id="api-reference" type="application/json" data-url="/openapi"></script>
+
+    <script>
+      var configuration = {
+        theme: 'purple',
+      }
+      document.getElementById('api-reference').dataset.configuration = JSON.stringify(configuration)
+    </script>
+
+    <script src="https://cdn.jsdelivr.net/npm/@scalar/api-reference"></script>
+  </body>
+</html>
+"#;

ufos/src/lib.rs (+297 -164)
···
 pub mod db_types;
 pub mod error;
 pub mod file_consumer;
 pub mod server;
 pub mod storage;
 pub mod storage_fjall;
-pub mod storage_mem;
 pub mod store_types;

 use crate::error::BatchInsertError;
-use cardinality_estimator::CardinalityEstimator;
 use error::FirehoseEventError;
 use jetstream::events::{CommitEvent, CommitOp, Cursor};
 use jetstream::exports::{Did, Nsid, RecordKey};
 use schemars::JsonSchema;
 use serde::Serialize;
 use serde_json::value::RawValue;
 use std::collections::HashMap;

 #[derive(Debug, Default, Clone)]
 pub struct CollectionCommits<const LIMIT: usize> {
-    pub total_seen: usize,
-    pub dids_estimate: CardinalityEstimator<Did>,
     pub commits: Vec<UFOsCommit>,
     head: usize,
-    non_creates: usize,
 }

 impl<const LIMIT: usize> CollectionCommits<LIMIT> {
···
             self.head = 0;
         }
     }
-    pub fn truncating_insert(&mut self, commit: UFOsCommit) -> Result<(), BatchInsertError> {
-        if self.non_creates == LIMIT {
             return Err(BatchInsertError::BatchFull(commit));
         }
-        let did = commit.did.clone();
-        let is_create = commit.action.is_create();
         if self.commits.len() < LIMIT {
             self.commits.push(commit);
-            if self.commits.capacity() > LIMIT {
-                self.commits.shrink_to(LIMIT); // save mem?????? maybe??
-            }
         } else {
             let head_started_at = self.head;
             loop {
                 let candidate = self
···
                     return Err(BatchInsertError::BatchForever);
                 }
             }
-        }
-
-        if is_create {
-            self.total_seen += 1;
-            self.dids_estimate.insert(&did);
-        } else {
-            self.non_creates += 1;
         }

         Ok(())
···
         collection: &Nsid,
         commit: UFOsCommit,
         max_collections: usize,
     ) -> Result<(), BatchInsertError> {
         let map = &mut self.commits_by_nsid;
         if !map.contains_key(collection) && map.len() >= max_collections {
···
         }
         map.entry(collection.clone())
             .or_default()
-            .truncating_insert(commit)?;
         Ok(())
-    }
-    pub fn total_records(&self) -> usize {
-        self.commits_by_nsid.values().map(|v| v.commits.len()).sum()
-    }
-    pub fn total_seen(&self) -> usize {
-        self.commits_by_nsid.values().map(|v| v.total_seen).sum()
     }
     pub fn total_collections(&self) -> usize {
         self.commits_by_nsid.len()
···
         self.account_removes.len()
     }
     pub fn estimate_dids(&self) -> usize {
-        let mut estimator = CardinalityEstimator::<Did>::new();
         for commits in self.commits_by_nsid.values() {
             estimator.merge(&commits.dids_estimate);
         }
···
 }

 #[derive(Debug, Serialize, JsonSchema)]
 pub enum ConsumerInfo {
     Jetstream {
         endpoint: String,
         started_at: u64,
         latest_cursor: Option<u64>,
     },
 }

-#[derive(Debug, Default, PartialEq, Serialize, JsonSchema)]
-pub struct TopCollections {
-    total_records: u64,
     dids_estimate: u64,
-    nsid_child_segments: HashMap<String, TopCollections>,
 }

-// this is not safe from ~DOS
-// todo: remove this and just iterate the all-time rollups to get nsids? (or recent rollups?)
-impl From<TopCollections> for Vec<String> {
-    fn from(tc: TopCollections) -> Self {
-        let mut me = vec![];
-        for (segment, children) in tc.nsid_child_segments {
-            let child_segments: Self = children.into();
-            if child_segments.is_empty() {
-                me.push(segment);
-            } else {
-                for ch in child_segments {
-                    let nsid = format!("{segment}.{ch}");
-                    me.push(nsid);
-                }
-            }
         }
-        me
     }
 }
···
     use super::*;

     #[test]
-    fn test_top_collections_to_nsids() {
-        let empty_tc = TopCollections::default();
-        assert_eq!(Into::<Vec<String>>::into(empty_tc), Vec::<String>::new());
-
-        let tc = TopCollections {
-            nsid_child_segments: HashMap::from([
-                (
-                    "a".to_string(),
-                    TopCollections {
-                        nsid_child_segments: HashMap::from([
-                            ("b".to_string(), TopCollections::default()),
-                            ("c".to_string(), TopCollections::default()),
-                        ]),
-                        ..Default::default()
-                    },
-                ),
-                ("z".to_string(), TopCollections::default()),
-            ]),
-            ..Default::default()
-        };
-
-        let mut nsids: Vec<String> = tc.into();
-        nsids.sort();
-        assert_eq!(nsids, ["a.b", "a.c", "z"]);
-    }
-
-    #[test]
     fn test_truncating_insert_truncates() -> anyhow::Result<()> {
         let mut commits: CollectionCommits<2> = Default::default();

-        commits.truncating_insert(UFOsCommit {
-            cursor: Cursor::from_raw_u64(100),
-            did: Did::new("did:plc:whatever".to_string()).unwrap(),
-            rkey: RecordKey::new("rkey-asdf-a".to_string()).unwrap(),
-            rev: "rev-asdf".to_string(),
-            action: CommitAction::Put(PutAction {
-                record: RawValue::from_string("{}".to_string())?,
-                is_update: false,
-            }),
-        })?;

-        commits.truncating_insert(UFOsCommit {
-            cursor: Cursor::from_raw_u64(101),
-            did: Did::new("did:plc:whatever".to_string()).unwrap(),
-            rkey: RecordKey::new("rkey-asdf-b".to_string()).unwrap(),
-            rev: "rev-asdg".to_string(),
-            action: CommitAction::Put(PutAction {
-                record: RawValue::from_string("{}".to_string())?,
-                is_update: false,
-            }),
-        })?;

-        commits.truncating_insert(UFOsCommit {
-            cursor: Cursor::from_raw_u64(102),
-            did: Did::new("did:plc:whatever".to_string()).unwrap(),
-            rkey: RecordKey::new("rkey-asdf-c".to_string()).unwrap(),
-            rev: "rev-asdh".to_string(),
-            action: CommitAction::Put(PutAction {
-                record: RawValue::from_string("{}".to_string())?,
-                is_update: false,
-            }),
-        })?;

-        assert_eq!(commits.total_seen, 3);
         assert_eq!(commits.dids_estimate.estimate(), 1);
         assert_eq!(commits.commits.len(), 2);
···
     }

     #[test]
     fn test_truncating_insert_does_not_truncate_deletes() -> anyhow::Result<()> {
         let mut commits: CollectionCommits<2> = Default::default();

-        commits.truncating_insert(UFOsCommit {
-            cursor: Cursor::from_raw_u64(100),
-            did: Did::new("did:plc:whatever".to_string()).unwrap(),
-            rkey: RecordKey::new("rkey-asdf-a".to_string()).unwrap(),
-            rev: "rev-asdf".to_string(),
-            action: CommitAction::Cut,
-        })?;

-        commits.truncating_insert(UFOsCommit {
-            cursor: Cursor::from_raw_u64(101),
-            did: Did::new("did:plc:whatever".to_string()).unwrap(),
-            rkey: RecordKey::new("rkey-asdf-b".to_string()).unwrap(),
-            rev: "rev-asdg".to_string(),
-            action: CommitAction::Put(PutAction {
-                record: RawValue::from_string("{}".to_string())?,
-                is_update: false,
-            }),
-        })?;

-        commits.truncating_insert(UFOsCommit {
-            cursor: Cursor::from_raw_u64(102),
-            did: Did::new("did:plc:whatever".to_string()).unwrap(),
-            rkey: RecordKey::new("rkey-asdf-c".to_string()).unwrap(),
-            rev: "rev-asdh".to_string(),
-            action: CommitAction::Put(PutAction {
-                record: RawValue::from_string("{}".to_string())?,
-                is_update: false,
-            }),
-        })?;

-        assert_eq!(commits.total_seen, 2);
         assert_eq!(commits.dids_estimate.estimate(), 1);
         assert_eq!(commits.commits.len(), 2);
···
         let mut commits: CollectionCommits<2> = Default::default();

         commits
-            .truncating_insert(UFOsCommit {
-                cursor: Cursor::from_raw_u64(100),
-                did: Did::new("did:plc:whatever".to_string()).unwrap(),
-                rkey: RecordKey::new("rkey-asdf-a".to_string()).unwrap(),
-                rev: "rev-asdf".to_string(),
-                action: CommitAction::Cut,
-            })
             .unwrap();

         // this create will just be discarded
         commits
-            .truncating_insert(UFOsCommit {
-                cursor: Cursor::from_raw_u64(80),
-                did: Did::new("did:plc:whatever".to_string()).unwrap(),
-                rkey: RecordKey::new("rkey-asdf-zzz".to_string()).unwrap(),
-                rev: "rev-asdzzz".to_string(),
-                action: CommitAction::Put(PutAction {
-                    record: RawValue::from_string("{}".to_string())?,
-                    is_update: false,
-                }),
-            })
             .unwrap();

         commits
-            .truncating_insert(UFOsCommit {
-                cursor: Cursor::from_raw_u64(101),
-                did: Did::new("did:plc:whatever".to_string()).unwrap(),
-                rkey: RecordKey::new("rkey-asdf-b".to_string()).unwrap(),
-                rev: "rev-asdg".to_string(),
-                action: CommitAction::Cut,
-            })
             .unwrap();

-        let res = commits.truncating_insert(UFOsCommit {
-            cursor: Cursor::from_raw_u64(102),
-            did: Did::new("did:plc:whatever".to_string()).unwrap(),
-            rkey: RecordKey::new("rkey-asdf-c".to_string()).unwrap(),
-            rev: "rev-asdh".to_string(),
-            action: CommitAction::Cut,
-        });

         assert!(res.is_err());
         let overflowed = match res {
···
 pub mod db_types;
 pub mod error;
 pub mod file_consumer;
+pub mod index_html;
 pub mod server;
 pub mod storage;
 pub mod storage_fjall;
 pub mod store_types;

+use crate::db_types::{EncodingError, EncodingResult};
 use crate::error::BatchInsertError;
+use crate::store_types::SketchSecretPrefix;
+use cardinality_estimator_safe::{Element, Sketch};
 use error::FirehoseEventError;
 use jetstream::events::{CommitEvent, CommitOp, Cursor};
 use jetstream::exports::{Did, Nsid, RecordKey};
 use schemars::JsonSchema;
 use serde::Serialize;
 use serde_json::value::RawValue;
+use sha2::Sha256;
 use std::collections::HashMap;
+use std::time::Duration;
+
+fn did_element(sketch_secret: &SketchSecretPrefix, did: &Did) -> Element<14> {
+    Element::from_digest_with_prefix::<Sha256>(sketch_secret, did.as_bytes())
+}
+
+pub fn nice_duration(dt: Duration) -> String {
+    let secs = dt.as_secs_f64();
+    if secs < 1. {
+        return format!("{:.0}ms", secs * 1000.);
+    }
+    if secs < 60. {
+        return format!("{secs:.02}s");
+    }
+    let mins = (secs / 60.).floor();
+    let rsecs = secs - (mins * 60.);
+    if mins < 60. {
+        return format!("{mins:.0}m{rsecs:.0}s");
+    }
+    let hrs = (mins / 60.).floor();
+    let rmins = mins - (hrs * 60.);
+    if hrs < 24. {
+        return format!("{hrs:.0}h{rmins:.0}m{rsecs:.0}s");
+    }
+    let days = (hrs / 24.).floor();
+    let rhrs = hrs - (days * 24.);
+    format!("{days:.0}d{rhrs:.0}h{rmins:.0}m{rsecs:.0}s")
+}
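
Editor's note: the formatting tiers above are easiest to read off a few concrete values; a small sketch (the expected strings follow directly from the arithmetic in the function):

use std::time::Duration;

assert_eq!(nice_duration(Duration::from_millis(250)), "250ms");
assert_eq!(nice_duration(Duration::from_secs(75)), "1m15s");
// 3700s = 61m40s, which rolls over into 1h1m40s
assert_eq!(nice_duration(Duration::from_secs(3700)), "1h1m40s");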

 #[derive(Debug, Default, Clone)]
 pub struct CollectionCommits<const LIMIT: usize> {
+    pub creates: usize,
+    pub updates: usize,
+    pub deletes: usize,
+    pub dids_estimate: Sketch<14>,
     pub commits: Vec<UFOsCommit>,
     head: usize,
 }

 impl<const LIMIT: usize> CollectionCommits<LIMIT> {
···
             self.head = 0;
         }
     }
+    /// lossy-ish commit insertion
+    ///
+    /// - new commits are *always* added to the batch or else rejected as full.
+    /// - when LIMIT is reached, new commits can displace existing `creates`.
+    ///   `update`s and `delete`s are *never* displaced.
+    /// - if all batched `creates` have been displaced, the batch is full.
+    ///
+    /// in general it's rare for commits to be displaced except for very high-
+    /// volume collections such as `app.bsky.feed.like`.
+    ///
+    /// it could be nice in the future to retain all batched commits and just
+    /// drop new `creates` after a limit instead.
+    pub fn truncating_insert(
+        &mut self,
+        commit: UFOsCommit,
+        sketch_secret: &SketchSecretPrefix,
+    ) -> Result<(), BatchInsertError> {
+        if (self.updates + self.deletes) == LIMIT {
+            // nothing can be displaced (only `create`s may be displaced)
             return Err(BatchInsertError::BatchFull(commit));
         }
+
+        // every kind of commit counts as "user activity"
+        self.dids_estimate
+            .insert(did_element(sketch_secret, &commit.did));
+
+        match commit.action {
+            CommitAction::Put(PutAction {
+                is_update: false, ..
+            }) => {
+                self.creates += 1;
+            }
+            CommitAction::Put(PutAction {
+                is_update: true, ..
+            }) => {
+                self.updates += 1;
+            }
+            CommitAction::Cut => {
+                self.deletes += 1;
+            }
+        }
+
         if self.commits.len() < LIMIT {
+            // normal insert: there's space left to put a new commit at the end
             self.commits.push(commit);
         } else {
+            // displacement insert: find an old `create` we can displace
             let head_started_at = self.head;
             loop {
                 let candidate = self
···
                     return Err(BatchInsertError::BatchForever);
                 }
             }
         }

         Ok(())
···
         collection: &Nsid,
         commit: UFOsCommit,
         max_collections: usize,
+        sketch_secret: &SketchSecretPrefix,
     ) -> Result<(), BatchInsertError> {
         let map = &mut self.commits_by_nsid;
         if !map.contains_key(collection) && map.len() >= max_collections {
···
         }
         map.entry(collection.clone())
             .or_default()
+            .truncating_insert(commit, sketch_secret)?;
         Ok(())
     }
     pub fn total_collections(&self) -> usize {
         self.commits_by_nsid.len()
···
         self.account_removes.len()
     }
     pub fn estimate_dids(&self) -> usize {
+        let mut estimator = Sketch::<14>::default();
         for commits in self.commits_by_nsid.values() {
             estimator.merge(&commits.dids_estimate);
         }
···
 }

 #[derive(Debug, Serialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
 pub enum ConsumerInfo {
     Jetstream {
         endpoint: String,
         started_at: u64,
         latest_cursor: Option<u64>,
+        rollup_cursor: Option<u64>,
     },
 }

+#[derive(Debug, PartialEq, Serialize, JsonSchema)]
+pub struct NsidCount {
+    nsid: String,
+    creates: u64,
+    // TODO: add updates and deletes
+    dids_estimate: u64,
+}
+
+#[derive(Debug, PartialEq, Serialize, JsonSchema)]
+pub struct PrefixCount {
+    prefix: String,
+    creates: u64,
+    // TODO: add updates and deletes
     dids_estimate: u64,
 }

+#[derive(Debug, PartialEq, Serialize, JsonSchema)]
+#[serde(tag = "type", rename_all = "camelCase")]
+pub enum PrefixChild {
+    Collection(NsidCount),
+    Prefix(PrefixCount),
+}
+
+#[derive(Debug, Serialize, JsonSchema)]
+pub struct NsidPrefix(String);
+impl NsidPrefix {
+    /// Input must not include a trailing dot.
+    pub fn new(pre: &str) -> EncodingResult<Self> {
+        // it's a valid prefix if appending `.name` makes it a valid NSID
+        Nsid::new(format!("{pre}.name")).map_err(EncodingError::BadAtriumStringType)?;
+        // hack (shouldn't really be here): reject prefixes that aren't at least 2 segments long
+        if !pre.contains('.') {
+            return Err(EncodingError::NotEnoughNsidSegments);
         }
+        Ok(Self(pre.to_string()))
+    }
+    pub fn is_group_of(&self, other: &Nsid) -> bool {
+        assert!(
+            other.as_str().starts_with(&self.0),
+            "must be a prefix of other"
+        );
+        self.0 == other.domain_authority()
+    }
+    /// The prefix as initialized (no trailing dot)
+    pub fn as_str(&self) -> &str {
+        self.0.as_str()
+    }
+    /// The prefix with a trailing `.` appended to avoid matching a longer segment
+    pub fn terminated(&self) -> String {
+        format!("{}.", self.0)
+    }
+}
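
Editor's note: a quick sketch of the validation rules just described (valid NSID once `.name` is appended, plus the two-segment minimum):

// "app.bsky" is a valid prefix: "app.bsky.name" parses as an NSID
assert!(NsidPrefix::new("app.bsky").is_ok());
// a single-segment prefix is rejected (it fails NSID validation and the segment-count check)
assert!(NsidPrefix::new("app").is_err());
// terminated() appends a dot so "app.bsky" won't match e.g. "app.bskyother.thing"
assert_eq!(NsidPrefix::new("app.bsky").unwrap().terminated(), "app.bsky.");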
+
+#[derive(Debug, Serialize, JsonSchema)]
+pub struct JustCount {
+    creates: u64,
+    updates: u64,
+    deletes: u64,
+    dids_estimate: u64,
+}
+
+#[derive(Debug)]
+pub enum OrderCollectionsBy {
+    Lexi { cursor: Option<Vec<u8>> },
+    RecordsCreated,
+    DidsEstimate,
+}
+impl Default for OrderCollectionsBy {
+    fn default() -> Self {
+        Self::Lexi { cursor: None }
     }
 }

···
     use super::*;

     #[test]
     fn test_truncating_insert_truncates() -> anyhow::Result<()> {
         let mut commits: CollectionCommits<2> = Default::default();

+        commits.truncating_insert(
+            UFOsCommit {
+                cursor: Cursor::from_raw_u64(100),
+                did: Did::new("did:plc:whatever".to_string()).unwrap(),
+                rkey: RecordKey::new("rkey-asdf-a".to_string()).unwrap(),
+                rev: "rev-asdf".to_string(),
+                action: CommitAction::Put(PutAction {
+                    record: RawValue::from_string("{}".to_string())?,
+                    is_update: false,
+                }),
+            },
+            &[0u8; 16],
+        )?;

+        commits.truncating_insert(
+            UFOsCommit {
+                cursor: Cursor::from_raw_u64(101),
+                did: Did::new("did:plc:whatever".to_string()).unwrap(),
+                rkey: RecordKey::new("rkey-asdf-b".to_string()).unwrap(),
+                rev: "rev-asdg".to_string(),
+                action: CommitAction::Put(PutAction {
+                    record: RawValue::from_string("{}".to_string())?,
+                    is_update: false,
+                }),
+            },
+            &[0u8; 16],
+        )?;

+        commits.truncating_insert(
+            UFOsCommit {
+                cursor: Cursor::from_raw_u64(102),
+                did: Did::new("did:plc:whatever".to_string()).unwrap(),
+                rkey: RecordKey::new("rkey-asdf-c".to_string()).unwrap(),
+                rev: "rev-asdh".to_string(),
+                action: CommitAction::Put(PutAction {
+                    record: RawValue::from_string("{}".to_string())?,
+                    is_update: false,
+                }),
+            },
+            &[0u8; 16],
+        )?;

+        assert_eq!(commits.creates, 3);
         assert_eq!(commits.dids_estimate.estimate(), 1);
         assert_eq!(commits.commits.len(), 2);
···
     }

     #[test]
+    fn test_truncating_insert_counts_updates() -> anyhow::Result<()> {
+        let mut commits: CollectionCommits<2> = Default::default();
+
+        commits.truncating_insert(
+            UFOsCommit {
+                cursor: Cursor::from_raw_u64(100),
+                did: Did::new("did:plc:whatever".to_string()).unwrap(),
+                rkey: RecordKey::new("rkey-asdf-a".to_string()).unwrap(),
+                rev: "rev-asdf".to_string(),
+                action: CommitAction::Put(PutAction {
+                    record: RawValue::from_string("{}".to_string())?,
+                    is_update: true,
+                }),
+            },
+            &[0u8; 16],
+        )?;
+
+        assert_eq!(commits.creates, 0);
+        assert_eq!(commits.updates, 1);
+        assert_eq!(commits.deletes, 0);
+        assert_eq!(commits.dids_estimate.estimate(), 1);
+        assert_eq!(commits.commits.len(), 1);
+        Ok(())
+    }
+
+    #[test]
     fn test_truncating_insert_does_not_truncate_deletes() -> anyhow::Result<()> {
         let mut commits: CollectionCommits<2> = Default::default();

+        commits.truncating_insert(
+            UFOsCommit {
+                cursor: Cursor::from_raw_u64(100),
+                did: Did::new("did:plc:whatever".to_string()).unwrap(),
+                rkey: RecordKey::new("rkey-asdf-a".to_string()).unwrap(),
+                rev: "rev-asdf".to_string(),
+                action: CommitAction::Cut,
+            },
+            &[0u8; 16],
+        )?;

+        commits.truncating_insert(
+            UFOsCommit {
+                cursor: Cursor::from_raw_u64(101),
+                did: Did::new("did:plc:whatever".to_string()).unwrap(),
+                rkey: RecordKey::new("rkey-asdf-b".to_string()).unwrap(),
+                rev: "rev-asdg".to_string(),
+                action: CommitAction::Put(PutAction {
+                    record: RawValue::from_string("{}".to_string())?,
+                    is_update: false,
+                }),
+            },
+            &[0u8; 16],
+        )?;

+        commits.truncating_insert(
+            UFOsCommit {
+                cursor: Cursor::from_raw_u64(102),
+                did: Did::new("did:plc:whatever".to_string()).unwrap(),
+                rkey: RecordKey::new("rkey-asdf-c".to_string()).unwrap(),
+                rev: "rev-asdh".to_string(),
+                action: CommitAction::Put(PutAction {
+                    record: RawValue::from_string("{}".to_string())?,
+                    is_update: false,
+                }),
+            },
+            &[0u8; 16],
+        )?;

+        assert_eq!(commits.creates, 2);
+        assert_eq!(commits.deletes, 1);
         assert_eq!(commits.dids_estimate.estimate(), 1);
         assert_eq!(commits.commits.len(), 2);
···
         let mut commits: CollectionCommits<2> = Default::default();

         commits
+            .truncating_insert(
+                UFOsCommit {
+                    cursor: Cursor::from_raw_u64(100),
+                    did: Did::new("did:plc:whatever".to_string()).unwrap(),
+                    rkey: RecordKey::new("rkey-asdf-a".to_string()).unwrap(),
+                    rev: "rev-asdf".to_string(),
+                    action: CommitAction::Cut,
+                },
+                &[0u8; 16],
+            )
             .unwrap();

         // this create will just be discarded
         commits
+            .truncating_insert(
+                UFOsCommit {
+                    cursor: Cursor::from_raw_u64(80),
+                    did: Did::new("did:plc:whatever".to_string()).unwrap(),
+                    rkey: RecordKey::new("rkey-asdf-zzz".to_string()).unwrap(),
+                    rev: "rev-asdzzz".to_string(),
+                    action: CommitAction::Put(PutAction {
+                        record: RawValue::from_string("{}".to_string())?,
+                        is_update: false,
+                    }),
+                },
+                &[0u8; 16],
+            )
             .unwrap();

         commits
+            .truncating_insert(
+                UFOsCommit {
+                    cursor: Cursor::from_raw_u64(101),
+                    did: Did::new("did:plc:whatever".to_string()).unwrap(),
+                    rkey: RecordKey::new("rkey-asdf-b".to_string()).unwrap(),
+                    rev: "rev-asdg".to_string(),
+                    action: CommitAction::Cut,
+                },
+                &[0u8; 16],
+            )
             .unwrap();

+        let res = commits.truncating_insert(
+            UFOsCommit {
+                cursor: Cursor::from_raw_u64(102),
+                did: Did::new("did:plc:whatever".to_string()).unwrap(),
+                rkey: RecordKey::new("rkey-asdf-c".to_string()).unwrap(),
+                rev: "rev-asdh".to_string(),
+                action: CommitAction::Cut,
+            },
+            &[0u8; 16],
+        );

         assert!(res.is_err());
         let overflowed = match res {

ufos/src/main.rs (+148 -84)
···
 use clap::Parser;
 use jetstream::events::Cursor;
 use std::path::PathBuf;
 use ufos::consumer;
-use ufos::error::StorageError;
 use ufos::file_consumer;
 use ufos::server;
-use ufos::storage::{StorageWhatever, StoreReader, StoreWriter};
 use ufos::storage_fjall::FjallStorage;
-use ufos::storage_mem::MemStorage;

 #[cfg(not(target_env = "msvc"))]
 use tikv_jemallocator::Jemalloc;
···
 static GLOBAL: Jemalloc = Jemalloc;

 /// Aggregate links in the at-mosphere
-#[derive(Parser, Debug)]
 #[command(version, about, long_about = None)]
 struct Args {
     /// Jetstream server to connect to (exclusive with --fixture). Provide either a wss:// URL, or a shorthand value:
···
     #[arg(long)]
     data: PathBuf,
     /// DEBUG: don't start the jetstream consumer or its write loop
-    /// todo: restore this
     #[arg(long, action)]
     pause_writer: bool,
     /// DEBUG: force the rw loop to fall behind by pausing it
     /// todo: restore this
     #[arg(long, action)]
     pause_rw: bool,
-    /// DEBUG: use an in-memory store instead of fjall
     #[arg(long, action)]
-    in_mem: bool,
     /// DEBUG: interpret jetstream as a file fixture
     #[arg(long, action)]
     jetstream_fixture: bool,
 }

-// #[tokio::main(flavor = "current_thread")] // TODO: move this to config via args
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
     env_logger::init();

     let args = Args::parse();
     let jetstream = args.jetstream.clone();
-    if args.in_mem {
-        let (read_store, write_store, cursor) = MemStorage::init(
-            args.data,
-            jetstream,
-            args.jetstream_force,
-            Default::default(),
-        )?;
-        go(
-            args.jetstream,
-            args.jetstream_fixture,
-            args.pause_writer,
-            read_store,
-            write_store,
-            cursor,
-        )
-        .await?;
-    } else {
-        let (read_store, write_store, cursor) = FjallStorage::init(
-            args.data,
-            jetstream,
-            args.jetstream_force,
-            Default::default(),
-        )?;
-        go(
-            args.jetstream,
-            args.jetstream_fixture,
-            args.pause_writer,
-            read_store,
-            write_store,
-            cursor,
-        )
-        .await?;
-    }
-
     Ok(())
 }

-async fn go(
-    jetstream: String,
-    jetstream_fixture: bool,
-    pause_writer: bool,
-    read_store: impl StoreReader + 'static,
-    mut write_store: impl StoreWriter + 'static,
     cursor: Option<Cursor>,
 ) -> anyhow::Result<()> {
     println!("starting server with storage...");
-    let serving = server::serve(read_store);

-    let t1 = tokio::task::spawn(async {
-        let r = serving.await;
-        log::warn!("serving ended with: {r:?}");
-    });

-    let t2: tokio::task::JoinHandle<anyhow::Result<()>> = tokio::task::spawn({
-        async move {
-            if !pause_writer {
-                println!(
-                    "starting consumer with cursor: {cursor:?} from {:?} ago",
-                    cursor.map(|c| c.elapsed())
-                );
-                let mut batches = if jetstream_fixture {
-                    file_consumer::consume(jetstream.into()).await?
-                } else {
-                    consumer::consume(&jetstream, cursor, false).await?
-                };

-                tokio::task::spawn_blocking(move || {
-                    while let Some(event_batch) = batches.blocking_recv() {
-                        write_store.insert_batch(event_batch)?;
-                        write_store
-                            .step_rollup()
-                            .inspect_err(|e| log::error!("laksjdfl: {e:?}"))?;
-                    }
-                    Ok::<(), StorageError>(())
-                })
-                .await??;

-                log::warn!("storage.receive ended with");
-            } else {
-                log::info!("not starting jetstream or the write loop.");
-            }
-            Ok(())
-        }
-    });

     tokio::select! {
-        z = t1 => log::warn!("serve task ended: {z:?}"),
-        z = t2 => log::warn!("storage task ended: {z:?}"),
     };

     println!("bye!");

     Ok(())
 }
···
 use clap::Parser;
 use jetstream::events::Cursor;
 use std::path::PathBuf;
+use std::time::{Duration, SystemTime};
 use ufos::consumer;
 use ufos::file_consumer;
 use ufos::server;
+use ufos::storage::{StorageWhatever, StoreBackground, StoreReader, StoreWriter};
 use ufos::storage_fjall::FjallStorage;
+use ufos::store_types::SketchSecretPrefix;
+use ufos::{nice_duration, ConsumerInfo};

 #[cfg(not(target_env = "msvc"))]
 use tikv_jemallocator::Jemalloc;
···
 static GLOBAL: Jemalloc = Jemalloc;

 /// Aggregate links in the at-mosphere
+#[derive(Parser, Debug, Clone)]
 #[command(version, about, long_about = None)]
 struct Args {
     /// Jetstream server to connect to (exclusive with --fixture). Provide either a wss:// URL, or a shorthand value:
···
     #[arg(long)]
     data: PathBuf,
     /// DEBUG: don't start the jetstream consumer or its write loop
     #[arg(long, action)]
     pause_writer: bool,
+    /// Adjust runtime settings like background task intervals for efficient backfill
+    #[arg(long, action)]
+    backfill: bool,
     /// DEBUG: force the rw loop to fall behind by pausing it
     /// todo: restore this
     #[arg(long, action)]
     pause_rw: bool,
+    /// reset the rollup cursor, scrape through missed things in the past (backfill)
     #[arg(long, action)]
+    reroll: bool,
     /// DEBUG: interpret jetstream as a file fixture
     #[arg(long, action)]
     jetstream_fixture: bool,
 }

 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
     env_logger::init();

     let args = Args::parse();
     let jetstream = args.jetstream.clone();
+    let (read_store, write_store, cursor, sketch_secret) = FjallStorage::init(
+        args.data.clone(),
+        jetstream,
+        args.jetstream_force,
+        Default::default(),
+    )?;
+    go(args, read_store, write_store, cursor, sketch_secret).await?;
     Ok(())
 }

+async fn go<B: StoreBackground>(
+    args: Args,
+    read_store: impl StoreReader + 'static + Clone,
+    mut write_store: impl StoreWriter<B> + 'static,
     cursor: Option<Cursor>,
+    sketch_secret: SketchSecretPrefix,
 ) -> anyhow::Result<()> {
     println!("starting server with storage...");
+    let serving = server::serve(read_store.clone());

+    if args.pause_writer {
+        log::info!("not starting jetstream or the write loop.");
+        serving.await.map_err(|e| anyhow::anyhow!(e))?;
+        return Ok(());
+    }

+    let batches = if args.jetstream_fixture {
+        log::info!("starting with jetstream file fixture: {:?}", args.jetstream);
+        file_consumer::consume(args.jetstream.into(), sketch_secret, cursor).await?
+    } else {
+        log::info!(
+            "starting consumer with cursor: {cursor:?} from {:?} ago",
+            cursor.map(|c| c.elapsed())
+        );
+        consumer::consume(&args.jetstream, cursor, false, sketch_secret).await?
+    };

+    let rolling = write_store
+        .background_tasks(args.reroll)?
+        .run(args.backfill);
+    let consuming = write_store.receive_batches(batches);

+    let stating = do_update_stuff(read_store);

     tokio::select! {
+        z = serving => log::warn!("serve task ended: {z:?}"),
+        z = rolling => log::warn!("rollup task ended: {z:?}"),
+        z = consuming => log::warn!("consuming task ended: {z:?}"),
+        z = stating => log::warn!("status task ended: {z:?}"),
     };

     println!("bye!");

     Ok(())
 }
+
+async fn do_update_stuff(read_store: impl StoreReader) {
+    let started_at = std::time::SystemTime::now();
+    let mut first_cursor = None;
+    let mut first_rollup = None;
+    let mut last_at = std::time::SystemTime::now();
+    let mut last_cursor = None;
+    let mut last_rollup = None;
+    let mut interval = tokio::time::interval(std::time::Duration::from_secs(4));
+    interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
+    loop {
+        interval.tick().await;
+        match read_store.get_consumer_info().await {
+            Err(e) => log::warn!("failed to get jetstream consumer info: {e:?}"),
+            Ok(ConsumerInfo::Jetstream {
+                latest_cursor,
+                rollup_cursor,
+                ..
+            }) => {
+                let now = std::time::SystemTime::now();
+                let latest_cursor = latest_cursor.map(Cursor::from_raw_u64);
+                let rollup_cursor = rollup_cursor.map(Cursor::from_raw_u64);
+                backfill_info(
+                    latest_cursor,
+                    rollup_cursor,
+                    last_cursor,
+                    last_rollup,
+                    last_at,
+                    first_cursor,
+                    first_rollup,
+                    started_at,
+                    now,
+                );
+                first_cursor = first_cursor.or(latest_cursor);
+                first_rollup = first_rollup.or(rollup_cursor);
+                last_cursor = latest_cursor;
+                last_rollup = rollup_cursor;
+                last_at = now;
+            }
+        }
+    }
+}
+
+#[allow(clippy::too_many_arguments)]
+fn backfill_info(
+    latest_cursor: Option<Cursor>,
+    rollup_cursor: Option<Cursor>,
+    last_cursor: Option<Cursor>,
+    last_rollup: Option<Cursor>,
+    last_at: SystemTime,
+    first_cursor: Option<Cursor>,
+    first_rollup: Option<Cursor>,
+    started_at: SystemTime,
+    now: SystemTime,
+) {
+    let nice_dt_two_maybes = |earlier: Option<Cursor>, later: Option<Cursor>| match (earlier, later)
+    {
+        (Some(earlier), Some(later)) => match later.duration_since(&earlier) {
+            Ok(dt) => nice_duration(dt),
+            Err(e) => {
+                let rev_dt = e.duration();
+                format!("+{}", nice_duration(rev_dt))
+            }
+        },
+        _ => "unknown".to_string(),
+    };
+
+    let rate = |mlatest: Option<Cursor>, msince: Option<Cursor>, real: Duration| {
+        mlatest
+            .zip(msince)
+            .map(|(latest, since)| {
+                latest
+                    .duration_since(&since)
+                    .unwrap_or(Duration::from_millis(1))
+            })
+            .map(|dtc| format!("{:.2}", dtc.as_secs_f64() / real.as_secs_f64()))
+            .unwrap_or("??".into())
+    };
+
+    let dt_real = now
+        .duration_since(last_at)
+        .unwrap_or(Duration::from_millis(1));
+
+    let dt_real_total = now
+        .duration_since(started_at)
+        .unwrap_or(Duration::from_millis(1));
+
+    let cursor_rate = rate(latest_cursor, last_cursor, dt_real);
+    let cursor_avg = rate(latest_cursor, first_cursor, dt_real_total);
+
+    let rollup_rate = rate(rollup_cursor, last_rollup, dt_real);
+    let rollup_avg = rate(rollup_cursor, first_rollup, dt_real_total);
+
+    log::info!(
+        "cursor: {} behind (→{}, {cursor_rate}x, {cursor_avg}x avg). rollup: {} behind (→{}, {rollup_rate}x, {rollup_avg}x avg).",
+        latest_cursor.map(|c| c.elapsed().map(nice_duration).unwrap_or("++".to_string())).unwrap_or("?".to_string()),
+        nice_dt_two_maybes(last_cursor, latest_cursor),
+        rollup_cursor.map(|c| c.elapsed().map(nice_duration).unwrap_or("++".to_string())).unwrap_or("?".to_string()),
+        nice_dt_two_maybes(last_rollup, rollup_cursor),
+    );
+}
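
Editor's note: the `{cursor_rate}x` figures in that log line are plain ratios of firehose time advanced to wall-clock time elapsed; a worked sketch with invented numbers:

use std::time::Duration;

// if the cursor advanced 120s of firehose time during a 4s status interval,
// the consumer is catching up at 30x realtime
let advanced = Duration::from_secs(120);
let wall = Duration::from_secs(4);
assert_eq!(format!("{:.2}", advanced.as_secs_f64() / wall.as_secs_f64()), "30.00");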

ufos/src/server.rs (-249)
···
-use crate::storage::StoreReader;
-use crate::{ConsumerInfo, Nsid, TopCollections, UFOsRecord};
-use dropshot::endpoint;
-use dropshot::ApiDescription;
-use dropshot::ConfigDropshot;
-use dropshot::ConfigLogging;
-use dropshot::ConfigLoggingLevel;
-use dropshot::HttpError;
-use dropshot::HttpResponseHeaders;
-use dropshot::HttpResponseOk;
-use dropshot::Query;
-use dropshot::RequestContext;
-use dropshot::ServerBuilder;
-use schemars::JsonSchema;
-use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
-use std::sync::Arc;
-
-struct Context {
-    pub spec: Arc<serde_json::Value>,
-    storage: Box<dyn StoreReader>,
-}
-
-/// Meta: get the openapi spec for this api
-#[endpoint {
-    method = GET,
-    path = "/openapi",
-}]
-async fn get_openapi(ctx: RequestContext<Context>) -> OkCorsResponse<serde_json::Value> {
-    let spec = (*ctx.context().spec).clone();
-    ok_cors(spec)
-}
-
-#[derive(Debug, Serialize, JsonSchema)]
-struct MetaInfo {
-    storage_name: String,
-    storage: serde_json::Value,
-    consumer: ConsumerInfo,
-}
-/// Get meta information about UFOs itself
-#[endpoint {
-    method = GET,
-    path = "/meta"
-}]
-async fn get_meta_info(ctx: RequestContext<Context>) -> OkCorsResponse<MetaInfo> {
-    let Context { storage, .. } = ctx.context();
-    let failed_to_get =
-        |what| move |e| HttpError::for_internal_error(format!("failed to get {what}: {e:?}"));
-
-    let storage_info = storage
-        .get_storage_stats()
-        .await
-        .map_err(failed_to_get("storage info"))?;
-
-    let consumer = storage
-        .get_consumer_info()
-        .await
-        .map_err(failed_to_get("consumer info"))?;
-
-    ok_cors(MetaInfo {
-        storage_name: storage.name(),
-        storage: storage_info,
-        consumer,
-    })
-}
-fn to_multiple_nsids(s: &str) -> Result<Vec<Nsid>, String> {
-    let mut out = Vec::new();
-    for collection in s.split(',') {
-        let Ok(nsid) = Nsid::new(collection.to_string()) else {
-            return Err(format!("collection {collection:?} was not a valid NSID"));
-        };
-        out.push(nsid);
-    }
-    Ok(out)
-}
-
-#[derive(Debug, Deserialize, JsonSchema)]
-struct RecordsCollectionsQuery {
-    collection: Option<String>, // JsonSchema not implemented for Nsid :(
-}
-#[derive(Debug, Serialize, JsonSchema)]
-struct ApiRecord {
-    did: String,
-    collection: String,
-    rkey: String,
-    record: Box<serde_json::value::RawValue>,
-    time_us: u64,
-}
-impl From<UFOsRecord> for ApiRecord {
-    fn from(ufo: UFOsRecord) -> Self {
-        Self {
-            did: ufo.did.to_string(),
-            collection: ufo.collection.to_string(),
-            rkey: ufo.rkey.to_string(),
-            record: ufo.record,
-            time_us: ufo.cursor.to_raw_u64(),
-        }
-    }
-}
-/// Get recent records by collection
-///
-/// Multiple collections are supported. they will be delivered in one big array with no
-/// specified order.
-#[endpoint {
-    method = GET,
-    path = "/records",
-}]
-async fn get_records_by_collections(
-    ctx: RequestContext<Context>,
-    collection_query: Query<RecordsCollectionsQuery>,
-) -> OkCorsResponse<Vec<ApiRecord>> {
-    let Context { storage, .. } = ctx.context();
-    let mut limit = 42;
-    let query = collection_query.into_inner();
-    let collections = if let Some(provided_collection) = query.collection {
-        to_multiple_nsids(&provided_collection)
-            .map_err(|reason| HttpError::for_bad_request(None, reason))?
-    } else {
-        let all_collections_should_be_nsids: Vec<String> = storage
-            .get_top_collections()
-            .await
-            .map_err(|e| {
-                HttpError::for_internal_error(format!("failed to get top collections: {e:?}"))
-            })?
-            .into();
-        let mut all_collections = Vec::with_capacity(all_collections_should_be_nsids.len());
-        for raw_nsid in all_collections_should_be_nsids {
-            let nsid = Nsid::new(raw_nsid).map_err(|e| {
-                HttpError::for_internal_error(format!("failed to parse nsid: {e:?}"))
-            })?;
-            all_collections.push(nsid);
-        }
-
-        limit = 12;
-        all_collections
-    };
-
-    let records = storage
-        .get_records_by_collections(&collections, limit, true)
-        .await
-        .map_err(|e| HttpError::for_internal_error(e.to_string()))?
-        .into_iter()
-        .map(|r| r.into())
-        .collect();
-
-    ok_cors(records)
-}
-
-#[derive(Debug, Deserialize, JsonSchema)]
-struct TotalSeenCollectionsQuery {
-    collection: String, // JsonSchema not implemented for Nsid :(
-}
-#[derive(Debug, Serialize, JsonSchema)]
-struct TotalCounts {
-    total_records: u64,
-    dids_estimate: u64,
-}
-/// Get total records seen by collection
-#[endpoint {
-    method = GET,
-    path = "/records/total-seen"
-}]
-async fn get_records_total_seen(
-    ctx: RequestContext<Context>,
-    collection_query: Query<TotalSeenCollectionsQuery>,
-) -> OkCorsResponse<HashMap<String, TotalCounts>> {
-    let Context { storage, .. } = ctx.context();
-
-    let query = collection_query.into_inner();
-    let collections = to_multiple_nsids(&query.collection)
-        .map_err(|reason| HttpError::for_bad_request(None, reason))?;
-
-    let mut seen_by_collection = HashMap::with_capacity(collections.len());
-
-    for collection in &collections {
-        let (total_records, dids_estimate) = storage
-            .get_counts_by_collection(collection)
-            .await
-            .map_err(|e| HttpError::for_internal_error(format!("boooo: {e:?}")))?;
-
-        seen_by_collection.insert(
-            collection.to_string(),
-            TotalCounts {
-                total_records,
-                dids_estimate,
-            },
-        );
-    }
-
-    ok_cors(seen_by_collection)
-}
-
-/// Get top collections
-#[endpoint {
-    method = GET,
-    path = "/collections"
-}]
-async fn get_top_collections(ctx: RequestContext<Context>) -> OkCorsResponse<TopCollections> {
-    let Context { storage, .. } = ctx.context();
-    let collections = storage
-        .get_top_collections()
-        .await
-        .map_err(|e| HttpError::for_internal_error(format!("boooo: {e:?}")))?;
-
-    ok_cors(collections)
-}
-
-pub async fn serve(storage: impl StoreReader + 'static) -> Result<(), String> {
-    let log = ConfigLogging::StderrTerminal {
-        level: ConfigLoggingLevel::Info,
-    }
-    .to_logger("hello-ufos")
-    .map_err(|e| e.to_string())?;
-
-    let mut api = ApiDescription::new();
-
-    api.register(get_openapi).unwrap();
-    api.register(get_meta_info).unwrap();
-    api.register(get_records_by_collections).unwrap();
-    api.register(get_records_total_seen).unwrap();
-    api.register(get_top_collections).unwrap();
-
-    let context = Context {
-        spec: Arc::new(
-            api.openapi("UFOs", semver::Version::new(0, 0, 0))
-                .json()
-                .map_err(|e| e.to_string())?,
-        ),
-        storage: Box::new(storage),
-    };
-
-    ServerBuilder::new(api, context, log)
-        .config(ConfigDropshot {
-            bind_address: "0.0.0.0:9999".parse().unwrap(),
-            ..Default::default()
-        })
-        .start()
-        .map_err(|error| format!("failed to start server: {}", error))?
-        .await
-}
-
-/// awkward helpers
-type OkCorsResponse<T> = Result<HttpResponseHeaders<HttpResponseOk<T>>, HttpError>;
-fn ok_cors<T: Send + Sync + Serialize + JsonSchema>(t: T) -> OkCorsResponse<T> {
-    let mut res = HttpResponseHeaders::new_unnamed(HttpResponseOk(t));
-    res.headers_mut()
-        .insert("access-control-allow-origin", "*".parse().unwrap());
-    Ok(res)
-}
···

ufos/src/server/collections_query.rs (+72)
+use crate::Nsid;
+use async_trait::async_trait;
+use dropshot::{
+    ApiEndpointBodyContentType, ExtractorMetadata, HttpError, Query, RequestContext, ServerContext,
+    SharedExtractor,
+};
+use schemars::JsonSchema;
+use serde::Deserialize;
+use std::collections::HashSet;
+
+/// The real type that gets deserialized
+#[derive(Debug, Deserialize, JsonSchema)]
+pub struct MultiCollectionQuery {
+    pub collection: Vec<String>,
+}
+
+/// The fake corresponding type for docs that dropshot won't freak out about a
+/// vec for
+#[derive(Deserialize, JsonSchema)]
+#[allow(dead_code)]
+struct MultiCollectionQueryForDocs {
+    /// One or more collection [NSID](https://atproto.com/specs/nsid)s
+    ///
+    /// Pass this parameter multiple times to specify multiple collections, like
+    /// `collection=app.bsky.feed.like&collection=app.bsky.feed.post`
+    collection: String,
+}
+
+impl TryFrom<MultiCollectionQuery> for HashSet<Nsid> {
+    type Error = HttpError;
+    fn try_from(mcq: MultiCollectionQuery) -> Result<Self, Self::Error> {
+        let mut out = HashSet::with_capacity(mcq.collection.len());
+        for c in mcq.collection {
+            let nsid = Nsid::new(c).map_err(|e| {
+                HttpError::for_bad_request(
+                    None,
+                    format!("failed to convert collection to an NSID: {e:?}"),
+                )
+            })?;
+            out.insert(nsid);
+        }
+        Ok(out)
+    }
+}
+
+// The `SharedExtractor` implementation for Query<QueryType> describes how to
+// construct an instance of `Query<QueryType>` from an HTTP request: namely, by
+// parsing the query string to an instance of `QueryType`.
+#[async_trait]
+impl SharedExtractor for MultiCollectionQuery {
+    async fn from_request<Context: ServerContext>(
+        ctx: &RequestContext<Context>,
+    ) -> Result<MultiCollectionQuery, HttpError> {
+        let raw_query = ctx.request.uri().query().unwrap_or("");
+        let q = serde_qs::from_str(raw_query).map_err(|e| {
+            HttpError::for_bad_request(None, format!("unable to parse query string: {}", e))
+        })?;
+        Ok(q)
+    }
+
+    fn metadata(body_content_type: ApiEndpointBodyContentType) -> ExtractorMetadata {
+        // HACK: query type switcheroo: passing MultiCollectionQuery to
+        // `metadata` would "helpfully" panic because dropshot believes we can
+        // only have scalar types in a query.
+        //
+        // so instead we have a fake second type whose only job is to look the
+        // same as MultiCollectionQuery except that it has `String` instead of
+        // `Vec<String>`, which dropshot will accept, and generate ~close-enough
+        // docs for.
+        <Query<MultiCollectionQueryForDocs> as SharedExtractor>::metadata(body_content_type)
+    }
+}
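
Editor's note: a hedged sketch of the extractor in use, modelled on `get_collection_stats` in mod.rs below; the handler itself is illustrative only, not part of this diff:

// dropshot calls SharedExtractor::from_request for the MultiCollectionQuery
// parameter, then the TryFrom impl above validates each NSID
#[endpoint { method = GET, path = "/example-count" }]
async fn count_collections(
    ctx: RequestContext<Context>,
    collections_query: MultiCollectionQuery,
) -> Result<HttpResponseOk<usize>, HttpError> {
    let collections: HashSet<Nsid> = collections_query.try_into()?;
    Ok(HttpResponseOk(collections.len()))
}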

ufos/src/server/cors.rs (+23)
+use dropshot::{HttpError, HttpResponseHeaders, HttpResponseOk};
+use schemars::JsonSchema;
+use serde::Serialize;
+
+pub type OkCorsResponse<T> = Result<HttpResponseHeaders<HttpResponseOk<T>>, HttpError>;
+
+/// Helper for constructing Ok responses: return OkCors(T).into()
+/// (not happy with this yet)
+pub struct OkCors<T: Serialize + JsonSchema + Send + Sync>(pub T);
+
+impl<T> From<OkCors<T>> for OkCorsResponse<T>
+where
+    T: Serialize + JsonSchema + Send + Sync,
+{
+    fn from(ok: OkCors<T>) -> OkCorsResponse<T> {
+        let mut res = HttpResponseHeaders::new_unnamed(HttpResponseOk(ok.0));
+        res.headers_mut()
+            .insert("access-control-allow-origin", "*".parse().unwrap());
+        Ok(res)
+    }
+}
+
+// TODO: cors for HttpError
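
Editor's note: in use this reads as a one-liner at the end of a handler; a sketch:

async fn example(ctx: RequestContext<Context>) -> OkCorsResponse<String> {
    // wrap the payload; the From impl above attaches access-control-allow-origin: *
    OkCors("hello".to_string()).into()
}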

ufos/src/server/mod.rs (+638)
···
+mod collections_query;
+mod cors;
+
+use crate::index_html::INDEX_HTML;
+use crate::storage::StoreReader;
+use crate::store_types::{HourTruncatedCursor, WeekTruncatedCursor};
+use crate::{
+    ConsumerInfo, Cursor, JustCount, Nsid, NsidCount, NsidPrefix, OrderCollectionsBy, PrefixChild,
+    UFOsRecord,
+};
+use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
+use chrono::{DateTime, Utc};
+use collections_query::MultiCollectionQuery;
+use cors::{OkCors, OkCorsResponse};
+use dropshot::endpoint;
+use dropshot::ApiDescription;
+use dropshot::Body;
+use dropshot::ConfigDropshot;
+use dropshot::ConfigLogging;
+use dropshot::ConfigLoggingLevel;
+use dropshot::HttpError;
+use dropshot::Query;
+use dropshot::RequestContext;
+use dropshot::ServerBuilder;
+
+use http::{Response, StatusCode};
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use std::collections::{HashMap, HashSet};
+use std::sync::Arc;
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
+
+struct Context {
+    pub spec: Arc<serde_json::Value>,
+    storage: Box<dyn StoreReader>,
+}
+
+fn dt_to_cursor(dt: DateTime<Utc>) -> Result<HourTruncatedCursor, HttpError> {
+    let t = dt.timestamp_micros();
+    if t < 0 {
+        Err(HttpError::for_bad_request(None, "timestamp too old".into()))
+    } else {
+        let t = t as u64;
+        let t_now = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .unwrap()
+            .as_micros() as u64;
+        const ONE_HOUR: u64 = 60 * 60 * 1_000_000;
+        if t > t_now && (t - t_now > 2 * ONE_HOUR) {
+            Err(HttpError::for_bad_request(None, "future timestamp".into()))
+        } else {
+            Ok(HourTruncatedCursor::truncate_raw_u64(t))
+        }
+    }
+}
+
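Editor's note: a sketch of the boundary behavior, assuming (per its name) that `HourTruncatedCursor::truncate_raw_u64` floors timestamps to the hour:

use chrono::TimeZone;

// a past timestamp is accepted and floored to its hour bucket
let dt = chrono::Utc.with_ymd_and_hms(2024, 6, 1, 12, 34, 56).unwrap();
assert!(dt_to_cursor(dt).is_ok());

// more than ~2 hours ahead of the server clock is rejected as a 400
let far_future = chrono::Utc::now() + chrono::Duration::hours(3);
assert!(dt_to_cursor(far_future).is_err());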
+
/// Serve index page as html
58
+
#[endpoint {
59
+
method = GET,
60
+
path = "/",
61
+
/*
62
+
* not useful to have this in openapi
63
+
*/
64
+
unpublished = true,
65
+
}]
66
+
async fn index(_ctx: RequestContext<Context>) -> Result<Response<Body>, HttpError> {
67
+
Ok(Response::builder()
68
+
.status(StatusCode::OK)
69
+
.header(http::header::CONTENT_TYPE, "text/html")
70
+
.body(INDEX_HTML.into())?)
71
+
}
72
+
73
+
/// Meta: get the openapi spec for this api
74
+
#[endpoint {
75
+
method = GET,
76
+
path = "/openapi",
77
+
/*
78
+
* not useful to have this in openapi
79
+
*/
80
+
unpublished = true,
81
+
}]
82
+
async fn get_openapi(ctx: RequestContext<Context>) -> OkCorsResponse<serde_json::Value> {
83
+
let spec = (*ctx.context().spec).clone();
84
+
OkCors(spec).into()
85
+
}
86
+
87
+
#[derive(Debug, Serialize, JsonSchema)]
88
+
struct MetaInfo {
89
+
storage_name: String,
90
+
storage: serde_json::Value,
91
+
consumer: ConsumerInfo,
92
+
}
93
+
/// UFOs meta-info
94
+
#[endpoint {
95
+
method = GET,
96
+
path = "/meta"
97
+
}]
98
+
async fn get_meta_info(ctx: RequestContext<Context>) -> OkCorsResponse<MetaInfo> {
99
+
let Context { storage, .. } = ctx.context();
100
+
let failed_to_get =
101
+
|what| move |e| HttpError::for_internal_error(format!("failed to get {what}: {e:?}"));
102
+
103
+
let storage_info = storage
104
+
.get_storage_stats()
105
+
.await
106
+
.map_err(failed_to_get("storage info"))?;
107
+
108
+
let consumer = storage
109
+
.get_consumer_info()
110
+
.await
111
+
.map_err(failed_to_get("consumer info"))?;
112
+
113
+
OkCors(MetaInfo {
114
+
storage_name: storage.name(),
115
+
storage: storage_info,
116
+
consumer,
117
+
})
118
+
.into()
119
+
}
120
+
121
+
// TODO: replace with normal (🙃) multi-qs value somehow
122
+
fn to_multiple_nsids(s: &str) -> Result<HashSet<Nsid>, String> {
123
+
let mut out = HashSet::new();
124
+
for collection in s.split(',') {
125
+
let Ok(nsid) = Nsid::new(collection.to_string()) else {
126
+
return Err(format!("collection {collection:?} was not a valid NSID"));
127
+
};
128
+
out.insert(nsid);
129
+
}
130
+
Ok(out)
131
+
}
132
+
133
+
#[derive(Debug, Deserialize, JsonSchema)]
134
+
struct RecordsCollectionsQuery {
135
+
collection: Option<String>, // JsonSchema not implemented for Nsid :(
136
+
}
137
+
#[derive(Debug, Serialize, JsonSchema)]
138
+
struct ApiRecord {
139
+
did: String,
140
+
collection: String,
141
+
rkey: String,
142
+
record: Box<serde_json::value::RawValue>,
143
+
time_us: u64,
144
+
}
145
+
impl From<UFOsRecord> for ApiRecord {
146
+
fn from(ufo: UFOsRecord) -> Self {
147
+
Self {
148
+
did: ufo.did.to_string(),
149
+
collection: ufo.collection.to_string(),
150
+
rkey: ufo.rkey.to_string(),
151
+
record: ufo.record,
152
+
time_us: ufo.cursor.to_raw_u64(),
153
+
}
154
+
}
155
+
}
156
+
/// Record samples
157
+
///
158
+
/// Get most recent records seen in the firehose, by collection NSID
159
+
///
160
+
/// Multiple collections are supported. They will be delivered in one big array with no
161
+
/// specified order.
162
+
#[endpoint {
163
+
method = GET,
164
+
path = "/records",
165
+
}]
166
+
async fn get_records_by_collections(
167
+
ctx: RequestContext<Context>,
168
+
collection_query: Query<RecordsCollectionsQuery>,
169
+
) -> OkCorsResponse<Vec<ApiRecord>> {
170
+
let Context { storage, .. } = ctx.context();
171
+
let mut limit = 42;
172
+
let query = collection_query.into_inner();
173
+
let collections = if let Some(provided_collection) = query.collection {
174
+
to_multiple_nsids(&provided_collection)
175
+
.map_err(|reason| HttpError::for_bad_request(None, reason))?
176
+
} else {
177
+
limit = 12;
178
+
let min_time_ago = SystemTime::now() - Duration::from_secs(86_400 * 3); // we want at least 3 days of data
179
+
let since: WeekTruncatedCursor = Cursor::at(min_time_ago).into();
180
+
let (collections, _) = storage
181
+
.get_collections(
182
+
1000,
183
+
Default::default(),
184
+
Some(since.try_as().unwrap()),
185
+
None,
186
+
)
187
+
.await
188
+
.map_err(|e| HttpError::for_internal_error(e.to_string()))?;
189
+
collections
190
+
.into_iter()
191
+
.map(|c| Nsid::new(c.nsid).unwrap())
192
+
.collect()
193
+
};
194
+
195
+
let records = storage
196
+
.get_records_by_collections(collections, limit, true)
197
+
.await
198
+
.map_err(|e| HttpError::for_internal_error(e.to_string()))?
199
+
.into_iter()
200
+
.map(|r| r.into())
201
+
.collect();
202
+
203
+
OkCors(records).into()
204
+
}
205
+
206
+
#[derive(Debug, Deserialize, JsonSchema)]
207
+
struct CollectionsStatsQuery {
208
+
/// Limit stats to those seen after this UTC datetime
209
+
///
210
+
/// default: 1 week ago
211
+
since: Option<DateTime<Utc>>,
212
+
/// Limit stats to those seen before this UTC datetime
213
+
///
214
+
/// default: now
215
+
until: Option<DateTime<Utc>>,
216
+
}
217
+
/// Collection stats
218
+
///
219
+
/// Get record statistics for collections during a specific time period.
220
+
///
221
+
/// Note: the statistics are "rolled up" into hourly buckets in the background,
222
+
/// so the data here can be as stale as that background task is behind. See the
223
+
/// meta info endpoint to find out how up-to-date the rollup currently is. (In
224
+
/// general it sholud be pretty close to live)
225
+
#[endpoint {
226
+
method = GET,
227
+
path = "/collections/stats"
228
+
}]
229
+
async fn get_collection_stats(
230
+
ctx: RequestContext<Context>,
231
+
collections_query: MultiCollectionQuery,
232
+
query: Query<CollectionsStatsQuery>,
233
+
) -> OkCorsResponse<HashMap<String, JustCount>> {
234
+
let Context { storage, .. } = ctx.context();
235
+
let q = query.into_inner();
236
+
let collections: HashSet<Nsid> = collections_query.try_into()?;
237
+
238
+
let since = q.since.map(dt_to_cursor).transpose()?.unwrap_or_else(|| {
239
+
let week_ago_secs = 7 * 86_400;
240
+
let week_ago = SystemTime::now() - Duration::from_secs(week_ago_secs);
241
+
Cursor::at(week_ago).into()
242
+
});
243
+
244
+
let until = q.until.map(dt_to_cursor).transpose()?;
245
+
246
+
let mut seen_by_collection = HashMap::with_capacity(collections.len());
247
+
248
+
for collection in &collections {
249
+
let counts = storage
250
+
.get_collection_counts(collection, since, until)
251
+
.await
252
+
.map_err(|e| HttpError::for_internal_error(format!("boooo: {e:?}")))?;
253
+
254
+
seen_by_collection.insert(collection.to_string(), counts);
255
+
}
256
+
257
+
OkCors(seen_by_collection).into()
258
+
}
259
+
260
+
#[derive(Debug, Serialize, JsonSchema)]
261
+
struct CollectionsResponse {
262
+
/// Each known collection and its associated statistics
263
+
///
264
+
/// The order is unspecified.
265
+
collections: Vec<NsidCount>,
266
+
/// Include in a follow-up request to get the next page of results, if more are available
267
+
cursor: Option<String>,
268
+
}
269
+
#[derive(Debug, Deserialize, JsonSchema)]
270
+
#[serde(rename_all = "kebab-case")]
271
+
pub enum CollectionsQueryOrder {
272
+
RecordsCreated,
273
+
DidsEstimate,
274
+
}
275
+
impl From<&CollectionsQueryOrder> for OrderCollectionsBy {
276
+
fn from(q: &CollectionsQueryOrder) -> Self {
277
+
match q {
278
+
CollectionsQueryOrder::RecordsCreated => OrderCollectionsBy::RecordsCreated,
279
+
CollectionsQueryOrder::DidsEstimate => OrderCollectionsBy::DidsEstimate,
280
+
}
281
+
}
282
+
}
283
+
#[derive(Debug, Deserialize, JsonSchema)]
284
+
struct CollectionsQuery {
285
+
/// The maximum number of collections to return in one request.
286
+
///
287
+
/// Default: `100` normally, `32` if `order` is specified.
288
+
#[schemars(range(min = 1, max = 200))]
289
+
limit: Option<usize>,
290
+
/// Get a paginated response with more collections.
291
+
///
292
+
/// Always omit the cursor for the first request. If more collections than the limit are available, the response will contain a non-null `cursor` to include with the next request.
293
+
///
294
+
/// `cursor` is mutually exclusive with `order`.
295
+
cursor: Option<String>,
296
+
/// Limit collections and statistics to those seen after this UTC datetime
297
+
since: Option<DateTime<Utc>>,
298
+
/// Limit collections and statistics to those seen before this UTC datetime
299
+
until: Option<DateTime<Utc>>,
300
+
/// Get a limited, sorted list
301
+
///
302
+
/// Mutually exclusive with `cursor` -- sorted results cannot be paged.
303
+
order: Option<CollectionsQueryOrder>,
304
+
}
305
+
306
+
/// List collections
307
+
///
308
+
/// With statistics.
309
+
///
310
+
/// ## To fetch a full list:
311
+
///
312
+
/// Omit the `order` parameter and page through the results using the `cursor`. There have been a lot of collections seen in the ATmosphere, well over 400 at time of writing, so you *will* need to make a series of paginated requests with `cursor`s to get them all.
313
+
///
314
+
/// The set of collections across multiple requests is not guaranteed to be a perfectly consistent snapshot:
315
+
///
316
+
/// - all collection NSIDs observed before the first request will be included in the results
317
+
///
318
+
/// - *new* NSIDs observed in the firehose *while paging* might be included or excluded from the final set
319
+
///
320
+
/// - no duplicate NSIDs will occur in the combined results
321
+
///
322
+
/// In practice this is close enough for most use-cases to not worry about.
323
+
///
324
+
/// ## To fetch the top collection NSIDs:
325
+
///
326
+
/// Specify the `order` parameter (must be either `records-created` or `dids-estimate`). Note that ordered results cannot be paged.
327
+
///
328
+
/// All statistics are bucketed hourly, so the most granular effective time boundary for `since` and `until` is one hour.
329
+
#[endpoint {
330
+
method = GET,
331
+
path = "/collections"
332
+
}]
333
+
async fn get_collections(
334
+
ctx: RequestContext<Context>,
335
+
query: Query<CollectionsQuery>,
336
+
) -> OkCorsResponse<CollectionsResponse> {
337
+
let Context { storage, .. } = ctx.context();
338
+
let q = query.into_inner();
339
+
340
+
if q.cursor.is_some() && q.order.is_some() {
341
+
let msg = "`cursor` is mutually exclusive with `order`. ordered results cannot be paged.";
342
+
return Err(HttpError::for_bad_request(None, msg.to_string()));
343
+
}
344
+
345
+
let order = if let Some(ref o) = q.order {
346
+
o.into()
347
+
} else {
348
+
let cursor = q
349
+
.cursor
350
+
.and_then(|c| if c.is_empty() { None } else { Some(c) })
351
+
.map(|c| URL_SAFE_NO_PAD.decode(&c))
352
+
.transpose()
353
+
.map_err(|e| HttpError::for_bad_request(None, format!("invalid cursor: {e:?}")))?;
354
+
OrderCollectionsBy::Lexi { cursor }
355
+
};
356
+
357
+
let limit = match (q.limit, q.order) {
358
+
(Some(limit), _) => limit,
359
+
(None, Some(_)) => 32,
360
+
(None, None) => 100,
361
+
};
362
+
363
+
if !(1..=200).contains(&limit) {
364
+
let msg = format!("limit not in 1..=200: {}", limit);
365
+
return Err(HttpError::for_bad_request(None, msg));
366
+
}
367
+
368
+
let since = q.since.map(dt_to_cursor).transpose()?;
369
+
let until = q.until.map(dt_to_cursor).transpose()?;
370
+
371
+
let (collections, next_cursor) = storage
372
+
.get_collections(limit, order, since, until)
373
+
.await
374
+
.map_err(|e| HttpError::for_internal_error(format!("failed to get collections: {e:?}")))?;
375
+
376
+
let next_cursor = next_cursor.map(|c| URL_SAFE_NO_PAD.encode(c));
377
+
378
+
OkCors(CollectionsResponse {
379
+
collections,
380
+
cursor: next_cursor,
381
+
})
382
+
.into()
383
+
}
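// A sketch of the full-list pagination flow documented above (assumes
// `reqwest` + `serde_json`; the URL and port are illustrative): keep echoing
// the returned `cursor` back until the server returns it as null.
async fn example_fetch_all_collections() -> Result<Vec<serde_json::Value>, Box<dyn std::error::Error>> {
    let mut all = Vec::new();
    let mut cursor: Option<String> = None;
    loop {
        let mut url = "http://localhost:9999/collections?limit=200".to_string();
        if let Some(c) = &cursor {
            url.push_str("&cursor=");
            url.push_str(c);
        }
        let page: serde_json::Value = reqwest::get(&url).await?.json().await?;
        if let Some(items) = page["collections"].as_array() {
            all.extend(items.iter().cloned());
        }
        match page["cursor"].as_str() {
            Some(c) => cursor = Some(c.to_string()), // more pages remain
            None => break,                           // end of the set
        }
    }
    Ok(all)
}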
384
+
385
+
#[derive(Debug, Serialize, JsonSchema)]
386
+
struct PrefixResponse {
387
+
/// Note that total may not include counts beyond the current page (TODO)
388
+
total: JustCount,
389
+
children: Vec<PrefixChild>,
390
+
/// Include in a follow-up request to get the next page of results, if more are available
391
+
cursor: Option<String>,
392
+
}
393
+
#[derive(Debug, Deserialize, JsonSchema)]
394
+
struct PrefixQuery {
395
+
/// The NSID prefix (lexicon group) to filter by.
///
396
+
/// The final segment of a collection NSID is the `name`, and everything before it is called its `group`. eg:
397
+
///
398
+
/// - `app.bsky.feed.post` and `app.bsky.feed.like` are both in the _lexicon group_ "`app.bsky.feed`".
399
+
///
400
+
prefix: String,
401
+
/// The maximum number of collections to return in one request.
402
+
///
403
+
/// The number of items actually returned may be less than the limit. If paginating, this does **not** indicate that no
404
+
/// more items are available! Check if the `cursor` in the response is `null` to determine the end of items.
405
+
///
406
+
/// Default: `100` normally, `32` if `order` is specified.
407
+
#[schemars(range(min = 1, max = 200))]
408
+
limit: Option<usize>,
409
+
/// Get a paginated response with more collections.
410
+
///
411
+
/// Always omit the cursor for the first request. If more collections than the limit are available, the response will contain a non-null `cursor` to include with the next request.
412
+
///
413
+
/// `cursor` is mutually exclusive with `order`.
414
+
cursor: Option<String>,
415
+
/// Limit collections and statistics to those seen after this UTC datetime
416
+
///
417
+
/// Default: all-time
418
+
since: Option<DateTime<Utc>>,
419
+
/// Limit collections and statistics to those seen before this UTC datetime
420
+
///
421
+
/// Default: now
422
+
until: Option<DateTime<Utc>>,
423
+
/// Get a limited, sorted list
424
+
///
425
+
/// Mutually exclusive with `cursor` -- sorted results cannot be paged.
426
+
order: Option<CollectionsQueryOrder>,
427
+
}
428
+
/// Prefix-filter collections list
429
+
///
430
+
/// This endpoint enumerates all collection NSIDs for a lexicon group.
431
+
///
432
+
/// ## To fetch a full list:
433
+
///
434
+
/// Omit the `order` parameter and page through the results using the `cursor`. There have been a lot of collections seen in the ATmosphere, well over 400 at time of writing, so you *will* need to make a series of paginated requests with `cursor`s to get them all.
435
+
///
436
+
/// The set of collections across multiple requests is not guaranteed to be a perfectly consistent snapshot:
437
+
///
438
+
/// - all collection NSIDs observed before the first request will be included in the results
439
+
///
440
+
/// - *new* NSIDs observed in the firehose *while paging* might be included or excluded from the final set
441
+
///
442
+
/// - no duplicate NSIDs will occur in the combined results
443
+
///
444
+
/// In practice this is close enough for most use-cases to not worry about.
445
+
///
446
+
/// ## To fetch the top collection NSIDs:
447
+
///
448
+
/// Specify the `order` parameter (must be either `records-created` or `dids-estimate`). Note that ordered results cannot be paged.
449
+
///
450
+
/// All statistics are bucketed hourly, so the most granular effective time boundary for `since` and `until` is one hour.
451
+
#[endpoint {
452
+
method = GET,
453
+
path = "/prefix"
454
+
}]
455
+
async fn get_prefix(
456
+
ctx: RequestContext<Context>,
457
+
query: Query<PrefixQuery>,
458
+
) -> OkCorsResponse<PrefixResponse> {
459
+
let Context { storage, .. } = ctx.context();
460
+
let q = query.into_inner();
461
+
462
+
let prefix = NsidPrefix::new(&q.prefix).map_err(|e| {
463
+
HttpError::for_bad_request(
464
+
None,
465
+
format!("{:?} was not a valid NSID prefix: {e:?}", q.prefix),
466
+
)
467
+
})?;
468
+
469
+
if q.cursor.is_some() && q.order.is_some() {
470
+
let msg = "`cursor` is mutually exclusive with `order`. ordered results cannot be paged.";
471
+
return Err(HttpError::for_bad_request(None, msg.to_string()));
472
+
}
473
+
474
+
let order = if let Some(ref o) = q.order {
475
+
o.into()
476
+
} else {
477
+
let cursor = q
478
+
.cursor
479
+
.and_then(|c| if c.is_empty() { None } else { Some(c) })
480
+
.map(|c| URL_SAFE_NO_PAD.decode(&c))
481
+
.transpose()
482
+
.map_err(|e| HttpError::for_bad_request(None, format!("invalid cursor: {e:?}")))?;
483
+
OrderCollectionsBy::Lexi { cursor }
484
+
};
485
+
486
+
let limit = match (q.limit, q.order) {
487
+
(Some(limit), _) => limit,
488
+
(None, Some(_)) => 32,
489
+
(None, None) => 100,
490
+
};
491
+
492
+
if !(1..=200).contains(&limit) {
493
+
let msg = format!("limit not in 1..=200: {}", limit);
494
+
return Err(HttpError::for_bad_request(None, msg));
495
+
}
496
+
497
+
let since = q.since.map(dt_to_cursor).transpose()?;
498
+
let until = q.until.map(dt_to_cursor).transpose()?;
499
+
500
+
let (total, children, next_cursor) = storage
501
+
.get_prefix(prefix, limit, order, since, until)
502
+
.await
503
+
.map_err(|e| HttpError::for_internal_error(format!("failed to get prefix stats: {e:?}")))?;
504
+
505
+
let next_cursor = next_cursor.map(|c| URL_SAFE_NO_PAD.encode(c));
506
+
507
+
OkCors(PrefixResponse {
508
+
total,
509
+
children,
510
+
cursor: next_cursor,
511
+
})
512
+
.into()
513
+
}
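// The `group`/`name` split documented on PrefixQuery, shown in plain Rust
// (illustrative only -- the crate's real NsidPrefix type does the validated
// version of this):
fn example_split_group_name(nsid: &str) -> Option<(&str, &str)> {
    // everything before the final segment is the group; the last segment is the name
    nsid.rsplit_once('.')
}

#[test]
fn group_name_split() {
    assert_eq!(
        example_split_group_name("app.bsky.feed.post"),
        Some(("app.bsky.feed", "post"))
    );
    assert_eq!(
        example_split_group_name("app.bsky.feed.like"),
        Some(("app.bsky.feed", "like"))
    );
}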
514
+
515
+
#[derive(Debug, Deserialize, JsonSchema)]
516
+
struct CollectionTimeseriesQuery {
517
+
collection: String, // JsonSchema not implemented for Nsid :(
518
+
/// Limit collections and statistics to those seen after this UTC datetime
519
+
///
520
+
/// default: 1 week ago
521
+
since: Option<DateTime<Utc>>,
522
+
/// Limit collections and statistics to those seen before this UTC datetime
523
+
///
524
+
/// default: now
525
+
until: Option<DateTime<Utc>>,
526
+
/// time steps between data, in seconds
527
+
///
528
+
/// the step will be rounded down to the nearest hour
529
+
///
530
+
/// default: 86400 (24hrs)
531
+
#[schemars(range(min = 3600))]
532
+
step: Option<u64>,
533
+
// todo: rolling averages
534
+
}
535
+
#[derive(Debug, Serialize, JsonSchema)]
536
+
struct CollectionTimeseriesResponse {
537
+
range: Vec<DateTime<Utc>>,
538
+
series: HashMap<String, Vec<JustCount>>,
539
+
}
540
+
/// Collection timeseries stats
541
+
#[endpoint {
542
+
method = GET,
543
+
path = "/timeseries"
544
+
}]
545
+
async fn get_timeseries(
546
+
ctx: RequestContext<Context>,
547
+
query: Query<CollectionTimeseriesQuery>,
548
+
) -> OkCorsResponse<CollectionTimeseriesResponse> {
549
+
let Context { storage, .. } = ctx.context();
550
+
let q = query.into_inner();
551
+
552
+
let since = q.since.map(dt_to_cursor).transpose()?.unwrap_or_else(|| {
553
+
let week_ago_secs = 7 * 86_400;
554
+
let week_ago = SystemTime::now() - Duration::from_secs(week_ago_secs);
555
+
Cursor::at(week_ago).into()
556
+
});
557
+
558
+
let until = q.until.map(dt_to_cursor).transpose()?;
559
+
560
+
let step = if let Some(secs) = q.step {
561
+
if secs < 3600 {
562
+
let msg = format!("step is too small: {}", secs);
563
+
Err(HttpError::for_bad_request(None, msg))?;
564
+
}
565
+
(secs / 3600) * 3600 // truncate to hour
566
+
} else {
567
+
86_400
568
+
};
569
+
570
+
let nsid = Nsid::new(q.collection).map_err(|e| {
571
+
HttpError::for_bad_request(None, format!("collection was not a valid NSID: {:?}", e))
572
+
})?;
573
+
574
+
let (range_cursors, series) = storage
575
+
.get_timeseries(vec![nsid], since, until, step)
576
+
.await
577
+
.map_err(|e| HttpError::for_internal_error(format!("failed to get timeseries: {e:?}")))?;
578
+
579
+
let range = range_cursors
580
+
.into_iter()
581
+
.map(|c| DateTime::<Utc>::from_timestamp_micros(c.to_raw_u64() as i64).unwrap())
582
+
.collect();
583
+
584
+
let series = series
585
+
.into_iter()
586
+
.map(|(k, v)| (k.to_string(), v.iter().map(Into::into).collect()))
587
+
.collect();
588
+
589
+
OkCors(CollectionTimeseriesResponse { range, series }).into()
590
+
}
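// The `step` handling above truncates to whole hours; a minimal standalone
// illustration of that arithmetic:
fn example_truncate_step_to_hour(secs: u64) -> u64 {
    (secs / 3600) * 3600 // integer division drops the sub-hour remainder
}

#[test]
fn step_truncation() {
    assert_eq!(example_truncate_step_to_hour(5400), 3600); // 1.5h -> 1h
    assert_eq!(example_truncate_step_to_hour(7300), 7200); // 2h 100s -> 2h
    assert_eq!(example_truncate_step_to_hour(86_400), 86_400); // exact day unchanged
}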
591
+
592
+
pub async fn serve(storage: impl StoreReader + 'static) -> Result<(), String> {
593
+
let log = ConfigLogging::StderrTerminal {
594
+
level: ConfigLoggingLevel::Info,
595
+
}
596
+
.to_logger("hello-ufos")
597
+
.map_err(|e| e.to_string())?;
598
+
599
+
let mut api = ApiDescription::new();
600
+
601
+
api.register(index).unwrap();
602
+
api.register(get_openapi).unwrap();
603
+
api.register(get_meta_info).unwrap();
604
+
api.register(get_records_by_collections).unwrap();
605
+
api.register(get_collection_stats).unwrap();
606
+
api.register(get_collections).unwrap();
607
+
api.register(get_prefix).unwrap();
608
+
api.register(get_timeseries).unwrap();
609
+
610
+
let context = Context {
611
+
spec: Arc::new(
612
+
api.openapi(
613
+
"UFOs: Every lexicon in the ATmosphere",
614
+
env!("CARGO_PKG_VERSION")
615
+
.parse()
616
+
.inspect_err(|e| {
617
+
log::warn!("failed to parse cargo package version for openapi: {e:?}")
618
+
})
619
+
.unwrap_or(semver::Version::new(0, 0, 1)),
620
+
)
621
+
.description("Samples and statistics of atproto records by their collection NSID")
622
+
.contact_name("part of @microcosm.blue")
623
+
.contact_url("https://microcosm.blue")
624
+
.json()
625
+
.map_err(|e| e.to_string())?,
626
+
),
627
+
storage: Box::new(storage),
628
+
};
629
+
630
+
ServerBuilder::new(api, context, log)
631
+
.config(ConfigDropshot {
632
+
bind_address: "0.0.0.0:9999".parse().unwrap(),
633
+
..Default::default()
634
+
})
635
+
.start()
636
+
.map_err(|error| format!("failed to start server: {}", error))?
637
+
.await
638
+
}
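// How this might be wired from a binary entrypoint -- a sketch only: the
// FjallStorage/FjallConfig types come from ufos/src/storage_fjall.rs further
// down this diff, and the path and endpoint values are placeholders.
async fn example_main() -> Result<(), String> {
    let (reader, _writer, _resume_cursor, _sketch_secret) = FjallStorage::init(
        "/path/to/db",                                   // placeholder
        "wss://jetstream.example/subscribe".to_string(), // placeholder
        false, // don't force a mismatched endpoint
        FjallConfig { temp: false },
    )
    .map_err(|e| format!("storage init failed: {e:?}"))?;
    serve(reader).await
}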
+99
-10
ufos/src/storage.rs
···
1
-
// use crate::store_types::CountsValue;
2
-
use crate::{error::StorageError, ConsumerInfo, Cursor, EventBatch, TopCollections, UFOsRecord};
3
use async_trait::async_trait;
4
use jetstream::exports::{Did, Nsid};
5
use std::path::Path;
6
7
pub type StorageResult<T> = Result<T, StorageError>;
8
9
-
pub trait StorageWhatever<R: StoreReader, W: StoreWriter, C> {
10
fn init(
11
path: impl AsRef<Path>,
12
endpoint: String,
13
force_endpoint: bool,
14
config: C,
15
-
) -> StorageResult<(R, W, Option<Cursor>)>
16
where
17
Self: Sized;
18
}
19
20
-
pub trait StoreWriter: Send + Sync {
21
fn insert_batch<const LIMIT: usize>(
22
&mut self,
23
event_batch: EventBatch<LIMIT>,
24
) -> StorageResult<()>;
25
26
-
fn step_rollup(&mut self) -> StorageResult<usize>;
27
28
-
fn trim_collection(&mut self, collection: &Nsid, limit: usize) -> StorageResult<()>;
29
30
fn delete_account(&mut self, did: &Did) -> StorageResult<usize>;
31
}
32
33
#[async_trait]
···
38
39
async fn get_consumer_info(&self) -> StorageResult<ConsumerInfo>;
40
41
-
async fn get_top_collections(&self) -> StorageResult<TopCollections>;
42
43
-
async fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)>;
44
45
async fn get_records_by_collections(
46
&self,
47
-
collections: &[Nsid],
48
limit: usize,
49
expand_each_collection: bool,
50
) -> StorageResult<Vec<UFOsRecord>>;
···
1
+
use crate::store_types::{CountsValue, HourTruncatedCursor, SketchSecretPrefix};
2
+
use crate::{
3
+
error::StorageError, ConsumerInfo, Cursor, EventBatch, JustCount, NsidCount, NsidPrefix,
4
+
OrderCollectionsBy, PrefixChild, UFOsRecord,
5
+
};
6
use async_trait::async_trait;
7
use jetstream::exports::{Did, Nsid};
8
+
use std::collections::{HashMap, HashSet};
9
use std::path::Path;
10
+
use std::time::{Duration, SystemTime};
11
+
use tokio::sync::mpsc::Receiver;
12
+
use tokio_util::sync::CancellationToken;
13
14
pub type StorageResult<T> = Result<T, StorageError>;
15
16
+
pub trait StorageWhatever<R: StoreReader, W: StoreWriter<B>, B: StoreBackground, C> {
17
fn init(
18
path: impl AsRef<Path>,
19
endpoint: String,
20
force_endpoint: bool,
21
config: C,
22
+
) -> StorageResult<(R, W, Option<Cursor>, SketchSecretPrefix)>
23
where
24
Self: Sized;
25
}
26
27
+
#[async_trait]
28
+
pub trait StoreWriter<B: StoreBackground>: Clone + Send + Sync
29
+
where
30
+
Self: 'static,
31
+
{
32
+
fn background_tasks(&mut self, reroll: bool) -> StorageResult<B>;
33
+
34
+
async fn receive_batches<const LIMIT: usize>(
35
+
self,
36
+
mut batches: Receiver<EventBatch<LIMIT>>,
37
+
) -> StorageResult<()> {
38
+
while let Some(event_batch) = batches.recv().await {
39
+
let token = CancellationToken::new();
40
+
let cancelled = token.clone();
41
+
tokio::spawn(async move {
42
+
let started = SystemTime::now();
43
+
let mut concerned = false;
44
+
loop {
45
+
tokio::select! {
46
+
_ = tokio::time::sleep(Duration::from_secs_f64(3.)) => {
47
+
log::warn!("taking a long time to insert an event batch ({:?})...", started.elapsed());
48
+
concerned = true;
49
+
}
50
+
_ = cancelled.cancelled() => {
51
+
if concerned {
52
+
log::warn!("finally inserted slow event batch (or failed) after {:?}", started.elapsed());
53
+
}
54
+
break
55
+
}
56
+
}
57
+
}
58
+
});
59
+
tokio::task::spawn_blocking({
60
+
let mut me = self.clone();
61
+
move || {
62
+
let _guard = token.drop_guard();
63
+
me.insert_batch(event_batch)
64
+
}
65
+
})
66
+
.await??;
67
+
}
68
+
69
+
Err(StorageError::BatchSenderExited)
70
+
}
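// A standalone sketch of the slow-insert watchdog pattern used in
// receive_batches above (assumes the tokio and tokio-util crates already in
// this workspace): the blocking work holds a DropGuard, so the watchdog task
// ends as soon as the work finishes and only warns if it is still running
// after the timeout.
async fn example_with_slow_warning<T: Send + 'static>(
    work: impl FnOnce() -> T + Send + 'static,
) -> T {
    let token = tokio_util::sync::CancellationToken::new();
    let watchdog = token.clone();
    tokio::spawn(async move {
        tokio::select! {
            _ = tokio::time::sleep(std::time::Duration::from_secs(3)) => {
                log::warn!("work is taking a while (>3s)...");
            }
            _ = watchdog.cancelled() => {} // work finished in time: stay quiet
        }
    });
    tokio::task::spawn_blocking(move || {
        let _guard = token.drop_guard(); // cancels the watchdog when dropped
        work()
    })
    .await
    .expect("blocking task panicked")
}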
71
+
72
fn insert_batch<const LIMIT: usize>(
73
&mut self,
74
event_batch: EventBatch<LIMIT>,
75
) -> StorageResult<()>;
76
77
+
fn step_rollup(&mut self) -> StorageResult<(usize, HashSet<Nsid>)>;
78
79
+
fn trim_collection(
80
+
&mut self,
81
+
collection: &Nsid,
82
+
limit: usize,
83
+
full_scan: bool,
84
+
) -> StorageResult<(usize, usize, bool)>;
85
86
fn delete_account(&mut self, did: &Did) -> StorageResult<usize>;
87
+
}
88
+
89
+
#[async_trait]
90
+
pub trait StoreBackground: Send + Sync {
91
+
async fn run(mut self, backfill: bool) -> StorageResult<()>;
92
}
93
94
#[async_trait]
···
99
100
async fn get_consumer_info(&self) -> StorageResult<ConsumerInfo>;
101
102
+
async fn get_collections(
103
+
&self,
104
+
limit: usize,
105
+
order: OrderCollectionsBy,
106
+
since: Option<HourTruncatedCursor>,
107
+
until: Option<HourTruncatedCursor>,
108
+
) -> StorageResult<(Vec<NsidCount>, Option<Vec<u8>>)>;
109
110
+
async fn get_prefix(
111
+
&self,
112
+
prefix: NsidPrefix,
113
+
limit: usize,
114
+
order: OrderCollectionsBy,
115
+
since: Option<HourTruncatedCursor>,
116
+
until: Option<HourTruncatedCursor>,
117
+
) -> StorageResult<(JustCount, Vec<PrefixChild>, Option<Vec<u8>>)>;
118
+
119
+
async fn get_timeseries(
120
+
&self,
121
+
collections: Vec<Nsid>,
122
+
since: HourTruncatedCursor,
123
+
until: Option<HourTruncatedCursor>,
124
+
step: u64,
125
+
) -> StorageResult<(Vec<HourTruncatedCursor>, HashMap<Nsid, Vec<CountsValue>>)>;
126
+
127
+
async fn get_collection_counts(
128
+
&self,
129
+
collection: &Nsid,
130
+
since: HourTruncatedCursor,
131
+
until: Option<HourTruncatedCursor>,
132
+
) -> StorageResult<JustCount>;
133
134
async fn get_records_by_collections(
135
&self,
136
+
collections: HashSet<Nsid>,
137
limit: usize,
138
expand_each_collection: bool,
139
) -> StorageResult<Vec<UFOsRecord>>;
+1367
-364
ufos/src/storage_fjall.rs
···
1
-
use crate::db_types::{db_complete, DbBytes, DbStaticStr, StaticStr};
2
use crate::error::StorageError;
3
-
use crate::storage::{StorageResult, StorageWhatever, StoreReader, StoreWriter};
4
use crate::store_types::{
5
-
AllTimeRollupKey, CountsValue, DeleteAccountQueueKey, DeleteAccountQueueVal,
6
-
HourTruncatedCursor, HourlyRollupKey, JetstreamCursorKey, JetstreamCursorValue,
7
-
JetstreamEndpointKey, JetstreamEndpointValue, LiveCountsKey, NewRollupCursorKey,
8
-
NewRollupCursorValue, NsidRecordFeedKey, NsidRecordFeedVal, RecordLocationKey,
9
-
RecordLocationMeta, RecordLocationVal, RecordRawValue, TakeoffKey, TakeoffValue,
10
-
WeekTruncatedCursor, WeeklyRollupKey,
11
};
12
-
use crate::{CommitAction, ConsumerInfo, Did, EventBatch, Nsid, TopCollections, UFOsRecord};
13
use async_trait::async_trait;
14
-
use fjall::{Batch as FjallBatch, Config, Keyspace, PartitionCreateOptions, PartitionHandle};
15
use jetstream::events::Cursor;
16
-
use std::collections::HashMap;
17
use std::path::Path;
18
-
use std::time::SystemTime;
19
20
-
const MAX_BATCHED_CLEANUP_SIZE: usize = 1024; // try to commit progress for longer feeds
21
const MAX_BATCHED_ACCOUNT_DELETE_RECORDS: usize = 1024;
22
const MAX_BATCHED_ROLLUP_COUNTS: usize = 256;
23
···
38
/// - key: "takeoff" (literal)
39
/// - val: u64 (micros timestamp, not from jetstream for now so not precise)
40
///
41
/// - Rollup cursor (bg work: roll stats into hourlies, delete accounts, old record deletes)
42
/// - key: "rollup_cursor" (literal)
43
/// - val: u64 (tracks behind js_cursor)
44
///
45
///
46
/// Partition: 'feed'
47
///
···
63
/// - key: "live_counts" || u64 || nullstr (js_cursor, nsid)
64
/// - val: u64 || HLL (count (not cursor), estimator)
65
///
66
/// - Hourly total record counts and dids estimate per collection
67
/// - key: "hourly_counts" || u64 || nullstr (hour, nsid)
68
/// - val: u64 || HLL (count (not cursor), estimator)
69
///
70
/// - Weekly total record counts and dids estimate per collection
71
-
/// - key: "weekly_counts" || u64 || nullstr (hour, nsid)
72
/// - val: u64 || HLL (count (not cursor), estimator)
73
///
74
/// - All-time total record counts and dids estimate per collection
75
/// - key: "ever_counts" || nullstr (nsid)
76
/// - val: u64 || HLL (count (not cursor), estimator)
77
///
78
-
/// - TODO: sorted indexes for all-times?
79
///
80
///
81
/// Partition: 'queues'
···
99
pub temp: bool,
100
}
101
102
-
impl StorageWhatever<FjallReader, FjallWriter, FjallConfig> for FjallStorage {
103
fn init(
104
path: impl AsRef<Path>,
105
endpoint: String,
106
force_endpoint: bool,
107
_config: FjallConfig,
108
-
) -> StorageResult<(FjallReader, FjallWriter, Option<Cursor>)> {
109
let keyspace = {
110
let config = Config::new(path);
111
112
-
#[cfg(not(test))]
113
-
let config = config.fsync_ms(Some(4_000));
114
115
config.open()?
116
};
···
123
124
let js_cursor = get_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?;
125
126
-
if js_cursor.is_some() {
127
let stored_endpoint =
128
get_static_neu::<JetstreamEndpointKey, JetstreamEndpointValue>(&global)?;
129
-
130
let JetstreamEndpointValue(stored) = stored_endpoint.ok_or(StorageError::InitError(
131
"found cursor but missing js_endpoint, refusing to start.".to_string(),
132
))?;
133
134
if stored != endpoint {
135
if force_endpoint {
···
140
)?;
141
} else {
142
return Err(StorageError::InitError(format!(
143
-
"stored js_endpoint {stored:?} differs from provided {endpoint:?}, refusing to start.")));
144
}
145
}
146
} else {
147
-
insert_static_neu::<JetstreamEndpointKey>(
148
&global,
149
JetstreamEndpointValue(endpoint.to_string()),
150
)?;
151
-
insert_static_neu::<TakeoffKey>(&global, Cursor::at(SystemTime::now()))?;
152
-
insert_static_neu::<NewRollupCursorKey>(&global, Cursor::from_start())?;
153
-
}
154
155
let reader = FjallReader {
156
keyspace: keyspace.clone(),
···
160
rollups: rollups.clone(),
161
};
162
let writer = FjallWriter {
163
keyspace,
164
global,
165
feeds,
···
167
rollups,
168
queues,
169
};
170
-
Ok((reader, writer, js_cursor))
171
}
172
}
173
···
264
}
265
}
266
267
impl FjallReader {
268
fn get_storage_stats(&self) -> StorageResult<serde_json::Value> {
269
let rollup_cursor =
···
298
get_snapshot_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?
299
.map(|c| c.to_raw_u64());
300
301
Ok(ConsumerInfo::Jetstream {
302
endpoint,
303
started_at,
304
latest_cursor,
305
})
306
}
307
308
-
fn get_top_collections(&self) -> Result<TopCollections, StorageError> {
309
-
// TODO: limit nsid traversal depth
310
-
// TODO: limit nsid traversal breadth
311
-
// TODO: be serious about anything
312
313
-
// TODO: probably use a stack of segments to reduce to ~log-n merges
314
315
-
#[derive(Default)]
316
-
struct Blah {
317
-
counts: CountsValue,
318
-
children: HashMap<String, Blah>,
319
}
320
-
impl From<&Blah> for TopCollections {
321
-
fn from(bla: &Blah) -> Self {
322
-
Self {
323
-
total_records: bla.counts.records(),
324
-
dids_estimate: bla.counts.dids().estimate() as u64,
325
-
nsid_child_segments: HashMap::from_iter(
326
-
bla.children.iter().map(|(k, v)| (k.to_string(), v.into())),
327
-
),
328
}
329
}
330
}
331
332
-
let mut b = Blah::default();
333
-
let prefix = AllTimeRollupKey::from_prefix_to_db_bytes(&Default::default())?;
334
-
for kv in self.rollups.prefix(&prefix.to_db_bytes()?) {
335
-
let (key_bytes, val_bytes) = kv?;
336
-
let key = db_complete::<AllTimeRollupKey>(&key_bytes)?;
337
-
let val = db_complete::<CountsValue>(&val_bytes)?;
338
339
-
let mut node = &mut b;
340
-
node.counts.merge(&val);
341
-
for segment in key.collection().split('.') {
342
-
node = node.children.entry(segment.to_string()).or_default();
343
-
node.counts.merge(&val);
344
}
345
}
346
347
-
Ok((&b).into())
348
}
349
350
-
fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> {
351
-
// 0. grab a snapshot in case rollups happen while we're working
352
-
let instant = self.keyspace.instant();
353
-
let global = self.global.snapshot_at(instant);
354
-
let rollups = self.rollups.snapshot_at(instant);
355
356
-
// 1. all-time counts
357
-
let all_time_key = AllTimeRollupKey::new(collection).to_db_bytes()?;
358
-
let mut total_counts = rollups
359
-
.get(&all_time_key)?
360
-
.as_deref()
361
-
.map(db_complete::<CountsValue>)
362
-
.transpose()?
363
-
.unwrap_or_default();
364
365
-
// 2. live counts that haven't been rolled into all-time yet.
366
-
let rollup_cursor =
367
-
get_snapshot_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&global)?.ok_or(
368
-
StorageError::BadStateError("Could not find current rollup cursor".to_string()),
369
-
)?;
370
371
-
let full_range = LiveCountsKey::range_from_cursor(rollup_cursor)?;
372
-
for kv in rollups.range(full_range) {
373
-
let (key_bytes, val_bytes) = kv?;
374
-
let key = db_complete::<LiveCountsKey>(&key_bytes)?;
375
-
if key.collection() == collection {
376
-
let counts = db_complete::<CountsValue>(&val_bytes)?;
377
-
total_counts.merge(&counts);
378
}
379
}
380
-
Ok((
381
-
total_counts.records(),
382
-
total_counts.dids().estimate() as u64,
383
-
))
384
}
385
386
fn get_records_by_collections(
387
&self,
388
-
collections: &[Nsid],
389
limit: usize,
390
expand_each_collection: bool,
391
) -> StorageResult<Vec<UFOsRecord>> {
···
394
}
395
let mut record_iterators = Vec::new();
396
for collection in collections {
397
-
let iter = RecordIterator::new(&self.feeds, self.records.clone(), collection, limit)?;
398
record_iterators.push(iter.peekable());
399
}
400
let mut merged = Vec::new();
···
446
let s = self.clone();
447
tokio::task::spawn_blocking(move || FjallReader::get_consumer_info(&s)).await?
448
}
449
-
async fn get_top_collections(&self) -> Result<TopCollections, StorageError> {
450
let s = self.clone();
451
-
tokio::task::spawn_blocking(move || FjallReader::get_top_collections(&s)).await?
452
}
453
-
async fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> {
454
let s = self.clone();
455
let collection = collection.clone();
456
-
tokio::task::spawn_blocking(move || FjallReader::get_counts_by_collection(&s, &collection))
457
-
.await?
458
}
459
async fn get_records_by_collections(
460
&self,
461
-
collections: &[Nsid],
462
limit: usize,
463
expand_each_collection: bool,
464
) -> StorageResult<Vec<UFOsRecord>> {
465
let s = self.clone();
466
-
let collections = collections.to_vec();
467
tokio::task::spawn_blocking(move || {
468
-
FjallReader::get_records_by_collections(&s, &collections, limit, expand_each_collection)
469
})
470
.await?
471
}
472
}
473
474
pub struct FjallWriter {
475
keyspace: Keyspace,
476
global: PartitionHandle,
477
feeds: PartitionHandle,
···
501
timelies: impl Iterator<Item = Result<(fjall::Slice, fjall::Slice), fjall::Error>>,
502
cursor_exclusive_limit: Option<Cursor>,
503
rollup_limit: usize,
504
-
) -> StorageResult<usize> {
505
// current strategy is to buffer counts in mem before writing the rollups
506
// we *could* read+write every single batch to rollup.. but their merge is associative so
507
// ...so save the db some work up front? is this worth it? who knows...
508
509
#[derive(Eq, Hash, PartialEq)]
510
enum Rollup {
···
533
break;
534
}
535
536
batch.remove(&self.rollups, key_bytes);
537
let val = db_complete::<CountsValue>(&val_bytes)?;
538
counts_by_rollup
···
558
last_cursor = key.cursor();
559
}
560
561
for ((nsid, rollup), counts) in counts_by_rollup {
562
-
let key_bytes = match rollup {
563
Rollup::Hourly(hourly_cursor) => {
564
-
let k = HourlyRollupKey::new(hourly_cursor, &nsid);
565
-
k.to_db_bytes()?
566
}
567
Rollup::Weekly(weekly_cursor) => {
568
-
let k = WeeklyRollupKey::new(weekly_cursor, &nsid);
569
-
k.to_db_bytes()?
570
}
571
-
Rollup::AllTime => {
572
-
let k = AllTimeRollupKey::new(&nsid);
573
-
k.to_db_bytes()?
574
-
}
575
};
576
let mut rolled: CountsValue = self
577
.rollups
578
-
.get(&key_bytes)?
579
.as_deref()
580
.map(db_complete::<CountsValue>)
581
.transpose()?
582
.unwrap_or_default();
583
584
-
// try to round-trip before inserting, for funsies
585
-
let tripppin = counts.to_db_bytes()?;
586
-
let (and_back, n) = CountsValue::from_db_bytes(&tripppin)?;
587
-
assert_eq!(n, tripppin.len());
588
-
assert_eq!(counts.prefix, and_back.prefix);
589
-
assert_eq!(counts.dids().estimate(), and_back.dids().estimate());
590
-
if counts.records() > 200_000_000_000 {
591
-
panic!("COUNTS maybe wtf? {counts:?}")
592
}
593
594
-
rolled.merge(&counts);
595
-
batch.insert(&self.rollups, &key_bytes, &rolled.to_db_bytes()?);
596
}
597
598
insert_batch_static_neu::<NewRollupCursorKey>(&mut batch, &self.global, last_cursor)?;
599
600
batch.commit()?;
601
-
Ok(cursors_advanced)
602
}
603
}
604
605
-
impl StoreWriter for FjallWriter {
606
fn insert_batch<const LIMIT: usize>(
607
&mut self,
608
event_batch: EventBatch<LIMIT>,
···
645
}
646
}
647
let live_counts_key: LiveCountsKey = (latest, &nsid).into();
648
-
let counts_value = CountsValue::new(commits.total_seen as u64, commits.dids_estimate);
649
batch.insert(
650
&self.rollups,
651
&live_counts_key.to_db_bytes()?,
···
673
Ok(())
674
}
675
676
-
fn step_rollup(&mut self) -> StorageResult<usize> {
677
let rollup_cursor =
678
get_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&self.global)?.ok_or(
679
StorageError::BadStateError("Could not find current rollup cursor".to_string()),
···
683
let live_counts_range = LiveCountsKey::range_from_cursor(rollup_cursor)?;
684
let mut timely_iter = self.rollups.range(live_counts_range).peekable();
685
686
-
let timely_next_cursor = timely_iter
687
.peek_mut()
688
-
.map(|kv| -> StorageResult<Cursor> {
689
match kv {
690
Err(e) => Err(std::mem::replace(e, fjall::Error::Poisoned))?,
691
Ok((key_bytes, _)) => {
692
let key = db_complete::<LiveCountsKey>(key_bytes)?;
693
-
Ok(key.cursor())
694
}
695
}
696
})
···
711
})
712
.transpose()?;
713
714
-
let cursors_stepped = match (timely_next_cursor, next_delete) {
715
-
(
716
-
Some(timely_next_cursor),
717
-
Some((delete_cursor, delete_key_bytes, delete_val_bytes)),
718
-
) => {
719
-
if timely_next_cursor < delete_cursor {
720
-
self.rollup_live_counts(
721
timely_iter,
722
Some(delete_cursor),
723
MAX_BATCHED_ROLLUP_COUNTS,
724
-
)?
725
} else {
726
self.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)?
727
}
728
}
729
(Some(_), None) => {
730
-
self.rollup_live_counts(timely_iter, None, MAX_BATCHED_ROLLUP_COUNTS)?
731
}
732
(None, Some((delete_cursor, delete_key_bytes, delete_val_bytes))) => {
733
self.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)?
···
735
(None, None) => 0,
736
};
737
738
-
Ok(cursors_stepped)
739
}
740
741
fn trim_collection(
742
&mut self,
743
collection: &Nsid,
744
limit: usize,
745
-
// TODO: could add a start cursor limit to avoid iterating deleted stuff at the start (/end)
746
-
) -> StorageResult<()> {
747
let mut dangling_feed_keys_cleaned = 0;
748
let mut records_deleted = 0;
749
750
-
let mut batch = self.keyspace.batch();
751
752
-
let prefix = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?;
753
-
let mut found = 0;
754
-
for kv in self.feeds.prefix(prefix).rev() {
755
let (key_bytes, val_bytes) = kv?;
756
let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?;
757
let feed_val = db_complete::<NsidRecordFeedVal>(&val_bytes)?;
···
760
761
let Some(location_val_bytes) = self.records.get(&location_key_bytes)? else {
762
// record was deleted (hopefully)
763
-
batch.remove(&self.feeds, &location_key_bytes);
764
dangling_feed_keys_cleaned += 1;
765
continue;
766
};
767
768
let (meta, _) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?;
769
770
if meta.cursor() != feed_key.cursor() {
771
// older/different version
772
-
batch.remove(&self.feeds, &location_key_bytes);
773
dangling_feed_keys_cleaned += 1;
774
continue;
775
}
776
if meta.rev != feed_val.rev() {
777
// weird...
778
log::warn!("record lookup: cursor match but rev did not...? removing.");
779
-
batch.remove(&self.feeds, &location_key_bytes);
780
dangling_feed_keys_cleaned += 1;
781
continue;
782
}
783
784
-
if batch.len() >= MAX_BATCHED_CLEANUP_SIZE {
785
-
batch.commit()?;
786
-
batch = self.keyspace.batch();
787
-
}
788
-
789
-
found += 1;
790
-
if found <= limit {
791
continue;
792
}
793
794
-
batch.remove(&self.feeds, &location_key_bytes);
795
-
batch.remove(&self.records, &location_key_bytes);
796
records_deleted += 1;
797
}
798
799
-
batch.commit()?;
800
801
-
log::info!("trim_collection ({collection:?}) removed {dangling_feed_keys_cleaned} dangling feed entries and {records_deleted} records");
802
-
Ok(())
803
}
804
805
fn delete_account(&mut self, did: &Did) -> Result<usize, StorageError> {
···
817
}
818
batch.commit()?;
819
Ok(records_deleted)
820
}
821
}
822
···
853
Ok(())
854
}
855
856
/// Set a value to a fixed key
857
fn insert_batch_static_neu<K: StaticStr>(
858
batch: &mut FjallBatch,
···
875
876
////////// temp stuff to remove:
877
878
-
// fn summarize_batch<const LIMIT: usize>(batch: &EventBatch<LIMIT>) -> String {
879
-
// format!(
880
-
// "batch of {: >3} samples from {: >4} records in {: >2} collections from ~{: >4} DIDs, {} acct removes, cursor {: <12?}",
881
-
// batch.total_records(),
882
-
// batch.total_seen(),
883
-
// batch.total_collections(),
884
-
// batch.estimate_dids(),
885
-
// batch.account_removes(),
886
-
// batch.latest_cursor().map(|c| c.elapsed()),
887
-
// )
888
-
// }
889
-
890
#[cfg(test)]
891
mod tests {
892
use super::*;
···
896
use serde_json::value::RawValue;
897
898
fn fjall_db() -> (FjallReader, FjallWriter) {
899
-
let (read, write, _) = FjallStorage::init(
900
tempfile::tempdir().unwrap(),
901
"offline test (no real jetstream endpoint)".to_string(),
902
false,
···
907
}
908
909
const TEST_BATCH_LIMIT: usize = 16;
910
911
#[derive(Debug, Default)]
912
struct TestBatch {
···
951
.commits_by_nsid
952
.entry(collection.clone())
953
.or_default()
954
-
.truncating_insert(commit)
955
.unwrap();
956
957
collection
···
993
.commits_by_nsid
994
.entry(collection.clone())
995
.or_default()
996
-
.truncating_insert(commit)
997
.unwrap();
998
999
collection
···
1025
.commits_by_nsid
1026
.entry(collection.clone())
1027
.or_default()
1028
-
.truncating_insert(commit)
1029
.unwrap();
1030
1031
collection
···
1044
fn test_hello() -> anyhow::Result<()> {
1045
let (read, mut write) = fjall_db();
1046
write.insert_batch::<TEST_BATCH_LIMIT>(EventBatch::default())?;
1047
-
let (records, dids) =
1048
-
read.get_counts_by_collection(&Nsid::new("a.b.c".to_string()).unwrap())?;
1049
-
assert_eq!(records, 0);
1050
-
assert_eq!(dids, 0);
1051
Ok(())
1052
}
1053
···
1066
100,
1067
);
1068
write.insert_batch(batch.batch)?;
1069
1070
-
let (records, dids) = read.get_counts_by_collection(&collection)?;
1071
-
assert_eq!(records, 1);
1072
-
assert_eq!(dids, 1);
1073
-
let (records, dids) =
1074
-
read.get_counts_by_collection(&Nsid::new("d.e.f".to_string()).unwrap())?;
1075
-
assert_eq!(records, 0);
1076
-
assert_eq!(dids, 0);
1077
1078
-
let records = read.get_records_by_collections(&[collection], 2, false)?;
1079
assert_eq!(records.len(), 1);
1080
let rec = &records[0];
1081
assert_eq!(rec.record.get(), "{}");
1082
assert!(!rec.is_update);
1083
1084
-
let records =
1085
-
read.get_records_by_collections(&[Nsid::new("d.e.f".to_string()).unwrap()], 2, false)?;
1086
assert_eq!(records.len(), 0);
1087
1088
Ok(())
···
1123
write.insert_batch(batch.batch)?;
1124
1125
let records = read.get_records_by_collections(
1126
-
&[
1127
Nsid::new("a.a.a".to_string()).unwrap(),
1128
Nsid::new("a.a.b".to_string()).unwrap(),
1129
Nsid::new("a.a.c".to_string()).unwrap(),
1130
-
],
1131
100,
1132
false,
1133
)?;
···
1183
write.insert_batch(batch.batch)?;
1184
1185
let records = read.get_records_by_collections(
1186
-
&[
1187
Nsid::new("a.a.a".to_string()).unwrap(),
1188
Nsid::new("a.a.b".to_string()).unwrap(),
1189
Nsid::new("a.a.c".to_string()).unwrap(),
1190
-
],
1191
2,
1192
true,
1193
)?;
···
1234
101,
1235
);
1236
write.insert_batch(batch.batch)?;
1237
1238
-
let (records, dids) = read.get_counts_by_collection(&collection)?;
1239
-
assert_eq!(records, 1);
1240
-
assert_eq!(dids, 1);
1241
1242
-
let records = read.get_records_by_collections(&[collection], 2, false)?;
1243
assert_eq!(records.len(), 1);
1244
let rec = &records[0];
1245
assert_eq!(rec.record.get(), r#"{"ch": "ch-ch-ch-changes"}"#);
···
1272
101,
1273
);
1274
write.insert_batch(batch.batch)?;
1275
1276
-
let (records, dids) = read.get_counts_by_collection(&collection)?;
1277
-
assert_eq!(records, 1);
1278
-
assert_eq!(dids, 1);
1279
1280
-
let records = read.get_records_by_collections(&[collection], 2, false)?;
1281
assert_eq!(records.len(), 0);
1282
1283
Ok(())
···
1323
write.insert_batch(batch.batch)?;
1324
1325
let records = read.get_records_by_collections(
1326
-
&[Nsid::new("a.a.a".to_string()).unwrap()],
1327
100,
1328
false,
1329
)?;
1330
assert_eq!(records.len(), 1);
1331
let records = read.get_records_by_collections(
1332
-
&[Nsid::new("a.a.b".to_string()).unwrap()],
1333
100,
1334
false,
1335
)?;
1336
assert_eq!(records.len(), 10);
1337
let records = read.get_records_by_collections(
1338
-
&[Nsid::new("a.a.c".to_string()).unwrap()],
1339
100,
1340
false,
1341
)?;
1342
assert_eq!(records.len(), 1);
1343
let records = read.get_records_by_collections(
1344
-
&[Nsid::new("a.a.d".to_string()).unwrap()],
1345
100,
1346
false,
1347
)?;
1348
assert_eq!(records.len(), 0);
1349
1350
-
write.trim_collection(&Nsid::new("a.a.a".to_string()).unwrap(), 6)?;
1351
-
write.trim_collection(&Nsid::new("a.a.b".to_string()).unwrap(), 6)?;
1352
-
write.trim_collection(&Nsid::new("a.a.c".to_string()).unwrap(), 6)?;
1353
-
write.trim_collection(&Nsid::new("a.a.d".to_string()).unwrap(), 6)?;
1354
1355
let records = read.get_records_by_collections(
1356
-
&[Nsid::new("a.a.a".to_string()).unwrap()],
1357
100,
1358
false,
1359
)?;
1360
assert_eq!(records.len(), 1);
1361
let records = read.get_records_by_collections(
1362
-
&[Nsid::new("a.a.b".to_string()).unwrap()],
1363
100,
1364
false,
1365
)?;
1366
assert_eq!(records.len(), 6);
1367
let records = read.get_records_by_collections(
1368
-
&[Nsid::new("a.a.c".to_string()).unwrap()],
1369
100,
1370
false,
1371
)?;
1372
assert_eq!(records.len(), 1);
1373
let records = read.get_records_by_collections(
1374
-
&[Nsid::new("a.a.d".to_string()).unwrap()],
1375
100,
1376
false,
1377
)?;
···
1408
write.insert_batch(batch.batch)?;
1409
1410
let records = read.get_records_by_collections(
1411
-
&[Nsid::new("a.a.a".to_string()).unwrap()],
1412
100,
1413
false,
1414
)?;
···
1419
assert_eq!(records_deleted, 2);
1420
1421
let records = read.get_records_by_collections(
1422
-
&[Nsid::new("a.a.a".to_string()).unwrap()],
1423
100,
1424
false,
1425
)?;
···
1450
1451
write.step_rollup()?;
1452
1453
-
let records =
1454
-
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
1455
assert_eq!(records.len(), 0);
1456
1457
Ok(())
···
1473
);
1474
write.insert_batch(batch.batch)?;
1475
1476
-
let n = write.step_rollup()?;
1477
assert_eq!(n, 1);
1478
1479
let mut batch = TestBatch::default();
1480
batch.delete_account("did:plc:person-a", 10_001);
1481
write.insert_batch(batch.batch)?;
1482
1483
-
let records =
1484
-
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
1485
assert_eq!(records.len(), 1);
1486
1487
-
let n = write.step_rollup()?;
1488
assert_eq!(n, 1);
1489
1490
-
let records =
1491
-
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
1492
assert_eq!(records.len(), 0);
1493
1494
let mut batch = TestBatch::default();
1495
batch.delete_account("did:plc:person-a", 9_999);
1496
write.insert_batch(batch.batch)?;
1497
1498
-
let n = write.step_rollup()?;
1499
assert_eq!(n, 0);
1500
1501
Ok(())
···
1529
);
1530
write.insert_batch(batch.batch)?;
1531
1532
-
let n = write.step_rollup()?;
1533
assert_eq!(n, 2);
1534
1535
-
let n = write.step_rollup()?;
1536
assert_eq!(n, 0);
1537
1538
Ok(())
···
1580
write.insert_batch(batch.batch)?;
1581
1582
// before any rollup
1583
-
let (records, dids) =
1584
-
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
1585
-
assert_eq!(records, 3);
1586
-
assert_eq!(dids, 2);
1587
1588
// first batch rolled up
1589
-
let n = write.step_rollup()?;
1590
assert_eq!(n, 1);
1591
1592
-
let (records, dids) =
1593
-
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
1594
-
assert_eq!(records, 3);
1595
-
assert_eq!(dids, 2);
1596
1597
// delete account rolled up
1598
-
let n = write.step_rollup()?;
1599
assert_eq!(n, 1);
1600
1601
-
let (records, dids) =
1602
-
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
1603
-
assert_eq!(records, 3);
1604
-
assert_eq!(dids, 2);
1605
1606
// second batch rolled up
1607
-
let n = write.step_rollup()?;
1608
assert_eq!(n, 1);
1609
1610
-
let (records, dids) =
1611
-
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
1612
-
assert_eq!(records, 3);
1613
-
assert_eq!(dids, 2);
1614
1615
// no more rollups left
1616
-
let n = write.step_rollup()?;
1617
assert_eq!(n, 0);
1618
1619
Ok(())
1620
}
1621
1622
#[test]
1623
-
fn get_top_collections() -> anyhow::Result<()> {
1624
let (read, mut write) = fjall_db();
1625
1626
let mut batch = TestBatch::default();
···
1633
None,
1634
10_000,
1635
);
1636
batch.create(
1637
-
"did:plc:person-b",
1638
-
"a.a.b",
1639
-
"rkey-bbb",
1640
"{}",
1641
-
Some("rev-bbb"),
1642
None,
1643
-
10_001,
1644
);
1645
batch.create(
1646
-
"did:plc:person-c",
1647
-
"a.b.c",
1648
-
"rkey-ccc",
1649
"{}",
1650
-
Some("rev-ccc"),
1651
None,
1652
-
10_002,
1653
);
1654
batch.create(
1655
"did:plc:person-a",
1656
-
"a.a.a",
1657
-
"rkey-aaa-2",
1658
"{}",
1659
-
Some("rev-aaa-2"),
1660
None,
1661
-
10_003,
1662
);
1663
write.insert_batch(batch.batch)?;
1664
1665
-
let n = write.step_rollup()?;
1666
-
assert_eq!(n, 3); // 3 collections
1667
-
1668
-
let tops = read.get_top_collections()?;
1669
assert_eq!(
1670
-
tops,
1671
-
TopCollections {
1672
-
total_records: 4,
1673
-
dids_estimate: 3,
1674
-
nsid_child_segments: HashMap::from([(
1675
-
"a".to_string(),
1676
-
TopCollections {
1677
-
total_records: 4,
1678
-
dids_estimate: 3,
1679
-
nsid_child_segments: HashMap::from([
1680
-
(
1681
-
"a".to_string(),
1682
-
TopCollections {
1683
-
total_records: 3,
1684
-
dids_estimate: 2,
1685
-
nsid_child_segments: HashMap::from([
1686
-
(
1687
-
"a".to_string(),
1688
-
TopCollections {
1689
-
total_records: 2,
1690
-
dids_estimate: 1,
1691
-
nsid_child_segments: HashMap::from([]),
1692
-
},
1693
-
),
1694
-
(
1695
-
"b".to_string(),
1696
-
TopCollections {
1697
-
total_records: 1,
1698
-
dids_estimate: 1,
1699
-
nsid_child_segments: HashMap::from([]),
1700
-
}
1701
-
),
1702
-
]),
1703
-
},
1704
-
),
1705
-
(
1706
-
"b".to_string(),
1707
-
TopCollections {
1708
-
total_records: 1,
1709
-
dids_estimate: 1,
1710
-
nsid_child_segments: HashMap::from([(
1711
-
"c".to_string(),
1712
-
TopCollections {
1713
-
total_records: 1,
1714
-
dids_estimate: 1,
1715
-
nsid_child_segments: HashMap::from([]),
1716
-
},
1717
-
),]),
1718
-
},
1719
-
),
1720
-
]),
1721
-
},
1722
-
),]),
1723
-
}
1724
);
1725
Ok(())
1726
}
1727
1728
#[test]
1729
-
fn get_top_collections_with_parent_nsid() -> anyhow::Result<()> {
1730
let (read, mut write) = fjall_db();
1731
1732
let mut batch = TestBatch::default();
1733
batch.create(
1734
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
1735
"a.a.a.a",
1736
-
"aaaa",
1737
-
r#""child nsid""#,
1738
Some("rev-aaaa"),
1739
None,
1740
-
100,
1741
);
1742
batch.create(
1743
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
1744
-
"a.a.a",
1745
-
"aaa",
1746
-
r#""parent nsid""#,
1747
-
Some("rev-aaa"),
1748
None,
1749
-
101,
1750
);
1751
write.insert_batch(batch.batch)?;
1752
-
1753
-
let n = write.step_rollup()?;
1754
-
assert_eq!(n, 2); // 2 collections
1755
1756
-
let tops = read.get_top_collections()?;
1757
assert_eq!(
1758
-
tops,
1759
-
TopCollections {
1760
-
total_records: 2,
1761
-
dids_estimate: 1,
1762
-
nsid_child_segments: HashMap::from([(
1763
-
"a".to_string(),
1764
-
TopCollections {
1765
-
total_records: 2,
1766
-
dids_estimate: 1,
1767
-
nsid_child_segments: HashMap::from([(
1768
-
"a".to_string(),
1769
-
TopCollections {
1770
-
total_records: 2,
1771
-
dids_estimate: 1,
1772
-
nsid_child_segments: HashMap::from([(
1773
-
"a".to_string(),
1774
-
TopCollections {
1775
-
total_records: 2,
1776
-
dids_estimate: 1,
1777
-
nsid_child_segments: HashMap::from([(
1778
-
"a".to_string(),
1779
-
TopCollections {
1780
-
total_records: 1,
1781
-
dids_estimate: 1,
1782
-
nsid_child_segments: HashMap::from([]),
1783
-
},
1784
-
),]),
1785
-
},
1786
-
),]),
1787
-
},
1788
-
),]),
1789
-
},
1790
-
),]),
1791
-
}
1792
);
1793
1794
-
// TODO: handle leaf node counts explicitly, since parent NSIDs can be leaves themselves
1795
1796
Ok(())
1797
}
1798
}
···
1
+
use crate::db_types::{
2
+
db_complete, DbBytes, DbStaticStr, EncodingResult, StaticStr, SubPrefixBytes,
3
+
};
4
use crate::error::StorageError;
5
+
use crate::storage::{StorageResult, StorageWhatever, StoreBackground, StoreReader, StoreWriter};
6
use crate::store_types::{
7
+
AllTimeDidsKey, AllTimeRecordsKey, AllTimeRollupKey, CommitCounts, CountsValue, CursorBucket,
8
+
DeleteAccountQueueKey, DeleteAccountQueueVal, HourTruncatedCursor, HourlyDidsKey,
9
+
HourlyRecordsKey, HourlyRollupKey, HourlyRollupStaticPrefix, JetstreamCursorKey,
10
+
JetstreamCursorValue, JetstreamEndpointKey, JetstreamEndpointValue, LiveCountsKey,
11
+
NewRollupCursorKey, NewRollupCursorValue, NsidRecordFeedKey, NsidRecordFeedVal,
12
+
RecordLocationKey, RecordLocationMeta, RecordLocationVal, RecordRawValue, SketchSecretKey,
13
+
SketchSecretPrefix, TakeoffKey, TakeoffValue, TrimCollectionCursorKey, WeekTruncatedCursor,
14
+
WeeklyDidsKey, WeeklyRecordsKey, WeeklyRollupKey, WithCollection, WithRank, HOUR_IN_MICROS,
15
+
WEEK_IN_MICROS,
16
};
17
+
use crate::{
18
+
nice_duration, CommitAction, ConsumerInfo, Did, EncodingError, EventBatch, JustCount, Nsid,
19
+
NsidCount, NsidPrefix, OrderCollectionsBy, PrefixChild, PrefixCount, UFOsRecord,
20
+
};
21
use async_trait::async_trait;
22
+
use fjall::{
23
+
Batch as FjallBatch, Config, Keyspace, PartitionCreateOptions, PartitionHandle, Snapshot,
24
+
};
25
use jetstream::events::Cursor;
26
+
use std::collections::{HashMap, HashSet};
27
+
use std::iter::Peekable;
28
+
use std::ops::Bound;
29
use std::path::Path;
30
+
use std::sync::{
31
+
atomic::{AtomicBool, Ordering},
32
+
Arc,
33
+
};
34
+
use std::time::{Duration, Instant, SystemTime};
35
36
const MAX_BATCHED_ACCOUNT_DELETE_RECORDS: usize = 1024;
37
const MAX_BATCHED_ROLLUP_COUNTS: usize = 256;
38
···
53
/// - key: "takeoff" (literal)
54
/// - val: u64 (micros timestamp, not from jetstream for now so not precise)
55
///
56
+
/// - Cardinality estimator secret
57
+
/// - key: "sketch_secret" (literal)
58
+
/// - val: [u8; 16]
59
+
///
60
/// - Rollup cursor (bg work: roll stats into hourlies, delete accounts, old record deletes)
61
/// - key: "rollup_cursor" (literal)
62
/// - val: u64 (tracks behind js_cursor)
63
///
64
+
/// - Feed trim cursor (bg work: delete oldest excess records)
65
+
/// - key: "trim_cursor" || nullstr (nsid)
66
+
/// - val: u64 (earliest previously-removed feed entry jetstream cursor)
67
///
68
/// Partition: 'feed'
69
///
···
85
/// - key: "live_counts" || u64 || nullstr (js_cursor, nsid)
86
/// - val: u64 || HLL (count (not cursor), estimator)
87
///
88
+
///
89
/// - Hourly total record counts and dids estimate per collection
90
/// - key: "hourly_counts" || u64 || nullstr (hour, nsid)
91
/// - val: u64 || HLL (count (not cursor), estimator)
92
///
93
+
/// - Hourly record count ranking
94
+
/// - key: "hourly_rank_records" || u64 || u64 || nullstr (hour, count, nsid)
95
+
/// - val: [empty]
96
+
///
97
+
/// - Hourly did estimate ranking
98
+
/// - key: "hourly_rank_dids" || u64 || u64 || nullstr (hour, dids estimate, nsid)
99
+
/// - val: [empty]
100
+
///
101
+
///
102
/// - Weekly total record counts and dids estimate per collection
103
+
/// - key: "weekly_counts" || u64 || nullstr (week, nsid)
104
/// - val: u64 || HLL (count (not cursor), estimator)
105
+
///
106
+
/// - Weekly record count ranking
107
+
/// - key: "weekly_rank_records" || u64 || u64 || nullstr (week, count, nsid)
108
+
/// - val: [empty]
109
+
///
110
+
/// - Weekly did estimate ranking
111
+
/// - key: "weekly_rank_dids" || u64 || u64 || nullstr (week, dids estimate, nsid)
112
+
/// - val: [empty]
113
+
///
114
///
115
/// - All-time total record counts and dids estimate per collection
116
/// - key: "ever_counts" || nullstr (nsid)
117
/// - val: u64 || HLL (count (not cursor), estimator)
118
///
119
+
/// - All-time total record record count ranking
120
+
/// - key: "ever_rank_records" || u64 || nullstr (count, nsid)
121
+
/// - val: [empty]
122
+
///
123
+
/// - All-time did estimate ranking
124
+
/// - key: "ever_rank_dids" || u64 || nullstr (dids estimate, nsid)
125
+
/// - val: [empty]
126
///
127
///
128
/// Partition: 'queues'
···
146
pub temp: bool,
147
}
148
149
+
impl StorageWhatever<FjallReader, FjallWriter, FjallBackground, FjallConfig> for FjallStorage {
150
fn init(
151
path: impl AsRef<Path>,
152
endpoint: String,
153
force_endpoint: bool,
154
_config: FjallConfig,
155
+
) -> StorageResult<(FjallReader, FjallWriter, Option<Cursor>, SketchSecretPrefix)> {
156
let keyspace = {
157
let config = Config::new(path);
158
159
+
// #[cfg(not(test))]
160
+
// let config = config.fsync_ms(Some(4_000));
161
162
config.open()?
163
};
···
170
171
let js_cursor = get_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?;
172
173
+
let sketch_secret = if js_cursor.is_some() {
174
let stored_endpoint =
175
get_static_neu::<JetstreamEndpointKey, JetstreamEndpointValue>(&global)?;
176
let JetstreamEndpointValue(stored) = stored_endpoint.ok_or(StorageError::InitError(
177
"found cursor but missing js_endpoint, refusing to start.".to_string(),
178
))?;
179
+
180
+
let Some(stored_secret) =
181
+
get_static_neu::<SketchSecretKey, SketchSecretPrefix>(&global)?
182
+
else {
183
+
return Err(StorageError::InitError(
184
+
"found cursor but missing sketch_secret, refusing to start.".to_string(),
185
+
));
186
+
};
187
188
if stored != endpoint {
189
if force_endpoint {
···
194
)?;
195
} else {
196
return Err(StorageError::InitError(format!(
197
+
"stored js_endpoint {stored:?} differs from provided {endpoint:?}, refusing to start without --jetstream-force.")));
198
}
199
}
200
+
stored_secret
201
} else {
202
+
log::info!("initializing a fresh db!");
203
+
init_static_neu::<JetstreamEndpointKey>(
204
&global,
205
JetstreamEndpointValue(endpoint.to_string()),
206
)?;
207
+
208
+
log::info!("generating new secret for cardinality sketches...");
209
+
let mut sketch_secret: SketchSecretPrefix = [0u8; 16];
210
+
getrandom::fill(&mut sketch_secret).map_err(|e| {
211
+
StorageError::InitError(format!(
212
+
"failed to get a random secret for cardinality sketches: {e:?}"
213
+
))
214
+
})?;
215
+
init_static_neu::<SketchSecretKey>(&global, sketch_secret)?;
216
+
217
+
init_static_neu::<TakeoffKey>(&global, Cursor::at(SystemTime::now()))?;
218
+
init_static_neu::<NewRollupCursorKey>(&global, Cursor::from_start())?;
219
+
220
+
sketch_secret
221
+
};
222
223
let reader = FjallReader {
224
keyspace: keyspace.clone(),
···
228
rollups: rollups.clone(),
229
};
230
let writer = FjallWriter {
231
+
bg_taken: Arc::new(AtomicBool::new(false)),
232
keyspace,
233
global,
234
feeds,
···
236
rollups,
237
queues,
238
};
239
+
Ok((reader, writer, js_cursor, sketch_secret))
240
}
241
}
242
···
333
}
334
}
335
336
+
type GetCounts = Box<dyn FnOnce() -> StorageResult<CountsValue>>;
337
+
type GetByterCounts = StorageResult<(Nsid, GetCounts)>;
338
+
type NsidCounter = Box<dyn Iterator<Item = GetByterCounts>>;
339
+
fn get_lexi_iter<T: WithCollection + DbBytes + 'static>(
340
+
snapshot: &Snapshot,
341
+
start: Bound<Vec<u8>>,
342
+
end: Bound<Vec<u8>>,
343
+
) -> StorageResult<NsidCounter> {
344
+
Ok(Box::new(snapshot.range((start, end)).map(|kv| {
345
+
let (k_bytes, v_bytes) = kv?;
346
+
let key = db_complete::<T>(&k_bytes)?;
347
+
let nsid = key.collection().clone();
348
+
let get_counts: GetCounts = Box::new(move || Ok(db_complete::<CountsValue>(&v_bytes)?));
349
+
Ok((nsid, get_counts))
350
+
})))
351
+
}
352
+
type GetRollupKey = Arc<dyn Fn(&Nsid) -> EncodingResult<Vec<u8>>>;
353
+
fn get_lookup_iter<T: WithCollection + WithRank + DbBytes + 'static>(
354
+
snapshot: lsm_tree::Snapshot,
355
+
start: Bound<Vec<u8>>,
356
+
end: Bound<Vec<u8>>,
357
+
get_rollup_key: GetRollupKey,
358
+
) -> StorageResult<NsidCounter> {
359
+
Ok(Box::new(snapshot.range((start, end)).rev().map(
360
+
move |kv| {
361
+
let (k_bytes, _) = kv?;
362
+
let key = db_complete::<T>(&k_bytes)?;
363
+
let nsid = key.collection().clone();
364
+
let get_counts: GetCounts = Box::new({
365
+
let nsid = nsid.clone();
366
+
let snapshot = snapshot.clone();
367
+
let get_rollup_key = get_rollup_key.clone();
368
+
move || {
369
+
let db_count_bytes = snapshot.get(get_rollup_key(&nsid)?)?.expect(
370
+
"integrity: all-time rank rollup must have corresponding all-time count rollup",
371
+
);
372
+
Ok(db_complete::<CountsValue>(&db_count_bytes)?)
373
+
}
374
+
});
375
+
Ok((nsid, get_counts))
376
+
},
377
+
)))
378
+
}
379
+
380
+
type CollectionSerieses = HashMap<Nsid, Vec<CountsValue>>;
381
+
382
impl FjallReader {
383
fn get_storage_stats(&self) -> StorageResult<serde_json::Value> {
384
let rollup_cursor =
···
413
get_snapshot_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?
414
.map(|c| c.to_raw_u64());
415
416
+
let rollup_cursor =
417
+
get_snapshot_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&global)?
418
+
.map(|c| c.to_raw_u64());
419
+
420
Ok(ConsumerInfo::Jetstream {
421
endpoint,
422
started_at,
423
latest_cursor,
424
+
rollup_cursor,
425
})
426
}
427
428
+
fn get_earliest_hour(&self, rollups: Option<&Snapshot>) -> StorageResult<HourTruncatedCursor> {
429
+
let cursor = rollups
430
+
.unwrap_or(&self.rollups.snapshot())
431
+
.prefix(HourlyRollupStaticPrefix::default().to_db_bytes()?)
432
+
.next()
433
+
.transpose()?
434
+
.map(|(key_bytes, _)| db_complete::<HourlyRollupKey>(&key_bytes))
435
+
.transpose()?
436
+
.map(|key| key.cursor())
437
+
.unwrap_or_else(|| Cursor::from_start().into());
438
+
Ok(cursor)
439
+
}
440
+
441
+
fn get_lexi_collections(
442
+
&self,
443
+
snapshot: Snapshot,
444
+
limit: usize,
445
+
cursor: Option<Vec<u8>>,
446
+
buckets: Vec<CursorBucket>,
447
+
) -> StorageResult<(Vec<NsidCount>, Option<Vec<u8>>)> {
448
+
let cursor_nsid = cursor.as_deref().map(db_complete::<Nsid>).transpose()?;
449
+
let mut iters: Vec<Peekable<NsidCounter>> = Vec::with_capacity(buckets.len());
450
+
for bucket in &buckets {
451
+
let it: NsidCounter = match bucket {
452
+
CursorBucket::Hour(t) => {
453
+
let start = cursor_nsid
454
+
.as_ref()
455
+
.map(|nsid| HourlyRollupKey::after_nsid(*t, nsid))
456
+
.unwrap_or_else(|| HourlyRollupKey::start(*t))?;
457
+
let end = HourlyRollupKey::end(*t)?;
458
+
get_lexi_iter::<HourlyRollupKey>(&snapshot, start, end)?
459
+
}
460
+
CursorBucket::Week(t) => {
461
+
let start = cursor_nsid
462
+
.as_ref()
463
+
.map(|nsid| WeeklyRollupKey::after_nsid(*t, nsid))
464
+
.unwrap_or_else(|| WeeklyRollupKey::start(*t))?;
465
+
let end = WeeklyRollupKey::end(*t)?;
466
+
get_lexi_iter::<WeeklyRollupKey>(&snapshot, start, end)?
467
+
}
468
+
CursorBucket::AllTime => {
469
+
let start = cursor_nsid
470
+
.as_ref()
471
+
.map(AllTimeRollupKey::after_nsid)
472
+
.unwrap_or_else(AllTimeRollupKey::start)?;
473
+
let end = AllTimeRollupKey::end()?;
474
+
get_lexi_iter::<AllTimeRollupKey>(&snapshot, start, end)?
475
+
}
476
+
};
477
+
iters.push(it.peekable());
478
+
}
479
480
+
let mut out = Vec::new();
481
+
let mut current_nsid = None;
482
+
for _ in 0..limit {
483
+
// double-scan the iters for each element: this could be eliminated but we're starting simple.
484
+
// first scan: find the lowest nsid
485
+
// second scan: take + merge, and advance all iters with lowest nsid
486
+
let mut lowest: Option<Nsid> = None;
487
+
for iter in &mut iters {
488
+
if let Some(bla) = iter.peek_mut() {
489
+
let (nsid, _) = match bla {
490
+
Ok(v) => v,
491
+
Err(e) => Err(std::mem::replace(e, StorageError::Stolen))?,
492
+
};
493
+
lowest = match lowest {
494
+
Some(ref current) if nsid.as_str() > current.as_str() => lowest,
495
+
_ => Some(nsid.clone()),
496
+
};
497
+
}
498
+
}
499
+
current_nsid = lowest.clone();
500
+
let Some(nsid) = lowest else { break };
501
502
+
let mut merged = CountsValue::default();
503
+
for iter in &mut iters {
504
+
// unwrap: potential fjall error was already checked & bailed over when peeking in the first loop
505
+
if let Some(Ok((_, get_counts))) = iter.next_if(|v| v.as_ref().unwrap().0 == nsid) {
506
+
let counts = get_counts()?;
507
+
merged.merge(&counts);
508
+
}
509
+
}
510
+
out.push(NsidCount {
511
+
nsid: nsid.to_string(),
512
+
creates: merged.counts().creates,
513
+
dids_estimate: merged.dids().estimate() as u64,
514
+
});
515
}
516
+
517
+
let next_cursor = current_nsid.map(|s| s.to_db_bytes()).transpose()?;
518
+
Ok((out, next_cursor))
519
+
}
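The loop above is a k-way merge: each bucket iterator is already sorted by NSID, so repeatedly taking the smallest pending key and draining it from every source yields globally sorted, merged counts. A minimal self-contained sketch of the same technique over plain sorted vectors (names are illustrative, not this crate's API; the real merge folds CountsValue/HLL rather than summing integers):

```rust
use std::iter::Peekable;

/// Merge iterators that are each sorted by key, summing values whenever the
/// same key appears in more than one source. Mirrors the double-scan above:
/// the first scan finds the smallest pending key, the second drains + merges.
fn kway_merge_counts<I>(sources: Vec<I>) -> Vec<(String, u64)>
where
    I: Iterator<Item = (String, u64)>,
{
    let mut iters: Vec<Peekable<I>> = sources.into_iter().map(Iterator::peekable).collect();
    let mut out = Vec::new();
    loop {
        // first scan: smallest key among all iterator heads
        let lowest = iters
            .iter_mut()
            .filter_map(|it| it.peek().map(|(k, _)| k.clone()))
            .min();
        let Some(key) = lowest else { break };
        // second scan: take + merge from every iterator whose head matches
        let mut total = 0u64;
        for it in &mut iters {
            while let Some((_, v)) = it.next_if(|(k, _)| *k == key) {
                total += v;
            }
        }
        out.push((key, total));
    }
    out
}

fn main() {
    let a = vec![("app.bsky.feed.like".to_string(), 3), ("app.bsky.feed.post".to_string(), 1)];
    let b = vec![("app.bsky.feed.post".to_string(), 2)];
    let merged = kway_merge_counts(vec![a.into_iter(), b.into_iter()]);
    assert_eq!(merged[1], ("app.bsky.feed.post".to_string(), 3));
}
```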
520
+
521
+
fn get_ordered_collections(
522
+
&self,
523
+
snapshot: Snapshot,
524
+
limit: usize,
525
+
order: OrderCollectionsBy,
526
+
buckets: Vec<CursorBucket>,
527
+
) -> StorageResult<Vec<NsidCount>> {
528
+
let mut iters: Vec<NsidCounter> = Vec::with_capacity(buckets.len());
529
+
530
+
for bucket in buckets {
531
+
let it: NsidCounter = match (&order, bucket) {
532
+
(OrderCollectionsBy::RecordsCreated, CursorBucket::Hour(t)) => {
533
+
get_lookup_iter::<HourlyRecordsKey>(
534
+
snapshot.clone(),
535
+
HourlyRecordsKey::start(t)?,
536
+
HourlyRecordsKey::end(t)?,
537
+
Arc::new({
538
+
move |collection| HourlyRollupKey::new(t, collection).to_db_bytes()
539
+
}),
540
+
)?
541
}
542
+
(OrderCollectionsBy::DidsEstimate, CursorBucket::Hour(t)) => {
543
+
get_lookup_iter::<HourlyDidsKey>(
544
+
snapshot.clone(),
545
+
HourlyDidsKey::start(t)?,
546
+
HourlyDidsKey::end(t)?,
547
+
Arc::new({
548
+
move |collection| HourlyRollupKey::new(t, collection).to_db_bytes()
549
+
}),
550
+
)?
551
+
}
552
+
(OrderCollectionsBy::RecordsCreated, CursorBucket::Week(t)) => {
553
+
get_lookup_iter::<WeeklyRecordsKey>(
554
+
snapshot.clone(),
555
+
WeeklyRecordsKey::start(t)?,
556
+
WeeklyRecordsKey::end(t)?,
557
+
Arc::new({
558
+
move |collection| WeeklyRollupKey::new(t, collection).to_db_bytes()
559
+
}),
560
+
)?
561
+
}
562
+
(OrderCollectionsBy::DidsEstimate, CursorBucket::Week(t)) => {
563
+
get_lookup_iter::<WeeklyDidsKey>(
564
+
snapshot.clone(),
565
+
WeeklyDidsKey::start(t)?,
566
+
WeeklyDidsKey::end(t)?,
567
+
Arc::new({
568
+
move |collection| WeeklyRollupKey::new(t, collection).to_db_bytes()
569
+
}),
570
+
)?
571
+
}
572
+
(OrderCollectionsBy::RecordsCreated, CursorBucket::AllTime) => {
573
+
get_lookup_iter::<AllTimeRecordsKey>(
574
+
snapshot.clone(),
575
+
AllTimeRecordsKey::start()?,
576
+
AllTimeRecordsKey::end()?,
577
+
Arc::new(|collection| AllTimeRollupKey::new(collection).to_db_bytes()),
578
+
)?
579
+
}
580
+
(OrderCollectionsBy::DidsEstimate, CursorBucket::AllTime) => {
581
+
get_lookup_iter::<AllTimeDidsKey>(
582
+
snapshot.clone(),
583
+
AllTimeDidsKey::start()?,
584
+
AllTimeDidsKey::end()?,
585
+
Arc::new(|collection| AllTimeRollupKey::new(collection).to_db_bytes()),
586
+
)?
587
+
}
588
+
(OrderCollectionsBy::Lexi { .. }, _) => unreachable!(),
589
+
};
590
+
iters.push(it);
591
+
}
592
+
593
+
// overfetch by taking a bit more than the limit
594
+
// merge by collection
595
+
// sort by requested order, take limit, discard all remaining
596
+
//
597
+
// this isn't guaranteed to be correct, but it will hopefully be close most of the time:
598
+
// - it's possible that some NSIDs might score low during some time-buckets, and miss being merged
599
+
// - overfetching helps a bit by catching nsids near the threshold more often, but it's not a guarantee.
600
+
//
601
+
// this thing is heavy, there's probably a better way
602
+
let mut ranked: HashMap<Nsid, CountsValue> = HashMap::with_capacity(limit * 2);
603
+
for iter in iters {
604
+
for pair in iter.take((limit as f64 * 1.3).ceil() as usize) {
605
+
let (nsid, get_counts) = pair?;
606
+
let counts = get_counts()?;
607
+
ranked.entry(nsid).or_default().merge(&counts);
608
}
609
}
610
+
let mut ranked: Vec<(Nsid, CountsValue)> = ranked.into_iter().collect();
611
+
match order {
612
+
OrderCollectionsBy::RecordsCreated => ranked.sort_by_key(|(_, c)| c.counts().creates),
613
+
OrderCollectionsBy::DidsEstimate => ranked.sort_by_key(|(_, c)| c.dids().estimate()),
614
+
OrderCollectionsBy::Lexi { .. } => unreachable!(),
615
+
}
616
+
let counts = ranked
617
+
.into_iter()
618
+
.rev()
619
+
.take(limit)
620
+
.map(|(nsid, cv)| NsidCount {
621
+
nsid: nsid.to_string(),
622
+
creates: cv.counts().creates,
623
+
dids_estimate: cv.dids().estimate() as u64,
624
+
})
625
+
.collect();
626
+
Ok(counts)
627
+
}
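The block above is an overfetch-and-merge approximation of a global top-k: each bucket contributes its own best-ranked entries, those are merged by NSID, and the merged set is re-ranked and truncated. The shape of that computation in isolation (illustrative names, with the same caveat that keys scoring low in every bucket can be missed entirely):

```rust
use std::cmp::Reverse;
use std::collections::HashMap;

/// Approximate global top-k from several sources that are each sorted
/// best-first: overfetch a bit beyond `limit` per source, merge by key,
/// then sort the merged totals and keep only the top `limit`.
fn approx_top_k(sources: Vec<Vec<(String, u64)>>, limit: usize) -> Vec<(String, u64)> {
    let overfetch = (limit as f64 * 1.3).ceil() as usize;
    let mut merged: HashMap<String, u64> = HashMap::with_capacity(limit * 2);
    for source in sources {
        for (key, count) in source.into_iter().take(overfetch) {
            *merged.entry(key).or_default() += count;
        }
    }
    let mut ranked: Vec<(String, u64)> = merged.into_iter().collect();
    ranked.sort_by_key(|&(_, count)| Reverse(count));
    ranked.truncate(limit);
    ranked
}
```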
628
629
+
fn get_collections(
630
+
&self,
631
+
limit: usize,
632
+
order: OrderCollectionsBy,
633
+
since: Option<HourTruncatedCursor>,
634
+
until: Option<HourTruncatedCursor>,
635
+
) -> StorageResult<(Vec<NsidCount>, Option<Vec<u8>>)> {
636
+
let snapshot = self.rollups.snapshot();
637
+
let buckets = if let (None, None) = (since, until) {
638
+
vec![CursorBucket::AllTime]
639
+
} else {
640
+
let mut lower = self.get_earliest_hour(Some(&snapshot))?;
641
+
if let Some(specified) = since {
642
+
if specified > lower {
643
+
lower = specified;
644
+
}
645
+
}
646
+
let upper = until.unwrap_or_else(|| Cursor::at(SystemTime::now()).into());
647
+
CursorBucket::buckets_spanning(lower, upper)
648
+
};
649
+
match order {
650
+
OrderCollectionsBy::Lexi { cursor } => {
651
+
self.get_lexi_collections(snapshot, limit, cursor, buckets)
652
+
}
653
+
_ => Ok((
654
+
self.get_ordered_collections(snapshot, limit, order, buckets)?,
655
+
None,
656
+
)),
657
+
}
658
+
}
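Bucket selection above: an unbounded query can read the single all-time rollup, while a bounded one clamps `since` up to the earliest hour that actually has data and spans the resulting range with finer buckets. The clamping logic on its own (a sketch with plain hour numbers standing in for cursors):

```rust
/// Effective [lower, upper] range for a bounded query: the requested start
/// is clamped to the earliest stored hour, and a missing end defaults to now.
fn effective_range(
    earliest_stored: u64,
    since: Option<u64>,
    until: Option<u64>,
    now_hour: u64,
) -> (u64, u64) {
    let lower = since.map_or(earliest_stored, |s| s.max(earliest_stored));
    let upper = until.unwrap_or(now_hour);
    (lower, upper)
}
```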
659
660
+
fn get_lexi_prefix(
661
+
&self,
662
+
snapshot: Snapshot,
663
+
prefix: NsidPrefix,
664
+
limit: usize,
665
+
cursor: Option<Vec<u8>>,
666
+
buckets: Vec<CursorBucket>,
667
+
) -> StorageResult<(JustCount, Vec<PrefixChild>, Option<Vec<u8>>)> {
669
+
let prefix_sub = String::sub_prefix(&prefix.terminated())?; // with trailing dot to ensure full segment match
670
+
let cursor_child = cursor
671
+
.as_deref()
672
+
.map(|encoded_bytes| {
673
+
let decoded: String = db_complete(encoded_bytes)?;
674
+
// TODO: write some tests for cursors; there are probably bugs here
675
+
let as_sub_prefix_with_null = decoded.to_db_bytes()?;
676
+
Ok::<_, EncodingError>(as_sub_prefix_with_null)
677
+
})
678
+
.transpose()?;
679
+
let mut iters: Vec<NsidCounter> = Vec::with_capacity(buckets.len());
680
+
for bucket in &buckets {
681
+
let it: NsidCounter = match bucket {
682
+
CursorBucket::Hour(t) => {
683
+
let start = cursor_child
684
+
.as_ref()
685
+
.map(|child| HourlyRollupKey::after_nsid_prefix(*t, child))
686
+
.unwrap_or_else(|| HourlyRollupKey::after_nsid_prefix(*t, &prefix_sub))?;
687
+
let end = HourlyRollupKey::nsid_prefix_end(*t, &prefix_sub)?;
688
+
get_lexi_iter::<HourlyRollupKey>(&snapshot, start, end)?
689
+
}
690
+
CursorBucket::Week(t) => {
691
+
let start = cursor_child
692
+
.as_ref()
693
+
.map(|child| WeeklyRollupKey::after_nsid_prefix(*t, child))
694
+
.unwrap_or_else(|| WeeklyRollupKey::after_nsid_prefix(*t, &prefix_sub))?;
695
+
let end = WeeklyRollupKey::nsid_prefix_end(*t, &prefix_sub)?;
696
+
get_lexi_iter::<WeeklyRollupKey>(&snapshot, start, end)?
697
+
}
698
+
CursorBucket::AllTime => {
699
+
let start = cursor_child
700
+
.as_ref()
701
+
.map(|child| AllTimeRollupKey::after_nsid_prefix(child))
702
+
.unwrap_or_else(|| AllTimeRollupKey::after_nsid_prefix(&prefix_sub))?;
703
+
let end = AllTimeRollupKey::nsid_prefix_end(&prefix_sub)?;
704
+
get_lexi_iter::<AllTimeRollupKey>(&snapshot, start, end)?
705
+
}
706
+
};
707
+
iters.push(it);
708
+
}
709
+
710
+
// map each raw (nsid, counts) pair to a (child, counts) pair, preserving errors and peekability
711
+
let mut iters: Vec<_> = iters
712
+
.into_iter()
713
+
.map(|it| {
714
+
it.map(|kv| {
715
+
kv.map(|(nsid, v)| {
716
+
let Some(child) = Child::from_prefix(&nsid, &prefix) else {
717
+
panic!("failed from_prefix: {nsid:?} {prefix:?} (bad iter bounds?)");
718
+
};
719
+
(child, v)
720
+
})
721
+
})
722
+
.peekable()
723
+
})
724
+
.collect();
725
+
726
+
let mut items = Vec::new();
727
+
let mut prefix_count = CountsValue::default();
728
+
#[derive(Debug, Clone, PartialEq)]
729
+
enum Child {
730
+
FullNsid(String),
731
+
ChildPrefix(String),
732
+
}
733
+
impl Child {
734
+
fn from_prefix(nsid: &Nsid, prefix: &NsidPrefix) -> Option<Self> {
735
+
if prefix.is_group_of(nsid) {
736
+
return Some(Child::FullNsid(nsid.to_string()));
737
+
}
738
+
let suffix = nsid.as_str().strip_prefix(&format!("{}.", prefix.0))?;
739
+
let (segment, _) = suffix.split_once('.').unwrap(); // safe: non-group children go at least one segment deeper, so a '.' exists
740
+
let child_prefix = format!("{}.{segment}", prefix.0);
741
+
Some(Child::ChildPrefix(child_prefix))
742
+
}
743
+
fn is_before(&self, other: &Child) -> bool {
744
+
match (self, other) {
745
+
(Child::FullNsid(s), Child::ChildPrefix(o)) if s == o => true,
746
+
(Child::ChildPrefix(s), Child::FullNsid(o)) if s == o => false,
747
+
(Child::FullNsid(s), Child::FullNsid(o)) => s < o,
748
+
(Child::ChildPrefix(s), Child::ChildPrefix(o)) => s < o,
749
+
(Child::FullNsid(s), Child::ChildPrefix(o)) => s < o,
750
+
(Child::ChildPrefix(s), Child::FullNsid(o)) => s < o,
751
+
}
752
+
}
753
+
fn into_inner(self) -> String {
754
+
match self {
755
+
Child::FullNsid(s) => s,
756
+
Child::ChildPrefix(s) => s,
757
+
}
758
}
759
}
760
+
let mut current_child: Option<Child> = None;
761
+
for _ in 0..limit {
762
+
// double-scan the iters for each element: this could be eliminated but we're starting simple.
763
+
// first scan: find the lowest nsid
764
+
// second scan: take + merge, and advance all iters with lowest nsid
765
+
let mut lowest: Option<Child> = None;
766
+
for iter in &mut iters {
767
+
if let Some(peeked) = iter.peek_mut() {
768
+
let (child, _) = match peeked {
769
+
Ok(v) => v,
770
+
Err(e) => Err(std::mem::replace(e, StorageError::Stolen))?,
771
+
};
772
773
+
lowest = match lowest {
774
+
Some(ref current) if current.is_before(child) => lowest,
775
+
_ => Some(child.clone()),
776
+
};
777
+
}
778
+
}
779
+
current_child = lowest.clone();
780
+
let Some(child) = lowest else { break };
781
+
782
+
let mut merged = CountsValue::default();
783
+
for iter in &mut iters {
784
+
// unwrap: any fjall error was already checked and bailed on when peeking in the first loop
785
+
while let Some(Ok((_, get_counts))) =
786
+
iter.next_if(|v| v.as_ref().unwrap().0 == child)
787
+
{
788
+
let counts = get_counts()?;
789
+
prefix_count.merge(&counts);
790
+
merged.merge(&counts);
791
+
}
792
+
}
793
+
items.push(match child {
794
+
Child::FullNsid(nsid) => PrefixChild::Collection(NsidCount {
795
+
nsid,
796
+
creates: merged.counts().creates,
797
+
dids_estimate: merged.dids().estimate() as u64,
798
+
}),
799
+
Child::ChildPrefix(prefix) => PrefixChild::Prefix(PrefixCount {
800
+
prefix,
801
+
creates: merged.counts().creates,
802
+
dids_estimate: merged.dids().estimate() as u64,
803
+
}),
804
+
});
805
+
}
806
+
807
+
// TODO: could serialize the prefix count (with sketch) into the cursor so that uniqs can actually count up?
808
+
// ...though the sketch is probably too big for that
809
+
// TODO: this is probably buggy on child-type boundaries
810
+
let next_cursor = current_child
811
+
.map(|s| s.into_inner().to_db_bytes())
812
+
.transpose()?;
813
+
814
+
Ok(((&prefix_count).into(), items, next_cursor))
815
}
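Child::from_prefix above buckets every NSID under the queried prefix into either a direct child collection or a one-segment-deeper child prefix, which is what lets adjacent NSIDs like `a.a.a.a` and `a.a.a.b` collapse into one `a.a.a` prefix row. The classification in isolation (a hypothetical helper, simplified from the version above; the real code also carries the is_before tie-break that sorts a collection ahead of an identically named prefix):

```rust
/// Group an NSID under `prefix` as either a direct child collection
/// ("a.b" + "a.b.c" -> collection "a.b.c") or the next segment as a child
/// prefix ("a.b" + "a.b.c.d" -> prefix "a.b.c"). None if unrelated.
fn classify_child(prefix: &str, nsid: &str) -> Option<(bool, String)> {
    let suffix = nsid.strip_prefix(&format!("{prefix}."))?;
    match suffix.split_once('.') {
        // no further dot: nsid is exactly one segment deeper -> a collection
        None => Some((true, nsid.to_string())),
        // deeper nesting: report just the next segment as a child prefix
        Some((segment, _)) => Some((false, format!("{prefix}.{segment}"))),
    }
}

fn main() {
    assert_eq!(classify_child("a.b", "a.b.c"), Some((true, "a.b.c".to_string())));
    assert_eq!(classify_child("a.b", "a.b.c.d"), Some((false, "a.b.c".to_string())));
    assert_eq!(classify_child("a.b", "a.x.y"), None);
}
```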
816
817
+
fn get_prefix(
818
+
&self,
819
+
prefix: NsidPrefix,
820
+
limit: usize,
821
+
order: OrderCollectionsBy,
822
+
since: Option<HourTruncatedCursor>,
823
+
until: Option<HourTruncatedCursor>,
824
+
) -> StorageResult<(JustCount, Vec<PrefixChild>, Option<Vec<u8>>)> {
825
+
let snapshot = self.rollups.snapshot();
826
+
let buckets = if let (None, None) = (since, until) {
827
+
vec![CursorBucket::AllTime]
828
+
} else {
829
+
let mut lower = self.get_earliest_hour(Some(&snapshot))?;
830
+
if let Some(specified) = since {
831
+
if specified > lower {
832
+
lower = specified;
833
+
}
834
+
}
835
+
let upper = until.unwrap_or_else(|| Cursor::at(SystemTime::now()).into());
836
+
CursorBucket::buckets_spanning(lower, upper)
837
+
};
838
+
match order {
839
+
OrderCollectionsBy::Lexi { cursor } => {
840
+
self.get_lexi_prefix(snapshot, prefix, limit, cursor, buckets)
841
+
}
842
+
_ => todo!(),
843
+
}
844
+
}
845
846
+
/// - step: output series time step, in seconds
847
+
fn get_timeseries(
848
+
&self,
849
+
collections: Vec<Nsid>,
850
+
since: HourTruncatedCursor,
851
+
until: Option<HourTruncatedCursor>,
852
+
step: u64,
853
+
) -> StorageResult<(Vec<HourTruncatedCursor>, CollectionSerieses)> {
854
+
if step > WEEK_IN_MICROS / 1_000_000 { // step is in seconds; the constant is micros
855
+
panic!("week-stepping is todo");
856
+
}
857
+
let until = until.unwrap_or_else(|| Cursor::at(SystemTime::now()).into());
858
+
let Ok(dt) = Cursor::from(until).duration_since(&Cursor::from(since)) else {
859
+
return Ok((
860
+
// empty: until < since
861
+
vec![],
862
+
collections.into_iter().map(|c| (c, vec![])).collect(),
863
+
));
864
+
};
865
+
let n_hours = (dt.as_micros() as u64) / HOUR_IN_MICROS;
866
+
let mut counts_by_hour = Vec::with_capacity(n_hours as usize);
867
+
let snapshot = self.rollups.snapshot();
868
+
for hour in (0..n_hours).map(|i| since.nth_next(i)) {
869
+
let mut counts = Vec::with_capacity(collections.len());
870
+
for nsid in &collections {
871
+
let count = snapshot
872
+
.get(&HourlyRollupKey::new(hour, nsid).to_db_bytes()?)?
873
+
.as_deref()
874
+
.map(db_complete::<CountsValue>)
875
+
.transpose()?
876
+
.unwrap_or_default();
877
+
counts.push(count);
878
+
}
879
+
counts_by_hour.push((hour, counts));
880
+
}
881
882
+
let step_hours = (step / (HOUR_IN_MICROS / 1_000_000)).max(1); // at least 1 so chunks() below can't panic on sub-hour steps
883
+
let mut output_hours = Vec::with_capacity(step_hours as usize);
884
+
let mut output_series: CollectionSerieses = collections
885
+
.iter()
886
+
.map(|c| (c.clone(), Vec::with_capacity(step_hours as usize)))
887
+
.collect();
888
889
+
for chunk in counts_by_hour.chunks(step_hours as usize) {
890
+
output_hours.push(chunk[0].0); // chunks() never yields an empty slice
891
+
for (i, collection) in collections.iter().enumerate() {
892
+
let mut c = CountsValue::default();
893
+
for (_, counts) in chunk {
894
+
c.merge(&counts[i]);
895
+
}
896
+
output_series
897
+
.get_mut(collection)
898
+
.expect("output series is initialized with all collections")
899
+
.push(c);
900
}
901
}
902
+
903
+
Ok((output_hours, output_series))
904
+
}
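The chunking above downsamples hourly series into coarser steps by merging whole runs of hours and labeling each output point with its first hour. The same reduction over plain integers (a sketch assuming purely additive counts; the real CountsValue merge also folds the HLL DID sketches):

```rust
/// Downsample (hour, count) points into `step_hours`-wide buckets, labeling
/// each bucket with its first hour, as in the chunks()-based loop above.
fn downsample(hourly: &[(u64, u64)], step_hours: usize) -> Vec<(u64, u64)> {
    hourly
        .chunks(step_hours.max(1)) // guard: chunks(0) panics
        .map(|chunk| {
            let label = chunk[0].0; // chunks() never yields an empty slice
            let total: u64 = chunk.iter().map(|&(_, count)| count).sum();
            (label, total)
        })
        .collect()
}
```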
905
+
906
+
fn get_collection_counts(
907
+
&self,
908
+
collection: &Nsid,
909
+
since: HourTruncatedCursor,
910
+
until: Option<HourTruncatedCursor>,
911
+
) -> StorageResult<JustCount> {
912
+
// grab snapshots in case rollups happen while we're working
913
+
let rollups = self.rollups.snapshot();
914
+
915
+
let until = until.unwrap_or_else(|| Cursor::at(SystemTime::now()).into());
916
+
let buckets = CursorBucket::buckets_spanning(since, until);
917
+
let mut total_counts = CountsValue::default();
918
+
919
+
for bucket in buckets {
920
+
let key = match bucket {
921
+
CursorBucket::Hour(t) => HourlyRollupKey::new(t, collection).to_db_bytes()?,
922
+
CursorBucket::Week(t) => WeeklyRollupKey::new(t, collection).to_db_bytes()?,
923
+
CursorBucket::AllTime => unreachable!(), // TODO: fall back on this if the time span spans the whole dataset?
924
+
};
925
+
let count = rollups
926
+
.get(&key)?
927
+
.as_deref()
928
+
.map(db_complete::<CountsValue>)
929
+
.transpose()?
930
+
.unwrap_or_default();
931
+
total_counts.merge(&count);
932
+
}
933
+
934
+
Ok((&total_counts).into())
935
}
936
937
fn get_records_by_collections(
938
&self,
939
+
collections: HashSet<Nsid>,
940
limit: usize,
941
expand_each_collection: bool,
942
) -> StorageResult<Vec<UFOsRecord>> {
···
945
}
946
let mut record_iterators = Vec::new();
947
for collection in collections {
948
+
let iter = RecordIterator::new(&self.feeds, self.records.clone(), &collection, limit)?;
949
record_iterators.push(iter.peekable());
950
}
951
let mut merged = Vec::new();
···
997
let s = self.clone();
998
tokio::task::spawn_blocking(move || FjallReader::get_consumer_info(&s)).await?
999
}
1000
+
async fn get_collections(
1001
+
&self,
1002
+
limit: usize,
1003
+
order: OrderCollectionsBy,
1004
+
since: Option<HourTruncatedCursor>,
1005
+
until: Option<HourTruncatedCursor>,
1006
+
) -> StorageResult<(Vec<NsidCount>, Option<Vec<u8>>)> {
1007
let s = self.clone();
1008
+
tokio::task::spawn_blocking(move || {
1009
+
FjallReader::get_collections(&s, limit, order, since, until)
1010
+
})
1011
+
.await?
1012
}
1013
+
async fn get_prefix(
1014
+
&self,
1015
+
prefix: NsidPrefix,
1016
+
limit: usize,
1017
+
order: OrderCollectionsBy,
1018
+
since: Option<HourTruncatedCursor>,
1019
+
until: Option<HourTruncatedCursor>,
1020
+
) -> StorageResult<(JustCount, Vec<PrefixChild>, Option<Vec<u8>>)> {
1021
+
let s = self.clone();
1022
+
tokio::task::spawn_blocking(move || {
1023
+
FjallReader::get_prefix(&s, prefix, limit, order, since, until)
1024
+
})
1025
+
.await?
1026
+
}
1027
+
async fn get_timeseries(
1028
+
&self,
1029
+
collections: Vec<Nsid>,
1030
+
since: HourTruncatedCursor,
1031
+
until: Option<HourTruncatedCursor>,
1032
+
step: u64,
1033
+
) -> StorageResult<(Vec<HourTruncatedCursor>, CollectionSerieses)> {
1034
+
let s = self.clone();
1035
+
tokio::task::spawn_blocking(move || {
1036
+
FjallReader::get_timeseries(&s, collections, since, until, step)
1037
+
})
1038
+
.await?
1039
+
}
1040
+
async fn get_collection_counts(
1041
+
&self,
1042
+
collection: &Nsid,
1043
+
since: HourTruncatedCursor,
1044
+
until: Option<HourTruncatedCursor>,
1045
+
) -> StorageResult<JustCount> {
1046
let s = self.clone();
1047
let collection = collection.clone();
1048
+
tokio::task::spawn_blocking(move || {
1049
+
FjallReader::get_collection_counts(&s, &collection, since, until)
1050
+
})
1051
+
.await?
1052
}
1053
async fn get_records_by_collections(
1054
&self,
1055
+
collections: HashSet<Nsid>,
1056
limit: usize,
1057
expand_each_collection: bool,
1058
) -> StorageResult<Vec<UFOsRecord>> {
1059
let s = self.clone();
1060
tokio::task::spawn_blocking(move || {
1061
+
FjallReader::get_records_by_collections(&s, collections, limit, expand_each_collection)
1062
})
1063
.await?
1064
}
1065
}
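All of the async methods above share one wrapper shape: clone the cheap reader handle, move the blocking fjall call onto tokio's blocking pool, and flatten the two error layers (the task's JoinError via `?` on the awaited handle, then the inner storage Result). A sketch of that pattern with stand-in types:

```rust
use tokio::task;

#[derive(Clone)]
struct Reader; // stand-in for FjallReader: cheap to clone (Arc'd partitions)

#[derive(Debug)]
enum Error {
    Join(task::JoinError),
}

impl From<task::JoinError> for Error {
    fn from(e: task::JoinError) -> Self {
        Error::Join(e)
    }
}

/// Clone a handle into the blocking pool and flatten the nested results:
/// the outer `?` converts a JoinError, the inner Result passes through.
async fn blocking_read<T, F>(reader: &Reader, f: F) -> Result<T, Error>
where
    T: Send + 'static,
    F: FnOnce(Reader) -> Result<T, Error> + Send + 'static,
{
    let r = reader.clone();
    task::spawn_blocking(move || f(r)).await?
}
```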
1066
1067
+
#[derive(Clone)]
1068
pub struct FjallWriter {
1069
+
bg_taken: Arc<AtomicBool>,
1070
keyspace: Keyspace,
1071
global: PartitionHandle,
1072
feeds: PartitionHandle,
···
1096
timelies: impl Iterator<Item = Result<(fjall::Slice, fjall::Slice), fjall::Error>>,
1097
cursor_exclusive_limit: Option<Cursor>,
1098
rollup_limit: usize,
1099
+
) -> StorageResult<(usize, HashSet<Nsid>)> {
1100
// current strategy is to buffer counts in mem before writing the rollups
1101
// we *could* read+write every single batch straight to the rollup, but the merge is associative,
1102
// so buffering saves the db some work up front. whether that's worth it is unmeasured.
1103
+
1104
+
let mut dirty_nsids = HashSet::new();
1105
1106
#[derive(Eq, Hash, PartialEq)]
1107
enum Rollup {
···
1130
break;
1131
}
1132
1133
+
dirty_nsids.insert(key.collection().clone());
1134
+
1135
batch.remove(&self.rollups, key_bytes);
1136
let val = db_complete::<CountsValue>(&val_bytes)?;
1137
counts_by_rollup
···
1157
last_cursor = key.cursor();
1158
}
1159
1160
+
// go through each new rollup thing and merge it with whatever might already be in the db
1161
for ((nsid, rollup), counts) in counts_by_rollup {
1162
+
let rollup_key_bytes = match rollup {
1163
Rollup::Hourly(hourly_cursor) => {
1164
+
HourlyRollupKey::new(hourly_cursor, &nsid).to_db_bytes()?
1165
}
1166
Rollup::Weekly(weekly_cursor) => {
1167
+
WeeklyRollupKey::new(weekly_cursor, &nsid).to_db_bytes()?
1168
}
1169
+
Rollup::AllTime => AllTimeRollupKey::new(&nsid).to_db_bytes()?,
1170
};
1171
let mut rolled: CountsValue = self
1172
.rollups
1173
+
.get(&rollup_key_bytes)?
1174
.as_deref()
1175
.map(db_complete::<CountsValue>)
1176
.transpose()?
1177
.unwrap_or_default();
1178
1179
+
// now that we have values, we can know the existing ranks
1180
+
let before_creates_count = rolled.counts().creates;
1181
+
let before_dids_estimate = rolled.dids().estimate() as u64;
1182
+
1183
+
// update the rollup
1184
+
rolled.merge(&counts);
1185
+
1186
+
// new ranks
1187
+
let new_creates_count = rolled.counts().creates;
1188
+
let new_dids_estimate = rolled.dids().estimate() as u64;
1189
+
1190
+
// update create-ranked secondary index if rank changed
1191
+
if new_creates_count != before_creates_count {
1192
+
let (old_k, new_k) = match rollup {
1193
+
Rollup::Hourly(cursor) => (
1194
+
HourlyRecordsKey::new(cursor, before_creates_count.into(), &nsid)
1195
+
.to_db_bytes()?,
1196
+
HourlyRecordsKey::new(cursor, new_creates_count.into(), &nsid)
1197
+
.to_db_bytes()?,
1198
+
),
1199
+
Rollup::Weekly(cursor) => (
1200
+
WeeklyRecordsKey::new(cursor, before_creates_count.into(), &nsid)
1201
+
.to_db_bytes()?,
1202
+
WeeklyRecordsKey::new(cursor, new_creates_count.into(), &nsid)
1203
+
.to_db_bytes()?,
1204
+
),
1205
+
Rollup::AllTime => (
1206
+
AllTimeRecordsKey::new(before_creates_count.into(), &nsid).to_db_bytes()?,
1207
+
AllTimeRecordsKey::new(new_creates_count.into(), &nsid).to_db_bytes()?,
1208
+
),
1209
+
};
1210
+
batch.remove(&self.rollups, &old_k); // TODO: when fjall gets weak delete, this will hopefully work way better
1211
+
batch.insert(&self.rollups, &new_k, "");
1212
+
}
1213
+
1214
+
// update dids-ranked secondary index if rank changed
1215
+
if new_dids_estimate != before_dids_estimate {
1216
+
let (old_k, new_k) = match rollup {
1217
+
Rollup::Hourly(cursor) => (
1218
+
HourlyDidsKey::new(cursor, before_dids_estimate.into(), &nsid)
1219
+
.to_db_bytes()?,
1220
+
HourlyDidsKey::new(cursor, new_dids_estimate.into(), &nsid)
1221
+
.to_db_bytes()?,
1222
+
),
1223
+
Rollup::Weekly(cursor) => (
1224
+
WeeklyDidsKey::new(cursor, before_dids_estimate.into(), &nsid)
1225
+
.to_db_bytes()?,
1226
+
WeeklyDidsKey::new(cursor, new_dids_estimate.into(), &nsid)
1227
+
.to_db_bytes()?,
1228
+
),
1229
+
Rollup::AllTime => (
1230
+
AllTimeDidsKey::new(before_dids_estimate.into(), &nsid).to_db_bytes()?,
1231
+
AllTimeDidsKey::new(new_dids_estimate.into(), &nsid).to_db_bytes()?,
1232
+
),
1233
+
};
1234
+
batch.remove(&self.rollups, &old_k); // TODO: when fjall gets weak delete, this will hopefully work way better
1235
+
batch.insert(&self.rollups, &new_k, "");
1236
}
1237
1238
+
// replace the main counts rollup
1239
+
batch.insert(&self.rollups, &rollup_key_bytes, &rolled.to_db_bytes()?);
1240
}
1241
1242
insert_batch_static_neu::<NewRollupCursorKey>(&mut batch, &self.global, last_cursor)?;
1243
1244
batch.commit()?;
1245
+
Ok((cursors_advanced, dirty_nsids))
1246
}
1247
}
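The rank-key dance above is the standard way to keep a score-ordered secondary index in a plain KV store: the score is encoded into the key itself, so a score change means deleting the key built from the old score and inserting one built from the new score. The pattern reduced to a BTreeMap (illustrative; the real code stages both operations in the same fjall batch as the rollup value itself):

```rust
use std::collections::BTreeMap;

/// Keep a (score, name) -> () index in sync when `name`'s score changes:
/// remove the entry under the old score, insert under the new one. Iterating
/// the map then yields names in score order for "top by score" queries.
fn reindex(index: &mut BTreeMap<(u64, String), ()>, name: &str, old: u64, new: u64) {
    if old == new {
        return; // rank unchanged, nothing to rewrite
    }
    index.remove(&(old, name.to_string()));
    index.insert((new, name.to_string()), ());
}
```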
1248
1249
+
impl StoreWriter<FjallBackground> for FjallWriter {
1250
+
fn background_tasks(&mut self, reroll: bool) -> StorageResult<FjallBackground> {
1251
+
if self.bg_taken.swap(true, Ordering::SeqCst) {
1252
+
Err(StorageError::BackgroundAlreadyStarted)
1253
+
} else {
1254
+
if reroll {
1255
+
log::info!("reroll: resetting rollup cursor...");
1256
+
insert_static_neu::<NewRollupCursorKey>(&self.global, Cursor::from_start())?;
1257
+
log::info!("reroll: clearing trim cursors...");
1258
+
let mut batch = self.keyspace.batch();
1259
+
for kv in self
1260
+
.global
1261
+
.prefix(TrimCollectionCursorKey::from_prefix_to_db_bytes(
1262
+
&Default::default(),
1263
+
)?)
1264
+
{
1265
+
let (k, _) = kv?;
1266
+
batch.remove(&self.global, k);
1267
+
}
1268
+
let n = batch.len();
1269
+
batch.commit()?;
1270
+
log::info!("reroll: cleared {n} trim cursors.");
1271
+
}
1272
+
Ok(FjallBackground(self.clone()))
1273
+
}
1274
+
}
1275
+
1276
fn insert_batch<const LIMIT: usize>(
1277
&mut self,
1278
event_batch: EventBatch<LIMIT>,
···
1315
}
1316
}
1317
let live_counts_key: LiveCountsKey = (latest, &nsid).into();
1318
+
let counts_value = CountsValue::new(
1319
+
CommitCounts {
1320
+
creates: commits.creates as u64,
1321
+
updates: commits.updates as u64,
1322
+
deletes: commits.deletes as u64,
1323
+
},
1324
+
commits.dids_estimate,
1325
+
);
1326
batch.insert(
1327
&self.rollups,
1328
&live_counts_key.to_db_bytes()?,
···
1350
Ok(())
1351
}
1352
1353
+
fn step_rollup(&mut self) -> StorageResult<(usize, HashSet<Nsid>)> {
1354
+
let mut dirty_nsids = HashSet::new();
1355
+
1356
let rollup_cursor =
1357
get_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&self.global)?.ok_or(
1358
StorageError::BadStateError("Could not find current rollup cursor".to_string()),
···
1362
let live_counts_range = LiveCountsKey::range_from_cursor(rollup_cursor)?;
1363
let mut timely_iter = self.rollups.range(live_counts_range).peekable();
1364
1365
+
let timely_next = timely_iter
1366
.peek_mut()
1367
+
.map(|kv| -> StorageResult<LiveCountsKey> {
1368
match kv {
1369
Err(e) => Err(std::mem::replace(e, fjall::Error::Poisoned))?, // swap in a placeholder to move the error out of peek_mut
1370
Ok((key_bytes, _)) => {
1371
let key = db_complete::<LiveCountsKey>(key_bytes)?;
1372
+
Ok(key)
1373
}
1374
}
1375
})
···
1390
})
1391
.transpose()?;
1392
1393
+
let cursors_stepped = match (timely_next, next_delete) {
1394
+
(Some(timely), Some((delete_cursor, delete_key_bytes, delete_val_bytes))) => {
1395
+
if timely.cursor() < delete_cursor {
1396
+
let (n, dirty) = self.rollup_live_counts(
1397
timely_iter,
1398
Some(delete_cursor),
1399
MAX_BATCHED_ROLLUP_COUNTS,
1400
+
)?;
1401
+
dirty_nsids.extend(dirty);
1402
+
n
1403
} else {
1404
self.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)?
1405
}
1406
}
1407
(Some(_), None) => {
1408
+
let (n, dirty) =
1409
+
self.rollup_live_counts(timely_iter, None, MAX_BATCHED_ROLLUP_COUNTS)?;
1410
+
dirty_nsids.extend(dirty);
1411
+
n
1412
}
1413
(None, Some((delete_cursor, delete_key_bytes, delete_val_bytes))) => {
1414
self.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)?
···
1416
(None, None) => 0,
1417
};
1418
1419
+
Ok((cursors_stepped, dirty_nsids))
1420
}
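step_rollup interleaves two pending work streams, live-count rollups and account deletes, by always servicing whichever has the lower cursor, so the persisted rollup cursor only ever moves forward. The selection logic in isolation (an illustrative sketch):

```rust
/// Pick which queue to service next so that work is always applied in
/// global cursor order, as in the (timely_next, next_delete) match above.
fn next_work(live: Option<u64>, delete: Option<u64>) -> Option<&'static str> {
    match (live, delete) {
        (Some(l), Some(d)) if l < d => Some("rollup live counts up to the delete"),
        (Some(_), Some(_)) => Some("apply the account delete"),
        (Some(_), None) => Some("rollup live counts"),
        (None, Some(_)) => Some("apply the account delete"),
        (None, None) => None, // fully caught up
    }
}
```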
1421
1422
fn trim_collection(
1423
&mut self,
1424
collection: &Nsid,
1425
limit: usize,
1426
+
full_scan: bool,
1427
+
) -> StorageResult<(usize, usize, bool)> {
1428
let mut dangling_feed_keys_cleaned = 0;
1429
let mut records_deleted = 0;
1430
1431
+
let live_range = if full_scan {
1432
+
let start = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?;
1433
+
let end = NsidRecordFeedKey::prefix_range_end(collection)?;
1434
+
start..end
1435
+
} else {
1436
+
let feed_trim_cursor_key =
1437
+
TrimCollectionCursorKey::new(collection.clone()).to_db_bytes()?;
1438
+
let trim_cursor = self
1439
+
.global
1440
+
.get(&feed_trim_cursor_key)?
1441
+
.map(|value_bytes| db_complete(&value_bytes))
1442
+
.transpose()?
1443
+
.unwrap_or(Cursor::from_start());
1444
+
NsidRecordFeedKey::from_pair(collection.clone(), trim_cursor).range_to_prefix_end()?
1445
+
};
1446
1447
+
let mut live_records_found = 0;
1448
+
let mut candidate_new_feed_lower_cursor = None;
1449
+
let ended_early = false; // currently never set true; kept for a future early-exit cutoff
1450
+
let mut current_cursor: Option<Cursor> = None;
1451
+
for (i, kv) in self.feeds.range(live_range).rev().enumerate() {
1452
+
if i > 0 && i % 500_000 == 0 {
1453
+
log::info!(
1454
+
"trim: at {i} for {:?} (now at {})",
1455
+
collection.to_string(),
1456
+
current_cursor
1457
+
.map(|c| c
1458
+
.elapsed()
1459
+
.map(nice_duration)
1460
+
.unwrap_or("[not past]".into()))
1461
+
.unwrap_or("??".into()),
1462
+
);
1463
+
}
1464
let (key_bytes, val_bytes) = kv?;
1465
let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?;
1466
let feed_val = db_complete::<NsidRecordFeedVal>(&val_bytes)?;
···
1469
1470
let Some(location_val_bytes) = self.records.get(&location_key_bytes)? else {
1471
// record was deleted (hopefully)
1472
+
self.feeds.remove(&*key_bytes)?;
1473
dangling_feed_keys_cleaned += 1;
1474
continue;
1475
};
1476
1477
let (meta, _) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?;
1478
+
current_cursor = Some(meta.cursor());
1479
1480
if meta.cursor() != feed_key.cursor() {
1481
// older/different version
1482
+
self.feeds.remove(&*key_bytes)?;
1483
dangling_feed_keys_cleaned += 1;
1484
continue;
1485
}
1486
if meta.rev != feed_val.rev() {
1487
// weird...
1488
log::warn!("record lookup: cursor match but rev did not...? removing.");
1489
+
self.records.remove(&location_key_bytes)?;
1490
+
self.feeds.remove(&*key_bytes)?;
1491
dangling_feed_keys_cleaned += 1;
1492
continue;
1493
}
1494
1495
+
live_records_found += 1;
1496
+
if live_records_found <= limit {
1497
continue;
1498
}
1499
+
if candidate_new_feed_lower_cursor.is_none() {
1500
+
candidate_new_feed_lower_cursor = Some(feed_key.cursor());
1501
+
}
1502
1503
+
self.records.remove(&location_key_bytes)?; // location keys live in the records partition
1504
+
self.feeds.remove(key_bytes)?;
1505
records_deleted += 1;
1506
}
1507
1508
+
if !ended_early {
1509
+
if let Some(new_cursor) = candidate_new_feed_lower_cursor {
1510
+
self.global.insert(
1511
+
&TrimCollectionCursorKey::new(collection.clone()).to_db_bytes()?,
1512
+
&new_cursor.to_db_bytes()?,
1513
+
)?;
1514
+
}
1515
+
}
1516
1517
+
log::trace!("trim_collection ({collection:?}) removed {dangling_feed_keys_cleaned} dangling feed entries and {records_deleted} records (ended early? {ended_early})");
1518
+
Ok((dangling_feed_keys_cleaned, records_deleted, ended_early))
1519
}
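trim_collection resumes from a persisted per-collection cursor so that repeated passes scan only the tail that has grown since the last trim, and it records the highest trimmed cursor as the next pass's lower bound. The bookkeeping reduced to a BTreeMap feed (illustrative; the real keys are NsidRecordFeedKey ranges):

```rust
use std::collections::BTreeMap;

/// Keep only the newest `limit` entries of a feed keyed by ascending cursor,
/// walking newest-first and remembering where the next trim can stop.
fn trim(feed: &mut BTreeMap<u64, String>, limit: usize) -> Option<u64> {
    let excess: Vec<u64> = feed
        .iter()
        .rev() // newest first, like the .rev() scan above
        .skip(limit) // the first `limit` live entries are kept
        .map(|(cursor, _)| *cursor)
        .collect();
    let new_lower_bound = excess.first().copied(); // highest trimmed cursor
    for cursor in excess {
        feed.remove(&cursor);
    }
    new_lower_bound
}
```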
1520
1521
fn delete_account(&mut self, did: &Did) -> Result<usize, StorageError> {
···
1533
}
1534
batch.commit()?;
1535
Ok(records_deleted)
1536
+
}
1537
+
}
1538
+
1539
+
pub struct FjallBackground(FjallWriter);
1540
+
1541
+
#[async_trait]
1542
+
impl StoreBackground for FjallBackground {
1543
+
async fn run(mut self, backfill: bool) -> StorageResult<()> {
1544
+
let mut dirty_nsids = HashSet::new();
1545
+
1546
+
// backfill condition here is iffy -- longer is good when doing the main ingest and then collection trims
1547
+
// shorter once those are done helps things catch up
1548
+
// the best setting for non-backfill is non-obvious: it can be pretty slow and still be fine
1549
+
let mut rollup =
1550
+
tokio::time::interval(Duration::from_micros(if backfill { 100 } else { 32_000 }));
1551
+
rollup.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
1552
+
1553
+
// backfill condition again iffy. collection trims should probably happen in their own phase.
1554
+
let mut trim = tokio::time::interval(Duration::from_secs(if backfill { 18 } else { 9 }));
1555
+
trim.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
1556
+
1557
+
loop {
1558
+
tokio::select! {
1559
+
_ = rollup.tick() => {
1560
+
let mut db = self.0.clone();
1561
+
let (n, dirty) = tokio::task::spawn_blocking(move || db.step_rollup()).await??;
1562
+
if n == 0 {
1563
+
rollup.reset_after(Duration::from_millis(1_200)); // we're caught up, take a break
1564
+
}
1565
+
dirty_nsids.extend(dirty);
1566
+
log::trace!("rolled up {n} items ({} collections now dirty)", dirty_nsids.len());
1567
+
},
1568
+
_ = trim.tick() => {
1569
+
let n = dirty_nsids.len();
1570
+
log::trace!("trimming {n} nsids: {dirty_nsids:?}");
1571
+
let t0 = Instant::now();
1572
+
let (mut total_danglers, mut total_deleted) = (0, 0);
1573
+
let mut completed = HashSet::new();
1574
+
for collection in &dirty_nsids {
1575
+
let mut db = self.0.clone();
1576
+
let c = collection.clone();
1577
+
let (danglers, deleted, ended_early) = tokio::task::spawn_blocking(move || db.trim_collection(&c, 512, false)).await??;
1578
+
total_danglers += danglers;
1579
+
total_deleted += deleted;
1580
+
if !ended_early {
1581
+
completed.insert(collection.clone());
1582
+
}
1583
+
if total_deleted > 10_000_000 {
1584
+
log::info!("trim stopped early, more than 10M records already deleted.");
1585
+
break;
1586
+
}
1587
+
}
1588
+
for c in completed {
1589
+
dirty_nsids.remove(&c);
1590
+
}
1591
+
log::info!("finished trimming {n} nsids in {:?}: {total_danglers} dangling and {total_deleted} total removed.", t0.elapsed());
1592
+
},
1593
+
};
1594
+
}
1595
}
1596
}
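The run loop above multiplexes two periodic jobs with tokio::select! over independent intervals, and backs the rollup interval off when a tick finds no work. A minimal runnable skeleton of that scheduling (assuming tokio's `time` and `macros` features; job bodies reduced to stand-ins):

```rust
use std::time::Duration;
use tokio::time::{self, MissedTickBehavior};

#[tokio::main]
async fn main() {
    let mut rollup = time::interval(Duration::from_millis(32));
    rollup.set_missed_tick_behavior(MissedTickBehavior::Delay);

    let mut trim = time::interval(Duration::from_secs(9));
    trim.set_missed_tick_behavior(MissedTickBehavior::Skip);

    loop {
        tokio::select! {
            _ = rollup.tick() => {
                let work_done = 0; // stand-in for step_rollup()
                if work_done == 0 {
                    // caught up: back off instead of spinning
                    rollup.reset_after(Duration::from_millis(1_200));
                }
            },
            _ = trim.tick() => {
                println!("trim pass"); // stand-in for trimming dirty collections
            },
        }
    }
}
```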
1597
···
1628
Ok(())
1629
}
1630
1631
+
/// Set a value to a fixed key, erroring if the value already exists
1632
+
///
1633
+
/// Intended for single-threaded init: not safe under concurrency, since there
1634
+
/// is no transaction between checking whether the value already exists and writing it.
1635
+
fn init_static_neu<K: StaticStr>(
1636
+
global: &PartitionHandle,
1637
+
value: impl DbBytes,
1638
+
) -> StorageResult<()> {
1639
+
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
1640
+
if global.get(&key_bytes)?.is_some() {
1641
+
return Err(StorageError::InitError(format!(
1642
+
"init failed: value for key {key_bytes:?} already exists"
1643
+
)));
1644
+
}
1645
+
let value_bytes = value.to_db_bytes()?;
1646
+
global.insert(&key_bytes, &value_bytes)?;
1647
+
Ok(())
1648
+
}
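Usage sketch for the guard above (a hypothetical call site within this module, mirroring the init flow used elsewhere for endpoint and takeoff values): a second call for the same key fails fast instead of silently overwriting.

```rust
fn init_example(global: &PartitionHandle) -> StorageResult<()> {
    // single-threaded startup; same pattern the init flow uses elsewhere:
    init_static_neu::<TakeoffKey>(global, Cursor::at(SystemTime::now()))?;
    // a second call for the same key now returns StorageError::InitError:
    assert!(init_static_neu::<TakeoffKey>(global, Cursor::at(SystemTime::now())).is_err());
    Ok(())
}
```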
1649
+
1650
/// Set a value to a fixed key
1651
fn insert_batch_static_neu<K: StaticStr>(
1652
batch: &mut FjallBatch,
···
1669
1670
////////// temp stuff to remove:
1671
1672
#[cfg(test)]
1673
mod tests {
1674
use super::*;
···
1678
use serde_json::value::RawValue;
1679
1680
fn fjall_db() -> (FjallReader, FjallWriter) {
1681
+
let (read, write, _, _) = FjallStorage::init(
1682
tempfile::tempdir().unwrap(),
1683
"offline test (no real jetstream endpoint)".to_string(),
1684
false,
···
1689
}
1690
1691
const TEST_BATCH_LIMIT: usize = 16;
1692
+
fn beginning() -> HourTruncatedCursor {
1693
+
Cursor::from_start().into()
1694
+
}
1695
1696
#[derive(Debug, Default)]
1697
struct TestBatch {
···
1736
.commits_by_nsid
1737
.entry(collection.clone())
1738
.or_default()
1739
+
.truncating_insert(commit, &[0u8; 16])
1740
.unwrap();
1741
1742
collection
···
1778
.commits_by_nsid
1779
.entry(collection.clone())
1780
.or_default()
1781
+
.truncating_insert(commit, &[0u8; 16])
1782
.unwrap();
1783
1784
collection
···
1810
.commits_by_nsid
1811
.entry(collection.clone())
1812
.or_default()
1813
+
.truncating_insert(commit, &[0u8; 16])
1814
.unwrap();
1815
1816
collection
···
1829
fn test_hello() -> anyhow::Result<()> {
1830
let (read, mut write) = fjall_db();
1831
write.insert_batch::<TEST_BATCH_LIMIT>(EventBatch::default())?;
1832
+
let JustCount {
1833
+
creates,
1834
+
dids_estimate,
1835
+
..
1836
+
} = read.get_collection_counts(
1837
+
&Nsid::new("a.b.c".to_string()).unwrap(),
1838
+
beginning(),
1839
+
None,
1840
+
)?;
1841
+
assert_eq!(creates, 0);
1842
+
assert_eq!(dids_estimate, 0);
1843
Ok(())
1844
}
1845
···
1858
100,
1859
);
1860
write.insert_batch(batch.batch)?;
1861
+
write.step_rollup()?;
1862
1863
+
let JustCount {
1864
+
creates,
1865
+
dids_estimate,
1866
+
..
1867
+
} = read.get_collection_counts(&collection, beginning(), None)?;
1868
+
assert_eq!(creates, 1);
1869
+
assert_eq!(dids_estimate, 1);
1870
+
let JustCount {
1871
+
creates,
1872
+
dids_estimate,
1873
+
..
1874
+
} = read.get_collection_counts(
1875
+
&Nsid::new("d.e.f".to_string()).unwrap(),
1876
+
beginning(),
1877
+
None,
1878
+
)?;
1879
+
assert_eq!(creates, 0);
1880
+
assert_eq!(dids_estimate, 0);
1881
1882
+
let records = read.get_records_by_collections([collection].into(), 2, false)?;
1883
assert_eq!(records.len(), 1);
1884
let rec = &records[0];
1885
assert_eq!(rec.record.get(), "{}");
1886
assert!(!rec.is_update);
1887
1888
+
let records = read.get_records_by_collections(
1889
+
[Nsid::new("d.e.f".to_string()).unwrap()].into(),
1890
+
2,
1891
+
false,
1892
+
)?;
1893
assert_eq!(records.len(), 0);
1894
1895
Ok(())
···
1930
write.insert_batch(batch.batch)?;
1931
1932
let records = read.get_records_by_collections(
1933
+
HashSet::from([
1934
Nsid::new("a.a.a".to_string()).unwrap(),
1935
Nsid::new("a.a.b".to_string()).unwrap(),
1936
Nsid::new("a.a.c".to_string()).unwrap(),
1937
+
]),
1938
100,
1939
false,
1940
)?;
···
1990
write.insert_batch(batch.batch)?;
1991
1992
let records = read.get_records_by_collections(
1993
+
HashSet::from([
1994
Nsid::new("a.a.a".to_string()).unwrap(),
1995
Nsid::new("a.a.b".to_string()).unwrap(),
1996
Nsid::new("a.a.c".to_string()).unwrap(),
1997
+
]),
1998
2,
1999
true,
2000
)?;
···
2041
101,
2042
);
2043
write.insert_batch(batch.batch)?;
2044
+
write.step_rollup()?;
2045
2046
+
let JustCount {
2047
+
creates,
2048
+
dids_estimate,
2049
+
..
2050
+
} = read.get_collection_counts(&collection, beginning(), None)?;
2051
+
assert_eq!(creates, 1);
2052
+
assert_eq!(dids_estimate, 1);
2053
2054
+
let records = read.get_records_by_collections([collection].into(), 2, false)?;
2055
assert_eq!(records.len(), 1);
2056
let rec = &records[0];
2057
assert_eq!(rec.record.get(), r#"{"ch": "ch-ch-ch-changes"}"#);
···
2084
101,
2085
);
2086
write.insert_batch(batch.batch)?;
2087
+
write.step_rollup()?;
2088
2089
+
let JustCount {
2090
+
creates,
2091
+
dids_estimate,
2092
+
..
2093
+
} = read.get_collection_counts(&collection, beginning(), None)?;
2094
+
assert_eq!(creates, 1);
2095
+
assert_eq!(dids_estimate, 1);
2096
2097
+
let records = read.get_records_by_collections([collection].into(), 2, false)?;
2098
assert_eq!(records.len(), 0);
2099
2100
Ok(())
···
2140
write.insert_batch(batch.batch)?;
2141
2142
let records = read.get_records_by_collections(
2143
+
HashSet::from([Nsid::new("a.a.a".to_string()).unwrap()]),
2144
100,
2145
false,
2146
)?;
2147
assert_eq!(records.len(), 1);
2148
let records = read.get_records_by_collections(
2149
+
HashSet::from([Nsid::new("a.a.b".to_string()).unwrap()]),
2150
100,
2151
false,
2152
)?;
2153
assert_eq!(records.len(), 10);
2154
let records = read.get_records_by_collections(
2155
+
HashSet::from([Nsid::new("a.a.c".to_string()).unwrap()]),
2156
100,
2157
false,
2158
)?;
2159
assert_eq!(records.len(), 1);
2160
let records = read.get_records_by_collections(
2161
+
HashSet::from([Nsid::new("a.a.d".to_string()).unwrap()]),
2162
100,
2163
false,
2164
)?;
2165
assert_eq!(records.len(), 0);
2166
2167
+
write.trim_collection(&Nsid::new("a.a.a".to_string()).unwrap(), 6, false)?;
2168
+
write.trim_collection(&Nsid::new("a.a.b".to_string()).unwrap(), 6, false)?;
2169
+
write.trim_collection(&Nsid::new("a.a.c".to_string()).unwrap(), 6, false)?;
2170
+
write.trim_collection(&Nsid::new("a.a.d".to_string()).unwrap(), 6, false)?;
2171
2172
let records = read.get_records_by_collections(
2173
+
HashSet::from([Nsid::new("a.a.a".to_string()).unwrap()]),
2174
100,
2175
false,
2176
)?;
2177
assert_eq!(records.len(), 1);
2178
let records = read.get_records_by_collections(
2179
+
HashSet::from([Nsid::new("a.a.b".to_string()).unwrap()]),
2180
100,
2181
false,
2182
)?;
2183
assert_eq!(records.len(), 6);
2184
let records = read.get_records_by_collections(
2185
+
HashSet::from([Nsid::new("a.a.c".to_string()).unwrap()]),
2186
100,
2187
false,
2188
)?;
2189
assert_eq!(records.len(), 1);
2190
let records = read.get_records_by_collections(
2191
+
HashSet::from([Nsid::new("a.a.d".to_string()).unwrap()]),
2192
100,
2193
false,
2194
)?;
···
2225
write.insert_batch(batch.batch)?;
2226
2227
let records = read.get_records_by_collections(
2228
+
HashSet::from([Nsid::new("a.a.a".to_string()).unwrap()]),
2229
100,
2230
false,
2231
)?;
···
2236
assert_eq!(records_deleted, 2);
2237
2238
let records = read.get_records_by_collections(
2239
+
HashSet::from([Nsid::new("a.a.a".to_string()).unwrap()]),
2240
100,
2241
false,
2242
)?;
···
2267
2268
write.step_rollup()?;
2269
2270
+
let records = read.get_records_by_collections(
2271
+
[Nsid::new("a.a.a".to_string()).unwrap()].into(),
2272
+
1,
2273
+
false,
2274
+
)?;
2275
assert_eq!(records.len(), 0);
2276
2277
Ok(())
···
2293
);
2294
write.insert_batch(batch.batch)?;
2295
2296
+
let (n, _) = write.step_rollup()?;
2297
assert_eq!(n, 1);
2298
2299
let mut batch = TestBatch::default();
2300
batch.delete_account("did:plc:person-a", 10_001);
2301
write.insert_batch(batch.batch)?;
2302
2303
+
let records = read.get_records_by_collections(
2304
+
[Nsid::new("a.a.a".to_string()).unwrap()].into(),
2305
+
1,
2306
+
false,
2307
+
)?;
2308
assert_eq!(records.len(), 1);
2309
2310
+
let (n, _) = write.step_rollup()?;
2311
assert_eq!(n, 1);
2312
2313
+
let records = read.get_records_by_collections(
2314
+
[Nsid::new("a.a.a".to_string()).unwrap()].into(),
2315
+
1,
2316
+
false,
2317
+
)?;
2318
assert_eq!(records.len(), 0);
2319
2320
let mut batch = TestBatch::default();
2321
batch.delete_account("did:plc:person-a", 9_999);
2322
write.insert_batch(batch.batch)?;
2323
2324
+
let (n, _) = write.step_rollup()?;
2325
assert_eq!(n, 0);
2326
2327
Ok(())
···
2355
);
2356
write.insert_batch(batch.batch)?;
2357
2358
+
let (n, _) = write.step_rollup()?;
2359
assert_eq!(n, 2);
2360
2361
+
let (n, _) = write.step_rollup()?;
2362
assert_eq!(n, 0);
2363
2364
Ok(())
···
2406
write.insert_batch(batch.batch)?;
2407
2408
// before any rollup
2409
+
let JustCount {
2410
+
creates,
2411
+
dids_estimate,
2412
+
..
2413
+
} = read.get_collection_counts(
2414
+
&Nsid::new("a.a.a".to_string()).unwrap(),
2415
+
beginning(),
2416
+
None,
2417
+
)?;
2418
+
assert_eq!(creates, 0);
2419
+
assert_eq!(dids_estimate, 0);
2420
2421
// first batch rolled up
2422
+
let (n, _) = write.step_rollup()?;
2423
assert_eq!(n, 1);
2424
2425
+
let JustCount {
2426
+
creates,
2427
+
dids_estimate,
2428
+
..
2429
+
} = read.get_collection_counts(
2430
+
&Nsid::new("a.a.a".to_string()).unwrap(),
2431
+
beginning(),
2432
+
None,
2433
+
)?;
2434
+
assert_eq!(creates, 2);
2435
+
assert_eq!(dids_estimate, 2);
2436
2437
// delete account rolled up
2438
+
let (n, _) = write.step_rollup()?;
2439
assert_eq!(n, 1);
2440
2441
+
let JustCount {
2442
+
creates,
2443
+
dids_estimate,
2444
+
..
2445
+
} = read.get_collection_counts(
2446
+
&Nsid::new("a.a.a".to_string()).unwrap(),
2447
+
beginning(),
2448
+
None,
2449
+
)?;
2450
+
assert_eq!(creates, 2);
2451
+
assert_eq!(dids_estimate, 2);
2452
2453
// second batch rolled up
2454
+
let (n, _) = write.step_rollup()?;
2455
assert_eq!(n, 1);
2456
2457
+
let JustCount {
2458
+
creates,
2459
+
dids_estimate,
2460
+
..
2461
+
} = read.get_collection_counts(
2462
+
&Nsid::new("a.a.a".to_string()).unwrap(),
2463
+
beginning(),
2464
+
None,
2465
+
)?;
2466
+
assert_eq!(creates, 3);
2467
+
assert_eq!(dids_estimate, 2);
2468
2469
// no more rollups left
2470
+
let (n, _) = write.step_rollup()?;
2471
assert_eq!(n, 0);
2472
2473
Ok(())
2474
}
2475
2476
#[test]
2477
+
fn get_prefix_children_lexi_empty() {
2478
+
let (read, _) = fjall_db();
2479
+
let (
2480
+
JustCount {
2481
+
creates,
2482
+
dids_estimate,
2483
+
..
2484
+
},
2485
+
children,
2486
+
cursor,
2487
+
) = read
2488
+
.get_prefix(
2489
+
NsidPrefix::new("aaa.aaa").unwrap(),
2490
+
10,
2491
+
OrderCollectionsBy::Lexi { cursor: None },
2492
+
None,
2493
+
None,
2494
+
)
2495
+
.unwrap();
2496
+
2497
+
assert_eq!(creates, 0);
2498
+
assert_eq!(dids_estimate, 0);
2499
+
assert_eq!(children, vec![]);
2500
+
assert_eq!(cursor, None);
2501
+
}
2502
+
2503
+
#[test]
2504
+
fn get_prefix_excludes_exact_collection() -> anyhow::Result<()> {
2505
let (read, mut write) = fjall_db();
2506
2507
let mut batch = TestBatch::default();
···
2514
None,
2515
10_000,
2516
);
2517
+
write.insert_batch(batch.batch)?;
2518
+
write.step_rollup()?;
2519
+
2520
+
let (
2521
+
JustCount {
2522
+
creates,
2523
+
dids_estimate,
2524
+
..
2525
+
},
2526
+
children,
2527
+
cursor,
2528
+
) = read.get_prefix(
2529
+
NsidPrefix::new("a.a.a").unwrap(),
2530
+
10,
2531
+
OrderCollectionsBy::Lexi { cursor: None },
2532
+
None,
2533
+
None,
2534
+
)?;
2535
+
assert_eq!(creates, 0);
2536
+
assert_eq!(dids_estimate, 0);
2537
+
assert_eq!(children, vec![]);
2538
+
assert_eq!(cursor, None);
2539
+
Ok(())
2540
+
}
2541
+
2542
+
#[test]
2543
+
fn get_prefix_excludes_neighbour_collection() -> anyhow::Result<()> {
2544
+
let (read, mut write) = fjall_db();
2545
+
2546
+
let mut batch = TestBatch::default();
2547
batch.create(
2548
+
"did:plc:person-a",
2549
+
"a.a.aa",
2550
+
"rkey-aaa",
2551
"{}",
2552
+
Some("rev-aaa"),
2553
None,
2554
+
10_000,
2555
);
2556
+
write.insert_batch(batch.batch)?;
2557
+
write.step_rollup()?;
2558
+
2559
+
let (
2560
+
JustCount {
2561
+
creates,
2562
+
dids_estimate,
2563
+
..
2564
+
},
2565
+
children,
2566
+
cursor,
2567
+
) = read.get_prefix(
2568
+
NsidPrefix::new("a.a.a").unwrap(),
2569
+
10,
2570
+
OrderCollectionsBy::Lexi { cursor: None },
2571
+
None,
2572
+
None,
2573
+
)?;
2574
+
assert_eq!(creates, 0);
2575
+
assert_eq!(dids_estimate, 0);
2576
+
assert_eq!(children, vec![]);
2577
+
assert_eq!(cursor, None);
2578
+
Ok(())
2579
+
}
2580
+
2581
+
#[test]
2582
+
fn get_prefix_includes_child_collection() -> anyhow::Result<()> {
2583
+
let (read, mut write) = fjall_db();
2584
+
2585
+
let mut batch = TestBatch::default();
2586
batch.create(
2587
+
"did:plc:person-a",
2588
+
"a.a.a",
2589
+
"rkey-aaa",
2590
"{}",
2591
+
Some("rev-aaa"),
2592
None,
2593
+
10_000,
2594
);
2595
+
write.insert_batch(batch.batch)?;
2596
+
write.step_rollup()?;
2597
+
2598
+
let (
2599
+
JustCount {
2600
+
creates,
2601
+
dids_estimate,
2602
+
..
2603
+
},
2604
+
children,
2605
+
cursor,
2606
+
) = read.get_prefix(
2607
+
NsidPrefix::new("a.a").unwrap(),
2608
+
10,
2609
+
OrderCollectionsBy::Lexi { cursor: None },
2610
+
None,
2611
+
None,
2612
+
)?;
2613
+
assert_eq!(creates, 1);
2614
+
assert_eq!(dids_estimate, 1);
2615
+
assert_eq!(
2616
+
children,
2617
+
vec![PrefixChild::Collection(NsidCount {
2618
+
nsid: "a.a.a".to_string(),
2619
+
creates: 1,
2620
+
dids_estimate: 1
2621
+
}),]
2622
+
);
2623
+
assert_eq!(cursor, None);
2624
+
Ok(())
2625
+
}
2626
+
2627
+
#[test]
2628
+
fn get_prefix_includes_child_prefix() -> anyhow::Result<()> {
2629
+
let (read, mut write) = fjall_db();
2630
+
2631
+
let mut batch = TestBatch::default();
2632
batch.create(
2633
"did:plc:person-a",
2634
+
"a.a.a.a",
2635
+
"rkey-aaaa",
2636
"{}",
2637
+
Some("rev-aaaa"),
2638
None,
2639
+
10_000,
2640
);
2641
write.insert_batch(batch.batch)?;
2642
+
write.step_rollup()?;
2643
2644
+
let (
2645
+
JustCount {
2646
+
creates,
2647
+
dids_estimate,
2648
+
..
2649
+
},
2650
+
children,
2651
+
cursor,
2652
+
) = read.get_prefix(
2653
+
NsidPrefix::new("a.a").unwrap(),
2654
+
10,
2655
+
OrderCollectionsBy::Lexi { cursor: None },
2656
+
None,
2657
+
None,
2658
+
)?;
2659
+
assert_eq!(creates, 1);
2660
+
assert_eq!(dids_estimate, 1);
2661
assert_eq!(
2662
+
children,
2663
+
vec![PrefixChild::Prefix(PrefixCount {
2664
+
prefix: "a.a.a".to_string(),
2665
+
creates: 1,
2666
+
dids_estimate: 1
2667
+
}),]
2668
);
2669
+
assert_eq!(cursor, None);
2670
Ok(())
2671
}
2672
2673
#[test]
2674
+
fn get_prefix_merges_child_prefixes() -> anyhow::Result<()> {
2675
let (read, mut write) = fjall_db();
2676
2677
let mut batch = TestBatch::default();
2678
batch.create(
2679
+
"did:plc:person-a",
2680
"a.a.a.a",
2681
+
"rkey-aaaa",
2682
+
"{}",
2683
Some("rev-aaaa"),
2684
None,
2685
+
10_000,
2686
);
2687
batch.create(
2688
+
"did:plc:person-a",
2689
+
"a.a.a.b",
2690
+
"rkey-aaab",
2691
+
"{}",
2692
+
Some("rev-aaab"),
2693
None,
2694
+
10_001,
2695
);
2696
write.insert_batch(batch.batch)?;
2697
+
write.step_rollup()?;
2698
2699
+
let (
2700
+
JustCount {
2701
+
creates,
2702
+
dids_estimate,
2703
+
..
2704
+
},
2705
+
children,
2706
+
cursor,
2707
+
) = read.get_prefix(
2708
+
NsidPrefix::new("a.a").unwrap(),
2709
+
10,
2710
+
OrderCollectionsBy::Lexi { cursor: None },
2711
+
None,
2712
+
None,
2713
+
)?;
2714
+
assert_eq!(creates, 2);
2715
+
assert_eq!(dids_estimate, 1);
2716
assert_eq!(
2717
+
children,
2718
+
vec![PrefixChild::Prefix(PrefixCount {
2719
+
prefix: "a.a.a".to_string(),
2720
+
creates: 2,
2721
+
dids_estimate: 1
2722
+
}),]
2723
);
2724
+
assert_eq!(cursor, None);
2725
+
Ok(())
2726
+
}
2727
2728
+
#[test]
2729
+
fn get_prefix_exact_and_child_and_prefix() -> anyhow::Result<()> {
2730
+
let (read, mut write) = fjall_db();
2731
2732
+
let mut batch = TestBatch::default();
2733
+
// exact:
2734
+
batch.create(
2735
+
"did:plc:person-a",
2736
+
"a.a.a",
2737
+
"rkey-aaa",
2738
+
"{}",
2739
+
Some("rev-aaa"),
2740
+
None,
2741
+
10_000,
2742
+
);
2743
+
// child:
2744
+
batch.create(
2745
+
"did:plc:person-a",
2746
+
"a.a.a.a",
2747
+
"rkey-aaaa",
2748
+
"{}",
2749
+
Some("rev-aaaa"),
2750
+
None,
2751
+
10_001,
2752
+
);
2753
+
// prefix:
2754
+
batch.create(
2755
+
"did:plc:person-a",
2756
+
"a.a.a.a.a",
2757
+
"rkey-aaaaa",
2758
+
"{}",
2759
+
Some("rev-aaaaa"),
2760
+
None,
2761
+
10_002,
2762
+
);
2763
+
write.insert_batch(batch.batch)?;
2764
+
write.step_rollup()?;
2765
+
2766
+
let (
2767
+
JustCount {
2768
+
creates,
2769
+
dids_estimate,
2770
+
..
2771
+
},
2772
+
children,
2773
+
cursor,
2774
+
) = read.get_prefix(
2775
+
NsidPrefix::new("a.a.a").unwrap(),
2776
+
10,
2777
+
OrderCollectionsBy::Lexi { cursor: None },
2778
+
None,
2779
+
None,
2780
+
)?;
2781
+
assert_eq!(creates, 2);
2782
+
assert_eq!(dids_estimate, 1);
2783
+
assert_eq!(
2784
+
children,
2785
+
vec![
2786
+
PrefixChild::Collection(NsidCount {
2787
+
nsid: "a.a.a.a".to_string(),
2788
+
creates: 1,
2789
+
dids_estimate: 1
2790
+
}),
2791
+
PrefixChild::Prefix(PrefixCount {
2792
+
prefix: "a.a.a.a".to_string(),
2793
+
creates: 1,
2794
+
dids_estimate: 1
2795
+
}),
2796
+
]
2797
+
);
2798
+
assert_eq!(cursor, None);
2799
Ok(())
2800
}
2801
}
-1844
ufos/src/storage_mem.rs
···
1
-
use std::ops::Bound;
2
-
use std::sync::Arc;
3
-
4
-
use crate::db_types::{db_complete, DbBytes, DbStaticStr, StaticStr};
5
-
use crate::error::StorageError;
6
-
use crate::storage::{StorageResult, StorageWhatever, StoreReader, StoreWriter};
7
-
use crate::store_types::{
8
-
AllTimeRollupKey, CountsValue, DeleteAccountQueueKey, DeleteAccountQueueVal,
9
-
HourTruncatedCursor, HourlyRollupKey, JetstreamCursorKey, JetstreamCursorValue,
10
-
JetstreamEndpointKey, JetstreamEndpointValue, LiveCountsKey, NewRollupCursorKey,
11
-
NewRollupCursorValue, NsidRecordFeedKey, NsidRecordFeedVal, RecordLocationKey,
12
-
RecordLocationMeta, RecordLocationVal, RecordRawValue, TakeoffKey, TakeoffValue,
13
-
WeekTruncatedCursor, WeeklyRollupKey,
14
-
};
15
-
use crate::{CommitAction, ConsumerInfo, Did, EventBatch, Nsid, TopCollections, UFOsRecord};
16
-
use async_trait::async_trait;
17
-
use jetstream::events::Cursor;
18
-
use lsm_tree::range::prefix_to_range;
19
-
use std::collections::BTreeMap;
20
-
use std::collections::HashMap;
21
-
use std::path::Path;
22
-
use std::sync::Mutex;
23
-
use std::sync::RwLock;
24
-
use std::time::SystemTime;
25
-
26
-
const MAX_BATCHED_CLEANUP_SIZE: usize = 1024; // try to commit progress for longer feeds
27
-
const MAX_BATCHED_ACCOUNT_DELETE_RECORDS: usize = 1024;
28
-
const MAX_BATCHED_ROLLUP_COUNTS: usize = 256;
29
-
30
-
///
31
-
/// new data format, roughly:
32
-
///
33
-
/// Partion: 'global'
34
-
///
35
-
/// - Global sequence counter (is the jetstream cursor -- monotonic with many gaps)
36
-
/// - key: "js_cursor" (literal)
37
-
/// - val: u64
38
-
///
39
-
/// - Jetstream server endpoint (persisted because the cursor can't be used on another instance without data loss)
40
-
/// - key: "js_endpoint" (literal)
41
-
/// - val: string (URL of the instance)
42
-
///
43
-
/// - Launch date
44
-
/// - key: "takeoff" (literal)
45
-
/// - val: u64 (micros timestamp, not from jetstream for now so not precise)
46
-
///
47
-
/// - Rollup cursor (bg work: roll stats into hourlies, delete accounts, old record deletes)
48
-
/// - key: "rollup_cursor" (literal)
49
-
/// - val: u64 (tracks behind js_cursor)
50
-
///
51
-
///
52
-
/// Partition: 'feed'
53
-
///
54
-
/// - Per-collection list of record references ordered by jetstream cursor
55
-
/// - key: nullstr || u64 (collection nsid null-terminated, jetstream cursor)
56
-
/// - val: nullstr || nullstr || nullstr (did, rkey, rev. rev is mostly a sanity-check for now.)
57
-
///
58
-
///
59
-
/// Partition: 'records'
60
-
///
61
-
/// - Actual records by their atproto location
62
-
/// - key: nullstr || nullstr || nullstr (did, collection, rkey)
63
-
/// - val: u64 || bool || nullstr || rawval (js_cursor, is_update, rev, actual record)
64
-
///
65
-
///
66
-
/// Partition: 'rollups'
67
-
///
68
-
/// - Live (batched) records counts and dids estimate per collection
69
-
/// - key: "live_counts" || u64 || nullstr (js_cursor, nsid)
70
-
/// - val: u64 || HLL (count (not cursor), estimator)
71
-
///
72
-
/// - Hourly total record counts and dids estimate per collection
73
-
/// - key: "hourly_counts" || u64 || nullstr (hour, nsid)
74
-
/// - val: u64 || HLL (count (not cursor), estimator)
75
-
///
76
-
/// - Weekly total record counts and dids estimate per collection
77
-
/// - key: "weekly_counts" || u64 || nullstr (hour, nsid)
78
-
/// - val: u64 || HLL (count (not cursor), estimator)
79
-
///
80
-
/// - All-time total record counts and dids estimate per collection
81
-
/// - key: "ever_counts" || nullstr (nsid)
82
-
/// - val: u64 || HLL (count (not cursor), estimator)
83
-
///
84
-
/// - TODO: sorted indexes for all-times?
85
-
///
86
-
///
87
-
/// Partition: 'queues'
88
-
///
89
-
/// - Delete account queue
90
-
/// - key: "delete_acount" || u64 (js_cursor)
91
-
/// - val: nullstr (did)
92
-
///
93
-
///
94
-
/// TODO: moderation actions
95
-
/// TODO: account privacy preferences. Might wait for the protocol-level (PDS-level?) stuff to land. Will probably do lazy fetching + caching on read.
96
-
#[derive(Debug)]
97
-
pub struct MemStorage {}
98
-
99
-
#[derive(Debug, Default)]
100
-
pub struct MemConfig {
101
-
/// drop the db when the storage is dropped
102
-
///
103
-
/// this is only meant for tests
104
-
#[cfg(test)]
105
-
pub temp: bool,
106
-
}
107
-
108
-
////////////
109
-
////////////
110
-
////////////
111
-
////////////
112
-
////////////
113
-
////////////
114
-
115
-
struct BatchSentinel {}
116
-
117
-
#[derive(Clone)]
118
-
struct MemKeyspace {
119
-
keyspace_guard: Arc<RwLock<BatchSentinel>>,
120
-
}
121
-
122
-
impl MemKeyspace {
123
-
pub fn open() -> Self {
124
-
Self {
125
-
keyspace_guard: Arc::new(RwLock::new(BatchSentinel {})),
126
-
}
127
-
}
128
-
pub fn open_partition(&self, _name: &str) -> StorageResult<MemPartion> {
129
-
Ok(MemPartion {
130
-
// name: name.to_string(),
131
-
keyspace_guard: self.keyspace_guard.clone(),
132
-
contents: Default::default(),
133
-
})
134
-
}
135
-
pub fn batch(&self) -> MemBatch {
136
-
MemBatch {
137
-
keyspace_guard: self.keyspace_guard.clone(),
138
-
tasks: Vec::new(),
139
-
}
140
-
}
141
-
pub fn instant(&self) -> u64 {
142
-
1
143
-
}
144
-
}
145
-
146
-
enum BatchTask {
147
-
Insert {
148
-
p: MemPartion,
149
-
key: Vec<u8>,
150
-
val: Vec<u8>,
151
-
},
152
-
Remove {
153
-
p: MemPartion,
154
-
key: Vec<u8>,
155
-
},
156
-
}
157
-
struct MemBatch {
158
-
keyspace_guard: Arc<RwLock<BatchSentinel>>,
159
-
tasks: Vec<BatchTask>,
160
-
}
161
-
impl MemBatch {
162
-
pub fn insert(&mut self, p: &MemPartion, key: &[u8], val: &[u8]) {
163
-
self.tasks.push(BatchTask::Insert {
164
-
p: p.clone(),
165
-
key: key.to_vec(),
166
-
val: val.to_vec(),
167
-
});
168
-
}
169
-
pub fn remove(&mut self, p: &MemPartion, key: &[u8]) {
170
-
self.tasks.push(BatchTask::Remove {
171
-
p: p.clone(),
172
-
key: key.to_vec(),
173
-
});
174
-
}
175
-
pub fn len(&self) -> usize {
176
-
self.tasks.len()
177
-
}
178
-
pub fn commit(&mut self) -> StorageResult<()> {
179
-
let _guard = self.keyspace_guard.write().unwrap();
180
-
for task in &mut self.tasks {
181
-
match task {
182
-
BatchTask::Insert { p, key, val } => p
183
-
.contents
184
-
.try_lock()
185
-
.unwrap()
186
-
.insert(key.to_vec(), val.to_vec()),
187
-
BatchTask::Remove { p, key } => p.contents.try_lock().unwrap().remove(key),
188
-
};
189
-
}
190
-
Ok(())
191
-
}
192
-
}
193
-
194
-
#[derive(Clone)]
195
-
struct MemPartion {
196
-
// name: String,
197
-
keyspace_guard: Arc<RwLock<BatchSentinel>>,
198
-
contents: Arc<Mutex<BTreeMap<Vec<u8>, Vec<u8>>>>,
199
-
}
200
-
impl MemPartion {
201
-
pub fn get(&self, key: &[u8]) -> StorageResult<Option<Vec<u8>>> {
202
-
let _guard = self.keyspace_guard.read().unwrap();
203
-
Ok(self.contents.lock().unwrap().get(key).cloned())
204
-
}
205
-
pub fn prefix(&self, pre: &[u8]) -> Vec<StorageResult<(Vec<u8>, Vec<u8>)>> {
206
-
// let prefix_bytes = prefix.to_db_bytes()?;
207
-
let (_, Bound::Excluded(range_end)) = prefix_to_range(pre) else {
208
-
panic!("bad range thing");
209
-
};
210
-
211
-
return self.range(pre.to_vec()..range_end.to_vec());
212
-
}
213
-
pub fn range(&self, r: std::ops::Range<Vec<u8>>) -> Vec<StorageResult<(Vec<u8>, Vec<u8>)>> {
214
-
let _guard = self.keyspace_guard.read().unwrap();
215
-
self.contents
216
-
.lock()
217
-
.unwrap()
218
-
.range(r)
219
-
.map(|(k, v)| Ok((k.clone(), v.clone())))
220
-
.collect()
221
-
}
222
-
pub fn insert(&self, key: &[u8], val: &[u8]) -> StorageResult<()> {
223
-
let _guard = self.keyspace_guard.read().unwrap();
224
-
self.contents
225
-
.lock()
226
-
.unwrap()
227
-
.insert(key.to_vec(), val.to_vec());
228
-
Ok(())
229
-
}
230
-
// pub fn remove(&self, key: &[u8]) -> StorageResult<()> {
231
-
// let _guard = self.keyspace_guard.read().unwrap();
232
-
// self.contents
233
-
// .lock()
234
-
// .unwrap()
235
-
// .remove(key);
236
-
// Ok(())
237
-
// }
238
-
pub fn snapshot_at(&self, _instant: u64) -> Self {
239
-
self.clone()
240
-
}
241
-
pub fn snapshot(&self) -> Self {
242
-
self.clone()
243
-
}
244
-
}
245
-
246
-
////////////
247
-
////////////
248
-
////////////
249
-
////////////
250
-
////////////
251
-
////////////
252
-
253
-
impl StorageWhatever<MemReader, MemWriter, MemConfig> for MemStorage {
254
-
fn init(
255
-
_path: impl AsRef<Path>,
256
-
endpoint: String,
257
-
force_endpoint: bool,
258
-
_config: MemConfig,
259
-
) -> StorageResult<(MemReader, MemWriter, Option<Cursor>)> {
260
-
let keyspace = MemKeyspace::open();
261
-
262
-
let global = keyspace.open_partition("global")?;
263
-
let feeds = keyspace.open_partition("feeds")?;
264
-
let records = keyspace.open_partition("records")?;
265
-
let rollups = keyspace.open_partition("rollups")?;
266
-
let queues = keyspace.open_partition("queues")?;
267
-
268
-
let js_cursor = get_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?;
269
-
270
-
if js_cursor.is_some() {
271
-
let stored_endpoint =
272
-
get_static_neu::<JetstreamEndpointKey, JetstreamEndpointValue>(&global)?;
273
-
274
-
let JetstreamEndpointValue(stored) = stored_endpoint.ok_or(StorageError::InitError(
275
-
"found cursor but missing js_endpoint, refusing to start.".to_string(),
276
-
))?;
277
-
278
-
if stored != endpoint {
279
-
if force_endpoint {
280
-
log::warn!("forcing a jetstream switch from {stored:?} to {endpoint:?}");
281
-
insert_static_neu::<JetstreamEndpointKey>(
282
-
&global,
283
-
JetstreamEndpointValue(endpoint.to_string()),
284
-
)?;
285
-
} else {
286
-
return Err(StorageError::InitError(format!(
287
-
"stored js_endpoint {stored:?} differs from provided {endpoint:?}, refusing to start.")));
288
-
}
289
-
}
290
-
} else {
291
-
insert_static_neu::<JetstreamEndpointKey>(
292
-
&global,
293
-
JetstreamEndpointValue(endpoint.to_string()),
294
-
)?;
295
-
insert_static_neu::<TakeoffKey>(&global, Cursor::at(SystemTime::now()))?;
296
-
insert_static_neu::<NewRollupCursorKey>(&global, Cursor::from_start())?;
297
-
}
298
-
299
-
let reader = MemReader {
300
-
keyspace: keyspace.clone(),
301
-
global: global.clone(),
302
-
feeds: feeds.clone(),
303
-
records: records.clone(),
304
-
rollups: rollups.clone(),
305
-
};
306
-
let writer = MemWriter {
307
-
keyspace,
308
-
global,
309
-
feeds,
310
-
records,
311
-
rollups,
312
-
queues,
313
-
};
314
-
Ok((reader, writer, js_cursor))
315
-
}
316
-
}
317
-
318
-
type MemRKV = StorageResult<(Vec<u8>, Vec<u8>)>;
319
-
320
-
#[derive(Clone)]
321
-
pub struct MemReader {
322
-
keyspace: MemKeyspace,
323
-
global: MemPartion,
324
-
feeds: MemPartion,
325
-
records: MemPartion,
326
-
rollups: MemPartion,
327
-
}
328
-
329
-
/// An iterator that knows how to skip over deleted/invalidated records
330
-
struct RecordIterator {
331
-
db_iter: Box<dyn Iterator<Item = MemRKV>>,
332
-
records: MemPartion,
333
-
limit: usize,
334
-
fetched: usize,
335
-
}
336
-
impl RecordIterator {
337
-
pub fn new(
338
-
feeds: &MemPartion,
339
-
records: MemPartion,
340
-
collection: &Nsid,
341
-
limit: usize,
342
-
) -> StorageResult<Self> {
343
-
let prefix = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?;
344
-
let db_iter = feeds.prefix(&prefix).into_iter().rev();
345
-
Ok(Self {
346
-
db_iter: Box::new(db_iter),
347
-
records,
348
-
limit,
349
-
fetched: 0,
350
-
})
351
-
}
352
-
fn get_record(&self, db_next: MemRKV) -> StorageResult<Option<UFOsRecord>> {
353
-
let (key_bytes, val_bytes) = db_next?;
354
-
let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?;
355
-
let feed_val = db_complete::<NsidRecordFeedVal>(&val_bytes)?;
356
-
let location_key: RecordLocationKey = (&feed_key, &feed_val).into();
357
-
358
-
let Some(location_val_bytes) = self.records.get(&location_key.to_db_bytes()?)? else {
359
-
// record was deleted (hopefully)
360
-
return Ok(None);
361
-
};
362
-
363
-
let (meta, n) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?;
364
-
365
-
if meta.cursor() != feed_key.cursor() {
366
-
// older/different version
367
-
return Ok(None);
368
-
}
369
-
if meta.rev != feed_val.rev() {
370
-
// weird...
371
-
log::warn!("record lookup: cursor match but rev did not...? excluding.");
372
-
return Ok(None);
373
-
}
374
-
let Some(raw_value_bytes) = location_val_bytes.get(n..) else {
375
-
log::warn!(
376
-
"record lookup: found record but could not get bytes to decode the record??"
377
-
);
378
-
return Ok(None);
379
-
};
380
-
let rawval = db_complete::<RecordRawValue>(raw_value_bytes)?;
381
-
Ok(Some(UFOsRecord {
382
-
collection: feed_key.collection().clone(),
383
-
cursor: feed_key.cursor(),
384
-
did: feed_val.did().clone(),
385
-
rkey: feed_val.rkey().clone(),
386
-
rev: meta.rev.to_string(),
387
-
record: rawval.try_into()?,
388
-
is_update: meta.is_update,
389
-
}))
390
-
}
391
-
}
392
-
impl Iterator for RecordIterator {
393
-
type Item = StorageResult<Option<UFOsRecord>>;
394
-
fn next(&mut self) -> Option<Self::Item> {
395
-
if self.fetched == self.limit {
396
-
return Some(Ok(None));
397
-
}
398
-
let record = loop {
399
-
let db_next = self.db_iter.next()?; // None short-circuits here
400
-
match self.get_record(db_next) {
401
-
Err(e) => return Some(Err(e)),
402
-
Ok(Some(record)) => break record,
403
-
Ok(None) => continue,
404
-
}
405
-
};
406
-
self.fetched += 1;
407
-
Some(Ok(Some(record)))
408
-
}
409
-
}
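// [editor sketch, not part of the original change] The iterator yields a
// tri-state Option<StorageResult<Option<UFOsRecord>>>: outer None means the
// feed is exhausted, Ok(None) means the limit was hit, and Ok(Some(..)) is a
// live record. A caller drains it like this:
#[allow(dead_code)]
fn example_drain(iter: RecordIterator) -> StorageResult<Vec<UFOsRecord>> {
    let mut out = Vec::new();
    for item in iter {
        match item? {
            Some(record) => out.push(record),
            None => break, // reached the caller-supplied limit
        }
    }
    Ok(out)
}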
410
-
411
-
impl MemReader {
412
-
fn get_storage_stats(&self) -> StorageResult<serde_json::Value> {
413
-
let rollup_cursor =
414
-
get_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&self.global)?
415
-
.map(|c| c.to_raw_u64());
416
-
417
-
Ok(serde_json::json!({
418
-
"rollup_cursor": rollup_cursor,
419
-
}))
420
-
}
421
-
422
-
fn get_consumer_info(&self) -> StorageResult<ConsumerInfo> {
423
-
let global = self.global.snapshot();
424
-
425
-
let endpoint =
426
-
get_snapshot_static_neu::<JetstreamEndpointKey, JetstreamEndpointValue>(&global)?
427
-
.ok_or(StorageError::BadStateError(
428
-
"Could not find jetstream endpoint".to_string(),
429
-
))?
430
-
.0;
431
-
432
-
let started_at = get_snapshot_static_neu::<TakeoffKey, TakeoffValue>(&global)?
433
-
.ok_or(StorageError::BadStateError(
434
-
"Could not find jetstream takeoff time".to_string(),
435
-
))?
436
-
.to_raw_u64();
437
-
438
-
let latest_cursor =
439
-
get_snapshot_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?
440
-
.map(|c| c.to_raw_u64());
441
-
442
-
Ok(ConsumerInfo::Jetstream {
443
-
endpoint,
444
-
started_at,
445
-
latest_cursor,
446
-
})
447
-
}
448
-
449
-
fn get_top_collections(&self) -> Result<TopCollections, StorageError> {
450
-
// TODO: limit nsid traversal depth
451
-
// TODO: limit nsid traversal breadth
452
-
// TODO: be serious about anything
453
-
454
-
// TODO: probably use a stack of segments to reduce to ~log-n merges
455
-
456
-
#[derive(Default)]
457
-
struct Blah {
458
-
counts: CountsValue,
459
-
children: HashMap<String, Blah>,
460
-
}
461
-
impl From<&Blah> for TopCollections {
462
-
fn from(bla: &Blah) -> Self {
463
-
Self {
464
-
total_records: bla.counts.records(),
465
-
dids_estimate: bla.counts.dids().estimate() as u64,
466
-
nsid_child_segments: HashMap::from_iter(
467
-
bla.children.iter().map(|(k, v)| (k.to_string(), v.into())),
468
-
),
469
-
}
470
-
}
471
-
}
472
-
473
-
let mut b = Blah::default();
474
-
let prefix = AllTimeRollupKey::from_prefix_to_db_bytes(&Default::default())?;
475
-
for kv in self.rollups.prefix(&prefix.to_db_bytes()?) {
476
-
let (key_bytes, val_bytes) = kv?;
477
-
let key = db_complete::<AllTimeRollupKey>(&key_bytes)?;
478
-
let val = db_complete::<CountsValue>(&val_bytes)?;
479
-
480
-
let mut node = &mut b;
481
-
node.counts.merge(&val);
482
-
for segment in key.collection().split('.') {
483
-
node = node.children.entry(segment.to_string()).or_default();
484
-
node.counts.merge(&val);
485
-
}
486
-
}
487
-
488
-
Ok((&b).into())
489
-
}
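// [editor sketch, not part of the original change] The loop above credits each
// all-time rollup key to every prefix node along its NSID: "a.b.c" contributes
// to the root, then "a", "a.b", and "a.b.c". The chain of nodes, for illustration:
#[allow(dead_code)]
fn example_segment_paths(nsid: &str) -> Vec<String> {
    // "a.b.c" -> ["a", "a.b", "a.b.c"]
    let mut paths = Vec::new();
    let mut acc = String::new();
    for segment in nsid.split('.') {
        if !acc.is_empty() {
            acc.push('.');
        }
        acc.push_str(segment);
        paths.push(acc.clone());
    }
    paths
}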
490
-
491
-
fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> {
492
-
// 0. grab a snapshot in case rollups happen while we're working
493
-
let instant = self.keyspace.instant();
494
-
let global = self.global.snapshot_at(instant);
495
-
let rollups = self.rollups.snapshot_at(instant);
496
-
497
-
// 1. all-time counts
498
-
let all_time_key = AllTimeRollupKey::new(collection).to_db_bytes()?;
499
-
let mut total_counts = rollups
500
-
.get(&all_time_key)?
501
-
.as_deref()
502
-
.map(db_complete::<CountsValue>)
503
-
.transpose()?
504
-
.unwrap_or_default();
505
-
506
-
// 2. live counts that haven't been rolled into all-time yet.
507
-
let rollup_cursor =
508
-
get_snapshot_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&global)?.ok_or(
509
-
StorageError::BadStateError("Could not find current rollup cursor".to_string()),
510
-
)?;
511
-
512
-
let full_range = LiveCountsKey::range_from_cursor(rollup_cursor)?;
513
-
for kv in rollups.range(full_range) {
514
-
let (key_bytes, val_bytes) = kv?;
515
-
let key = db_complete::<LiveCountsKey>(&key_bytes)?;
516
-
if key.collection() == collection {
517
-
let counts = db_complete::<CountsValue>(&val_bytes)?;
518
-
total_counts.merge(&counts);
519
-
}
520
-
}
521
-
Ok((
522
-
total_counts.records(),
523
-
total_counts.dids().estimate() as u64,
524
-
))
525
-
}
526
-
527
-
fn get_records_by_collections(
528
-
&self,
529
-
collections: &[Nsid],
530
-
limit: usize,
531
-
_expand_each_collection: bool,
532
-
) -> StorageResult<Vec<UFOsRecord>> {
533
-
if collections.is_empty() {
534
-
return Ok(vec![]);
535
-
}
536
-
let mut record_iterators = Vec::new();
537
-
for collection in collections {
538
-
let iter = RecordIterator::new(&self.feeds, self.records.clone(), collection, limit)?;
539
-
record_iterators.push(iter.peekable());
540
-
}
541
-
let mut merged = Vec::new();
542
-
loop {
543
-
let mut latest: Option<(Cursor, usize)> = None; // (newest cursor seen so far, index of its iterator)
544
-
for (i, iter) in record_iterators.iter_mut().enumerate() {
545
-
let Some(it) = iter.peek_mut() else {
546
-
continue;
547
-
};
548
-
let it = match it {
549
-
Ok(v) => v,
550
-
Err(e) => Err(std::mem::replace(e, StorageError::Stolen))?,
551
-
};
552
-
let Some(rec) = it else {
553
-
break;
554
-
};
555
-
if let Some((cursor, _)) = latest {
556
-
if rec.cursor > cursor {
557
-
latest = Some((rec.cursor, i))
558
-
}
559
-
} else {
560
-
latest = Some((rec.cursor, i));
561
-
}
562
-
}
563
-
let Some((_, idx)) = latest else {
564
-
break;
565
-
};
566
-
// the peeked element was checked to be Ok(Some(..)) above, so these unwraps cannot fail
567
-
merged.push(record_iterators[idx].next().unwrap().unwrap().unwrap());
568
-
}
569
-
Ok(merged)
570
-
}
571
-
}
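// [editor sketch, not part of the original change] The merge loop above is a
// k-way merge over per-collection iterators that are each already sorted
// newest-first: peek every head, take the head with the largest cursor, repeat.
// The same shape over plain vectors of (already descending) numbers:
#[allow(dead_code)]
fn example_kway_merge_desc(mut lists: Vec<Vec<u64>>) -> Vec<u64> {
    let mut merged = Vec::new();
    loop {
        // find which list currently has the largest head element
        let Some((idx, _)) = lists
            .iter()
            .enumerate()
            .filter_map(|(i, l)| l.first().map(|v| (i, *v)))
            .max_by_key(|(_, v)| *v)
        else {
            break; // every list is exhausted
        };
        merged.push(lists[idx].remove(0));
    }
    merged
}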
572
-
573
-
#[async_trait]
574
-
impl StoreReader for MemReader {
575
-
fn name(&self) -> String {
576
-
"in-memory store".into()
577
-
}
578
-
async fn get_storage_stats(&self) -> StorageResult<serde_json::Value> {
579
-
let s = self.clone();
580
-
tokio::task::spawn_blocking(move || MemReader::get_storage_stats(&s)).await?
581
-
}
582
-
async fn get_consumer_info(&self) -> StorageResult<ConsumerInfo> {
583
-
let s = self.clone();
584
-
tokio::task::spawn_blocking(move || MemReader::get_consumer_info(&s)).await?
585
-
}
586
-
async fn get_top_collections(&self) -> Result<TopCollections, StorageError> {
587
-
let s = self.clone();
588
-
tokio::task::spawn_blocking(move || MemReader::get_top_collections(&s)).await?
589
-
}
590
-
async fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> {
591
-
let s = self.clone();
592
-
let collection = collection.clone();
593
-
tokio::task::spawn_blocking(move || MemReader::get_counts_by_collection(&s, &collection))
594
-
.await?
595
-
}
596
-
async fn get_records_by_collections(
597
-
&self,
598
-
collections: &[Nsid],
599
-
limit: usize,
600
-
expand_each_collection: bool,
601
-
) -> StorageResult<Vec<UFOsRecord>> {
602
-
let s = self.clone();
603
-
let collections = collections.to_vec();
604
-
tokio::task::spawn_blocking(move || {
605
-
MemReader::get_records_by_collections(&s, &collections, limit, expand_each_collection)
606
-
})
607
-
.await?
608
-
}
609
-
}
610
-
611
-
pub struct MemWriter {
612
-
keyspace: MemKeyspace,
613
-
global: MemPartion,
614
-
feeds: MemPartion,
615
-
records: MemPartion,
616
-
rollups: MemPartion,
617
-
queues: MemPartion,
618
-
}
619
-
620
-
impl MemWriter {
621
-
fn rollup_delete_account(
622
-
&mut self,
623
-
cursor: Cursor,
624
-
key_bytes: &[u8],
625
-
val_bytes: &[u8],
626
-
) -> StorageResult<usize> {
627
-
let did = db_complete::<DeleteAccountQueueVal>(val_bytes)?;
628
-
self.delete_account(&did)?;
629
-
let mut batch = self.keyspace.batch();
630
-
batch.remove(&self.queues, key_bytes);
631
-
insert_batch_static_neu::<NewRollupCursorKey>(&mut batch, &self.global, cursor)?;
632
-
batch.commit()?;
633
-
Ok(1)
634
-
}
635
-
636
-
fn rollup_live_counts(
637
-
&mut self,
638
-
timelies: impl Iterator<Item = Result<(Vec<u8>, Vec<u8>), StorageError>>,
639
-
cursor_exclusive_limit: Option<Cursor>,
640
-
rollup_limit: usize,
641
-
) -> StorageResult<usize> {
642
-
// current strategy is to buffer counts in mem before writing the rollups
643
-
// we *could* read+write every single batch to rollup.. but their merge is associative so
644
-
// ...so save the db some work up front? is this worth it? who knows...
645
-
646
-
log::warn!("sup!!!");
647
-
648
-
#[derive(Eq, Hash, PartialEq)]
649
-
enum Rollup {
650
-
Hourly(HourTruncatedCursor),
651
-
Weekly(WeekTruncatedCursor),
652
-
AllTime,
653
-
}
654
-
655
-
let mut batch = self.keyspace.batch();
656
-
let mut cursors_advanced = 0;
657
-
let mut last_cursor = Cursor::from_start();
658
-
let mut counts_by_rollup: HashMap<(Nsid, Rollup), CountsValue> = HashMap::new();
659
-
660
-
log::warn!("about to loop....");
661
-
for (i, kv) in timelies.enumerate() {
662
-
log::warn!("loop {i} {kv:?}...");
663
-
if i >= rollup_limit {
664
-
break;
665
-
}
666
-
667
-
let (key_bytes, val_bytes) = kv?;
668
-
let key = db_complete::<LiveCountsKey>(&key_bytes)
669
-
.inspect_err(|e| log::warn!("rlc: key: {e:?}"))?;
670
-
671
-
if cursor_exclusive_limit
672
-
.map(|limit| key.cursor() > limit)
673
-
.unwrap_or(false)
674
-
{
675
-
break;
676
-
}
677
-
678
-
batch.remove(&self.rollups, &key_bytes);
679
-
let val = db_complete::<CountsValue>(&val_bytes)
680
-
.inspect_err(|e| log::warn!("rlc: val: {e:?}"))?;
681
-
counts_by_rollup
682
-
.entry((
683
-
key.collection().clone(),
684
-
Rollup::Hourly(key.cursor().into()),
685
-
))
686
-
.or_default()
687
-
.merge(&val);
688
-
counts_by_rollup
689
-
.entry((
690
-
key.collection().clone(),
691
-
Rollup::Weekly(key.cursor().into()),
692
-
))
693
-
.or_default()
694
-
.merge(&val);
695
-
counts_by_rollup
696
-
.entry((key.collection().clone(), Rollup::AllTime))
697
-
.or_default()
698
-
.merge(&val);
699
-
700
-
cursors_advanced += 1;
701
-
last_cursor = key.cursor();
702
-
}
703
-
log::warn!("done looping. looping cbr counts(?)..");
704
-
705
-
for ((nsid, rollup), counts) in counts_by_rollup {
706
-
log::warn!(
707
-
"######################## cbr loop {nsid:?} {counts:?} ########################"
708
-
);
709
-
let key_bytes = match rollup {
710
-
Rollup::Hourly(hourly_cursor) => {
711
-
let k = HourlyRollupKey::new(hourly_cursor, &nsid);
712
-
log::info!("hrly k: {k:?}");
713
-
k.to_db_bytes()?
714
-
}
715
-
Rollup::Weekly(weekly_cursor) => {
716
-
let k = WeeklyRollupKey::new(weekly_cursor, &nsid);
717
-
log::info!("weekly k: {k:?}");
718
-
k.to_db_bytes()?
719
-
}
720
-
Rollup::AllTime => {
721
-
let k = AllTimeRollupKey::new(&nsid);
722
-
log::info!("alltime k: {k:?}");
723
-
k.to_db_bytes()?
724
-
}
725
-
};
726
-
// log::info!("key bytes: {key_bytes:?}");
727
-
let mut rolled: CountsValue = self
728
-
.rollups
729
-
.get(&key_bytes)?
730
-
.inspect(|v| {
731
-
let lax = CountsValue::from_db_bytes(v);
732
-
log::info!(
733
-
"val: len={}, lax={lax:?} first32={:?}",
734
-
v.len(),
735
-
v.get(..32)
736
-
);
737
-
})
738
-
.as_deref()
739
-
.map(db_complete::<CountsValue>)
740
-
.transpose()
741
-
.inspect_err(|e| log::warn!("oooh did we break on the rolled thing? {e:?}"))?
742
-
.unwrap_or_default();
743
-
744
-
// sanity check: round-trip the encoding before inserting
745
-
let tripppin = counts.to_db_bytes()?;
746
-
let (and_back, n) = CountsValue::from_db_bytes(&tripppin)?;
747
-
assert_eq!(n, tripppin.len());
748
-
assert_eq!(counts.prefix, and_back.prefix);
749
-
assert_eq!(counts.dids().estimate(), and_back.dids().estimate());
750
-
if counts.records() > 20_000_000 {
751
-
panic!("COUNTS maybe wtf? {counts:?}")
752
-
}
753
-
// assert_eq!(rolled, and_back);
754
-
755
-
rolled.merge(&counts);
756
-
757
-
// try to round-trip before inserting, for funsies
758
-
let tripppin = rolled.to_db_bytes()?;
759
-
let (and_back, n) = CountsValue::from_db_bytes(&tripppin)?;
760
-
assert_eq!(n, tripppin.len());
761
-
assert_eq!(rolled.prefix, and_back.prefix);
762
-
assert_eq!(rolled.dids().estimate(), and_back.dids().estimate());
763
-
if rolled.records() > 20_000_000 {
764
-
panic!("maybe wtf? {rolled:?}")
765
-
}
766
-
// assert_eq!(rolled, and_back);
767
-
768
-
batch.insert(&self.rollups, &key_bytes, &rolled.to_db_bytes()?);
769
-
}
770
-
771
-
log::warn!("done cbr loop.");
772
-
773
-
insert_batch_static_neu::<NewRollupCursorKey>(&mut batch, &self.global, last_cursor)
774
-
.inspect_err(|e| log::warn!("insert neu: {e:?}"))?;
775
-
776
-
batch.commit()?;
777
-
778
-
log::warn!("ok finished rlc stuff. huh.");
779
-
Ok(cursors_advanced)
780
-
}
781
-
}
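// [editor sketch, not part of the original change] The buffering strategy in
// rollup_live_counts: because CountsValue::merge is associative, many timely
// entries can be folded into one in-memory value per (collection, rollup) key,
// so each rollup key is read and written once per pass instead of once per
// entry. The shape of the idea with plain integers:
#[allow(dead_code)]
fn example_buffered_rollup(entries: &[(String, u64)]) -> std::collections::HashMap<String, u64> {
    let mut buffered: std::collections::HashMap<String, u64> = std::collections::HashMap::new();
    for (key, count) in entries {
        *buffered.entry(key.clone()).or_default() += count; // fold in memory first
    }
    buffered // then write each merged value back to the store once
}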
782
-
783
-
impl StoreWriter for MemWriter {
784
-
fn insert_batch<const LIMIT: usize>(
785
-
&mut self,
786
-
event_batch: EventBatch<LIMIT>,
787
-
) -> StorageResult<()> {
788
-
if event_batch.is_empty() {
789
-
return Ok(());
790
-
}
791
-
792
-
let mut batch = self.keyspace.batch();
793
-
794
-
// would be nice not to have to iterate everything at once here
795
-
let latest = event_batch.latest_cursor().unwrap();
796
-
797
-
for (nsid, commits) in event_batch.commits_by_nsid {
798
-
for commit in commits.commits {
799
-
let location_key: RecordLocationKey = (&commit, &nsid).into();
800
-
801
-
match commit.action {
802
-
CommitAction::Cut => {
803
-
batch.remove(&self.records, &location_key.to_db_bytes()?);
804
-
}
805
-
CommitAction::Put(put_action) => {
806
-
let feed_key = NsidRecordFeedKey::from_pair(nsid.clone(), commit.cursor);
807
-
let feed_val: NsidRecordFeedVal =
808
-
(&commit.did, &commit.rkey, commit.rev.as_str()).into();
809
-
batch.insert(
810
-
&self.feeds,
811
-
&feed_key.to_db_bytes()?,
812
-
&feed_val.to_db_bytes()?,
813
-
);
814
-
815
-
let location_val: RecordLocationVal =
816
-
(commit.cursor, commit.rev.as_str(), put_action).into();
817
-
batch.insert(
818
-
&self.records,
819
-
&location_key.to_db_bytes()?,
820
-
&location_val.to_db_bytes()?,
821
-
);
822
-
}
823
-
}
824
-
}
825
-
let live_counts_key: LiveCountsKey = (latest, &nsid).into();
826
-
let counts_value = CountsValue::new(commits.total_seen as u64, commits.dids_estimate);
827
-
batch.insert(
828
-
&self.rollups,
829
-
&live_counts_key.to_db_bytes()?,
830
-
&counts_value.to_db_bytes()?,
831
-
);
832
-
}
833
-
834
-
for remove in event_batch.account_removes {
835
-
let queue_key = DeleteAccountQueueKey::new(remove.cursor);
836
-
let queue_val: DeleteAccountQueueVal = remove.did;
837
-
batch.insert(
838
-
&self.queues,
839
-
&queue_key.to_db_bytes()?,
840
-
&queue_val.to_db_bytes()?,
841
-
);
842
-
}
843
-
844
-
batch.insert(
845
-
&self.global,
846
-
&DbStaticStr::<JetstreamCursorKey>::default().to_db_bytes()?,
847
-
&latest.to_db_bytes()?,
848
-
);
849
-
850
-
batch.commit()?;
851
-
Ok(())
852
-
}
853
-
854
-
fn step_rollup(&mut self) -> StorageResult<usize> {
855
-
let rollup_cursor =
856
-
get_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&self.global)?
857
-
.ok_or(StorageError::BadStateError(
858
-
"Could not find current rollup cursor".to_string(),
859
-
))
860
-
.inspect_err(|e| log::warn!("failed getting rollup cursor: {e:?}"))?;
861
-
862
-
// timelies
863
-
let live_counts_range = LiveCountsKey::range_from_cursor(rollup_cursor)
864
-
.inspect_err(|e| log::warn!("live counts range: {e:?}"))?;
865
-
let mut timely_iter = self.rollups.range(live_counts_range).into_iter().peekable();
866
-
867
-
let timely_next_cursor = timely_iter
868
-
.peek_mut()
869
-
.map(|kv| -> StorageResult<Cursor> {
870
-
match kv {
871
-
Err(e) => Err(std::mem::replace(e, StorageError::Stolen))?,
872
-
Ok((key_bytes, _)) => {
873
-
let key = db_complete::<LiveCountsKey>(key_bytes).inspect_err(|e| {
874
-
log::warn!("failed getting key for next timely: {e:?}")
875
-
})?;
876
-
Ok(key.cursor())
877
-
}
878
-
}
879
-
})
880
-
.transpose()
881
-
.inspect_err(|e| log::warn!("something about timely: {e:?}"))?;
882
-
883
-
// delete accounts
884
-
let delete_accounts_range =
885
-
DeleteAccountQueueKey::new(rollup_cursor).range_to_prefix_end()?;
886
-
887
-
let next_delete = self
888
-
.queues
889
-
.range(delete_accounts_range)
890
-
.into_iter()
891
-
.next()
892
-
.transpose()
893
-
.inspect_err(|e| log::warn!("range for next delete: {e:?}"))?
894
-
.map(|(key_bytes, val_bytes)| {
895
-
db_complete::<DeleteAccountQueueKey>(&key_bytes)
896
-
.inspect_err(|e| log::warn!("failed inside next delete thing????: {e:?}"))
897
-
.map(|k| (k.suffix, key_bytes, val_bytes))
898
-
})
899
-
.transpose()
900
-
.inspect_err(|e| log::warn!("failed getting next delete: {e:?}"))?;
901
-
902
-
let cursors_stepped = match (timely_next_cursor, next_delete) {
903
-
(
904
-
Some(timely_next_cursor),
905
-
Some((delete_cursor, delete_key_bytes, delete_val_bytes)),
906
-
) => {
907
-
if timely_next_cursor < delete_cursor {
908
-
self.rollup_live_counts(
909
-
timely_iter,
910
-
Some(delete_cursor),
911
-
MAX_BATCHED_ROLLUP_COUNTS,
912
-
)
913
-
.inspect_err(|e| log::warn!("rolling up live counts: {e:?}"))?
914
-
} else {
915
-
self.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)
916
-
.inspect_err(|e| log::warn!("deleting acocunt: {e:?}"))?
917
-
}
918
-
}
919
-
(Some(_), None) => self
920
-
.rollup_live_counts(timely_iter, None, MAX_BATCHED_ROLLUP_COUNTS)
921
-
.inspect_err(|e| log::warn!("rolling up (lasjdflkajs): {e:?}"))?,
922
-
(None, Some((delete_cursor, delete_key_bytes, delete_val_bytes))) => self
923
-
.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)
924
-
.inspect_err(|e| log::warn!("deleting acocunt other branch: {e:?}"))?,
925
-
(None, None) => 0,
926
-
};
927
-
928
-
Ok(cursors_stepped)
929
-
}
930
-
931
-
fn trim_collection(
932
-
&mut self,
933
-
collection: &Nsid,
934
-
limit: usize,
935
-
// TODO: could add a start cursor limit to avoid iterating deleted stuff at the start (/end)
936
-
) -> StorageResult<()> {
937
-
let mut dangling_feed_keys_cleaned = 0;
938
-
let mut records_deleted = 0;
939
-
940
-
let mut batch = self.keyspace.batch();
941
-
942
-
let prefix = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?;
943
-
let mut found = 0;
944
-
for kv in self.feeds.prefix(&prefix).into_iter().rev() {
945
-
let (key_bytes, val_bytes) = kv?;
946
-
let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?;
947
-
let feed_val = db_complete::<NsidRecordFeedVal>(&val_bytes)?;
948
-
let location_key: RecordLocationKey = (&feed_key, &feed_val).into();
949
-
let location_key_bytes = location_key.to_db_bytes()?;
950
-
951
-
let Some(location_val_bytes) = self.records.get(&location_key_bytes)? else {
952
-
// record was deleted (hopefully)
953
-
batch.remove(&self.feeds, &location_key_bytes);
954
-
dangling_feed_keys_cleaned += 1;
955
-
continue;
956
-
};
957
-
958
-
let (meta, _) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?;
959
-
960
-
if meta.cursor() != feed_key.cursor() {
961
-
// older/different version
962
-
batch.remove(&self.feeds, &location_key_bytes);
963
-
dangling_feed_keys_cleaned += 1;
964
-
continue;
965
-
}
966
-
if meta.rev != feed_val.rev() {
967
-
// weird...
968
-
log::warn!("record lookup: cursor match but rev did not...? removing.");
969
-
batch.remove(&self.feeds, &location_key_bytes);
970
-
dangling_feed_keys_cleaned += 1;
971
-
continue;
972
-
}
973
-
974
-
if batch.len() >= MAX_BATCHED_CLEANUP_SIZE {
975
-
batch.commit()?;
976
-
batch = self.keyspace.batch();
977
-
}
978
-
979
-
found += 1;
980
-
if found <= limit {
981
-
continue;
982
-
}
983
-
984
-
batch.remove(&self.feeds, &location_key_bytes);
985
-
batch.remove(&self.records, &location_key_bytes);
986
-
records_deleted += 1;
987
-
}
988
-
989
-
batch.commit()?;
990
-
991
-
log::info!("trim_collection ({collection:?}) removed {dangling_feed_keys_cleaned} dangling feed entries and {records_deleted} records");
992
-
Ok(())
993
-
}
994
-
995
-
fn delete_account(&mut self, did: &Did) -> Result<usize, StorageError> {
996
-
let mut records_deleted = 0;
997
-
let mut batch = self.keyspace.batch();
998
-
let prefix = RecordLocationKey::from_prefix_to_db_bytes(did)?;
999
-
for kv in self.records.prefix(&prefix) {
1000
-
let (key_bytes, _) = kv?;
1001
-
batch.remove(&self.records, &key_bytes);
1002
-
records_deleted += 1;
1003
-
if batch.len() >= MAX_BATCHED_ACCOUNT_DELETE_RECORDS {
1004
-
batch.commit()?;
1005
-
batch = self.keyspace.batch();
1006
-
}
1007
-
}
1008
-
batch.commit()?;
1009
-
Ok(records_deleted)
1010
-
}
1011
-
}
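// [editor sketch, not part of the original change] The ordering rule step_rollup
// implements: both queues (timely live counts and account deletes) are keyed by
// cursor, and whichever has the *earlier* next cursor is processed first, so the
// rollup cursor only ever moves forward. Reduced to its decision table:
#[allow(dead_code)]
fn example_pick_next(timely: Option<u64>, delete: Option<u64>) -> &'static str {
    match (timely, delete) {
        (Some(t), Some(d)) if t < d => "roll up live counts (up to the delete's cursor)",
        (Some(_), Some(_)) => "process the account delete",
        (Some(_), None) => "roll up live counts",
        (None, Some(_)) => "process the account delete",
        (None, None) => "nothing to do",
    }
}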
1012
-
1013
-
/// Get a value from a fixed key
1014
-
fn get_static_neu<K: StaticStr, V: DbBytes>(global: &MemPartion) -> StorageResult<Option<V>> {
1015
-
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
1016
-
let value = global
1017
-
.get(&key_bytes)?
1018
-
.map(|value_bytes| db_complete(&value_bytes))
1019
-
.transpose()?;
1020
-
Ok(value)
1021
-
}
1022
-
1023
-
/// Get a value from a fixed key (snapshot variant; identical to get_static_neu in this in-memory store)
1024
-
fn get_snapshot_static_neu<K: StaticStr, V: DbBytes>(
1025
-
global: &MemPartion,
1026
-
) -> StorageResult<Option<V>> {
1027
-
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
1028
-
let value = global
1029
-
.get(&key_bytes)?
1030
-
.map(|value_bytes| db_complete(&value_bytes))
1031
-
.transpose()?;
1032
-
Ok(value)
1033
-
}
1034
-
1035
-
/// Set a value to a fixed key
1036
-
fn insert_static_neu<K: StaticStr>(global: &MemPartion, value: impl DbBytes) -> StorageResult<()> {
1037
-
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
1038
-
let value_bytes = value.to_db_bytes()?;
1039
-
global.insert(&key_bytes, &value_bytes)?;
1040
-
Ok(())
1041
-
}
1042
-
1043
-
/// Stage a value for a fixed key onto a write batch
1044
-
fn insert_batch_static_neu<K: StaticStr>(
1045
-
batch: &mut MemBatch,
1046
-
global: &MemPartion,
1047
-
value: impl DbBytes,
1048
-
) -> StorageResult<()> {
1049
-
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
1050
-
let value_bytes = value.to_db_bytes()?;
1051
-
batch.insert(global, &key_bytes, &value_bytes);
1052
-
Ok(())
1053
-
}
1054
-
1055
-
#[derive(Debug, serde::Serialize, schemars::JsonSchema)]
1056
-
pub struct StorageInfo {
1057
-
pub keyspace_disk_space: u64,
1058
-
pub keyspace_journal_count: usize,
1059
-
pub keyspace_sequence: u64,
1060
-
pub global_approximate_len: usize,
1061
-
}
1062
-
1063
-
#[cfg(test)]
1064
-
mod tests {
1065
-
use super::*;
1066
-
use crate::{DeleteAccount, RecordKey, UFOsCommit};
1067
-
use jetstream::events::{CommitEvent, CommitOp};
1068
-
use jetstream::exports::Cid;
1069
-
use serde_json::value::RawValue;
1070
-
1071
-
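// NB: despite the name, this helper builds the in-memory store; "fjall_db"
// appears to be carried over from the fjall-backed version of these tests.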
fn fjall_db() -> (MemReader, MemWriter) {
1072
-
let (read, write, _) = MemStorage::init(
1073
-
tempfile::tempdir().unwrap(),
1074
-
"offline test (no real jetstream endpoint)".to_string(),
1075
-
false,
1076
-
MemConfig { temp: true },
1077
-
)
1078
-
.unwrap();
1079
-
(read, write)
1080
-
}
1081
-
1082
-
const TEST_BATCH_LIMIT: usize = 16;
1083
-
1084
-
#[derive(Debug, Default)]
1085
-
struct TestBatch {
1086
-
pub batch: EventBatch<TEST_BATCH_LIMIT>,
1087
-
}
1088
-
1089
-
impl TestBatch {
1090
-
#[allow(clippy::too_many_arguments)]
1091
-
pub fn create(
1092
-
&mut self,
1093
-
did: &str,
1094
-
collection: &str,
1095
-
rkey: &str,
1096
-
record: &str,
1097
-
rev: Option<&str>,
1098
-
cid: Option<Cid>,
1099
-
cursor: u64,
1100
-
) -> Nsid {
1101
-
let did = Did::new(did.to_string()).unwrap();
1102
-
let collection = Nsid::new(collection.to_string()).unwrap();
1103
-
let record = RawValue::from_string(record.to_string()).unwrap();
1104
-
let cid = cid.unwrap_or(
1105
-
"bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy"
1106
-
.parse()
1107
-
.unwrap(),
1108
-
);
1109
-
1110
-
let event = CommitEvent {
1111
-
collection,
1112
-
rkey: RecordKey::new(rkey.to_string()).unwrap(),
1113
-
rev: rev.unwrap_or("asdf").to_string(),
1114
-
operation: CommitOp::Create,
1115
-
record: Some(record),
1116
-
cid: Some(cid),
1117
-
};
1118
-
1119
-
let (commit, collection) =
1120
-
UFOsCommit::from_commit_info(event, did.clone(), Cursor::from_raw_u64(cursor))
1121
-
.unwrap();
1122
-
1123
-
self.batch
1124
-
.commits_by_nsid
1125
-
.entry(collection.clone())
1126
-
.or_default()
1127
-
.truncating_insert(commit)
1128
-
.unwrap();
1129
-
1130
-
collection
1131
-
}
1132
-
#[allow(clippy::too_many_arguments)]
1133
-
pub fn update(
1134
-
&mut self,
1135
-
did: &str,
1136
-
collection: &str,
1137
-
rkey: &str,
1138
-
record: &str,
1139
-
rev: Option<&str>,
1140
-
cid: Option<Cid>,
1141
-
cursor: u64,
1142
-
) -> Nsid {
1143
-
let did = Did::new(did.to_string()).unwrap();
1144
-
let collection = Nsid::new(collection.to_string()).unwrap();
1145
-
let record = RawValue::from_string(record.to_string()).unwrap();
1146
-
let cid = cid.unwrap_or(
1147
-
"bafyreidofvwoqvd2cnzbun6dkzgfucxh57tirf3ohhde7lsvh4fu3jehgy"
1148
-
.parse()
1149
-
.unwrap(),
1150
-
);
1151
-
1152
-
let event = CommitEvent {
1153
-
collection,
1154
-
rkey: RecordKey::new(rkey.to_string()).unwrap(),
1155
-
rev: rev.unwrap_or("asdf").to_string(),
1156
-
operation: CommitOp::Update,
1157
-
record: Some(record),
1158
-
cid: Some(cid),
1159
-
};
1160
-
1161
-
let (commit, collection) =
1162
-
UFOsCommit::from_commit_info(event, did.clone(), Cursor::from_raw_u64(cursor))
1163
-
.unwrap();
1164
-
1165
-
self.batch
1166
-
.commits_by_nsid
1167
-
.entry(collection.clone())
1168
-
.or_default()
1169
-
.truncating_insert(commit)
1170
-
.unwrap();
1171
-
1172
-
collection
1173
-
}
1174
-
#[allow(clippy::too_many_arguments)]
1175
-
pub fn delete(
1176
-
&mut self,
1177
-
did: &str,
1178
-
collection: &str,
1179
-
rkey: &str,
1180
-
rev: Option<&str>,
1181
-
cursor: u64,
1182
-
) -> Nsid {
1183
-
let did = Did::new(did.to_string()).unwrap();
1184
-
let collection = Nsid::new(collection.to_string()).unwrap();
1185
-
let event = CommitEvent {
1186
-
collection,
1187
-
rkey: RecordKey::new(rkey.to_string()).unwrap(),
1188
-
rev: rev.unwrap_or("asdf").to_string(),
1189
-
operation: CommitOp::Delete,
1190
-
record: None,
1191
-
cid: None,
1192
-
};
1193
-
1194
-
let (commit, collection) =
1195
-
UFOsCommit::from_commit_info(event, did, Cursor::from_raw_u64(cursor)).unwrap();
1196
-
1197
-
self.batch
1198
-
.commits_by_nsid
1199
-
.entry(collection.clone())
1200
-
.or_default()
1201
-
.truncating_insert(commit)
1202
-
.unwrap();
1203
-
1204
-
collection
1205
-
}
1206
-
pub fn delete_account(&mut self, did: &str, cursor: u64) -> Did {
1207
-
let did = Did::new(did.to_string()).unwrap();
1208
-
self.batch.account_removes.push(DeleteAccount {
1209
-
did: did.clone(),
1210
-
cursor: Cursor::from_raw_u64(cursor),
1211
-
});
1212
-
did
1213
-
}
1214
-
}
1215
-
1216
-
#[test]
1217
-
fn test_hello() -> anyhow::Result<()> {
1218
-
let (read, mut write) = fjall_db();
1219
-
write.insert_batch::<TEST_BATCH_LIMIT>(EventBatch::default())?;
1220
-
let (records, dids) =
1221
-
read.get_counts_by_collection(&Nsid::new("a.b.c".to_string()).unwrap())?;
1222
-
assert_eq!(records, 0);
1223
-
assert_eq!(dids, 0);
1224
-
Ok(())
1225
-
}
1226
-
1227
-
#[test]
1228
-
fn test_insert_one() -> anyhow::Result<()> {
1229
-
let (read, mut write) = fjall_db();
1230
-
1231
-
let mut batch = TestBatch::default();
1232
-
let collection = batch.create(
1233
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
1234
-
"a.b.c",
1235
-
"asdf",
1236
-
"{}",
1237
-
Some("rev-z"),
1238
-
None,
1239
-
100,
1240
-
);
1241
-
write.insert_batch(batch.batch)?;
1242
-
1243
-
let (records, dids) = read.get_counts_by_collection(&collection)?;
1244
-
assert_eq!(records, 1);
1245
-
assert_eq!(dids, 1);
1246
-
let (records, dids) =
1247
-
read.get_counts_by_collection(&Nsid::new("d.e.f".to_string()).unwrap())?;
1248
-
assert_eq!(records, 0);
1249
-
assert_eq!(dids, 0);
1250
-
1251
-
let records = read.get_records_by_collections(&[collection], 2, false)?;
1252
-
assert_eq!(records.len(), 1);
1253
-
let rec = &records[0];
1254
-
assert_eq!(rec.record.get(), "{}");
1255
-
assert!(!rec.is_update);
1256
-
1257
-
let records =
1258
-
read.get_records_by_collections(&[Nsid::new("d.e.f".to_string()).unwrap()], 2, false)?;
1259
-
assert_eq!(records.len(), 0);
1260
-
1261
-
Ok(())
1262
-
}
1263
-
1264
-
#[test]
1265
-
fn test_get_multi_collection() -> anyhow::Result<()> {
1266
-
let (read, mut write) = fjall_db();
1267
-
1268
-
let mut batch = TestBatch::default();
1269
-
batch.create(
1270
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
1271
-
"a.a.a",
1272
-
"aaa",
1273
-
r#""earliest""#,
1274
-
Some("rev-a"),
1275
-
None,
1276
-
100,
1277
-
);
1278
-
batch.create(
1279
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
1280
-
"a.a.b",
1281
-
"aab",
1282
-
r#""in between""#,
1283
-
Some("rev-ab"),
1284
-
None,
1285
-
101,
1286
-
);
1287
-
batch.create(
1288
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
1289
-
"a.a.a",
1290
-
"aaa-2",
1291
-
r#""last""#,
1292
-
Some("rev-a-2"),
1293
-
None,
1294
-
102,
1295
-
);
1296
-
write.insert_batch(batch.batch)?;
1297
-
1298
-
let records = read.get_records_by_collections(
1299
-
&[
1300
-
Nsid::new("a.a.a".to_string()).unwrap(),
1301
-
Nsid::new("a.a.b".to_string()).unwrap(),
1302
-
Nsid::new("a.a.c".to_string()).unwrap(),
1303
-
],
1304
-
100,
1305
-
false,
1306
-
)?;
1307
-
assert_eq!(records.len(), 3);
1308
-
assert_eq!(records[0].record.get(), r#""last""#);
1309
-
assert_eq!(
1310
-
records[0].collection,
1311
-
Nsid::new("a.a.a".to_string()).unwrap()
1312
-
);
1313
-
assert_eq!(records[1].record.get(), r#""in between""#);
1314
-
assert_eq!(
1315
-
records[1].collection,
1316
-
Nsid::new("a.a.b".to_string()).unwrap()
1317
-
);
1318
-
assert_eq!(records[2].record.get(), r#""earliest""#);
1319
-
assert_eq!(
1320
-
records[2].collection,
1321
-
Nsid::new("a.a.a".to_string()).unwrap()
1322
-
);
1323
-
1324
-
Ok(())
1325
-
}
1326
-
1327
-
#[test]
1328
-
fn test_update_one() -> anyhow::Result<()> {
1329
-
let (read, mut write) = fjall_db();
1330
-
1331
-
let mut batch = TestBatch::default();
1332
-
let collection = batch.create(
1333
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
1334
-
"a.b.c",
1335
-
"rkey-asdf",
1336
-
"{}",
1337
-
Some("rev-a"),
1338
-
None,
1339
-
100,
1340
-
);
1341
-
write.insert_batch(batch.batch)?;
1342
-
1343
-
let mut batch = TestBatch::default();
1344
-
batch.update(
1345
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
1346
-
"a.b.c",
1347
-
"rkey-asdf",
1348
-
r#"{"ch": "ch-ch-ch-changes"}"#,
1349
-
Some("rev-z"),
1350
-
None,
1351
-
101,
1352
-
);
1353
-
write.insert_batch(batch.batch)?;
1354
-
1355
-
let (records, dids) = read.get_counts_by_collection(&collection)?;
1356
-
assert_eq!(records, 1);
1357
-
assert_eq!(dids, 1);
1358
-
1359
-
let records = read.get_records_by_collections(&[collection], 2, false)?;
1360
-
assert_eq!(records.len(), 1);
1361
-
let rec = &records[0];
1362
-
assert_eq!(rec.record.get(), r#"{"ch": "ch-ch-ch-changes"}"#);
1363
-
assert!(rec.is_update);
1364
-
Ok(())
1365
-
}
1366
-
1367
-
#[test]
1368
-
fn test_delete_one() -> anyhow::Result<()> {
1369
-
let (read, mut write) = fjall_db();
1370
-
1371
-
let mut batch = TestBatch::default();
1372
-
let collection = batch.create(
1373
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
1374
-
"a.b.c",
1375
-
"rkey-asdf",
1376
-
"{}",
1377
-
Some("rev-a"),
1378
-
None,
1379
-
100,
1380
-
);
1381
-
write.insert_batch(batch.batch)?;
1382
-
1383
-
let mut batch = TestBatch::default();
1384
-
batch.delete(
1385
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
1386
-
"a.b.c",
1387
-
"rkey-asdf",
1388
-
Some("rev-z"),
1389
-
101,
1390
-
);
1391
-
write.insert_batch(batch.batch)?;
1392
-
1393
-
let (records, dids) = read.get_counts_by_collection(&collection)?;
1394
-
assert_eq!(records, 1);
1395
-
assert_eq!(dids, 1);
1396
-
1397
-
let records = read.get_records_by_collections(&[collection], 2, false)?;
1398
-
assert_eq!(records.len(), 0);
1399
-
1400
-
Ok(())
1401
-
}
1402
-
1403
-
#[test]
1404
-
fn test_collection_trim() -> anyhow::Result<()> {
1405
-
let (read, mut write) = fjall_db();
1406
-
1407
-
let mut batch = TestBatch::default();
1408
-
batch.create(
1409
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
1410
-
"a.a.a",
1411
-
"rkey-aaa",
1412
-
"{}",
1413
-
Some("rev-aaa"),
1414
-
None,
1415
-
10_000,
1416
-
);
1417
-
let mut last_b_cursor;
1418
-
for i in 1..=10 {
1419
-
last_b_cursor = 11_000 + i;
1420
-
batch.create(
1421
-
&format!("did:plc:inze6wrmsm7pjl7yta3oig7{}", i % 3),
1422
-
"a.a.b",
1423
-
&format!("rkey-bbb-{i}"),
1424
-
&format!(r#"{{"n": {i}}}"#),
1425
-
Some(&format!("rev-bbb-{i}")),
1426
-
None,
1427
-
last_b_cursor,
1428
-
);
1429
-
}
1430
-
batch.create(
1431
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
1432
-
"a.a.c",
1433
-
"rkey-ccc",
1434
-
"{}",
1435
-
Some("rev-ccc"),
1436
-
None,
1437
-
12_000,
1438
-
);
1439
-
1440
-
write.insert_batch(batch.batch)?;
1441
-
1442
-
let records = read.get_records_by_collections(
1443
-
&[Nsid::new("a.a.a".to_string()).unwrap()],
1444
-
100,
1445
-
false,
1446
-
)?;
1447
-
assert_eq!(records.len(), 1);
1448
-
let records = read.get_records_by_collections(
1449
-
&[Nsid::new("a.a.b".to_string()).unwrap()],
1450
-
100,
1451
-
false,
1452
-
)?;
1453
-
assert_eq!(records.len(), 10);
1454
-
let records = read.get_records_by_collections(
1455
-
&[Nsid::new("a.a.c".to_string()).unwrap()],
1456
-
100,
1457
-
false,
1458
-
)?;
1459
-
assert_eq!(records.len(), 1);
1460
-
let records = read.get_records_by_collections(
1461
-
&[Nsid::new("a.a.d".to_string()).unwrap()],
1462
-
100,
1463
-
false,
1464
-
)?;
1465
-
assert_eq!(records.len(), 0);
1466
-
1467
-
write.trim_collection(&Nsid::new("a.a.a".to_string()).unwrap(), 6)?;
1468
-
write.trim_collection(&Nsid::new("a.a.b".to_string()).unwrap(), 6)?;
1469
-
write.trim_collection(&Nsid::new("a.a.c".to_string()).unwrap(), 6)?;
1470
-
write.trim_collection(&Nsid::new("a.a.d".to_string()).unwrap(), 6)?;
1471
-
1472
-
let records = read.get_records_by_collections(
1473
-
&[Nsid::new("a.a.a".to_string()).unwrap()],
1474
-
100,
1475
-
false,
1476
-
)?;
1477
-
assert_eq!(records.len(), 1);
1478
-
let records = read.get_records_by_collections(
1479
-
&[Nsid::new("a.a.b".to_string()).unwrap()],
1480
-
100,
1481
-
false,
1482
-
)?;
1483
-
assert_eq!(records.len(), 6);
1484
-
let records = read.get_records_by_collections(
1485
-
&[Nsid::new("a.a.c".to_string()).unwrap()],
1486
-
100,
1487
-
false,
1488
-
)?;
1489
-
assert_eq!(records.len(), 1);
1490
-
let records = read.get_records_by_collections(
1491
-
&[Nsid::new("a.a.d".to_string()).unwrap()],
1492
-
100,
1493
-
false,
1494
-
)?;
1495
-
assert_eq!(records.len(), 0);
1496
-
1497
-
Ok(())
1498
-
}
1499
-
1500
-
#[test]
1501
-
fn test_delete_account() -> anyhow::Result<()> {
1502
-
let (read, mut write) = fjall_db();
1503
-
1504
-
let mut batch = TestBatch::default();
1505
-
batch.create(
1506
-
"did:plc:person-a",
1507
-
"a.a.a",
1508
-
"rkey-aaa",
1509
-
"{}",
1510
-
Some("rev-aaa"),
1511
-
None,
1512
-
10_000,
1513
-
);
1514
-
for i in 1..=2 {
1515
-
batch.create(
1516
-
"did:plc:person-b",
1517
-
"a.a.a",
1518
-
&format!("rkey-bbb-{i}"),
1519
-
&format!(r#"{{"n": {i}}}"#),
1520
-
Some(&format!("rev-bbb-{i}")),
1521
-
None,
1522
-
11_000 + i,
1523
-
);
1524
-
}
1525
-
write.insert_batch(batch.batch)?;
1526
-
1527
-
let records = read.get_records_by_collections(
1528
-
&[Nsid::new("a.a.a".to_string()).unwrap()],
1529
-
100,
1530
-
false,
1531
-
)?;
1532
-
assert_eq!(records.len(), 3);
1533
-
1534
-
let records_deleted =
1535
-
write.delete_account(&Did::new("did:plc:person-b".to_string()).unwrap())?;
1536
-
assert_eq!(records_deleted, 2);
1537
-
1538
-
let records = read.get_records_by_collections(
1539
-
&[Nsid::new("a.a.a".to_string()).unwrap()],
1540
-
100,
1541
-
false,
1542
-
)?;
1543
-
assert_eq!(records.len(), 1);
1544
-
1545
-
Ok(())
1546
-
}
1547
-
1548
-
#[test]
1549
-
fn rollup_delete_account_removes_record() -> anyhow::Result<()> {
1550
-
let (read, mut write) = fjall_db();
1551
-
1552
-
let mut batch = TestBatch::default();
1553
-
batch.create(
1554
-
"did:plc:person-a",
1555
-
"a.a.a",
1556
-
"rkey-aaa",
1557
-
"{}",
1558
-
Some("rev-aaa"),
1559
-
None,
1560
-
10_000,
1561
-
);
1562
-
write.insert_batch(batch.batch)?;
1563
-
1564
-
let mut batch = TestBatch::default();
1565
-
batch.delete_account("did:plc:person-a", 9_999); // queue it before the rollup
1566
-
write.insert_batch(batch.batch)?;
1567
-
1568
-
write.step_rollup()?;
1569
-
1570
-
let records =
1571
-
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
1572
-
assert_eq!(records.len(), 0);
1573
-
1574
-
Ok(())
1575
-
}
1576
-
1577
-
#[test]
1578
-
fn rollup_delete_live_count_step() -> anyhow::Result<()> {
1579
-
let (read, mut write) = fjall_db();
1580
-
1581
-
let mut batch = TestBatch::default();
1582
-
batch.create(
1583
-
"did:plc:person-a",
1584
-
"a.a.a",
1585
-
"rkey-aaa",
1586
-
"{}",
1587
-
Some("rev-aaa"),
1588
-
None,
1589
-
10_000,
1590
-
);
1591
-
write.insert_batch(batch.batch)?;
1592
-
1593
-
let n = write.step_rollup()?;
1594
-
assert_eq!(n, 1);
1595
-
1596
-
let mut batch = TestBatch::default();
1597
-
batch.delete_account("did:plc:person-a", 10_001);
1598
-
write.insert_batch(batch.batch)?;
1599
-
1600
-
let records =
1601
-
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
1602
-
assert_eq!(records.len(), 1);
1603
-
1604
-
let n = write.step_rollup()?;
1605
-
assert_eq!(n, 1);
1606
-
1607
-
let records =
1608
-
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
1609
-
assert_eq!(records.len(), 0);
1610
-
1611
-
let mut batch = TestBatch::default();
1612
-
batch.delete_account("did:plc:person-a", 9_999);
1613
-
write.insert_batch(batch.batch)?;
1614
-
1615
-
let n = write.step_rollup()?;
1616
-
assert_eq!(n, 0);
1617
-
1618
-
Ok(())
1619
-
}
1620
-
1621
-
#[test]
1622
-
fn rollup_multiple_count_batches() -> anyhow::Result<()> {
1623
-
let (_read, mut write) = fjall_db();
1624
-
1625
-
let mut batch = TestBatch::default();
1626
-
batch.create(
1627
-
"did:plc:person-a",
1628
-
"a.a.a",
1629
-
"rkey-aaa",
1630
-
"{}",
1631
-
Some("rev-aaa"),
1632
-
None,
1633
-
10_000,
1634
-
);
1635
-
write.insert_batch(batch.batch)?;
1636
-
1637
-
let mut batch = TestBatch::default();
1638
-
batch.create(
1639
-
"did:plc:person-a",
1640
-
"a.a.a",
1641
-
"rkey-aab",
1642
-
"{}",
1643
-
Some("rev-aab"),
1644
-
None,
1645
-
10_001,
1646
-
);
1647
-
write.insert_batch(batch.batch)?;
1648
-
1649
-
let n = write.step_rollup()?;
1650
-
assert_eq!(n, 2);
1651
-
1652
-
let n = write.step_rollup()?;
1653
-
assert_eq!(n, 0);
1654
-
1655
-
Ok(())
1656
-
}
1657
-
1658
-
#[test]
1659
-
fn counts_before_and_after_rollup() -> anyhow::Result<()> {
1660
-
let (read, mut write) = fjall_db();
1661
-
1662
-
let mut batch = TestBatch::default();
1663
-
batch.create(
1664
-
"did:plc:person-a",
1665
-
"a.a.a",
1666
-
"rkey-aaa",
1667
-
"{}",
1668
-
Some("rev-aaa"),
1669
-
None,
1670
-
10_000,
1671
-
);
1672
-
batch.create(
1673
-
"did:plc:person-b",
1674
-
"a.a.a",
1675
-
"rkey-bbb",
1676
-
"{}",
1677
-
Some("rev-bbb"),
1678
-
None,
1679
-
10_001,
1680
-
);
1681
-
write.insert_batch(batch.batch)?;
1682
-
1683
-
let mut batch = TestBatch::default();
1684
-
batch.delete_account("did:plc:person-a", 11_000);
1685
-
write.insert_batch(batch.batch)?;
1686
-
1687
-
let mut batch = TestBatch::default();
1688
-
batch.create(
1689
-
"did:plc:person-a",
1690
-
"a.a.a",
1691
-
"rkey-aac",
1692
-
"{}",
1693
-
Some("rev-aac"),
1694
-
None,
1695
-
12_000,
1696
-
);
1697
-
write.insert_batch(batch.batch)?;
1698
-
1699
-
// before any rollup
1700
-
let (records, dids) =
1701
-
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
1702
-
assert_eq!(records, 3);
1703
-
assert_eq!(dids, 2);
1704
-
1705
-
// first batch rolled up
1706
-
let n = write.step_rollup()?;
1707
-
assert_eq!(n, 1);
1708
-
1709
-
let (records, dids) =
1710
-
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
1711
-
assert_eq!(records, 3);
1712
-
assert_eq!(dids, 2);
1713
-
1714
-
// delete account rolled up
1715
-
let n = write.step_rollup()?;
1716
-
assert_eq!(n, 1);
1717
-
1718
-
let (records, dids) =
1719
-
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
1720
-
assert_eq!(records, 3);
1721
-
assert_eq!(dids, 2);
1722
-
1723
-
// second batch rolled up
1724
-
let n = write.step_rollup()?;
1725
-
assert_eq!(n, 1);
1726
-
1727
-
let (records, dids) =
1728
-
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
1729
-
assert_eq!(records, 3);
1730
-
assert_eq!(dids, 2);
1731
-
1732
-
// no more rollups left
1733
-
let n = write.step_rollup()?;
1734
-
assert_eq!(n, 0);
1735
-
1736
-
Ok(())
1737
-
}
1738
-
1739
-
#[test]
1740
-
fn get_top_collections() -> anyhow::Result<()> {
1741
-
let (read, mut write) = fjall_db();
1742
-
1743
-
let mut batch = TestBatch::default();
1744
-
batch.create(
1745
-
"did:plc:person-a",
1746
-
"a.a.a",
1747
-
"rkey-aaa",
1748
-
"{}",
1749
-
Some("rev-aaa"),
1750
-
None,
1751
-
10_000,
1752
-
);
1753
-
batch.create(
1754
-
"did:plc:person-b",
1755
-
"a.a.b",
1756
-
"rkey-bbb",
1757
-
"{}",
1758
-
Some("rev-bbb"),
1759
-
None,
1760
-
10_001,
1761
-
);
1762
-
batch.create(
1763
-
"did:plc:person-c",
1764
-
"a.b.c",
1765
-
"rkey-ccc",
1766
-
"{}",
1767
-
Some("rev-ccc"),
1768
-
None,
1769
-
10_002,
1770
-
);
1771
-
batch.create(
1772
-
"did:plc:person-a",
1773
-
"a.a.a",
1774
-
"rkey-aaa-2",
1775
-
"{}",
1776
-
Some("rev-aaa-2"),
1777
-
None,
1778
-
10_003,
1779
-
);
1780
-
write.insert_batch(batch.batch)?;
1781
-
1782
-
let n = write.step_rollup()?;
1783
-
assert_eq!(n, 3); // 3 collections
1784
-
1785
-
let tops = read.get_top_collections()?;
1786
-
assert_eq!(
1787
-
tops,
1788
-
TopCollections {
1789
-
total_records: 4,
1790
-
dids_estimate: 3,
1791
-
nsid_child_segments: HashMap::from([(
1792
-
"a".to_string(),
1793
-
TopCollections {
1794
-
total_records: 4,
1795
-
dids_estimate: 3,
1796
-
nsid_child_segments: HashMap::from([
1797
-
(
1798
-
"a".to_string(),
1799
-
TopCollections {
1800
-
total_records: 3,
1801
-
dids_estimate: 2,
1802
-
nsid_child_segments: HashMap::from([
1803
-
(
1804
-
"a".to_string(),
1805
-
TopCollections {
1806
-
total_records: 2,
1807
-
dids_estimate: 1,
1808
-
nsid_child_segments: HashMap::from([]),
1809
-
},
1810
-
),
1811
-
(
1812
-
"b".to_string(),
1813
-
TopCollections {
1814
-
total_records: 1,
1815
-
dids_estimate: 1,
1816
-
nsid_child_segments: HashMap::from([]),
1817
-
}
1818
-
),
1819
-
]),
1820
-
},
1821
-
),
1822
-
(
1823
-
"b".to_string(),
1824
-
TopCollections {
1825
-
total_records: 1,
1826
-
dids_estimate: 1,
1827
-
nsid_child_segments: HashMap::from([(
1828
-
"c".to_string(),
1829
-
TopCollections {
1830
-
total_records: 1,
1831
-
dids_estimate: 1,
1832
-
nsid_child_segments: HashMap::from([]),
1833
-
},
1834
-
),]),
1835
-
},
1836
-
),
1837
-
]),
1838
-
},
1839
-
),]),
1840
-
}
1841
-
);
1842
-
Ok(())
1843
-
}
1844
-
}
···
+492
-106
ufos/src/store_types.rs
···
1
use crate::db_types::{
2
-
DbBytes, DbConcat, DbStaticStr, EncodingError, SerdeBytes, StaticStr, UseBincodePlz,
3
};
4
-
use crate::{Cursor, Did, Nsid, PutAction, RecordKey, UFOsCommit};
5
use bincode::{Decode, Encode};
6
-
use cardinality_estimator::CardinalityEstimator;
7
-
use std::ops::Range;
8
9
-
/// key format: ["js_cursor"]
10
-
#[derive(Debug, PartialEq)]
11
-
pub struct JetstreamCursorKey {}
12
-
impl StaticStr for JetstreamCursorKey {
13
-
fn static_str() -> &'static str {
14
-
"js_cursor"
15
-
}
16
}
17
pub type JetstreamCursorValue = Cursor;
18
19
-
/// key format: ["rollup_cursor"]
20
-
#[derive(Debug, PartialEq)]
21
-
pub struct NewRollupCursorKey {}
22
-
impl StaticStr for NewRollupCursorKey {
23
-
fn static_str() -> &'static str {
24
-
"rollup_cursor"
25
-
}
26
-
}
27
// pub type NewRollupCursorKey = DbStaticStr<_NewRollupCursorKey>;
28
/// value format: [rollup_cursor(Cursor)|collection(Nsid)]
29
pub type NewRollupCursorValue = Cursor;
30
31
-
/// key format: ["js_endpoint"]
32
-
#[derive(Debug, PartialEq)]
33
-
pub struct TakeoffKey {}
34
-
impl StaticStr for TakeoffKey {
35
-
fn static_str() -> &'static str {
36
-
"takeoff"
37
}
38
}
39
pub type TakeoffValue = Cursor;
40
41
-
/// key format: ["js_endpoint"]
42
-
#[derive(Debug, PartialEq)]
43
-
pub struct JetstreamEndpointKey {}
44
-
impl StaticStr for JetstreamEndpointKey {
45
-
fn static_str() -> &'static str {
46
-
"js_endpoint"
47
-
}
48
-
}
49
#[derive(Debug, PartialEq)]
50
pub struct JetstreamEndpointValue(pub String);
51
/// String wrapper for jetstream endpoint value
···
62
}
63
}
64
65
pub type NsidRecordFeedKey = DbConcat<Nsid, Cursor>;
66
impl NsidRecordFeedKey {
67
pub fn collection(&self) -> &Nsid {
···
171
}
172
}
173
174
-
#[derive(Debug, PartialEq)]
175
-
pub struct _LiveRecordsStaticStr {}
176
-
impl StaticStr for _LiveRecordsStaticStr {
177
-
fn static_str() -> &'static str {
178
-
"live_counts"
179
-
}
180
-
}
181
182
type LiveCountsStaticPrefix = DbStaticStr<_LiveRecordsStaticStr>;
183
type LiveCountsCursorPrefix = DbConcat<LiveCountsStaticPrefix, Cursor>;
···
190
pub fn cursor(&self) -> Cursor {
191
self.prefix.suffix
192
}
193
-
pub fn collection(&self) -> &Nsid {
194
&self.suffix
195
}
196
}
···
202
)
203
}
204
}
205
-
#[derive(Debug, PartialEq, Decode, Encode)]
206
-
pub struct TotalRecordsValue(pub u64);
207
-
impl UseBincodePlz for TotalRecordsValue {}
208
209
-
#[derive(Debug, PartialEq, serde::Serialize, serde::Deserialize)]
210
-
pub struct EstimatedDidsValue(pub CardinalityEstimator<Did>);
211
impl SerdeBytes for EstimatedDidsValue {}
212
impl DbBytes for EstimatedDidsValue {
213
#[cfg(test)]
···
221
222
#[cfg(not(test))]
223
fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
224
-
Ok(vec![1, 2, 3]) // TODO: un-stub when their heap overflow is fixed
225
}
226
#[cfg(not(test))]
227
fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
228
-
if bytes.len() < 3 {
229
-
return Err(EncodingError::DecodeNotEnoughBytes);
230
-
}
231
-
Ok((Self(CardinalityEstimator::new()), 3)) // TODO: un-stub when their heap overflow is fixed
232
}
233
}
234
235
-
pub type CountsValue = DbConcat<TotalRecordsValue, EstimatedDidsValue>;
236
impl CountsValue {
237
-
pub fn new(total: u64, dids: CardinalityEstimator<Did>) -> Self {
238
Self {
239
-
prefix: TotalRecordsValue(total),
240
suffix: EstimatedDidsValue(dids),
241
}
242
}
243
-
pub fn records(&self) -> u64 {
244
-
self.prefix.0
245
}
246
-
pub fn dids(&self) -> &CardinalityEstimator<Did> {
247
&self.suffix.0
248
}
249
pub fn merge(&mut self, other: &Self) {
250
-
self.prefix.0 += other.records();
251
-
self.suffix.0.merge(other.dids());
252
}
253
}
254
-
impl Default for CountsValue {
255
-
fn default() -> Self {
256
Self {
257
-
prefix: TotalRecordsValue(0),
258
-
suffix: EstimatedDidsValue(CardinalityEstimator::new()),
259
}
260
}
261
}
262
263
-
#[derive(Debug, PartialEq)]
264
-
pub struct _DeleteAccountStaticStr {}
265
-
impl StaticStr for _DeleteAccountStaticStr {
266
-
fn static_str() -> &'static str {
267
-
"delete_acount"
268
-
}
269
-
}
270
pub type DeleteAccountStaticPrefix = DbStaticStr<_DeleteAccountStaticStr>;
271
pub type DeleteAccountQueueKey = DbConcat<DeleteAccountStaticPrefix, Cursor>;
272
impl DeleteAccountQueueKey {
···
276
}
277
pub type DeleteAccountQueueVal = Did;
278
279
-
#[derive(Debug, PartialEq)]
280
-
pub struct _HourlyRollupStaticStr {}
281
-
impl StaticStr for _HourlyRollupStaticStr {
282
-
fn static_str() -> &'static str {
283
-
"hourly_counts"
284
}
285
}
286
pub type HourlyRollupStaticPrefix = DbStaticStr<_HourlyRollupStaticStr>;
287
-
pub type HourlyRollupKey = DbConcat<DbConcat<HourlyRollupStaticPrefix, HourTruncatedCursor>, Nsid>;
288
impl HourlyRollupKey {
289
-
pub fn new(hourly_cursor: HourTruncatedCursor, nsid: &Nsid) -> Self {
290
Self::from_pair(
291
-
DbConcat::from_pair(Default::default(), hourly_cursor),
292
nsid.clone(),
293
)
294
}
295
}
296
pub type HourlyRollupVal = CountsValue;
297
298
-
#[derive(Debug, PartialEq)]
299
-
pub struct _WeeklyRollupStaticStr {}
300
-
impl StaticStr for _WeeklyRollupStaticStr {
301
-
fn static_str() -> &'static str {
302
-
"weekly_counts"
303
-
}
304
-
}
305
pub type WeeklyRollupStaticPrefix = DbStaticStr<_WeeklyRollupStaticStr>;
306
-
pub type WeeklyRollupKey = DbConcat<DbConcat<WeeklyRollupStaticPrefix, WeekTruncatedCursor>, Nsid>;
307
impl WeeklyRollupKey {
308
-
pub fn new(weekly_cursor: WeekTruncatedCursor, nsid: &Nsid) -> Self {
309
Self::from_pair(
310
-
DbConcat::from_pair(Default::default(), weekly_cursor),
311
nsid.clone(),
312
)
313
}
314
}
315
pub type WeeklyRollupVal = CountsValue;
316
317
-
#[derive(Debug, PartialEq)]
318
-
pub struct _AllTimeRollupStaticStr {}
319
-
impl StaticStr for _AllTimeRollupStaticStr {
320
-
fn static_str() -> &'static str {
321
-
"ever_counts"
322
-
}
323
-
}
324
pub type AllTimeRollupStaticPrefix = DbStaticStr<_AllTimeRollupStaticStr>;
325
pub type AllTimeRollupKey = DbConcat<AllTimeRollupStaticPrefix, Nsid>;
326
impl AllTimeRollupKey {
327
pub fn new(nsid: &Nsid) -> Self {
328
Self::from_pair(Default::default(), nsid.clone())
329
}
330
-
pub fn collection(&self) -> &Nsid {
331
&self.suffix
332
}
333
}
334
pub type AllTimeRollupVal = CountsValue;
335
336
#[derive(Debug, Copy, Clone, PartialEq, Hash, PartialOrd, Eq)]
337
pub struct TruncatedCursor<const MOD: u64>(u64);
338
impl<const MOD: u64> TruncatedCursor<MOD> {
339
-
fn truncate(raw: u64) -> u64 {
340
(raw / MOD) * MOD
341
}
342
pub fn try_from_raw_u64(time_us: u64) -> Result<Self, EncodingError> {
···
348
}
349
pub fn try_from_cursor(cursor: Cursor) -> Result<Self, EncodingError> {
350
Self::try_from_raw_u64(cursor.to_raw_u64())
351
}
352
pub fn truncate_cursor(cursor: Cursor) -> Self {
353
let raw = cursor.to_raw_u64();
354
let truncated = Self::truncate(raw);
355
Self(truncated)
356
}
357
}
358
impl<const MOD: u64> From<TruncatedCursor<MOD>> for Cursor {
359
fn from(truncated: TruncatedCursor<MOD>) -> Self {
···
377
}
378
}
379
380
-
const HOUR_IN_MICROS: u64 = 1_000_000 * 3600;
381
pub type HourTruncatedCursor = TruncatedCursor<HOUR_IN_MICROS>;
382
383
-
const WEEK_IN_MICROS: u64 = HOUR_IN_MICROS * 24 * 7;
384
pub type WeekTruncatedCursor = TruncatedCursor<WEEK_IN_MICROS>;
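// [editor sketch, not part of the original change] Truncation is plain integer
// arithmetic: (raw / MOD) * MOD floors a microsecond cursor to its bucket,
// assuming the HOUR_IN_MICROS constant above.
#[allow(dead_code)]
fn example_hour_truncation() {
    let raw: u64 = 3 * HOUR_IN_MICROS + 1_234;
    assert_eq!((raw / HOUR_IN_MICROS) * HOUR_IN_MICROS, 3 * HOUR_IN_MICROS);
}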
385
386
#[cfg(test)]
387
mod test {
388
use super::{
389
-
CardinalityEstimator, CountsValue, Cursor, Did, EncodingError, HourTruncatedCursor,
390
-
HourlyRollupKey, Nsid, HOUR_IN_MICROS,
391
};
392
use crate::db_types::DbBytes;
393
394
#[test]
395
fn test_by_hourly_rollup_key() -> Result<(), EncodingError> {
···
409
410
#[test]
411
fn test_by_hourly_rollup_value() -> Result<(), EncodingError> {
412
-
let mut estimator = CardinalityEstimator::new();
413
for i in 0..10 {
414
-
estimator.insert(&Did::new(format!("did:plc:inze6wrmsm7pjl7yta3oig7{i}")).unwrap());
415
}
416
-
let original = CountsValue::new(123, estimator.clone());
417
let serialized = original.to_db_bytes()?;
418
let (restored, bytes_consumed) = CountsValue::from_db_bytes(&serialized)?;
419
assert_eq!(restored, original);
420
assert_eq!(bytes_consumed, serialized.len());
421
422
for i in 10..1_000 {
423
-
estimator.insert(&Did::new(format!("did:plc:inze6wrmsm7pjl7yta3oig{i}")).unwrap());
424
}
425
-
let original = CountsValue::new(123, estimator);
426
let serialized = original.to_db_bytes()?;
427
let (restored, bytes_consumed) = CountsValue::from_db_bytes(&serialized)?;
428
assert_eq!(restored, original);
···
449
assert_eq!(back, us);
450
let diff = us.to_raw_u64() - back.to_raw_u64();
451
assert_eq!(diff, 0);
452
}
453
}
···
1
use crate::db_types::{
2
+
DbBytes, DbConcat, DbStaticStr, EncodingError, EncodingResult, SerdeBytes, StaticStr,
3
+
UseBincodePlz,
4
};
5
+
use crate::{Cursor, Did, JustCount, Nsid, PutAction, RecordKey, UFOsCommit};
6
use bincode::{Decode, Encode};
7
+
use cardinality_estimator_safe::Sketch;
8
+
use std::ops::{Bound, Range};
9
10
+
macro_rules! static_str {
11
+
($prefix:expr, $name:ident) => {
12
+
#[derive(Debug, PartialEq)]
13
+
pub struct $name {}
14
+
impl StaticStr for $name {
15
+
fn static_str() -> &'static str {
16
+
$prefix
17
+
}
18
+
}
19
+
};
20
}
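// [editor note, not part of the original change] For reference, an invocation
// like `static_str!("js_cursor", JetstreamCursorKey);` expands to exactly the
// struct-plus-impl pair the macro body shows:
//
//     #[derive(Debug, PartialEq)]
//     pub struct JetstreamCursorKey {}
//     impl StaticStr for JetstreamCursorKey {
//         fn static_str() -> &'static str {
//             "js_cursor"
//         }
//     }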
21
+
+// key format: ["js_cursor"]
+static_str!("js_cursor", JetstreamCursorKey);
pub type JetstreamCursorValue = Cursor;

+// key format: ["sketch_secret"]
+static_str!("sketch_secret", SketchSecretKey);
+pub type SketchSecretPrefix = [u8; 16];
+
+// key format: ["rollup_cursor"]
+static_str!("rollup_cursor", NewRollupCursorKey);
// pub type NewRollupCursorKey = DbStaticStr<_NewRollupCursorKey>;
/// value format: [rollup_cursor(Cursor)|collection(Nsid)]
pub type NewRollupCursorValue = Cursor;

+static_str!("trim_cursor", _TrimCollectionStaticStr);
+type TrimCollectionCursorPrefix = DbStaticStr<_TrimCollectionStaticStr>;
+pub type TrimCollectionCursorKey = DbConcat<TrimCollectionCursorPrefix, Nsid>;
+impl TrimCollectionCursorKey {
+    pub fn new(collection: Nsid) -> Self {
+        Self::from_pair(Default::default(), collection)
    }
}
+pub type TrimCollectionCursorVal = Cursor;
+
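// A sketch of how these keys compose, assuming DbConcat serializes prefix
// bytes followed by suffix bytes (as from_pair and the key-format comments
// suggest):
//
//     let key = TrimCollectionCursorKey::new(collection);
//     // encodes as ["trim_cursor"][collection(Nsid)], so every trim cursor
//     // shares the static prefix and sorts by collection beneath it.
//     let bytes = key.to_db_bytes()?;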
+// key format: ["takeoff"]
+static_str!("takeoff", TakeoffKey);
pub type TakeoffValue = Cursor;

+// key format: ["js_endpoint"]
+static_str!("js_endpoint", JetstreamEndpointKey);
#[derive(Debug, PartialEq)]
pub struct JetstreamEndpointValue(pub String);
/// String wrapper for jetstream endpoint value
···
    }
}

+pub trait WithCollection {
+    fn collection(&self) -> &Nsid;
+}
+
+pub trait WithRank {
+    fn rank(&self) -> u64;
+}
+
pub type NsidRecordFeedKey = DbConcat<Nsid, Cursor>;
impl NsidRecordFeedKey {
    pub fn collection(&self) -> &Nsid {
···
    }
}

+static_str!("live_counts", _LiveRecordsStaticStr);

type LiveCountsStaticPrefix = DbStaticStr<_LiveRecordsStaticStr>;
type LiveCountsCursorPrefix = DbConcat<LiveCountsStaticPrefix, Cursor>;
···
    pub fn cursor(&self) -> Cursor {
        self.prefix.suffix
    }
+}
+impl WithCollection for LiveCountsKey {
+    fn collection(&self) -> &Nsid {
        &self.suffix
    }
}
···
        )
    }
}
+
+#[derive(Debug, Clone, Copy, Default, PartialEq, Decode, Encode)]
+pub struct CommitCounts {
+    pub creates: u64,
+    pub updates: u64,
+    pub deletes: u64,
+}
+impl CommitCounts {
+    pub fn merge(&mut self, other: &Self) {
+        self.creates += other.creates;
+        self.updates += other.updates;
+        self.deletes += other.deletes;
+    }
+}
+impl UseBincodePlz for CommitCounts {}
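// Merge is plain field-wise addition; a minimal sketch:
//
//     let mut a = CommitCounts { creates: 2, updates: 0, deletes: 1 };
//     a.merge(&CommitCounts { creates: 3, updates: 1, deletes: 0 });
//     assert_eq!(a, CommitCounts { creates: 5, updates: 1, deletes: 1 });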

+#[derive(Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
+pub struct EstimatedDidsValue(pub Sketch<14>);
impl SerdeBytes for EstimatedDidsValue {}
impl DbBytes for EstimatedDidsValue {
    #[cfg(test)]
···

    #[cfg(not(test))]
    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+        SerdeBytes::to_bytes(self)
    }
    #[cfg(not(test))]
    fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
+        SerdeBytes::from_bytes(bytes)
    }
}
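// Round-trip sketch, assuming SerdeBytes supplies the to_bytes/from_bytes
// pair delegated to above:
//
//     let v = EstimatedDidsValue(Sketch::<14>::default());
//     let bytes = v.to_db_bytes()?;
//     let (back, n) = EstimatedDidsValue::from_db_bytes(&bytes)?;
//     assert_eq!(back, v);
//     assert_eq!(n, bytes.len());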

+pub type CountsValue = DbConcat<CommitCounts, EstimatedDidsValue>;
impl CountsValue {
+    pub fn new(counts: CommitCounts, dids: Sketch<14>) -> Self {
        Self {
+            prefix: counts,
            suffix: EstimatedDidsValue(dids),
        }
    }
+    pub fn counts(&self) -> CommitCounts {
+        self.prefix
    }
+    pub fn dids(&self) -> &Sketch<14> {
        &self.suffix.0
    }
    pub fn merge(&mut self, other: &Self) {
+        self.prefix.merge(&other.prefix);
+        self.suffix.0.merge(&other.suffix.0);
    }
}
+impl From<&CountsValue> for JustCount {
+    fn from(cv: &CountsValue) -> Self {
+        let CommitCounts {
+            creates,
+            updates,
+            deletes,
+        } = cv.counts();
        Self {
+            creates,
+            updates,
+            deletes,
+            dids_estimate: cv.dids().estimate() as u64,
        }
    }
}
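// End-to-end sketch: a rollup value summarized into the API-facing counts
// (JustCount's fields here are exactly those used by the From impl above):
//
//     let value = CountsValue::new(
//         CommitCounts { creates: 10, updates: 2, deletes: 1 },
//         Sketch::<14>::default(),
//     );
//     let summary = JustCount::from(&value);
//     // summary.creates == 10; dids_estimate should be 0 for an empty sketch.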

+static_str!("delete_acount", _DeleteAccountStaticStr);
pub type DeleteAccountStaticPrefix = DbStaticStr<_DeleteAccountStaticStr>;
pub type DeleteAccountQueueKey = DbConcat<DeleteAccountStaticPrefix, Cursor>;
impl DeleteAccountQueueKey {
···
}
pub type DeleteAccountQueueVal = Did;

+/// Big-endian encoded u64 for an LSM prefix-friendly key
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct KeyRank(u64);
+impl DbBytes for KeyRank {
+    fn to_db_bytes(&self) -> Result<Vec<u8>, EncodingError> {
+        Ok(self.0.to_be_bytes().to_vec())
+    }
+    fn from_db_bytes(bytes: &[u8]) -> Result<(Self, usize), EncodingError> {
+        if bytes.len() < 8 {
+            return Err(EncodingError::DecodeNotEnoughBytes);
+        }
+        let bytes8 = TryInto::<[u8; 8]>::try_into(&bytes[..8])?;
+        let rank = KeyRank(u64::from_be_bytes(bytes8));
+        Ok((rank, 8))
+    }
+}
+impl From<u64> for KeyRank {
+    fn from(n: u64) -> Self {
+        Self(n)
+    }
+}
+impl From<KeyRank> for u64 {
+    fn from(kr: KeyRank) -> Self {
+        kr.0
+    }
+}
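// Why big-endian: the encoded bytes sort lexicographically in the same order
// as the underlying integers, which is what makes rank range-scans work.
// A small sketch:
//
//     let lo = KeyRank::from(1u64).to_db_bytes()?;   // 00 00 00 00 00 00 00 01
//     let hi = KeyRank::from(256u64).to_db_bytes()?; // 00 00 00 00 00 00 01 00
//     assert!(lo < hi); // little-endian bytes would compare 256 before 1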
+
+pub type BucketedRankRecordsKey<P, C> =
+    DbConcat<DbConcat<DbStaticStr<P>, C>, DbConcat<KeyRank, Nsid>>;
+impl<P, C> BucketedRankRecordsKey<P, C>
+where
+    P: StaticStr + PartialEq + std::fmt::Debug,
+    C: DbBytes + PartialEq + std::fmt::Debug + Clone,
+{
+    pub fn new(cursor: C, rank: KeyRank, nsid: &Nsid) -> Self {
+        Self::from_pair(
+            DbConcat::from_pair(Default::default(), cursor),
+            DbConcat::from_pair(rank, nsid.clone()),
+        )
+    }
+    pub fn with_rank(&self, new_rank: KeyRank) -> Self {
+        Self::new(self.prefix.suffix.clone(), new_rank, &self.suffix.suffix)
+    }
+    pub fn start(cursor: C) -> EncodingResult<Bound<Vec<u8>>> {
+        let prefix: DbConcat<DbStaticStr<P>, C> = DbConcat::from_pair(Default::default(), cursor);
+        Ok(Bound::Included(Self::from_prefix_to_db_bytes(&prefix)?))
+    }
+    pub fn end(cursor: C) -> EncodingResult<Bound<Vec<u8>>> {
+        let prefix: DbConcat<DbStaticStr<P>, C> = DbConcat::from_pair(Default::default(), cursor);
+        Ok(Bound::Excluded(Self::prefix_range_end(&prefix)?))
    }
}
+impl<P: StaticStr, C: DbBytes> WithCollection for BucketedRankRecordsKey<P, C> {
+    fn collection(&self) -> &Nsid {
+        &self.suffix.suffix
+    }
+}
+impl<P: StaticStr, C: DbBytes> WithRank for BucketedRankRecordsKey<P, C> {
+    fn rank(&self) -> u64 {
+        self.suffix.prefix.into()
+    }
+}
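// Range-scan sketch over one bucket, assuming the underlying store iterates
// a (Bound<Vec<u8>>, Bound<Vec<u8>>) pair (the store API is not part of this
// diff; HourlyRecordsKey is an alias defined further down):
//
//     let bounds = (
//         HourlyRecordsKey::start(hour)?,
//         HourlyRecordsKey::end(hour)?,
//     );
//     // iterating `bounds` yields this hour's entries already ordered by
//     // KeyRank, thanks to the big-endian encoding above.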
+
+static_str!("hourly_counts", _HourlyRollupStaticStr);
pub type HourlyRollupStaticPrefix = DbStaticStr<_HourlyRollupStaticStr>;
+pub type HourlyRollupKeyHourPrefix = DbConcat<HourlyRollupStaticPrefix, HourTruncatedCursor>;
+pub type HourlyRollupKey = DbConcat<HourlyRollupKeyHourPrefix, Nsid>;
+pub type HourlyRollupPre = DbConcat<HourlyRollupKeyHourPrefix, Vec<u8>>; // a bit of a hack, but it lets us build bounds from a partial NSID
impl HourlyRollupKey {
+    pub fn new(cursor: HourTruncatedCursor, nsid: &Nsid) -> Self {
        Self::from_pair(
+            DbConcat::from_pair(Default::default(), cursor),
            nsid.clone(),
        )
    }
+    pub fn new_nsid_prefix(cursor: HourTruncatedCursor, pre: &[u8]) -> HourlyRollupPre {
+        HourlyRollupPre::from_pair(
+            DbConcat::from_pair(Default::default(), cursor),
+            pre.to_vec(),
+        )
+    }
+    pub fn cursor(&self) -> HourTruncatedCursor {
+        self.prefix.suffix
+    }
+    pub fn start(hour: HourTruncatedCursor) -> EncodingResult<Bound<Vec<u8>>> {
+        let prefix = HourlyRollupKeyHourPrefix::from_pair(Default::default(), hour);
+        let prefix_bytes = Self::from_prefix_to_db_bytes(&prefix)?;
+        Ok(Bound::Included(prefix_bytes))
+    }
+    pub fn after_nsid(hour: HourTruncatedCursor, nsid: &Nsid) -> EncodingResult<Bound<Vec<u8>>> {
+        Ok(Bound::Excluded(Self::new(hour, nsid).to_db_bytes()?))
+    }
+    pub fn after_nsid_prefix(
+        hour: HourTruncatedCursor,
+        pre: &[u8],
+    ) -> EncodingResult<Bound<Vec<u8>>> {
+        Ok(Bound::Excluded(
+            Self::new_nsid_prefix(hour, pre).to_db_bytes()?,
+        ))
+    }
+    pub fn end(hour: HourTruncatedCursor) -> EncodingResult<Bound<Vec<u8>>> {
+        let prefix = HourlyRollupKeyHourPrefix::from_pair(Default::default(), hour);
+        Ok(Bound::Excluded(Self::prefix_range_end(&prefix)?))
+    }
+    pub fn nsid_prefix_end(
+        hour: HourTruncatedCursor,
+        pre: &[u8],
+    ) -> EncodingResult<Bound<Vec<u8>>> {
+        Ok(Bound::Excluded(
+            Self::new_nsid_prefix(hour, pre).as_prefix_range_end()?,
+        ))
+    }
+}
+impl WithCollection for HourlyRollupKey {
+    fn collection(&self) -> &Nsid {
+        &self.suffix
+    }
}
pub type HourlyRollupVal = CountsValue;

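// Pagination sketch for an hour bucket, again assuming a (Bound, Bound)
// range iterator on the store:
//
//     let lo = HourlyRollupKey::start(hour)?;
//     let hi = HourlyRollupKey::end(hour)?;
//     // ... iterate (lo, hi), remember the last NSID seen, then resume:
//     let lo = HourlyRollupKey::after_nsid(hour, &last_seen_nsid)?;
//     // after_nsid_prefix/nsid_prefix_end instead bound the scan to one
//     // NSID namespace (everything under a partial prefix).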
+static_str!("hourly_rank_records", _HourlyRecordsStaticStr);
+pub type HourlyRecordsKey = BucketedRankRecordsKey<_HourlyRecordsStaticStr, HourTruncatedCursor>;
+
+static_str!("hourly_rank_dids", _HourlyDidsStaticStr);
+pub type HourlyDidsKey = BucketedRankRecordsKey<_HourlyDidsStaticStr, HourTruncatedCursor>;
+
+static_str!("weekly_counts", _WeeklyRollupStaticStr);
pub type WeeklyRollupStaticPrefix = DbStaticStr<_WeeklyRollupStaticStr>;
+pub type WeeklyRollupKeyWeekPrefix = DbConcat<WeeklyRollupStaticPrefix, WeekTruncatedCursor>;
+pub type WeeklyRollupKey = DbConcat<WeeklyRollupKeyWeekPrefix, Nsid>;
+pub type WeeklyRollupPre = DbConcat<WeeklyRollupKeyWeekPrefix, Vec<u8>>;
impl WeeklyRollupKey {
+    pub fn new(cursor: WeekTruncatedCursor, nsid: &Nsid) -> Self {
        Self::from_pair(
+            DbConcat::from_pair(Default::default(), cursor),
            nsid.clone(),
        )
    }
+    pub fn new_nsid_prefix(cursor: WeekTruncatedCursor, pre: &[u8]) -> WeeklyRollupPre {
+        WeeklyRollupPre::from_pair(
+            DbConcat::from_pair(Default::default(), cursor),
+            pre.to_vec(),
+        )
+    }
+    pub fn cursor(&self) -> WeekTruncatedCursor {
+        self.prefix.suffix
+    }
+    pub fn start(week: WeekTruncatedCursor) -> EncodingResult<Bound<Vec<u8>>> {
+        let prefix = WeeklyRollupKeyWeekPrefix::from_pair(Default::default(), week);
+        let prefix_bytes = Self::from_prefix_to_db_bytes(&prefix)?;
+        Ok(Bound::Included(prefix_bytes))
+    }
+    pub fn after_nsid(week: WeekTruncatedCursor, nsid: &Nsid) -> EncodingResult<Bound<Vec<u8>>> {
+        Ok(Bound::Excluded(Self::new(week, nsid).to_db_bytes()?))
+    }
+    pub fn after_nsid_prefix(
+        week: WeekTruncatedCursor,
+        prefix: &[u8],
+    ) -> EncodingResult<Bound<Vec<u8>>> {
+        Ok(Bound::Excluded(
+            Self::new_nsid_prefix(week, prefix).to_db_bytes()?,
+        ))
+    }
+    pub fn end(week: WeekTruncatedCursor) -> EncodingResult<Bound<Vec<u8>>> {
+        let prefix = WeeklyRollupKeyWeekPrefix::from_pair(Default::default(), week);
+        Ok(Bound::Excluded(Self::prefix_range_end(&prefix)?))
+    }
+    pub fn nsid_prefix_end(
+        week: WeekTruncatedCursor,
+        prefix: &[u8],
+    ) -> EncodingResult<Bound<Vec<u8>>> {
+        Ok(Bound::Excluded(
+            Self::new_nsid_prefix(week, prefix).as_prefix_range_end()?,
+        ))
+    }
+}
+impl WithCollection for WeeklyRollupKey {
+    fn collection(&self) -> &Nsid {
+        &self.suffix
+    }
}
pub type WeeklyRollupVal = CountsValue;

+static_str!("weekly_rank_records", _WeeklyRecordsStaticStr);
+pub type WeeklyRecordsKey = BucketedRankRecordsKey<_WeeklyRecordsStaticStr, WeekTruncatedCursor>;
+
+static_str!("weekly_rank_dids", _WeeklyDidsStaticStr);
+pub type WeeklyDidsKey = BucketedRankRecordsKey<_WeeklyDidsStaticStr, WeekTruncatedCursor>;
+
+static_str!("ever_counts", _AllTimeRollupStaticStr);
pub type AllTimeRollupStaticPrefix = DbStaticStr<_AllTimeRollupStaticStr>;
pub type AllTimeRollupKey = DbConcat<AllTimeRollupStaticPrefix, Nsid>;
+pub type AllTimeRollupPre = DbConcat<AllTimeRollupStaticPrefix, Vec<u8>>;
impl AllTimeRollupKey {
    pub fn new(nsid: &Nsid) -> Self {
        Self::from_pair(Default::default(), nsid.clone())
    }
+    pub fn new_nsid_prefix(pre: &[u8]) -> AllTimeRollupPre {
+        AllTimeRollupPre::from_pair(Default::default(), pre.to_vec())
+    }
+    pub fn start() -> EncodingResult<Bound<Vec<u8>>> {
+        Ok(Bound::Included(Self::from_prefix_to_db_bytes(
+            &Default::default(),
+        )?))
+    }
+    pub fn after_nsid(nsid: &Nsid) -> EncodingResult<Bound<Vec<u8>>> {
+        Ok(Bound::Excluded(Self::new(nsid).to_db_bytes()?))
+    }
+    pub fn after_nsid_prefix(prefix: &[u8]) -> EncodingResult<Bound<Vec<u8>>> {
+        Ok(Bound::Excluded(
+            Self::new_nsid_prefix(prefix).to_db_bytes()?,
+        ))
+    }
+    pub fn end() -> EncodingResult<Bound<Vec<u8>>> {
+        Ok(Bound::Excluded(
+            Self::prefix_range_end(&Default::default())?,
+        ))
+    }
+    pub fn nsid_prefix_end(prefix: &[u8]) -> EncodingResult<Bound<Vec<u8>>> {
+        Ok(Bound::Excluded(
+            Self::new_nsid_prefix(prefix).as_prefix_range_end()?,
+        ))
+    }
+}
+impl WithCollection for AllTimeRollupKey {
+    fn collection(&self) -> &Nsid {
        &self.suffix
    }
}
pub type AllTimeRollupVal = CountsValue;

+pub type AllTimeRankRecordsKey<P> = DbConcat<DbStaticStr<P>, DbConcat<KeyRank, Nsid>>;
+impl<P> AllTimeRankRecordsKey<P>
+where
+    P: StaticStr + PartialEq + std::fmt::Debug,
+{
+    pub fn new(rank: KeyRank, nsid: &Nsid) -> Self {
+        Self::from_pair(Default::default(), DbConcat::from_pair(rank, nsid.clone()))
+    }
+    pub fn with_rank(&self, new_rank: KeyRank) -> Self {
+        Self::new(new_rank, &self.suffix.suffix)
+    }
+    pub fn count(&self) -> u64 {
+        self.suffix.prefix.0
+    }
+    pub fn start() -> EncodingResult<Bound<Vec<u8>>> {
+        Ok(Bound::Included(Self::from_prefix_to_db_bytes(
+            &Default::default(),
+        )?))
+    }
+    pub fn end() -> EncodingResult<Bound<Vec<u8>>> {
+        Ok(Bound::Excluded(
+            Self::prefix_range_end(&Default::default())?,
+        ))
+    }
+}
+impl<P: StaticStr> WithCollection for AllTimeRankRecordsKey<P> {
+    fn collection(&self) -> &Nsid {
+        &self.suffix.suffix
+    }
+}
+impl<P: StaticStr> WithRank for AllTimeRankRecordsKey<P> {
+    fn rank(&self) -> u64 {
+        self.suffix.prefix.into()
+    }
+}
+
+static_str!("ever_rank_records", _AllTimeRecordsStaticStr);
+pub type AllTimeRecordsKey = AllTimeRankRecordsKey<_AllTimeRecordsStaticStr>;
+
+static_str!("ever_rank_dids", _AllTimeDidsStaticStr);
+pub type AllTimeDidsKey = AllTimeRankRecordsKey<_AllTimeDidsStaticStr>;
+
#[derive(Debug, Copy, Clone, PartialEq, Hash, PartialOrd, Eq)]
pub struct TruncatedCursor<const MOD: u64>(u64);
impl<const MOD: u64> TruncatedCursor<MOD> {
+    pub fn truncate(raw: u64) -> u64 {
        (raw / MOD) * MOD
    }
    pub fn try_from_raw_u64(time_us: u64) -> Result<Self, EncodingError> {
···
    }
    pub fn try_from_cursor(cursor: Cursor) -> Result<Self, EncodingError> {
        Self::try_from_raw_u64(cursor.to_raw_u64())
+    }
+    pub fn truncate_raw_u64(raw: u64) -> Self {
+        let truncated = Self::truncate(raw);
+        Self(truncated)
    }
    pub fn truncate_cursor(cursor: Cursor) -> Self {
        let raw = cursor.to_raw_u64();
        let truncated = Self::truncate(raw);
        Self(truncated)
    }
+    pub fn to_raw_u64(&self) -> u64 {
+        self.0
+    }
+    pub fn try_as<const MOD_B: u64>(&self) -> Result<TruncatedCursor<MOD_B>, EncodingError> {
+        TruncatedCursor::<MOD_B>::try_from_raw_u64(self.0)
+    }
+    pub fn cycles_until(&self, other: Self) -> u64 {
+        if other < *self {
+            panic!("other must be greater than or equal to self");
+        }
+        (other.0 - self.0) / MOD
+    }
+    pub fn next(&self) -> Self {
+        Self(self.0 + MOD)
+    }
+    pub fn nth_next(&self, n: u64) -> Self {
+        Self(self.0 + (n * MOD))
+    }
+    pub fn prev(&self) -> Self {
+        if self.0 < MOD {
+            panic!("underflow: previous truncation start would be less than zero");
+        }
+        Self(self.0 - MOD)
+    }
}
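// Worked example with exact numbers (HOUR_IN_MICROS = 3_600_000_000):
//
//     let t = HourTruncatedCursor::truncate_raw_u64(7_250_000_123);
//     assert_eq!(t.to_raw_u64(), 7_200_000_000);        // floored to the hour
//     assert_eq!(t.next().to_raw_u64(), 10_800_000_000);
//     assert_eq!(t.cycles_until(t.nth_next(5)), 5);     // whole hours between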
impl<const MOD: u64> From<TruncatedCursor<MOD>> for Cursor {
    fn from(truncated: TruncatedCursor<MOD>) -> Self {
···
    }
}

+pub const HOUR_IN_MICROS: u64 = 1_000_000 * 3600;
pub type HourTruncatedCursor = TruncatedCursor<HOUR_IN_MICROS>;

+pub const WEEK_IN_MICROS: u64 = HOUR_IN_MICROS * 24 * 7;
pub type WeekTruncatedCursor = TruncatedCursor<WEEK_IN_MICROS>;

+#[derive(Debug, PartialEq)]
+pub enum CursorBucket {
+    Hour(HourTruncatedCursor),
+    Week(WeekTruncatedCursor),
+    AllTime,
+}
+
+impl CursorBucket {
+    pub fn buckets_spanning(
+        since: HourTruncatedCursor,
+        until: HourTruncatedCursor,
+    ) -> Vec<CursorBucket> {
+        if until <= since {
+            return vec![];
+        }
+        let mut out = vec![];
+        let mut current_lower = since;
+        while current_lower < until {
+            if current_lower.cycles_until(until) >= (WEEK_IN_MICROS / HOUR_IN_MICROS) {
+                if let Ok(week) = current_lower.try_as::<WEEK_IN_MICROS>() {
+                    out.push(CursorBucket::Week(week));
+                    current_lower = week.next().try_as().unwrap();
+                    continue;
+                }
+            }
+            out.push(CursorBucket::Hour(current_lower));
+            current_lower = current_lower.next();
+        }
+        out
+    }
+}
+
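// How a span decomposes (exercised by the tests below): while at least a full
// week of hours remains, a week-aligned lower bound emits one Week bucket and
// jumps a whole week; otherwise an Hour bucket is emitted and the bound steps
// one hour. E.g. for `since` week-aligned and `until = since + 1 week + 1 hour`:
//
//     // buckets_spanning(since, until)
//     //   == [Week(since), Hour(since + 168 hours)]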
#[cfg(test)]
mod test {
    use super::{
+        CommitCounts, CountsValue, Cursor, CursorBucket, Did, EncodingError, HourTruncatedCursor,
+        HourlyRollupKey, Nsid, Sketch, HOUR_IN_MICROS, WEEK_IN_MICROS,
    };
    use crate::db_types::DbBytes;
+    use cardinality_estimator_safe::Element;
+    use sha2::Sha256;

    #[test]
    fn test_by_hourly_rollup_key() -> Result<(), EncodingError> {
···

    #[test]
    fn test_by_hourly_rollup_value() -> Result<(), EncodingError> {
+        let mut estimator = Sketch::<14>::default();
+        fn to_element(d: Did) -> Element<14> {
+            Element::from_digest_oneshot::<Sha256>(d.to_string().as_bytes())
+        }
        for i in 0..10 {
+            estimator.insert(to_element(
+                Did::new(format!("did:plc:inze6wrmsm7pjl7yta3oig7{i}")).unwrap(),
+            ));
        }
+        let original = CountsValue::new(
+            CommitCounts {
+                creates: 123,
+                ..Default::default()
+            },
+            estimator.clone(),
+        );
        let serialized = original.to_db_bytes()?;
        let (restored, bytes_consumed) = CountsValue::from_db_bytes(&serialized)?;
        assert_eq!(restored, original);
        assert_eq!(bytes_consumed, serialized.len());

        for i in 10..1_000 {
+            estimator.insert(to_element(
+                Did::new(format!("did:plc:inze6wrmsm7pjl7yta3oig{i}")).unwrap(),
+            ));
        }
+        let original = CountsValue::new(
+            CommitCounts {
+                creates: 123,
+                ..Default::default()
+            },
+            estimator,
+        );
        let serialized = original.to_db_bytes()?;
        let (restored, bytes_consumed) = CountsValue::from_db_bytes(&serialized)?;
        assert_eq!(restored, original);
···
        assert_eq!(back, us);
        let diff = us.to_raw_u64() - back.to_raw_u64();
        assert_eq!(diff, 0);
+    }
+
+    #[test]
+    fn test_spanning_nothing() {
+        let from = Cursor::from_raw_u64(1_743_775_200_000_000).into();
+        let until = Cursor::from_raw_u64(1_743_775_200_000_000).into();
+        assert!(CursorBucket::buckets_spanning(from, until).is_empty());
+        let until = Cursor::from_raw_u64(0).into();
+        assert!(CursorBucket::buckets_spanning(from, until).is_empty());
+    }
+
+    #[test]
+    fn test_spanning_low_hours() {
+        let from = HourTruncatedCursor::truncate_cursor(Cursor::from_start());
+        let until = from.next();
+        assert_eq!(
+            CursorBucket::buckets_spanning(from, until),
+            vec![CursorBucket::Hour(from)]
+        );
+        let until2 = until.next();
+        let until3 = until2.next();
+        assert_eq!(
+            CursorBucket::buckets_spanning(from, until3),
+            vec![
+                CursorBucket::Hour(from),
+                CursorBucket::Hour(until),
+                CursorBucket::Hour(until2),
+            ]
+        );
+    }
+
+    #[test]
+    fn test_spanning_week_aligned() {
+        let from = HourTruncatedCursor::truncate_cursor(Cursor::from_start());
+        let until = HourTruncatedCursor::truncate_cursor(Cursor::from_raw_u64(WEEK_IN_MICROS));
+        assert_eq!(
+            CursorBucket::buckets_spanning(from, until),
+            vec![CursorBucket::Week(from.try_as().unwrap())]
+        );
+        let next_hour = until.next();
+        assert_eq!(
+            CursorBucket::buckets_spanning(from, next_hour),
+            vec![
+                CursorBucket::Week(from.try_as().unwrap()),
+                CursorBucket::Hour(until),
+            ]
+        );
+    }
+
+    #[test]
+    fn test_spanning_week_unaligned() {
+        let from = HourTruncatedCursor::truncate_cursor(Cursor::from_raw_u64(
+            WEEK_IN_MICROS - HOUR_IN_MICROS,
+        ));
+        let until = HourTruncatedCursor::truncate_cursor(Cursor::from_raw_u64(
+            from.to_raw_u64() + WEEK_IN_MICROS,
+        ));
+        let span = CursorBucket::buckets_spanning(from, until);
+        assert_eq!(span.len(), 168);
+        for b in &span {
+            let CursorBucket::Hour(_) = b else {
+                panic!("found week bucket in a span that should only have hourlies");
+            };
+        }
+        let until2 = until.next();
+        assert_eq!(
+            CursorBucket::buckets_spanning(from, until2),
+            vec![
+                CursorBucket::Hour(from),
+                CursorBucket::Week(from.next().try_as().unwrap()),
+            ]
+        );
    }
}