+7
server/Cargo.lock
+7
server/Cargo.lock
···
48
48
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
49
49
50
50
[[package]]
51
+
name = "arc-swap"
52
+
version = "1.7.1"
53
+
source = "registry+https://github.com/rust-lang/crates.io-index"
54
+
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
55
+
56
+
[[package]]
51
57
name = "async-compression"
52
58
version = "0.4.25"
53
59
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1553
1559
version = "0.1.0"
1554
1560
dependencies = [
1555
1561
"anyhow",
1562
+
"arc-swap",
1556
1563
"async-trait",
1557
1564
"axum",
1558
1565
"axum-tws",
+1
server/Cargo.toml
+1
server/Cargo.toml
+36
-17
server/src/db/handle.rs
+36
-17
server/src/db/handle.rs
···
1
1
use std::{
2
2
fmt::Debug,
3
3
io::Cursor,
4
-
ops::{Bound, Deref, RangeBounds},
4
+
ops::{Bound, RangeBounds},
5
5
sync::atomic::{AtomicU64, Ordering as AtomicOrdering},
6
6
time::Duration,
7
7
};
8
8
9
9
use byteview::ByteView;
10
-
use fjall::{Keyspace, Partition, PartitionCreateOptions, Slice};
10
+
use fjall::{Keyspace, Partition, PartitionCreateOptions, Slice, Snapshot};
11
11
use itertools::Itertools;
12
12
use parking_lot::Mutex;
13
13
use rayon::iter::{IntoParallelIterator, ParallelIterator};
···
16
16
17
17
use crate::{
18
18
db::{EventRecord, NsidHit, block},
19
-
error::AppResult,
20
-
utils::{CLOCK, DefaultRateTracker, RateTracker, ReadVariableExt, varints_unsigned_encoded},
19
+
error::{AppError, AppResult},
20
+
utils::{
21
+
ArcRefCnt, ArcliteSwap, CLOCK, DefaultRateTracker, RateTracker, ReadVariableExt,
22
+
varints_unsigned_encoded,
23
+
},
21
24
};
22
25
23
26
pub type ItemDecoder = block::ItemDecoder<Cursor<Slice>, NsidHit>;
···
31
34
}
32
35
33
36
pub struct LexiconHandle {
34
-
tree: Partition,
37
+
write_tree: Partition,
38
+
read_tree: ArcliteSwap<Snapshot>,
35
39
nsid: SmolStr,
36
40
buf: Arc<Mutex<Vec<EventRecord>>>,
37
41
last_insert: AtomicU64, // relaxed
···
46
50
}
47
51
}
48
52
49
-
impl Deref for LexiconHandle {
50
-
type Target = Partition;
51
-
52
-
fn deref(&self) -> &Self::Target {
53
-
&self.tree
54
-
}
55
-
}
56
-
57
53
impl LexiconHandle {
58
54
pub fn new(keyspace: &Keyspace, nsid: &str) -> Self {
59
55
let opts = PartitionCreateOptions::default()
60
56
.block_size(1024 * 48)
61
57
.compression(fjall::CompressionType::Miniz(9));
58
+
let write_tree = keyspace.open_partition(nsid, opts).unwrap();
59
+
let read_tree = ArcliteSwap::new(ArcRefCnt::new(write_tree.snapshot()));
62
60
Self {
63
-
tree: keyspace.open_partition(nsid, opts).unwrap(),
61
+
write_tree,
62
+
read_tree,
64
63
nsid: nsid.into(),
65
64
buf: Default::default(),
66
65
last_insert: AtomicU64::new(0),
···
68
67
}
69
68
}
70
69
70
+
/// Returns the snapshot currently stored in `read_tree`.
///
/// The result is an `arc_swap::Guard` around an `ArcRefCnt<Snapshot>`; callers elsewhere
/// in the crate read through it directly (`.range(..)`, `.iter()`, `.first_key_value()`).
/// NOTE(review): presumably writes made to `write_tree` after the last `update_tree`
/// call are not visible through this snapshot — confirm against fjall's snapshot semantics.
#[inline(always)]
71
+
pub fn read(&self) -> arc_swap::Guard<ArcRefCnt<Snapshot>> {
72
+
self.read_tree.load()
73
+
}
74
+
75
+
/// Takes a fresh snapshot of `write_tree` and stores it in `read_tree`, replacing the
/// snapshot that `read` returned until now.
///
/// Nothing in this type calls it automatically; it has to be invoked after writes
/// (inserts / compaction) whose results should become visible to readers.
#[inline(always)]
76
+
pub fn update_tree(&self) {
77
+
self.read_tree
78
+
.store(ArcRefCnt::new(self.write_tree.snapshot()));
79
+
}
80
+
81
+
/// Creates an info-level tracing span named `"handle"` carrying this handle's NSID
/// (recorded with its `Display` formatting) in the `nsid` field.
#[inline(always)]
71
82
pub fn span(&self) -> tracing::Span {
72
83
tracing::info_span!("handle", nsid = %self.nsid)
73
84
}
74
85
86
+
/// Returns a reference to the NSID string this handle was created with.
#[inline(always)]
75
87
pub fn nsid(&self) -> &SmolStr {
76
88
&self.nsid
77
89
}
78
90
91
+
/// Returns the number of `EventRecord`s currently held in the in-memory buffer `buf`.
///
/// Takes the `buf` mutex for the duration of the call. Only the buffer is inspected;
/// nothing is read from the partition or its snapshot.
#[inline(always)]
79
92
pub fn item_count(&self) -> usize {
80
93
self.buf.lock().len()
81
94
}
···
122
135
let end_key = varints_unsigned_encoded([end_limit]);
123
136
124
137
let blocks_to_compact = self
125
-
.tree
138
+
.read()
126
139
.range(start_key..end_key)
127
140
.collect::<Result<Vec<_>, _>>()?;
128
141
if blocks_to_compact.len() < 2 {
···
162
175
let end_blocks_size = new_blocks.len();
163
176
164
177
for key in keys_to_delete {
165
-
self.tree.remove(key.clone())?;
178
+
self.write_tree.remove(key.clone())?;
166
179
}
167
180
for block in new_blocks {
168
-
self.tree.insert(block.key, block.data)?;
181
+
self.write_tree.insert(block.key, block.data)?;
169
182
}
170
183
171
184
let reduction =
···
179
192
);
180
193
181
194
Ok(())
195
+
}
196
+
197
+
/// Writes one encoded block into `write_tree`, storing `block.data` under `block.key`.
///
/// Any error from the insert is converted into an `AppError`.
/// This does not refresh the reader snapshot: `update_tree` must be called afterwards
/// for the new block to be returned by `read`.
pub fn insert_block(&self, block: Block) -> AppResult<()> {
198
+
self.write_tree
199
+
.insert(block.key, block.data)
200
+
.map_err(AppError::from)
182
201
}
183
202
184
203
pub fn encode_block_from_items(
+37
-20
server/src/db/mod.rs
+37
-20
server/src/db/mod.rs
···
1
1
use std::{
2
-
collections::HashMap,
2
+
collections::{HashMap, HashSet},
3
3
fmt::Debug,
4
4
io::Cursor,
5
5
ops::{Bound, Deref, RangeBounds},
···
165
165
pub fn sync(&self, all: bool) -> AppResult<()> {
166
166
let start = CLOCK.now();
167
167
// prepare all the data
168
-
let mut data = Vec::with_capacity(self.hits.len());
168
+
let nsids_len = self.hits.len();
169
+
let mut data = Vec::with_capacity(nsids_len);
170
+
let mut nsids = HashSet::with_capacity(nsids_len);
169
171
let _guard = scc::ebr::Guard::new();
170
-
for (_, handle) in self.hits.iter(&_guard) {
172
+
for (nsid, handle) in self.hits.iter(&_guard) {
171
173
let mut nsid_data = Vec::with_capacity(2);
172
174
let mut total_count = 0;
173
175
let is_too_old = handle.since_last_activity() > self.cfg.max_last_activity;
···
201
203
{blocks = %nsid_data.len(), count = %total_count},
202
204
"will encode & sync",
203
205
);
206
+
nsids.insert(nsid.clone());
204
207
data.push(nsid_data);
205
208
}
206
209
}
···
228
231
for (block, handle) in chunk {
229
232
self.sync_pool.execute(move || {
230
233
let _span = handle.span().entered();
231
-
match handle.insert(block.key, block.data) {
234
+
let written = block.written;
235
+
match handle.insert_block(block) {
232
236
Ok(_) => {
233
-
tracing::info!({count = %block.written}, "synced")
237
+
tracing::info!({count = %written}, "synced")
234
238
}
235
239
Err(err) => tracing::error!({ err = %err }, "failed to sync block"),
236
240
}
···
239
243
AppResult::Ok(())
240
244
})?;
241
245
self.sync_pool.join();
246
+
247
+
// update snapshots for all (changed) handles
248
+
for nsid in nsids {
249
+
self.hits.peek_with(&nsid, |_, handle| handle.update_tree());
250
+
}
251
+
242
252
tracing::info!(time = %start.elapsed().as_secs_f64(), "synced all blocks");
243
253
244
254
Ok(())
···
254
264
let Some(handle) = self.get_handle(nsid) else {
255
265
return Ok(());
256
266
};
257
-
handle.compact(max_count, range, sort)
267
+
handle.compact(max_count, range, sort)?;
268
+
handle.update_tree();
269
+
Ok(())
258
270
}
259
271
260
272
pub fn compact_all(
···
363
375
let Some(handle) = self.get_handle(&nsid) else {
364
376
continue;
365
377
};
366
-
let block_lens = handle.iter().rev().try_fold(Vec::new(), |mut acc, item| {
367
-
let (key, value) = item?;
368
-
let mut timestamps = Cursor::new(key);
369
-
let start_timestamp = timestamps.read_varint()?;
370
-
let decoder = ItemDecoder::new(Cursor::new(value), start_timestamp)?;
371
-
acc.push(decoder.item_count());
372
-
AppResult::Ok(acc)
373
-
})?;
378
+
let block_lens = handle
379
+
.read()
380
+
.iter()
381
+
.rev()
382
+
.try_fold(Vec::new(), |mut acc, item| {
383
+
let (key, value) = item?;
384
+
let mut timestamps = Cursor::new(key);
385
+
let start_timestamp = timestamps.read_varint()?;
386
+
let decoder = ItemDecoder::new(Cursor::new(value), start_timestamp)?;
387
+
acc.push(decoder.item_count());
388
+
AppResult::Ok(acc)
389
+
})?;
374
390
nsids.insert(nsid.to_smolstr(), block_lens);
375
391
}
376
392
Ok(DbInfo {
···
438
454
))
439
455
};
440
456
441
-
let (blocks, counted) = handle
457
+
let (blocks, _counted) = handle
458
+
.read()
442
459
.range(..end_key)
443
460
.map(|res| res.map_err(AppError::from))
444
461
.rev()
···
462
479
)
463
480
.into_inner();
464
481
465
-
tracing::info!(
466
-
"got blocks with size {}, item count {counted}",
467
-
blocks.len()
468
-
);
482
+
// tracing::info!(
483
+
// "got blocks with size {}, item count {counted}",
484
+
// blocks.len()
485
+
// );
469
486
470
487
Either::Left(blocks.into_iter().rev().flatten().flatten())
471
488
}
···
476
493
let Some(handle) = self.get_handle("app.bsky.feed.like") else {
477
494
return Ok(0);
478
495
};
479
-
let Some((timestamps_raw, _)) = handle.first_key_value()? else {
496
+
let Some((timestamps_raw, _)) = handle.read().first_key_value()? else {
480
497
return Ok(0);
481
498
};
482
499
let mut timestamp_reader = Cursor::new(timestamps_raw);
+66
server/src/utils.rs
+66
server/src/utils.rs
···
1
1
use std::io::{self, Read, Write};
2
+
use std::ops::Deref;
2
3
use std::sync::atomic::{AtomicU64, Ordering};
3
4
use std::time::Duration;
4
5
6
+
use arc_swap::RefCnt;
5
7
use byteview::ByteView;
6
8
use ordered_varint::Variable;
9
+
use rclite::Arc;
7
10
8
11
pub fn get_time() -> Duration {
9
12
std::time::SystemTime::now()
···
320
323
}
321
324
}
322
325
}
326
+
327
+
/// `arc_swap::ArcSwapAny` specialised to hold an `ArcRefCnt<T>`, i.e. a swappable slot
/// whose pointer type wraps `rclite::Arc` (the `Arc` imported in this file).
pub type ArcliteSwap<T> = arc_swap::ArcSwapAny<ArcRefCnt<T>>;
328
+
329
+
/// Newtype around `rclite::Arc<T>` that carries the `arc_swap::RefCnt` implementation
/// below, so it can be stored in an `ArcliteSwap`.
// NOTE(review): presumably a wrapper is needed because both `RefCnt` and `rclite::Arc`
// are foreign to this crate — confirm.
pub struct ArcRefCnt<T>(Arc<T>);
330
+
331
+
impl<T> ArcRefCnt<T> {
332
+
/// Moves `value` into a newly allocated `Arc` and wraps it.
pub fn new(value: T) -> Self {
333
+
Self(Arc::new(value))
334
+
}
335
+
}
336
+
337
+
/// Lets an `ArcRefCnt<T>` be used wherever a `&T` is expected.
impl<T> Deref for ArcRefCnt<T> {
338
+
type Target = T;
339
+
340
+
/// Borrows the value stored inside the wrapped `Arc`.
fn deref(&self) -> &Self::Target {
341
+
// `&self.0` is `&Arc<T>`; it coerces to `&T` through the `Arc`'s own `Deref`.
&self.0
342
+
}
343
+
}
344
+
345
+
/// Manual `Clone` so that no `T: Clone` bound is required (a derive would add one).
impl<T> Clone for ArcRefCnt<T> {
346
+
/// Clones the wrapped `Arc` handle; the `T` value itself is not cloned.
fn clone(&self) -> Self {
347
+
Self(self.0.clone())
348
+
}
349
+
}
350
+
351
+
// SAFETY: this mirrors the `RefCnt` implementation that arc_swap ships for
// `std::sync::Arc`, with the `Arc` in this file (`rclite::Arc`) substituted.
// All three methods go through `Arc::into_raw` / `Arc::from_raw` only.
// NOTE(review): soundness relies on `rclite::Arc::into_raw` / `from_raw` round-tripping
// the same data pointer without changing the reference count, the way the std `Arc`
// functions do — confirm against the rclite documentation before relying on this.
352
+
unsafe impl<T> RefCnt for ArcRefCnt<T> {
353
+
type Base = T;
354
+
355
+
/// Consumes `me` and returns the raw pointer produced by `Arc::into_raw`.
fn into_ptr(me: Self) -> *mut Self::Base {
356
+
Arc::into_raw(me.0) as *mut T
357
+
}
358
+
359
+
/// Returns the raw pointer for `me` without consuming it.
fn as_ptr(me: &Self) -> *mut Self::Base {
360
+
// Slightly convoluted way to do this, but this avoids stacked borrows violations. The same
361
+
// intention as
362
+
//
363
+
// me as &T as *const T as *mut T
364
+
//
365
+
// We first create a "shallow copy" of me - one that doesn't really own its ref count
366
+
// (that's OK, me _does_ own it, so it can't be destroyed in the meantime).
367
+
// Then we can use into_raw (which preserves not having the ref count).
368
+
//
369
+
// We need to "revert" the changes we did. In current std implementation, the combination
370
+
// of from_raw and forget is no-op. But formally, into_raw shall be paired with from_raw
371
+
// and that read shall be paired with forget to properly "close the brackets". In future
372
+
// versions of STD, these may become something else that's not really no-op (unlikely, but
373
+
// possible), so we future-proof it a bit.
//
// NOTE(review): the paragraph above was written for `std::sync::Arc`; the `Arc` used
// here is `rclite::Arc`, so the "no-op" remark is an assumption about rclite — confirm.
374
+
375
+
// SAFETY: &T cast to *const T will always be aligned, initialised and valid for reads
376
+
let ptr = Arc::into_raw(unsafe { std::ptr::read(&me.0) });
377
+
let ptr = ptr as *mut T;
378
+
379
+
// SAFETY: We got the pointer from into_raw just above
380
+
std::mem::forget(unsafe { Arc::from_raw(ptr) });
381
+
382
+
ptr
383
+
}
384
+
385
+
/// Rebuilds an `ArcRefCnt` from a raw pointer via `Arc::from_raw`.
///
/// # Safety
///
/// NOTE(review): `ptr` is presumably required to come from `into_ptr` (or `as_ptr`)
/// of this type, per the `arc_swap::RefCnt` contract — confirm against its docs.
unsafe fn from_ptr(ptr: *const Self::Base) -> Self {
386
+
Self(unsafe { Arc::from_raw(ptr) })
387
+
}
388
+
}