+116
Cargo.lock
+116
Cargo.lock
···
126
126
]
127
127
128
128
[[package]]
129
+
name = "bincode"
130
+
version = "2.0.1"
131
+
source = "registry+https://github.com/rust-lang/crates.io-index"
132
+
checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740"
133
+
dependencies = [
134
+
"bincode_derive",
135
+
"serde",
136
+
"unty",
137
+
]
138
+
139
+
[[package]]
140
+
name = "bincode_derive"
141
+
version = "2.0.1"
142
+
source = "registry+https://github.com/rust-lang/crates.io-index"
143
+
checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09"
144
+
dependencies = [
145
+
"virtue",
146
+
]
147
+
148
+
[[package]]
129
149
name = "bitflags"
130
150
version = "2.9.4"
131
151
source = "registry+https://github.com/rust-lang/crates.io-index"
···
387
407
]
388
408
389
409
[[package]]
410
+
name = "fallible-iterator"
411
+
version = "0.3.0"
412
+
source = "registry+https://github.com/rust-lang/crates.io-index"
413
+
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
414
+
415
+
[[package]]
416
+
name = "fallible-streaming-iterator"
417
+
version = "0.1.9"
418
+
source = "registry+https://github.com/rust-lang/crates.io-index"
419
+
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
420
+
421
+
[[package]]
422
+
name = "foldhash"
423
+
version = "0.1.5"
424
+
source = "registry+https://github.com/rust-lang/crates.io-index"
425
+
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
426
+
427
+
[[package]]
390
428
name = "futures"
391
429
version = "0.3.31"
392
430
source = "registry+https://github.com/rust-lang/crates.io-index"
···
493
531
]
494
532
495
533
[[package]]
534
+
name = "hashbrown"
535
+
version = "0.15.5"
536
+
source = "registry+https://github.com/rust-lang/crates.io-index"
537
+
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
538
+
dependencies = [
539
+
"foldhash",
540
+
]
541
+
542
+
[[package]]
543
+
name = "hashlink"
544
+
version = "0.10.0"
545
+
source = "registry+https://github.com/rust-lang/crates.io-index"
546
+
checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
547
+
dependencies = [
548
+
"hashbrown",
549
+
]
550
+
551
+
[[package]]
496
552
name = "heck"
497
553
version = "0.5.0"
498
554
source = "registry+https://github.com/rust-lang/crates.io-index"
···
598
654
checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174"
599
655
600
656
[[package]]
657
+
name = "libsqlite3-sys"
658
+
version = "0.35.0"
659
+
source = "registry+https://github.com/rust-lang/crates.io-index"
660
+
checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f"
661
+
dependencies = [
662
+
"pkg-config",
663
+
"vcpkg",
664
+
]
665
+
666
+
[[package]]
601
667
name = "lock_api"
602
668
version = "0.4.14"
603
669
source = "registry+https://github.com/rust-lang/crates.io-index"
···
744
810
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
745
811
746
812
[[package]]
813
+
name = "pkg-config"
814
+
version = "0.3.32"
815
+
source = "registry+https://github.com/rust-lang/crates.io-index"
816
+
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
817
+
818
+
[[package]]
747
819
name = "plotters"
748
820
version = "0.3.7"
749
821
source = "registry+https://github.com/rust-lang/crates.io-index"
···
825
897
]
826
898
827
899
[[package]]
900
+
name = "redb"
901
+
version = "3.1.0"
902
+
source = "registry+https://github.com/rust-lang/crates.io-index"
903
+
checksum = "ae323eb086579a3769daa2c753bb96deb95993c534711e0dbe881b5192906a06"
904
+
dependencies = [
905
+
"libc",
906
+
]
907
+
908
+
[[package]]
828
909
name = "redox_syscall"
829
910
version = "0.5.18"
830
911
source = "registry+https://github.com/rust-lang/crates.io-index"
···
866
947
name = "repo-stream"
867
948
version = "0.1.1"
868
949
dependencies = [
950
+
"bincode",
869
951
"clap",
870
952
"criterion",
871
953
"env_logger",
···
875
957
"iroh-car",
876
958
"log",
877
959
"multibase",
960
+
"redb",
961
+
"rusqlite",
878
962
"serde",
879
963
"serde_bytes",
880
964
"serde_ipld_dagcbor",
···
883
967
]
884
968
885
969
[[package]]
970
+
name = "rusqlite"
971
+
version = "0.37.0"
972
+
source = "registry+https://github.com/rust-lang/crates.io-index"
973
+
checksum = "165ca6e57b20e1351573e3729b958bc62f0e48025386970b6e4d29e7a7e71f3f"
974
+
dependencies = [
975
+
"bitflags",
976
+
"fallible-iterator",
977
+
"fallible-streaming-iterator",
978
+
"hashlink",
979
+
"libsqlite3-sys",
980
+
"smallvec",
981
+
]
982
+
983
+
[[package]]
886
984
name = "rustc-demangle"
887
985
version = "0.1.26"
888
986
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1139
1237
checksum = "eb066959b24b5196ae73cb057f45598450d2c5f71460e98c49b738086eff9c06"
1140
1238
1141
1239
[[package]]
1240
+
name = "unty"
1241
+
version = "0.0.4"
1242
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1243
+
checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae"
1244
+
1245
+
[[package]]
1142
1246
name = "utf8parse"
1143
1247
version = "0.2.2"
1144
1248
source = "registry+https://github.com/rust-lang/crates.io-index"
1145
1249
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
1250
+
1251
+
[[package]]
1252
+
name = "vcpkg"
1253
+
version = "0.2.15"
1254
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1255
+
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
1256
+
1257
+
[[package]]
1258
+
name = "virtue"
1259
+
version = "0.0.18"
1260
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1261
+
checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1"
1146
1262
1147
1263
[[package]]
1148
1264
name = "walkdir"
+3
Cargo.toml
+3
Cargo.toml
···
7
7
repository = "https://tangled.org/@microcosm.blue/repo-stream"
8
8
9
9
[dependencies]
10
+
bincode = { version = "2.0.1", features = ["serde"] }
10
11
futures = "0.3.31"
11
12
futures-core = "0.3.31"
12
13
ipld-core = { version = "0.4.2", features = ["serde"] }
13
14
iroh-car = "0.5.1"
14
15
log = "0.4.28"
15
16
multibase = "0.9.2"
17
+
redb = "3.1.0"
18
+
rusqlite = "0.37.0"
16
19
serde = { version = "1.0.228", features = ["derive"] }
17
20
serde_bytes = "0.11.19"
18
21
serde_ipld_dagcbor = "0.6.4"
+57
examples/disk-read-file/main.rs
+57
examples/disk-read-file/main.rs
···
1
+
extern crate repo_stream;
2
+
use clap::Parser;
3
+
use futures::TryStreamExt;
4
+
use iroh_car::CarReader;
5
+
use std::convert::Infallible;
6
+
use std::path::PathBuf;
7
+
8
+
// Result alias for this example: any error is boxed as `dyn std::error::Error`
// so `?` can be used on every fallible call in `main`.
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
9
+
10
+
// Command-line arguments for this example, parsed via clap's `Parser` derive.
// NOTE: plain `//` comments are used on purpose; `///` doc comments on a clap
// derive struct would be picked up as help text.
#[derive(Debug, Parser)]
11
+
struct Args {
12
+
// Path of the CAR file; `main` opens it with `tokio::fs::File::open`.
#[arg()]
13
+
car: PathBuf,
14
+
// Path passed to `RedbStore::new` for the on-disk block store.
#[arg()]
15
+
tmpfile: PathBuf,
16
+
}
17
+
18
+
#[tokio::main]
19
+
async fn main() -> Result<()> {
20
+
env_logger::init();
21
+
22
+
let Args { car, tmpfile } = Args::parse();
23
+
let reader = tokio::fs::File::open(car).await?;
24
+
let reader = tokio::io::BufReader::new(reader);
25
+
26
+
println!("hello!");
27
+
28
+
let reader = CarReader::new(reader).await?;
29
+
30
+
let redb_store = repo_stream::disk_redb::RedbStore::new(tmpfile)?;
31
+
32
+
let root = reader
33
+
.header()
34
+
.roots()
35
+
.first()
36
+
.ok_or("missing root")?
37
+
.clone();
38
+
log::debug!("root: {root:?}");
39
+
40
+
// let stream = Box::pin(reader.stream());
41
+
let stream = std::pin::pin!(reader.stream());
42
+
43
+
let (commit, v) = repo_stream::disk_drive::Vehicle::init(root, stream, redb_store, |block| {
44
+
Ok::<_, Infallible>(block.len())
45
+
})
46
+
.await?;
47
+
let mut record_stream = std::pin::pin!(v.stream());
48
+
49
+
log::info!("got commit: {commit:?}");
50
+
51
+
while let Some((rkey, _rec)) = record_stream.try_next().await? {
52
+
log::info!("got {rkey:?}");
53
+
}
54
+
log::info!("bye!");
55
+
56
+
Ok(())
57
+
}
+201
src/disk_drive.rs
+201
src/disk_drive.rs
···
1
+
use futures::Stream;
2
+
use futures::TryStreamExt;
3
+
use std::error::Error;
4
+
5
+
use crate::disk_walk::{Step, Trip, Walker};
6
+
use crate::mst::Commit;
7
+
use crate::mst::Node;
8
+
9
+
use ipld_core::cid::Cid;
10
+
use serde::{Deserialize, Serialize, de::DeserializeOwned};
11
+
12
+
/// Errors that can happen while consuming and emitting blocks and records
13
+
#[derive(Debug, thiserror::Error)]
14
+
pub enum DriveError {
15
+
#[error("Failed to initialize CarReader: {0}")]
16
+
CarReader(#[from] iroh_car::Error),
17
+
#[error("Car block stream error: {0}")]
18
+
CarBlockError(Box<dyn Error>),
19
+
#[error("Failed to decode commit block: {0}")]
20
+
BadCommit(Box<dyn Error>),
21
+
#[error("The Commit block reference by the root was not found")]
22
+
MissingCommit,
23
+
#[error("The MST block {0} could not be found")]
24
+
MissingBlock(Cid),
25
+
#[error("Failed to walk the mst tree: {0}")]
26
+
Tripped(#[from] Trip),
27
+
}
28
+
29
+
/// A CAR block as it is parked in the block store until the walker asks for it.
///
/// Blocks are classified when they arrive: anything that could be an MST node
/// is kept as raw bytes, everything else is run through the record processor
/// immediately and the outcome (value or error text) is stored instead.
#[derive(Debug, Clone, Serialize, Deserialize)]
30
+
pub enum MaybeProcessedBlock<T: Clone + Serialize> {
31
+
/// A block that's *probably* a Node (but we can't know yet)
32
+
///
33
+
/// It *can be* a record that suspiciously looks a lot like a node, so we
34
+
/// cannot eagerly turn it into a Node. We only know for sure what it is
35
+
/// when we actually walk down the MST
36
+
Raw(Vec<u8>),
37
+
/// A processed record from a block that was definitely not a Node
38
+
///
39
+
/// If we _never_ needed this block, then we may have wasted a bit of effort
40
+
/// trying to process it. Oh well.
41
+
///
42
+
/// Processing has to be fallible because the CAR can have totally-unused
43
+
/// blocks, which can just be garbage. Since we're eagerly trying to process
44
+
/// record blocks without knowing for sure that they *are* records, we
45
+
/// discard any definitely-not-nodes that fail processing and keep their
46
+
/// error in the buffer for them (as `Unprocessable`). If we later try to
47
+
/// retrieve them as a record, then we can surface the error.
48
+
///
49
+
/// The error type (in `Unprocessable`) is `String` because we don't really
50
+
/// want to put any constraints like `Serialize` on the error type, and
51
+
/// `Error` at least requires `Display`. It's a compromise.
52
+
ProcessedOk(T),
53
+
/// A definitely-not-a-Node block whose processing failed.
///
/// Holds the processing error rendered to a `String` (via `to_string()`).
Unprocessable(String),
54
+
}
55
+
56
+
/// Key/value storage for blocks, keyed by CID.
///
/// `MPB` is the stored value type; the driver uses `MaybeProcessedBlock<T>`.
/// Neither method returns a `Result`, so an implementation has no way to
/// report a storage failure to the caller other than panicking.
pub trait BlockStore<MPB: Serialize + DeserializeOwned> {
57
+
/// Store `value` under `key`.
fn put(&self, key: Cid, value: MPB); // unwraps for now
58
+
/// Look up the value stored under `key`; `None` if there is none.
fn get(&self, key: Cid) -> Option<MPB>;
59
+
}
60
+
61
+
/// One item of the input block stream: a `(cid, raw block bytes)` pair, or the
/// stream's error type `E`.
type CarBlock<E> = Result<(Cid, Vec<u8>), E>;
62
+
63
+
/// The core driver between the block stream and MST walker
64
+
pub struct Vehicle<SE, S, T, BS, P, PE>
65
+
where
66
+
// SE: error type yielded by the block stream
SE: Error + 'static,
67
+
// S: the stream of raw CAR blocks
S: Stream<Item = CarBlock<SE>>,
68
+
// T: output of the record processor; serializable so it can be stored
T: Clone + Serialize + DeserializeOwned,
69
+
// BS: where blocks are parked until the walker needs them
BS: BlockStore<MaybeProcessedBlock<T>>,
70
+
// P: record-processing callback, PE: its error type
P: Fn(&[u8]) -> Result<T, PE>,
71
+
PE: Error,
72
+
{
73
+
/// Remaining input blocks (everything after the commit block)
block_stream: S,
74
+
/// Blocks read from the stream so far, keyed by CID
block_store: BS,
75
+
/// MST traversal state
walker: Walker,
76
+
/// Callback applied to record blocks
process: P,
77
+
}
78
+
79
+
impl<SE, S, T, BS, P, PE> Vehicle<SE, S, T, BS, P, PE>
80
+
where
81
+
SE: Error + 'static,
82
+
S: Stream<Item = CarBlock<SE>> + Unpin,
83
+
T: Clone + Serialize + DeserializeOwned,
84
+
BS: BlockStore<MaybeProcessedBlock<T>>,
85
+
P: Fn(&[u8]) -> Result<T, PE>,
86
+
PE: Error,
87
+
{
88
+
/// Set up the stream
89
+
///
90
+
/// This will eagerly consume blocks until the `Commit` object is found.
91
+
/// *Usually* the it's the first block, but there is no guarantee.
92
+
///
93
+
/// ### Parameters
94
+
///
95
+
/// `root`: CID of the commit object that is the root of the MST
96
+
///
97
+
/// `block_stream`: Input stream of raw CAR blocks
98
+
///
99
+
/// `process`: record-transforming callback:
100
+
///
101
+
/// For tasks where records can be quickly processed into a *smaller*
102
+
/// useful representation, you can do that eagerly as blocks come in by
103
+
/// passing the processor as a callback here. This can reduce overall
104
+
/// memory usage.
105
+
pub async fn init(
106
+
root: Cid,
107
+
mut block_stream: S,
108
+
block_store: BS,
109
+
process: P,
110
+
) -> Result<(Commit, Self), DriveError> {
111
+
let mut commit = None;
112
+
113
+
while let Some((cid, data)) = block_stream
114
+
.try_next()
115
+
.await
116
+
.map_err(|e| DriveError::CarBlockError(e.into()))?
117
+
{
118
+
if cid == root {
119
+
let c: Commit = serde_ipld_dagcbor::from_slice(&data)
120
+
.map_err(|e| DriveError::BadCommit(e.into()))?;
121
+
commit = Some(c);
122
+
break;
123
+
} else {
124
+
block_store.put(
125
+
cid,
126
+
if Node::could_be(&data) {
127
+
MaybeProcessedBlock::Raw(data)
128
+
} else {
129
+
match process(&data) {
130
+
Ok(t) => MaybeProcessedBlock::ProcessedOk(t),
131
+
Err(e) => MaybeProcessedBlock::Unprocessable(e.to_string()),
132
+
}
133
+
},
134
+
);
135
+
}
136
+
}
137
+
138
+
// we either broke out or read all the blocks without finding the commit...
139
+
let commit = commit.ok_or(DriveError::MissingCommit)?;
140
+
141
+
let walker = Walker::new(commit.data);
142
+
143
+
let me = Self {
144
+
block_stream,
145
+
block_store,
146
+
walker,
147
+
process,
148
+
};
149
+
Ok((commit, me))
150
+
}
151
+
152
+
async fn drive_until(&mut self, cid_needed: Cid) -> Result<(), DriveError> {
153
+
while let Some((cid, data)) = self
154
+
.block_stream
155
+
.try_next()
156
+
.await
157
+
.map_err(|e| DriveError::CarBlockError(e.into()))?
158
+
{
159
+
self.block_store.put(
160
+
cid,
161
+
if Node::could_be(&data) {
162
+
MaybeProcessedBlock::Raw(data)
163
+
} else {
164
+
match (self.process)(&data) {
165
+
Ok(t) => MaybeProcessedBlock::ProcessedOk(t),
166
+
Err(e) => MaybeProcessedBlock::Unprocessable(e.to_string()),
167
+
}
168
+
},
169
+
);
170
+
if cid == cid_needed {
171
+
return Ok(());
172
+
}
173
+
}
174
+
175
+
// if we never found the block
176
+
Err(DriveError::MissingBlock(cid_needed))
177
+
}
178
+
179
+
/// Manually step through the record outputs
180
+
pub async fn next_record(&mut self) -> Result<Option<(String, T)>, DriveError> {
181
+
loop {
182
+
// walk as far as we can until we run out of blocks or find a record
183
+
let cid_needed = match self.walker.step(&mut self.block_store, &self.process)? {
184
+
Step::Rest(cid) => cid,
185
+
Step::Finish => return Ok(None),
186
+
Step::Step { rkey, data } => return Ok(Some((rkey, data))),
187
+
};
188
+
189
+
// load blocks until we reach that cid
190
+
self.drive_until(cid_needed).await?;
191
+
}
192
+
}
193
+
194
+
/// Convert to a futures::stream of record outputs
195
+
pub fn stream(self) -> impl Stream<Item = Result<(String, T), DriveError>> {
196
+
futures::stream::try_unfold(self, |mut this| async move {
197
+
let maybe_record = this.next_record().await?;
198
+
Ok(maybe_record.map(|b| (b, this)))
199
+
})
200
+
}
201
+
}
+50
src/disk_redb.rs
+50
src/disk_redb.rs
···
1
+
use crate::disk_drive::BlockStore;
2
+
use ipld_core::cid::Cid;
3
+
use redb::{Database, Error, ReadableDatabase, TableDefinition};
4
+
use serde::{Serialize, de::DeserializeOwned};
5
+
use std::path::Path;
6
+
7
+
/// The single redb table used by `RedbStore`: byte-slice keys to byte-slice
/// values, stored under the name "blocks".
const TABLE: TableDefinition<&[u8], &[u8]> = TableDefinition::new("blocks");
8
+
9
+
/// A `BlockStore` implementation backed by a redb database file.
pub struct RedbStore {
10
+
/// Database handle created by `Database::create` in `RedbStore::new`
db: Database,
11
+
}
12
+
13
+
impl RedbStore {
14
+
pub fn new(path: impl AsRef<Path>) -> Result<Self, Error> {
15
+
let db = Database::create(path)?;
16
+
Ok(Self { db })
17
+
}
18
+
}
19
+
20
+
// TODO: clean up on drop
21
+
22
+
impl<MPB: Serialize + DeserializeOwned> BlockStore<MPB> for RedbStore {
23
+
fn put(&self, c: Cid, t: MPB) {
24
+
let key_bytes = c.to_bytes();
25
+
let val_bytes = bincode::serde::encode_to_vec(t, bincode::config::standard()).unwrap();
26
+
27
+
let mut tx = self.db.begin_write().unwrap();
28
+
tx.set_durability(redb::Durability::None).unwrap();
29
+
{
30
+
let mut table = tx.open_table(TABLE).unwrap();
31
+
table.insert(&*key_bytes, &*val_bytes).unwrap();
32
+
}
33
+
tx.commit().unwrap();
34
+
}
35
+
fn get(&self, c: Cid) -> Option<MPB> {
36
+
let key_bytes = c.to_bytes();
37
+
let tx = self.db.begin_read().unwrap();
38
+
let table = match tx.open_table(TABLE) {
39
+
Ok(t) => t,
40
+
Err(redb::TableError::TableDoesNotExist(_)) => return None,
41
+
e => e.unwrap(),
42
+
};
43
+
let maybe_val_bytes = table.get(&*key_bytes).unwrap()?;
44
+
let (t, n): (MPB, usize) =
45
+
bincode::serde::decode_from_slice(maybe_val_bytes.value(), bincode::config::standard())
46
+
.unwrap();
47
+
assert_eq!(maybe_val_bytes.value().len(), n);
48
+
Some(t)
49
+
}
50
+
}
+403
src/disk_walk.rs
+403
src/disk_walk.rs
···
1
+
//! Depth-first MST traversal
2
+
3
+
use crate::disk_drive::{BlockStore, MaybeProcessedBlock};
4
+
use crate::mst::Node;
5
+
6
+
use ipld_core::cid::Cid;
7
+
use serde::{Serialize, de::DeserializeOwned};
8
+
use std::error::Error;
9
+
10
+
/// Errors that can happen while walking
11
+
#[derive(Debug, thiserror::Error)]
12
+
pub enum Trip {
13
+
// NOTE(review): not constructed anywhere in the walker code visible in this
// file -- confirm whether empty nodes are meant to be rejected.
#[error("empty mst nodes are not allowed")]
14
+
NodeEmpty,
15
+
// Despite the name and message, `Walker::step` returns this when a needed
// *node* block is not stored as `Raw` or fails to decode as a `Node`.
#[error("Failed to decode commit block: {0}")]
16
+
BadCommit(Box<dyn std::error::Error>),
17
+
// A record key could not be reconstructed from a node's entries.
#[error("Action node error: {0}")]
18
+
RkeyError(#[from] RkeyError),
19
+
// The record processor failed; carries the error rendered as a string.
#[error("Process failed: {0}")]
20
+
ProcessFailed(String),
21
+
// A record key was not strictly greater than the previously emitted one.
#[error("Encountered an rkey out of order while walking the MST")]
22
+
RkeyOutOfOrder,
23
+
}
24
+
25
+
/// Errors from invalid Rkeys
26
+
#[derive(Debug, thiserror::Error)]
27
+
pub enum RkeyError {
28
+
// An entry's `prefix_len` is longer than the previous entry's full key.
#[error("Failed to compute an rkey due to invalid prefix_len")]
29
+
EntryPrefixOutOfbounds,
30
+
// The reassembled key bytes (prefix + suffix) were not valid UTF-8.
#[error("RKey was not utf-8")]
31
+
EntryRkeyNotUtf8(#[from] std::string::FromUtf8Error),
32
+
}
33
+
34
+
/// Walker outputs
35
+
#[derive(Debug)]
36
+
pub enum Step<T: Serialize + DeserializeOwned> {
37
+
/// We need a CID but it's not in the block store
38
+
///
39
+
/// Give the needed CID to the driver so it can load blocks until it's found
40
+
Rest(Cid),
41
+
/// Reached the end of the MST (the walker's stack is empty)
42
+
Finish,
43
+
/// A record was found: its record key and its processed value
44
+
Step { rkey: String, data: T },
45
+
}
46
+
47
+
/// One pending item on the walker's stack: the next block it has to look at.
#[derive(Debug, Clone, PartialEq)]
48
+
enum Need {
49
+
/// An MST node block, to be decoded and expanded into more needs
Node(Cid),
50
+
/// A record block, together with the full record key it is stored under
Record { rkey: String, cid: Cid },
51
+
}
52
+
53
+
fn push_from_node(stack: &mut Vec<Need>, node: &Node) -> Result<(), RkeyError> {
54
+
let mut entries = Vec::with_capacity(node.entries.len());
55
+
56
+
let mut prefix = vec![];
57
+
for entry in &node.entries {
58
+
let mut rkey = vec![];
59
+
let pre_checked = prefix
60
+
.get(..entry.prefix_len)
61
+
.ok_or(RkeyError::EntryPrefixOutOfbounds)?;
62
+
rkey.extend_from_slice(pre_checked);
63
+
rkey.extend_from_slice(&entry.keysuffix);
64
+
prefix = rkey.clone();
65
+
66
+
entries.push(Need::Record {
67
+
rkey: String::from_utf8(rkey)?,
68
+
cid: entry.value,
69
+
});
70
+
if let Some(ref tree) = entry.tree {
71
+
entries.push(Need::Node(*tree));
72
+
}
73
+
}
74
+
75
+
entries.reverse();
76
+
stack.append(&mut entries);
77
+
78
+
if let Some(tree) = node.left {
79
+
stack.push(Need::Node(tree));
80
+
}
81
+
Ok(())
82
+
}
83
+
84
+
/// Traverser of an atproto MST
85
+
///
86
+
/// Walks the tree from left-to-right in depth-first order
87
+
#[derive(Debug)]
88
+
pub struct Walker {
89
+
// Pending blocks to visit; the last element is visited next.
stack: Vec<Need>,
90
+
// Key of the most recently emitted record ("" before the first one),
// used to check that keys come out in strictly increasing order.
prev: String,
91
+
}
92
+
93
+
impl Walker {
94
+
pub fn new(tree_root_cid: Cid) -> Self {
95
+
Self {
96
+
stack: vec![Need::Node(tree_root_cid)],
97
+
prev: "".to_string(),
98
+
}
99
+
}
100
+
101
+
/// Advance through nodes until we find a record or can't go further
102
+
pub fn step<T: Clone + Serialize + DeserializeOwned, E: Error>(
103
+
&mut self,
104
+
block_store: &mut impl BlockStore<MaybeProcessedBlock<T>>,
105
+
process: impl Fn(&[u8]) -> Result<T, E>,
106
+
) -> Result<Step<T>, Trip> {
107
+
loop {
108
+
let Some(mut need) = self.stack.last() else {
109
+
log::trace!("tried to walk but we're actually done.");
110
+
return Ok(Step::Finish);
111
+
};
112
+
113
+
match &mut need {
114
+
Need::Node(cid) => {
115
+
log::trace!("need node {cid:?}");
116
+
let Some(mpb) = block_store.get(*cid) else {
117
+
log::trace!("node not found, resting");
118
+
return Ok(Step::Rest(*cid));
119
+
};
120
+
121
+
let MaybeProcessedBlock::<T>::Raw(block) = mpb else {
122
+
return Err(Trip::BadCommit("failed commit fingerprint".into()));
123
+
};
124
+
let node = serde_ipld_dagcbor::from_slice::<Node>(&block)
125
+
.map_err(|e| Trip::BadCommit(e.into()))?;
126
+
127
+
// found node, make sure we remember
128
+
self.stack.pop();
129
+
130
+
// queue up work on the found node next
131
+
push_from_node(&mut self.stack, &node)?;
132
+
}
133
+
Need::Record { rkey, cid } => {
134
+
log::trace!("need record {cid:?}");
135
+
let Some(mpb) = block_store.get(*cid) else {
136
+
log::trace!("record block not found, resting");
137
+
return Ok(Step::Rest(*cid));
138
+
};
139
+
let rkey = rkey.clone();
140
+
let data = match mpb {
141
+
MaybeProcessedBlock::Raw(data) => match process(&data) {
142
+
Ok(t) => Ok(t),
143
+
Err(e) => Err(Trip::ProcessFailed(e.to_string())),
144
+
},
145
+
MaybeProcessedBlock::ProcessedOk(t) => Ok(t.clone()),
146
+
MaybeProcessedBlock::Unprocessable(s) => {
147
+
return Err(Trip::ProcessFailed(s.clone()));
148
+
}
149
+
};
150
+
151
+
// found node, make sure we remember
152
+
self.stack.pop();
153
+
154
+
log::trace!("emitting a block as a step. depth={}", self.stack.len());
155
+
156
+
let data = data.map_err(|e| Trip::ProcessFailed(e.to_string()))?;
157
+
158
+
// rkeys *must* be in order or else the tree is invalid (or
159
+
// we have a bug)
160
+
if rkey <= self.prev {
161
+
return Err(Trip::RkeyOutOfOrder);
162
+
}
163
+
self.prev = rkey.clone();
164
+
165
+
return Ok(Step::Step { rkey, data });
166
+
}
167
+
}
168
+
}
169
+
}
170
+
}
171
+
172
+
#[cfg(test)]
173
+
mod test {
174
+
use super::*;
175
+
// use crate::mst::Entry;
176
+
177
+
/// A fixed, valid CID for use as test data.
fn cid1() -> Cid {
    let encoded = "bafyreihixenvk3ahqbytas4hk4a26w43bh6eo3w6usjqtxkpzsvi655a3m";
    encoded.parse::<Cid>().unwrap()
}
182
+
// fn cid2() -> Cid {
183
+
// "QmY7Yh4UquoXHLPFo2XbhXkhBvFoPwmQUSa92pxnxjQuPU"
184
+
// .parse()
185
+
// .unwrap()
186
+
// }
187
+
// fn cid3() -> Cid {
188
+
// "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"
189
+
// .parse()
190
+
// .unwrap()
191
+
// }
192
+
// fn cid4() -> Cid {
193
+
// "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR"
194
+
// .parse()
195
+
// .unwrap()
196
+
// }
197
+
// fn cid5() -> Cid {
198
+
// "QmSnuWmxptJZdLJpKRarxBMS2Ju2oANVrgbr2xWbie9b2D"
199
+
// .parse()
200
+
// .unwrap()
201
+
// }
202
+
// fn cid6() -> Cid {
203
+
// "QmdmQXB2mzChmMeKY47C43LxUdg1NDJ5MWcKMKxDu7RgQm"
204
+
// .parse()
205
+
// .unwrap()
206
+
// }
207
+
// fn cid7() -> Cid {
208
+
// "bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze"
209
+
// .parse()
210
+
// .unwrap()
211
+
// }
212
+
// fn cid8() -> Cid {
213
+
// "bafyreif3tfdpr5n4jdrbielmcapwvbpcthepfkwq2vwonmlhirbjmotedi"
214
+
// .parse()
215
+
// .unwrap()
216
+
// }
217
+
// fn cid9() -> Cid {
218
+
// "bafyreicnokmhmrnlp2wjhyk2haep4tqxiptwfrp2rrs7rzq7uk766chqvq"
219
+
// .parse()
220
+
// .unwrap()
221
+
// }
222
+
223
+
#[test]
fn test_next_from_node_empty() {
    // A node with no left subtree and no entries queues nothing.
    let empty = Node {
        left: None,
        entries: vec![],
    };
    let mut pending: Vec<Need> = Vec::new();
    push_from_node(&mut pending, &empty).unwrap();
    assert!(pending.last().is_none());
}
233
+
234
+
#[test]
fn test_needs_from_node_just_left() {
    // With only a left subtree, that subtree ends up on top of the stack.
    let left_only = Node {
        left: Some(cid1()),
        entries: vec![],
    };
    let mut pending: Vec<Need> = Vec::new();
    push_from_node(&mut pending, &left_only).unwrap();

    let expected = Need::Node(cid1());
    assert_eq!(pending.last(), Some(&expected));
}
244
+
245
+
// #[test]
246
+
// fn test_needs_from_node_just_one_record() {
247
+
// let node = Node {
248
+
// left: None,
249
+
// entries: vec![Entry {
250
+
// keysuffix: "asdf".into(),
251
+
// prefix_len: 0,
252
+
// value: cid1(),
253
+
// tree: None,
254
+
// }],
255
+
// };
256
+
// assert_eq!(
257
+
// needs_from_node(node).unwrap(),
258
+
// vec![Need::Record {
259
+
// rkey: "asdf".into(),
260
+
// cid: cid1(),
261
+
// },]
262
+
// );
263
+
// }
264
+
265
+
// #[test]
266
+
// fn test_needs_from_node_two_records() {
267
+
// let node = Node {
268
+
// left: None,
269
+
// entries: vec![
270
+
// Entry {
271
+
// keysuffix: "asdf".into(),
272
+
// prefix_len: 0,
273
+
// value: cid1(),
274
+
// tree: None,
275
+
// },
276
+
// Entry {
277
+
// keysuffix: "gh".into(),
278
+
// prefix_len: 2,
279
+
// value: cid2(),
280
+
// tree: None,
281
+
// },
282
+
// ],
283
+
// };
284
+
// assert_eq!(
285
+
// needs_from_node(node).unwrap(),
286
+
// vec![
287
+
// Need::Record {
288
+
// rkey: "asdf".into(),
289
+
// cid: cid1(),
290
+
// },
291
+
// Need::Record {
292
+
// rkey: "asgh".into(),
293
+
// cid: cid2(),
294
+
// },
295
+
// ]
296
+
// );
297
+
// }
298
+
299
+
// #[test]
300
+
// fn test_needs_from_node_with_both() {
301
+
// let node = Node {
302
+
// left: None,
303
+
// entries: vec![Entry {
304
+
// keysuffix: "asdf".into(),
305
+
// prefix_len: 0,
306
+
// value: cid1(),
307
+
// tree: Some(cid2()),
308
+
// }],
309
+
// };
310
+
// assert_eq!(
311
+
// needs_from_node(node).unwrap(),
312
+
// vec![
313
+
// Need::Record {
314
+
// rkey: "asdf".into(),
315
+
// cid: cid1(),
316
+
// },
317
+
// Need::Node(cid2()),
318
+
// ]
319
+
// );
320
+
// }
321
+
322
+
// #[test]
323
+
// fn test_needs_from_node_left_and_record() {
324
+
// let node = Node {
325
+
// left: Some(cid1()),
326
+
// entries: vec![Entry {
327
+
// keysuffix: "asdf".into(),
328
+
// prefix_len: 0,
329
+
// value: cid2(),
330
+
// tree: None,
331
+
// }],
332
+
// };
333
+
// assert_eq!(
334
+
// needs_from_node(node).unwrap(),
335
+
// vec![
336
+
// Need::Node(cid1()),
337
+
// Need::Record {
338
+
// rkey: "asdf".into(),
339
+
// cid: cid2(),
340
+
// },
341
+
// ]
342
+
// );
343
+
// }
344
+
345
+
// #[test]
346
+
// fn test_needs_from_full_node() {
347
+
// let node = Node {
348
+
// left: Some(cid1()),
349
+
// entries: vec![
350
+
// Entry {
351
+
// keysuffix: "asdf".into(),
352
+
// prefix_len: 0,
353
+
// value: cid2(),
354
+
// tree: Some(cid3()),
355
+
// },
356
+
// Entry {
357
+
// keysuffix: "ghi".into(),
358
+
// prefix_len: 1,
359
+
// value: cid4(),
360
+
// tree: Some(cid5()),
361
+
// },
362
+
// Entry {
363
+
// keysuffix: "jkl".into(),
364
+
// prefix_len: 2,
365
+
// value: cid6(),
366
+
// tree: Some(cid7()),
367
+
// },
368
+
// Entry {
369
+
// keysuffix: "mno".into(),
370
+
// prefix_len: 4,
371
+
// value: cid8(),
372
+
// tree: Some(cid9()),
373
+
// },
374
+
// ],
375
+
// };
376
+
// assert_eq!(
377
+
// needs_from_node(node).unwrap(),
378
+
// vec![
379
+
// Need::Node(cid1()),
380
+
// Need::Record {
381
+
// rkey: "asdf".into(),
382
+
// cid: cid2(),
383
+
// },
384
+
// Need::Node(cid3()),
385
+
// Need::Record {
386
+
// rkey: "aghi".into(),
387
+
// cid: cid4(),
388
+
// },
389
+
// Need::Node(cid5()),
390
+
// Need::Record {
391
+
// rkey: "agjkl".into(),
392
+
// cid: cid6(),
393
+
// },
394
+
// Need::Node(cid7()),
395
+
// Need::Record {
396
+
// rkey: "agjkmno".into(),
397
+
// cid: cid8(),
398
+
// },
399
+
// Need::Node(cid9()),
400
+
// ]
401
+
// );
402
+
// }
403
+
}