+55
-8
Cargo.lock
+55
-8
Cargo.lock
···
1183
1183
checksum = "9425c3bf7089c983facbae04de54513cce73b41c7f9ff8c845b54e7bc64ebbfb"
1184
1184
1185
1185
[[package]]
1186
+
name = "bitcoin-io"
1187
+
version = "0.1.3"
1188
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1189
+
checksum = "0b47c4ab7a93edb0c7198c5535ed9b52b63095f4e9b45279c6736cec4b856baf"
1190
+
1191
+
[[package]]
1186
1192
name = "bitcoin_hashes"
1187
1193
version = "0.13.0"
1188
1194
source = "registry+https://github.com/rust-lang/crates.io-index"
1189
1195
checksum = "1930a4dabfebb8d7d9992db18ebe3ae2876f0a305fab206fd168df931ede293b"
1190
1196
dependencies = [
1191
1197
"bitcoin-internals",
1192
-
"hex-conservative",
1198
+
"hex-conservative 0.1.2",
1199
+
]
1200
+
1201
+
[[package]]
1202
+
name = "bitcoin_hashes"
1203
+
version = "0.14.0"
1204
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1205
+
checksum = "bb18c03d0db0247e147a21a6faafd5a7eb851c743db062de72018b6b7e8e4d16"
1206
+
dependencies = [
1207
+
"bitcoin-io",
1208
+
"hex-conservative 0.2.1",
1193
1209
]
1194
1210
1195
1211
[[package]]
···
1317
1333
"reqwest 0.12.15",
1318
1334
"reqwest-middleware",
1319
1335
"rsky-common",
1336
+
"rsky-lexicon",
1320
1337
"rsky-pds",
1321
1338
"rsky-repo",
1322
1339
"rsky-syntax",
1340
+
"secp256k1 0.31.0",
1323
1341
"serde",
1324
1342
"serde_bytes",
1325
1343
"serde_ipld_dagcbor",
···
2693
2711
version = "0.1.2"
2694
2712
source = "registry+https://github.com/rust-lang/crates.io-index"
2695
2713
checksum = "212ab92002354b4819390025006c897e8140934349e8635c9b077f47b4dcbd20"
2714
+
2715
+
[[package]]
2716
+
name = "hex-conservative"
2717
+
version = "0.2.1"
2718
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2719
+
checksum = "5313b072ce3c597065a808dbf612c4c8e8590bdbf8b579508bf7a762c5eae6cd"
2720
+
dependencies = [
2721
+
"arrayvec",
2722
+
]
2696
2723
2697
2724
[[package]]
2698
2725
name = "hickory-proto"
···
5264
5291
"rand_core 0.6.4",
5265
5292
"regex",
5266
5293
"rsky-identity",
5267
-
"secp256k1",
5294
+
"secp256k1 0.28.2",
5268
5295
"serde",
5269
5296
"serde_ipld_dagcbor",
5270
5297
"serde_json",
···
5283
5310
"anyhow",
5284
5311
"multibase",
5285
5312
"p256 0.13.2",
5286
-
"secp256k1",
5313
+
"secp256k1 0.28.2",
5287
5314
"unsigned-varint 0.8.0",
5288
5315
]
5289
5316
···
5315
5342
"libipld",
5316
5343
"miette",
5317
5344
"parking_lot",
5318
-
"secp256k1",
5345
+
"secp256k1 0.28.2",
5319
5346
"serde",
5320
5347
"serde_bytes",
5321
5348
"serde_cbor",
···
5370
5397
"rsky-lexicon",
5371
5398
"rsky-repo",
5372
5399
"rsky-syntax",
5373
-
"secp256k1",
5400
+
"secp256k1 0.28.2",
5374
5401
"serde",
5375
5402
"serde_bytes",
5376
5403
"serde_cbor",
···
5409
5436
"rsky-crypto",
5410
5437
"rsky-lexicon",
5411
5438
"rsky-syntax",
5412
-
"secp256k1",
5439
+
"secp256k1 0.28.2",
5413
5440
"serde",
5414
5441
"serde_bytes",
5415
5442
"serde_cbor",
···
5665
5692
source = "registry+https://github.com/rust-lang/crates.io-index"
5666
5693
checksum = "d24b59d129cdadea20aea4fb2352fa053712e5d713eee47d700cd4b2bc002f10"
5667
5694
dependencies = [
5668
-
"bitcoin_hashes",
5695
+
"bitcoin_hashes 0.13.0",
5669
5696
"rand 0.8.5",
5670
-
"secp256k1-sys",
5697
+
"secp256k1-sys 0.9.2",
5671
5698
"serde",
5672
5699
]
5673
5700
5674
5701
[[package]]
5702
+
name = "secp256k1"
5703
+
version = "0.31.0"
5704
+
source = "registry+https://github.com/rust-lang/crates.io-index"
5705
+
checksum = "6a3dff2d01c9aa65c3186a45ff846bfea52cbe6de3b6320ed2a358d90dad0d76"
5706
+
dependencies = [
5707
+
"bitcoin_hashes 0.14.0",
5708
+
"rand 0.9.1",
5709
+
"secp256k1-sys 0.11.0",
5710
+
]
5711
+
5712
+
[[package]]
5675
5713
name = "secp256k1-sys"
5676
5714
version = "0.9.2"
5677
5715
source = "registry+https://github.com/rust-lang/crates.io-index"
5678
5716
checksum = "e5d1746aae42c19d583c3c1a8c646bfad910498e2051c551a7f2e3c0c9fbb7eb"
5717
+
dependencies = [
5718
+
"cc",
5719
+
]
5720
+
5721
+
[[package]]
5722
+
name = "secp256k1-sys"
5723
+
version = "0.11.0"
5724
+
source = "registry+https://github.com/rust-lang/crates.io-index"
5725
+
checksum = "dcb913707158fadaf0d8702c2db0e857de66eb003ccfdda5924b5f5ac98efb38"
5679
5726
dependencies = [
5680
5727
"cc",
5681
5728
]
+2
Cargo.toml
+2
Cargo.toml
···
145
145
rsky-repo = { git = "https://github.com/blacksky-algorithms/rsky.git" }
146
146
rsky-pds = { git = "https://github.com/blacksky-algorithms/rsky.git" }
147
147
rsky-common = { git = "https://github.com/blacksky-algorithms/rsky.git" }
148
+
rsky-lexicon = { git = "https://github.com/blacksky-algorithms/rsky.git" }
148
149
149
150
# async in streams
150
151
# async-stream = "0.3"
···
249
250
urlencoding = "2.1.3"
250
251
async-trait = "0.1.88"
251
252
lazy_static = "1.5.0"
253
+
secp256k1 = "0.31.0"
+424
-316
src/actor_store/actor_store.rs
+424
-316
src/actor_store/actor_store.rs
···
1
-
use std::path::PathBuf;
1
+
//! Based on https://github.com/blacksky-algorithms/rsky/blob/main/rsky-pds/src/actor_store/mod.rs
2
+
//! Which is based on https://github.com/bluesky-social/atproto/blob/main/packages/repo/src/repo.ts
3
+
//! and also adds components from https://github.com/bluesky-social/atproto/blob/main/packages/pds/src/actor-store/repo/transactor.ts
4
+
//! blacksky-algorithms/rsky is licensed under the Apache License 2.0
5
+
//!
6
+
//! Modified for SQLite backend
7
+
8
+
use anyhow::Result;
9
+
use cidv10::Cid;
10
+
use diesel::*;
11
+
use futures::stream::{self, StreamExt};
12
+
use rsky_common;
13
+
use rsky_pds::actor_store::repo::types::SyncEvtData;
14
+
use rsky_repo::repo::Repo;
15
+
use rsky_repo::storage::readable_blockstore::ReadableBlockstore;
16
+
use rsky_repo::storage::types::RepoStorage;
17
+
use rsky_repo::types::{
18
+
CommitAction, CommitData, CommitDataWithOps, CommitOp, PreparedCreateOrUpdate, PreparedWrite,
19
+
RecordCreateOrUpdateOp, RecordWriteEnum, RecordWriteOp, WriteOpAction, write_to_op,
20
+
};
21
+
use rsky_repo::util::format_data_key;
22
+
use rsky_syntax::aturi::AtUri;
23
+
use secp256k1::{Keypair, Secp256k1, SecretKey};
24
+
use std::env;
25
+
use std::fmt;
2
26
use std::str::FromStr;
3
27
use std::sync::Arc;
28
+
use tokio::sync::RwLock;
4
29
5
-
use anyhow::{Context as _, Result, anyhow, bail};
6
-
use atrium_crypto::keypair::{Did as _, Export as _, Secp256k1Keypair};
7
-
use atrium_repo::Cid;
8
-
use diesel::prelude::*;
9
-
use sha2::Digest as _;
10
-
use tokio::fs;
30
+
use super::ActorDb;
31
+
use super::blob::BlobReader;
32
+
use super::preference::PreferenceReader;
33
+
use super::record::RecordReader;
34
+
use super::sql_blob::BlobStoreSql;
35
+
use super::sql_repo::SqlRepoReader;
11
36
12
-
use super::PreparedWrite;
13
-
use super::actor_store_handler::ActorStoreHandler;
14
-
use super::actor_store_resources::ActorStoreResources;
15
-
use super::blob::{BlobStore as _, BlobStorePlaceholder};
16
-
use super::db::{ActorDb, get_db};
17
-
use crate::SigningKey;
18
-
19
-
/// Central manager for actor stores
20
-
pub(crate) struct ActorStore {
21
-
/// Base directory for actor data
22
-
pub directory: PathBuf,
23
-
/// Resources shared between actor stores
24
-
pub resources: ActorStoreResources,
37
+
/// Reasons `ActorStore::format_commit` refuses to build a commit.
///
/// Every variant carries an already-rendered string describing the state that
/// caused the rejection. The type implements [`fmt::Display`] and
/// [`std::error::Error`] so it can be converted into `anyhow::Error` with
/// `.into()` / `?`.
#[derive(Debug)]
enum FormatCommitError {
    /// A record-level swap was requested on a write where it is not allowed.
    /// Payload: debug rendering of the record CID currently stored (if any).
    BadRecordSwap(String),
    /// The stored record CID does not match the swap CID supplied by the caller.
    /// Payload: debug rendering of the record CID currently stored (if any).
    RecordSwapMismatch(String),
    /// The repo root CID does not match the swap commit supplied by the caller.
    /// Payload: the current root CID.
    BadCommitSwap(String),
    /// No root could be read for the repo. Payload: the DID of the repo.
    MissingRepoRoot(String),
}

impl fmt::Display for FormatCommitError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::BadRecordSwap(record) => write!(f, "BadRecordSwapError: `{record}`"),
            Self::RecordSwapMismatch(record) => {
                write!(f, "BadRecordSwapError: current record is `{record}`")
            }
            Self::BadCommitSwap(cid) => write!(f, "BadCommitSwapError: {cid}"),
            Self::MissingRepoRoot(did) => write!(f, "No repo root found for `{did}`"),
        }
    }
}

impl std::error::Error for FormatCommitError {}
26
44
27
-
struct ActorLocation {
28
-
/// Actor's directory path
29
-
directory: PathBuf,
30
-
/// Database path
31
-
db_location: PathBuf,
32
-
/// Key path
33
-
key_location: PathBuf,
45
+
pub struct ActorStore {
46
+
pub did: String,
47
+
pub storage: Arc<RwLock<SqlRepoReader>>, // get ipld blocks from db
48
+
pub record: RecordReader, // get lexicon records from db
49
+
pub blob: BlobReader, // get blobs
50
+
pub pref: PreferenceReader, // get preferences
34
51
}
35
52
53
+
// Combination of RepoReader/Transactor, BlobReader/Transactor, SqlRepoReader/Transactor
36
54
impl ActorStore {
37
-
/// Create a new actor store manager
38
-
pub(crate) fn new(directory: impl Into<PathBuf>, resources: ActorStoreResources) -> Self {
39
-
Self {
40
-
directory: directory.into(),
41
-
resources,
55
+
/// Concrete reader of an individual repo (hence BlobStoreSql which takes `did` param)
56
+
pub fn new(did: String, blobstore: BlobStoreSql, db: ActorDb) -> Self {
57
+
let db = Arc::new(db);
58
+
ActorStore {
59
+
storage: Arc::new(RwLock::new(SqlRepoReader::new(
60
+
did.clone(),
61
+
None,
62
+
db.clone(),
63
+
))),
64
+
record: RecordReader::new(did.clone(), db.clone()),
65
+
pref: PreferenceReader::new(did.clone(), db.clone()),
66
+
did,
67
+
blob: BlobReader::new(blobstore, db.clone()), // Unlike TS impl, just use blob reader vs generator
42
68
}
43
69
}
44
70
45
-
/// Get the location information for an actor
46
-
pub(crate) async fn get_location(&self, did: &str) -> Result<ActorLocation> {
47
-
// Hash the DID for directory organization
48
-
let did_hash = sha2::Sha256::digest(did.as_bytes());
49
-
let hash_prefix = format!("{:02x}", did_hash[0]);
50
-
51
-
// Create paths
52
-
let directory = self.directory.join(hash_prefix).join(did);
53
-
let db_location = directory.join("store.sqlite");
54
-
let key_location = directory.join("key");
55
-
56
-
Ok(ActorLocation {
57
-
directory,
58
-
db_location,
59
-
key_location,
60
-
})
71
+
/// Returns whatever the underlying repo storage reports as its root CID,
/// or `None` when it reports none. Takes the storage read lock for the call.
pub async fn get_repo_root(&self) -> Option<Cid> {
    let repo_storage = self.storage.read().await;
    let root = repo_storage.get_root().await;
    root
}
62
75
63
-
/// Check if an actor store exists
64
-
pub(crate) async fn exists(&self, did: &str) -> Result<bool> {
65
-
let location = self.get_location(did).await?;
66
-
Ok(location.db_location.exists())
67
-
}
68
-
69
-
/// Get the signing keypair for an actor
70
-
pub(crate) async fn keypair(&self, did: &str) -> Result<Arc<SigningKey>> {
71
-
let location = self.get_location(did).await?;
72
-
let priv_key = fs::read(&location.key_location)
73
-
.await
74
-
.context("Failed to read key file")?;
75
-
76
-
let keypair = SigningKey::import(&priv_key).context("Failed to import signing key")?;
77
-
78
-
Ok(Arc::new(keypair))
79
-
}
80
-
81
-
/// Open the database for an actor
82
-
pub(crate) async fn open_db(&self, did: &str) -> Result<ActorDb> {
83
-
let location = self.get_location(did).await?;
84
-
85
-
if !location.db_location.exists() {
86
-
bail!("Repo not found");
87
-
}
88
-
89
-
// Convert path to string for SQLite connection
90
-
let db_path = location
91
-
.db_location
92
-
.to_str()
93
-
.ok_or_else(|| anyhow!("Invalid path encoding"))?;
94
-
95
-
// Open database with WAL mode enabled
96
-
let db = get_db(db_path, false)
97
-
.await
98
-
.context("Failed to open actor database")?;
99
-
100
-
// Run a simple query to ensure the database is ready
101
-
db.run(|conn| diesel::sql_query("SELECT 1 FROM repo_root LIMIT 1").execute(conn))
102
-
.await
103
-
.context("Database not ready")?;
104
-
105
-
Ok(db)
106
-
}
107
-
108
-
/// Execute read operations on an actor store
109
-
pub(crate) async fn read<T, F>(&self, did: &str, f: F) -> Result<T>
110
-
where
111
-
F: FnOnce(ActorStoreHandler) -> Result<T>,
112
-
{
113
-
let db = self.open_db(did).await?;
114
-
let blobstore = self.resources.blobstore(did.to_string());
115
-
116
-
// Create a read-only handler
117
-
let handler = ActorStoreHandler::new_reader(db.clone(), did.to_string(), blobstore);
118
-
119
-
// Execute the function
120
-
f(handler)
121
-
}
76
+
// Transactors
77
+
// -------------------
122
78
123
-
/// Execute read-write operations with a transaction
124
-
pub(crate) async fn transact<T, F>(&self, did: &str, f: F) -> Result<T>
125
-
where
126
-
F: FnOnce(ActorStoreHandler) -> Result<T>,
127
-
{
128
-
let db = self.open_db(did).await?;
129
-
let keypair = self.keypair(did).await?;
130
-
let blobstore = self.resources.blobstore(did.to_string());
131
-
let background_queue = self.resources.background_queue();
132
-
133
-
// Create a read-write handler with transaction support
134
-
let handler = ActorStoreHandler::new_writer(
135
-
db,
136
-
did.to_string(),
137
-
blobstore,
79
+
#[deprecated]
80
+
pub async fn create_repo_legacy(
81
+
&self,
82
+
keypair: Keypair,
83
+
writes: Vec<PreparedCreateOrUpdate>,
84
+
) -> Result<CommitData> {
85
+
let write_ops = writes
86
+
.clone()
87
+
.into_iter()
88
+
.map(|prepare| {
89
+
let at_uri: AtUri = prepare.uri.try_into()?;
90
+
Ok(RecordCreateOrUpdateOp {
91
+
action: WriteOpAction::Create,
92
+
collection: at_uri.get_collection(),
93
+
rkey: at_uri.get_rkey(),
94
+
record: prepare.record,
95
+
})
96
+
})
97
+
.collect::<Result<Vec<RecordCreateOrUpdateOp>>>()?;
98
+
let commit = Repo::format_init_commit(
99
+
self.storage.clone(),
100
+
self.did.clone(),
138
101
keypair,
139
-
background_queue.as_ref().clone(),
140
-
);
141
-
142
-
// Execute the function (will handle transactions internally)
143
-
f(handler)
102
+
Some(write_ops),
103
+
)
104
+
.await?;
105
+
let storage_guard = self.storage.read().await;
106
+
storage_guard.apply_commit(commit.clone(), None).await?;
107
+
let writes = writes
108
+
.into_iter()
109
+
.map(PreparedWrite::Create)
110
+
.collect::<Vec<PreparedWrite>>();
111
+
self.blob.process_write_blobs(writes).await?;
112
+
Ok(commit)
144
113
}
145
114
146
-
/// Execute read-write operations without a transaction
147
-
pub(crate) async fn write_no_transaction<T, F>(&self, did: &str, f: F) -> Result<T>
148
-
where
149
-
F: FnOnce(ActorStoreHandler) -> Result<T>,
150
-
{
151
-
let db = self.open_db(did).await?;
152
-
let keypair = self.keypair(did).await?;
153
-
let blobstore = self.resources.blobstore(did.to_string());
154
-
let background_queue = self.resources.background_queue();
155
-
156
-
// Create a read-write handler without automatic transaction
157
-
let handler = ActorStoreHandler::new_writer(
158
-
db,
159
-
did.to_string(),
160
-
blobstore,
115
+
pub async fn create_repo(
116
+
&self,
117
+
keypair: Keypair,
118
+
writes: Vec<PreparedCreateOrUpdate>,
119
+
) -> Result<CommitDataWithOps> {
120
+
let write_ops = writes
121
+
.clone()
122
+
.into_iter()
123
+
.map(|prepare| {
124
+
let at_uri: AtUri = prepare.uri.try_into()?;
125
+
Ok(RecordCreateOrUpdateOp {
126
+
action: WriteOpAction::Create,
127
+
collection: at_uri.get_collection(),
128
+
rkey: at_uri.get_rkey(),
129
+
record: prepare.record,
130
+
})
131
+
})
132
+
.collect::<Result<Vec<RecordCreateOrUpdateOp>>>()?;
133
+
let commit = Repo::format_init_commit(
134
+
self.storage.clone(),
135
+
self.did.clone(),
161
136
keypair,
162
-
background_queue.as_ref().clone(),
163
-
);
164
-
165
-
// Execute the function
166
-
f(handler)
137
+
Some(write_ops),
138
+
)
139
+
.await?;
140
+
let storage_guard = self.storage.read().await;
141
+
storage_guard.apply_commit(commit.clone(), None).await?;
142
+
let write_commit_ops = writes.iter().try_fold(
143
+
Vec::with_capacity(writes.len()),
144
+
|mut acc, w| -> Result<Vec<CommitOp>> {
145
+
let aturi: AtUri = w.uri.clone().try_into()?;
146
+
acc.push(CommitOp {
147
+
action: CommitAction::Create,
148
+
path: format_data_key(aturi.get_collection(), aturi.get_rkey()),
149
+
cid: Some(w.cid.clone()),
150
+
prev: None,
151
+
});
152
+
Ok(acc)
153
+
},
154
+
)?;
155
+
let writes = writes
156
+
.into_iter()
157
+
.map(PreparedWrite::Create)
158
+
.collect::<Vec<PreparedWrite>>();
159
+
self.blob.process_write_blobs(writes).await?;
160
+
Ok(CommitDataWithOps {
161
+
commit_data: commit,
162
+
ops: write_commit_ops,
163
+
prev_data: None,
164
+
})
167
165
}
168
166
169
-
/// Create a new actor store
170
-
pub(crate) async fn create(&self, did: &str, keypair: SigningKey) -> Result<()> {
171
-
let location = self.get_location(did).await?;
172
-
173
-
// Ensure directory exists
174
-
fs::create_dir_all(&location.directory)
175
-
.await
176
-
.context("Failed to create directory")?;
177
-
178
-
// Check if repo already exists
179
-
if location.db_location.exists() {
180
-
bail!("Repo already exists");
167
+
pub async fn process_import_repo(
168
+
&mut self,
169
+
commit: CommitData,
170
+
writes: Vec<PreparedWrite>,
171
+
) -> Result<()> {
172
+
{
173
+
let immutable_borrow = &self;
174
+
// & send to indexing
175
+
immutable_borrow
176
+
.index_writes(writes.clone(), &commit.rev)
177
+
.await?;
181
178
}
182
-
183
-
// Export and save private key
184
-
let priv_key = keypair.export();
185
-
fs::write(&location.key_location, priv_key)
186
-
.await
187
-
.context("Failed to write key file")?;
188
-
189
-
// Initialize the database
190
-
let db_path = location
191
-
.db_location
192
-
.to_str()
193
-
.ok_or_else(|| anyhow!("Invalid path encoding"))?;
194
-
195
-
let db = get_db(db_path, false)
196
-
.await
197
-
.context("Failed to create actor database")?;
198
-
199
-
// Ensure WAL mode and run migrations
200
-
db.ensure_wal().await?;
201
-
db.run_migrations()?;
202
-
179
+
// persist the commit to repo storage
180
+
let storage_guard = self.storage.read().await;
181
+
storage_guard.apply_commit(commit.clone(), None).await?;
182
+
// process blobs
183
+
self.blob.process_write_blobs(writes).await?;
203
184
Ok(())
204
185
}
205
186
206
-
/// Destroy an actor store
207
-
pub(crate) async fn destroy(&self, did: &str) -> Result<()> {
208
-
// Get all blob CIDs first
209
-
let cids = self
210
-
.read(did, |handler| async move {
211
-
handler.repo.blob.get_blob_cids().await
212
-
})
213
-
.await?;
214
-
215
-
// Delete all blobs
216
-
let blobstore = self.resources.blobstore(did.to_string());
217
-
if !cids.is_empty() {
218
-
// Process in chunks of 500
219
-
for chunk in cids.chunks(500) {
220
-
let _ = blobstore.delete_many(chunk.to_vec()).await;
221
-
}
222
-
}
223
-
224
-
// Remove directory and all files
225
-
let location = self.get_location(did).await?;
226
-
if location.directory.exists() {
227
-
fs::remove_dir_all(&location.directory)
228
-
.await
229
-
.context("Failed to remove actor directory")?;
187
+
pub async fn process_writes(
188
+
&mut self,
189
+
writes: Vec<PreparedWrite>,
190
+
swap_commit_cid: Option<Cid>,
191
+
) -> Result<CommitDataWithOps> {
192
+
// NOTE: In the typescript PR on sync v1.1
193
+
// there are some safeguards added for adding
194
+
// very large commits and very many commits
195
+
// for which I'm sure we could safeguard on
196
+
// but may not be necessary.
197
+
// https://github.com/bluesky-social/atproto/pull/3585/files#diff-7627844a4a6b50190014e947d1331a96df3c64d4c5273fa0ce544f85c3c1265f
198
+
let commit = self.format_commit(writes.clone(), swap_commit_cid).await?;
199
+
{
200
+
let immutable_borrow = &self;
201
+
// & send to indexing
202
+
immutable_borrow
203
+
.index_writes(writes.clone(), &commit.commit_data.rev)
204
+
.await?;
230
205
}
206
+
// persist the commit to repo storage
207
+
let storage_guard = self.storage.read().await;
208
+
storage_guard
209
+
.apply_commit(commit.commit_data.clone(), None)
210
+
.await?;
211
+
// process blobs
212
+
self.blob.process_write_blobs(writes).await?;
213
+
Ok(commit)
214
+
}
231
215
232
-
Ok(())
216
+
pub async fn get_sync_event_data(&mut self) -> Result<SyncEvtData> {
217
+
let storage_guard = self.storage.read().await;
218
+
let current_root = storage_guard.get_root_detailed().await?;
219
+
let blocks_and_missing = storage_guard.get_blocks(vec![current_root.cid]).await?;
220
+
Ok(SyncEvtData {
221
+
cid: current_root.cid,
222
+
rev: current_root.rev,
223
+
blocks: blocks_and_missing.blocks,
224
+
})
233
225
}
234
226
235
-
/// Reserve a keypair for future use
236
-
pub(crate) async fn reserve_keypair(&self, did: Option<&str>) -> Result<String> {
237
-
let reserved_dir = self
238
-
.resources
239
-
.reserved_key_dir()
240
-
.ok_or_else(|| anyhow!("No reserved key directory configured"))?;
241
-
242
-
// If DID is provided, check if key already exists
243
-
let mut key_path = None;
244
-
if let Some(did_str) = did {
245
-
assert_safe_path_part(did_str)?;
246
-
key_path = Some(reserved_dir.join(did_str));
247
-
248
-
if key_path.as_ref().unwrap().exists() {
249
-
let key_data = fs::read(key_path.as_ref().unwrap()).await?;
250
-
let keypair = Secp256k1Keypair::import(&key_data)
251
-
.context("Failed to import existing reserved key")?;
252
-
return Ok(keypair.did());
227
+
pub async fn format_commit(
228
+
&mut self,
229
+
writes: Vec<PreparedWrite>,
230
+
swap_commit: Option<Cid>,
231
+
) -> Result<CommitDataWithOps> {
232
+
let current_root = {
233
+
let storage_guard = self.storage.read().await;
234
+
storage_guard.get_root_detailed().await
235
+
};
236
+
if let Ok(current_root) = current_root {
237
+
if let Some(swap_commit) = swap_commit {
238
+
if !current_root.cid.eq(&swap_commit) {
239
+
return Err(
240
+
FormatCommitError::BadCommitSwap(current_root.cid.to_string()).into(),
241
+
);
242
+
}
243
+
}
244
+
{
245
+
let mut storage_guard = self.storage.write().await;
246
+
storage_guard.cache_rev(current_root.rev).await?;
247
+
}
248
+
let mut new_record_cids: Vec<Cid> = vec![];
249
+
let mut delete_and_update_uris = vec![];
250
+
let mut commit_ops = vec![];
251
+
for write in &writes {
252
+
let commit_action: CommitAction = write.action().into();
253
+
match write.clone() {
254
+
PreparedWrite::Create(c) => new_record_cids.push(c.cid),
255
+
PreparedWrite::Update(u) => {
256
+
new_record_cids.push(u.cid);
257
+
let u_at_uri: AtUri = u.uri.try_into()?;
258
+
delete_and_update_uris.push(u_at_uri);
259
+
}
260
+
PreparedWrite::Delete(d) => {
261
+
let d_at_uri: AtUri = d.uri.try_into()?;
262
+
delete_and_update_uris.push(d_at_uri)
263
+
}
264
+
}
265
+
if write.swap_cid().is_none() {
266
+
continue;
267
+
}
268
+
let write_at_uri: &AtUri = &write.uri().try_into()?;
269
+
let record = self
270
+
.record
271
+
.get_record(write_at_uri, None, Some(true))
272
+
.await?;
273
+
let current_record = match record {
274
+
Some(record) => Some(Cid::from_str(&record.cid)?),
275
+
None => None,
276
+
};
277
+
let cid = match &write {
278
+
&PreparedWrite::Delete(_) => None,
279
+
&PreparedWrite::Create(w) | &PreparedWrite::Update(w) => Some(w.cid),
280
+
};
281
+
let mut op = CommitOp {
282
+
action: commit_action,
283
+
path: format_data_key(write_at_uri.get_collection(), write_at_uri.get_rkey()),
284
+
cid,
285
+
prev: None,
286
+
};
287
+
if let Some(_) = current_record {
288
+
op.prev = current_record;
289
+
};
290
+
commit_ops.push(op);
291
+
match write {
292
+
// There should be no current record for a create
293
+
PreparedWrite::Create(_) if write.swap_cid().is_some() => {
294
+
Err::<(), anyhow::Error>(
295
+
FormatCommitError::BadRecordSwap(format!("{:?}", current_record))
296
+
.into(),
297
+
)
298
+
}
299
+
// There should be a current record for an update
300
+
PreparedWrite::Update(_) if write.swap_cid().is_none() => {
301
+
Err::<(), anyhow::Error>(
302
+
FormatCommitError::BadRecordSwap(format!("{:?}", current_record))
303
+
.into(),
304
+
)
305
+
}
306
+
// There should be a current record for a delete
307
+
PreparedWrite::Delete(_) if write.swap_cid().is_none() => {
308
+
Err::<(), anyhow::Error>(
309
+
FormatCommitError::BadRecordSwap(format!("{:?}", current_record))
310
+
.into(),
311
+
)
312
+
}
313
+
_ => Ok::<(), anyhow::Error>(()),
314
+
}?;
315
+
match (current_record, write.swap_cid()) {
316
+
(Some(current_record), Some(swap_cid)) if current_record.eq(swap_cid) => {
317
+
Ok::<(), anyhow::Error>(())
318
+
}
319
+
_ => Err::<(), anyhow::Error>(
320
+
FormatCommitError::RecordSwapMismatch(format!("{:?}", current_record))
321
+
.into(),
322
+
),
323
+
}?;
253
324
}
254
-
}
255
-
256
-
// Create a new keypair
257
-
let keypair = Secp256k1Keypair::create(&mut rand::thread_rng());
258
-
let key_did = keypair.did();
259
-
260
-
// Set path if not already set
261
-
let final_path = key_path.unwrap_or_else(|| reserved_dir.join(&key_did));
262
-
263
-
// Ensure directory exists
264
-
fs::create_dir_all(reserved_dir).await?;
265
-
266
-
// Save key
267
-
fs::write(&final_path, keypair.export()).await?;
268
-
269
-
Ok(key_did)
270
-
}
271
-
272
-
/// Get a reserved keypair
273
-
pub(crate) async fn get_reserved_keypair(
274
-
&self,
275
-
key_did: &str,
276
-
) -> Result<Option<Arc<SigningKey>>> {
277
-
let reserved_dir = self
278
-
.resources
279
-
.reserved_key_dir()
280
-
.ok_or_else(|| anyhow!("No reserved key directory configured"))?;
281
-
282
-
let key_path = reserved_dir.join(key_did);
283
-
if !key_path.exists() {
284
-
return Ok(None);
285
-
}
286
-
287
-
let key_data = fs::read(key_path).await?;
288
-
let keypair = SigningKey::import(&key_data).context("Failed to import reserved key")?;
289
-
290
-
Ok(Some(Arc::new(keypair)))
291
-
}
325
+
let mut repo = Repo::load(self.storage.clone(), Some(current_root.cid)).await?;
326
+
let previous_data = repo.commit.data;
327
+
let write_ops: Vec<RecordWriteOp> = writes
328
+
.into_iter()
329
+
.map(write_to_op)
330
+
.collect::<Result<Vec<RecordWriteOp>>>()?;
331
+
// @TODO: Use repo signing key global config
332
+
let secp = Secp256k1::new();
333
+
let repo_private_key = env::var("PDS_REPO_SIGNING_KEY_K256_PRIVATE_KEY_HEX").unwrap();
334
+
let repo_secret_key =
335
+
SecretKey::from_slice(&hex::decode(repo_private_key.as_bytes()).unwrap()).unwrap();
336
+
let repo_signing_key = Keypair::from_secret_key(&secp, &repo_secret_key);
292
337
293
-
/// Clear a reserved keypair
294
-
pub(crate) async fn clear_reserved_keypair(
295
-
&self,
296
-
key_did: &str,
297
-
did: Option<&str>,
298
-
) -> Result<()> {
299
-
let reserved_dir = self
300
-
.resources
301
-
.reserved_key_dir()
302
-
.ok_or_else(|| anyhow!("No reserved key directory configured"))?;
338
+
let mut commit = repo
339
+
.format_commit(RecordWriteEnum::List(write_ops), repo_signing_key)
340
+
.await?;
303
341
304
-
// Remove key by DID
305
-
let key_path = reserved_dir.join(key_did);
306
-
if key_path.exists() {
307
-
fs::remove_file(key_path).await?;
308
-
}
342
+
// find blocks that would be deleted but are referenced by another record
343
+
let duplicate_record_cids = self
344
+
.get_duplicate_record_cids(commit.removed_cids.to_list(), delete_and_update_uris)
345
+
.await?;
346
+
for cid in duplicate_record_cids {
347
+
commit.removed_cids.delete(cid)
348
+
}
309
349
310
-
// If DID mapping provided, remove that too
311
-
if let Some(did_str) = did {
312
-
let did_path = reserved_dir.join(did_str);
313
-
if did_path.exists() {
314
-
fs::remove_file(did_path).await?;
350
+
// find blocks that are relevant to ops but not included in diff
351
+
// (for instance a record that was moved but cid stayed the same)
352
+
let new_record_blocks = commit.relevant_blocks.get_many(new_record_cids)?;
353
+
if !new_record_blocks.missing.is_empty() {
354
+
let missing_blocks = {
355
+
let storage_guard = self.storage.read().await;
356
+
storage_guard.get_blocks(new_record_blocks.missing).await?
357
+
};
358
+
commit.relevant_blocks.add_map(missing_blocks.blocks)?;
315
359
}
360
+
let commit_with_data_ops = CommitDataWithOps {
361
+
ops: commit_ops,
362
+
commit_data: commit,
363
+
prev_data: Some(previous_data),
364
+
};
365
+
Ok(commit_with_data_ops)
366
+
} else {
367
+
Err(FormatCommitError::MissingRepoRoot(self.did.clone()).into())
316
368
}
317
-
318
-
Ok(())
319
369
}
320
370
321
-
/// Store a PLC operation
322
-
pub(crate) async fn store_plc_op(&self, did: &str, op: &[u8]) -> Result<()> {
323
-
let location = self.get_location(did).await?;
324
-
let op_path = location.directory.join("did-op");
371
+
pub async fn index_writes(&self, writes: Vec<PreparedWrite>, rev: &str) -> Result<()> {
372
+
let now: &str = &rsky_common::now();
325
373
326
-
fs::write(op_path, op).await?;
374
+
let _ = stream::iter(writes)
375
+
.then(|write| async move {
376
+
Ok::<(), anyhow::Error>(match write {
377
+
PreparedWrite::Create(write) => {
378
+
let write_at_uri: AtUri = write.uri.try_into()?;
379
+
self.record
380
+
.index_record(
381
+
write_at_uri.clone(),
382
+
write.cid,
383
+
Some(write.record),
384
+
Some(write.action),
385
+
rev.to_owned(),
386
+
Some(now.to_string()),
387
+
)
388
+
.await?
389
+
}
390
+
PreparedWrite::Update(write) => {
391
+
let write_at_uri: AtUri = write.uri.try_into()?;
392
+
self.record
393
+
.index_record(
394
+
write_at_uri.clone(),
395
+
write.cid,
396
+
Some(write.record),
397
+
Some(write.action),
398
+
rev.to_owned(),
399
+
Some(now.to_string()),
400
+
)
401
+
.await?
402
+
}
403
+
PreparedWrite::Delete(write) => {
404
+
let write_at_uri: AtUri = write.uri.try_into()?;
405
+
self.record.delete_record(&write_at_uri).await?
406
+
}
407
+
})
408
+
})
409
+
.collect::<Vec<_>>()
410
+
.await
411
+
.into_iter()
412
+
.collect::<Result<Vec<_>, _>>()?;
327
413
Ok(())
328
414
}
329
415
330
-
/// Get a stored PLC operation
331
-
pub(crate) async fn get_plc_op(&self, did: &str) -> Result<Vec<u8>> {
332
-
let location = self.get_location(did).await?;
333
-
let op_path = location.directory.join("did-op");
334
-
335
-
let data = fs::read(op_path).await?;
336
-
Ok(data)
337
-
}
338
-
339
-
/// Clear a stored PLC operation
340
-
pub(crate) async fn clear_plc_op(&self, did: &str) -> Result<()> {
341
-
let location = self.get_location(did).await?;
342
-
let op_path = location.directory.join("did-op");
416
+
pub async fn destroy(&mut self) -> Result<()> {
417
+
let did: String = self.did.clone();
418
+
let storage_guard = self.storage.read().await;
419
+
let db: Arc<ActorDb> = storage_guard.db.clone();
420
+
use rsky_pds::schema::pds::blob::dsl as BlobSchema;
343
421
344
-
if op_path.exists() {
345
-
fs::remove_file(op_path).await?;
346
-
}
347
-
422
+
let blob_rows: Vec<String> = db
423
+
.run(move |conn| {
424
+
BlobSchema::blob
425
+
.filter(BlobSchema::did.eq(did))
426
+
.select(BlobSchema::cid)
427
+
.get_results(conn)
428
+
})
429
+
.await?;
430
+
let cids = blob_rows
431
+
.into_iter()
432
+
.map(|row| Ok(Cid::from_str(&row)?))
433
+
.collect::<Result<Vec<Cid>>>()?;
434
+
let _ = stream::iter(cids.chunks(500))
435
+
.then(|chunk| async { self.blob.blobstore.delete_many(chunk.to_vec()).await })
436
+
.collect::<Vec<_>>()
437
+
.await
438
+
.into_iter()
439
+
.collect::<Result<Vec<_>, _>>()?;
348
440
Ok(())
349
441
}
350
-
}
351
442
352
-
/// Ensure a path part is safe to use in a filename
353
-
fn assert_safe_path_part(part: &str) -> Result<()> {
354
-
let normalized = std::path::Path::new(part)
355
-
.file_name()
356
-
.and_then(|s| s.to_str())
357
-
.ok_or_else(|| anyhow!("Invalid path"))?;
443
+
pub async fn get_duplicate_record_cids(
444
+
&self,
445
+
cids: Vec<Cid>,
446
+
touched_uris: Vec<AtUri>,
447
+
) -> Result<Vec<Cid>> {
448
+
if touched_uris.is_empty() || cids.is_empty() {
449
+
return Ok(vec![]);
450
+
}
451
+
let did: String = self.did.clone();
452
+
let storage_guard = self.storage.read().await;
453
+
let db: Arc<ActorDb> = storage_guard.db.clone();
454
+
use rsky_pds::schema::pds::record::dsl as RecordSchema;
358
455
359
-
if part != normalized || part.starts_with('.') || part.contains('/') || part.contains('\\') {
360
-
bail!("Unsafe path part: {}", part);
456
+
let cid_strs: Vec<String> = cids.into_iter().map(|c| c.to_string()).collect();
457
+
let touched_uri_strs: Vec<String> = touched_uris.iter().map(|t| t.to_string()).collect();
458
+
let res: Vec<String> = db
459
+
.run(move |conn| {
460
+
RecordSchema::record
461
+
.filter(RecordSchema::did.eq(did))
462
+
.filter(RecordSchema::cid.eq_any(cid_strs))
463
+
.filter(RecordSchema::uri.ne_all(touched_uri_strs))
464
+
.select(RecordSchema::cid)
465
+
.get_results(conn)
466
+
})
467
+
.await?;
468
+
res.into_iter()
469
+
.map(|row| Cid::from_str(&row).map_err(|error| anyhow::Error::new(error)))
470
+
.collect::<Result<Vec<Cid>>>()
361
471
}
362
-
363
-
Ok(())
364
472
}
-329
src/actor_store/actor_store_handler.rs
-329
src/actor_store/actor_store_handler.rs
···
1
-
use std::path::PathBuf;
2
-
use std::sync::Arc;
3
-
4
-
use anyhow::{Context as _, Result, anyhow};
5
-
use futures::TryStreamExt;
6
-
use rsky_repo::repo::Repo;
7
-
use rsky_repo::types::{CommitData, CommitDataWithOps, PreparedWrite as RskyPreparedWrite};
8
-
9
-
use super::PreparedWrite;
10
-
use super::blob::{BackgroundQueue, BlobStorePlaceholder};
11
-
use super::db::ActorDb;
12
-
use super::preference::PreferenceHandler;
13
-
use super::record::RecordHandler;
14
-
use super::repo::RepoHandler;
15
-
use crate::SigningKey;
16
-
17
-
/// Unified handler for actor store operations.
18
-
pub(crate) struct ActorStoreHandler {
19
-
/// Actor DID
20
-
pub did: String,
21
-
/// Database connection
22
-
pub db: ActorDb,
23
-
/// Repository handler
24
-
pub repo: RepoHandler,
25
-
/// Record handler
26
-
pub record: RecordHandler,
27
-
/// Preference handler
28
-
pub pref: PreferenceHandler,
29
-
/// Background queue for async operations
30
-
pub background_queue: Option<BackgroundQueue>,
31
-
/// Signing keypair (required for write operations)
32
-
pub signing_key: Option<Arc<SigningKey>>,
33
-
}
34
-
35
-
impl ActorStoreHandler {
36
-
/// Create a new actor store handler with read-only capabilities
37
-
pub(crate) fn new_reader(db: ActorDb, did: String, blobstore: BlobStorePlaceholder) -> Self {
38
-
let record = RecordHandler::new(db.clone(), did.clone());
39
-
let pref = PreferenceHandler::new(db.clone(), did.clone());
40
-
let repo = RepoHandler::new_reader(db.clone(), blobstore, did.clone());
41
-
42
-
Self {
43
-
did,
44
-
db,
45
-
repo,
46
-
record,
47
-
pref,
48
-
background_queue: None,
49
-
signing_key: None,
50
-
}
51
-
}
52
-
53
-
/// Create a new actor store handler with read/write capabilities
54
-
pub(crate) fn new_writer(
55
-
db: ActorDb,
56
-
did: String,
57
-
blobstore: BlobStorePlaceholder,
58
-
signing_key: Arc<SigningKey>,
59
-
background_queue: BackgroundQueue,
60
-
) -> Self {
61
-
let record = RecordHandler::new_with_blobstore(db.clone(), blobstore.clone(), did.clone());
62
-
let pref = PreferenceHandler::new(db.clone(), did.clone());
63
-
let repo = RepoHandler::new_writer(
64
-
db.clone(),
65
-
blobstore,
66
-
did.clone(),
67
-
signing_key.clone(),
68
-
background_queue.clone(),
69
-
);
70
-
71
-
Self {
72
-
did,
73
-
db,
74
-
repo,
75
-
record,
76
-
pref,
77
-
background_queue: Some(background_queue),
78
-
signing_key: Some(signing_key),
79
-
}
80
-
}
81
-
82
-
/// Set signing key (needed for write operations)
83
-
pub(crate) fn with_signing_key(mut self, signing_key: Arc<SigningKey>) -> Self {
84
-
self.signing_key = Some(signing_key);
85
-
self
86
-
}
87
-
88
-
/// Set background queue (needed for async operations)
89
-
pub(crate) fn with_background_queue(mut self, queue: BackgroundQueue) -> Self {
90
-
self.background_queue = Some(queue);
91
-
self
92
-
}
93
-
94
-
// Repository Operations
95
-
// --------------------
96
-
97
-
/// Try to load repository
98
-
pub(crate) async fn maybe_load_repo(&self) -> Result<Option<Repo>> {
99
-
self.repo.maybe_load_repo().await
100
-
}
101
-
102
-
/// Get repository root CID
103
-
pub(crate) async fn get_repo_root(&self) -> Result<Option<atrium_repo::Cid>> {
104
-
self.repo.get_repo_root().await
105
-
}
106
-
107
-
/// Create a new repository with prepared writes
108
-
pub(crate) async fn create_repo(
109
-
&self,
110
-
writes: Vec<PreparedWrite>,
111
-
) -> Result<CommitDataWithOps> {
112
-
if self.signing_key.is_none() {
113
-
return Err(anyhow!(
114
-
"No signing key available for create_repo operation"
115
-
));
116
-
}
117
-
118
-
let rsky_writes = writes
119
-
.into_iter()
120
-
.map(|w| RskyPreparedWrite::from(w))
121
-
.collect::<Vec<_>>();
122
-
123
-
self.repo.create_repo(rsky_writes).await
124
-
}
125
-
126
-
/// Process writes to the repository
127
-
pub(crate) async fn process_writes(
128
-
&self,
129
-
writes: Vec<PreparedWrite>,
130
-
swap_commit_cid: Option<atrium_repo::Cid>,
131
-
) -> Result<CommitDataWithOps> {
132
-
if self.signing_key.is_none() {
133
-
return Err(anyhow!(
134
-
"No signing key available for process_writes operation"
135
-
));
136
-
}
137
-
138
-
let rsky_writes = writes
139
-
.into_iter()
140
-
.map(|w| RskyPreparedWrite::from(w))
141
-
.collect::<Vec<_>>();
142
-
143
-
self.repo.process_writes(rsky_writes, swap_commit_cid).await
144
-
}
145
-
146
-
/// Import a repository from external data
147
-
pub(crate) async fn process_import_repo(
148
-
&self,
149
-
commit: CommitData,
150
-
writes: Vec<PreparedWrite>,
151
-
) -> Result<()> {
152
-
let rsky_writes = writes
153
-
.into_iter()
154
-
.map(|w| RskyPreparedWrite::from(w))
155
-
.collect::<Vec<_>>();
156
-
157
-
// First index the records
158
-
self.repo.index_writes(&rsky_writes, &commit.rev).await?;
159
-
160
-
// Then process the commit
161
-
self.repo.storage.apply_commit(commit.clone(), None).await?;
162
-
163
-
// Finally process any blobs
164
-
if let Some(bg_queue) = &self.background_queue {
165
-
self.repo
166
-
.blob_transactor
167
-
.process_write_blobs(&commit.rev, rsky_writes)
168
-
.await?;
169
-
} else {
170
-
return Err(anyhow!(
171
-
"Background queue required for process_import_repo operation"
172
-
));
173
-
}
174
-
175
-
Ok(())
176
-
}
177
-
178
-
/// Get sync event data for replication
179
-
pub(crate) async fn get_sync_event_data(&self) -> Result<super::repo::SyncEventData> {
180
-
self.repo.get_sync_event_data().await
181
-
}
182
-
183
-
/// Destroy the repository and all associated data
184
-
pub(crate) async fn destroy(&self) -> Result<()> {
185
-
// Get all blob CIDs
186
-
let blob_cids = self.repo.blob.get_blob_cids().await?;
187
-
188
-
// Delete all blobs
189
-
if !blob_cids.is_empty() {
190
-
self.repo
191
-
.blob_transactor
192
-
.blobstore
193
-
.delete_many(blob_cids.clone())
194
-
.await?;
195
-
}
196
-
197
-
Ok(())
198
-
}
199
-
200
-
// Record Operations
201
-
// ----------------
202
-
203
-
/// Get a specific record
204
-
pub(crate) async fn get_record(
205
-
&self,
206
-
uri: &rsky_syntax::aturi::AtUri,
207
-
cid: Option<&str>,
208
-
include_soft_deleted: bool,
209
-
) -> Result<Option<super::record::RecordData>> {
210
-
self.record.get_record(uri, cid, include_soft_deleted).await
211
-
}
212
-
213
-
/// List collections in the repository
214
-
pub(crate) async fn list_collections(&self) -> Result<Vec<String>> {
215
-
self.record.list_collections().await
216
-
}
217
-
218
-
/// List records in a collection
219
-
pub(crate) async fn list_records_for_collection(
220
-
&self,
221
-
opts: super::record::ListRecordsOptions,
222
-
) -> Result<Vec<super::record::RecordData>> {
223
-
self.record.list_records_for_collection(opts).await
224
-
}
225
-
226
-
/// Get record count
227
-
pub(crate) async fn record_count(&self) -> Result<i64> {
228
-
self.record.record_count().await
229
-
}
230
-
231
-
/// Update record takedown status
232
-
pub(crate) async fn update_record_takedown_status(
233
-
&self,
234
-
uri: &rsky_syntax::aturi::AtUri,
235
-
takedown: atrium_api::com::atproto::admin::defs::StatusAttr,
236
-
) -> Result<()> {
237
-
self.record
238
-
.update_record_takedown_status(uri, takedown)
239
-
.await
240
-
}
241
-
242
-
// Preference Operations
243
-
// -------------------
244
-
245
-
/// Get preferences for a namespace
246
-
pub(crate) async fn get_preferences(
247
-
&self,
248
-
namespace: Option<&str>,
249
-
scope: &str,
250
-
) -> Result<Vec<super::preference::AccountPreference>> {
251
-
self.pref.get_preferences(namespace, scope).await
252
-
}
253
-
254
-
/// Put preferences for a namespace
255
-
pub(crate) async fn put_preferences(
256
-
&self,
257
-
values: Vec<super::preference::AccountPreference>,
258
-
namespace: &str,
259
-
scope: &str,
260
-
) -> Result<()> {
261
-
self.pref.put_preferences(values, namespace, scope).await
262
-
}
263
-
264
-
// Blob Operations
265
-
// --------------
266
-
267
-
/// Get blob metadata
268
-
pub(crate) async fn get_blob_metadata(
269
-
&self,
270
-
cid: &atrium_repo::Cid,
271
-
) -> Result<super::blob::BlobMetadata> {
272
-
self.repo.blob.get_blob_metadata(cid).await
273
-
}
274
-
275
-
/// Get blob data
276
-
pub(crate) async fn get_blob(&self, cid: &atrium_repo::Cid) -> Result<super::blob::BlobData> {
277
-
self.repo.blob.get_blob(cid).await
278
-
}
279
-
280
-
/// Update blob takedown status
281
-
pub(crate) async fn update_blob_takedown_status(
282
-
&self,
283
-
cid: atrium_repo::Cid,
284
-
takedown: atrium_api::com::atproto::admin::defs::StatusAttr,
285
-
) -> Result<()> {
286
-
self.repo
287
-
.blob
288
-
.update_blob_takedown_status(cid, takedown)
289
-
.await
290
-
}
291
-
292
-
/// Upload blob and get metadata
293
-
pub(crate) async fn upload_blob_and_get_metadata(
294
-
&self,
295
-
user_suggested_mime: &str,
296
-
blob_bytes: &[u8],
297
-
) -> Result<super::blob::BlobMetadata> {
298
-
self.repo
299
-
.blob
300
-
.upload_blob_and_get_metadata(user_suggested_mime, blob_bytes)
301
-
.await
302
-
}
303
-
304
-
/// Count blobs
305
-
pub(crate) async fn blob_count(&self) -> Result<i64> {
306
-
self.repo.blob.blob_count().await
307
-
}
308
-
309
-
// Transaction Support
310
-
// -----------------
311
-
312
-
/// Execute a transaction
313
-
pub(crate) async fn transaction<T, F>(&self, f: F) -> Result<T>
314
-
where
315
-
F: FnOnce(&mut diesel::SqliteConnection) -> Result<T> + Send,
316
-
T: Send + 'static,
317
-
{
318
-
self.db.transaction(f).await
319
-
}
320
-
321
-
/// Execute a database operation with retries
322
-
pub(crate) async fn run<F, T>(&self, operation: F) -> Result<T>
323
-
where
324
-
F: FnOnce(&mut diesel::SqliteConnection) -> diesel::QueryResult<T> + Send,
325
-
T: Send + 'static,
326
-
{
327
-
self.db.run(operation).await
328
-
}
329
-
}
-48
src/actor_store/actor_store_resources.rs
-48
src/actor_store/actor_store_resources.rs
···
1
-
use std::path::PathBuf;
2
-
use std::sync::Arc;
3
-
4
-
use super::blob::{BackgroundQueue, BlobStorePlaceholder};
5
-
6
-
pub(crate) struct ActorStoreResources {
7
-
// Factory function to create blobstore instances
8
-
blobstore_factory: Arc<dyn Fn(String) -> BlobStorePlaceholder + Send + Sync>,
9
-
// Shared background queue
10
-
background_queue: Arc<BackgroundQueue>,
11
-
// Optional directory for reserved keys
12
-
reserved_key_dir: Option<PathBuf>,
13
-
}
14
-
15
-
impl ActorStoreResources {
16
-
// Simple constructor with minimal parameters
17
-
pub(crate) fn new(
18
-
blobstore_factory: impl Fn(String) -> BlobStorePlaceholder + Send + Sync + 'static,
19
-
concurrency: usize,
20
-
) -> Self {
21
-
Self {
22
-
blobstore_factory: Arc::new(blobstore_factory),
23
-
background_queue: Arc::new(BackgroundQueue::new(concurrency)),
24
-
reserved_key_dir: None,
25
-
}
26
-
}
27
-
28
-
// Set reserved key directory
29
-
pub(crate) fn with_reserved_key_dir(mut self, dir: impl Into<PathBuf>) -> Self {
30
-
self.reserved_key_dir = Some(dir.into());
31
-
self
32
-
}
33
-
34
-
// Get a blobstore for a DID
35
-
pub(crate) fn blobstore(&self, did: String) -> BlobStorePlaceholder {
36
-
(self.blobstore_factory)(did)
37
-
}
38
-
39
-
// Get the background queue
40
-
pub(crate) fn background_queue(&self) -> Arc<BackgroundQueue> {
41
-
self.background_queue.clone()
42
-
}
43
-
44
-
// Get the reserved key directory
45
-
pub(crate) fn reserved_key_dir(&self) -> Option<&PathBuf> {
46
-
self.reserved_key_dir.as_ref()
47
-
}
48
-
}
+578
src/actor_store/blob.rs
+578
src/actor_store/blob.rs
···
1
+
//! Blob storage and retrieval for the actor store.
2
+
//! Based on https://github.com/blacksky-algorithms/rsky/blob/main/rsky-pds/src/actor_store/blob/mod.rs
3
+
//! blacksky-algorithms/rsky is licensed under the Apache License 2.0
4
+
//!
5
+
//! Modified for SQLite backend
6
+
7
+
use anyhow::{Result, bail};
8
+
use cidv10::Cid;
9
+
use diesel::dsl::{count_distinct, exists, not};
10
+
use diesel::result::Error;
11
+
use diesel::sql_types::{Integer, Nullable, Text};
12
+
use diesel::*;
13
+
use futures::stream::{self, StreamExt};
14
+
use futures::try_join;
15
+
use rsky_pds::actor_store::blob::sha256_stream;
16
+
// use rocket::data::{Data, ToByteUnit};
17
+
// use rocket::form::validate::Contains;
18
+
use rsky_common::ipld::sha256_raw_to_cid;
19
+
use rsky_common::now;
20
+
use rsky_lexicon::blob_refs::BlobRef;
21
+
use rsky_lexicon::com::atproto::admin::StatusAttr;
22
+
use rsky_lexicon::com::atproto::repo::ListMissingBlobsRefRecordBlob;
23
+
use rsky_pds::actor_store::blob::{
24
+
BlobMetadata, GetBlobMetadataOutput, GetBlobOutput, ListBlobsOpts, ListMissingBlobsOpts,
25
+
verify_blob,
26
+
};
27
+
use rsky_pds::image;
28
+
use rsky_pds::models::models;
29
+
use rsky_repo::error::BlobError;
30
+
use rsky_repo::types::{PreparedBlobRef, PreparedWrite};
31
+
use sha2::{Digest, Sha256};
32
+
33
+
use super::ActorDb;
34
+
use super::sql_blob::BlobStoreSql;
35
+
36
+
pub struct BlobReader {
37
+
pub blobstore: BlobStoreSql,
38
+
pub did: String,
39
+
pub db: ActorDb,
40
+
}
41
+
42
+
// Basically handles getting blob records from db
43
+
impl BlobReader {
44
+
pub fn new(blobstore: BlobStoreSql, db: ActorDb) -> Self {
45
+
BlobReader {
46
+
did: blobstore.bucket.clone(),
47
+
blobstore,
48
+
db,
49
+
}
50
+
}
51
+
52
+
pub async fn get_blob_metadata(&self, cid: Cid) -> Result<GetBlobMetadataOutput> {
53
+
use rsky_pds::schema::pds::blob::dsl as BlobSchema;
54
+
55
+
let did = self.did.clone();
56
+
let found = self
57
+
.db
58
+
.run(move |conn| {
59
+
BlobSchema::blob
60
+
.filter(BlobSchema::did.eq(did))
61
+
.filter(BlobSchema::cid.eq(cid.to_string()))
62
+
.filter(BlobSchema::takedownRef.is_null())
63
+
.select(models::Blob::as_select())
64
+
.first(conn)
65
+
.optional()
66
+
})
67
+
.await?;
68
+
69
+
match found {
70
+
None => bail!("Blob not found"),
71
+
Some(found) => Ok(GetBlobMetadataOutput {
72
+
size: found.size,
73
+
mime_type: Some(found.mime_type),
74
+
}),
75
+
}
76
+
}
77
+
78
+
pub async fn get_blob(&self, cid: Cid) -> Result<GetBlobOutput> {
79
+
let metadata = self.get_blob_metadata(cid).await?;
80
+
let blob_stream = match self.blobstore.get_stream(cid).await {
81
+
Ok(res) => res,
82
+
Err(e) => {
83
+
return match e.downcast_ref() {
84
+
Some(GetObjectError::NoSuchKey(key)) => {
85
+
Err(anyhow::Error::new(GetObjectError::NoSuchKey(key.clone())))
86
+
}
87
+
_ => bail!(e.to_string()),
88
+
};
89
+
}
90
+
};
91
+
Ok(GetBlobOutput {
92
+
size: metadata.size,
93
+
mime_type: metadata.mime_type,
94
+
stream: blob_stream,
95
+
})
96
+
}
97
+
98
+
pub async fn get_records_for_blob(&self, cid: Cid) -> Result<Vec<String>> {
99
+
use rsky_pds::schema::pds::record_blob::dsl as RecordBlobSchema;
100
+
101
+
let did = self.did.clone();
102
+
let res = self
103
+
.db
104
+
.run(move |conn| {
105
+
let results = RecordBlobSchema::record_blob
106
+
.filter(RecordBlobSchema::blobCid.eq(cid.to_string()))
107
+
.filter(RecordBlobSchema::did.eq(did))
108
+
.select(models::RecordBlob::as_select())
109
+
.get_results(conn)?;
110
+
Ok::<_, Error>(results.into_iter().map(|row| row.record_uri))
111
+
})
112
+
.await?
113
+
.collect::<Vec<String>>();
114
+
115
+
Ok(res)
116
+
}
117
+
118
+
pub async fn upload_blob_and_get_metadata(
119
+
&self,
120
+
user_suggested_mime: String,
121
+
blob: Data<'_>,
122
+
) -> Result<BlobMetadata> {
123
+
let blob_stream = blob.open(100.mebibytes());
124
+
let bytes = blob_stream.into_bytes().await?;
125
+
let size = bytes.n.written;
126
+
let bytes = bytes.into_inner();
127
+
let (temp_key, sha256, img_info, sniffed_mime) = try_join!(
128
+
self.blobstore.put_temp(bytes.clone()),
129
+
sha256_stream(bytes.clone()),
130
+
image::maybe_get_info(bytes.clone()),
131
+
image::mime_type_from_bytes(bytes.clone())
132
+
)?;
133
+
134
+
let cid = sha256_raw_to_cid(sha256);
135
+
let mime_type = sniffed_mime.unwrap_or(user_suggested_mime);
136
+
137
+
Ok(BlobMetadata {
138
+
temp_key,
139
+
size: size as i64,
140
+
cid,
141
+
mime_type,
142
+
width: if let Some(ref info) = img_info {
143
+
Some(info.width as i32)
144
+
} else {
145
+
None
146
+
},
147
+
height: if let Some(info) = img_info {
148
+
Some(info.height as i32)
149
+
} else {
150
+
None
151
+
},
152
+
})
153
+
}
154
+
155
+
pub async fn track_untethered_blob(&self, metadata: BlobMetadata) -> Result<BlobRef> {
156
+
use rsky_pds::schema::pds::blob::dsl as BlobSchema;
157
+
158
+
let did = self.did.clone();
159
+
self.db.run(move |conn| {
160
+
let BlobMetadata {
161
+
temp_key,
162
+
size,
163
+
cid,
164
+
mime_type,
165
+
width,
166
+
height,
167
+
} = metadata;
168
+
let created_at = now();
169
+
170
+
let found = BlobSchema::blob
171
+
.filter(BlobSchema::did.eq(&did))
172
+
.filter(BlobSchema::cid.eq(&cid.to_string()))
173
+
.select(models::Blob::as_select())
174
+
.first(conn)
175
+
.optional()?;
176
+
177
+
if let Some(found) = found {
178
+
if found.takedown_ref.is_some() {
179
+
bail!("Blob has been takendown, cannot re-upload")
180
+
}
181
+
}
182
+
183
+
let upsert = sql_query("INSERT INTO pds.blob (cid, did, \"mimeType\", size, \"tempKey\", width, height, \"createdAt\", \"takedownRef\") \
184
+
VALUES \
185
+
($1, $2, $3, $4, $5, $6, $7, $8, $9) \
186
+
ON CONFLICT (cid, did) DO UPDATE \
187
+
SET \"tempKey\" = EXCLUDED.\"tempKey\" \
188
+
WHERE pds.blob.\"tempKey\" is not null;");
189
+
upsert
190
+
.bind::<Text, _>(&cid.to_string())
191
+
.bind::<Text, _>(&did)
192
+
.bind::<Text, _>(&mime_type)
193
+
.bind::<Integer, _>(size as i32)
194
+
.bind::<Nullable<Text>, _>(Some(temp_key.clone()))
195
+
.bind::<Nullable<Integer>, _>(width)
196
+
.bind::<Nullable<Integer>, _>(height)
197
+
.bind::<Text, _>(created_at)
198
+
.bind::<Nullable<Text>, _>(None as Option<String>)
199
+
.execute(conn)?;
200
+
201
+
Ok(BlobRef::new(cid, mime_type, size, None))
202
+
}).await
203
+
}
204
+
205
+
pub async fn process_write_blobs(&self, writes: Vec<PreparedWrite>) -> Result<()> {
206
+
self.delete_dereferenced_blobs(writes.clone()).await?;
207
+
let _ = stream::iter(writes)
208
+
.then(|write| async move {
209
+
Ok::<(), anyhow::Error>(match write {
210
+
PreparedWrite::Create(w) => {
211
+
for blob in w.blobs {
212
+
self.verify_blob_and_make_permanent(blob.clone()).await?;
213
+
self.associate_blob(blob, w.uri.clone()).await?;
214
+
}
215
+
}
216
+
PreparedWrite::Update(w) => {
217
+
for blob in w.blobs {
218
+
self.verify_blob_and_make_permanent(blob.clone()).await?;
219
+
self.associate_blob(blob, w.uri.clone()).await?;
220
+
}
221
+
}
222
+
_ => (),
223
+
})
224
+
})
225
+
.collect::<Vec<_>>()
226
+
.await
227
+
.into_iter()
228
+
.collect::<Result<Vec<_>, _>>()?;
229
+
Ok(())
230
+
}
231
+
232
+
pub async fn delete_dereferenced_blobs(&self, writes: Vec<PreparedWrite>) -> Result<()> {
233
+
use rsky_pds::schema::pds::blob::dsl as BlobSchema;
234
+
use rsky_pds::schema::pds::record_blob::dsl as RecordBlobSchema;
235
+
236
+
let uris: Vec<String> = writes
237
+
.clone()
238
+
.into_iter()
239
+
.filter_map(|w| match w {
240
+
PreparedWrite::Delete(w) => Some(w.uri),
241
+
PreparedWrite::Update(w) => Some(w.uri),
242
+
_ => None,
243
+
})
244
+
.collect();
245
+
if uris.is_empty() {
246
+
return Ok(());
247
+
}
248
+
249
+
let deleted_repo_blobs: Vec<models::RecordBlob> = self
250
+
.db
251
+
.run(move |conn| {
252
+
delete(RecordBlobSchema::record_blob)
253
+
.filter(RecordBlobSchema::recordUri.eq_any(uris))
254
+
.get_results(conn)
255
+
})
256
+
.await?
257
+
.into_iter()
258
+
.collect::<Vec<models::RecordBlob>>();
259
+
if deleted_repo_blobs.is_empty() {
260
+
return Ok(());
261
+
}
262
+
263
+
let deleted_repo_blob_cids: Vec<String> = deleted_repo_blobs
264
+
.into_iter()
265
+
.map(|row| row.blob_cid)
266
+
.collect::<Vec<String>>();
267
+
268
+
let x = deleted_repo_blob_cids.clone();
269
+
let mut duplicated_cids: Vec<String> = self
270
+
.db
271
+
.run(move |conn| {
272
+
RecordBlobSchema::record_blob
273
+
.select(RecordBlobSchema::blobCid)
274
+
.filter(RecordBlobSchema::blobCid.eq_any(&x))
275
+
.load(conn)
276
+
})
277
+
.await?
278
+
.into_iter()
279
+
.collect::<Vec<String>>();
280
+
281
+
let mut new_blob_cids: Vec<String> = writes
282
+
.into_iter()
283
+
.map(|w| match w {
284
+
PreparedWrite::Create(w) => w.blobs,
285
+
PreparedWrite::Update(w) => w.blobs,
286
+
PreparedWrite::Delete(_) => Vec::new(),
287
+
})
288
+
.collect::<Vec<Vec<PreparedBlobRef>>>()
289
+
.into_iter()
290
+
.flat_map(|v: Vec<PreparedBlobRef>| v.into_iter().map(|b| b.cid.to_string()))
291
+
.collect();
292
+
let mut cids_to_keep = Vec::new();
293
+
cids_to_keep.append(&mut new_blob_cids);
294
+
cids_to_keep.append(&mut duplicated_cids);
295
+
296
+
let cids_to_delete = deleted_repo_blob_cids
297
+
.into_iter()
298
+
.filter_map(|cid: String| match cids_to_keep.contains(&cid) {
299
+
true => Some(cid),
300
+
false => None,
301
+
})
302
+
.collect::<Vec<String>>();
303
+
if cids_to_delete.is_empty() {
304
+
return Ok(());
305
+
}
306
+
307
+
let y = cids_to_delete.clone();
308
+
self.db
309
+
.run(move |conn| {
310
+
delete(BlobSchema::blob)
311
+
.filter(BlobSchema::cid.eq_any(&y))
312
+
.execute(conn)
313
+
})
314
+
.await?;
315
+
316
+
// Original code queues a background job to delete by CID from S3 compatible blobstore
317
+
let _ = stream::iter(cids_to_delete)
318
+
.then(|cid| async { self.blobstore.delete(cid).await })
319
+
.collect::<Vec<_>>()
320
+
.await
321
+
.into_iter()
322
+
.collect::<Result<Vec<_>, _>>()?;
323
+
Ok(())
324
+
}
325
+
326
+
pub async fn verify_blob_and_make_permanent(&self, blob: PreparedBlobRef) -> Result<()> {
327
+
use rsky_pds::schema::pds::blob::dsl as BlobSchema;
328
+
329
+
let found = self
330
+
.db
331
+
.run(move |conn| {
332
+
BlobSchema::blob
333
+
.filter(
334
+
BlobSchema::cid
335
+
.eq(blob.cid.to_string())
336
+
.and(BlobSchema::takedownRef.is_null()),
337
+
)
338
+
.select(models::Blob::as_select())
339
+
.first(conn)
340
+
.optional()
341
+
})
342
+
.await?;
343
+
if let Some(found) = found {
344
+
verify_blob(&blob, &found).await?;
345
+
if let Some(ref temp_key) = found.temp_key {
346
+
self.blobstore
347
+
.make_permanent(temp_key.clone(), blob.cid)
348
+
.await?;
349
+
}
350
+
self.db
351
+
.run(move |conn| {
352
+
update(BlobSchema::blob)
353
+
.filter(BlobSchema::tempKey.eq(found.temp_key))
354
+
.set(BlobSchema::tempKey.eq::<Option<String>>(None))
355
+
.execute(conn)
356
+
})
357
+
.await?;
358
+
Ok(())
359
+
} else {
360
+
bail!("Cound not find blob: {:?}", blob.cid.to_string())
361
+
}
362
+
}
363
+
364
+
pub async fn associate_blob(&self, blob: PreparedBlobRef, _record_uri: String) -> Result<()> {
365
+
use rsky_pds::schema::pds::record_blob::dsl as RecordBlobSchema;
366
+
367
+
let cid = blob.cid.to_string();
368
+
let record_uri = _record_uri;
369
+
let did = self.did.clone();
370
+
self.db
371
+
.run(move |conn| {
372
+
insert_into(RecordBlobSchema::record_blob)
373
+
.values((
374
+
RecordBlobSchema::blobCid.eq(cid),
375
+
RecordBlobSchema::recordUri.eq(record_uri),
376
+
RecordBlobSchema::did.eq(&did),
377
+
))
378
+
.on_conflict_do_nothing()
379
+
.execute(conn)
380
+
})
381
+
.await?;
382
+
Ok(())
383
+
}
384
+
385
+
pub async fn blob_count(&self) -> Result<i64> {
386
+
use rsky_pds::schema::pds::blob::dsl as BlobSchema;
387
+
388
+
let did = self.did.clone();
389
+
self.db
390
+
.run(move |conn| {
391
+
let res = BlobSchema::blob
392
+
.filter(BlobSchema::did.eq(&did))
393
+
.count()
394
+
.get_result(conn)?;
395
+
Ok(res)
396
+
})
397
+
.await
398
+
}
399
+
400
+
pub async fn record_blob_count(&self) -> Result<i64> {
401
+
use rsky_pds::schema::pds::record_blob::dsl as RecordBlobSchema;
402
+
403
+
let did = self.did.clone();
404
+
self.db
405
+
.run(move |conn| {
406
+
let res: i64 = RecordBlobSchema::record_blob
407
+
.filter(RecordBlobSchema::did.eq(&did))
408
+
.select(count_distinct(RecordBlobSchema::blobCid))
409
+
.get_result(conn)?;
410
+
Ok(res)
411
+
})
412
+
.await
413
+
}
414
+
415
+
pub async fn list_missing_blobs(
416
+
&self,
417
+
opts: ListMissingBlobsOpts,
418
+
) -> Result<Vec<ListMissingBlobsRefRecordBlob>> {
419
+
use rsky_pds::schema::pds::blob::dsl as BlobSchema;
420
+
use rsky_pds::schema::pds::record_blob::dsl as RecordBlobSchema;
421
+
422
+
let did = self.did.clone();
423
+
self.db
424
+
.run(move |conn| {
425
+
let ListMissingBlobsOpts { cursor, limit } = opts;
426
+
427
+
if limit > 1000 {
428
+
bail!("Limit too high. Max: 1000.");
429
+
}
430
+
431
+
let res: Vec<models::RecordBlob> = if let Some(cursor) = cursor {
432
+
RecordBlobSchema::record_blob
433
+
.limit(limit as i64)
434
+
.filter(not(exists(
435
+
BlobSchema::blob
436
+
.filter(BlobSchema::cid.eq(RecordBlobSchema::blobCid))
437
+
.filter(BlobSchema::did.eq(&did))
438
+
.select(models::Blob::as_select()),
439
+
)))
440
+
.filter(RecordBlobSchema::blobCid.gt(cursor))
441
+
.filter(RecordBlobSchema::did.eq(&did))
442
+
.select(models::RecordBlob::as_select())
443
+
.order(RecordBlobSchema::blobCid.asc())
444
+
.distinct_on(RecordBlobSchema::blobCid)
445
+
.get_results(conn)?
446
+
} else {
447
+
RecordBlobSchema::record_blob
448
+
.limit(limit as i64)
449
+
.filter(not(exists(
450
+
BlobSchema::blob
451
+
.filter(BlobSchema::cid.eq(RecordBlobSchema::blobCid))
452
+
.filter(BlobSchema::did.eq(&did))
453
+
.select(models::Blob::as_select()),
454
+
)))
455
+
.filter(RecordBlobSchema::did.eq(&did))
456
+
.select(models::RecordBlob::as_select())
457
+
.order(RecordBlobSchema::blobCid.asc())
458
+
.distinct_on(RecordBlobSchema::blobCid)
459
+
.get_results(conn)?
460
+
};
461
+
462
+
Ok(res
463
+
.into_iter()
464
+
.map(|row| ListMissingBlobsRefRecordBlob {
465
+
cid: row.blob_cid,
466
+
record_uri: row.record_uri,
467
+
})
468
+
.collect())
469
+
})
470
+
.await
471
+
}
472
+
473
+
pub async fn list_blobs(&self, opts: ListBlobsOpts) -> Result<Vec<String>> {
474
+
use rsky_pds::schema::pds::record::dsl as RecordSchema;
475
+
use rsky_pds::schema::pds::record_blob::dsl as RecordBlobSchema;
476
+
let ListBlobsOpts {
477
+
since,
478
+
cursor,
479
+
limit,
480
+
} = opts;
481
+
482
+
let res: Vec<String> = if let Some(since) = since {
483
+
let mut builder = RecordBlobSchema::record_blob
484
+
.inner_join(
485
+
RecordSchema::record.on(RecordSchema::uri.eq(RecordBlobSchema::recordUri)),
486
+
)
487
+
.filter(RecordSchema::repoRev.gt(since))
488
+
.select(RecordBlobSchema::blobCid)
489
+
.distinct()
490
+
.order(RecordBlobSchema::blobCid.asc())
491
+
.limit(limit as i64)
492
+
.into_boxed();
493
+
494
+
if let Some(cursor) = cursor {
495
+
builder = builder.filter(RecordBlobSchema::blobCid.gt(cursor));
496
+
}
497
+
self.db.run(move |conn| builder.load(conn)).await?
498
+
} else {
499
+
let mut builder = RecordBlobSchema::record_blob
500
+
.select(RecordBlobSchema::blobCid)
501
+
.distinct()
502
+
.order(RecordBlobSchema::blobCid.asc())
503
+
.limit(limit as i64)
504
+
.into_boxed();
505
+
506
+
if let Some(cursor) = cursor {
507
+
builder = builder.filter(RecordBlobSchema::blobCid.gt(cursor));
508
+
}
509
+
self.db.run(move |conn| builder.load(conn)).await?
510
+
};
511
+
Ok(res)
512
+
}
513
+
514
+
pub async fn get_blob_takedown_status(&self, cid: Cid) -> Result<Option<StatusAttr>> {
515
+
use rsky_pds::schema::pds::blob::dsl as BlobSchema;
516
+
517
+
self.db
518
+
.run(move |conn| {
519
+
let res = BlobSchema::blob
520
+
.filter(BlobSchema::cid.eq(cid.to_string()))
521
+
.select(models::Blob::as_select())
522
+
.first(conn)
523
+
.optional()?;
524
+
match res {
525
+
None => Ok(None),
526
+
Some(res) => match res.takedown_ref {
527
+
None => Ok(Some(StatusAttr {
528
+
applied: false,
529
+
r#ref: None,
530
+
})),
531
+
Some(takedown_ref) => Ok(Some(StatusAttr {
532
+
applied: true,
533
+
r#ref: Some(takedown_ref),
534
+
})),
535
+
},
536
+
}
537
+
})
538
+
.await
539
+
}
540
+
541
+
// Transactors
542
+
// -------------------
543
+
544
+
pub async fn update_blob_takedown_status(&self, blob: Cid, takedown: StatusAttr) -> Result<()> {
545
+
use rsky_pds::schema::pds::blob::dsl as BlobSchema;
546
+
547
+
let takedown_ref: Option<String> = match takedown.applied {
548
+
true => match takedown.r#ref {
549
+
Some(takedown_ref) => Some(takedown_ref),
550
+
None => Some(now()),
551
+
},
552
+
false => None,
553
+
};
554
+
555
+
let blob = self
556
+
.db
557
+
.run(move |conn| {
558
+
update(BlobSchema::blob)
559
+
.filter(BlobSchema::cid.eq(blob.to_string()))
560
+
.set(BlobSchema::takedownRef.eq(takedown_ref))
561
+
.execute(conn)?;
562
+
Ok::<_, Error>(blob)
563
+
})
564
+
.await?;
565
+
566
+
let res = match takedown.applied {
567
+
true => self.blobstore.quarantine(blob).await,
568
+
false => self.blobstore.unquarantine(blob).await,
569
+
};
570
+
match res {
571
+
Ok(_) => Ok(()),
572
+
Err(e) => match e.downcast_ref() {
573
+
Some(BlobError::BlobNotFoundError) => Ok(()),
574
+
None => Err(e),
575
+
},
576
+
}
577
+
}
578
+
}
-76
src/actor_store/blob/background.rs
-76
src/actor_store/blob/background.rs
···
1
-
use std::future::Future;
2
-
use std::sync::Arc;
3
-
use tokio::sync::{Mutex, Semaphore};
4
-
use tokio::task::{self, JoinHandle};
5
-
use tracing::error;
6
-
7
-
/// Background Queue for asynchronous processing tasks
8
-
///
9
-
/// A simple queue for in-process, out-of-band/backgrounded work
10
-
#[derive(Clone)]
11
-
pub struct BackgroundQueue {
12
-
semaphore: Arc<Semaphore>,
13
-
tasks: Arc<Mutex<Vec<JoinHandle<()>>>>,
14
-
destroyed: Arc<Mutex<bool>>,
15
-
}
16
-
17
-
impl BackgroundQueue {
18
-
/// Create a new BackgroundQueue with the specified concurrency limit
19
-
pub fn new(concurrency: usize) -> Self {
20
-
Self {
21
-
semaphore: Arc::new(Semaphore::new(concurrency)),
22
-
tasks: Arc::new(Mutex::new(Vec::new())),
23
-
destroyed: Arc::new(Mutex::new(false)),
24
-
}
25
-
}
26
-
27
-
/// Add a task to the queue
28
-
pub async fn add<F>(&self, future: F)
29
-
where
30
-
F: Future<Output = ()> + Send + 'static,
31
-
{
32
-
let destroyed = *self.destroyed.lock().await;
33
-
if destroyed {
34
-
return;
35
-
}
36
-
37
-
let permit = match self.semaphore.clone().acquire_owned().await {
38
-
Ok(p) => p,
39
-
Err(_) => {
40
-
error!("Failed to acquire semaphore permit for background task");
41
-
return;
42
-
}
43
-
};
44
-
45
-
let tasks = self.tasks.clone();
46
-
47
-
let handle = task::spawn(async move {
48
-
future.await;
49
-
50
-
// Catch any panics to prevent task failures from propagating
51
-
if let Err(e) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {})) {
52
-
error!("Background queue task panicked: {:?}", e);
53
-
}
54
-
55
-
// Release the semaphore permit
56
-
drop(permit);
57
-
});
58
-
59
-
// Store the handle for later cleanup
60
-
tasks.lock().await.push(handle);
61
-
}
62
-
63
-
/// Wait for all tasks to finish
64
-
pub async fn process_all(&self) {
65
-
let mut handles = self.tasks.lock().await;
66
-
while let Some(handle) = handles.pop() {
67
-
let _ = handle.await;
68
-
}
69
-
}
70
-
71
-
/// Stop accepting new tasks, wait for all to finish
72
-
pub async fn destroy(&self) {
73
-
*self.destroyed.lock().await = true;
74
-
self.process_all().await;
75
-
}
76
-
}
-731
src/actor_store/blob/mod.rs
-731
src/actor_store/blob/mod.rs
···
1
-
// bluepds/src/actor_store/blob/mod.rs
2
-
3
-
//! Blob storage and retrieval for the actor store.
4
-
5
-
use std::str::FromStr;
6
-
7
-
use anyhow::{Context as _, Result, bail};
8
-
use atrium_api::com::atproto::admin::defs::StatusAttr;
9
-
use atrium_repo::Cid;
10
-
use diesel::associations::HasTable as _;
11
-
use diesel::prelude::*;
12
-
use futures::{StreamExt, future::try_join_all};
13
-
use rsky_common::ipld::sha256_raw_to_cid;
14
-
use rsky_pds::actor_store::blob::sha256_stream;
15
-
use rsky_pds::image::{maybe_get_info, mime_type_from_bytes};
16
-
use rsky_pds::schema::pds::*;
17
-
use rsky_repo::types::{PreparedBlobRef, PreparedWrite, WriteOpAction};
18
-
use sha2::Digest;
19
-
use uuid::Uuid;
20
-
21
-
use crate::actor_store::PreparedWrite as BluePreparedWrite;
22
-
use crate::actor_store::db::ActorDb;
23
-
24
-
/// Background task queue for blob operations
25
-
pub mod background;
26
-
// Re-export BackgroundQueue
27
-
pub use background::BackgroundQueue;
28
-
29
-
pub mod placeholder;
30
-
pub(crate) use placeholder::BlobStorePlaceholder;
31
-
32
-
/// Owned, sendable reader over a blob's bytes.
pub type BlobStream = Box<dyn std::io::Read + Send>;
34
-
35
-
/// Blob store interface
36
-
pub trait BlobStore: Send + Sync {
37
-
async fn put_temp(&self, bytes: &[u8]) -> Result<String>;
38
-
async fn make_permanent(&self, key: &str, cid: Cid) -> Result<()>;
39
-
async fn put_permanent(&self, cid: Cid, bytes: &[u8]) -> Result<()>;
40
-
async fn quarantine(&self, cid: Cid) -> Result<()>;
41
-
async fn unquarantine(&self, cid: Cid) -> Result<()>;
42
-
async fn get_bytes(&self, cid: Cid) -> Result<Vec<u8>>;
43
-
async fn get_stream(&self, cid: Cid) -> Result<BlobStream>;
44
-
async fn has_temp(&self, key: &str) -> Result<bool>;
45
-
async fn has_stored(&self, cid: Cid) -> Result<bool>;
46
-
async fn delete(&self, cid: Cid) -> Result<()>;
47
-
async fn delete_many(&self, cids: Vec<Cid>) -> Result<()>;
48
-
}
49
-
50
-
/// Blob metadata for upload
51
-
pub struct BlobMetadata {
52
-
pub temp_key: String,
53
-
pub size: i64,
54
-
pub cid: Cid,
55
-
pub mime_type: String,
56
-
pub width: Option<i32>,
57
-
pub height: Option<i32>,
58
-
}
59
-
60
-
/// Blob data with content stream
61
-
pub struct BlobData {
62
-
pub size: u64,
63
-
pub mime_type: Option<String>,
64
-
pub stream: BlobStream,
65
-
}
66
-
67
-
/// Options for listing blobs
68
-
pub struct ListBlobsOptions {
69
-
pub since: Option<String>,
70
-
pub cursor: Option<String>,
71
-
pub limit: i64,
72
-
}
73
-
74
-
/// Options for listing missing blobs
75
-
pub struct ListMissingBlobsOptions {
76
-
pub cursor: Option<String>,
77
-
pub limit: i64,
78
-
}
79
-
80
-
/// Information about a missing blob
81
-
pub struct MissingBlob {
82
-
pub cid: String,
83
-
pub record_uri: String,
84
-
}
85
-
86
-
/// Unified handler for blob operations
87
-
pub struct BlobHandler {
88
-
/// Database connection
89
-
pub db: ActorDb,
90
-
/// DID of the actor
91
-
pub did: String,
92
-
/// Blob store implementation
93
-
pub blobstore: Box<dyn BlobStore>,
94
-
/// Background queue for async operations
95
-
pub background_queue: Option<background::BackgroundQueue>,
96
-
}
97
-
98
-
impl BlobHandler {
99
-
/// Create a new blob handler with background queue for write operations
100
-
pub fn new(
101
-
db: ActorDb,
102
-
blobstore: impl BlobStore + 'static,
103
-
background_queue: background::BackgroundQueue,
104
-
did: String,
105
-
) -> Self {
106
-
Self {
107
-
db,
108
-
did,
109
-
blobstore: Box::new(blobstore),
110
-
background_queue: Some(background_queue),
111
-
}
112
-
}
113
-
114
-
/// Get metadata for a blob
115
-
pub async fn get_blob_metadata(&self, cid: &Cid) -> Result<BlobMetadata> {
116
-
let cid_str = cid.to_string();
117
-
let did = self.did.clone();
118
-
119
-
let found = self
120
-
.db
121
-
.run(move |conn| {
122
-
blob::table
123
-
.filter(blob::cid.eq(&cid_str))
124
-
.filter(blob::did.eq(&did))
125
-
.filter(blob::takedownRef.is_null())
126
-
.first::<BlobModel>(conn)
127
-
.optional()
128
-
})
129
-
.await?;
130
-
131
-
match found {
132
-
Some(found) => Ok(BlobMetadata {
133
-
temp_key: found.temp_key.unwrap_or_default(),
134
-
size: found.size as i64,
135
-
cid: Cid::from_str(&found.cid)?,
136
-
mime_type: found.mime_type,
137
-
width: found.width,
138
-
height: found.height,
139
-
}),
140
-
None => bail!("Blob not found"),
141
-
}
142
-
}
143
-
144
-
/// Get a blob's complete data
145
-
pub async fn get_blob(&self, cid: &Cid) -> Result<BlobData> {
146
-
let metadata = self.get_blob_metadata(cid).await?;
147
-
let blob_stream = self.blobstore.get_stream(*cid).await?;
148
-
149
-
Ok(BlobData {
150
-
size: metadata.size as u64,
151
-
mime_type: Some(metadata.mime_type),
152
-
stream: blob_stream,
153
-
})
154
-
}
155
-
156
-
/// List blobs for a repository
157
-
pub async fn list_blobs(&self, opts: ListBlobsOptions) -> Result<Vec<String>> {
158
-
let did = self.did.clone();
159
-
let since = opts.since;
160
-
let cursor = opts.cursor;
161
-
let limit = opts.limit;
162
-
163
-
self.db
164
-
.run(move |conn| {
165
-
let mut query = record_blob::table
166
-
.inner_join(
167
-
crate::schema::record::table
168
-
.on(crate::schema::record::uri.eq(record_blob::record_uri)),
169
-
)
170
-
.filter(record_blob::did.eq(&did))
171
-
.select(record_blob::blob_cid)
172
-
.distinct()
173
-
.order(record_blob::blob_cid.asc())
174
-
.limit(limit)
175
-
.into_boxed();
176
-
177
-
if let Some(since_val) = since {
178
-
query = query.filter(crate::schema::record::repo_rev.gt(since_val));
179
-
}
180
-
181
-
if let Some(cursor_val) = cursor {
182
-
query = query.filter(record_blob::blob_cid.gt(cursor_val));
183
-
}
184
-
185
-
query.load::<String>(conn)
186
-
})
187
-
.await
188
-
}
189
-
190
-
/// Get records that reference a blob
191
-
pub async fn get_records_for_blob(&self, cid: &Cid) -> Result<Vec<String>> {
192
-
let cid_str = cid.to_string();
193
-
let did = self.did.clone();
194
-
195
-
self.db
196
-
.run(move |conn| {
197
-
record_blob::table
198
-
.filter(record_blob::blob_cid.eq(&cid_str))
199
-
.filter(record_blob::did.eq(&did))
200
-
.select(record_blob::record_uri)
201
-
.load::<String>(conn)
202
-
})
203
-
.await
204
-
}
205
-
206
-
/// Get blobs referenced by a record
207
-
pub async fn get_blobs_for_record(&self, record_uri: &str) -> Result<Vec<String>> {
208
-
let record_uri_str = record_uri.to_string();
209
-
let did = self.did.clone();
210
-
211
-
self.db
212
-
.run(move |conn| {
213
-
blob::table
214
-
.inner_join(record_blob::table.on(record_blob::blob_cid.eq(blob::cid)))
215
-
.filter(record_blob::record_uri.eq(&record_uri_str))
216
-
.filter(blob::did.eq(&did))
217
-
.select(blob::cid)
218
-
.load::<String>(conn)
219
-
})
220
-
.await
221
-
}
222
-
223
-
/// Upload a blob and get its metadata
224
-
pub async fn upload_blob_and_get_metadata(
225
-
&self,
226
-
user_suggested_mime: &str,
227
-
blob_bytes: &[u8],
228
-
) -> Result<BlobMetadata> {
229
-
let temp_key = self.blobstore.put_temp(blob_bytes).await?;
230
-
let size = blob_bytes.len() as i64;
231
-
let sha256 = sha256_stream(blob_bytes).await?;
232
-
let img_info = maybe_get_info(blob_bytes).await?;
233
-
let sniffed_mime = mime_type_from_bytes(blob_bytes).await?;
234
-
let cid = sha256_raw_to_cid(sha256);
235
-
let mime_type = sniffed_mime.unwrap_or_else(|| user_suggested_mime.to_string());
236
-
237
-
Ok(BlobMetadata {
238
-
temp_key,
239
-
size,
240
-
cid,
241
-
mime_type,
242
-
width: img_info.as_ref().map(|info| info.width as i32),
243
-
height: img_info.as_ref().map(|info| info.height as i32),
244
-
})
245
-
}
246
-
247
-
/// Count total blobs
248
-
pub async fn blob_count(&self) -> Result<i64> {
249
-
let did = self.did.clone();
250
-
251
-
self.db
252
-
.run(move |conn| {
253
-
blob::table
254
-
.filter(blob::did.eq(&did))
255
-
.count()
256
-
.get_result(conn)
257
-
})
258
-
.await
259
-
}
260
-
261
-
/// Count distinct blobs referenced by records
262
-
pub async fn record_blob_count(&self) -> Result<i64> {
263
-
let did = self.did.clone();
264
-
265
-
self.db
266
-
.run(move |conn| {
267
-
record_blob::table
268
-
.filter(record_blob::did.eq(&did))
269
-
.select(diesel::dsl::count_distinct(record_blob::blob_cid))
270
-
.first::<i64>(conn)
271
-
})
272
-
.await
273
-
}
274
-
275
-
/// List blobs that are referenced but missing from storage
276
-
pub async fn list_missing_blobs(
277
-
&self,
278
-
opts: ListMissingBlobsOptions,
279
-
) -> Result<Vec<MissingBlob>> {
280
-
let did = self.did.clone();
281
-
let limit = opts.limit;
282
-
let cursor = opts.cursor;
283
-
284
-
self.db
285
-
.run(move |conn| {
286
-
let mut query = record_blob::table
287
-
.left_join(
288
-
blob::table.on(blob::cid.eq(record_blob::blob_cid).and(blob::did.eq(&did))),
289
-
)
290
-
.filter(record_blob::did.eq(&did))
291
-
.filter(blob::cid.is_null())
292
-
.select((record_blob::blob_cid, record_blob::record_uri))
293
-
.order(record_blob::blob_cid.asc())
294
-
.limit(limit)
295
-
.into_boxed();
296
-
297
-
if let Some(cursor_val) = cursor {
298
-
query = query.filter(record_blob::blob_cid.gt(cursor_val));
299
-
}
300
-
301
-
let results = query.load::<(String, String)>(conn)?;
302
-
303
-
Ok(results
304
-
.into_iter()
305
-
.map(|(cid, record_uri)| MissingBlob { cid, record_uri })
306
-
.collect())
307
-
})
308
-
.await
309
-
}
310
-
311
-
/// Get takedown status for a blob
312
-
pub async fn get_blob_takedown_status(&self, cid: &Cid) -> Result<Option<StatusAttr>> {
313
-
let cid_str = cid.to_string();
314
-
let did = self.did.clone();
315
-
316
-
self.db
317
-
.run(move |conn| {
318
-
let result = blob::table
319
-
.filter(blob::cid.eq(&cid_str))
320
-
.filter(blob::did.eq(&did))
321
-
.select(blob::takedownRef)
322
-
.first::<Option<String>>(conn)
323
-
.optional()?;
324
-
325
-
match result {
326
-
Some(takedown) => match takedown {
327
-
Some(takedownRef) => Ok(Some(StatusAttr {
328
-
applied: true,
329
-
r#ref: Some(takedownRef),
330
-
})),
331
-
None => Ok(Some(StatusAttr {
332
-
applied: false,
333
-
r#ref: None,
334
-
})),
335
-
},
336
-
None => Ok(None),
337
-
}
338
-
})
339
-
.await
340
-
}
341
-
342
-
/// Get all blob CIDs in the repository
343
-
pub async fn get_blob_cids(&self) -> Result<Vec<Cid>> {
344
-
let did = self.did.clone();
345
-
346
-
let rows = self
347
-
.db
348
-
.run(move |conn| {
349
-
blob::table
350
-
.filter(blob::did.eq(&did))
351
-
.select(blob::cid)
352
-
.load::<String>(conn)
353
-
})
354
-
.await?;
355
-
356
-
rows.into_iter()
357
-
.map(|cid_str| Cid::from_str(&cid_str).context("Invalid CID format"))
358
-
.collect()
359
-
}
360
-
361
-
/// Track a blob that's not yet associated with a record
362
-
pub async fn track_untethered_blob(&self, metadata: &BlobMetadata) -> Result<()> {
363
-
let cid_str = metadata.cid.to_string();
364
-
let did = self.did.clone();
365
-
366
-
// Check if blob exists and is taken down
367
-
let existing = self
368
-
.db
369
-
.run({
370
-
let cid_str_clone = cid_str.clone();
371
-
let did_clone = did.clone();
372
-
373
-
move |conn| {
374
-
blob::table
375
-
.filter(blob::did.eq(&did_clone))
376
-
.filter(blob::cid.eq(&cid_str_clone))
377
-
.select(blob::takedownRef)
378
-
.first::<Option<String>>(conn)
379
-
.optional()
380
-
}
381
-
})
382
-
.await?;
383
-
384
-
if let Some(row) = existing {
385
-
if row.is_some() {
386
-
return Err(anyhow::anyhow!(
387
-
"Blob has been taken down, cannot re-upload"
388
-
));
389
-
}
390
-
}
391
-
392
-
let size = metadata.size as i32;
393
-
let now = chrono::Utc::now().to_rfc3339();
394
-
let mime_type = metadata.mime_type.clone();
395
-
let temp_key = metadata.temp_key.clone();
396
-
let width = metadata.width;
397
-
let height = metadata.height;
398
-
399
-
self.db.run(move |conn| {
400
-
diesel::insert_into(blob::table)
401
-
.values((
402
-
blob::cid.eq(&cid_str),
403
-
blob::did.eq(&did),
404
-
blob::mime_type.eq(&mime_type),
405
-
blob::size.eq(size),
406
-
blob::temp_key.eq(&temp_key),
407
-
blob::width.eq(width),
408
-
blob::height.eq(height),
409
-
blob::created_at.eq(&now),
410
-
))
411
-
.on_conflict((blob::cid, blob::did))
412
-
.do_update()
413
-
.set(
414
-
blob::temp_key.eq(
415
-
diesel::dsl::sql::<diesel::sql_types::Text>(
416
-
"CASE WHEN blob.temp_key IS NULL THEN excluded.temp_key ELSE blob.temp_key END"
417
-
)
418
-
)
419
-
)
420
-
.execute(conn)
421
-
.context("Failed to track untethered blob")
422
-
}).await?;
423
-
424
-
Ok(())
425
-
}
426
-
427
-
/// Process blobs for repository writes
428
-
pub async fn process_write_blobs(&self, rev: &str, writes: Vec<PreparedWrite>) -> Result<()> {
429
-
self.delete_dereferenced_blobs(writes.clone()).await?;
430
-
431
-
let futures = writes.iter().filter_map(|write| match write {
432
-
PreparedWrite::Create(w) | PreparedWrite::Update(w) => {
433
-
let blobs = &w.blobs;
434
-
let uri = w.uri.clone();
435
-
let handler = self;
436
-
437
-
Some(async move {
438
-
for blob in blobs {
439
-
handler.verify_blob_and_make_permanent(blob).await?;
440
-
handler.associate_blob(blob, &uri).await?;
441
-
}
442
-
Ok(())
443
-
})
444
-
}
445
-
_ => None,
446
-
});
447
-
448
-
try_join_all(futures).await?;
449
-
450
-
Ok(())
451
-
}
452
-
453
-
/// Delete blobs that are no longer referenced
454
-
pub async fn delete_dereferenced_blobs(&self, writes: Vec<PreparedWrite>) -> Result<()> {
455
-
let uris: Vec<String> = writes
456
-
.iter()
457
-
.filter_map(|w| match w {
458
-
PreparedWrite::Delete(w) => Some(w.uri.clone()),
459
-
PreparedWrite::Update(w) => Some(w.uri.clone()),
460
-
_ => None,
461
-
})
462
-
.collect();
463
-
464
-
if uris.is_empty() {
465
-
return Ok(());
466
-
}
467
-
468
-
let did = self.did.clone();
469
-
470
-
// Delete record-blob associations
471
-
let deleted_repo_blobs = self
472
-
.db
473
-
.run({
474
-
let uris_clone = uris.clone();
475
-
let did_clone = did.clone();
476
-
477
-
move |conn| {
478
-
let query = diesel::delete(record_blob::table)
479
-
.filter(record_blob::did.eq(&did_clone))
480
-
.filter(record_blob::record_uri.eq_any(&uris_clone))
481
-
.returning(RecordBlob::as_returning());
482
-
483
-
query.load(conn)
484
-
}
485
-
})
486
-
.await?;
487
-
488
-
if deleted_repo_blobs.is_empty() {
489
-
return Ok(());
490
-
}
491
-
492
-
// Collect deleted blob CIDs
493
-
let deleted_repo_blob_cids: Vec<String> = deleted_repo_blobs
494
-
.iter()
495
-
.map(|rb| rb.blob_cid.clone())
496
-
.collect();
497
-
498
-
// Find duplicates in record_blob table
499
-
let duplicate_cids = self
500
-
.db
501
-
.run({
502
-
let blob_cids = deleted_repo_blob_cids.clone();
503
-
let did_clone = did.clone();
504
-
505
-
move |conn| {
506
-
record_blob::table
507
-
.filter(record_blob::did.eq(&did_clone))
508
-
.filter(record_blob::blob_cid.eq_any(&blob_cids))
509
-
.select(record_blob::blob_cid)
510
-
.load::<String>(conn)
511
-
}
512
-
})
513
-
.await?;
514
-
515
-
// Get new blob CIDs from writes
516
-
let new_blob_cids: Vec<String> = writes
517
-
.iter()
518
-
.filter_map(|w| match w {
519
-
PreparedWrite::Create(w) | PreparedWrite::Update(w) => Some(
520
-
w.blobs
521
-
.iter()
522
-
.map(|b| b.cid.to_string())
523
-
.collect::<Vec<String>>(),
524
-
),
525
-
_ => None,
526
-
})
527
-
.flatten()
528
-
.collect();
529
-
530
-
// Determine which CIDs to keep and which to delete
531
-
let cids_to_keep: std::collections::HashSet<String> = new_blob_cids
532
-
.into_iter()
533
-
.chain(duplicate_cids.into_iter())
534
-
.collect();
535
-
536
-
let cids_to_delete: Vec<String> = deleted_repo_blob_cids
537
-
.into_iter()
538
-
.filter(|cid| !cids_to_keep.contains(cid))
539
-
.collect();
540
-
541
-
if cids_to_delete.is_empty() {
542
-
return Ok(());
543
-
}
544
-
545
-
// Delete blobs from the database
546
-
self.db
547
-
.run({
548
-
let cids = cids_to_delete.clone();
549
-
let did_clone = did.clone();
550
-
551
-
move |conn| {
552
-
diesel::delete(blob::table)
553
-
.filter(blob::did.eq(&did_clone))
554
-
.filter(blob::cid.eq_any(&cids))
555
-
.execute(conn)
556
-
}
557
-
})
558
-
.await?;
559
-
560
-
// Delete blobs from storage
561
-
let cids_to_delete_objects: Vec<Cid> = cids_to_delete
562
-
.iter()
563
-
.filter_map(|cid_str| Cid::from_str(cid_str).ok())
564
-
.collect();
565
-
566
-
// Use background queue if available
567
-
if let Some(queue) = &self.background_queue {
568
-
let blobstore = self.blobstore.clone();
569
-
queue
570
-
.add(async move {
571
-
let _ = blobstore.delete_many(cids_to_delete_objects).await;
572
-
})
573
-
.await;
574
-
} else {
575
-
// Otherwise delete directly
576
-
if !cids_to_delete_objects.is_empty() {
577
-
self.blobstore.delete_many(cids_to_delete_objects).await?;
578
-
}
579
-
}
580
-
581
-
Ok(())
582
-
}
583
-
584
-
/// Verify blob integrity and move from temporary to permanent storage
585
-
pub async fn verify_blob_and_make_permanent(&self, blob: &PreparedBlobRef) -> Result<()> {
586
-
let cid_str = blob.cid.to_string();
587
-
let did = self.did.clone();
588
-
589
-
let found = self
590
-
.db
591
-
.run(move |conn| {
592
-
blob::table
593
-
.filter(blob::did.eq(&did))
594
-
.filter(blob::cid.eq(&cid_str))
595
-
.filter(blob::takedownRef.is_null())
596
-
.first::<BlobModel>(conn)
597
-
.optional()
598
-
})
599
-
.await?;
600
-
601
-
let found = match found {
602
-
Some(b) => b,
603
-
None => bail!("Blob not found: {}", cid_str),
604
-
};
605
-
606
-
// Verify blob constraints
607
-
if let Some(max_size) = blob.constraints.max_size {
608
-
if found.size as usize > max_size {
609
-
bail!(
610
-
"BlobTooLarge: This file is too large. It is {} but the maximum size is {}",
611
-
found.size,
612
-
max_size
613
-
);
614
-
}
615
-
}
616
-
617
-
if blob.mime_type != found.mime_type {
618
-
bail!(
619
-
"InvalidMimeType: Referenced MIME type does not match stored blob. Expected: {}, Got: {}",
620
-
found.mime_type,
621
-
blob.mime_type
622
-
);
623
-
}
624
-
625
-
if let Some(ref accept) = blob.constraints.accept {
626
-
if !accepted_mime(&blob.mime_type, accept).await {
627
-
bail!(
628
-
"Wrong type of file. It is {} but it must match {:?}.",
629
-
blob.mime_type,
630
-
accept
631
-
);
632
-
}
633
-
}
634
-
635
-
// Move blob from temporary to permanent storage if needed
636
-
if let Some(temp_key) = found.temp_key {
637
-
self.blobstore.make_permanent(&temp_key, blob.cid).await?;
638
-
639
-
// Update database to clear temp key
640
-
let cid_str = blob.cid.to_string();
641
-
let did = self.did.clone();
642
-
643
-
self.db
644
-
.run(move |conn| {
645
-
diesel::update(blob::table)
646
-
.filter(blob::did.eq(&did))
647
-
.filter(blob::cid.eq(&cid_str))
648
-
.set(blob::temp_key.eq::<Option<String>>(None))
649
-
.execute(conn)
650
-
})
651
-
.await?;
652
-
}
653
-
654
-
Ok(())
655
-
}
656
-
657
-
/// Associate a blob with a record
658
-
pub async fn associate_blob(&self, blob: &PreparedBlobRef, record_uri: &str) -> Result<()> {
659
-
let cid_str = blob.cid.to_string();
660
-
let record_uri = record_uri.to_string();
661
-
let did = self.did.clone();
662
-
663
-
self.db
664
-
.run(move |conn| {
665
-
diesel::insert_into(record_blob::table)
666
-
.values((
667
-
record_blob::blob_cid.eq(&cid_str),
668
-
record_blob::record_uri.eq(&record_uri),
669
-
record_blob::did.eq(&did),
670
-
))
671
-
.on_conflict_do_nothing()
672
-
.execute(conn)
673
-
})
674
-
.await?;
675
-
676
-
Ok(())
677
-
}
678
-
679
-
/// Update takedown status for a blob
680
-
pub async fn update_blob_takedown_status(&self, blob: Cid, takedown: StatusAttr) -> Result<()> {
681
-
let cid_str = blob.to_string();
682
-
let did = self.did.clone();
683
-
684
-
let takedownRef: Option<String> = if takedown.applied {
685
-
Some(takedown.r#ref.unwrap_or_else(|| Uuid::new_v4().to_string()))
686
-
} else {
687
-
None
688
-
};
689
-
690
-
// Update database
691
-
self.db
692
-
.run(move |conn| {
693
-
diesel::update(blob::table)
694
-
.filter(blob::did.eq(&did))
695
-
.filter(blob::cid.eq(&cid_str))
696
-
.set(blob::takedownRef.eq(takedownRef))
697
-
.execute(conn)
698
-
})
699
-
.await?;
700
-
701
-
// Update blob storage
702
-
if takedown.applied {
703
-
self.blobstore.quarantine(blob).await?;
704
-
} else {
705
-
self.blobstore.unquarantine(blob).await?;
706
-
}
707
-
708
-
Ok(())
709
-
}
710
-
}
711
-
712
-
/// Verify MIME type against accepted formats
713
-
async fn accepted_mime(mime: &str, accepted: &[String]) -> bool {
714
-
// Accept any type
715
-
if accepted.contains(&"*/*".to_string()) {
716
-
return true;
717
-
}
718
-
719
-
// Check for glob patterns (e.g., "image/*")
720
-
for glob in accepted {
721
-
if glob.ends_with("/*") {
722
-
let prefix = glob.split('/').next().unwrap();
723
-
if mime.starts_with(&format!("{}/", prefix)) {
724
-
return true;
725
-
}
726
-
}
727
-
}
728
-
729
-
// Check for exact match
730
-
accepted.contains(&mime.to_string())
731
-
}
-54
src/actor_store/blob/placeholder.rs
-54
src/actor_store/blob/placeholder.rs
···
1
-
use anyhow::Result;
2
-
use atrium_repo::Cid;
3
-
4
-
use super::{BlobStore, BlobStream};
5
-
6
-
/// Placeholder implementation for blob store
7
-
#[derive(Clone)]
8
-
pub struct BlobStorePlaceholder;
9
-
10
-
impl BlobStore for BlobStorePlaceholder {
11
-
async fn put_temp(&self, _bytes: &[u8]) -> Result<String> {
12
-
todo!("BlobStorePlaceholder::put_temp not implemented");
13
-
}
14
-
15
-
async fn make_permanent(&self, _key: &str, _cid: Cid) -> Result<()> {
16
-
todo!("BlobStorePlaceholder::make_permanent not implemented");
17
-
}
18
-
19
-
async fn put_permanent(&self, _cid: Cid, _bytes: &[u8]) -> Result<()> {
20
-
todo!("BlobStorePlaceholder::put_permanent not implemented");
21
-
}
22
-
23
-
async fn quarantine(&self, _cid: Cid) -> Result<()> {
24
-
todo!("BlobStorePlaceholder::quarantine not implemented");
25
-
}
26
-
27
-
async fn unquarantine(&self, _cid: Cid) -> Result<()> {
28
-
todo!("BlobStorePlaceholder::unquarantine not implemented");
29
-
}
30
-
31
-
async fn get_bytes(&self, _cid: Cid) -> Result<Vec<u8>> {
32
-
todo!("BlobStorePlaceholder::get_bytes not implemented");
33
-
}
34
-
35
-
async fn get_stream(&self, _cid: Cid) -> Result<BlobStream> {
36
-
todo!("BlobStorePlaceholder::get_stream not implemented");
37
-
}
38
-
39
-
async fn has_temp(&self, _key: &str) -> Result<bool> {
40
-
todo!("BlobStorePlaceholder::has_temp not implemented");
41
-
}
42
-
43
-
async fn has_stored(&self, _cid: Cid) -> Result<bool> {
44
-
todo!("BlobStorePlaceholder::has_stored not implemented");
45
-
}
46
-
47
-
async fn delete(&self, _cid: Cid) -> Result<()> {
48
-
todo!("BlobStorePlaceholder::delete not implemented");
49
-
}
50
-
51
-
async fn delete_many(&self, _cids: Vec<Cid>) -> Result<()> {
52
-
todo!("BlobStorePlaceholder::delete_many not implemented");
53
-
}
54
-
}
-1
src/actor_store/db.rs
-1
src/actor_store/db.rs
+2
-7
src/actor_store/mod.rs
+2
-7
src/actor_store/mod.rs
···
1
1
//! Actor store implementation for ATProto PDS.
2
2
3
3
mod actor_store;
4
-
mod actor_store_handler;
5
-
mod actor_store_resources;
6
4
mod blob;
7
5
mod db;
8
6
mod preference;
9
-
mod prepared_write;
10
7
mod record;
11
-
mod repo;
8
+
mod sql_blob;
12
9
mod sql_repo;
13
10
14
11
pub(crate) use actor_store::ActorStore;
15
-
pub(crate) use actor_store_handler::ActorStoreHandler;
16
-
pub(crate) use actor_store_resources::ActorStoreResources;
17
12
pub(crate) use db::ActorDb;
18
-
pub(crate) use prepared_write::PreparedWrite;
13
+
pub(crate) use sql_blob::BlobStoreSql;
+17
-19
src/actor_store/preference.rs
+17
-19
src/actor_store/preference.rs
···
1
1
//! Preference handling for actor store.
2
+
//! Based on https://github.com/blacksky-algorithms/rsky/blob/main/rsky-pds/src/actor_store/preference/mod.rs
3
+
//! blacksky-algorithms/rsky is licensed under the Apache License 2.0
4
+
//!
5
+
//! Modified for SQLite backend
2
6
3
7
use anyhow::{Result, bail};
4
8
use diesel::*;
5
9
use rsky_lexicon::app::bsky::actor::RefPreferences;
6
-
use rsky_pds::{
7
-
actor_store::preference::{pref_match_namespace, util::pref_in_scope},
8
-
auth_verifier::AuthScope,
9
-
models::AccountPref,
10
-
};
11
-
12
-
use crate::actor_store::db::ActorDb;
10
+
use rsky_pds::actor_store::preference::pref_match_namespace;
11
+
use rsky_pds::actor_store::preference::util::pref_in_scope;
12
+
use rsky_pds::auth_verifier::AuthScope;
13
+
use rsky_pds::db::DbConn;
14
+
use rsky_pds::models;
15
+
use rsky_pds::models::AccountPref;
16
+
use std::sync::Arc;
13
17
14
-
/// Handler for preference operations with both read and write capabilities.
15
-
pub(crate) struct PreferenceHandler {
16
-
/// Database connection.
17
-
pub db: ActorDb,
18
-
/// DID of the actor.
18
+
pub struct PreferenceReader {
19
19
pub did: String,
20
+
pub db: Arc<DbConn>,
20
21
}
21
22
22
-
impl PreferenceHandler {
23
-
/// Create a new preference handler.
24
-
pub(crate) fn new(did: String, db: ActorDb) -> Self {
25
-
Self { db, did }
23
+
impl PreferenceReader {
24
+
pub fn new(did: String, db: Arc<DbConn>) -> Self {
25
+
PreferenceReader { did, db }
26
26
}
27
27
28
-
/// Get preferences for a namespace.
29
28
pub async fn get_preferences(
30
29
&self,
31
30
namespace: Option<String>,
···
64
63
.await
65
64
}
66
65
67
-
/// Put preferences for a namespace.
68
66
#[tracing::instrument(skip_all)]
69
67
pub async fn put_preferences(
70
68
&self,
···
97
95
use rsky_pds::schema::pds::account_pref::dsl as AccountPrefSchema;
98
96
let all_prefs = AccountPrefSchema::account_pref
99
97
.filter(AccountPrefSchema::did.eq(&did))
100
-
.select(AccountPref::as_select())
98
+
.select(models::AccountPref::as_select())
101
99
.load(conn)?;
102
100
let put_prefs = values
103
101
.into_iter()
-76
src/actor_store/prepared_write.rs
-76
src/actor_store/prepared_write.rs
···
1
-
use std::str::FromStr;
2
-
3
-
use cidv10::Cid as CidV10;
4
-
use rsky_repo::types::{
5
-
CommitAction, PreparedBlobRef, PreparedCreateOrUpdate, PreparedDelete, WriteOpAction,
6
-
};
7
-
use serde::{Deserialize, Serialize};
8
-
9
-
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
10
-
pub enum PreparedWrite {
11
-
Create(PreparedCreateOrUpdate),
12
-
Update(PreparedCreateOrUpdate),
13
-
Delete(PreparedDelete),
14
-
}
15
-
16
-
impl PreparedWrite {
17
-
pub fn uri(&self) -> &String {
18
-
match self {
19
-
PreparedWrite::Create(w) => &w.uri,
20
-
PreparedWrite::Update(w) => &w.uri,
21
-
PreparedWrite::Delete(w) => &w.uri,
22
-
}
23
-
}
24
-
25
-
pub fn cid(&self) -> Option<CidV10> {
26
-
match self {
27
-
PreparedWrite::Create(w) => Some(CidV10::from_str(w.cid.to_string().as_str()).unwrap()),
28
-
PreparedWrite::Update(w) => Some(CidV10::from_str(w.cid.to_string().as_str()).unwrap()),
29
-
PreparedWrite::Delete(_) => None,
30
-
}
31
-
}
32
-
33
-
pub fn swap_cid(&self) -> Option<CidV10> {
34
-
match self {
35
-
PreparedWrite::Create(w) => w
36
-
.swap_cid
37
-
.as_ref()
38
-
.map(|cid| CidV10::from_str(cid.to_string().as_str()).unwrap()),
39
-
PreparedWrite::Update(w) => w
40
-
.swap_cid
41
-
.as_ref()
42
-
.map(|cid| CidV10::from_str(cid.to_string().as_str()).unwrap()),
43
-
PreparedWrite::Delete(w) => w
44
-
.swap_cid
45
-
.as_ref()
46
-
.map(|cid| CidV10::from_str(cid.to_string().as_str()).unwrap()),
47
-
}
48
-
}
49
-
50
-
pub fn action(&self) -> &WriteOpAction {
51
-
match self {
52
-
PreparedWrite::Create(w) => &w.action,
53
-
PreparedWrite::Update(w) => &w.action,
54
-
PreparedWrite::Delete(w) => &w.action,
55
-
}
56
-
}
57
-
58
-
/// TEQ: Add blobs() impl
59
-
pub fn blobs(&self) -> Option<&Vec<PreparedBlobRef>> {
60
-
match self {
61
-
PreparedWrite::Create(w) => Some(&w.blobs),
62
-
PreparedWrite::Update(w) => Some(&w.blobs),
63
-
PreparedWrite::Delete(_) => None,
64
-
}
65
-
}
66
-
}
67
-
68
-
impl From<&PreparedWrite> for CommitAction {
69
-
fn from(value: &PreparedWrite) -> Self {
70
-
match value {
71
-
&PreparedWrite::Create(_) => CommitAction::Create,
72
-
&PreparedWrite::Update(_) => CommitAction::Update,
73
-
&PreparedWrite::Delete(_) => CommitAction::Delete,
74
-
}
75
-
}
76
-
}
+337
-698
src/actor_store/record.rs
+337
-698
src/actor_store/record.rs
···
1
1
//! Record storage and retrieval for the actor store.
2
+
//! Based on https://github.com/blacksky-algorithms/rsky/blob/main/rsky-pds/src/actor_store/record/mod.rs
3
+
//! blacksky-algorithms/rsky is licensed under the Apache License 2.0
4
+
//!
5
+
//! Modified for SQLite backend
2
6
3
-
use anyhow::{Context as _, Result, bail};
4
-
use atrium_api::com::atproto::admin::defs::StatusAttr;
5
-
use atrium_repo::Cid;
6
-
use diesel::associations::HasTable;
7
-
use diesel::prelude::*;
7
+
use anyhow::{Error, Result, bail};
8
+
use cidv10::Cid;
9
+
use diesel::*;
10
+
use futures::stream::{self, StreamExt};
11
+
use rsky_lexicon::com::atproto::admin::StatusAttr;
12
+
use rsky_pds::actor_store::record::{GetRecord, RecordsForCollection, get_backlinks};
8
13
use rsky_pds::models::{Backlink, Record};
9
-
use rsky_pds::schema::pds::repo_block::dsl::repo_block;
10
-
use rsky_pds::schema::pds::{backlink, record};
11
-
use rsky_repo::types::WriteOpAction;
14
+
use rsky_repo::types::{RepoRecord, WriteOpAction};
15
+
use rsky_repo::util::cbor_to_lex_record;
12
16
use rsky_syntax::aturi::AtUri;
17
+
use std::env;
13
18
use std::str::FromStr;
14
19
15
-
use crate::actor_store::blob::BlobStorePlaceholder;
16
20
use crate::actor_store::db::ActorDb;
17
21
18
22
/// Combined handler for record operations with both read and write capabilities.
19
-
pub(crate) struct RecordHandler {
23
+
pub(crate) struct RecordReader {
20
24
/// Database connection.
21
25
pub db: ActorDb,
22
26
/// DID of the actor.
23
27
pub did: String,
24
-
/// Blob store for handling blobs.
25
-
pub blobstore: Option<BlobStorePlaceholder>,
26
28
}
27
29
28
-
/// Record descriptor containing URI, path, and CID.
29
-
pub(crate) struct RecordDescript {
30
-
/// Record URI.
31
-
pub uri: String,
32
-
/// Record path.
33
-
pub path: String,
34
-
/// Record CID.
35
-
pub cid: Cid,
36
-
}
37
-
38
-
/// Record data with values.
39
-
#[derive(Debug, Clone)]
40
-
pub(crate) struct RecordData {
41
-
/// Record URI.
42
-
pub uri: String,
43
-
/// Record CID.
44
-
pub cid: String,
45
-
/// Record value as JSON.
46
-
pub value: serde_json::Value,
47
-
/// When the record was indexed.
48
-
pub indexedAt: String,
49
-
/// Reference for takedown, if any.
50
-
pub takedownRef: Option<String>,
51
-
}
52
-
53
-
/// Options for listing records in a collection.
54
-
#[derive(Debug, Clone)]
55
-
pub(crate) struct ListRecordsOptions {
56
-
/// Collection to list records from.
57
-
pub collection: String,
58
-
/// Maximum number of records to return.
59
-
pub limit: i64,
60
-
/// Whether to reverse the sort order.
61
-
pub reverse: bool,
62
-
/// Cursor for pagination.
63
-
pub cursor: Option<String>,
64
-
/// Start key (deprecated).
65
-
pub rkey_start: Option<String>,
66
-
/// End key (deprecated).
67
-
pub rkey_end: Option<String>,
68
-
/// Whether to include soft-deleted records.
69
-
pub include_soft_deleted: bool,
70
-
}
71
-
72
-
impl RecordHandler {
30
+
impl RecordReader {
73
31
/// Create a new record handler.
74
-
pub(crate) fn new(db: ActorDb, did: String) -> Self {
75
-
Self {
76
-
db,
77
-
did,
78
-
blobstore: None,
79
-
}
80
-
}
81
-
82
-
/// Create a new record handler with blobstore support.
83
-
pub(crate) fn new_with_blobstore(
84
-
db: ActorDb,
85
-
blobstore: BlobStorePlaceholder,
86
-
did: String,
87
-
) -> Self {
88
-
Self {
89
-
db,
90
-
did,
91
-
blobstore: Some(blobstore),
92
-
}
32
+
pub(crate) fn new(did: String, db: ActorDb) -> Self {
33
+
Self { did, db }
93
34
}
94
35
95
36
/// Count the total number of records.
96
-
pub(crate) async fn record_count(&self) -> Result<i64> {
97
-
let did = self.did.clone();
37
+
pub(crate) async fn record_count(&mut self) -> Result<i64> {
38
+
use rsky_pds::schema::pds::record::dsl::*;
98
39
40
+
let other_did = self.did.clone();
99
41
self.db
100
42
.run(move |conn| {
101
-
use rsky_pds::schema::pds::record::dsl::*;
102
-
103
-
record.filter(did.eq(&did)).count().get_result(conn)
43
+
let res: i64 = record.filter(did.eq(&other_did)).count().get_result(conn)?;
44
+
Ok(res)
104
45
})
105
46
.await
106
47
}
107
48
108
-
/// List all records.
109
-
pub(crate) async fn list_all(&self) -> Result<Vec<RecordDescript>> {
110
-
let did = self.did.clone();
111
-
let mut records = Vec::new();
112
-
let mut current_cursor = Some("".to_string());
113
-
114
-
while let Some(cursor) = current_cursor.take() {
115
-
let cursor_clone = cursor.clone();
116
-
let did_clone = did.clone();
117
-
118
-
let rows = self
119
-
.db
120
-
.run(move |conn| {
121
-
use rsky_pds::schema::pds::record::dsl::*;
122
-
123
-
record
124
-
.filter(did.eq(&did_clone))
125
-
.filter(uri.gt(&cursor_clone))
126
-
.order(uri.asc())
127
-
.limit(1000)
128
-
.select((uri, cid))
129
-
.load::<(String, String)>(conn)
130
-
})
131
-
.await?;
132
-
133
-
for (uri_str, cid_str) in &rows {
134
-
let uri = uri_str.clone();
135
-
let parts: Vec<&str> = uri.rsplitn(2, '/').collect();
136
-
let path = if parts.len() == 2 {
137
-
format!("{}/{}", parts[1], parts[0])
138
-
} else {
139
-
uri.clone()
140
-
};
141
-
142
-
match Cid::from_str(&cid_str) {
143
-
Ok(cid) => records.push(RecordDescript { uri, path, cid }),
144
-
Err(e) => tracing::warn!("Invalid CID in database: {}", e),
145
-
}
146
-
}
147
-
148
-
if let Some(last) = rows.last() {
149
-
current_cursor = Some(last.0.clone());
150
-
} else {
151
-
break;
152
-
}
153
-
}
154
-
155
-
Ok(records)
156
-
}
157
-
158
49
/// List all collections in the repository.
159
50
pub(crate) async fn list_collections(&self) -> Result<Vec<String>> {
160
-
let did = self.did.clone();
51
+
use rsky_pds::schema::pds::record::dsl::*;
161
52
53
+
let other_did = self.did.clone();
162
54
self.db
163
55
.run(move |conn| {
164
-
use rsky_pds::schema::pds::record::dsl::*;
165
-
166
-
record
167
-
.filter(did.eq(&did))
56
+
let collections = record
57
+
.filter(did.eq(&other_did))
58
+
.select(collection)
168
59
.group_by(collection)
169
-
.select(collection)
170
-
.load::<String>(conn)
60
+
.load::<String>(conn)?
61
+
.into_iter()
62
+
.collect::<Vec<String>>();
63
+
Ok(collections)
171
64
})
172
65
.await
173
66
}
174
67
175
68
/// List records for a specific collection.
176
69
pub(crate) async fn list_records_for_collection(
177
-
&self,
178
-
opts: ListRecordsOptions,
179
-
) -> Result<Vec<RecordData>> {
180
-
let did = self.did.clone();
70
+
&mut self,
71
+
collection: String,
72
+
limit: i64,
73
+
reverse: bool,
74
+
cursor: Option<String>,
75
+
rkey_start: Option<String>,
76
+
rkey_end: Option<String>,
77
+
include_soft_deleted: Option<bool>,
78
+
) -> Result<Vec<RecordsForCollection>> {
79
+
use rsky_pds::schema::pds::record::dsl as RecordSchema;
80
+
use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema;
181
81
182
-
self.db
183
-
.run(move |conn| {
184
-
// Start building the query
185
-
let mut query = record::table
186
-
.inner_join(repo_block::table.on(repo_block::cid.eq(record::cid)))
187
-
.filter(record::did.eq(&did))
188
-
.filter(record::collection.eq(&opts.collection))
189
-
.into_boxed();
190
-
191
-
// Handle soft-deleted records
192
-
if !opts.include_soft_deleted {
193
-
query = query.filter(record::takedownRef.is_null());
194
-
}
195
-
196
-
// Handle cursor-based pagination first
197
-
if let Some(cursor) = &opts.cursor {
198
-
if opts.reverse {
199
-
query = query.filter(record::rkey.gt(cursor));
200
-
} else {
201
-
query = query.filter(record::rkey.lt(cursor));
202
-
}
203
-
} else {
204
-
// Fall back to deprecated rkey-based pagination
205
-
if let Some(start) = &opts.rkey_start {
206
-
query = query.filter(record::rkey.gt(start));
207
-
}
208
-
if let Some(end) = &opts.rkey_end {
209
-
query = query.filter(record::rkey.lt(end));
210
-
}
211
-
}
212
-
213
-
// Add order and limit
214
-
if opts.reverse {
215
-
query = query.order(record::rkey.asc());
216
-
} else {
217
-
query = query.order(record::rkey.desc());
218
-
}
219
-
220
-
query = query.limit(opts.limit);
221
-
222
-
// Execute the query
223
-
let results = query
224
-
.select((
225
-
record::uri,
226
-
record::cid,
227
-
record::indexedAt,
228
-
record::takedownRef,
229
-
repo_block::content,
230
-
))
231
-
.load::<(String, String, String, Option<String>, Vec<u8>)>(conn)?;
232
-
233
-
// Convert results to RecordData
234
-
let records = results
235
-
.into_iter()
236
-
.map(|(uri, cid, indexedAt, takedownRef, content)| {
237
-
let value = serde_json::from_slice(&content)
238
-
.with_context(|| format!("Failed to decode record {}", cid))?;
239
-
240
-
Ok(RecordData {
241
-
uri,
242
-
cid,
243
-
value,
244
-
indexedAt,
245
-
takedownRef,
246
-
})
247
-
})
248
-
.collect::<Result<Vec<_>>>()?;
82
+
let include_soft_deleted: bool = if let Some(include_soft_deleted) = include_soft_deleted {
83
+
include_soft_deleted
84
+
} else {
85
+
false
86
+
};
87
+
let mut builder = RecordSchema::record
88
+
.inner_join(RepoBlockSchema::repo_block.on(RepoBlockSchema::cid.eq(RecordSchema::cid)))
89
+
.limit(limit)
90
+
.select((
91
+
rsky_pds::models::Record::as_select(),
92
+
rsky_pds::models::RepoBlock::as_select(),
93
+
))
94
+
.filter(RecordSchema::did.eq(self.did.clone()))
95
+
.filter(RecordSchema::collection.eq(collection))
96
+
.into_boxed();
97
+
if !include_soft_deleted {
98
+
builder = builder.filter(RecordSchema::takedownRef.is_null());
99
+
}
100
+
if reverse {
101
+
builder = builder.order(RecordSchema::rkey.asc());
102
+
} else {
103
+
builder = builder.order(RecordSchema::rkey.desc());
104
+
}
249
105
250
-
Ok(records)
106
+
if let Some(cursor) = cursor {
107
+
if reverse {
108
+
builder = builder.filter(RecordSchema::rkey.gt(cursor));
109
+
} else {
110
+
builder = builder.filter(RecordSchema::rkey.lt(cursor));
111
+
}
112
+
} else {
113
+
if let Some(rkey_start) = rkey_start {
114
+
builder = builder.filter(RecordSchema::rkey.gt(rkey_start));
115
+
}
116
+
if let Some(rkey_end) = rkey_end {
117
+
builder = builder.filter(RecordSchema::rkey.lt(rkey_end));
118
+
}
119
+
}
120
+
let res: Vec<(rsky_pds::models::Record, rsky_pds::models::RepoBlock)> =
121
+
self.db.run(move |conn| builder.load(conn)).await?;
122
+
res.into_iter()
123
+
.map(|row| {
124
+
Ok(RecordsForCollection {
125
+
uri: row.0.uri,
126
+
cid: row.0.cid,
127
+
value: cbor_to_lex_record(row.1.content)?,
128
+
})
251
129
})
252
-
.await
130
+
.collect::<Result<Vec<RecordsForCollection>>>()
253
131
}
254
132
255
133
/// Get a specific record by URI.
256
134
pub(crate) async fn get_record(
257
-
&self,
135
+
&mut self,
258
136
uri: &AtUri,
259
-
cid: Option<&str>,
260
-
include_soft_deleted: bool,
261
-
) -> Result<Option<RecordData>> {
262
-
let did = self.did.clone();
263
-
let uri_str = uri.to_string();
264
-
let cid_opt = cid.map(|c| c.to_string());
265
-
266
-
self.db
267
-
.run(move |conn| {
268
-
let mut query = record::table
269
-
.inner_join(repo_block::table.on(repo_block::cid.eq(record::cid)))
270
-
.filter(record::did.eq(&did))
271
-
.filter(record::uri.eq(&uri_str))
272
-
.into_boxed();
273
-
274
-
if !include_soft_deleted {
275
-
query = query.filter(record::takedownRef.is_null());
276
-
}
277
-
278
-
if let Some(cid_val) = cid_opt {
279
-
query = query.filter(record::cid.eq(cid_val));
280
-
}
137
+
cid: Option<String>,
138
+
include_soft_deleted: Option<bool>,
139
+
) -> Result<Option<GetRecord>> {
140
+
use rsky_pds::schema::pds::record::dsl as RecordSchema;
141
+
use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema;
281
142
282
-
let result = query
283
-
.select((
284
-
record::uri,
285
-
record::cid,
286
-
record::indexedAt,
287
-
record::takedownRef,
288
-
repo_block::content,
289
-
))
290
-
.first::<(String, String, String, Option<String>, Vec<u8>)>(conn)
291
-
.optional()?;
292
-
293
-
if let Some((uri, cid, indexedAt, takedownRef, content)) = result {
294
-
let value = serde_json::from_slice(&content)
295
-
.with_context(|| format!("Failed to decode record {}", cid))?;
296
-
297
-
Ok(Some(RecordData {
298
-
uri,
299
-
cid,
300
-
value,
301
-
indexedAt,
302
-
takedownRef,
303
-
}))
304
-
} else {
305
-
Ok(None)
306
-
}
307
-
})
308
-
.await
143
+
let include_soft_deleted: bool = if let Some(include_soft_deleted) = include_soft_deleted {
144
+
include_soft_deleted
145
+
} else {
146
+
false
147
+
};
148
+
let mut builder = RecordSchema::record
149
+
.inner_join(RepoBlockSchema::repo_block.on(RepoBlockSchema::cid.eq(RecordSchema::cid)))
150
+
.select((
151
+
rsky_pds::models::Record::as_select(),
152
+
rsky_pds::models::RepoBlock::as_select(),
153
+
))
154
+
.filter(RecordSchema::uri.eq(uri.to_string()))
155
+
.into_boxed();
156
+
if !include_soft_deleted {
157
+
builder = builder.filter(RecordSchema::takedownRef.is_null());
158
+
}
159
+
if let Some(cid) = cid {
160
+
builder = builder.filter(RecordSchema::cid.eq(cid));
161
+
}
162
+
let record: Option<(rsky_pds::models::Record, rsky_pds::models::RepoBlock)> = self
163
+
.db
164
+
.run(move |conn| builder.first(conn).optional())
165
+
.await?;
166
+
if let Some(record) = record {
167
+
Ok(Some(GetRecord {
168
+
uri: record.0.uri,
169
+
cid: record.0.cid,
170
+
value: cbor_to_lex_record(record.1.content)?,
171
+
indexed_at: record.0.indexed_at,
172
+
takedown_ref: record.0.takedown_ref,
173
+
}))
174
+
} else {
175
+
Ok(None)
176
+
}
309
177
}
310
178
311
179
/// Check if a record exists.
312
180
pub(crate) async fn has_record(
313
-
&self,
314
-
uri: &str,
315
-
cid: Option<&str>,
316
-
include_soft_deleted: bool,
181
+
&mut self,
182
+
uri: String,
183
+
cid: Option<String>,
184
+
include_soft_deleted: Option<bool>,
317
185
) -> Result<bool> {
318
-
let did = self.did.clone();
319
-
let uri_str = uri.to_string();
320
-
let cid_opt = cid.map(|c| c.to_string());
186
+
use rsky_pds::schema::pds::record::dsl as RecordSchema;
321
187
322
-
self.db
323
-
.run(move |conn| {
324
-
let mut query = record::table
325
-
.filter(record::did.eq(&did))
326
-
.filter(record::uri.eq(&uri_str))
327
-
.into_boxed();
328
-
329
-
if !include_soft_deleted {
330
-
query = query.filter(record::takedownRef.is_null());
331
-
}
332
-
333
-
if let Some(cid_val) = cid_opt {
334
-
query = query.filter(record::cid.eq(cid_val));
335
-
}
336
-
337
-
let exists = query
338
-
.select(record::uri)
339
-
.first::<String>(conn)
340
-
.optional()?
341
-
.is_some();
342
-
343
-
Ok(exists)
344
-
})
345
-
.await
188
+
let include_soft_deleted: bool = if let Some(include_soft_deleted) = include_soft_deleted {
189
+
include_soft_deleted
190
+
} else {
191
+
false
192
+
};
193
+
let mut builder = RecordSchema::record
194
+
.select(RecordSchema::uri)
195
+
.filter(RecordSchema::uri.eq(uri))
196
+
.into_boxed();
197
+
if !include_soft_deleted {
198
+
builder = builder.filter(RecordSchema::takedownRef.is_null());
199
+
}
200
+
if let Some(cid) = cid {
201
+
builder = builder.filter(RecordSchema::cid.eq(cid));
202
+
}
203
+
let record_uri = self
204
+
.db
205
+
.run(move |conn| builder.first::<String>(conn).optional())
206
+
.await?;
207
+
Ok(!!record_uri.is_some())
346
208
}
347
209
348
210
/// Get the takedown status of a record.
349
-
pub(crate) async fn get_record_takedown_status(&self, uri: &str) -> Result<Option<StatusAttr>> {
350
-
let did = self.did.clone();
351
-
let uri_str = uri.to_string();
211
+
pub(crate) async fn get_record_takedown_status(
212
+
&self,
213
+
uri: String,
214
+
) -> Result<Option<StatusAttr>> {
215
+
use rsky_pds::schema::pds::record::dsl as RecordSchema;
352
216
353
-
self.db
217
+
let res = self
218
+
.db
354
219
.run(move |conn| {
355
-
let result = record::table
356
-
.filter(record::did.eq(&did))
357
-
.filter(record::uri.eq(&uri_str))
358
-
.select(record::takedownRef)
220
+
RecordSchema::record
221
+
.select(RecordSchema::takedownRef)
222
+
.filter(RecordSchema::uri.eq(uri))
359
223
.first::<Option<String>>(conn)
360
-
.optional()?;
361
-
362
-
match result {
363
-
Some(takedown) => match takedown {
364
-
Some(takedownRef) => Ok(Some(StatusAttr {
365
-
applied: true,
366
-
r#ref: Some(takedownRef),
367
-
})),
368
-
None => Ok(Some(StatusAttr {
369
-
applied: false,
370
-
r#ref: None,
371
-
})),
372
-
},
373
-
None => Ok(None),
374
-
}
224
+
.optional()
375
225
})
376
-
.await
226
+
.await?;
227
+
if let Some(res) = res {
228
+
if let Some(takedown_ref) = res {
229
+
Ok(Some(StatusAttr {
230
+
applied: true,
231
+
r#ref: Some(takedown_ref),
232
+
}))
233
+
} else {
234
+
Ok(Some(StatusAttr {
235
+
applied: false,
236
+
r#ref: None,
237
+
}))
238
+
}
239
+
} else {
240
+
Ok(None)
241
+
}
377
242
}
378
243
379
244
/// Get the current CID for a record URI.
380
-
pub(crate) async fn get_current_record_cid(&self, uri: &str) -> Result<Option<Cid>> {
381
-
let did = self.did.clone();
382
-
let uri_str = uri.to_string();
245
+
pub(crate) async fn get_current_record_cid(&self, uri: String) -> Result<Option<Cid>> {
246
+
use rsky_pds::schema::pds::record::dsl as RecordSchema;
383
247
384
-
self.db
248
+
let res = self
249
+
.db
385
250
.run(move |conn| {
386
-
let result = record::table
387
-
.filter(record::did.eq(&did))
388
-
.filter(record::uri.eq(&uri_str))
389
-
.select(record::cid)
251
+
RecordSchema::record
252
+
.select(RecordSchema::cid)
253
+
.filter(RecordSchema::uri.eq(uri))
390
254
.first::<String>(conn)
391
-
.optional()?;
392
-
393
-
match result {
394
-
Some(cid_str) => {
395
-
let cid = Cid::from_str(&cid_str)?;
396
-
Ok(Some(cid))
397
-
}
398
-
None => Ok(None),
399
-
}
255
+
.optional()
400
256
})
401
-
.await
257
+
.await?;
258
+
if let Some(res) = res {
259
+
Ok(Some(Cid::from_str(&res)?))
260
+
} else {
261
+
Ok(None)
262
+
}
402
263
}
403
264
404
265
/// Get backlinks for a record.
405
266
pub(crate) async fn get_record_backlinks(
406
267
&self,
407
-
collection: &str,
408
-
path: &str,
409
-
linkTo: &str,
268
+
collection: String,
269
+
path: String,
270
+
link_to: String,
410
271
) -> Result<Vec<Record>> {
411
-
let did = self.did.clone();
412
-
let collection_str = collection.to_string();
413
-
let path_str = path.to_string();
414
-
let linkTo_str = linkTo.to_string();
272
+
use rsky_pds::schema::pds::backlink::dsl as BacklinkSchema;
273
+
use rsky_pds::schema::pds::record::dsl as RecordSchema;
415
274
416
-
self.db
275
+
let res = self
276
+
.db
417
277
.run(move |conn| {
418
-
backlink::table
419
-
.inner_join(record::table.on(backlink::uri.eq(record::uri)))
420
-
.filter(backlink::path.eq(&path_str))
421
-
.filter(backlink::linkTo.eq(&linkTo_str))
422
-
.filter(record::collection.eq(&collection_str))
423
-
.filter(record::did.eq(&did))
278
+
RecordSchema::record
279
+
.inner_join(
280
+
BacklinkSchema::backlink.on(BacklinkSchema::uri.eq(RecordSchema::uri)),
281
+
)
424
282
.select(Record::as_select())
283
+
.filter(BacklinkSchema::path.eq(path))
284
+
.filter(BacklinkSchema::linkTo.eq(link_to))
285
+
.filter(RecordSchema::collection.eq(collection))
425
286
.load::<Record>(conn)
426
287
})
427
-
.await
288
+
.await?;
289
+
Ok(res)
428
290
}
429
291
430
292
/// Get backlink conflicts for a record.
431
293
pub(crate) async fn get_backlink_conflicts(
432
294
&self,
433
295
uri: &AtUri,
434
-
record: &serde_json::Value,
435
-
) -> Result<Vec<String>> {
436
-
let backlinks = get_backlinks(uri, record)?;
437
-
if backlinks.is_empty() {
438
-
return Ok(Vec::new());
439
-
}
440
-
441
-
let did = self.did.clone();
442
-
let uri_collection = uri.get_collection().to_string();
443
-
let mut conflicts = Vec::new();
444
-
445
-
for backlink in backlinks {
446
-
let path_str = backlink.path.clone();
447
-
let linkTo_str = backlink.linkTo.clone();
448
-
449
-
let results = self
450
-
.db
451
-
.run(move |conn| {
452
-
backlink::table
453
-
.inner_join(record::table.on(backlink::uri.eq(record::uri)))
454
-
.filter(backlink::path.eq(&path_str))
455
-
.filter(backlink::linkTo.eq(&linkTo_str))
456
-
.filter(record::collection.eq(&uri_collection))
457
-
.filter(record::did.eq(&did))
458
-
.select(record::uri)
459
-
.load::<String>(conn)
460
-
})
461
-
.await?;
462
-
463
-
conflicts.extend(results);
464
-
}
465
-
466
-
Ok(conflicts)
467
-
}
468
-
469
-
/// List existing blocks in the repository.
470
-
pub(crate) async fn list_existing_blocks(&self) -> Result<Vec<Cid>> {
471
-
let did = self.did.clone();
472
-
let mut blocks = Vec::new();
473
-
let mut current_cursor = Some("".to_string());
474
-
475
-
while let Some(cursor) = current_cursor.take() {
476
-
let cursor_clone = cursor.clone();
477
-
let did_clone = did.clone();
478
-
479
-
let rows = self
480
-
.db
481
-
.run(move |conn| {
482
-
use rsky_pds::schema::pds::repo_block::dsl::*;
483
-
484
-
repo_block
485
-
.filter(did.eq(&did_clone))
486
-
.filter(cid.gt(&cursor_clone))
487
-
.order(cid.asc())
488
-
.limit(1000)
489
-
.select(cid)
490
-
.load::<String>(conn)
491
-
})
492
-
.await?;
493
-
494
-
for cid_str in &rows {
495
-
match Cid::from_str(cid_str) {
496
-
Ok(cid) => blocks.push(cid),
497
-
Err(e) => tracing::warn!("Invalid CID in database: {}", e),
498
-
}
499
-
}
500
-
501
-
if let Some(last) = rows.last() {
502
-
current_cursor = Some(last.clone());
503
-
} else {
504
-
break;
505
-
}
506
-
}
507
-
508
-
Ok(blocks)
509
-
}
510
-
511
-
/// Get the profile record for this repository
512
-
pub(crate) async fn get_profile_record(&self) -> Result<Option<serde_json::Value>> {
513
-
let did = self.did.clone();
514
-
515
-
self.db
516
-
.run(move |conn| {
517
-
let result = record::table
518
-
.inner_join(repo_block::table.on(repo_block::cid.eq(record::cid)))
519
-
.filter(record::did.eq(&did))
520
-
.filter(record::collection.eq("app.bsky.actor.profile"))
521
-
.filter(record::rkey.eq("self"))
522
-
.select(repo_block::content)
523
-
.first::<Vec<u8>>(conn)
524
-
.optional()?;
525
-
526
-
if let Some(content) = result {
527
-
let value = serde_json::from_slice(&content)
528
-
.context("Failed to decode profile record")?;
529
-
Ok(Some(value))
530
-
} else {
531
-
Ok(None)
532
-
}
296
+
record: &RepoRecord,
297
+
) -> Result<Vec<AtUri>> {
298
+
let record_backlinks = get_backlinks(uri, record)?;
299
+
let conflicts: Vec<Vec<Record>> = stream::iter(record_backlinks)
300
+
.then(|backlink| async move {
301
+
Ok::<Vec<Record>, anyhow::Error>(
302
+
self.get_record_backlinks(
303
+
uri.get_collection(),
304
+
backlink.path,
305
+
backlink.link_to,
306
+
)
307
+
.await?,
308
+
)
533
309
})
310
+
.collect::<Vec<_>>()
534
311
.await
535
-
}
536
-
537
-
/// Get records created or updated since a specific revision
538
-
pub(crate) async fn get_records_since_rev(&self, rev: &str) -> Result<Vec<RecordData>> {
539
-
let did = self.did.clone();
540
-
let rev_str = rev.to_string();
541
-
542
-
// First check if the revision exists
543
-
let exists = self
544
-
.db
545
-
.run({
546
-
let did_clone = did.clone();
547
-
let rev_clone = rev_str.clone();
548
-
549
-
move |conn| {
550
-
record::table
551
-
.filter(record::did.eq(&did_clone))
552
-
.filter(record::repoRev.le(&rev_clone))
553
-
.count()
554
-
.get_result::<i64>(conn)
555
-
.map(|count| count > 0)
556
-
}
312
+
.into_iter()
313
+
.collect::<Result<Vec<_>, _>>()?;
314
+
Ok(conflicts
315
+
.into_iter()
316
+
.flatten()
317
+
.filter_map(|record| {
318
+
AtUri::make(
319
+
env::var("BLUEPDS_HOST_NAME").unwrap_or("localhost".to_owned()),
320
+
Some(String::from(uri.get_collection())),
321
+
Some(record.rkey),
322
+
)
323
+
.ok()
557
324
})
558
-
.await?;
559
-
560
-
if !exists {
561
-
// No records before this revision - possible account migration case
562
-
return Ok(Vec::new());
563
-
}
564
-
565
-
// Get records since the revision
566
-
self.db
567
-
.run(move |conn| {
568
-
let results = record::table
569
-
.inner_join(repo_block::table.on(repo_block::cid.eq(record::cid)))
570
-
.filter(record::did.eq(&did))
571
-
.filter(record::repoRev.gt(&rev_str))
572
-
.order(record::repoRev.asc())
573
-
.limit(10)
574
-
.select((
575
-
record::uri,
576
-
record::cid,
577
-
record::indexedAt,
578
-
repo_block::content,
579
-
))
580
-
.load::<(String, String, String, Vec<u8>)>(conn)?;
581
-
582
-
let records = results
583
-
.into_iter()
584
-
.map(|(uri, cid, indexedAt, content)| {
585
-
let value = serde_json::from_slice(&content)
586
-
.with_context(|| format!("Failed to decode record {}", cid))?;
587
-
588
-
Ok(RecordData {
589
-
uri,
590
-
cid,
591
-
value,
592
-
indexedAt,
593
-
takedownRef: None, // Not included in the query
594
-
})
595
-
})
596
-
.collect::<Result<Vec<_>>>()?;
597
-
598
-
Ok(records)
599
-
})
600
-
.await
325
+
.collect::<Vec<AtUri>>())
601
326
}
602
327
603
328
// Transactor methods
604
329
// -----------------
605
330
606
331
/// Index a record in the database.
332
+
#[tracing::instrument(skip_all)]
607
333
pub(crate) async fn index_record(
608
334
&self,
609
335
uri: AtUri,
610
336
cid: Cid,
611
-
record: Option<&serde_json::Value>,
612
-
action: WriteOpAction,
613
-
repoRev: &str,
337
+
record: Option<RepoRecord>,
338
+
action: Option<WriteOpAction>, // Create or update with a default of create
339
+
repo_rev: String,
614
340
timestamp: Option<String>,
615
341
) -> Result<()> {
616
-
let uri_str = uri.to_string();
617
-
tracing::debug!("Indexing record {}", uri_str);
342
+
tracing::debug!("@LOG DEBUG RecordReader::index_record, indexing record {uri}");
618
343
619
-
if !uri_str.starts_with("at://did:") {
620
-
return Err(anyhow::anyhow!("Expected indexed URI to contain DID"));
621
-
}
344
+
let collection = uri.get_collection();
345
+
let rkey = uri.get_rkey();
346
+
let hostname = uri.get_hostname().to_string();
347
+
let action = action.unwrap_or(WriteOpAction::Create);
348
+
let indexed_at = timestamp.unwrap_or_else(|| rsky_common::now());
349
+
let row = Record {
350
+
did: self.did.clone(),
351
+
uri: uri.to_string(),
352
+
cid: cid.to_string(),
353
+
collection: collection.clone(),
354
+
rkey: rkey.to_string(),
355
+
repo_rev: Some(repo_rev.clone()),
356
+
indexed_at: indexed_at.clone(),
357
+
takedown_ref: None,
358
+
};
622
359
623
-
let collection = uri.get_collection().to_string();
624
-
let rkey = uri.get_rkey().to_string();
625
-
626
-
if collection.is_empty() {
627
-
return Err(anyhow::anyhow!(
628
-
"Expected indexed URI to contain a collection"
629
-
));
360
+
if !hostname.starts_with("did:") {
361
+
bail!("Expected indexed URI to contain DID")
362
+
} else if collection.is_empty() {
363
+
bail!("Expected indexed URI to contain a collection")
630
364
} else if rkey.is_empty() {
631
-
return Err(anyhow::anyhow!(
632
-
"Expected indexed URI to contain a record key"
633
-
));
365
+
bail!("Expected indexed URI to contain a record key")
634
366
}
635
367
636
-
let cid_str = cid.to_string();
637
-
let now = timestamp.unwrap_or_else(|| chrono::Utc::now().to_rfc3339());
638
-
let did = self.did.clone();
639
-
let repoRev = repoRev.to_string();
368
+
use rsky_pds::schema::pds::record::dsl as RecordSchema;
640
369
641
-
// Create the record for database insertion
642
-
let record_values = (
643
-
record::did.eq(&did),
644
-
record::uri.eq(&uri_str),
645
-
record::cid.eq(&cid_str),
646
-
record::collection.eq(&collection),
647
-
record::rkey.eq(&rkey),
648
-
record::repoRev.eq(&repoRev),
649
-
record::indexedAt.eq(&now),
650
-
);
651
-
652
-
self.db
653
-
.transaction(move |conn| {
654
-
// Track current version of record
655
-
diesel::insert_into(record::table)
656
-
.values(&record_values)
657
-
.on_conflict(record::uri)
370
+
// Track current version of record
371
+
let (record, uri) = self
372
+
.db
373
+
.run(move |conn| {
374
+
insert_into(RecordSchema::record)
375
+
.values(row)
376
+
.on_conflict(RecordSchema::uri)
658
377
.do_update()
659
378
.set((
660
-
record::cid.eq(&cid_str),
661
-
record::repoRev.eq(&repoRev),
662
-
record::indexedAt.eq(&now),
379
+
RecordSchema::cid.eq(cid.to_string()),
380
+
RecordSchema::repoRev.eq(&repo_rev),
381
+
RecordSchema::indexedAt.eq(&indexed_at),
663
382
))
664
-
.execute(conn)
665
-
.context("Failed to insert/update record")?;
383
+
.execute(conn)?;
384
+
Ok::<_, Error>((record, uri))
385
+
})
386
+
.await?;
666
387
667
-
// Maintain backlinks if record is provided
668
-
if let Some(record_value) = record {
669
-
let backlinks = get_backlinks(&uri, record_value)?;
670
-
671
-
if action == WriteOpAction::Update {
672
-
// On update, clear old backlinks first
673
-
diesel::delete(backlink::table)
674
-
.filter(backlink::uri.eq(&uri_str))
675
-
.execute(conn)
676
-
.context("Failed to delete existing backlinks")?;
677
-
}
678
-
679
-
if !backlinks.is_empty() {
680
-
// Insert all backlinks at once
681
-
let backlink_values: Vec<_> = backlinks
682
-
.into_iter()
683
-
.map(|backlink| {
684
-
(
685
-
backlink::uri.eq(&uri_str),
686
-
backlink::path.eq(&backlink.path),
687
-
backlink::linkTo.eq(&backlink.linkTo),
688
-
)
689
-
})
690
-
.collect();
691
-
692
-
diesel::insert_into(backlink::table)
693
-
.values(&backlink_values)
694
-
.on_conflict_do_nothing()
695
-
.execute(conn)
696
-
.context("Failed to insert backlinks")?;
697
-
}
698
-
}
699
-
700
-
tracing::info!("Indexed record {}", uri_str);
701
-
Ok(())
702
-
})
703
-
.await
388
+
if let Some(record) = record {
389
+
// Maintain backlinks
390
+
let backlinks = get_backlinks(&uri, &record)?;
391
+
if let WriteOpAction::Update = action {
392
+
// On update just recreate backlinks from scratch for the record, so we can clear out
393
+
// the old ones. E.g. for weird cases like updating a follow to be for a different did.
394
+
self.remove_backlinks_by_uri(&uri).await?;
395
+
}
396
+
self.add_backlinks(backlinks).await?;
397
+
}
398
+
tracing::debug!("@LOG DEBUG RecordReader::index_record, indexed record {uri}");
399
+
Ok(())
704
400
}
705
401
706
402
/// Delete a record from the database.
403
+
#[tracing::instrument(skip_all)]
707
404
pub(crate) async fn delete_record(&self, uri: &AtUri) -> Result<()> {
708
-
let uri_str = uri.to_string();
709
-
tracing::debug!("Deleting indexed record {}", uri_str);
710
-
405
+
tracing::debug!("@LOG DEBUG RecordReader::delete_record, deleting indexed record {uri}");
406
+
use rsky_pds::schema::pds::backlink::dsl as BacklinkSchema;
407
+
use rsky_pds::schema::pds::record::dsl as RecordSchema;
408
+
let uri = uri.to_string();
711
409
self.db
712
-
.transaction(move |conn| {
713
-
// Delete from record table
714
-
diesel::delete(record::table)
715
-
.filter(record::uri.eq(&uri_str))
716
-
.execute(conn)
717
-
.context("Failed to delete record")?;
718
-
719
-
// Delete from backlink table
720
-
diesel::delete(backlink::table)
721
-
.filter(backlink::uri.eq(&uri_str))
722
-
.execute(conn)
723
-
.context("Failed to delete record backlinks")?;
724
-
725
-
tracing::info!("Deleted indexed record {}", uri_str);
410
+
.run(move |conn| {
411
+
delete(RecordSchema::record)
412
+
.filter(RecordSchema::uri.eq(&uri))
413
+
.execute(conn)?;
414
+
delete(BacklinkSchema::backlink)
415
+
.filter(BacklinkSchema::uri.eq(&uri))
416
+
.execute(conn)?;
417
+
tracing::debug!(
418
+
"@LOG DEBUG RecordReader::delete_record, deleted indexed record {uri}"
419
+
);
726
420
Ok(())
727
421
})
728
422
.await
729
423
}
730
424
731
425
/// Remove backlinks for a URI.
732
-
pub(crate) async fn remove_backlinks_by_uri(&self, uri: &str) -> Result<()> {
733
-
let uri_str = uri.to_string();
734
-
426
+
pub(crate) async fn remove_backlinks_by_uri(&self, uri: &AtUri) -> Result<()> {
427
+
use rsky_pds::schema::pds::backlink::dsl as BacklinkSchema;
428
+
let uri = uri.to_string();
735
429
self.db
736
430
.run(move |conn| {
737
-
diesel::delete(backlink::table)
738
-
.filter(backlink::uri.eq(&uri_str))
739
-
.execute(conn)
740
-
.context("Failed to remove backlinks")?;
741
-
431
+
delete(BacklinkSchema::backlink)
432
+
.filter(BacklinkSchema::uri.eq(uri))
433
+
.execute(conn)?;
742
434
Ok(())
743
435
})
744
436
.await
···
746
438
747
439
/// Add backlinks to the database.
748
440
pub(crate) async fn add_backlinks(&self, backlinks: Vec<Backlink>) -> Result<()> {
749
-
if backlinks.is_empty() {
750
-
return Ok(());
441
+
if backlinks.len() == 0 {
442
+
Ok(())
443
+
} else {
444
+
use rsky_pds::schema::pds::backlink::dsl as BacklinkSchema;
445
+
self.db
446
+
.run(move |conn| {
447
+
insert_into(BacklinkSchema::backlink)
448
+
.values(&backlinks)
449
+
.on_conflict_do_nothing()
450
+
.execute(conn)?;
451
+
Ok(())
452
+
})
453
+
.await
751
454
}
752
-
753
-
self.db
754
-
.run(move |conn| {
755
-
let backlink_values: Vec<_> = backlinks
756
-
.into_iter()
757
-
.map(|backlink| {
758
-
(
759
-
backlink::uri.eq(&backlink.uri),
760
-
backlink::path.eq(&backlink.path),
761
-
backlink::linkTo.eq(&backlink.linkTo),
762
-
)
763
-
})
764
-
.collect();
765
-
766
-
diesel::insert_into(backlink::table)
767
-
.values(&backlink_values)
768
-
.on_conflict_do_nothing()
769
-
.execute(conn)
770
-
.context("Failed to add backlinks")?;
771
-
772
-
Ok(())
773
-
})
774
-
.await
775
455
}
776
456
777
457
/// Update the takedown status of a record.
···
780
460
uri: &AtUri,
781
461
takedown: StatusAttr,
782
462
) -> Result<()> {
783
-
let uri_str = uri.to_string();
784
-
let did = self.did.clone();
785
-
let takedownRef = if takedown.applied {
786
-
takedown
787
-
.r#ref
788
-
.or_else(|| Some(chrono::Utc::now().to_rfc3339()))
789
-
} else {
790
-
None
463
+
use rsky_pds::schema::pds::record::dsl as RecordSchema;
464
+
465
+
let takedown_ref: Option<String> = match takedown.applied {
466
+
true => match takedown.r#ref {
467
+
Some(takedown_ref) => Some(takedown_ref),
468
+
None => Some(rsky_common::now()),
469
+
},
470
+
false => None,
791
471
};
472
+
let uri_string = uri.to_string();
792
473
793
474
self.db
794
475
.run(move |conn| {
795
-
diesel::update(record::table)
796
-
.filter(record::did.eq(&did))
797
-
.filter(record::uri.eq(&uri_str))
798
-
.set(record::takedownRef.eq(takedownRef))
799
-
.execute(conn)
800
-
.context("Failed to update record takedown status")?;
801
-
476
+
update(RecordSchema::record)
477
+
.filter(RecordSchema::uri.eq(uri_string))
478
+
.set(RecordSchema::takedownRef.eq(takedown_ref))
479
+
.execute(conn)?;
802
480
Ok(())
803
481
})
804
482
.await
805
483
}
806
484
}
807
-
808
-
/// Extract backlinks from a record.
809
-
pub(super) fn get_backlinks(uri: &AtUri, record: &serde_json::Value) -> Result<Vec<Backlink>> {
810
-
let mut backlinks = Vec::new();
811
-
812
-
// Check for record type
813
-
if let Some(record_type) = record.get("$type").and_then(|t| t.as_str()) {
814
-
// Handle follow and block records
815
-
if record_type == "app.bsky.graph.follow" || record_type == "app.bsky.graph.block" {
816
-
if let Some(subject) = record.get("subject").and_then(|s| s.as_str()) {
817
-
// Verify it's a valid DID
818
-
if subject.starts_with("did:") {
819
-
backlinks.push(Backlink {
820
-
uri: uri.to_string(),
821
-
path: "subject".to_string(),
822
-
linkTo: subject.to_string(),
823
-
});
824
-
}
825
-
}
826
-
}
827
-
// Handle like and repost records
828
-
else if record_type == "app.bsky.feed.like" || record_type == "app.bsky.feed.repost" {
829
-
if let Some(subject) = record.get("subject") {
830
-
if let Some(subject_uri) = subject.get("uri").and_then(|u| u.as_str()) {
831
-
// Verify it's a valid AT URI
832
-
if subject_uri.starts_with("at://") {
833
-
backlinks.push(Backlink {
834
-
uri: uri.to_string(),
835
-
path: "subject.uri".to_string(),
836
-
linkTo: subject_uri.to_string(),
837
-
});
838
-
}
839
-
}
840
-
}
841
-
}
842
-
}
843
-
844
-
Ok(backlinks)
845
-
}
-467
src/actor_store/repo.rs
-467
src/actor_store/repo.rs
···
1
-
//! Repository operations for actor store.
2
-
3
-
use std::str::FromStr as _;
4
-
use std::sync::Arc;
5
-
6
-
use anyhow::{Context as _, Result};
7
-
use atrium_repo::Cid;
8
-
use cidv10::Cid as CidV10;
9
-
use diesel::prelude::*;
10
-
use rsky_repo::{
11
-
block_map::BlockMap,
12
-
cid_set::CidSet,
13
-
repo::Repo,
14
-
storage::{readable_blockstore::ReadableBlockstore as _, types::RepoStorage},
15
-
types::{
16
-
CommitAction, CommitData, CommitDataWithOps, CommitOp, PreparedBlobRef, PreparedWrite,
17
-
WriteOpAction, write_to_op,
18
-
},
19
-
util::format_data_key,
20
-
};
21
-
use rsky_syntax::aturi::AtUri;
22
-
use tokio::sync::RwLock;
23
-
24
-
use super::{
25
-
ActorDb,
26
-
blob::{BackgroundQueue, BlobHandler, BlobStorePlaceholder},
27
-
record::RecordHandler,
28
-
};
29
-
use crate::SigningKey;
30
-
31
-
use crate::actor_store::sql_repo::SqlRepoStorage;
32
-
33
-
/// Data for sync events.
34
-
pub(crate) struct SyncEventData {
35
-
/// The CID of the repository root.
36
-
pub cid: Cid,
37
-
/// The revision of the repository.
38
-
pub rev: String,
39
-
/// The blocks in the repository.
40
-
pub blocks: BlockMap,
41
-
}
42
-
43
-
/// Unified repository handler for the actor store with both read and write capabilities.
44
-
pub(crate) struct RepoHandler {
45
-
/// Actor DID
46
-
pub did: String,
47
-
/// Backend storage
48
-
pub storage: Arc<RwLock<dyn RepoStorage>>,
49
-
/// BlobReader for handling blob operations
50
-
pub blob: BlobHandler,
51
-
/// RecordHandler for handling record operations
52
-
pub record: RecordHandler,
53
-
/// BlobTransactor for handling blob writes
54
-
pub blob_transactor: BlobHandler,
55
-
/// RecordHandler for handling record writes
56
-
pub record_transactor: RecordHandler,
57
-
/// Signing keypair
58
-
pub signing_key: Option<Arc<SigningKey>>,
59
-
/// Background queue for async operations
60
-
pub background_queue: BackgroundQueue,
61
-
}
62
-
63
-
impl RepoHandler {
64
-
/// Create a new repository handler with read/write capabilities.
65
-
pub(crate) fn new(
66
-
db: ActorDb,
67
-
blobstore: BlobStorePlaceholder,
68
-
did: String,
69
-
signing_key: Arc<SigningKey>,
70
-
background_queue: BackgroundQueue,
71
-
) -> Self {
72
-
// Create readers
73
-
let blob = BlobHandler::new(db.clone(), blobstore.clone());
74
-
let record = RecordHandler::new(db.clone(), did.clone());
75
-
76
-
// Create storage backend with current timestamp
77
-
let now = chrono::Utc::now().to_rfc3339();
78
-
let storage = SqlRepoStorage::new(did.clone(), db.clone(), Some(now));
79
-
80
-
// Create transactors
81
-
let blob_transactor =
82
-
BlobHandler::new(db.clone(), blobstore.clone(), background_queue.clone());
83
-
let record_transactor = RecordHandler::new(db.clone(), blobstore);
84
-
85
-
Self {
86
-
did,
87
-
storage,
88
-
blob,
89
-
record,
90
-
blob_transactor,
91
-
record_transactor,
92
-
signing_key: Some(signing_key),
93
-
background_queue,
94
-
}
95
-
}
96
-
97
-
/// Get event data for synchronization.
98
-
pub(crate) async fn get_sync_event_data(&self) -> Result<SyncEventData> {
99
-
let root = self.storage.get_root_detailed().await?;
100
-
let blocks = self
101
-
.storage
102
-
.get_blocks(vec![CidV10::from_str(&root.cid.to_string()).unwrap()])
103
-
.await?;
104
-
105
-
Ok(SyncEventData {
106
-
cid: root.cid,
107
-
rev: root.rev,
108
-
blocks: blocks.blocks,
109
-
})
110
-
}
111
-
112
-
/// Try to load repository
113
-
pub(crate) async fn maybe_load_repo(&self) -> Result<Option<Repo>> {
114
-
match self.storage.get_root().await {
115
-
Some(cid) => {
116
-
let repo = Repo::load(&self.storage, cid).await?;
117
-
Ok(Some(repo))
118
-
}
119
-
None => Ok(None),
120
-
}
121
-
}
122
-
123
-
/// Create a new repository with prepared writes
124
-
pub(crate) async fn create_repo(
125
-
&self,
126
-
writes: Vec<PreparedWrite>,
127
-
) -> Result<CommitDataWithOps> {
128
-
let signing_key = self
129
-
.signing_key
130
-
.as_ref()
131
-
.ok_or_else(|| anyhow::anyhow!("No signing key available for write operations"))?;
132
-
133
-
// Convert writes to operations
134
-
let ops = writes
135
-
.iter()
136
-
.map(|w| write_to_op(w))
137
-
.collect::<Result<Vec<_>>>()?;
138
-
139
-
// Format the initial commit
140
-
let commit = Repo::format_init_commit(&self.storage, &self.did, signing_key, ops).await?;
141
-
142
-
// Apply the commit, index the writes, and process blobs in parallel
143
-
let results = futures::future::join3(
144
-
self.storage.apply_commit(commit.clone(), Some(true)),
145
-
self.index_writes(&writes, &commit.rev),
146
-
self.blob_transactor
147
-
.process_write_blobs(&commit.rev, writes.clone()),
148
-
)
149
-
.await;
150
-
151
-
// Check for errors
152
-
results.0.context("Failed to apply commit")?;
153
-
results.1.context("Failed to index writes")?;
154
-
results.2.context("Failed to process blobs")?;
155
-
156
-
// Create commit operations
157
-
let ops = writes
158
-
.iter()
159
-
.filter_map(|w| match w {
160
-
PreparedWrite::Create(c) | PreparedWrite::Update(c) => {
161
-
let uri = AtUri::from_str(&c.uri).ok()?;
162
-
Some(CommitOp {
163
-
action: CommitAction::Create,
164
-
path: format_data_key(uri.get_collection(), uri.get_rkey()),
165
-
cid: Some(c.cid),
166
-
prev: None,
167
-
})
168
-
}
169
-
PreparedWrite::Delete(_) => None,
170
-
})
171
-
.collect();
172
-
173
-
Ok(CommitDataWithOps {
174
-
commit_data: commit,
175
-
ops,
176
-
prev_data: None,
177
-
})
178
-
}
179
-
180
-
/// Process writes to the repository
181
-
pub(crate) async fn process_writes(
182
-
&self,
183
-
writes: Vec<PreparedWrite>,
184
-
swap_commit_cid: Option<Cid>,
185
-
) -> Result<CommitDataWithOps> {
186
-
// Check write limit
187
-
if writes.len() > 200 {
188
-
return Err(anyhow::anyhow!("Too many writes. Max: 200"));
189
-
}
190
-
191
-
// Format the commit
192
-
let commit = self.format_commit(writes.clone(), swap_commit_cid).await?;
193
-
194
-
// Check commit size limit (2MB)
195
-
if commit.commit_data.relevant_blocks.byte_size()? > 2_000_000 {
196
-
return Err(anyhow::anyhow!("Too many writes. Max event size: 2MB"));
197
-
}
198
-
199
-
// Apply the commit, index the writes, and process blobs in parallel
200
-
let results = futures::future::join3(
201
-
self.storage.apply_commit(commit.commit_data.clone(), None),
202
-
self.index_writes(&writes, &commit.commit_data.rev),
203
-
self.blob_transactor
204
-
.process_write_blobs(&commit.commit_data.rev, writes),
205
-
)
206
-
.await;
207
-
208
-
// Check for errors
209
-
results.0.context("Failed to apply commit")?;
210
-
results.1.context("Failed to index writes")?;
211
-
results.2.context("Failed to process blobs")?;
212
-
213
-
Ok(commit)
214
-
}
215
-
216
-
/// Format a commit for writes
217
-
pub(crate) async fn format_commit(
218
-
&self,
219
-
writes: Vec<PreparedWrite>,
220
-
swap_commit_cid: Option<Cid>,
221
-
) -> Result<CommitDataWithOps> {
222
-
// Ensure we have a signing key
223
-
let signing_key = self
224
-
.signing_key
225
-
.as_ref()
226
-
.ok_or_else(|| anyhow::anyhow!("No signing key available for write operations"))?;
227
-
228
-
// Get current root
229
-
let curr_root = self
230
-
.storage
231
-
.get_root_detailed()
232
-
.await
233
-
.context("Failed to get repository root")?;
234
-
235
-
// Check commit swap if requested
236
-
if let Some(swap) = swap_commit_cid {
237
-
if curr_root.cid != swap {
238
-
return Err(anyhow::anyhow!(
239
-
"Bad commit swap: current={}, expected={}",
240
-
curr_root.cid,
241
-
swap
242
-
));
243
-
}
244
-
}
245
-
246
-
// Cache the current revision for better performance
247
-
self.storage.cache_rev(&curr_root.rev).await?;
248
-
249
-
// Prepare collections for tracking changes
250
-
let mut new_record_cids = Vec::new();
251
-
let mut del_and_update_uris = Vec::new();
252
-
let mut commit_ops = Vec::new();
253
-
254
-
// Process each write to build operations and gather info
255
-
for write in &writes {
256
-
match write {
257
-
PreparedWrite::Create(w) => {
258
-
new_record_cids.push(w.cid);
259
-
let uri = AtUri::from_str(&w.uri)?;
260
-
commit_ops.push(CommitOp {
261
-
action: CommitAction::Create,
262
-
path: format_data_key(uri.get_collection(), uri.get_rkey()),
263
-
cid: Some(w.cid),
264
-
prev: None,
265
-
});
266
-
267
-
// Validate swap_cid conditions
268
-
if w.swap_cid.is_some() && w.swap_cid != Some(None) {
269
-
return Err(anyhow::anyhow!(
270
-
"Bad record swap: there should be no current record for a create"
271
-
));
272
-
}
273
-
}
274
-
PreparedWrite::Update(w) => {
275
-
new_record_cids.push(w.cid);
276
-
let uri = AtUri::from_str(&w.uri)?;
277
-
del_and_update_uris.push(uri.clone());
278
-
279
-
// Get the current record if it exists
280
-
let record = self.record.get_record(&uri, None, true).await?;
281
-
let curr_record = record.as_ref().map(|r| Cid::from_str(&r.cid).unwrap());
282
-
283
-
commit_ops.push(CommitOp {
284
-
action: CommitAction::Update,
285
-
path: format_data_key(uri.get_collection(), uri.get_rkey()),
286
-
cid: Some(w.cid),
287
-
prev: curr_record,
288
-
});
289
-
290
-
// Validate swap_cid conditions
291
-
if w.swap_cid.is_some() {
292
-
if w.swap_cid == Some(None) {
293
-
return Err(anyhow::anyhow!(
294
-
"Bad record swap: there should be a current record for an update"
295
-
));
296
-
}
297
-
298
-
if let Some(Some(swap)) = w.swap_cid {
299
-
if curr_record.is_some() && curr_record != Some(swap) {
300
-
return Err(anyhow::anyhow!(
301
-
"Bad record swap: current={:?}, expected={}",
302
-
curr_record,
303
-
swap
304
-
));
305
-
}
306
-
}
307
-
}
308
-
}
309
-
PreparedWrite::Delete(w) => {
310
-
let uri = AtUri::from_str(&w.uri)?;
311
-
del_and_update_uris.push(uri.clone());
312
-
313
-
// Get the current record if it exists
314
-
let record = self.record.get_record(&uri, None, true).await?;
315
-
let curr_record = record.as_ref().map(|r| Cid::from_str(&r.cid).unwrap());
316
-
317
-
commit_ops.push(CommitOp {
318
-
action: CommitAction::Delete,
319
-
path: format_data_key(uri.get_collection(), uri.get_rkey()),
320
-
cid: None,
321
-
prev: curr_record,
322
-
});
323
-
324
-
// Validate swap_cid conditions
325
-
if w.swap_cid.is_some() {
326
-
if w.swap_cid == Some(None) {
327
-
return Err(anyhow::anyhow!(
328
-
"Bad record swap: there should be a current record for a delete"
329
-
));
330
-
}
331
-
332
-
if let Some(Some(swap)) = w.swap_cid {
333
-
if curr_record.is_some() && curr_record != Some(swap) {
334
-
return Err(anyhow::anyhow!(
335
-
"Bad record swap: current={:?}, expected={}",
336
-
curr_record,
337
-
swap
338
-
));
339
-
}
340
-
}
341
-
}
342
-
}
343
-
}
344
-
}
345
-
346
-
// Load repository
347
-
let repo = Repo::load(&self.storage, curr_root.cid).await?;
348
-
let prev_data = repo.commit.data.clone();
349
-
350
-
// Convert writes to repo operations
351
-
let write_ops = writes
352
-
.iter()
353
-
.map(|w| write_to_op(w))
354
-
.collect::<Result<Vec<_>>>()?;
355
-
356
-
// Format the commit with the repository
357
-
let mut commit = repo.format_commit(write_ops, signing_key).await?;
358
-
359
-
// Find blocks that would be deleted but are referenced by another record
360
-
let dupe_record_cids = self
361
-
.get_duplicate_record_cids(&commit.removed_cids.to_list(), &del_and_update_uris)
362
-
.await?;
363
-
364
-
// Remove duplicates from removed_cids
365
-
for cid in &dupe_record_cids {
366
-
commit.removed_cids.delete(*cid);
367
-
}
368
-
369
-
// Find blocks that are relevant to ops but not included in diff
370
-
let new_record_blocks = commit.relevant_blocks.get_many(&new_record_cids)?;
371
-
if !new_record_blocks.missing.is_empty() {
372
-
let missing_blocks = self.storage.get_blocks(&new_record_blocks.missing).await?;
373
-
commit.relevant_blocks.add_map(missing_blocks.blocks)?;
374
-
}
375
-
376
-
Ok(CommitDataWithOps {
377
-
commit_data: commit,
378
-
ops: commit_ops,
379
-
prev_data: Some(prev_data),
380
-
})
381
-
}
382
-
383
-
/// Index writes to the database
384
-
pub(crate) async fn index_writes(&self, writes: &[PreparedWrite], rev: &str) -> Result<()> {
385
-
let timestamp = chrono::Utc::now().to_rfc3339();
386
-
387
-
for write in writes {
388
-
match write {
389
-
PreparedWrite::Create(w) => {
390
-
let uri = AtUri::from_str(&w.uri)?;
391
-
self.record_transactor
392
-
.index_record(
393
-
uri,
394
-
w.cid,
395
-
Some(&w.record),
396
-
WriteOpAction::Create,
397
-
rev,
398
-
Some(timestamp.clone()),
399
-
)
400
-
.await?;
401
-
}
402
-
PreparedWrite::Update(w) => {
403
-
let uri = AtUri::from_str(&w.uri)?;
404
-
self.record_transactor
405
-
.index_record(
406
-
uri,
407
-
w.cid,
408
-
Some(&w.record),
409
-
WriteOpAction::Update,
410
-
rev,
411
-
Some(timestamp.clone()),
412
-
)
413
-
.await?;
414
-
}
415
-
PreparedWrite::Delete(w) => {
416
-
let uri = AtUri::from_str(&w.uri)?;
417
-
self.record_transactor.delete_record(&uri).await?;
418
-
}
419
-
}
420
-
}
421
-
422
-
Ok(())
423
-
}
424
-
425
-
/// Get record CIDs that are duplicated elsewhere in the repository
426
-
pub(crate) async fn get_duplicate_record_cids(
427
-
&self,
428
-
cids: &[Cid],
429
-
touched_uris: &[AtUri],
430
-
) -> Result<Vec<Cid>> {
431
-
if touched_uris.is_empty() || cids.is_empty() {
432
-
return Ok(Vec::new());
433
-
}
434
-
435
-
// Convert URIs to strings for the query
436
-
let uri_strings: Vec<String> = touched_uris.iter().map(|u| u.to_string()).collect();
437
-
438
-
// Convert CIDs to strings for the query
439
-
let cid_strings: Vec<String> = cids.iter().map(|c| c.to_string()).collect();
440
-
441
-
let did = self.did.clone();
442
-
443
-
// Query for records with these CIDs that aren't in the touched URIs
444
-
let duplicate_cids = self
445
-
.storage
446
-
.db
447
-
.run(move |conn| {
448
-
use rsky_pds::schema::pds::record::dsl::*;
449
-
450
-
record
451
-
.filter(did.eq(&did))
452
-
.filter(cid.eq_any(&cid_strings))
453
-
.filter(uri.ne_all(&uri_strings))
454
-
.select(cid)
455
-
.load::<String>(conn)
456
-
})
457
-
.await?;
458
-
459
-
// Convert strings back to CIDs
460
-
let cids = duplicate_cids
461
-
.into_iter()
462
-
.filter_map(|c| Cid::from_str(&c).ok())
463
-
.collect();
464
-
465
-
Ok(cids)
466
-
}
467
-
}
+224
src/actor_store/sql_blob.rs
+224
src/actor_store/sql_blob.rs
···
1
+
use std::{path::PathBuf, str::FromStr as _};
2
+
3
+
use anyhow::Result;
4
+
use cidv10::Cid;
5
+
use rsky_common::get_random_str;
6
+
7
+
use crate::db::DatabaseConnection;
8
+
9
+
/// Type for stream of blob data
10
+
pub type BlobStream = Box<dyn std::io::Read + Send>;
11
+
12
+
/// Blob store for the SQL-backed actor store.
/// Work in progress: most methods in `impl BlobStoreSql` are still `todo!()` stubs.
13
+
#[derive(Clone)]
14
+
pub(crate) struct BlobStoreSql {
15
+
// Database connection handle.
client: DatabaseConnection,
16
+
// NOTE(review): presumably the base location for stored blobs; no method reads it yet — confirm.
path: PathBuf,
17
+
}
18
+
19
+
impl BlobStoreSql {
20
+
pub fn new(did: String, cfg: &SdkConfig) -> Self {
21
+
// let client = aws_sdk_s3::Client::new(cfg);
22
+
// BlobStorePlaceholder {
23
+
// client,
24
+
// bucket: did,
25
+
// }
26
+
todo!();
27
+
}
28
+
29
+
pub fn creator(cfg: &SdkConfig) -> Box<dyn Fn(String) -> BlobStoreSql + '_> {
30
+
Box::new(move |did: String| BlobStoreSql::new(did, cfg))
31
+
}
32
+
33
+
fn gen_key(&self) -> String {
34
+
get_random_str()
35
+
}
36
+
37
+
fn get_tmp_path(&self, key: &String) -> String {
38
+
// format!("tmp/{0}/{1}", self.bucket, key)
39
+
todo!();
40
+
}
41
+
42
+
fn get_stored_path(&self, cid: Cid) -> String {
43
+
// format!("blocks/{0}/{1}", self.bucket, cid)
44
+
todo!();
45
+
}
46
+
47
+
fn get_quarantined_path(&self, cid: Cid) -> String {
48
+
// format!("quarantine/{0}/{1}", self.bucket, cid)
49
+
todo!();
50
+
}
51
+
52
+
pub async fn put_temp(&self, bytes: Vec<u8>) -> Result<String> {
53
+
let key = self.gen_key();
54
+
// let body = ByteStream::from(bytes);
55
+
// self.client
56
+
// .put_object()
57
+
// .body(body)
58
+
// .bucket(&self.bucket)
59
+
// .key(self.get_tmp_path(&key))
60
+
// .acl(ObjectCannedAcl::PublicRead)
61
+
// .send()
62
+
// .await?;
63
+
// Ok(key)
64
+
todo!();
65
+
}
66
+
67
+
pub async fn make_permanent(&self, key: String, cid: Cid) -> Result<()> {
68
+
// let already_has = self.has_stored(cid).await?;
69
+
// if !already_has {
70
+
// Ok(self
71
+
// .move_object(MoveObject {
72
+
// from: self.get_tmp_path(&key),
73
+
// to: self.get_stored_path(cid),
74
+
// })
75
+
// .await?)
76
+
// } else {
77
+
// // already saved, so we no-op & just delete the temp
78
+
// Ok(self.delete_key(self.get_tmp_path(&key)).await?)
79
+
// }
80
+
todo!();
81
+
}
82
+
83
+
pub async fn put_permanent(&self, cid: Cid, bytes: Vec<u8>) -> Result<()> {
84
+
// let body = ByteStream::from(bytes);
85
+
// self.client
86
+
// .put_object()
87
+
// .body(body)
88
+
// .bucket(&self.bucket)
89
+
// .key(self.get_stored_path(cid))
90
+
// .acl(ObjectCannedAcl::PublicRead)
91
+
// .send()
92
+
// .await?;
93
+
// Ok(())
94
+
todo!();
95
+
}
96
+
97
+
pub async fn quarantine(&self, cid: Cid) -> Result<()> {
98
+
// self.move_object(MoveObject {
99
+
// from: self.get_stored_path(cid),
100
+
// to: self.get_quarantined_path(cid),
101
+
// })
102
+
// .await
103
+
todo!();
104
+
}
105
+
106
+
pub async fn unquarantine(&self, cid: Cid) -> Result<()> {
107
+
// self.move_object(MoveObject {
108
+
// from: self.get_quarantined_path(cid),
109
+
// to: self.get_stored_path(cid),
110
+
// })
111
+
// .await
112
+
todo!();
113
+
}
114
+
115
+
async fn get_object(&self, cid: Cid) -> Result<ByteStream> {
116
+
// let res = self
117
+
// .client
118
+
// .get_object()
119
+
// .bucket(&self.bucket)
120
+
// .key(self.get_stored_path(cid))
121
+
// .send()
122
+
// .await;
123
+
// match res {
124
+
// Ok(res) => Ok(res.body),
125
+
// Err(SdkError::ServiceError(s)) => Err(anyhow::Error::new(s.into_err())),
126
+
// Err(e) => Err(anyhow::Error::new(e.into_service_error())),
127
+
// }
128
+
todo!();
129
+
}
130
+
131
+
pub async fn get_bytes(&self, cid: Cid) -> Result<Vec<u8>> {
132
+
let res = self.get_object(cid).await?;
133
+
let bytes = res.collect().await.map(|data| data.into_bytes())?;
134
+
Ok(bytes.to_vec())
135
+
}
136
+
137
+
pub async fn get_stream(&self, cid: Cid) -> Result<ByteStream> {
138
+
self.get_object(cid).await
139
+
}
140
+
141
+
pub async fn delete(&self, cid: String) -> Result<()> {
142
+
self.delete_key(self.get_stored_path(Cid::from_str(&cid)?))
143
+
.await
144
+
}
145
+
146
+
pub async fn delete_many(&self, cids: Vec<Cid>) -> Result<()> {
147
+
let keys: Vec<String> = cids
148
+
.into_iter()
149
+
.map(|cid| self.get_stored_path(cid))
150
+
.collect();
151
+
self.delete_many_keys(keys).await
152
+
}
153
+
154
+
pub async fn has_stored(&self, cid: Cid) -> Result<bool> {
155
+
Ok(self.has_key(self.get_stored_path(cid)).await)
156
+
}
157
+
158
+
pub async fn has_temp(&self, key: String) -> Result<bool> {
159
+
Ok(self.has_key(self.get_tmp_path(&key)).await)
160
+
}
161
+
162
+
async fn has_key(&self, key: String) -> bool {
163
+
// let res = self
164
+
// .client
165
+
// .head_object()
166
+
// .bucket(&self.bucket)
167
+
// .key(key)
168
+
// .send()
169
+
// .await;
170
+
// res.is_ok()
171
+
todo!();
172
+
}
173
+
174
+
async fn delete_key(&self, key: String) -> Result<()> {
175
+
// self.client
176
+
// .delete_object()
177
+
// .bucket(&self.bucket)
178
+
// .key(key)
179
+
// .send()
180
+
// .await?;
181
+
// Ok(())
182
+
todo!();
183
+
}
184
+
185
+
async fn delete_many_keys(&self, keys: Vec<String>) -> Result<()> {
186
+
// let objects: Vec<ObjectIdentifier> = keys
187
+
// .into_iter()
188
+
// .map(|key| Ok(ObjectIdentifier::builder().key(key).build()?))
189
+
// .collect::<Result<Vec<ObjectIdentifier>>>()?;
190
+
// let deletes = Delete::builder().set_objects(Some(objects)).build()?;
191
+
// self.client
192
+
// .delete_objects()
193
+
// .bucket(&self.bucket)
194
+
// .delete(deletes)
195
+
// .send()
196
+
// .await?;
197
+
// Ok(())
198
+
todo!();
199
+
}
200
+
201
+
async fn move_object(&self, keys: MoveObject) -> Result<()> {
202
+
// self.client
203
+
// .copy_object()
204
+
// .bucket(&self.bucket)
205
+
// .copy_source(format!(
206
+
// "{0}/{1}/{2}",
207
+
// env_str("AWS_ENDPOINT_BUCKET").unwrap(),
208
+
// self.bucket,
209
+
// keys.from
210
+
// ))
211
+
// .key(keys.to)
212
+
// .acl(ObjectCannedAcl::PublicRead)
213
+
// .send()
214
+
// .await?;
215
+
// self.client
216
+
// .delete_object()
217
+
// .bucket(&self.bucket)
218
+
// .key(keys.from)
219
+
// .send()
220
+
// .await?;
221
+
// Ok(())
222
+
todo!();
223
+
}
224
+
}
+327
-412
src/actor_store/sql_repo.rs
+327
-412
src/actor_store/sql_repo.rs
···
1
-
use anyhow::{Context as _, Result};
2
-
use atrium_repo::Cid;
3
-
use atrium_repo::blockstore::{
4
-
AsyncBlockStoreRead, AsyncBlockStoreWrite, Error as BlockstoreError,
5
-
};
1
+
//! Based on https://github.com/blacksky-algorithms/rsky/blob/main/rsky-pds/src/actor_store/repo/sql_repo.rs
2
+
//! blacksky-algorithms/rsky is licensed under the Apache License 2.0
3
+
//!
4
+
//! Modified for SQLite backend
5
+
6
+
use anyhow::Result;
7
+
use cidv10::Cid;
8
+
use diesel::dsl::sql;
6
9
use diesel::prelude::*;
7
-
use diesel::r2d2::{self, ConnectionManager};
8
-
use diesel::sqlite::SqliteConnection;
10
+
use diesel::sql_types::{Bool, Text};
11
+
use diesel::*;
9
12
use futures::{StreamExt, TryStreamExt, stream};
10
-
use rsky_pds::models::{RepoBlock, RepoRoot};
13
+
use rsky_pds::models;
14
+
use rsky_pds::models::RepoBlock;
11
15
use rsky_repo::block_map::{BlockMap, BlocksAndMissing};
12
16
use rsky_repo::car::blocks_to_car_file;
13
17
use rsky_repo::cid_set::CidSet;
···
16
20
use rsky_repo::storage::readable_blockstore::ReadableBlockstore;
17
21
use rsky_repo::storage::types::RepoStorage;
18
22
use rsky_repo::types::CommitData;
19
-
use sha2::{Digest, Sha256};
20
-
use std::future::Future;
21
23
use std::pin::Pin;
22
24
use std::str::FromStr;
23
25
use std::sync::Arc;
24
26
use tokio::sync::RwLock;
25
27
26
-
use crate::actor_store::db::ActorDb;
28
+
use super::ActorDb;
27
29
28
30
#[derive(Clone, Debug)]
29
-
pub struct SqlRepoStorage {
30
-
/// In-memory cache for blocks
31
+
pub struct SqlRepoReader {
31
32
pub cache: Arc<RwLock<BlockMap>>,
32
-
/// Database connection
33
33
pub db: ActorDb,
34
-
/// DID of the actor
35
-
pub did: String,
36
-
/// Current timestamp
34
+
pub root: Option<Cid>,
35
+
pub rev: Option<String>,
37
36
pub now: String,
38
-
}
39
-
40
-
impl SqlRepoStorage {
41
-
/// Create a new SQL repository storage
42
-
pub fn new(did: String, db: ActorDb, now: Option<String>) -> Self {
43
-
let now = now.unwrap_or_else(|| chrono::Utc::now().to_rfc3339());
44
-
45
-
Self {
46
-
cache: Arc::new(RwLock::new(BlockMap::new())),
47
-
db,
48
-
did,
49
-
now,
50
-
}
51
-
}
52
-
53
-
/// Get the CAR stream for the repository
54
-
pub async fn get_car_stream(&self, since: Option<String>) -> Result<Vec<u8>> {
55
-
match self.get_root().await {
56
-
None => Err(anyhow::Error::new(RepoRootNotFoundError)),
57
-
Some(root) => {
58
-
let mut car = BlockMap::new();
59
-
let mut cursor: Option<CidAndRev> = None;
60
-
61
-
loop {
62
-
let blocks = self.get_block_range(&since, &cursor).await?;
63
-
if blocks.is_empty() {
64
-
break;
65
-
}
66
-
67
-
// Add blocks to car
68
-
for block in &blocks {
69
-
car.set(Cid::from_str(&block.cid)?, block.content.clone());
70
-
}
71
-
72
-
if let Some(last_block) = blocks.last() {
73
-
cursor = Some(CidAndRev {
74
-
cid: Cid::from_str(&last_block.cid)?,
75
-
rev: last_block.repoRev.clone(),
76
-
});
77
-
} else {
78
-
break;
79
-
}
80
-
}
81
-
82
-
blocks_to_car_file(Some(&root), car).await
83
-
}
84
-
}
85
-
}
86
-
87
-
/// Get a range of blocks from the database
88
-
pub async fn get_block_range(
89
-
&self,
90
-
since: &Option<String>,
91
-
cursor: &Option<CidAndRev>,
92
-
) -> Result<Vec<RepoBlock>> {
93
-
let did = self.did.clone();
94
-
95
-
self.db
96
-
.run(move |conn| {
97
-
use rsky_pds::schema::pds::repo_block::dsl::*;
98
-
99
-
let mut query = repo_block.filter(did.eq(&did)).limit(500).into_boxed();
100
-
101
-
if let Some(c) = cursor {
102
-
query = query.filter(
103
-
repoRev
104
-
.lt(&c.rev)
105
-
.or(repoRev.eq(&c.rev).and(cid.lt(&c.cid.to_string()))),
106
-
);
107
-
}
108
-
109
-
if let Some(s) = since {
110
-
query = query.filter(repoRev.gt(s));
111
-
}
112
-
113
-
query
114
-
.order((repoRev.desc(), cid.desc()))
115
-
.load::<RepoBlock>(conn)
116
-
})
117
-
.await
118
-
}
119
-
120
-
/// Count total blocks for this repository
121
-
pub async fn count_blocks(&self) -> Result<i64> {
122
-
let did = self.did.clone();
123
-
124
-
self.db
125
-
.run(move |conn| {
126
-
use rsky_pds::schema::pds::repo_block::dsl::*;
127
-
128
-
repo_block.filter(did.eq(&did)).count().get_result(conn)
129
-
})
130
-
.await
131
-
}
132
-
133
-
/// Proactively cache blocks from a specific revision
134
-
pub async fn cache_rev(&mut self, rev: &str) -> Result<()> {
135
-
let did = self.did.clone();
136
-
let rev_string = rev.to_string();
137
-
138
-
let blocks = self
139
-
.db
140
-
.run(move |conn| {
141
-
use rsky_pds::schema::pds::repo_block::dsl::*;
142
-
143
-
repo_block
144
-
.filter(did.eq(&did))
145
-
.filter(repoRev.eq(&rev_string))
146
-
.select((cid, content))
147
-
.limit(15)
148
-
.load::<(String, Vec<u8>)>(conn)
149
-
})
150
-
.await?;
151
-
152
-
let mut cache_guard = self.cache.write().await;
153
-
for (cid_str, content) in blocks {
154
-
let cid = Cid::from_str(&cid_str)?;
155
-
cache_guard.set(cid, content);
156
-
}
157
-
158
-
Ok(())
159
-
}
160
-
161
-
/// Delete multiple blocks by their CIDs
162
-
pub async fn delete_many(&self, cids: Vec<Cid>) -> Result<()> {
163
-
if cids.is_empty() {
164
-
return Ok(());
165
-
}
166
-
167
-
let did = self.did.clone();
168
-
let cid_strings: Vec<String> = cids.into_iter().map(|c| c.to_string()).collect();
169
-
170
-
// Process in chunks to avoid too many parameters
171
-
for chunk in cid_strings.chunks(100) {
172
-
let chunk_vec = chunk.to_vec();
173
-
let did_clone = did.clone();
174
-
175
-
self.db
176
-
.run(move |conn| {
177
-
use rsky_pds::schema::pds::repo_block::dsl::*;
178
-
179
-
diesel::delete(repo_block)
180
-
.filter(did.eq(&did_clone))
181
-
.filter(cid.eq_any(&chunk_vec))
182
-
.execute(conn)
183
-
})
184
-
.await?;
185
-
}
186
-
187
-
Ok(())
188
-
}
189
-
190
-
/// Get the detailed root information
191
-
pub async fn get_root_detailed(&self) -> Result<CidAndRev> {
192
-
let did = self.did.clone();
193
-
194
-
let root = self
195
-
.db
196
-
.run(move |conn| {
197
-
use rsky_pds::schema::pds::repo_root::dsl::*;
198
-
199
-
repo_root
200
-
.filter(did.eq(&did))
201
-
.first::<RepoRoot>(conn)
202
-
.optional()
203
-
})
204
-
.await?;
205
-
206
-
match root {
207
-
Some(r) => Ok(CidAndRev {
208
-
cid: Cid::from_str(&r.cid)?,
209
-
rev: r.rev,
210
-
}),
211
-
None => Err(anyhow::Error::new(RepoRootNotFoundError)),
212
-
}
213
-
}
37
+
pub did: String,
214
38
}
215
39
216
-
impl ReadableBlockstore for SqlRepoStorage {
40
+
impl ReadableBlockstore for SqlRepoReader {
217
41
fn get_bytes<'a>(
218
42
&'a self,
219
43
cid: &'a Cid,
220
44
) -> Pin<Box<dyn Future<Output = Result<Option<Vec<u8>>>> + Send + Sync + 'a>> {
221
-
let did = self.did.clone();
45
+
let did: String = self.did.clone();
46
+
let db: ActorDb = self.db.clone();
222
47
let cid = cid.clone();
223
48
224
49
Box::pin(async move {
225
-
// Check cache first
226
-
{
50
+
use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema;
51
+
let cached = {
227
52
let cache_guard = self.cache.read().await;
228
-
if let Some(cached) = cache_guard.get(cid) {
229
-
return Ok(Some(cached.clone()));
230
-
}
53
+
cache_guard.get(cid).map(|v| v.clone())
54
+
};
55
+
if let Some(cached_result) = cached {
56
+
return Ok(Some(cached_result.clone()));
231
57
}
232
58
233
-
// Not in cache, query database
234
-
let cid_str = cid.to_string();
235
-
let result = self
236
-
.db
59
+
let found: Option<Vec<u8>> = db
237
60
.run(move |conn| {
238
-
use rsky_pds::schema::pds::repo_block::dsl::*;
239
-
240
-
repo_block
241
-
.filter(did.eq(&did))
242
-
.filter(cid.eq(&cid_str))
243
-
.select(content)
244
-
.first::<Vec<u8>>(conn)
61
+
RepoBlockSchema::repo_block
62
+
.filter(RepoBlockSchema::cid.eq(cid.to_string()))
63
+
.filter(RepoBlockSchema::did.eq(did))
64
+
.select(RepoBlockSchema::content)
65
+
.first(conn)
245
66
.optional()
246
67
})
247
68
.await?;
248
-
249
-
// Update cache if found
250
-
if let Some(content) = &result {
251
-
let mut cache_guard = self.cache.write().await;
252
-
cache_guard.set(cid, content.clone());
69
+
match found {
70
+
None => Ok(None),
71
+
Some(result) => {
72
+
{
73
+
let mut cache_guard = self.cache.write().await;
74
+
cache_guard.set(cid, result.clone());
75
+
}
76
+
Ok(Some(result))
77
+
}
253
78
}
254
-
255
-
Ok(result)
256
79
})
257
80
}
258
81
···
261
84
cid: Cid,
262
85
) -> Pin<Box<dyn Future<Output = Result<bool>> + Send + Sync + 'a>> {
263
86
Box::pin(async move {
264
-
let bytes = self.get_bytes(&cid).await?;
265
-
Ok(bytes.is_some())
87
+
let got = <Self as ReadableBlockstore>::get_bytes(self, &cid).await?;
88
+
Ok(got.is_some())
266
89
})
267
90
}
268
91
···
270
93
&'a self,
271
94
cids: Vec<Cid>,
272
95
) -> Pin<Box<dyn Future<Output = Result<BlocksAndMissing>> + Send + Sync + 'a>> {
96
+
let did: String = self.did.clone();
97
+
let db: ActorDb = self.db.clone();
98
+
273
99
Box::pin(async move {
274
-
// Check cache first
100
+
use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema;
275
101
let cached = {
276
102
let mut cache_guard = self.cache.write().await;
277
-
cache_guard.get_many(cids.clone())?
103
+
cache_guard.get_many(cids)?
278
104
};
279
105
280
106
if cached.missing.is_empty() {
281
107
return Ok(cached);
282
108
}
283
-
284
-
// Prepare data structures for missing blocks
285
109
let missing = CidSet::new(Some(cached.missing.clone()));
286
110
let missing_strings: Vec<String> =
287
-
cached.missing.iter().map(|c| c.to_string()).collect();
288
-
let did = self.did.clone();
111
+
cached.missing.into_iter().map(|c| c.to_string()).collect();
112
+
113
+
let blocks = Arc::new(tokio::sync::Mutex::new(BlockMap::new()));
114
+
let missing_set = Arc::new(tokio::sync::Mutex::new(missing));
115
+
116
+
let _: Vec<_> = stream::iter(missing_strings.chunks(500))
117
+
.then(|batch| {
118
+
let this_db = db.clone();
119
+
let this_did = did.clone();
120
+
let blocks = Arc::clone(&blocks);
121
+
let missing = Arc::clone(&missing_set);
122
+
let batch = batch.to_vec(); // Convert to owned Vec
289
123
290
-
// Create block map for results
291
-
let mut blocks = BlockMap::new();
292
-
let mut missing_set = CidSet::new(Some(cached.missing.clone()));
124
+
async move {
125
+
// Database query
126
+
let rows: Vec<(String, Vec<u8>)> = this_db
127
+
.run(move |conn| {
128
+
RepoBlockSchema::repo_block
129
+
.filter(RepoBlockSchema::cid.eq_any(batch))
130
+
.filter(RepoBlockSchema::did.eq(this_did))
131
+
.select((RepoBlockSchema::cid, RepoBlockSchema::content))
132
+
.load(conn)
133
+
})
134
+
.await?;
293
135
294
-
// Query database in chunks
295
-
for chunk in missing_strings.chunks(100) {
296
-
let chunk_vec = chunk.to_vec();
297
-
let did_clone = did.clone();
136
+
// Process rows with locked access
137
+
let mut blocks = blocks.lock().await;
138
+
let mut missing = missing.lock().await;
298
139
299
-
let rows = self
300
-
.db
301
-
.run(move |conn| {
302
-
use rsky_pds::schema::pds::repo_block::dsl::*;
140
+
for row in rows {
141
+
let cid = Cid::from_str(&row.0)?; // Proper error handling
142
+
blocks.set(cid, row.1);
143
+
missing.delete(cid);
144
+
}
303
145
304
-
repo_block
305
-
.filter(did.eq(&did_clone))
306
-
.filter(cid.eq_any(&chunk_vec))
307
-
.select((cid, content))
308
-
.load::<(String, Vec<u8>)>(conn)
309
-
})
310
-
.await?;
146
+
Ok::<(), anyhow::Error>(())
147
+
}
148
+
})
149
+
.try_collect()
150
+
.await?;
311
151
312
-
// Process results
313
-
for (cid_str, content) in rows {
314
-
let block_cid = Cid::from_str(&cid_str)?;
315
-
blocks.set(block_cid, content.clone());
316
-
missing_set.delete(block_cid);
152
+
// Extract values from synchronization primitives
153
+
let mut blocks = Arc::try_unwrap(blocks)
154
+
.expect("Arc still has owners")
155
+
.into_inner();
156
+
let missing = Arc::try_unwrap(missing_set)
157
+
.expect("Arc still has owners")
158
+
.into_inner();
317
159
318
-
// Update cache
319
-
let mut cache_guard = self.cache.write().await;
320
-
cache_guard.set(block_cid, content);
321
-
}
160
+
{
161
+
let mut cache_guard = self.cache.write().await;
162
+
cache_guard.add_map(blocks.clone())?;
322
163
}
323
164
324
-
// Combine with cached blocks
325
165
blocks.add_map(cached.blocks)?;
326
166
327
167
Ok(BlocksAndMissing {
328
168
blocks,
329
-
missing: missing_set.to_list(),
169
+
missing: missing.to_list(),
330
170
})
331
171
})
332
172
}
333
173
}
334
174
335
-
impl RepoStorage for SqlRepoStorage {
175
+
impl RepoStorage for SqlRepoReader {
336
176
fn get_root<'a>(&'a self) -> Pin<Box<dyn Future<Output = Option<Cid>> + Send + Sync + 'a>> {
337
177
Box::pin(async move {
338
178
match self.get_root_detailed().await {
···
348
188
bytes: Vec<u8>,
349
189
rev: String,
350
190
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + Sync + 'a>> {
351
-
let did = self.did.clone();
352
-
let bytes_clone = bytes.clone();
353
-
191
+
let did: String = self.did.clone();
192
+
let db: ActorDb = self.db.clone();
193
+
let bytes_cloned = bytes.clone();
354
194
Box::pin(async move {
355
-
let cid_str = cid.to_string();
356
-
let size = bytes.len() as i32;
195
+
use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema;
357
196
358
-
self.db
359
-
.run(move |conn| {
360
-
use rsky_pds::schema::pds::repo_block::dsl::*;
361
-
362
-
diesel::insert_into(repo_block)
363
-
.values((
364
-
did.eq(&did),
365
-
cid.eq(&cid_str),
366
-
repoRev.eq(&rev),
367
-
size.eq(size),
368
-
content.eq(&bytes),
369
-
))
370
-
.on_conflict_do_nothing()
371
-
.execute(conn)
372
-
})
373
-
.await?;
374
-
375
-
// Update cache
376
-
let mut cache_guard = self.cache.write().await;
377
-
cache_guard.set(cid, bytes_clone);
378
-
197
+
db.run(move |conn| {
198
+
insert_into(RepoBlockSchema::repo_block)
199
+
.values((
200
+
RepoBlockSchema::did.eq(did),
201
+
RepoBlockSchema::cid.eq(cid.to_string()),
202
+
RepoBlockSchema::repoRev.eq(rev),
203
+
RepoBlockSchema::size.eq(bytes.len() as i32),
204
+
RepoBlockSchema::content.eq(bytes),
205
+
))
206
+
.execute(conn)
207
+
})
208
+
.await?;
209
+
{
210
+
let mut cache_guard = self.cache.write().await;
211
+
cache_guard.set(cid, bytes_cloned);
212
+
}
379
213
Ok(())
380
214
})
381
215
}
···
385
219
to_put: BlockMap,
386
220
rev: String,
387
221
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + Sync + 'a>> {
388
-
let did = self.did.clone();
222
+
let did: String = self.did.clone();
223
+
let db: ActorDb = self.db.clone();
389
224
390
225
Box::pin(async move {
391
-
if to_put.size() == 0 {
392
-
return Ok(());
393
-
}
226
+
use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema;
394
227
395
-
// Prepare blocks for insertion
396
-
let blocks: Vec<(String, String, String, i32, Vec<u8>)> = to_put
228
+
let blocks: Vec<RepoBlock> = to_put
397
229
.map
398
230
.iter()
399
-
.map(|(cid, bytes)| {
400
-
(
401
-
did.clone(),
402
-
cid.to_string(),
403
-
rev.clone(),
404
-
bytes.0.len() as i32,
405
-
bytes.0.clone(),
406
-
)
231
+
.map(|(cid, bytes)| RepoBlock {
232
+
cid: cid.to_string(),
233
+
did: did.clone(),
234
+
repo_rev: rev.clone(),
235
+
size: bytes.0.len() as i32,
236
+
content: bytes.0.clone(),
407
237
})
408
238
.collect();
409
239
410
-
// Process in chunks
411
-
for chunk in blocks.chunks(50) {
412
-
let chunk_vec = chunk.to_vec();
240
+
let chunks: Vec<Vec<RepoBlock>> =
241
+
blocks.chunks(50).map(|chunk| chunk.to_vec()).collect();
413
242
414
-
self.db
415
-
.run(move |conn| {
416
-
use rsky_pds::schema::pds::repo_block::dsl::*;
417
-
418
-
let values: Vec<_> = chunk_vec
419
-
.iter()
420
-
.map(|(did_val, cid_val, rev_val, size_val, content_val)| {
421
-
(
422
-
did.eq(did_val),
423
-
cid.eq(cid_val),
424
-
repoRev.eq(rev_val),
425
-
size.eq(*size_val),
426
-
content.eq(content_val),
427
-
)
428
-
})
429
-
.collect();
430
-
431
-
diesel::insert_into(repo_block)
432
-
.values(&values)
433
-
.on_conflict_do_nothing()
434
-
.execute(conn)
435
-
})
436
-
.await?;
437
-
}
438
-
439
-
// Update cache with all blocks
440
-
{
441
-
let mut cache_guard = self.cache.write().await;
442
-
for (cid, bytes) in &to_put.map {
443
-
cache_guard.set(*cid, bytes.0.clone());
444
-
}
445
-
}
243
+
let _: Vec<_> = stream::iter(chunks)
244
+
.then(|batch| {
245
+
let db = db.clone();
246
+
async move {
247
+
db.run(move |conn| {
248
+
insert_into(RepoBlockSchema::repo_block)
249
+
.values(batch)
250
+
.on_conflict_do_nothing()
251
+
.execute(conn)
252
+
.map(|_| ())
253
+
})
254
+
.await
255
+
.map_err(anyhow::Error::from)
256
+
}
257
+
})
258
+
.collect::<Vec<_>>()
259
+
.await
260
+
.into_iter()
261
+
.collect::<Result<Vec<()>>>()?;
446
262
447
263
Ok(())
448
264
})
449
265
}
450
-
451
266
fn update_root<'a>(
452
267
&'a self,
453
268
cid: Cid,
454
269
rev: String,
455
270
is_create: Option<bool>,
456
271
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + Sync + 'a>> {
457
-
let did = self.did.clone();
458
-
let now = self.now.clone();
459
-
let is_create = is_create.unwrap_or(false);
272
+
let did: String = self.did.clone();
273
+
let db: ActorDb = self.db.clone();
274
+
let now: String = self.now.clone();
460
275
461
276
Box::pin(async move {
462
-
let cid_str = cid.to_string();
277
+
use rsky_pds::schema::pds::repo_root::dsl as RepoRootSchema;
463
278
279
+
let is_create = is_create.unwrap_or(false);
464
280
if is_create {
465
-
// Insert new root
466
-
self.db
467
-
.run(move |conn| {
468
-
use rsky_pds::schema::pds::repo_root::dsl::*;
469
-
470
-
diesel::insert_into(repo_root)
471
-
.values((
472
-
did.eq(&did),
473
-
cid.eq(&cid_str),
474
-
rev.eq(&rev),
475
-
indexedAt.eq(&now),
476
-
))
477
-
.execute(conn)
478
-
})
479
-
.await?;
281
+
db.run(move |conn| {
282
+
insert_into(RepoRootSchema::repo_root)
283
+
.values((
284
+
RepoRootSchema::did.eq(did),
285
+
RepoRootSchema::cid.eq(cid.to_string()),
286
+
RepoRootSchema::rev.eq(rev),
287
+
RepoRootSchema::indexedAt.eq(now),
288
+
))
289
+
.execute(conn)
290
+
})
291
+
.await?;
480
292
} else {
481
-
// Update existing root
482
-
self.db
483
-
.run(move |conn| {
484
-
use rsky_pds::schema::pds::repo_root::dsl::*;
485
-
486
-
diesel::update(repo_root)
487
-
.filter(did.eq(&did))
488
-
.set((cid.eq(&cid_str), rev.eq(&rev), indexedAt.eq(&now)))
489
-
.execute(conn)
490
-
})
491
-
.await?;
293
+
db.run(move |conn| {
294
+
update(RepoRootSchema::repo_root)
295
+
.filter(RepoRootSchema::did.eq(did))
296
+
.set((
297
+
RepoRootSchema::cid.eq(cid.to_string()),
298
+
RepoRootSchema::rev.eq(rev),
299
+
RepoRootSchema::indexedAt.eq(now),
300
+
))
301
+
.execute(conn)
302
+
})
303
+
.await?;
492
304
}
493
-
494
305
Ok(())
495
306
})
496
307
}
···
501
312
is_create: Option<bool>,
502
313
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + Sync + 'a>> {
503
314
Box::pin(async move {
504
-
// Apply commit in three steps
505
315
self.update_root(commit.cid, commit.rev.clone(), is_create)
506
316
.await?;
507
317
self.put_many(commit.new_blocks, commit.rev).await?;
508
318
self.delete_many(commit.removed_cids.to_list()).await?;
509
-
510
319
Ok(())
511
320
})
512
321
}
513
322
}
514
323
515
-
#[async_trait::async_trait]
516
-
impl AsyncBlockStoreRead for SqlRepoStorage {
517
-
async fn read_block(&mut self, cid: Cid) -> Result<Vec<u8>, BlockstoreError> {
518
-
let bytes = self
519
-
.get_bytes(&cid)
520
-
.await
521
-
.map_err(|e| BlockstoreError::Other(Box::new(e)))?
522
-
.ok_or(BlockstoreError::CidNotFound)?;
324
+
// Basically handles getting ipld blocks from db
325
+
impl SqlRepoReader {
326
+
pub fn new(did: String, now: Option<String>, db: ActorDb) -> Self {
327
+
let now = now.unwrap_or_else(rsky_common::now);
328
+
SqlRepoReader {
329
+
cache: Arc::new(RwLock::new(BlockMap::new())),
330
+
root: None,
331
+
rev: None,
332
+
db,
333
+
now,
334
+
did,
335
+
}
336
+
}
523
337
524
-
Ok(bytes)
338
+
pub async fn get_car_stream(&self, since: Option<String>) -> Result<Vec<u8>> {
339
+
match self.get_root().await {
340
+
None => Err(anyhow::Error::new(RepoRootNotFoundError)),
341
+
Some(root) => {
342
+
let mut car = BlockMap::new();
343
+
let mut cursor: Option<CidAndRev> = None;
344
+
let mut write_rows = |rows: Vec<RepoBlock>| -> Result<()> {
345
+
for row in rows {
346
+
car.set(Cid::from_str(&row.cid)?, row.content);
347
+
}
348
+
Ok(())
349
+
};
350
+
loop {
351
+
let res = self.get_block_range(&since, &cursor).await?;
352
+
write_rows(res.clone())?;
353
+
if let Some(last_row) = res.last() {
354
+
cursor = Some(CidAndRev {
355
+
cid: Cid::from_str(&last_row.cid)?,
356
+
rev: last_row.repo_rev.clone(),
357
+
});
358
+
} else {
359
+
break;
360
+
}
361
+
}
362
+
blocks_to_car_file(Some(&root), car).await
363
+
}
364
+
}
525
365
}
526
366
527
-
fn read_block_into(
528
-
&mut self,
529
-
cid: Cid,
530
-
contents: &mut Vec<u8>,
531
-
) -> impl Future<Output = Result<(), BlockstoreError>> + Send {
532
-
async move {
533
-
let bytes = self.read_block(cid).await?;
534
-
contents.clear();
535
-
contents.extend_from_slice(&bytes);
536
-
Ok(())
537
-
}
367
+
pub async fn get_block_range(
368
+
&self,
369
+
since: &Option<String>,
370
+
cursor: &Option<CidAndRev>,
371
+
) -> Result<Vec<RepoBlock>> {
372
+
let did: String = self.did.clone();
373
+
let db: ActorDb = self.db.clone();
374
+
let since = since.clone();
375
+
let cursor = cursor.clone();
376
+
use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema;
377
+
378
+
Ok(db
379
+
.run(move |conn| {
380
+
let mut builder = RepoBlockSchema::repo_block
381
+
.select(RepoBlock::as_select())
382
+
.order((RepoBlockSchema::repoRev.desc(), RepoBlockSchema::cid.desc()))
383
+
.filter(RepoBlockSchema::did.eq(did))
384
+
.limit(500)
385
+
.into_boxed();
386
+
387
+
if let Some(cursor) = cursor {
388
+
// use this syntax to ensure we hit the index
389
+
builder = builder.filter(
390
+
sql::<Bool>("((")
391
+
.bind(RepoBlockSchema::repoRev)
392
+
.sql(", ")
393
+
.bind(RepoBlockSchema::cid)
394
+
.sql(") < (")
395
+
.bind::<Text, _>(cursor.rev.clone())
396
+
.sql(", ")
397
+
.bind::<Text, _>(cursor.cid.to_string())
398
+
.sql("))"),
399
+
);
400
+
}
401
+
if let Some(since) = since {
402
+
builder = builder.filter(RepoBlockSchema::repoRev.gt(since));
403
+
}
404
+
builder.load(conn)
405
+
})
406
+
.await?)
538
407
}
539
-
}
540
408
541
-
#[async_trait::async_trait]
542
-
impl AsyncBlockStoreWrite for SqlRepoStorage {
543
-
fn write_block(
544
-
&mut self,
545
-
codec: u64,
546
-
hash: u64,
547
-
contents: &[u8],
548
-
) -> impl Future<Output = Result<Cid, BlockstoreError>> + Send {
549
-
let contents = contents.to_vec();
550
-
let rev = self.now.clone();
409
+
pub async fn count_blocks(&self) -> Result<i64> {
410
+
let did: String = self.did.clone();
411
+
let db: ActorDb = self.db.clone();
412
+
use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema;
551
413
552
-
async move {
553
-
// Calculate digest based on hash algorithm
554
-
let digest = match hash {
555
-
atrium_repo::blockstore::SHA2_256 => sha2::Sha256::digest(&contents),
556
-
_ => return Err(BlockstoreError::UnsupportedHash(hash)),
557
-
};
414
+
let res = db
415
+
.run(move |conn| {
416
+
RepoBlockSchema::repo_block
417
+
.filter(RepoBlockSchema::did.eq(did))
418
+
.count()
419
+
.get_result(conn)
420
+
})
421
+
.await?;
422
+
Ok(res)
423
+
}
558
424
559
-
// Create multihash
560
-
let multihash = atrium_repo::Multihash::wrap(hash, &digest)
561
-
.map_err(|_| BlockstoreError::UnsupportedHash(hash))?;
425
+
// Transactors
426
+
// -------------------
562
427
563
-
// Create CID
564
-
let cid = Cid::new_v1(codec, multihash);
428
+
/// Proactively cache all blocks from a particular commit (to prevent multiple roundtrips)
429
+
pub async fn cache_rev(&mut self, rev: String) -> Result<()> {
430
+
let did: String = self.did.clone();
431
+
let db: ActorDb = self.db.clone();
432
+
use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema;
565
433
566
-
// Store the block
567
-
self.put_block(cid, contents, rev)
568
-
.await
569
-
.map_err(|e| BlockstoreError::Other(Box::new(e)))?;
434
+
let res: Vec<(String, Vec<u8>)> = db
435
+
.run(move |conn| {
436
+
RepoBlockSchema::repo_block
437
+
.filter(RepoBlockSchema::did.eq(did))
438
+
.filter(RepoBlockSchema::repoRev.eq(rev))
439
+
.select((RepoBlockSchema::cid, RepoBlockSchema::content))
440
+
.limit(15)
441
+
.get_results::<(String, Vec<u8>)>(conn)
442
+
})
443
+
.await?;
444
+
for row in res {
445
+
let mut cache_guard = self.cache.write().await;
446
+
cache_guard.set(Cid::from_str(&row.0)?, row.1)
447
+
}
448
+
Ok(())
449
+
}
570
450
571
-
Ok(cid)
451
+
pub async fn delete_many(&self, cids: Vec<Cid>) -> Result<()> {
452
+
if cids.is_empty() {
453
+
return Ok(());
572
454
}
455
+
let did: String = self.did.clone();
456
+
let db: ActorDb = self.db.clone();
457
+
use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema;
458
+
459
+
let cid_strings: Vec<String> = cids.into_iter().map(|c| c.to_string()).collect();
460
+
db.run(move |conn| {
461
+
delete(RepoBlockSchema::repo_block)
462
+
.filter(RepoBlockSchema::did.eq(did))
463
+
.filter(RepoBlockSchema::cid.eq_any(cid_strings))
464
+
.execute(conn)
465
+
})
466
+
.await?;
467
+
Ok(())
468
+
}
469
+
470
+
pub async fn get_root_detailed(&self) -> Result<CidAndRev> {
471
+
let did: String = self.did.clone();
472
+
let db: ActorDb = self.db.clone();
473
+
use rsky_pds::schema::pds::repo_root::dsl as RepoRootSchema;
474
+
475
+
let res = db
476
+
.run(move |conn| {
477
+
RepoRootSchema::repo_root
478
+
.filter(RepoRootSchema::did.eq(did))
479
+
.select(models::RepoRoot::as_select())
480
+
.first(conn)
481
+
})
482
+
.await?;
483
+
484
+
Ok(CidAndRev {
485
+
cid: Cid::from_str(&res.cid)?,
486
+
rev: res.rev,
487
+
})
573
488
}
574
489
}
+7
-8
src/db/mod.rs
+7
-8
src/db/mod.rs
···
1
1
use anyhow::{Context, Result};
2
2
use diesel::connection::SimpleConnection;
3
-
use diesel::prelude::*;
4
-
use diesel::r2d2::{self, ConnectionManager, Pool, PooledConnection};
5
-
use diesel::sqlite::{Sqlite, SqliteConnection};
3
+
use diesel::r2d2::{ConnectionManager, Pool, PooledConnection};
4
+
use diesel::sqlite::Sqlite;
5
+
use diesel::*;
6
6
use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};
7
7
use std::path::Path;
8
-
use std::sync::Arc;
9
8
use std::time::Duration;
10
9
11
10
pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!("migrations");
···
40
39
let manager = ConnectionManager::<SqliteConnection>::new(database_url);
41
40
42
41
// Create the connection pool with SQLite-specific configurations
43
-
let pool = r2d2::Pool::builder()
42
+
let pool = Pool::builder()
44
43
.max_size(10)
45
44
.connection_timeout(Duration::from_secs(30))
46
45
.test_on_check_out(true)
···
118
117
119
118
Err(anyhow::anyhow!(
120
119
"Max retries exceeded: {}",
121
-
last_error.unwrap_or_else(|| diesel::result::Error::RollbackTransaction)
120
+
last_error.unwrap_or_else(|| result::Error::RollbackTransaction)
122
121
))
123
122
}
124
123
···
136
135
T: Send + 'static,
137
136
{
138
137
self.run(|conn| {
139
-
conn.transaction(|tx| f(tx).map_err(|e| diesel::result::Error::RollbackTransaction))
138
+
conn.transaction(|tx| f(tx).map_err(|e| result::Error::RollbackTransaction))
140
139
})
141
140
.await
142
141
}
···
144
143
/// Run a transaction with no retry logic
145
144
pub async fn transaction_no_retry<T, F>(&self, f: F) -> Result<T>
146
145
where
147
-
F: FnOnce(&mut SqliteConnection) -> std::result::Result<T, diesel::result::Error> + Send,
146
+
F: FnOnce(&mut SqliteConnection) -> std::result::Result<T, result::Error> + Send,
148
147
T: Send + 'static,
149
148
{
150
149
let mut conn = self