Alternative ATProto PDS implementation

actor_store prototyping

+55 -8
Cargo.lock
··· 1183 1183 checksum = "9425c3bf7089c983facbae04de54513cce73b41c7f9ff8c845b54e7bc64ebbfb" 1184 1184 1185 1185 [[package]] 1186 + name = "bitcoin-io" 1187 + version = "0.1.3" 1188 + source = "registry+https://github.com/rust-lang/crates.io-index" 1189 + checksum = "0b47c4ab7a93edb0c7198c5535ed9b52b63095f4e9b45279c6736cec4b856baf" 1190 + 1191 + [[package]] 1186 1192 name = "bitcoin_hashes" 1187 1193 version = "0.13.0" 1188 1194 source = "registry+https://github.com/rust-lang/crates.io-index" 1189 1195 checksum = "1930a4dabfebb8d7d9992db18ebe3ae2876f0a305fab206fd168df931ede293b" 1190 1196 dependencies = [ 1191 1197 "bitcoin-internals", 1192 - "hex-conservative", 1198 + "hex-conservative 0.1.2", 1199 + ] 1200 + 1201 + [[package]] 1202 + name = "bitcoin_hashes" 1203 + version = "0.14.0" 1204 + source = "registry+https://github.com/rust-lang/crates.io-index" 1205 + checksum = "bb18c03d0db0247e147a21a6faafd5a7eb851c743db062de72018b6b7e8e4d16" 1206 + dependencies = [ 1207 + "bitcoin-io", 1208 + "hex-conservative 0.2.1", 1193 1209 ] 1194 1210 1195 1211 [[package]] ··· 1317 1333 "reqwest 0.12.15", 1318 1334 "reqwest-middleware", 1319 1335 "rsky-common", 1336 + "rsky-lexicon", 1320 1337 "rsky-pds", 1321 1338 "rsky-repo", 1322 1339 "rsky-syntax", 1340 + "secp256k1 0.31.0", 1323 1341 "serde", 1324 1342 "serde_bytes", 1325 1343 "serde_ipld_dagcbor", ··· 2693 2711 version = "0.1.2" 2694 2712 source = "registry+https://github.com/rust-lang/crates.io-index" 2695 2713 checksum = "212ab92002354b4819390025006c897e8140934349e8635c9b077f47b4dcbd20" 2714 + 2715 + [[package]] 2716 + name = "hex-conservative" 2717 + version = "0.2.1" 2718 + source = "registry+https://github.com/rust-lang/crates.io-index" 2719 + checksum = "5313b072ce3c597065a808dbf612c4c8e8590bdbf8b579508bf7a762c5eae6cd" 2720 + dependencies = [ 2721 + "arrayvec", 2722 + ] 2696 2723 2697 2724 [[package]] 2698 2725 name = "hickory-proto" ··· 5264 5291 "rand_core 0.6.4", 5265 5292 "regex", 5266 5293 "rsky-identity", 5267 - "secp256k1", 5294 + "secp256k1 0.28.2", 5268 5295 "serde", 5269 5296 "serde_ipld_dagcbor", 5270 5297 "serde_json", ··· 5283 5310 "anyhow", 5284 5311 "multibase", 5285 5312 "p256 0.13.2", 5286 - "secp256k1", 5313 + "secp256k1 0.28.2", 5287 5314 "unsigned-varint 0.8.0", 5288 5315 ] 5289 5316 ··· 5315 5342 "libipld", 5316 5343 "miette", 5317 5344 "parking_lot", 5318 - "secp256k1", 5345 + "secp256k1 0.28.2", 5319 5346 "serde", 5320 5347 "serde_bytes", 5321 5348 "serde_cbor", ··· 5370 5397 "rsky-lexicon", 5371 5398 "rsky-repo", 5372 5399 "rsky-syntax", 5373 - "secp256k1", 5400 + "secp256k1 0.28.2", 5374 5401 "serde", 5375 5402 "serde_bytes", 5376 5403 "serde_cbor", ··· 5409 5436 "rsky-crypto", 5410 5437 "rsky-lexicon", 5411 5438 "rsky-syntax", 5412 - "secp256k1", 5439 + "secp256k1 0.28.2", 5413 5440 "serde", 5414 5441 "serde_bytes", 5415 5442 "serde_cbor", ··· 5665 5692 source = "registry+https://github.com/rust-lang/crates.io-index" 5666 5693 checksum = "d24b59d129cdadea20aea4fb2352fa053712e5d713eee47d700cd4b2bc002f10" 5667 5694 dependencies = [ 5668 - "bitcoin_hashes", 5695 + "bitcoin_hashes 0.13.0", 5669 5696 "rand 0.8.5", 5670 - "secp256k1-sys", 5697 + "secp256k1-sys 0.9.2", 5671 5698 "serde", 5672 5699 ] 5673 5700 5674 5701 [[package]] 5702 + name = "secp256k1" 5703 + version = "0.31.0" 5704 + source = "registry+https://github.com/rust-lang/crates.io-index" 5705 + checksum = "6a3dff2d01c9aa65c3186a45ff846bfea52cbe6de3b6320ed2a358d90dad0d76" 5706 + dependencies = [ 5707 + "bitcoin_hashes 0.14.0", 5708 + "rand 0.9.1", 5709 + 
"secp256k1-sys 0.11.0", 5710 + ] 5711 + 5712 + [[package]] 5675 5713 name = "secp256k1-sys" 5676 5714 version = "0.9.2" 5677 5715 source = "registry+https://github.com/rust-lang/crates.io-index" 5678 5716 checksum = "e5d1746aae42c19d583c3c1a8c646bfad910498e2051c551a7f2e3c0c9fbb7eb" 5717 + dependencies = [ 5718 + "cc", 5719 + ] 5720 + 5721 + [[package]] 5722 + name = "secp256k1-sys" 5723 + version = "0.11.0" 5724 + source = "registry+https://github.com/rust-lang/crates.io-index" 5725 + checksum = "dcb913707158fadaf0d8702c2db0e857de66eb003ccfdda5924b5f5ac98efb38" 5679 5726 dependencies = [ 5680 5727 "cc", 5681 5728 ]
+2
Cargo.toml
··· 145 145 rsky-repo = { git = "https://github.com/blacksky-algorithms/rsky.git" } 146 146 rsky-pds = { git = "https://github.com/blacksky-algorithms/rsky.git" } 147 147 rsky-common = { git = "https://github.com/blacksky-algorithms/rsky.git" } 148 + rsky-lexicon = { git = "https://github.com/blacksky-algorithms/rsky.git" } 148 149 149 150 # async in streams 150 151 # async-stream = "0.3" ··· 249 250 urlencoding = "2.1.3" 250 251 async-trait = "0.1.88" 251 252 lazy_static = "1.5.0" 253 + secp256k1 = "0.31.0"
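The new direct secp256k1 dependency is pinned at 0.31.0 while the rsky crates still pull in 0.28.2, which is why the lockfile above now carries both versions side by side; key types from the two versions are not interchangeable. A minimal sketch of how the 0.31 API is exercised by the actor_store code below, mirroring the calls the diff makes (the env var name comes from the diff; the helper function itself is illustrative):

use secp256k1::{Keypair, Secp256k1, SecretKey};

// Build the repo signing key from a hex-encoded secret key, as
// actor_store.rs does in format_commit; errors are propagated
// here instead of unwrapped.
fn repo_signing_key() -> anyhow::Result<Keypair> {
    let secp = Secp256k1::new();
    let hex_key = std::env::var("PDS_REPO_SIGNING_KEY_K256_PRIVATE_KEY_HEX")?;
    let secret = SecretKey::from_slice(&hex::decode(hex_key)?)?;
    Ok(Keypair::from_secret_key(&secp, &secret))
}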
+424 -316
src/actor_store/actor_store.rs
··· 1 - use std::path::PathBuf; 1 + //! Based on https://github.com/blacksky-algorithms/rsky/blob/main/rsky-pds/src/actor_store/mod.rs 2 + //! Which is based on https://github.com/bluesky-social/atproto/blob/main/packages/repo/src/repo.ts 3 + //! and also adds components from https://github.com/bluesky-social/atproto/blob/main/packages/pds/src/actor-store/repo/transactor.ts 4 + //! blacksky-algorithms/rsky is licensed under the Apache License 2.0 5 + //! 6 + //! Modified for SQLite backend 7 + 8 + use anyhow::Result; 9 + use cidv10::Cid; 10 + use diesel::*; 11 + use futures::stream::{self, StreamExt}; 12 + use rsky_common; 13 + use rsky_pds::actor_store::repo::types::SyncEvtData; 14 + use rsky_repo::repo::Repo; 15 + use rsky_repo::storage::readable_blockstore::ReadableBlockstore; 16 + use rsky_repo::storage::types::RepoStorage; 17 + use rsky_repo::types::{ 18 + CommitAction, CommitData, CommitDataWithOps, CommitOp, PreparedCreateOrUpdate, PreparedWrite, 19 + RecordCreateOrUpdateOp, RecordWriteEnum, RecordWriteOp, WriteOpAction, write_to_op, 20 + }; 21 + use rsky_repo::util::format_data_key; 22 + use rsky_syntax::aturi::AtUri; 23 + use secp256k1::{Keypair, Secp256k1, SecretKey}; 24 + use std::env; 25 + use std::fmt; 2 26 use std::str::FromStr; 3 27 use std::sync::Arc; 28 + use tokio::sync::RwLock; 4 29 5 - use anyhow::{Context as _, Result, anyhow, bail}; 6 - use atrium_crypto::keypair::{Did as _, Export as _, Secp256k1Keypair}; 7 - use atrium_repo::Cid; 8 - use diesel::prelude::*; 9 - use sha2::Digest as _; 10 - use tokio::fs; 30 + use super::ActorDb; 31 + use super::blob::BlobReader; 32 + use super::preference::PreferenceReader; 33 + use super::record::RecordReader; 34 + use super::sql_blob::BlobStoreSql; 35 + use super::sql_repo::SqlRepoReader; 11 36 12 - use super::PreparedWrite; 13 - use super::actor_store_handler::ActorStoreHandler; 14 - use super::actor_store_resources::ActorStoreResources; 15 - use super::blob::{BlobStore as _, BlobStorePlaceholder}; 16 - use super::db::{ActorDb, get_db}; 17 - use crate::SigningKey; 18 - 19 - /// Central manager for actor stores 20 - pub(crate) struct ActorStore { 21 - /// Base directory for actor data 22 - pub directory: PathBuf, 23 - /// Resources shared between actor stores 24 - pub resources: ActorStoreResources, 37 + #[derive(Debug)] 38 + enum FormatCommitError { 39 + BadRecordSwap(String), 40 + RecordSwapMismatch(String), 41 + BadCommitSwap(String), 42 + MissingRepoRoot(String), 25 43 } 26 44 27 - struct ActorLocation { 28 - /// Actor's directory path 29 - directory: PathBuf, 30 - /// Database path 31 - db_location: PathBuf, 32 - /// Key path 33 - key_location: PathBuf, 45 + pub struct ActorStore { 46 + pub did: String, 47 + pub storage: Arc<RwLock<SqlRepoReader>>, // get ipld blocks from db 48 + pub record: RecordReader, // get lexicon records from db 49 + pub blob: BlobReader, // get blobs 50 + pub pref: PreferenceReader, // get preferences 34 51 } 35 52 53 + // Combination of RepoReader/Transactor, BlobReader/Transactor, SqlRepoReader/Transactor 36 54 impl ActorStore { 37 - /// Create a new actor store manager 38 - pub(crate) fn new(directory: impl Into<PathBuf>, resources: ActorStoreResources) -> Self { 39 - Self { 40 - directory: directory.into(), 41 - resources, 55 + /// Concrete reader of an individual repo (hence BlobStoreSql which takes `did` param) 56 + pub fn new(did: String, blobstore: BlobStoreSql, db: ActorDb) -> Self { 57 + let db = Arc::new(db); 58 + ActorStore { 59 + storage: Arc::new(RwLock::new(SqlRepoReader::new( 60 + 
did.clone(), 61 + None, 62 + db.clone(), 63 + ))), 64 + record: RecordReader::new(did.clone(), db.clone()), 65 + pref: PreferenceReader::new(did.clone(), db.clone()), 66 + did, 67 + blob: BlobReader::new(blobstore, db.clone()), // Unlike TS impl, just use blob reader vs generator 42 68 } 43 69 } 44 70 45 - /// Get the location information for an actor 46 - pub(crate) async fn get_location(&self, did: &str) -> Result<ActorLocation> { 47 - // Hash the DID for directory organization 48 - let did_hash = sha2::Sha256::digest(did.as_bytes()); 49 - let hash_prefix = format!("{:02x}", did_hash[0]); 50 - 51 - // Create paths 52 - let directory = self.directory.join(hash_prefix).join(did); 53 - let db_location = directory.join("store.sqlite"); 54 - let key_location = directory.join("key"); 55 - 56 - Ok(ActorLocation { 57 - directory, 58 - db_location, 59 - key_location, 60 - }) 71 + pub async fn get_repo_root(&self) -> Option<Cid> { 72 + let storage_guard = self.storage.read().await; 73 + storage_guard.get_root().await 61 74 } 62 75 63 - /// Check if an actor store exists 64 - pub(crate) async fn exists(&self, did: &str) -> Result<bool> { 65 - let location = self.get_location(did).await?; 66 - Ok(location.db_location.exists()) 67 - } 68 - 69 - /// Get the signing keypair for an actor 70 - pub(crate) async fn keypair(&self, did: &str) -> Result<Arc<SigningKey>> { 71 - let location = self.get_location(did).await?; 72 - let priv_key = fs::read(&location.key_location) 73 - .await 74 - .context("Failed to read key file")?; 75 - 76 - let keypair = SigningKey::import(&priv_key).context("Failed to import signing key")?; 77 - 78 - Ok(Arc::new(keypair)) 79 - } 80 - 81 - /// Open the database for an actor 82 - pub(crate) async fn open_db(&self, did: &str) -> Result<ActorDb> { 83 - let location = self.get_location(did).await?; 84 - 85 - if !location.db_location.exists() { 86 - bail!("Repo not found"); 87 - } 88 - 89 - // Convert path to string for SQLite connection 90 - let db_path = location 91 - .db_location 92 - .to_str() 93 - .ok_or_else(|| anyhow!("Invalid path encoding"))?; 94 - 95 - // Open database with WAL mode enabled 96 - let db = get_db(db_path, false) 97 - .await 98 - .context("Failed to open actor database")?; 99 - 100 - // Run a simple query to ensure the database is ready 101 - db.run(|conn| diesel::sql_query("SELECT 1 FROM repo_root LIMIT 1").execute(conn)) 102 - .await 103 - .context("Database not ready")?; 104 - 105 - Ok(db) 106 - } 107 - 108 - /// Execute read operations on an actor store 109 - pub(crate) async fn read<T, F>(&self, did: &str, f: F) -> Result<T> 110 - where 111 - F: FnOnce(ActorStoreHandler) -> Result<T>, 112 - { 113 - let db = self.open_db(did).await?; 114 - let blobstore = self.resources.blobstore(did.to_string()); 115 - 116 - // Create a read-only handler 117 - let handler = ActorStoreHandler::new_reader(db.clone(), did.to_string(), blobstore); 118 - 119 - // Execute the function 120 - f(handler) 121 - } 76 + // Transactors 77 + // ------------------- 122 78 123 - /// Execute read-write operations with a transaction 124 - pub(crate) async fn transact<T, F>(&self, did: &str, f: F) -> Result<T> 125 - where 126 - F: FnOnce(ActorStoreHandler) -> Result<T>, 127 - { 128 - let db = self.open_db(did).await?; 129 - let keypair = self.keypair(did).await?; 130 - let blobstore = self.resources.blobstore(did.to_string()); 131 - let background_queue = self.resources.background_queue(); 132 - 133 - // Create a read-write handler with transaction support 134 - let handler = 
ActorStoreHandler::new_writer( 135 - db, 136 - did.to_string(), 137 - blobstore, 79 + #[deprecated] 80 + pub async fn create_repo_legacy( 81 + &self, 82 + keypair: Keypair, 83 + writes: Vec<PreparedCreateOrUpdate>, 84 + ) -> Result<CommitData> { 85 + let write_ops = writes 86 + .clone() 87 + .into_iter() 88 + .map(|prepare| { 89 + let at_uri: AtUri = prepare.uri.try_into()?; 90 + Ok(RecordCreateOrUpdateOp { 91 + action: WriteOpAction::Create, 92 + collection: at_uri.get_collection(), 93 + rkey: at_uri.get_rkey(), 94 + record: prepare.record, 95 + }) 96 + }) 97 + .collect::<Result<Vec<RecordCreateOrUpdateOp>>>()?; 98 + let commit = Repo::format_init_commit( 99 + self.storage.clone(), 100 + self.did.clone(), 138 101 keypair, 139 - background_queue.as_ref().clone(), 140 - ); 141 - 142 - // Execute the function (will handle transactions internally) 143 - f(handler) 102 + Some(write_ops), 103 + ) 104 + .await?; 105 + let storage_guard = self.storage.read().await; 106 + storage_guard.apply_commit(commit.clone(), None).await?; 107 + let writes = writes 108 + .into_iter() 109 + .map(PreparedWrite::Create) 110 + .collect::<Vec<PreparedWrite>>(); 111 + self.blob.process_write_blobs(writes).await?; 112 + Ok(commit) 144 113 } 145 114 146 - /// Execute read-write operations without a transaction 147 - pub(crate) async fn write_no_transaction<T, F>(&self, did: &str, f: F) -> Result<T> 148 - where 149 - F: FnOnce(ActorStoreHandler) -> Result<T>, 150 - { 151 - let db = self.open_db(did).await?; 152 - let keypair = self.keypair(did).await?; 153 - let blobstore = self.resources.blobstore(did.to_string()); 154 - let background_queue = self.resources.background_queue(); 155 - 156 - // Create a read-write handler without automatic transaction 157 - let handler = ActorStoreHandler::new_writer( 158 - db, 159 - did.to_string(), 160 - blobstore, 115 + pub async fn create_repo( 116 + &self, 117 + keypair: Keypair, 118 + writes: Vec<PreparedCreateOrUpdate>, 119 + ) -> Result<CommitDataWithOps> { 120 + let write_ops = writes 121 + .clone() 122 + .into_iter() 123 + .map(|prepare| { 124 + let at_uri: AtUri = prepare.uri.try_into()?; 125 + Ok(RecordCreateOrUpdateOp { 126 + action: WriteOpAction::Create, 127 + collection: at_uri.get_collection(), 128 + rkey: at_uri.get_rkey(), 129 + record: prepare.record, 130 + }) 131 + }) 132 + .collect::<Result<Vec<RecordCreateOrUpdateOp>>>()?; 133 + let commit = Repo::format_init_commit( 134 + self.storage.clone(), 135 + self.did.clone(), 161 136 keypair, 162 - background_queue.as_ref().clone(), 163 - ); 164 - 165 - // Execute the function 166 - f(handler) 137 + Some(write_ops), 138 + ) 139 + .await?; 140 + let storage_guard = self.storage.read().await; 141 + storage_guard.apply_commit(commit.clone(), None).await?; 142 + let write_commit_ops = writes.iter().try_fold( 143 + Vec::with_capacity(writes.len()), 144 + |mut acc, w| -> Result<Vec<CommitOp>> { 145 + let aturi: AtUri = w.uri.clone().try_into()?; 146 + acc.push(CommitOp { 147 + action: CommitAction::Create, 148 + path: format_data_key(aturi.get_collection(), aturi.get_rkey()), 149 + cid: Some(w.cid.clone()), 150 + prev: None, 151 + }); 152 + Ok(acc) 153 + }, 154 + )?; 155 + let writes = writes 156 + .into_iter() 157 + .map(PreparedWrite::Create) 158 + .collect::<Vec<PreparedWrite>>(); 159 + self.blob.process_write_blobs(writes).await?; 160 + Ok(CommitDataWithOps { 161 + commit_data: commit, 162 + ops: write_commit_ops, 163 + prev_data: None, 164 + }) 167 165 } 168 166 169 - /// Create a new actor store 170 - pub(crate) async fn 
create(&self, did: &str, keypair: SigningKey) -> Result<()> { 171 - let location = self.get_location(did).await?; 172 - 173 - // Ensure directory exists 174 - fs::create_dir_all(&location.directory) 175 - .await 176 - .context("Failed to create directory")?; 177 - 178 - // Check if repo already exists 179 - if location.db_location.exists() { 180 - bail!("Repo already exists"); 167 + pub async fn process_import_repo( 168 + &mut self, 169 + commit: CommitData, 170 + writes: Vec<PreparedWrite>, 171 + ) -> Result<()> { 172 + { 173 + let immutable_borrow = &self; 174 + // & send to indexing 175 + immutable_borrow 176 + .index_writes(writes.clone(), &commit.rev) 177 + .await?; 181 } 182 - 183 - // Export and save private key 184 - let priv_key = keypair.export(); 185 - fs::write(&location.key_location, priv_key) 186 - .await 187 - .context("Failed to write key file")?; 188 - 189 - // Initialize the database 190 - let db_path = location 191 - .db_location 192 - .to_str() 193 - .ok_or_else(|| anyhow!("Invalid path encoding"))?; 194 - 195 - let db = get_db(db_path, false) 196 - .await 197 - .context("Failed to create actor database")?; 198 - 199 - // Ensure WAL mode and run migrations 200 - db.ensure_wal().await?; 201 - db.run_migrations()?; 202 - 179 + // persist the commit to repo storage 180 + let storage_guard = self.storage.read().await; 181 + storage_guard.apply_commit(commit.clone(), None).await?; 182 + // process blobs 183 + self.blob.process_write_blobs(writes).await?; 203 184 Ok(()) 204 185 } 205 186 206 - /// Destroy an actor store 207 - pub(crate) async fn destroy(&self, did: &str) -> Result<()> { 208 - // Get all blob CIDs first 209 - let cids = self 210 - .read(did, |handler| async move { 211 - handler.repo.blob.get_blob_cids().await 212 - }) 213 - .await?; 214 - 215 - // Delete all blobs 216 - let blobstore = self.resources.blobstore(did.to_string()); 217 - if !cids.is_empty() { 218 - // Process in chunks of 500 219 - for chunk in cids.chunks(500) { 220 - let _ = blobstore.delete_many(chunk.to_vec()).await; 221 - } 222 - } 223 - 224 - // Remove directory and all files 225 - let location = self.get_location(did).await?; 226 - if location.directory.exists() { 227 - fs::remove_dir_all(&location.directory) 228 - .await 229 - .context("Failed to remove actor directory")?; 187 + pub async fn process_writes( 188 + &mut self, 189 + writes: Vec<PreparedWrite>, 190 + swap_commit_cid: Option<Cid>, 191 + ) -> Result<CommitDataWithOps> { 192 + // NOTE: the TypeScript PR for sync v1.1 193 + // adds safeguards against very large commits 194 + // and very many commits; we could add the 195 + // same safeguards here, but they may not 196 + // be necessary. 
197 + // https://github.com/bluesky-social/atproto/pull/3585/files#diff-7627844a4a6b50190014e947d1331a96df3c64d4c5273fa0ce544f85c3c1265f 198 + let commit = self.format_commit(writes.clone(), swap_commit_cid).await?; 199 + { 200 + let immutable_borrow = &self; 201 + // & send to indexing 202 + immutable_borrow 203 + .index_writes(writes.clone(), &commit.commit_data.rev) 204 + .await?; 230 205 } 206 + // persist the commit to repo storage 207 + let storage_guard = self.storage.read().await; 208 + storage_guard 209 + .apply_commit(commit.commit_data.clone(), None) 210 + .await?; 211 + // process blobs 212 + self.blob.process_write_blobs(writes).await?; 213 + Ok(commit) 214 + } 231 215 232 - Ok(()) 216 + pub async fn get_sync_event_data(&mut self) -> Result<SyncEvtData> { 217 + let storage_guard = self.storage.read().await; 218 + let current_root = storage_guard.get_root_detailed().await?; 219 + let blocks_and_missing = storage_guard.get_blocks(vec![current_root.cid]).await?; 220 + Ok(SyncEvtData { 221 + cid: current_root.cid, 222 + rev: current_root.rev, 223 + blocks: blocks_and_missing.blocks, 224 + }) 233 225 } 234 226 235 - /// Reserve a keypair for future use 236 - pub(crate) async fn reserve_keypair(&self, did: Option<&str>) -> Result<String> { 237 - let reserved_dir = self 238 - .resources 239 - .reserved_key_dir() 240 - .ok_or_else(|| anyhow!("No reserved key directory configured"))?; 241 - 242 - // If DID is provided, check if key already exists 243 - let mut key_path = None; 244 - if let Some(did_str) = did { 245 - assert_safe_path_part(did_str)?; 246 - key_path = Some(reserved_dir.join(did_str)); 247 - 248 - if key_path.as_ref().unwrap().exists() { 249 - let key_data = fs::read(key_path.as_ref().unwrap()).await?; 250 - let keypair = Secp256k1Keypair::import(&key_data) 251 - .context("Failed to import existing reserved key")?; 252 - return Ok(keypair.did()); 227 + pub async fn format_commit( 228 + &mut self, 229 + writes: Vec<PreparedWrite>, 230 + swap_commit: Option<Cid>, 231 + ) -> Result<CommitDataWithOps> { 232 + let current_root = { 233 + let storage_guard = self.storage.read().await; 234 + storage_guard.get_root_detailed().await 235 + }; 236 + if let Ok(current_root) = current_root { 237 + if let Some(swap_commit) = swap_commit { 238 + if !current_root.cid.eq(&swap_commit) { 239 + return Err( 240 + FormatCommitError::BadCommitSwap(current_root.cid.to_string()).into(), 241 + ); 242 + } 243 + } 244 + { 245 + let mut storage_guard = self.storage.write().await; 246 + storage_guard.cache_rev(current_root.rev).await?; 247 + } 248 + let mut new_record_cids: Vec<Cid> = vec![]; 249 + let mut delete_and_update_uris = vec![]; 250 + let mut commit_ops = vec![]; 251 + for write in &writes { 252 + let commit_action: CommitAction = write.action().into(); 253 + match write.clone() { 254 + PreparedWrite::Create(c) => new_record_cids.push(c.cid), 255 + PreparedWrite::Update(u) => { 256 + new_record_cids.push(u.cid); 257 + let u_at_uri: AtUri = u.uri.try_into()?; 258 + delete_and_update_uris.push(u_at_uri); 259 + } 260 + PreparedWrite::Delete(d) => { 261 + let d_at_uri: AtUri = d.uri.try_into()?; 262 + delete_and_update_uris.push(d_at_uri) 263 + } 264 + } 265 + if write.swap_cid().is_none() { 266 + continue; 267 + } 268 + let write_at_uri: &AtUri = &write.uri().try_into()?; 269 + let record = self 270 + .record 271 + .get_record(write_at_uri, None, Some(true)) 272 + .await?; 273 + let current_record = match record { 274 + Some(record) => Some(Cid::from_str(&record.cid)?), 275 + None => None, 276 
+ }; 277 + let cid = match &write { 278 + &PreparedWrite::Delete(_) => None, 279 + &PreparedWrite::Create(w) | &PreparedWrite::Update(w) => Some(w.cid), 280 + }; 281 + let mut op = CommitOp { 282 + action: commit_action, 283 + path: format_data_key(write_at_uri.get_collection(), write_at_uri.get_rkey()), 284 + cid, 285 + prev: None, 286 + }; 287 + if let Some(_) = current_record { 288 + op.prev = current_record; 289 + }; 290 + commit_ops.push(op); 291 + match write { 292 + // There should be no current record for a create 293 + PreparedWrite::Create(_) if write.swap_cid().is_some() => { 294 + Err::<(), anyhow::Error>( 295 + FormatCommitError::BadRecordSwap(format!("{:?}", current_record)) 296 + .into(), 297 + ) 298 + } 299 + // There should be a current record for an update 300 + PreparedWrite::Update(_) if write.swap_cid().is_none() => { 301 + Err::<(), anyhow::Error>( 302 + FormatCommitError::BadRecordSwap(format!("{:?}", current_record)) 303 + .into(), 304 + ) 305 + } 306 + // There should be a current record for a delete 307 + PreparedWrite::Delete(_) if write.swap_cid().is_none() => { 308 + Err::<(), anyhow::Error>( 309 + FormatCommitError::BadRecordSwap(format!("{:?}", current_record)) 310 + .into(), 311 + ) 312 + } 313 + _ => Ok::<(), anyhow::Error>(()), 314 + }?; 315 + match (current_record, write.swap_cid()) { 316 + (Some(current_record), Some(swap_cid)) if current_record.eq(swap_cid) => { 317 + Ok::<(), anyhow::Error>(()) 318 + } 319 + _ => Err::<(), anyhow::Error>( 320 + FormatCommitError::RecordSwapMismatch(format!("{:?}", current_record)) 321 + .into(), 322 + ), 323 + }?; 253 324 } 254 - } 255 - 256 - // Create a new keypair 257 - let keypair = Secp256k1Keypair::create(&mut rand::thread_rng()); 258 - let key_did = keypair.did(); 259 - 260 - // Set path if not already set 261 - let final_path = key_path.unwrap_or_else(|| reserved_dir.join(&key_did)); 262 - 263 - // Ensure directory exists 264 - fs::create_dir_all(reserved_dir).await?; 265 - 266 - // Save key 267 - fs::write(&final_path, keypair.export()).await?; 268 - 269 - Ok(key_did) 270 - } 271 - 272 - /// Get a reserved keypair 273 - pub(crate) async fn get_reserved_keypair( 274 - &self, 275 - key_did: &str, 276 - ) -> Result<Option<Arc<SigningKey>>> { 277 - let reserved_dir = self 278 - .resources 279 - .reserved_key_dir() 280 - .ok_or_else(|| anyhow!("No reserved key directory configured"))?; 281 - 282 - let key_path = reserved_dir.join(key_did); 283 - if !key_path.exists() { 284 - return Ok(None); 285 - } 286 - 287 - let key_data = fs::read(key_path).await?; 288 - let keypair = SigningKey::import(&key_data).context("Failed to import reserved key")?; 289 - 290 - Ok(Some(Arc::new(keypair))) 291 - } 325 + let mut repo = Repo::load(self.storage.clone(), Some(current_root.cid)).await?; 326 + let previous_data = repo.commit.data; 327 + let write_ops: Vec<RecordWriteOp> = writes 328 + .into_iter() 329 + .map(write_to_op) 330 + .collect::<Result<Vec<RecordWriteOp>>>()?; 331 + // @TODO: Use repo signing key global config 332 + let secp = Secp256k1::new(); 333 + let repo_private_key = env::var("PDS_REPO_SIGNING_KEY_K256_PRIVATE_KEY_HEX").unwrap(); 334 + let repo_secret_key = 335 + SecretKey::from_slice(&hex::decode(repo_private_key.as_bytes()).unwrap()).unwrap(); 336 + let repo_signing_key = Keypair::from_secret_key(&secp, &repo_secret_key); 292 337 293 - /// Clear a reserved keypair 294 - pub(crate) async fn clear_reserved_keypair( 295 - &self, 296 - key_did: &str, 297 - did: Option<&str>, 298 - ) -> Result<()> { 299 - let 
reserved_dir = self 300 - .resources 301 - .reserved_key_dir() 302 - .ok_or_else(|| anyhow!("No reserved key directory configured"))?; 338 + let mut commit = repo 339 + .format_commit(RecordWriteEnum::List(write_ops), repo_signing_key) 340 + .await?; 303 341 304 - // Remove key by DID 305 - let key_path = reserved_dir.join(key_did); 306 - if key_path.exists() { 307 - fs::remove_file(key_path).await?; 308 - } 342 + // find blocks that would be deleted but are referenced by another record 343 + let duplicate_record_cids = self 344 + .get_duplicate_record_cids(commit.removed_cids.to_list(), delete_and_update_uris) 345 + .await?; 346 + for cid in duplicate_record_cids { 347 + commit.removed_cids.delete(cid) 348 + } 309 349 310 - // If DID mapping provided, remove that too 311 - if let Some(did_str) = did { 312 - let did_path = reserved_dir.join(did_str); 313 - if did_path.exists() { 314 - fs::remove_file(did_path).await?; 350 + // find blocks that are relevant to ops but not included in diff 351 + // (for instance a record that was moved but cid stayed the same) 352 + let new_record_blocks = commit.relevant_blocks.get_many(new_record_cids)?; 353 + if !new_record_blocks.missing.is_empty() { 354 + let missing_blocks = { 355 + let storage_guard = self.storage.read().await; 356 + storage_guard.get_blocks(new_record_blocks.missing).await? 357 + }; 358 + commit.relevant_blocks.add_map(missing_blocks.blocks)?; 315 359 } 360 + let commit_with_data_ops = CommitDataWithOps { 361 + ops: commit_ops, 362 + commit_data: commit, 363 + prev_data: Some(previous_data), 364 + }; 365 + Ok(commit_with_data_ops) 366 + } else { 367 + Err(FormatCommitError::MissingRepoRoot(self.did.clone()).into()) 316 368 } 317 - 318 - Ok(()) 319 369 } 320 370 321 - /// Store a PLC operation 322 - pub(crate) async fn store_plc_op(&self, did: &str, op: &[u8]) -> Result<()> { 323 - let location = self.get_location(did).await?; 324 - let op_path = location.directory.join("did-op"); 371 + pub async fn index_writes(&self, writes: Vec<PreparedWrite>, rev: &str) -> Result<()> { 372 + let now: &str = &rsky_common::now(); 325 373 326 - fs::write(op_path, op).await?; 374 + let _ = stream::iter(writes) 375 + .then(|write| async move { 376 + Ok::<(), anyhow::Error>(match write { 377 + PreparedWrite::Create(write) => { 378 + let write_at_uri: AtUri = write.uri.try_into()?; 379 + self.record 380 + .index_record( 381 + write_at_uri.clone(), 382 + write.cid, 383 + Some(write.record), 384 + Some(write.action), 385 + rev.to_owned(), 386 + Some(now.to_string()), 387 + ) 388 + .await? 389 + } 390 + PreparedWrite::Update(write) => { 391 + let write_at_uri: AtUri = write.uri.try_into()?; 392 + self.record 393 + .index_record( 394 + write_at_uri.clone(), 395 + write.cid, 396 + Some(write.record), 397 + Some(write.action), 398 + rev.to_owned(), 399 + Some(now.to_string()), 400 + ) 401 + .await? 402 + } 403 + PreparedWrite::Delete(write) => { 404 + let write_at_uri: AtUri = write.uri.try_into()?; 405 + self.record.delete_record(&write_at_uri).await? 
406 + } 407 + }) 408 + }) 409 + .collect::<Vec<_>>() 410 + .await 411 + .into_iter() 412 + .collect::<Result<Vec<_>, _>>()?; 327 413 Ok(()) 328 414 } 329 415 330 - /// Get a stored PLC operation 331 - pub(crate) async fn get_plc_op(&self, did: &str) -> Result<Vec<u8>> { 332 - let location = self.get_location(did).await?; 333 - let op_path = location.directory.join("did-op"); 334 - 335 - let data = fs::read(op_path).await?; 336 - Ok(data) 337 - } 338 - 339 - /// Clear a stored PLC operation 340 - pub(crate) async fn clear_plc_op(&self, did: &str) -> Result<()> { 341 - let location = self.get_location(did).await?; 342 - let op_path = location.directory.join("did-op"); 416 + pub async fn destroy(&mut self) -> Result<()> { 417 + let did: String = self.did.clone(); 418 + let storage_guard = self.storage.read().await; 419 + let db: Arc<ActorDb> = storage_guard.db.clone(); 420 + use rsky_pds::schema::pds::blob::dsl as BlobSchema; 343 421 344 - if op_path.exists() { 345 - fs::remove_file(op_path).await?; 346 - } 347 - 422 + let blob_rows: Vec<String> = db 423 + .run(move |conn| { 424 + BlobSchema::blob 425 + .filter(BlobSchema::did.eq(did)) 426 + .select(BlobSchema::cid) 427 + .get_results(conn) 428 + }) 429 + .await?; 430 + let cids = blob_rows 431 + .into_iter() 432 + .map(|row| Ok(Cid::from_str(&row)?)) 433 + .collect::<Result<Vec<Cid>>>()?; 434 + let _ = stream::iter(cids.chunks(500)) 435 + .then(|chunk| async { self.blob.blobstore.delete_many(chunk.to_vec()).await }) 436 + .collect::<Vec<_>>() 437 + .await 438 + .into_iter() 439 + .collect::<Result<Vec<_>, _>>()?; 348 440 Ok(()) 349 441 } 350 - } 351 442 352 - /// Ensure a path part is safe to use in a filename 353 - fn assert_safe_path_part(part: &str) -> Result<()> { 354 - let normalized = std::path::Path::new(part) 355 - .file_name() 356 - .and_then(|s| s.to_str()) 357 - .ok_or_else(|| anyhow!("Invalid path"))?; 443 + pub async fn get_duplicate_record_cids( 444 + &self, 445 + cids: Vec<Cid>, 446 + touched_uris: Vec<AtUri>, 447 + ) -> Result<Vec<Cid>> { 448 + if touched_uris.is_empty() || cids.is_empty() { 449 + return Ok(vec![]); 450 + } 451 + let did: String = self.did.clone(); 452 + let storage_guard = self.storage.read().await; 453 + let db: Arc<ActorDb> = storage_guard.db.clone(); 454 + use rsky_pds::schema::pds::record::dsl as RecordSchema; 358 455 359 - if part != normalized || part.starts_with('.') || part.contains('/') || part.contains('\\') { 360 - bail!("Unsafe path part: {}", part); 456 + let cid_strs: Vec<String> = cids.into_iter().map(|c| c.to_string()).collect(); 457 + let touched_uri_strs: Vec<String> = touched_uris.iter().map(|t| t.to_string()).collect(); 458 + let res: Vec<String> = db 459 + .run(move |conn| { 460 + RecordSchema::record 461 + .filter(RecordSchema::did.eq(did)) 462 + .filter(RecordSchema::cid.eq_any(cid_strs)) 463 + .filter(RecordSchema::uri.ne_all(touched_uri_strs)) 464 + .select(RecordSchema::cid) 465 + .get_results(conn) 466 + }) 467 + .await?; 468 + res.into_iter() 469 + .map(|row| Cid::from_str(&row).map_err(|error| anyhow::Error::new(error))) 470 + .collect::<Result<Vec<Cid>>>() 361 471 } 362 - 363 - Ok(()) 364 472 }
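One gap in the hunk above: FormatCommitError only derives Debug, but format_commit converts it with `.into()` into anyhow::Error, and anyhow's blanket From impl requires the source type to implement std::error::Error (and therefore Display). The file imports std::fmt, yet no impl appears in the hunk. A minimal sketch of the missing impls (the message strings are illustrative):

use std::fmt;

impl fmt::Display for FormatCommitError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            FormatCommitError::BadRecordSwap(cid) => write!(f, "BadRecordSwapError: {cid}"),
            FormatCommitError::RecordSwapMismatch(cid) => write!(f, "RecordSwapMismatchError: {cid}"),
            FormatCommitError::BadCommitSwap(cid) => write!(f, "BadCommitSwapError: {cid}"),
            FormatCommitError::MissingRepoRoot(did) => write!(f, "no repo root found for {did}"),
        }
    }
}

// With Display in place, anyhow's From<E: Error + Send + Sync + 'static>
// makes the `.into()` conversions above compile.
impl std::error::Error for FormatCommitError {}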
-329
src/actor_store/actor_store_handler.rs
··· 1 - use std::path::PathBuf; 2 - use std::sync::Arc; 3 - 4 - use anyhow::{Context as _, Result, anyhow}; 5 - use futures::TryStreamExt; 6 - use rsky_repo::repo::Repo; 7 - use rsky_repo::types::{CommitData, CommitDataWithOps, PreparedWrite as RskyPreparedWrite}; 8 - 9 - use super::PreparedWrite; 10 - use super::blob::{BackgroundQueue, BlobStorePlaceholder}; 11 - use super::db::ActorDb; 12 - use super::preference::PreferenceHandler; 13 - use super::record::RecordHandler; 14 - use super::repo::RepoHandler; 15 - use crate::SigningKey; 16 - 17 - /// Unified handler for actor store operations. 18 - pub(crate) struct ActorStoreHandler { 19 - /// Actor DID 20 - pub did: String, 21 - /// Database connection 22 - pub db: ActorDb, 23 - /// Repository handler 24 - pub repo: RepoHandler, 25 - /// Record handler 26 - pub record: RecordHandler, 27 - /// Preference handler 28 - pub pref: PreferenceHandler, 29 - /// Background queue for async operations 30 - pub background_queue: Option<BackgroundQueue>, 31 - /// Signing keypair (required for write operations) 32 - pub signing_key: Option<Arc<SigningKey>>, 33 - } 34 - 35 - impl ActorStoreHandler { 36 - /// Create a new actor store handler with read-only capabilities 37 - pub(crate) fn new_reader(db: ActorDb, did: String, blobstore: BlobStorePlaceholder) -> Self { 38 - let record = RecordHandler::new(db.clone(), did.clone()); 39 - let pref = PreferenceHandler::new(db.clone(), did.clone()); 40 - let repo = RepoHandler::new_reader(db.clone(), blobstore, did.clone()); 41 - 42 - Self { 43 - did, 44 - db, 45 - repo, 46 - record, 47 - pref, 48 - background_queue: None, 49 - signing_key: None, 50 - } 51 - } 52 - 53 - /// Create a new actor store handler with read/write capabilities 54 - pub(crate) fn new_writer( 55 - db: ActorDb, 56 - did: String, 57 - blobstore: BlobStorePlaceholder, 58 - signing_key: Arc<SigningKey>, 59 - background_queue: BackgroundQueue, 60 - ) -> Self { 61 - let record = RecordHandler::new_with_blobstore(db.clone(), blobstore.clone(), did.clone()); 62 - let pref = PreferenceHandler::new(db.clone(), did.clone()); 63 - let repo = RepoHandler::new_writer( 64 - db.clone(), 65 - blobstore, 66 - did.clone(), 67 - signing_key.clone(), 68 - background_queue.clone(), 69 - ); 70 - 71 - Self { 72 - did, 73 - db, 74 - repo, 75 - record, 76 - pref, 77 - background_queue: Some(background_queue), 78 - signing_key: Some(signing_key), 79 - } 80 - } 81 - 82 - /// Set signing key (needed for write operations) 83 - pub(crate) fn with_signing_key(mut self, signing_key: Arc<SigningKey>) -> Self { 84 - self.signing_key = Some(signing_key); 85 - self 86 - } 87 - 88 - /// Set background queue (needed for async operations) 89 - pub(crate) fn with_background_queue(mut self, queue: BackgroundQueue) -> Self { 90 - self.background_queue = Some(queue); 91 - self 92 - } 93 - 94 - // Repository Operations 95 - // -------------------- 96 - 97 - /// Try to load repository 98 - pub(crate) async fn maybe_load_repo(&self) -> Result<Option<Repo>> { 99 - self.repo.maybe_load_repo().await 100 - } 101 - 102 - /// Get repository root CID 103 - pub(crate) async fn get_repo_root(&self) -> Result<Option<atrium_repo::Cid>> { 104 - self.repo.get_repo_root().await 105 - } 106 - 107 - /// Create a new repository with prepared writes 108 - pub(crate) async fn create_repo( 109 - &self, 110 - writes: Vec<PreparedWrite>, 111 - ) -> Result<CommitDataWithOps> { 112 - if self.signing_key.is_none() { 113 - return Err(anyhow!( 114 - "No signing key available for create_repo operation" 115 - )); 
116 - } 117 - 118 - let rsky_writes = writes 119 - .into_iter() 120 - .map(|w| RskyPreparedWrite::from(w)) 121 - .collect::<Vec<_>>(); 122 - 123 - self.repo.create_repo(rsky_writes).await 124 - } 125 - 126 - /// Process writes to the repository 127 - pub(crate) async fn process_writes( 128 - &self, 129 - writes: Vec<PreparedWrite>, 130 - swap_commit_cid: Option<atrium_repo::Cid>, 131 - ) -> Result<CommitDataWithOps> { 132 - if self.signing_key.is_none() { 133 - return Err(anyhow!( 134 - "No signing key available for process_writes operation" 135 - )); 136 - } 137 - 138 - let rsky_writes = writes 139 - .into_iter() 140 - .map(|w| RskyPreparedWrite::from(w)) 141 - .collect::<Vec<_>>(); 142 - 143 - self.repo.process_writes(rsky_writes, swap_commit_cid).await 144 - } 145 - 146 - /// Import a repository from external data 147 - pub(crate) async fn process_import_repo( 148 - &self, 149 - commit: CommitData, 150 - writes: Vec<PreparedWrite>, 151 - ) -> Result<()> { 152 - let rsky_writes = writes 153 - .into_iter() 154 - .map(|w| RskyPreparedWrite::from(w)) 155 - .collect::<Vec<_>>(); 156 - 157 - // First index the records 158 - self.repo.index_writes(&rsky_writes, &commit.rev).await?; 159 - 160 - // Then process the commit 161 - self.repo.storage.apply_commit(commit.clone(), None).await?; 162 - 163 - // Finally process any blobs 164 - if let Some(bg_queue) = &self.background_queue { 165 - self.repo 166 - .blob_transactor 167 - .process_write_blobs(&commit.rev, rsky_writes) 168 - .await?; 169 - } else { 170 - return Err(anyhow!( 171 - "Background queue required for process_import_repo operation" 172 - )); 173 - } 174 - 175 - Ok(()) 176 - } 177 - 178 - /// Get sync event data for replication 179 - pub(crate) async fn get_sync_event_data(&self) -> Result<super::repo::SyncEventData> { 180 - self.repo.get_sync_event_data().await 181 - } 182 - 183 - /// Destroy the repository and all associated data 184 - pub(crate) async fn destroy(&self) -> Result<()> { 185 - // Get all blob CIDs 186 - let blob_cids = self.repo.blob.get_blob_cids().await?; 187 - 188 - // Delete all blobs 189 - if !blob_cids.is_empty() { 190 - self.repo 191 - .blob_transactor 192 - .blobstore 193 - .delete_many(blob_cids.clone()) 194 - .await?; 195 - } 196 - 197 - Ok(()) 198 - } 199 - 200 - // Record Operations 201 - // ---------------- 202 - 203 - /// Get a specific record 204 - pub(crate) async fn get_record( 205 - &self, 206 - uri: &rsky_syntax::aturi::AtUri, 207 - cid: Option<&str>, 208 - include_soft_deleted: bool, 209 - ) -> Result<Option<super::record::RecordData>> { 210 - self.record.get_record(uri, cid, include_soft_deleted).await 211 - } 212 - 213 - /// List collections in the repository 214 - pub(crate) async fn list_collections(&self) -> Result<Vec<String>> { 215 - self.record.list_collections().await 216 - } 217 - 218 - /// List records in a collection 219 - pub(crate) async fn list_records_for_collection( 220 - &self, 221 - opts: super::record::ListRecordsOptions, 222 - ) -> Result<Vec<super::record::RecordData>> { 223 - self.record.list_records_for_collection(opts).await 224 - } 225 - 226 - /// Get record count 227 - pub(crate) async fn record_count(&self) -> Result<i64> { 228 - self.record.record_count().await 229 - } 230 - 231 - /// Update record takedown status 232 - pub(crate) async fn update_record_takedown_status( 233 - &self, 234 - uri: &rsky_syntax::aturi::AtUri, 235 - takedown: atrium_api::com::atproto::admin::defs::StatusAttr, 236 - ) -> Result<()> { 237 - self.record 238 - .update_record_takedown_status(uri, 
takedown) 239 - .await 240 - } 241 - 242 - // Preference Operations 243 - // ------------------- 244 - 245 - /// Get preferences for a namespace 246 - pub(crate) async fn get_preferences( 247 - &self, 248 - namespace: Option<&str>, 249 - scope: &str, 250 - ) -> Result<Vec<super::preference::AccountPreference>> { 251 - self.pref.get_preferences(namespace, scope).await 252 - } 253 - 254 - /// Put preferences for a namespace 255 - pub(crate) async fn put_preferences( 256 - &self, 257 - values: Vec<super::preference::AccountPreference>, 258 - namespace: &str, 259 - scope: &str, 260 - ) -> Result<()> { 261 - self.pref.put_preferences(values, namespace, scope).await 262 - } 263 - 264 - // Blob Operations 265 - // -------------- 266 - 267 - /// Get blob metadata 268 - pub(crate) async fn get_blob_metadata( 269 - &self, 270 - cid: &atrium_repo::Cid, 271 - ) -> Result<super::blob::BlobMetadata> { 272 - self.repo.blob.get_blob_metadata(cid).await 273 - } 274 - 275 - /// Get blob data 276 - pub(crate) async fn get_blob(&self, cid: &atrium_repo::Cid) -> Result<super::blob::BlobData> { 277 - self.repo.blob.get_blob(cid).await 278 - } 279 - 280 - /// Update blob takedown status 281 - pub(crate) async fn update_blob_takedown_status( 282 - &self, 283 - cid: atrium_repo::Cid, 284 - takedown: atrium_api::com::atproto::admin::defs::StatusAttr, 285 - ) -> Result<()> { 286 - self.repo 287 - .blob 288 - .update_blob_takedown_status(cid, takedown) 289 - .await 290 - } 291 - 292 - /// Upload blob and get metadata 293 - pub(crate) async fn upload_blob_and_get_metadata( 294 - &self, 295 - user_suggested_mime: &str, 296 - blob_bytes: &[u8], 297 - ) -> Result<super::blob::BlobMetadata> { 298 - self.repo 299 - .blob 300 - .upload_blob_and_get_metadata(user_suggested_mime, blob_bytes) 301 - .await 302 - } 303 - 304 - /// Count blobs 305 - pub(crate) async fn blob_count(&self) -> Result<i64> { 306 - self.repo.blob.blob_count().await 307 - } 308 - 309 - // Transaction Support 310 - // ----------------- 311 - 312 - /// Execute a transaction 313 - pub(crate) async fn transaction<T, F>(&self, f: F) -> Result<T> 314 - where 315 - F: FnOnce(&mut diesel::SqliteConnection) -> Result<T> + Send, 316 - T: Send + 'static, 317 - { 318 - self.db.transaction(f).await 319 - } 320 - 321 - /// Execute a database operation with retries 322 - pub(crate) async fn run<F, T>(&self, operation: F) -> Result<T> 323 - where 324 - F: FnOnce(&mut diesel::SqliteConnection) -> diesel::QueryResult<T> + Send, 325 - T: Send + 'static, 326 - { 327 - self.db.run(operation).await 328 - } 329 - }
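With the ActorStoreHandler facade deleted, call sites reach the reader fields on ActorStore directly. A sketch of the call-site migration for a record lookup, assuming a `store: &ActorStore` built as in actor_store.rs above (the helper function is illustrative; get_record's signature is taken from its use in format_commit):

use rsky_syntax::aturi::AtUri;

// Old: handler.get_record(uri, None, true) via the facade.
// New: go through the RecordReader field on ActorStore.
async fn record_exists(store: &ActorStore, uri: &AtUri) -> anyhow::Result<bool> {
    let record = store.record.get_record(uri, None, Some(true)).await?;
    Ok(record.is_some())
}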
-48
src/actor_store/actor_store_resources.rs
··· 1 - use std::path::PathBuf; 2 - use std::sync::Arc; 3 - 4 - use super::blob::{BackgroundQueue, BlobStorePlaceholder}; 5 - 6 - pub(crate) struct ActorStoreResources { 7 - // Factory function to create blobstore instances 8 - blobstore_factory: Arc<dyn Fn(String) -> BlobStorePlaceholder + Send + Sync>, 9 - // Shared background queue 10 - background_queue: Arc<BackgroundQueue>, 11 - // Optional directory for reserved keys 12 - reserved_key_dir: Option<PathBuf>, 13 - } 14 - 15 - impl ActorStoreResources { 16 - // Simple constructor with minimal parameters 17 - pub(crate) fn new( 18 - blobstore_factory: impl Fn(String) -> BlobStorePlaceholder + Send + Sync + 'static, 19 - concurrency: usize, 20 - ) -> Self { 21 - Self { 22 - blobstore_factory: Arc::new(blobstore_factory), 23 - background_queue: Arc::new(BackgroundQueue::new(concurrency)), 24 - reserved_key_dir: None, 25 - } 26 - } 27 - 28 - // Set reserved key directory 29 - pub(crate) fn with_reserved_key_dir(mut self, dir: impl Into<PathBuf>) -> Self { 30 - self.reserved_key_dir = Some(dir.into()); 31 - self 32 - } 33 - 34 - // Get a blobstore for a DID 35 - pub(crate) fn blobstore(&self, did: String) -> BlobStorePlaceholder { 36 - (self.blobstore_factory)(did) 37 - } 38 - 39 - // Get the background queue 40 - pub(crate) fn background_queue(&self) -> Arc<BackgroundQueue> { 41 - self.background_queue.clone() 42 - } 43 - 44 - // Get the reserved key directory 45 - pub(crate) fn reserved_key_dir(&self) -> Option<&PathBuf> { 46 - self.reserved_key_dir.as_ref() 47 - } 48 - }
+578
src/actor_store/blob.rs
··· 1 + //! Blob storage and retrieval for the actor store. 2 + //! Based on https://github.com/blacksky-algorithms/rsky/blob/main/rsky-pds/src/actor_store/blob/mod.rs 3 + //! blacksky-algorithms/rsky is licensed under the Apache License 2.0 4 + //! 5 + //! Modified for SQLite backend 6 + 7 + use anyhow::{Result, bail}; 8 + use cidv10::Cid; 9 + use diesel::dsl::{count_distinct, exists, not}; 10 + use diesel::result::Error; 11 + use diesel::sql_types::{Integer, Nullable, Text}; 12 + use diesel::*; 13 + use futures::stream::{self, StreamExt}; 14 + use futures::try_join; 15 + use rsky_pds::actor_store::blob::sha256_stream; 16 + use rocket::data::{Data, ToByteUnit}; // Data is used by upload_blob_and_get_metadata below 17 + // use rocket::form::validate::Contains; 18 + use rsky_common::ipld::sha256_raw_to_cid; 19 + use rsky_common::now; 20 + use rsky_lexicon::blob_refs::BlobRef; 21 + use rsky_lexicon::com::atproto::admin::StatusAttr; 22 + use rsky_lexicon::com::atproto::repo::ListMissingBlobsRefRecordBlob; 23 + use rsky_pds::actor_store::blob::{ 24 + BlobMetadata, GetBlobMetadataOutput, GetBlobOutput, ListBlobsOpts, ListMissingBlobsOpts, 25 + verify_blob, 26 + }; 27 + use rsky_pds::image; 28 + use rsky_pds::models::models; 29 + use rsky_repo::error::BlobError; 30 + use rsky_repo::types::{PreparedBlobRef, PreparedWrite}; 31 + use sha2::{Digest, Sha256}; 32 + 33 + use super::ActorDb; 34 + use super::sql_blob::BlobStoreSql; 35 + 36 + pub struct BlobReader { 37 + pub blobstore: BlobStoreSql, 38 + pub did: String, 39 + pub db: ActorDb, 40 + } 41 + 42 + // Basically handles getting blob records from db 43 + impl BlobReader { 44 + pub fn new(blobstore: BlobStoreSql, db: ActorDb) -> Self { 45 + BlobReader { 46 + did: blobstore.bucket.clone(), 47 + blobstore, 48 + db, 49 + } 50 + } 51 + 52 + pub async fn get_blob_metadata(&self, cid: Cid) -> Result<GetBlobMetadataOutput> { 53 + use rsky_pds::schema::pds::blob::dsl as BlobSchema; 54 + 55 + let did = self.did.clone(); 56 + let found = self 57 + .db 58 + .run(move |conn| { 59 + BlobSchema::blob 60 + .filter(BlobSchema::did.eq(did)) 61 + .filter(BlobSchema::cid.eq(cid.to_string())) 62 + .filter(BlobSchema::takedownRef.is_null()) 63 + .select(models::Blob::as_select()) 64 + .first(conn) 65 + .optional() 66 + }) 67 + .await?; 68 + 69 + match found { 70 + None => bail!("Blob not found"), 71 + Some(found) => Ok(GetBlobMetadataOutput { 72 + size: found.size, 73 + mime_type: Some(found.mime_type), 74 + }), 75 + } 76 + } 77 + 78 + pub async fn get_blob(&self, cid: Cid) -> Result<GetBlobOutput> { 79 + let metadata = self.get_blob_metadata(cid).await?; 80 + let blob_stream = match self.blobstore.get_stream(cid).await { 81 + Ok(res) => res, 82 + Err(e) => { 83 + return match e.downcast_ref() { 84 + Some(GetObjectError::NoSuchKey(key)) => { 85 + Err(anyhow::Error::new(GetObjectError::NoSuchKey(key.clone()))) 86 + } 87 + _ => bail!(e.to_string()), 88 + }; 89 + } 90 + }; 91 + Ok(GetBlobOutput { 92 + size: metadata.size, 93 + mime_type: metadata.mime_type, 94 + stream: blob_stream, 95 + }) 96 + } 97 + 98 + pub async fn get_records_for_blob(&self, cid: Cid) -> Result<Vec<String>> { 99 + use rsky_pds::schema::pds::record_blob::dsl as RecordBlobSchema; 100 + 101 + let did = self.did.clone(); 102 + let res = self 103 + .db 104 + .run(move |conn| { 105 + let results = RecordBlobSchema::record_blob 106 + .filter(RecordBlobSchema::blobCid.eq(cid.to_string())) 107 + .filter(RecordBlobSchema::did.eq(did)) 108 + .select(models::RecordBlob::as_select()) 109 + .get_results(conn)?; 110 + Ok::<_, Error>(results.into_iter().map(|row| 
row.record_uri)) 111 + }) 112 + .await? 113 + .collect::<Vec<String>>(); 114 + 115 + Ok(res) 116 + } 117 + 118 + pub async fn upload_blob_and_get_metadata( 119 + &self, 120 + user_suggested_mime: String, 121 + blob: Data<'_>, 122 + ) -> Result<BlobMetadata> { 123 + let blob_stream = blob.open(100.mebibytes()); 124 + let bytes = blob_stream.into_bytes().await?; 125 + let size = bytes.n.written; 126 + let bytes = bytes.into_inner(); 127 + let (temp_key, sha256, img_info, sniffed_mime) = try_join!( 128 + self.blobstore.put_temp(bytes.clone()), 129 + sha256_stream(bytes.clone()), 130 + image::maybe_get_info(bytes.clone()), 131 + image::mime_type_from_bytes(bytes.clone()) 132 + )?; 133 + 134 + let cid = sha256_raw_to_cid(sha256); 135 + let mime_type = sniffed_mime.unwrap_or(user_suggested_mime); 136 + 137 + Ok(BlobMetadata { 138 + temp_key, 139 + size: size as i64, 140 + cid, 141 + mime_type, 142 + width: if let Some(ref info) = img_info { 143 + Some(info.width as i32) 144 + } else { 145 + None 146 + }, 147 + height: if let Some(info) = img_info { 148 + Some(info.height as i32) 149 + } else { 150 + None 151 + }, 152 + }) 153 + } 154 + 155 + pub async fn track_untethered_blob(&self, metadata: BlobMetadata) -> Result<BlobRef> { 156 + use rsky_pds::schema::pds::blob::dsl as BlobSchema; 157 + 158 + let did = self.did.clone(); 159 + self.db.run(move |conn| { 160 + let BlobMetadata { 161 + temp_key, 162 + size, 163 + cid, 164 + mime_type, 165 + width, 166 + height, 167 + } = metadata; 168 + let created_at = now(); 169 + 170 + let found = BlobSchema::blob 171 + .filter(BlobSchema::did.eq(&did)) 172 + .filter(BlobSchema::cid.eq(&cid.to_string())) 173 + .select(models::Blob::as_select()) 174 + .first(conn) 175 + .optional()?; 176 + 177 + if let Some(found) = found { 178 + if found.takedown_ref.is_some() { 179 + bail!("Blob has been taken down, cannot re-upload") 180 + } 181 + } 182 + 183 + let upsert = sql_query("INSERT INTO pds.blob (cid, did, \"mimeType\", size, \"tempKey\", width, height, \"createdAt\", \"takedownRef\") \ 184 + VALUES \ 185 + ($1, $2, $3, $4, $5, $6, $7, $8, $9) \ 186 + ON CONFLICT (cid, did) DO UPDATE \ 187 + SET \"tempKey\" = EXCLUDED.\"tempKey\" \ 188 + WHERE pds.blob.\"tempKey\" is not null;"); 189 + upsert 190 + .bind::<Text, _>(&cid.to_string()) 191 + .bind::<Text, _>(&did) 192 + .bind::<Text, _>(&mime_type) 193 + .bind::<Integer, _>(size as i32) 194 + .bind::<Nullable<Text>, _>(Some(temp_key.clone())) 195 + .bind::<Nullable<Integer>, _>(width) 196 + .bind::<Nullable<Integer>, _>(height) 197 + .bind::<Text, _>(created_at) 198 + .bind::<Nullable<Text>, _>(None as Option<String>) 199 + .execute(conn)?; 200 + 201 + Ok(BlobRef::new(cid, mime_type, size, None)) 202 + }).await 203 + } 204 + 205 + pub async fn process_write_blobs(&self, writes: Vec<PreparedWrite>) -> Result<()> { 206 + self.delete_dereferenced_blobs(writes.clone()).await?; 207 + let _ = stream::iter(writes) 208 + .then(|write| async move { 209 + Ok::<(), anyhow::Error>(match write { 210 + PreparedWrite::Create(w) => { 211 + for blob in w.blobs { 212 + self.verify_blob_and_make_permanent(blob.clone()).await?; 213 + self.associate_blob(blob, w.uri.clone()).await?; 214 + } 215 + } 216 + PreparedWrite::Update(w) => { 217 + for blob in w.blobs { 218 + self.verify_blob_and_make_permanent(blob.clone()).await?; 219 + self.associate_blob(blob, w.uri.clone()).await?; 220 + } 221 + } 222 + _ => (), 223 + }) 224 + }) 225 + .collect::<Vec<_>>() 226 + .await 227 + .into_iter() 228 + .collect::<Result<Vec<_>, _>>()?; 229 + Ok(()) 230 
+ } 231 + 232 + pub async fn delete_dereferenced_blobs(&self, writes: Vec<PreparedWrite>) -> Result<()> { 233 + use rsky_pds::schema::pds::blob::dsl as BlobSchema; 234 + use rsky_pds::schema::pds::record_blob::dsl as RecordBlobSchema; 235 + 236 + let uris: Vec<String> = writes 237 + .clone() 238 + .into_iter() 239 + .filter_map(|w| match w { 240 + PreparedWrite::Delete(w) => Some(w.uri), 241 + PreparedWrite::Update(w) => Some(w.uri), 242 + _ => None, 243 + }) 244 + .collect(); 245 + if uris.is_empty() { 246 + return Ok(()); 247 + } 248 + 249 + let deleted_repo_blobs: Vec<models::RecordBlob> = self 250 + .db 251 + .run(move |conn| { 252 + delete(RecordBlobSchema::record_blob) 253 + .filter(RecordBlobSchema::recordUri.eq_any(uris)) 254 + .get_results(conn) 255 + }) 256 + .await? 257 + .into_iter() 258 + .collect::<Vec<models::RecordBlob>>(); 259 + if deleted_repo_blobs.is_empty() { 260 + return Ok(()); 261 + } 262 + 263 + let deleted_repo_blob_cids: Vec<String> = deleted_repo_blobs 264 + .into_iter() 265 + .map(|row| row.blob_cid) 266 + .collect::<Vec<String>>(); 267 + 268 + let deleted_cids = deleted_repo_blob_cids.clone(); 269 + let mut duplicated_cids: Vec<String> = self 270 + .db 271 + .run(move |conn| { 272 + RecordBlobSchema::record_blob 273 + .select(RecordBlobSchema::blobCid) 274 + .filter(RecordBlobSchema::blobCid.eq_any(&deleted_cids)) 275 + .load(conn) 276 + }) 277 + .await? 278 + .into_iter() 279 + .collect::<Vec<String>>(); 280 + 281 + let mut new_blob_cids: Vec<String> = writes 282 + .into_iter() 283 + .map(|w| match w { 284 + PreparedWrite::Create(w) => w.blobs, 285 + PreparedWrite::Update(w) => w.blobs, 286 + PreparedWrite::Delete(_) => Vec::new(), 287 + }) 288 + .collect::<Vec<Vec<PreparedBlobRef>>>() 289 + .into_iter() 290 + .flat_map(|v: Vec<PreparedBlobRef>| v.into_iter().map(|b| b.cid.to_string())) 291 + .collect(); 292 + let mut cids_to_keep = Vec::new(); 293 + cids_to_keep.append(&mut new_blob_cids); 294 + cids_to_keep.append(&mut duplicated_cids); 295 + 296 + let cids_to_delete = deleted_repo_blob_cids 297 + .into_iter() 298 + .filter_map(|cid: String| match cids_to_keep.contains(&cid) { 299 + true => None, 300 + false => Some(cid), 301 + }) 302 + .collect::<Vec<String>>(); 303 + if cids_to_delete.is_empty() { 304 + return Ok(()); 305 + } 306 + 307 + let cids_to_remove = cids_to_delete.clone(); 308 + self.db 309 + .run(move |conn| { 310 + delete(BlobSchema::blob) 311 + .filter(BlobSchema::cid.eq_any(&cids_to_remove)) 312 + .execute(conn) 313 + }) 314 + .await?; 315 + 316 + // Original code queues a background job to delete by CID from the S3-compatible blobstore; here the deletes run inline 317 + let _ = stream::iter(cids_to_delete) 318 + .then(|cid| async { self.blobstore.delete(cid).await }) 319 + .collect::<Vec<_>>() 320 + .await 321 + .into_iter() 322 + .collect::<Result<Vec<_>, _>>()?; 323 + Ok(()) 324 + } 325 + 326 + pub async fn verify_blob_and_make_permanent(&self, blob: PreparedBlobRef) -> Result<()> { 327 + use rsky_pds::schema::pds::blob::dsl as BlobSchema; 328 + 329 + let found = self 330 + .db 331 + .run(move |conn| { 332 + BlobSchema::blob 333 + .filter( 334 + BlobSchema::cid 335 + .eq(blob.cid.to_string()) 336 + .and(BlobSchema::takedownRef.is_null()), 337 + ) 338 + .select(models::Blob::as_select()) 339 + .first(conn) 340 + .optional() 341 + }) 342 + .await?; 343 + if let Some(found) = found { 344 + verify_blob(&blob, &found).await?; 345 + if let Some(ref temp_key) = found.temp_key { 346 + self.blobstore 347 + .make_permanent(temp_key.clone(), blob.cid) 348 + .await?; 349 + } 350 + self.db 351 + .run(move |conn| { 352 + 
update(BlobSchema::blob) 353 + .filter(BlobSchema::tempKey.eq(found.temp_key)) 354 + .set(BlobSchema::tempKey.eq::<Option<String>>(None)) 355 + .execute(conn) 356 + }) 357 + .await?; 358 + Ok(()) 359 + } else { 360 + bail!("Could not find blob: {:?}", blob.cid.to_string()) 361 + } 362 + } 363 + 364 + pub async fn associate_blob(&self, blob: PreparedBlobRef, _record_uri: String) -> Result<()> { 365 + use rsky_pds::schema::pds::record_blob::dsl as RecordBlobSchema; 366 + 367 + let cid = blob.cid.to_string(); 368 + let record_uri = _record_uri; 369 + let did = self.did.clone(); 370 + self.db 371 + .run(move |conn| { 372 + insert_into(RecordBlobSchema::record_blob) 373 + .values(( 374 + RecordBlobSchema::blobCid.eq(cid), 375 + RecordBlobSchema::recordUri.eq(record_uri), 376 + RecordBlobSchema::did.eq(&did), 377 + )) 378 + .on_conflict_do_nothing() 379 + .execute(conn) 380 + }) 381 + .await?; 382 + Ok(()) 383 + } 384 + 385 + pub async fn blob_count(&self) -> Result<i64> { 386 + use rsky_pds::schema::pds::blob::dsl as BlobSchema; 387 + 388 + let did = self.did.clone(); 389 + self.db 390 + .run(move |conn| { 391 + let res = BlobSchema::blob 392 + .filter(BlobSchema::did.eq(&did)) 393 + .count() 394 + .get_result(conn)?; 395 + Ok(res) 396 + }) 397 + .await 398 + } 399 + 400 + pub async fn record_blob_count(&self) -> Result<i64> { 401 + use rsky_pds::schema::pds::record_blob::dsl as RecordBlobSchema; 402 + 403 + let did = self.did.clone(); 404 + self.db 405 + .run(move |conn| { 406 + let res: i64 = RecordBlobSchema::record_blob 407 + .filter(RecordBlobSchema::did.eq(&did)) 408 + .select(count_distinct(RecordBlobSchema::blobCid)) 409 + .get_result(conn)?; 410 + Ok(res) 411 + }) 412 + .await 413 + } 414 + 415 + pub async fn list_missing_blobs( 416 + &self, 417 + opts: ListMissingBlobsOpts, 418 + ) -> Result<Vec<ListMissingBlobsRefRecordBlob>> { 419 + use rsky_pds::schema::pds::blob::dsl as BlobSchema; 420 + use rsky_pds::schema::pds::record_blob::dsl as RecordBlobSchema; 421 + 422 + let did = self.did.clone(); 423 + self.db 424 + .run(move |conn| { 425 + let ListMissingBlobsOpts { cursor, limit } = opts; 426 + 427 + if limit > 1000 { 428 + bail!("Limit too high. Max: 1000."); 429 + } 430 + 431 + let res: Vec<models::RecordBlob> = if let Some(cursor) = cursor { 432 + RecordBlobSchema::record_blob 433 + .limit(limit as i64) 434 + .filter(not(exists( 435 + BlobSchema::blob 436 + .filter(BlobSchema::cid.eq(RecordBlobSchema::blobCid)) 437 + .filter(BlobSchema::did.eq(&did)) 438 + .select(models::Blob::as_select()), 439 + ))) 440 + .filter(RecordBlobSchema::blobCid.gt(cursor)) 441 + .filter(RecordBlobSchema::did.eq(&did)) 442 + .select(models::RecordBlob::as_select()) 443 + .order(RecordBlobSchema::blobCid.asc()) 444 + .distinct_on(RecordBlobSchema::blobCid) 445 + .get_results(conn)? 446 + } else { 447 + RecordBlobSchema::record_blob 448 + .limit(limit as i64) 449 + .filter(not(exists( 450 + BlobSchema::blob 451 + .filter(BlobSchema::cid.eq(RecordBlobSchema::blobCid)) 452 + .filter(BlobSchema::did.eq(&did)) 453 + .select(models::Blob::as_select()), 454 + ))) 455 + .filter(RecordBlobSchema::did.eq(&did)) 456 + .select(models::RecordBlob::as_select()) 457 + .order(RecordBlobSchema::blobCid.asc()) 458 + .distinct_on(RecordBlobSchema::blobCid) 459 + .get_results(conn)? 
460 + }; 461 + 462 + Ok(res 463 + .into_iter() 464 + .map(|row| ListMissingBlobsRefRecordBlob { 465 + cid: row.blob_cid, 466 + record_uri: row.record_uri, 467 + }) 468 + .collect()) 469 + }) 470 + .await 471 + } 472 + 473 + pub async fn list_blobs(&self, opts: ListBlobsOpts) -> Result<Vec<String>> { 474 + use rsky_pds::schema::pds::record::dsl as RecordSchema; 475 + use rsky_pds::schema::pds::record_blob::dsl as RecordBlobSchema; 476 + let ListBlobsOpts { 477 + since, 478 + cursor, 479 + limit, 480 + } = opts; 481 + 482 + let res: Vec<String> = if let Some(since) = since { 483 + let mut builder = RecordBlobSchema::record_blob 484 + .inner_join( 485 + RecordSchema::record.on(RecordSchema::uri.eq(RecordBlobSchema::recordUri)), 486 + ) 487 + .filter(RecordSchema::repoRev.gt(since)) 488 + .select(RecordBlobSchema::blobCid) 489 + .distinct() 490 + .order(RecordBlobSchema::blobCid.asc()) 491 + .limit(limit as i64) 492 + .into_boxed(); 493 + 494 + if let Some(cursor) = cursor { 495 + builder = builder.filter(RecordBlobSchema::blobCid.gt(cursor)); 496 + } 497 + self.db.run(move |conn| builder.load(conn)).await? 498 + } else { 499 + let mut builder = RecordBlobSchema::record_blob 500 + .select(RecordBlobSchema::blobCid) 501 + .distinct() 502 + .order(RecordBlobSchema::blobCid.asc()) 503 + .limit(limit as i64) 504 + .into_boxed(); 505 + 506 + if let Some(cursor) = cursor { 507 + builder = builder.filter(RecordBlobSchema::blobCid.gt(cursor)); 508 + } 509 + self.db.run(move |conn| builder.load(conn)).await? 510 + }; 511 + Ok(res) 512 + } 513 + 514 + pub async fn get_blob_takedown_status(&self, cid: Cid) -> Result<Option<StatusAttr>> { 515 + use rsky_pds::schema::pds::blob::dsl as BlobSchema; 516 + 517 + self.db 518 + .run(move |conn| { 519 + let res = BlobSchema::blob 520 + .filter(BlobSchema::cid.eq(cid.to_string())) 521 + .select(models::Blob::as_select()) 522 + .first(conn) 523 + .optional()?; 524 + match res { 525 + None => Ok(None), 526 + Some(res) => match res.takedown_ref { 527 + None => Ok(Some(StatusAttr { 528 + applied: false, 529 + r#ref: None, 530 + })), 531 + Some(takedown_ref) => Ok(Some(StatusAttr { 532 + applied: true, 533 + r#ref: Some(takedown_ref), 534 + })), 535 + }, 536 + } 537 + }) 538 + .await 539 + } 540 + 541 + // Transactors 542 + // ------------------- 543 + 544 + pub async fn update_blob_takedown_status(&self, blob: Cid, takedown: StatusAttr) -> Result<()> { 545 + use rsky_pds::schema::pds::blob::dsl as BlobSchema; 546 + 547 + let takedown_ref: Option<String> = match takedown.applied { 548 + true => match takedown.r#ref { 549 + Some(takedown_ref) => Some(takedown_ref), 550 + None => Some(now()), 551 + }, 552 + false => None, 553 + }; 554 + 555 + let blob = self 556 + .db 557 + .run(move |conn| { 558 + update(BlobSchema::blob) 559 + .filter(BlobSchema::cid.eq(blob.to_string())) 560 + .set(BlobSchema::takedownRef.eq(takedown_ref)) 561 + .execute(conn)?; 562 + Ok::<_, Error>(blob) 563 + }) 564 + .await?; 565 + 566 + let res = match takedown.applied { 567 + true => self.blobstore.quarantine(blob).await, 568 + false => self.blobstore.unquarantine(blob).await, 569 + }; 570 + match res { 571 + Ok(_) => Ok(()), 572 + Err(e) => match e.downcast_ref() { 573 + Some(BlobError::BlobNotFoundError) => Ok(()), 574 + None => Err(e), 575 + }, 576 + } 577 + } 578 + }
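A small usage sketch of the new BlobReader, assuming an ActorStore constructed as in actor_store.rs above; the two counting methods are the ones defined in this file, and the helper function itself is illustrative:

// Report how many blobs are stored for the actor and how many are
// still referenced by records (distinct blobCid rows in record_blob).
async fn blob_stats(store: &ActorStore) -> anyhow::Result<()> {
    let stored = store.blob.blob_count().await?;
    let referenced = store.blob.record_blob_count().await?;
    println!("{stored} blobs stored, {referenced} referenced by records");
    Ok(())
}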
-76
src/actor_store/blob/background.rs
··· 1 - use std::future::Future; 2 - use std::sync::Arc; 3 - use tokio::sync::{Mutex, Semaphore}; 4 - use tokio::task::{self, JoinHandle}; 5 - use tracing::error; 6 - 7 - /// Background Queue for asynchronous processing tasks 8 - /// 9 - /// A simple queue for in-process, out-of-band/backgrounded work 10 - #[derive(Clone)] 11 - pub struct BackgroundQueue { 12 - semaphore: Arc<Semaphore>, 13 - tasks: Arc<Mutex<Vec<JoinHandle<()>>>>, 14 - destroyed: Arc<Mutex<bool>>, 15 - } 16 - 17 - impl BackgroundQueue { 18 - /// Create a new BackgroundQueue with the specified concurrency limit 19 - pub fn new(concurrency: usize) -> Self { 20 - Self { 21 - semaphore: Arc::new(Semaphore::new(concurrency)), 22 - tasks: Arc::new(Mutex::new(Vec::new())), 23 - destroyed: Arc::new(Mutex::new(false)), 24 - } 25 - } 26 - 27 - /// Add a task to the queue 28 - pub async fn add<F>(&self, future: F) 29 - where 30 - F: Future<Output = ()> + Send + 'static, 31 - { 32 - let destroyed = *self.destroyed.lock().await; 33 - if destroyed { 34 - return; 35 - } 36 - 37 - let permit = match self.semaphore.clone().acquire_owned().await { 38 - Ok(p) => p, 39 - Err(_) => { 40 - error!("Failed to acquire semaphore permit for background task"); 41 - return; 42 - } 43 - }; 44 - 45 - let tasks = self.tasks.clone(); 46 - 47 - let handle = task::spawn(async move { 48 - future.await; 49 - 50 - // Catch any panics to prevent task failures from propagating 51 - if let Err(e) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {})) { 52 - error!("Background queue task panicked: {:?}", e); 53 - } 54 - 55 - // Release the semaphore permit 56 - drop(permit); 57 - }); 58 - 59 - // Store the handle for later cleanup 60 - tasks.lock().await.push(handle); 61 - } 62 - 63 - /// Wait for all tasks to finish 64 - pub async fn process_all(&self) { 65 - let mut handles = self.tasks.lock().await; 66 - while let Some(handle) = handles.pop() { 67 - let _ = handle.await; 68 - } 69 - } 70 - 71 - /// Stop accepting new tasks, wait for all to finish 72 - pub async fn destroy(&self) { 73 - *self.destroyed.lock().await = true; 74 - self.process_all().await; 75 - } 76 - }
-731
src/actor_store/blob/mod.rs
··· 1 - // bluepds/src/actor_store/blob/mod.rs 2 - 3 - //! Blob storage and retrieval for the actor store. 4 - 5 - use std::str::FromStr; 6 - 7 - use anyhow::{Context as _, Result, bail}; 8 - use atrium_api::com::atproto::admin::defs::StatusAttr; 9 - use atrium_repo::Cid; 10 - use diesel::associations::HasTable as _; 11 - use diesel::prelude::*; 12 - use futures::{StreamExt, future::try_join_all}; 13 - use rsky_common::ipld::sha256_raw_to_cid; 14 - use rsky_pds::actor_store::blob::sha256_stream; 15 - use rsky_pds::image::{maybe_get_info, mime_type_from_bytes}; 16 - use rsky_pds::schema::pds::*; 17 - use rsky_repo::types::{PreparedBlobRef, PreparedWrite, WriteOpAction}; 18 - use sha2::Digest; 19 - use uuid::Uuid; 20 - 21 - use crate::actor_store::PreparedWrite as BluePreparedWrite; 22 - use crate::actor_store::db::ActorDb; 23 - 24 - /// Background task queue for blob operations 25 - pub mod background; 26 - // Re-export BackgroundQueue 27 - pub use background::BackgroundQueue; 28 - 29 - pub mod placeholder; 30 - pub(crate) use placeholder::BlobStorePlaceholder; 31 - 32 - /// Type for stream of blob data 33 - pub type BlobStream = Box<dyn std::io::Read + Send>; 34 - 35 - /// Blob store interface 36 - pub trait BlobStore: Send + Sync { 37 - async fn put_temp(&self, bytes: &[u8]) -> Result<String>; 38 - async fn make_permanent(&self, key: &str, cid: Cid) -> Result<()>; 39 - async fn put_permanent(&self, cid: Cid, bytes: &[u8]) -> Result<()>; 40 - async fn quarantine(&self, cid: Cid) -> Result<()>; 41 - async fn unquarantine(&self, cid: Cid) -> Result<()>; 42 - async fn get_bytes(&self, cid: Cid) -> Result<Vec<u8>>; 43 - async fn get_stream(&self, cid: Cid) -> Result<BlobStream>; 44 - async fn has_temp(&self, key: &str) -> Result<bool>; 45 - async fn has_stored(&self, cid: Cid) -> Result<bool>; 46 - async fn delete(&self, cid: Cid) -> Result<()>; 47 - async fn delete_many(&self, cids: Vec<Cid>) -> Result<()>; 48 - } 49 - 50 - /// Blob metadata for upload 51 - pub struct BlobMetadata { 52 - pub temp_key: String, 53 - pub size: i64, 54 - pub cid: Cid, 55 - pub mime_type: String, 56 - pub width: Option<i32>, 57 - pub height: Option<i32>, 58 - } 59 - 60 - /// Blob data with content stream 61 - pub struct BlobData { 62 - pub size: u64, 63 - pub mime_type: Option<String>, 64 - pub stream: BlobStream, 65 - } 66 - 67 - /// Options for listing blobs 68 - pub struct ListBlobsOptions { 69 - pub since: Option<String>, 70 - pub cursor: Option<String>, 71 - pub limit: i64, 72 - } 73 - 74 - /// Options for listing missing blobs 75 - pub struct ListMissingBlobsOptions { 76 - pub cursor: Option<String>, 77 - pub limit: i64, 78 - } 79 - 80 - /// Information about a missing blob 81 - pub struct MissingBlob { 82 - pub cid: String, 83 - pub record_uri: String, 84 - } 85 - 86 - /// Unified handler for blob operations 87 - pub struct BlobHandler { 88 - /// Database connection 89 - pub db: ActorDb, 90 - /// DID of the actor 91 - pub did: String, 92 - /// Blob store implementation 93 - pub blobstore: Box<dyn BlobStore>, 94 - /// Background queue for async operations 95 - pub background_queue: Option<background::BackgroundQueue>, 96 - } 97 - 98 - impl BlobHandler { 99 - /// Create a new blob handler with background queue for write operations 100 - pub fn new( 101 - db: ActorDb, 102 - blobstore: impl BlobStore + 'static, 103 - background_queue: background::BackgroundQueue, 104 - did: String, 105 - ) -> Self { 106 - Self { 107 - db, 108 - did, 109 - blobstore: Box::new(blobstore), 110 - background_queue: 
Some(background_queue), 111 - } 112 - } 113 - 114 - /// Get metadata for a blob 115 - pub async fn get_blob_metadata(&self, cid: &Cid) -> Result<BlobMetadata> { 116 - let cid_str = cid.to_string(); 117 - let did = self.did.clone(); 118 - 119 - let found = self 120 - .db 121 - .run(move |conn| { 122 - blob::table 123 - .filter(blob::cid.eq(&cid_str)) 124 - .filter(blob::did.eq(&did)) 125 - .filter(blob::takedownRef.is_null()) 126 - .first::<BlobModel>(conn) 127 - .optional() 128 - }) 129 - .await?; 130 - 131 - match found { 132 - Some(found) => Ok(BlobMetadata { 133 - temp_key: found.temp_key.unwrap_or_default(), 134 - size: found.size as i64, 135 - cid: Cid::from_str(&found.cid)?, 136 - mime_type: found.mime_type, 137 - width: found.width, 138 - height: found.height, 139 - }), 140 - None => bail!("Blob not found"), 141 - } 142 - } 143 - 144 - /// Get a blob's complete data 145 - pub async fn get_blob(&self, cid: &Cid) -> Result<BlobData> { 146 - let metadata = self.get_blob_metadata(cid).await?; 147 - let blob_stream = self.blobstore.get_stream(*cid).await?; 148 - 149 - Ok(BlobData { 150 - size: metadata.size as u64, 151 - mime_type: Some(metadata.mime_type), 152 - stream: blob_stream, 153 - }) 154 - } 155 - 156 - /// List blobs for a repository 157 - pub async fn list_blobs(&self, opts: ListBlobsOptions) -> Result<Vec<String>> { 158 - let did = self.did.clone(); 159 - let since = opts.since; 160 - let cursor = opts.cursor; 161 - let limit = opts.limit; 162 - 163 - self.db 164 - .run(move |conn| { 165 - let mut query = record_blob::table 166 - .inner_join( 167 - crate::schema::record::table 168 - .on(crate::schema::record::uri.eq(record_blob::record_uri)), 169 - ) 170 - .filter(record_blob::did.eq(&did)) 171 - .select(record_blob::blob_cid) 172 - .distinct() 173 - .order(record_blob::blob_cid.asc()) 174 - .limit(limit) 175 - .into_boxed(); 176 - 177 - if let Some(since_val) = since { 178 - query = query.filter(crate::schema::record::repo_rev.gt(since_val)); 179 - } 180 - 181 - if let Some(cursor_val) = cursor { 182 - query = query.filter(record_blob::blob_cid.gt(cursor_val)); 183 - } 184 - 185 - query.load::<String>(conn) 186 - }) 187 - .await 188 - } 189 - 190 - /// Get records that reference a blob 191 - pub async fn get_records_for_blob(&self, cid: &Cid) -> Result<Vec<String>> { 192 - let cid_str = cid.to_string(); 193 - let did = self.did.clone(); 194 - 195 - self.db 196 - .run(move |conn| { 197 - record_blob::table 198 - .filter(record_blob::blob_cid.eq(&cid_str)) 199 - .filter(record_blob::did.eq(&did)) 200 - .select(record_blob::record_uri) 201 - .load::<String>(conn) 202 - }) 203 - .await 204 - } 205 - 206 - /// Get blobs referenced by a record 207 - pub async fn get_blobs_for_record(&self, record_uri: &str) -> Result<Vec<String>> { 208 - let record_uri_str = record_uri.to_string(); 209 - let did = self.did.clone(); 210 - 211 - self.db 212 - .run(move |conn| { 213 - blob::table 214 - .inner_join(record_blob::table.on(record_blob::blob_cid.eq(blob::cid))) 215 - .filter(record_blob::record_uri.eq(&record_uri_str)) 216 - .filter(blob::did.eq(&did)) 217 - .select(blob::cid) 218 - .load::<String>(conn) 219 - }) 220 - .await 221 - } 222 - 223 - /// Upload a blob and get its metadata 224 - pub async fn upload_blob_and_get_metadata( 225 - &self, 226 - user_suggested_mime: &str, 227 - blob_bytes: &[u8], 228 - ) -> Result<BlobMetadata> { 229 - let temp_key = self.blobstore.put_temp(blob_bytes).await?; 230 - let size = blob_bytes.len() as i64; 231 - let sha256 = 
sha256_stream(blob_bytes).await?; 232 - let img_info = maybe_get_info(blob_bytes).await?; 233 - let sniffed_mime = mime_type_from_bytes(blob_bytes).await?; 234 - let cid = sha256_raw_to_cid(sha256); 235 - let mime_type = sniffed_mime.unwrap_or_else(|| user_suggested_mime.to_string()); 236 - 237 - Ok(BlobMetadata { 238 - temp_key, 239 - size, 240 - cid, 241 - mime_type, 242 - width: img_info.as_ref().map(|info| info.width as i32), 243 - height: img_info.as_ref().map(|info| info.height as i32), 244 - }) 245 - } 246 - 247 - /// Count total blobs 248 - pub async fn blob_count(&self) -> Result<i64> { 249 - let did = self.did.clone(); 250 - 251 - self.db 252 - .run(move |conn| { 253 - blob::table 254 - .filter(blob::did.eq(&did)) 255 - .count() 256 - .get_result(conn) 257 - }) 258 - .await 259 - } 260 - 261 - /// Count distinct blobs referenced by records 262 - pub async fn record_blob_count(&self) -> Result<i64> { 263 - let did = self.did.clone(); 264 - 265 - self.db 266 - .run(move |conn| { 267 - record_blob::table 268 - .filter(record_blob::did.eq(&did)) 269 - .select(diesel::dsl::count_distinct(record_blob::blob_cid)) 270 - .first::<i64>(conn) 271 - }) 272 - .await 273 - } 274 - 275 - /// List blobs that are referenced but missing from storage 276 - pub async fn list_missing_blobs( 277 - &self, 278 - opts: ListMissingBlobsOptions, 279 - ) -> Result<Vec<MissingBlob>> { 280 - let did = self.did.clone(); 281 - let limit = opts.limit; 282 - let cursor = opts.cursor; 283 - 284 - self.db 285 - .run(move |conn| { 286 - let mut query = record_blob::table 287 - .left_join( 288 - blob::table.on(blob::cid.eq(record_blob::blob_cid).and(blob::did.eq(&did))), 289 - ) 290 - .filter(record_blob::did.eq(&did)) 291 - .filter(blob::cid.is_null()) 292 - .select((record_blob::blob_cid, record_blob::record_uri)) 293 - .order(record_blob::blob_cid.asc()) 294 - .limit(limit) 295 - .into_boxed(); 296 - 297 - if let Some(cursor_val) = cursor { 298 - query = query.filter(record_blob::blob_cid.gt(cursor_val)); 299 - } 300 - 301 - let results = query.load::<(String, String)>(conn)?; 302 - 303 - Ok(results 304 - .into_iter() 305 - .map(|(cid, record_uri)| MissingBlob { cid, record_uri }) 306 - .collect()) 307 - }) 308 - .await 309 - } 310 - 311 - /// Get takedown status for a blob 312 - pub async fn get_blob_takedown_status(&self, cid: &Cid) -> Result<Option<StatusAttr>> { 313 - let cid_str = cid.to_string(); 314 - let did = self.did.clone(); 315 - 316 - self.db 317 - .run(move |conn| { 318 - let result = blob::table 319 - .filter(blob::cid.eq(&cid_str)) 320 - .filter(blob::did.eq(&did)) 321 - .select(blob::takedownRef) 322 - .first::<Option<String>>(conn) 323 - .optional()?; 324 - 325 - match result { 326 - Some(takedown) => match takedown { 327 - Some(takedownRef) => Ok(Some(StatusAttr { 328 - applied: true, 329 - r#ref: Some(takedownRef), 330 - })), 331 - None => Ok(Some(StatusAttr { 332 - applied: false, 333 - r#ref: None, 334 - })), 335 - }, 336 - None => Ok(None), 337 - } 338 - }) 339 - .await 340 - } 341 - 342 - /// Get all blob CIDs in the repository 343 - pub async fn get_blob_cids(&self) -> Result<Vec<Cid>> { 344 - let did = self.did.clone(); 345 - 346 - let rows = self 347 - .db 348 - .run(move |conn| { 349 - blob::table 350 - .filter(blob::did.eq(&did)) 351 - .select(blob::cid) 352 - .load::<String>(conn) 353 - }) 354 - .await?; 355 - 356 - rows.into_iter() 357 - .map(|cid_str| Cid::from_str(&cid_str).context("Invalid CID format")) 358 - .collect() 359 - } 360 - 361 - /// Track a blob that's not yet 
associated with a record 362 - pub async fn track_untethered_blob(&self, metadata: &BlobMetadata) -> Result<()> { 363 - let cid_str = metadata.cid.to_string(); 364 - let did = self.did.clone(); 365 - 366 - // Check if blob exists and is taken down 367 - let existing = self 368 - .db 369 - .run({ 370 - let cid_str_clone = cid_str.clone(); 371 - let did_clone = did.clone(); 372 - 373 - move |conn| { 374 - blob::table 375 - .filter(blob::did.eq(&did_clone)) 376 - .filter(blob::cid.eq(&cid_str_clone)) 377 - .select(blob::takedownRef) 378 - .first::<Option<String>>(conn) 379 - .optional() 380 - } 381 - }) 382 - .await?; 383 - 384 - if let Some(row) = existing { 385 - if row.is_some() { 386 - return Err(anyhow::anyhow!( 387 - "Blob has been taken down, cannot re-upload" 388 - )); 389 - } 390 - } 391 - 392 - let size = metadata.size as i32; 393 - let now = chrono::Utc::now().to_rfc3339(); 394 - let mime_type = metadata.mime_type.clone(); 395 - let temp_key = metadata.temp_key.clone(); 396 - let width = metadata.width; 397 - let height = metadata.height; 398 - 399 - self.db.run(move |conn| { 400 - diesel::insert_into(blob::table) 401 - .values(( 402 - blob::cid.eq(&cid_str), 403 - blob::did.eq(&did), 404 - blob::mime_type.eq(&mime_type), 405 - blob::size.eq(size), 406 - blob::temp_key.eq(&temp_key), 407 - blob::width.eq(width), 408 - blob::height.eq(height), 409 - blob::created_at.eq(&now), 410 - )) 411 - .on_conflict((blob::cid, blob::did)) 412 - .do_update() 413 - .set( 414 - blob::temp_key.eq( 415 - diesel::dsl::sql::<diesel::sql_types::Text>( 416 - "CASE WHEN blob.temp_key IS NULL THEN excluded.temp_key ELSE blob.temp_key END" 417 - ) 418 - ) 419 - ) 420 - .execute(conn) 421 - .context("Failed to track untethered blob") 422 - }).await?; 423 - 424 - Ok(()) 425 - } 426 - 427 - /// Process blobs for repository writes 428 - pub async fn process_write_blobs(&self, rev: &str, writes: Vec<PreparedWrite>) -> Result<()> { 429 - self.delete_dereferenced_blobs(writes.clone()).await?; 430 - 431 - let futures = writes.iter().filter_map(|write| match write { 432 - PreparedWrite::Create(w) | PreparedWrite::Update(w) => { 433 - let blobs = &w.blobs; 434 - let uri = w.uri.clone(); 435 - let handler = self; 436 - 437 - Some(async move { 438 - for blob in blobs { 439 - handler.verify_blob_and_make_permanent(blob).await?; 440 - handler.associate_blob(blob, &uri).await?; 441 - } 442 - Ok(()) 443 - }) 444 - } 445 - _ => None, 446 - }); 447 - 448 - try_join_all(futures).await?; 449 - 450 - Ok(()) 451 - } 452 - 453 - /// Delete blobs that are no longer referenced 454 - pub async fn delete_dereferenced_blobs(&self, writes: Vec<PreparedWrite>) -> Result<()> { 455 - let uris: Vec<String> = writes 456 - .iter() 457 - .filter_map(|w| match w { 458 - PreparedWrite::Delete(w) => Some(w.uri.clone()), 459 - PreparedWrite::Update(w) => Some(w.uri.clone()), 460 - _ => None, 461 - }) 462 - .collect(); 463 - 464 - if uris.is_empty() { 465 - return Ok(()); 466 - } 467 - 468 - let did = self.did.clone(); 469 - 470 - // Delete record-blob associations 471 - let deleted_repo_blobs = self 472 - .db 473 - .run({ 474 - let uris_clone = uris.clone(); 475 - let did_clone = did.clone(); 476 - 477 - move |conn| { 478 - let query = diesel::delete(record_blob::table) 479 - .filter(record_blob::did.eq(&did_clone)) 480 - .filter(record_blob::record_uri.eq_any(&uris_clone)) 481 - .returning(RecordBlob::as_returning()); 482 - 483 - query.load(conn) 484 - } 485 - }) 486 - .await?; 487 - 488 - if deleted_repo_blobs.is_empty() { 489 - return Ok(()); 
490 - } 491 - 492 - // Collect deleted blob CIDs 493 - let deleted_repo_blob_cids: Vec<String> = deleted_repo_blobs 494 - .iter() 495 - .map(|rb| rb.blob_cid.clone()) 496 - .collect(); 497 - 498 - // Find duplicates in record_blob table 499 - let duplicate_cids = self 500 - .db 501 - .run({ 502 - let blob_cids = deleted_repo_blob_cids.clone(); 503 - let did_clone = did.clone(); 504 - 505 - move |conn| { 506 - record_blob::table 507 - .filter(record_blob::did.eq(&did_clone)) 508 - .filter(record_blob::blob_cid.eq_any(&blob_cids)) 509 - .select(record_blob::blob_cid) 510 - .load::<String>(conn) 511 - } 512 - }) 513 - .await?; 514 - 515 - // Get new blob CIDs from writes 516 - let new_blob_cids: Vec<String> = writes 517 - .iter() 518 - .filter_map(|w| match w { 519 - PreparedWrite::Create(w) | PreparedWrite::Update(w) => Some( 520 - w.blobs 521 - .iter() 522 - .map(|b| b.cid.to_string()) 523 - .collect::<Vec<String>>(), 524 - ), 525 - _ => None, 526 - }) 527 - .flatten() 528 - .collect(); 529 - 530 - // Determine which CIDs to keep and which to delete 531 - let cids_to_keep: std::collections::HashSet<String> = new_blob_cids 532 - .into_iter() 533 - .chain(duplicate_cids.into_iter()) 534 - .collect(); 535 - 536 - let cids_to_delete: Vec<String> = deleted_repo_blob_cids 537 - .into_iter() 538 - .filter(|cid| !cids_to_keep.contains(cid)) 539 - .collect(); 540 - 541 - if cids_to_delete.is_empty() { 542 - return Ok(()); 543 - } 544 - 545 - // Delete blobs from the database 546 - self.db 547 - .run({ 548 - let cids = cids_to_delete.clone(); 549 - let did_clone = did.clone(); 550 - 551 - move |conn| { 552 - diesel::delete(blob::table) 553 - .filter(blob::did.eq(&did_clone)) 554 - .filter(blob::cid.eq_any(&cids)) 555 - .execute(conn) 556 - } 557 - }) 558 - .await?; 559 - 560 - // Delete blobs from storage 561 - let cids_to_delete_objects: Vec<Cid> = cids_to_delete 562 - .iter() 563 - .filter_map(|cid_str| Cid::from_str(cid_str).ok()) 564 - .collect(); 565 - 566 - // Use background queue if available 567 - if let Some(queue) = &self.background_queue { 568 - let blobstore = self.blobstore.clone(); 569 - queue 570 - .add(async move { 571 - let _ = blobstore.delete_many(cids_to_delete_objects).await; 572 - }) 573 - .await; 574 - } else { 575 - // Otherwise delete directly 576 - if !cids_to_delete_objects.is_empty() { 577 - self.blobstore.delete_many(cids_to_delete_objects).await?; 578 - } 579 - } 580 - 581 - Ok(()) 582 - } 583 - 584 - /// Verify blob integrity and move from temporary to permanent storage 585 - pub async fn verify_blob_and_make_permanent(&self, blob: &PreparedBlobRef) -> Result<()> { 586 - let cid_str = blob.cid.to_string(); 587 - let did = self.did.clone(); 588 - 589 - let found = self 590 - .db 591 - .run(move |conn| { 592 - blob::table 593 - .filter(blob::did.eq(&did)) 594 - .filter(blob::cid.eq(&cid_str)) 595 - .filter(blob::takedownRef.is_null()) 596 - .first::<BlobModel>(conn) 597 - .optional() 598 - }) 599 - .await?; 600 - 601 - let found = match found { 602 - Some(b) => b, 603 - None => bail!("Blob not found: {}", cid_str), 604 - }; 605 - 606 - // Verify blob constraints 607 - if let Some(max_size) = blob.constraints.max_size { 608 - if found.size as usize > max_size { 609 - bail!( 610 - "BlobTooLarge: This file is too large. It is {} but the maximum size is {}", 611 - found.size, 612 - max_size 613 - ); 614 - } 615 - } 616 - 617 - if blob.mime_type != found.mime_type { 618 - bail!( 619 - "InvalidMimeType: Referenced MIME type does not match stored blob. 
Expected: {}, Got: {}", 620 - found.mime_type, 621 - blob.mime_type 622 - ); 623 - } 624 - 625 - if let Some(ref accept) = blob.constraints.accept { 626 - if !accepted_mime(&blob.mime_type, accept).await { 627 - bail!( 628 - "Wrong type of file. It is {} but it must match {:?}.", 629 - blob.mime_type, 630 - accept 631 - ); 632 - } 633 - } 634 - 635 - // Move blob from temporary to permanent storage if needed 636 - if let Some(temp_key) = found.temp_key { 637 - self.blobstore.make_permanent(&temp_key, blob.cid).await?; 638 - 639 - // Update database to clear temp key 640 - let cid_str = blob.cid.to_string(); 641 - let did = self.did.clone(); 642 - 643 - self.db 644 - .run(move |conn| { 645 - diesel::update(blob::table) 646 - .filter(blob::did.eq(&did)) 647 - .filter(blob::cid.eq(&cid_str)) 648 - .set(blob::temp_key.eq::<Option<String>>(None)) 649 - .execute(conn) 650 - }) 651 - .await?; 652 - } 653 - 654 - Ok(()) 655 - } 656 - 657 - /// Associate a blob with a record 658 - pub async fn associate_blob(&self, blob: &PreparedBlobRef, record_uri: &str) -> Result<()> { 659 - let cid_str = blob.cid.to_string(); 660 - let record_uri = record_uri.to_string(); 661 - let did = self.did.clone(); 662 - 663 - self.db 664 - .run(move |conn| { 665 - diesel::insert_into(record_blob::table) 666 - .values(( 667 - record_blob::blob_cid.eq(&cid_str), 668 - record_blob::record_uri.eq(&record_uri), 669 - record_blob::did.eq(&did), 670 - )) 671 - .on_conflict_do_nothing() 672 - .execute(conn) 673 - }) 674 - .await?; 675 - 676 - Ok(()) 677 - } 678 - 679 - /// Update takedown status for a blob 680 - pub async fn update_blob_takedown_status(&self, blob: Cid, takedown: StatusAttr) -> Result<()> { 681 - let cid_str = blob.to_string(); 682 - let did = self.did.clone(); 683 - 684 - let takedownRef: Option<String> = if takedown.applied { 685 - Some(takedown.r#ref.unwrap_or_else(|| Uuid::new_v4().to_string())) 686 - } else { 687 - None 688 - }; 689 - 690 - // Update database 691 - self.db 692 - .run(move |conn| { 693 - diesel::update(blob::table) 694 - .filter(blob::did.eq(&did)) 695 - .filter(blob::cid.eq(&cid_str)) 696 - .set(blob::takedownRef.eq(takedownRef)) 697 - .execute(conn) 698 - }) 699 - .await?; 700 - 701 - // Update blob storage 702 - if takedown.applied { 703 - self.blobstore.quarantine(blob).await?; 704 - } else { 705 - self.blobstore.unquarantine(blob).await?; 706 - } 707 - 708 - Ok(()) 709 - } 710 - } 711 - 712 - /// Verify MIME type against accepted formats 713 - async fn accepted_mime(mime: &str, accepted: &[String]) -> bool { 714 - // Accept any type 715 - if accepted.contains(&"*/*".to_string()) { 716 - return true; 717 - } 718 - 719 - // Check for glob patterns (e.g., "image/*") 720 - for glob in accepted { 721 - if glob.ends_with("/*") { 722 - let prefix = glob.split('/').next().unwrap(); 723 - if mime.starts_with(&format!("{}/", prefix)) { 724 - return true; 725 - } 726 - } 727 - } 728 - 729 - // Check for exact match 730 - accepted.contains(&mime.to_string()) 731 - }
-54
src/actor_store/blob/placeholder.rs
··· 1 - use anyhow::Result; 2 - use atrium_repo::Cid; 3 - 4 - use super::{BlobStore, BlobStream}; 5 - 6 - /// Placeholder implementation for blob store 7 - #[derive(Clone)] 8 - pub struct BlobStorePlaceholder; 9 - 10 - impl BlobStore for BlobStorePlaceholder { 11 - async fn put_temp(&self, _bytes: &[u8]) -> Result<String> { 12 - todo!("BlobStorePlaceholder::put_temp not implemented"); 13 - } 14 - 15 - async fn make_permanent(&self, _key: &str, _cid: Cid) -> Result<()> { 16 - todo!("BlobStorePlaceholder::make_permanent not implemented"); 17 - } 18 - 19 - async fn put_permanent(&self, _cid: Cid, _bytes: &[u8]) -> Result<()> { 20 - todo!("BlobStorePlaceholder::put_permanent not implemented"); 21 - } 22 - 23 - async fn quarantine(&self, _cid: Cid) -> Result<()> { 24 - todo!("BlobStorePlaceholder::quarantine not implemented"); 25 - } 26 - 27 - async fn unquarantine(&self, _cid: Cid) -> Result<()> { 28 - todo!("BlobStorePlaceholder::unquarantine not implemented"); 29 - } 30 - 31 - async fn get_bytes(&self, _cid: Cid) -> Result<Vec<u8>> { 32 - todo!("BlobStorePlaceholder::get_bytes not implemented"); 33 - } 34 - 35 - async fn get_stream(&self, _cid: Cid) -> Result<BlobStream> { 36 - todo!("BlobStorePlaceholder::get_stream not implemented"); 37 - } 38 - 39 - async fn has_temp(&self, _key: &str) -> Result<bool> { 40 - todo!("BlobStorePlaceholder::has_temp not implemented"); 41 - } 42 - 43 - async fn has_stored(&self, _cid: Cid) -> Result<bool> { 44 - todo!("BlobStorePlaceholder::has_stored not implemented"); 45 - } 46 - 47 - async fn delete(&self, _cid: Cid) -> Result<()> { 48 - todo!("BlobStorePlaceholder::delete not implemented"); 49 - } 50 - 51 - async fn delete_many(&self, _cids: Vec<Cid>) -> Result<()> { 52 - todo!("BlobStorePlaceholder::delete_many not implemented"); 53 - } 54 - }
-1
src/actor_store/db.rs
··· 2 2 3 3 use crate::db::DatabaseConnection; 4 4 use anyhow::{Context as _, Result}; 5 - use diesel::prelude::*; 6 5 7 6 /// Type alias for the actor database. 8 7 pub(crate) type ActorDb = DatabaseConnection;
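For orientation, every query in these modules funnels through `ActorDb::run`, which executes a closure against a pooled diesel connection and bridges the result back into async code. A sketch of that calling convention as the call sites in this diff use it (the exact `DatabaseConnection::run` signature is not shown in this hunk, so this mirrors the observed usage rather than a confirmed API):

```rust
use anyhow::Result;
use diesel::prelude::*;

use crate::actor_store::db::ActorDb;

// Illustrative helper mirroring the pattern used throughout this diff:
// clone owned data before the `move` closure, run the diesel query on the
// pooled connection, and await the bridged result.
async fn count_records(db: &ActorDb, actor_did: String) -> Result<i64> {
    use rsky_pds::schema::pds::record::dsl as RecordSchema;
    db.run(move |conn| {
        let n: i64 = RecordSchema::record
            .filter(RecordSchema::did.eq(&actor_did))
            .count()
            .get_result(conn)?;
        Ok(n)
    })
    .await
}
```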
+2 -7
src/actor_store/mod.rs
··· 1 1 //! Actor store implementation for ATProto PDS. 2 2 3 3 mod actor_store; 4 - mod actor_store_handler; 5 - mod actor_store_resources; 6 4 mod blob; 7 5 mod db; 8 6 mod preference; 9 - mod prepared_write; 10 7 mod record; 11 - mod repo; 8 + mod sql_blob; 12 9 mod sql_repo; 13 10 14 11 pub(crate) use actor_store::ActorStore; 15 - pub(crate) use actor_store_handler::ActorStoreHandler; 16 - pub(crate) use actor_store_resources::ActorStoreResources; 17 12 pub(crate) use db::ActorDb; 18 - pub(crate) use prepared_write::PreparedWrite; 13 + pub(crate) use sql_blob::BlobStoreSql;
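With the handler/resources split removed, the re-export surface is now just `ActorStore`, `ActorDb`, and the new SQL-backed blob store. A hedged sketch of how a caller inside the crate might wire these together (both constructors are assumptions; their real signatures live outside this diff):

```rust
use crate::actor_store::{ActorDb, ActorStore, BlobStoreSql};

// Hypothetical wiring; constructor names, argument order, and whether
// ActorDb is cheaply cloneable are all illustrative assumptions.
fn build_actor_store(db: ActorDb, did: String) -> ActorStore {
    let blobstore = BlobStoreSql::new(did.clone(), db.clone()); // assumed ctor
    ActorStore::new(did, db, blobstore) // assumed ctor
}
```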
+17 -19
src/actor_store/preference.rs
··· 1 1 //! Preference handling for actor store. 2 + //! Based on https://github.com/blacksky-algorithms/rsky/blob/main/rsky-pds/src/actor_store/preference/mod.rs 3 + //! blacksky-algorithms/rsky is licensed under the Apache License 2.0 4 + //! 5 + //! Modified for SQLite backend 2 6 3 7 use anyhow::{Result, bail}; 4 8 use diesel::*; 5 9 use rsky_lexicon::app::bsky::actor::RefPreferences; 6 - use rsky_pds::{ 7 - actor_store::preference::{pref_match_namespace, util::pref_in_scope}, 8 - auth_verifier::AuthScope, 9 - models::AccountPref, 10 - }; 11 - 12 - use crate::actor_store::db::ActorDb; 10 + use rsky_pds::actor_store::preference::pref_match_namespace; 11 + use rsky_pds::actor_store::preference::util::pref_in_scope; 12 + use rsky_pds::auth_verifier::AuthScope; 13 + use rsky_pds::db::DbConn; 14 + use rsky_pds::models; 15 + use rsky_pds::models::AccountPref; 16 + use std::sync::Arc; 13 17 14 - /// Handler for preference operations with both read and write capabilities. 15 - pub(crate) struct PreferenceHandler { 16 - /// Database connection. 17 - pub db: ActorDb, 18 - /// DID of the actor. 18 + pub struct PreferenceReader { 19 19 pub did: String, 20 + pub db: Arc<DbConn>, 20 21 } 21 22 22 - impl PreferenceHandler { 23 - /// Create a new preference handler. 24 - pub(crate) fn new(did: String, db: ActorDb) -> Self { 25 - Self { db, did } 23 + impl PreferenceReader { 24 + pub fn new(did: String, db: Arc<DbConn>) -> Self { 25 + PreferenceReader { did, db } 26 26 } 27 27 28 - /// Get preferences for a namespace. 29 28 pub async fn get_preferences( 30 29 &self, 31 30 namespace: Option<String>, ··· 64 63 .await 65 64 } 66 65 67 - /// Put preferences for a namespace. 68 66 #[tracing::instrument(skip_all)] 69 67 pub async fn put_preferences( 70 68 &self, ··· 97 95 use rsky_pds::schema::pds::account_pref::dsl as AccountPrefSchema; 98 96 let all_prefs = AccountPrefSchema::account_pref 99 97 .filter(AccountPrefSchema::did.eq(&did)) 100 - .select(AccountPref::as_select()) 98 + .select(models::AccountPref::as_select()) 101 99 .load(conn)?; 102 100 let put_prefs = values 103 101 .into_iter()
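As a usage note for the reader above: `get_preferences` returns the stored preferences filtered to the requested namespace and the caller's auth scope. A short sketch under the rsky signature that the imports suggest (the `scope` parameter is truncated out of this hunk, so its exact type and the `AuthScope::Access` variant are assumptions carried over from rsky-pds):

```rust
use anyhow::Result;
use rsky_lexicon::app::bsky::actor::RefPreferences;
use rsky_pds::auth_verifier::AuthScope;
use rsky_pds::db::DbConn;
use std::sync::Arc;

// Fetch only the `app.bsky` preference namespace for one actor.
async fn bsky_prefs(db: Arc<DbConn>, did: String) -> Result<Vec<RefPreferences>> {
    let prefs = PreferenceReader::new(did, db);
    prefs
        .get_preferences(Some("app.bsky".to_string()), AuthScope::Access)
        .await
}
```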
-76
src/actor_store/prepared_write.rs
··· 1 - use std::str::FromStr; 2 - 3 - use cidv10::Cid as CidV10; 4 - use rsky_repo::types::{ 5 - CommitAction, PreparedBlobRef, PreparedCreateOrUpdate, PreparedDelete, WriteOpAction, 6 - }; 7 - use serde::{Deserialize, Serialize}; 8 - 9 - #[derive(Debug, Clone, PartialEq, Deserialize, Serialize)] 10 - pub enum PreparedWrite { 11 - Create(PreparedCreateOrUpdate), 12 - Update(PreparedCreateOrUpdate), 13 - Delete(PreparedDelete), 14 - } 15 - 16 - impl PreparedWrite { 17 - pub fn uri(&self) -> &String { 18 - match self { 19 - PreparedWrite::Create(w) => &w.uri, 20 - PreparedWrite::Update(w) => &w.uri, 21 - PreparedWrite::Delete(w) => &w.uri, 22 - } 23 - } 24 - 25 - pub fn cid(&self) -> Option<CidV10> { 26 - match self { 27 - PreparedWrite::Create(w) => Some(CidV10::from_str(w.cid.to_string().as_str()).unwrap()), 28 - PreparedWrite::Update(w) => Some(CidV10::from_str(w.cid.to_string().as_str()).unwrap()), 29 - PreparedWrite::Delete(_) => None, 30 - } 31 - } 32 - 33 - pub fn swap_cid(&self) -> Option<CidV10> { 34 - match self { 35 - PreparedWrite::Create(w) => w 36 - .swap_cid 37 - .as_ref() 38 - .map(|cid| CidV10::from_str(cid.to_string().as_str()).unwrap()), 39 - PreparedWrite::Update(w) => w 40 - .swap_cid 41 - .as_ref() 42 - .map(|cid| CidV10::from_str(cid.to_string().as_str()).unwrap()), 43 - PreparedWrite::Delete(w) => w 44 - .swap_cid 45 - .as_ref() 46 - .map(|cid| CidV10::from_str(cid.to_string().as_str()).unwrap()), 47 - } 48 - } 49 - 50 - pub fn action(&self) -> &WriteOpAction { 51 - match self { 52 - PreparedWrite::Create(w) => &w.action, 53 - PreparedWrite::Update(w) => &w.action, 54 - PreparedWrite::Delete(w) => &w.action, 55 - } 56 - } 57 - 58 - /// TEQ: Add blobs() impl 59 - pub fn blobs(&self) -> Option<&Vec<PreparedBlobRef>> { 60 - match self { 61 - PreparedWrite::Create(w) => Some(&w.blobs), 62 - PreparedWrite::Update(w) => Some(&w.blobs), 63 - PreparedWrite::Delete(_) => None, 64 - } 65 - } 66 - } 67 - 68 - impl From<&PreparedWrite> for CommitAction { 69 - fn from(value: &PreparedWrite) -> Self { 70 - match value { 71 - &PreparedWrite::Create(_) => CommitAction::Create, 72 - &PreparedWrite::Update(_) => CommitAction::Update, 73 - &PreparedWrite::Delete(_) => CommitAction::Delete, 74 - } 75 - } 76 - }
+337 -698
src/actor_store/record.rs
··· 1 1 //! Record storage and retrieval for the actor store. 2 + //! Based on https://github.com/blacksky-algorithms/rsky/blob/main/rsky-pds/src/actor_store/record/mod.rs 3 + //! blacksky-algorithms/rsky is licensed under the Apache License 2.0 4 + //! 5 + //! Modified for SQLite backend 2 6 3 - use anyhow::{Context as _, Result, bail}; 4 - use atrium_api::com::atproto::admin::defs::StatusAttr; 5 - use atrium_repo::Cid; 6 - use diesel::associations::HasTable; 7 - use diesel::prelude::*; 7 + use anyhow::{Error, Result, bail}; 8 + use cidv10::Cid; 9 + use diesel::*; 10 + use futures::stream::{self, StreamExt}; 11 + use rsky_lexicon::com::atproto::admin::StatusAttr; 12 + use rsky_pds::actor_store::record::{GetRecord, RecordsForCollection, get_backlinks}; 8 13 use rsky_pds::models::{Backlink, Record}; 9 - use rsky_pds::schema::pds::repo_block::dsl::repo_block; 10 - use rsky_pds::schema::pds::{backlink, record}; 11 - use rsky_repo::types::WriteOpAction; 14 + use rsky_repo::types::{RepoRecord, WriteOpAction}; 15 + use rsky_repo::util::cbor_to_lex_record; 12 16 use rsky_syntax::aturi::AtUri; 17 + use std::env; 13 18 use std::str::FromStr; 14 19 15 - use crate::actor_store::blob::BlobStorePlaceholder; 16 20 use crate::actor_store::db::ActorDb; 17 21 18 22 /// Combined handler for record operations with both read and write capabilities. 19 - pub(crate) struct RecordHandler { 23 + pub(crate) struct RecordReader { 20 24 /// Database connection. 21 25 pub db: ActorDb, 22 26 /// DID of the actor. 23 27 pub did: String, 24 - /// Blob store for handling blobs. 25 - pub blobstore: Option<BlobStorePlaceholder>, 26 28 } 27 29 28 - /// Record descriptor containing URI, path, and CID. 29 - pub(crate) struct RecordDescript { 30 - /// Record URI. 31 - pub uri: String, 32 - /// Record path. 33 - pub path: String, 34 - /// Record CID. 35 - pub cid: Cid, 36 - } 37 - 38 - /// Record data with values. 39 - #[derive(Debug, Clone)] 40 - pub(crate) struct RecordData { 41 - /// Record URI. 42 - pub uri: String, 43 - /// Record CID. 44 - pub cid: String, 45 - /// Record value as JSON. 46 - pub value: serde_json::Value, 47 - /// When the record was indexed. 48 - pub indexedAt: String, 49 - /// Reference for takedown, if any. 50 - pub takedownRef: Option<String>, 51 - } 52 - 53 - /// Options for listing records in a collection. 54 - #[derive(Debug, Clone)] 55 - pub(crate) struct ListRecordsOptions { 56 - /// Collection to list records from. 57 - pub collection: String, 58 - /// Maximum number of records to return. 59 - pub limit: i64, 60 - /// Whether to reverse the sort order. 61 - pub reverse: bool, 62 - /// Cursor for pagination. 63 - pub cursor: Option<String>, 64 - /// Start key (deprecated). 65 - pub rkey_start: Option<String>, 66 - /// End key (deprecated). 67 - pub rkey_end: Option<String>, 68 - /// Whether to include soft-deleted records. 69 - pub include_soft_deleted: bool, 70 - } 71 - 72 - impl RecordHandler { 30 + impl RecordReader { 73 31 /// Create a new record handler. 74 - pub(crate) fn new(db: ActorDb, did: String) -> Self { 75 - Self { 76 - db, 77 - did, 78 - blobstore: None, 79 - } 80 - } 81 - 82 - /// Create a new record handler with blobstore support. 83 - pub(crate) fn new_with_blobstore( 84 - db: ActorDb, 85 - blobstore: BlobStorePlaceholder, 86 - did: String, 87 - ) -> Self { 88 - Self { 89 - db, 90 - did, 91 - blobstore: Some(blobstore), 92 - } 32 + pub(crate) fn new(did: String, db: ActorDb) -> Self { 33 + Self { did, db } 93 34 } 94 35 95 36 /// Count the total number of records. 
96 - pub(crate) async fn record_count(&self) -> Result<i64> { 97 - let did = self.did.clone(); 37 + pub(crate) async fn record_count(&mut self) -> Result<i64> { 38 + use rsky_pds::schema::pds::record::dsl::*; 98 39 40 + let other_did = self.did.clone(); 99 41 self.db 100 42 .run(move |conn| { 101 - use rsky_pds::schema::pds::record::dsl::*; 102 - 103 - record.filter(did.eq(&did)).count().get_result(conn) 43 + let res: i64 = record.filter(did.eq(&other_did)).count().get_result(conn)?; 44 + Ok(res) 104 45 }) 105 46 .await 106 47 } 107 48 108 - /// List all records. 109 - pub(crate) async fn list_all(&self) -> Result<Vec<RecordDescript>> { 110 - let did = self.did.clone(); 111 - let mut records = Vec::new(); 112 - let mut current_cursor = Some("".to_string()); 113 - 114 - while let Some(cursor) = current_cursor.take() { 115 - let cursor_clone = cursor.clone(); 116 - let did_clone = did.clone(); 117 - 118 - let rows = self 119 - .db 120 - .run(move |conn| { 121 - use rsky_pds::schema::pds::record::dsl::*; 122 - 123 - record 124 - .filter(did.eq(&did_clone)) 125 - .filter(uri.gt(&cursor_clone)) 126 - .order(uri.asc()) 127 - .limit(1000) 128 - .select((uri, cid)) 129 - .load::<(String, String)>(conn) 130 - }) 131 - .await?; 132 - 133 - for (uri_str, cid_str) in &rows { 134 - let uri = uri_str.clone(); 135 - let parts: Vec<&str> = uri.rsplitn(2, '/').collect(); 136 - let path = if parts.len() == 2 { 137 - format!("{}/{}", parts[1], parts[0]) 138 - } else { 139 - uri.clone() 140 - }; 141 - 142 - match Cid::from_str(&cid_str) { 143 - Ok(cid) => records.push(RecordDescript { uri, path, cid }), 144 - Err(e) => tracing::warn!("Invalid CID in database: {}", e), 145 - } 146 - } 147 - 148 - if let Some(last) = rows.last() { 149 - current_cursor = Some(last.0.clone()); 150 - } else { 151 - break; 152 - } 153 - } 154 - 155 - Ok(records) 156 - } 157 - 158 49 /// List all collections in the repository. 159 50 pub(crate) async fn list_collections(&self) -> Result<Vec<String>> { 160 - let did = self.did.clone(); 51 + use rsky_pds::schema::pds::record::dsl::*; 161 52 53 + let other_did = self.did.clone(); 162 54 self.db 163 55 .run(move |conn| { 164 - use rsky_pds::schema::pds::record::dsl::*; 165 - 166 - record 167 - .filter(did.eq(&did)) 56 + let collections = record 57 + .filter(did.eq(&other_did)) 58 + .select(collection) 168 59 .group_by(collection) 169 - .select(collection) 170 - .load::<String>(conn) 60 + .load::<String>(conn)? 61 + .into_iter() 62 + .collect::<Vec<String>>(); 63 + Ok(collections) 171 64 }) 172 65 .await 173 66 } 174 67 175 68 /// List records for a specific collection. 
176 69 pub(crate) async fn list_records_for_collection( 177 - &self, 178 - opts: ListRecordsOptions, 179 - ) -> Result<Vec<RecordData>> { 180 - let did = self.did.clone(); 70 + &mut self, 71 + collection: String, 72 + limit: i64, 73 + reverse: bool, 74 + cursor: Option<String>, 75 + rkey_start: Option<String>, 76 + rkey_end: Option<String>, 77 + include_soft_deleted: Option<bool>, 78 + ) -> Result<Vec<RecordsForCollection>> { 79 + use rsky_pds::schema::pds::record::dsl as RecordSchema; 80 + use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema; 181 81 182 - self.db 183 - .run(move |conn| { 184 - // Start building the query 185 - let mut query = record::table 186 - .inner_join(repo_block::table.on(repo_block::cid.eq(record::cid))) 187 - .filter(record::did.eq(&did)) 188 - .filter(record::collection.eq(&opts.collection)) 189 - .into_boxed(); 190 - 191 - // Handle soft-deleted records 192 - if !opts.include_soft_deleted { 193 - query = query.filter(record::takedownRef.is_null()); 194 - } 195 - 196 - // Handle cursor-based pagination first 197 - if let Some(cursor) = &opts.cursor { 198 - if opts.reverse { 199 - query = query.filter(record::rkey.gt(cursor)); 200 - } else { 201 - query = query.filter(record::rkey.lt(cursor)); 202 - } 203 - } else { 204 - // Fall back to deprecated rkey-based pagination 205 - if let Some(start) = &opts.rkey_start { 206 - query = query.filter(record::rkey.gt(start)); 207 - } 208 - if let Some(end) = &opts.rkey_end { 209 - query = query.filter(record::rkey.lt(end)); 210 - } 211 - } 212 - 213 - // Add order and limit 214 - if opts.reverse { 215 - query = query.order(record::rkey.asc()); 216 - } else { 217 - query = query.order(record::rkey.desc()); 218 - } 219 - 220 - query = query.limit(opts.limit); 221 - 222 - // Execute the query 223 - let results = query 224 - .select(( 225 - record::uri, 226 - record::cid, 227 - record::indexedAt, 228 - record::takedownRef, 229 - repo_block::content, 230 - )) 231 - .load::<(String, String, String, Option<String>, Vec<u8>)>(conn)?; 232 - 233 - // Convert results to RecordData 234 - let records = results 235 - .into_iter() 236 - .map(|(uri, cid, indexedAt, takedownRef, content)| { 237 - let value = serde_json::from_slice(&content) 238 - .with_context(|| format!("Failed to decode record {}", cid))?; 239 - 240 - Ok(RecordData { 241 - uri, 242 - cid, 243 - value, 244 - indexedAt, 245 - takedownRef, 246 - }) 247 - }) 248 - .collect::<Result<Vec<_>>>()?; 82 + let include_soft_deleted: bool = if let Some(include_soft_deleted) = include_soft_deleted { 83 + include_soft_deleted 84 + } else { 85 + false 86 + }; 87 + let mut builder = RecordSchema::record 88 + .inner_join(RepoBlockSchema::repo_block.on(RepoBlockSchema::cid.eq(RecordSchema::cid))) 89 + .limit(limit) 90 + .select(( 91 + rsky_pds::models::Record::as_select(), 92 + rsky_pds::models::RepoBlock::as_select(), 93 + )) 94 + .filter(RecordSchema::did.eq(self.did.clone())) 95 + .filter(RecordSchema::collection.eq(collection)) 96 + .into_boxed(); 97 + if !include_soft_deleted { 98 + builder = builder.filter(RecordSchema::takedownRef.is_null()); 99 + } 100 + if reverse { 101 + builder = builder.order(RecordSchema::rkey.asc()); 102 + } else { 103 + builder = builder.order(RecordSchema::rkey.desc()); 104 + } 249 105 250 - Ok(records) 106 + if let Some(cursor) = cursor { 107 + if reverse { 108 + builder = builder.filter(RecordSchema::rkey.gt(cursor)); 109 + } else { 110 + builder = builder.filter(RecordSchema::rkey.lt(cursor)); 111 + } 112 + } else { 113 + if let Some(rkey_start) 
= rkey_start { 114 + builder = builder.filter(RecordSchema::rkey.gt(rkey_start)); 115 + } 116 + if let Some(rkey_end) = rkey_end { 117 + builder = builder.filter(RecordSchema::rkey.lt(rkey_end)); 118 + } 119 + } 120 + let res: Vec<(rsky_pds::models::Record, rsky_pds::models::RepoBlock)> = 121 + self.db.run(move |conn| builder.load(conn)).await?; 122 + res.into_iter() 123 + .map(|row| { 124 + Ok(RecordsForCollection { 125 + uri: row.0.uri, 126 + cid: row.0.cid, 127 + value: cbor_to_lex_record(row.1.content)?, 128 + }) 251 129 }) 252 - .await 130 + .collect::<Result<Vec<RecordsForCollection>>>() 253 131 } 254 132 255 133 /// Get a specific record by URI. 256 134 pub(crate) async fn get_record( 257 - &self, 135 + &mut self, 258 136 uri: &AtUri, 259 - cid: Option<&str>, 260 - include_soft_deleted: bool, 261 - ) -> Result<Option<RecordData>> { 262 - let did = self.did.clone(); 263 - let uri_str = uri.to_string(); 264 - let cid_opt = cid.map(|c| c.to_string()); 265 - 266 - self.db 267 - .run(move |conn| { 268 - let mut query = record::table 269 - .inner_join(repo_block::table.on(repo_block::cid.eq(record::cid))) 270 - .filter(record::did.eq(&did)) 271 - .filter(record::uri.eq(&uri_str)) 272 - .into_boxed(); 273 - 274 - if !include_soft_deleted { 275 - query = query.filter(record::takedownRef.is_null()); 276 - } 277 - 278 - if let Some(cid_val) = cid_opt { 279 - query = query.filter(record::cid.eq(cid_val)); 280 - } 137 + cid: Option<String>, 138 + include_soft_deleted: Option<bool>, 139 + ) -> Result<Option<GetRecord>> { 140 + use rsky_pds::schema::pds::record::dsl as RecordSchema; 141 + use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema; 281 142 282 - let result = query 283 - .select(( 284 - record::uri, 285 - record::cid, 286 - record::indexedAt, 287 - record::takedownRef, 288 - repo_block::content, 289 - )) 290 - .first::<(String, String, String, Option<String>, Vec<u8>)>(conn) 291 - .optional()?; 292 - 293 - if let Some((uri, cid, indexedAt, takedownRef, content)) = result { 294 - let value = serde_json::from_slice(&content) 295 - .with_context(|| format!("Failed to decode record {}", cid))?; 296 - 297 - Ok(Some(RecordData { 298 - uri, 299 - cid, 300 - value, 301 - indexedAt, 302 - takedownRef, 303 - })) 304 - } else { 305 - Ok(None) 306 - } 307 - }) 308 - .await 143 + let include_soft_deleted: bool = if let Some(include_soft_deleted) = include_soft_deleted { 144 + include_soft_deleted 145 + } else { 146 + false 147 + }; 148 + let mut builder = RecordSchema::record 149 + .inner_join(RepoBlockSchema::repo_block.on(RepoBlockSchema::cid.eq(RecordSchema::cid))) 150 + .select(( 151 + rsky_pds::models::Record::as_select(), 152 + rsky_pds::models::RepoBlock::as_select(), 153 + )) 154 + .filter(RecordSchema::uri.eq(uri.to_string())) 155 + .into_boxed(); 156 + if !include_soft_deleted { 157 + builder = builder.filter(RecordSchema::takedownRef.is_null()); 158 + } 159 + if let Some(cid) = cid { 160 + builder = builder.filter(RecordSchema::cid.eq(cid)); 161 + } 162 + let record: Option<(rsky_pds::models::Record, rsky_pds::models::RepoBlock)> = self 163 + .db 164 + .run(move |conn| builder.first(conn).optional()) 165 + .await?; 166 + if let Some(record) = record { 167 + Ok(Some(GetRecord { 168 + uri: record.0.uri, 169 + cid: record.0.cid, 170 + value: cbor_to_lex_record(record.1.content)?, 171 + indexed_at: record.0.indexed_at, 172 + takedown_ref: record.0.takedown_ref, 173 + })) 174 + } else { 175 + Ok(None) 176 + } 309 177 } 310 178 311 179 /// Check if a record exists. 
312 180 pub(crate) async fn has_record( 313 - &self, 314 - uri: &str, 315 - cid: Option<&str>, 316 - include_soft_deleted: bool, 181 + &mut self, 182 + uri: String, 183 + cid: Option<String>, 184 + include_soft_deleted: Option<bool>, 317 185 ) -> Result<bool> { 318 - let did = self.did.clone(); 319 - let uri_str = uri.to_string(); 320 - let cid_opt = cid.map(|c| c.to_string()); 186 + use rsky_pds::schema::pds::record::dsl as RecordSchema; 321 187 322 - self.db 323 - .run(move |conn| { 324 - let mut query = record::table 325 - .filter(record::did.eq(&did)) 326 - .filter(record::uri.eq(&uri_str)) 327 - .into_boxed(); 328 - 329 - if !include_soft_deleted { 330 - query = query.filter(record::takedownRef.is_null()); 331 - } 332 - 333 - if let Some(cid_val) = cid_opt { 334 - query = query.filter(record::cid.eq(cid_val)); 335 - } 336 - 337 - let exists = query 338 - .select(record::uri) 339 - .first::<String>(conn) 340 - .optional()? 341 - .is_some(); 342 - 343 - Ok(exists) 344 - }) 345 - .await 188 + let include_soft_deleted: bool = if let Some(include_soft_deleted) = include_soft_deleted { 189 + include_soft_deleted 190 + } else { 191 + false 192 + }; 193 + let mut builder = RecordSchema::record 194 + .select(RecordSchema::uri) 195 + .filter(RecordSchema::uri.eq(uri)) 196 + .into_boxed(); 197 + if !include_soft_deleted { 198 + builder = builder.filter(RecordSchema::takedownRef.is_null()); 199 + } 200 + if let Some(cid) = cid { 201 + builder = builder.filter(RecordSchema::cid.eq(cid)); 202 + } 203 + let record_uri = self 204 + .db 205 + .run(move |conn| builder.first::<String>(conn).optional()) 206 + .await?; 207 + Ok(record_uri.is_some()) 346 208 } 347 209 348 210 /// Get the takedown status of a record. 349 - pub(crate) async fn get_record_takedown_status(&self, uri: &str) -> Result<Option<StatusAttr>> { 350 - let did = self.did.clone(); 351 - let uri_str = uri.to_string(); 211 + pub(crate) async fn get_record_takedown_status( 212 + &self, 213 + uri: String, 214 + ) -> Result<Option<StatusAttr>> { 215 + use rsky_pds::schema::pds::record::dsl as RecordSchema; 352 216 353 - self.db 217 + let res = self 218 + .db 354 219 .run(move |conn| { 355 - let result = record::table 356 - .filter(record::did.eq(&did)) 357 - .filter(record::uri.eq(&uri_str)) 358 - .select(record::takedownRef) 220 + RecordSchema::record 221 + .select(RecordSchema::takedownRef) 222 + .filter(RecordSchema::uri.eq(uri)) 359 223 .first::<Option<String>>(conn) 360 - .optional()?; 361 - 362 - match result { 363 - Some(takedown) => match takedown { 364 - Some(takedownRef) => Ok(Some(StatusAttr { 365 - applied: true, 366 - r#ref: Some(takedownRef), 367 - })), 368 - None => Ok(Some(StatusAttr { 369 - applied: false, 370 - r#ref: None, 371 - })), 372 - }, 373 - None => Ok(None), 374 - } 224 + .optional() 375 225 }) 376 - .await 226 + .await?; 227 + if let Some(res) = res { 228 + if let Some(takedown_ref) = res { 229 + Ok(Some(StatusAttr { 230 + applied: true, 231 + r#ref: Some(takedown_ref), 232 + })) 233 + } else { 234 + Ok(Some(StatusAttr { 235 + applied: false, 236 + r#ref: None, 237 + })) 238 + } 239 + } else { 240 + Ok(None) 241 + } 377 242 } 378 243 379 244 /// Get the current CID for a record URI.
380 - pub(crate) async fn get_current_record_cid(&self, uri: &str) -> Result<Option<Cid>> { 381 - let did = self.did.clone(); 382 - let uri_str = uri.to_string(); 245 + pub(crate) async fn get_current_record_cid(&self, uri: String) -> Result<Option<Cid>> { 246 + use rsky_pds::schema::pds::record::dsl as RecordSchema; 383 247 384 - self.db 248 + let res = self 249 + .db 385 250 .run(move |conn| { 386 - let result = record::table 387 - .filter(record::did.eq(&did)) 388 - .filter(record::uri.eq(&uri_str)) 389 - .select(record::cid) 251 + RecordSchema::record 252 + .select(RecordSchema::cid) 253 + .filter(RecordSchema::uri.eq(uri)) 390 254 .first::<String>(conn) 391 - .optional()?; 392 - 393 - match result { 394 - Some(cid_str) => { 395 - let cid = Cid::from_str(&cid_str)?; 396 - Ok(Some(cid)) 397 - } 398 - None => Ok(None), 399 - } 255 + .optional() 400 256 }) 401 - .await 257 + .await?; 258 + if let Some(res) = res { 259 + Ok(Some(Cid::from_str(&res)?)) 260 + } else { 261 + Ok(None) 262 + } 402 263 } 403 264 404 265 /// Get backlinks for a record. 405 266 pub(crate) async fn get_record_backlinks( 406 267 &self, 407 - collection: &str, 408 - path: &str, 409 - linkTo: &str, 268 + collection: String, 269 + path: String, 270 + link_to: String, 410 271 ) -> Result<Vec<Record>> { 411 - let did = self.did.clone(); 412 - let collection_str = collection.to_string(); 413 - let path_str = path.to_string(); 414 - let linkTo_str = linkTo.to_string(); 272 + use rsky_pds::schema::pds::backlink::dsl as BacklinkSchema; 273 + use rsky_pds::schema::pds::record::dsl as RecordSchema; 415 274 416 - self.db 275 + let res = self 276 + .db 417 277 .run(move |conn| { 418 - backlink::table 419 - .inner_join(record::table.on(backlink::uri.eq(record::uri))) 420 - .filter(backlink::path.eq(&path_str)) 421 - .filter(backlink::linkTo.eq(&linkTo_str)) 422 - .filter(record::collection.eq(&collection_str)) 423 - .filter(record::did.eq(&did)) 278 + RecordSchema::record 279 + .inner_join( 280 + BacklinkSchema::backlink.on(BacklinkSchema::uri.eq(RecordSchema::uri)), 281 + ) 424 282 .select(Record::as_select()) 283 + .filter(BacklinkSchema::path.eq(path)) 284 + .filter(BacklinkSchema::linkTo.eq(link_to)) 285 + .filter(RecordSchema::collection.eq(collection)) 425 286 .load::<Record>(conn) 426 287 }) 427 - .await 288 + .await?; 289 + Ok(res) 428 290 } 429 291 430 292 /// Get backlink conflicts for a record. 431 293 pub(crate) async fn get_backlink_conflicts( 432 294 &self, 433 295 uri: &AtUri, 434 - record: &serde_json::Value, 435 - ) -> Result<Vec<String>> { 436 - let backlinks = get_backlinks(uri, record)?; 437 - if backlinks.is_empty() { 438 - return Ok(Vec::new()); 439 - } 440 - 441 - let did = self.did.clone(); 442 - let uri_collection = uri.get_collection().to_string(); 443 - let mut conflicts = Vec::new(); 444 - 445 - for backlink in backlinks { 446 - let path_str = backlink.path.clone(); 447 - let linkTo_str = backlink.linkTo.clone(); 448 - 449 - let results = self 450 - .db 451 - .run(move |conn| { 452 - backlink::table 453 - .inner_join(record::table.on(backlink::uri.eq(record::uri))) 454 - .filter(backlink::path.eq(&path_str)) 455 - .filter(backlink::linkTo.eq(&linkTo_str)) 456 - .filter(record::collection.eq(&uri_collection)) 457 - .filter(record::did.eq(&did)) 458 - .select(record::uri) 459 - .load::<String>(conn) 460 - }) 461 - .await?; 462 - 463 - conflicts.extend(results); 464 - } 465 - 466 - Ok(conflicts) 467 - } 468 - 469 - /// List existing blocks in the repository. 
470 - pub(crate) async fn list_existing_blocks(&self) -> Result<Vec<Cid>> { 471 - let did = self.did.clone(); 472 - let mut blocks = Vec::new(); 473 - let mut current_cursor = Some("".to_string()); 474 - 475 - while let Some(cursor) = current_cursor.take() { 476 - let cursor_clone = cursor.clone(); 477 - let did_clone = did.clone(); 478 - 479 - let rows = self 480 - .db 481 - .run(move |conn| { 482 - use rsky_pds::schema::pds::repo_block::dsl::*; 483 - 484 - repo_block 485 - .filter(did.eq(&did_clone)) 486 - .filter(cid.gt(&cursor_clone)) 487 - .order(cid.asc()) 488 - .limit(1000) 489 - .select(cid) 490 - .load::<String>(conn) 491 - }) 492 - .await?; 493 - 494 - for cid_str in &rows { 495 - match Cid::from_str(cid_str) { 496 - Ok(cid) => blocks.push(cid), 497 - Err(e) => tracing::warn!("Invalid CID in database: {}", e), 498 - } 499 - } 500 - 501 - if let Some(last) = rows.last() { 502 - current_cursor = Some(last.clone()); 503 - } else { 504 - break; 505 - } 506 - } 507 - 508 - Ok(blocks) 509 - } 510 - 511 - /// Get the profile record for this repository 512 - pub(crate) async fn get_profile_record(&self) -> Result<Option<serde_json::Value>> { 513 - let did = self.did.clone(); 514 - 515 - self.db 516 - .run(move |conn| { 517 - let result = record::table 518 - .inner_join(repo_block::table.on(repo_block::cid.eq(record::cid))) 519 - .filter(record::did.eq(&did)) 520 - .filter(record::collection.eq("app.bsky.actor.profile")) 521 - .filter(record::rkey.eq("self")) 522 - .select(repo_block::content) 523 - .first::<Vec<u8>>(conn) 524 - .optional()?; 525 - 526 - if let Some(content) = result { 527 - let value = serde_json::from_slice(&content) 528 - .context("Failed to decode profile record")?; 529 - Ok(Some(value)) 530 - } else { 531 - Ok(None) 532 - } 296 + record: &RepoRecord, 297 + ) -> Result<Vec<AtUri>> { 298 + let record_backlinks = get_backlinks(uri, record)?; 299 + let conflicts: Vec<Vec<Record>> = stream::iter(record_backlinks) 300 + .then(|backlink| async move { 301 + Ok::<Vec<Record>, anyhow::Error>( 302 + self.get_record_backlinks( 303 + uri.get_collection(), 304 + backlink.path, 305 + backlink.link_to, 306 + ) 307 + .await?, 308 + ) 533 309 }) 310 + .collect::<Vec<_>>() 534 311 .await 535 - } 536 - 537 - /// Get records created or updated since a specific revision 538 - pub(crate) async fn get_records_since_rev(&self, rev: &str) -> Result<Vec<RecordData>> { 539 - let did = self.did.clone(); 540 - let rev_str = rev.to_string(); 541 - 542 - // First check if the revision exists 543 - let exists = self 544 - .db 545 - .run({ 546 - let did_clone = did.clone(); 547 - let rev_clone = rev_str.clone(); 548 - 549 - move |conn| { 550 - record::table 551 - .filter(record::did.eq(&did_clone)) 552 - .filter(record::repoRev.le(&rev_clone)) 553 - .count() 554 - .get_result::<i64>(conn) 555 - .map(|count| count > 0) 556 - } 312 + .into_iter() 313 + .collect::<Result<Vec<_>, _>>()?; 314 + Ok(conflicts 315 + .into_iter() 316 + .flatten() 317 + .filter_map(|record| { 318 + AtUri::make( 319 + env::var("BLUEPDS_HOST_NAME").unwrap_or("localhost".to_owned()), 320 + Some(String::from(uri.get_collection())), 321 + Some(record.rkey), 322 + ) 323 + .ok() 557 324 }) 558 - .await?; 559 - 560 - if !exists { 561 - // No records before this revision - possible account migration case 562 - return Ok(Vec::new()); 563 - } 564 - 565 - // Get records since the revision 566 - self.db 567 - .run(move |conn| { 568 - let results = record::table 569 - .inner_join(repo_block::table.on(repo_block::cid.eq(record::cid))) 570 - 
.filter(record::did.eq(&did)) 571 - .filter(record::repoRev.gt(&rev_str)) 572 - .order(record::repoRev.asc()) 573 - .limit(10) 574 - .select(( 575 - record::uri, 576 - record::cid, 577 - record::indexedAt, 578 - repo_block::content, 579 - )) 580 - .load::<(String, String, String, Vec<u8>)>(conn)?; 581 - 582 - let records = results 583 - .into_iter() 584 - .map(|(uri, cid, indexedAt, content)| { 585 - let value = serde_json::from_slice(&content) 586 - .with_context(|| format!("Failed to decode record {}", cid))?; 587 - 588 - Ok(RecordData { 589 - uri, 590 - cid, 591 - value, 592 - indexedAt, 593 - takedownRef: None, // Not included in the query 594 - }) 595 - }) 596 - .collect::<Result<Vec<_>>>()?; 597 - 598 - Ok(records) 599 - }) 600 - .await 325 + .collect::<Vec<AtUri>>()) 601 326 } 602 327 603 328 // Transactor methods 604 329 // ----------------- 605 330 606 331 /// Index a record in the database. 332 + #[tracing::instrument(skip_all)] 607 333 pub(crate) async fn index_record( 608 334 &self, 609 335 uri: AtUri, 610 336 cid: Cid, 611 - record: Option<&serde_json::Value>, 612 - action: WriteOpAction, 613 - repoRev: &str, 337 + record: Option<RepoRecord>, 338 + action: Option<WriteOpAction>, // Create or update with a default of create 339 + repo_rev: String, 614 340 timestamp: Option<String>, 615 341 ) -> Result<()> { 616 - let uri_str = uri.to_string(); 617 - tracing::debug!("Indexing record {}", uri_str); 342 + tracing::debug!("@LOG DEBUG RecordReader::index_record, indexing record {uri}"); 618 343 619 - if !uri_str.starts_with("at://did:") { 620 - return Err(anyhow::anyhow!("Expected indexed URI to contain DID")); 621 - } 344 + let collection = uri.get_collection(); 345 + let rkey = uri.get_rkey(); 346 + let hostname = uri.get_hostname().to_string(); 347 + let action = action.unwrap_or(WriteOpAction::Create); 348 + let indexed_at = timestamp.unwrap_or_else(|| rsky_common::now()); 349 + let row = Record { 350 + did: self.did.clone(), 351 + uri: uri.to_string(), 352 + cid: cid.to_string(), 353 + collection: collection.clone(), 354 + rkey: rkey.to_string(), 355 + repo_rev: Some(repo_rev.clone()), 356 + indexed_at: indexed_at.clone(), 357 + takedown_ref: None, 358 + }; 622 359 623 - let collection = uri.get_collection().to_string(); 624 - let rkey = uri.get_rkey().to_string(); 625 - 626 - if collection.is_empty() { 627 - return Err(anyhow::anyhow!( 628 - "Expected indexed URI to contain a collection" 629 - )); 360 + if !hostname.starts_with("did:") { 361 + bail!("Expected indexed URI to contain DID") 362 + } else if collection.is_empty() { 363 + bail!("Expected indexed URI to contain a collection") 630 364 } else if rkey.is_empty() { 631 - return Err(anyhow::anyhow!( 632 - "Expected indexed URI to contain a record key" 633 - )); 365 + bail!("Expected indexed URI to contain a record key") 634 366 } 635 367 636 - let cid_str = cid.to_string(); 637 - let now = timestamp.unwrap_or_else(|| chrono::Utc::now().to_rfc3339()); 638 - let did = self.did.clone(); 639 - let repoRev = repoRev.to_string(); 368 + use rsky_pds::schema::pds::record::dsl as RecordSchema; 640 369 641 - // Create the record for database insertion 642 - let record_values = ( 643 - record::did.eq(&did), 644 - record::uri.eq(&uri_str), 645 - record::cid.eq(&cid_str), 646 - record::collection.eq(&collection), 647 - record::rkey.eq(&rkey), 648 - record::repoRev.eq(&repoRev), 649 - record::indexedAt.eq(&now), 650 - ); 651 - 652 - self.db 653 - .transaction(move |conn| { 654 - // Track current version of record 655 - 
diesel::insert_into(record::table) 656 - .values(&record_values) 657 - .on_conflict(record::uri) 370 + // Track current version of record 371 + let (record, uri) = self 372 + .db 373 + .run(move |conn| { 374 + insert_into(RecordSchema::record) 375 + .values(row) 376 + .on_conflict(RecordSchema::uri) 658 377 .do_update() 659 378 .set(( 660 - record::cid.eq(&cid_str), 661 - record::repoRev.eq(&repoRev), 662 - record::indexedAt.eq(&now), 379 + RecordSchema::cid.eq(cid.to_string()), 380 + RecordSchema::repoRev.eq(&repo_rev), 381 + RecordSchema::indexedAt.eq(&indexed_at), 663 382 )) 664 - .execute(conn) 665 - .context("Failed to insert/update record")?; 383 + .execute(conn)?; 384 + Ok::<_, Error>((record, uri)) 385 + }) 386 + .await?; 666 387 667 - // Maintain backlinks if record is provided 668 - if let Some(record_value) = record { 669 - let backlinks = get_backlinks(&uri, record_value)?; 670 - 671 - if action == WriteOpAction::Update { 672 - // On update, clear old backlinks first 673 - diesel::delete(backlink::table) 674 - .filter(backlink::uri.eq(&uri_str)) 675 - .execute(conn) 676 - .context("Failed to delete existing backlinks")?; 677 - } 678 - 679 - if !backlinks.is_empty() { 680 - // Insert all backlinks at once 681 - let backlink_values: Vec<_> = backlinks 682 - .into_iter() 683 - .map(|backlink| { 684 - ( 685 - backlink::uri.eq(&uri_str), 686 - backlink::path.eq(&backlink.path), 687 - backlink::linkTo.eq(&backlink.linkTo), 688 - ) 689 - }) 690 - .collect(); 691 - 692 - diesel::insert_into(backlink::table) 693 - .values(&backlink_values) 694 - .on_conflict_do_nothing() 695 - .execute(conn) 696 - .context("Failed to insert backlinks")?; 697 - } 698 - } 699 - 700 - tracing::info!("Indexed record {}", uri_str); 701 - Ok(()) 702 - }) 703 - .await 388 + if let Some(record) = record { 389 + // Maintain backlinks 390 + let backlinks = get_backlinks(&uri, &record)?; 391 + if let WriteOpAction::Update = action { 392 + // On update just recreate backlinks from scratch for the record, so we can clear out 393 + // the old ones. E.g. for weird cases like updating a follow to be for a different did. 394 + self.remove_backlinks_by_uri(&uri).await?; 395 + } 396 + self.add_backlinks(backlinks).await?; 397 + } 398 + tracing::debug!("@LOG DEBUG RecordReader::index_record, indexed record {uri}"); 399 + Ok(()) 704 400 } 705 401 706 402 /// Delete a record from the database. 
403 +     #[tracing::instrument(skip_all)]
707 404     pub(crate) async fn delete_record(&self, uri: &AtUri) -> Result<()> {
708 -         let uri_str = uri.to_string();
709 -         tracing::debug!("Deleting indexed record {}", uri_str);
710 -
405 +         tracing::debug!("@LOG DEBUG RecordReader::delete_record, deleting indexed record {uri}");
406 +         use rsky_pds::schema::pds::backlink::dsl as BacklinkSchema;
407 +         use rsky_pds::schema::pds::record::dsl as RecordSchema;
408 +         let uri = uri.to_string();
711 409         self.db
712 -             .transaction(move |conn| {
713 -                 // Delete from record table
714 -                 diesel::delete(record::table)
715 -                     .filter(record::uri.eq(&uri_str))
716 -                     .execute(conn)
717 -                     .context("Failed to delete record")?;
718 -
719 -                 // Delete from backlink table
720 -                 diesel::delete(backlink::table)
721 -                     .filter(backlink::uri.eq(&uri_str))
722 -                     .execute(conn)
723 -                     .context("Failed to delete record backlinks")?;
724 -
725 -                 tracing::info!("Deleted indexed record {}", uri_str);
410 +             .run(move |conn| {
411 +                 delete(RecordSchema::record)
412 +                     .filter(RecordSchema::uri.eq(&uri))
413 +                     .execute(conn)?;
414 +                 delete(BacklinkSchema::backlink)
415 +                     .filter(BacklinkSchema::uri.eq(&uri))
416 +                     .execute(conn)?;
417 +                 tracing::debug!(
418 +                     "@LOG DEBUG RecordReader::delete_record, deleted indexed record {uri}"
419 +                 );
726 420                 Ok(())
727 421             })
728 422             .await
729 423     }
730 424
731 425     /// Remove backlinks for a URI.
732 -     pub(crate) async fn remove_backlinks_by_uri(&self, uri: &str) -> Result<()> {
733 -         let uri_str = uri.to_string();
734 -
426 +     pub(crate) async fn remove_backlinks_by_uri(&self, uri: &AtUri) -> Result<()> {
427 +         use rsky_pds::schema::pds::backlink::dsl as BacklinkSchema;
428 +         let uri = uri.to_string();
735 429         self.db
736 430             .run(move |conn| {
737 -                 diesel::delete(backlink::table)
738 -                     .filter(backlink::uri.eq(&uri_str))
739 -                     .execute(conn)
740 -                     .context("Failed to remove backlinks")?;
741 -
431 +                 delete(BacklinkSchema::backlink)
432 +                     .filter(BacklinkSchema::uri.eq(uri))
433 +                     .execute(conn)?;
742 434                 Ok(())
743 435             })
744 436             .await
··· 746 438
747 439     /// Add backlinks to the database.
748 440     pub(crate) async fn add_backlinks(&self, backlinks: Vec<Backlink>) -> Result<()> {
749 -         if backlinks.is_empty() {
750 -             return Ok(());
441 +         if backlinks.is_empty() {
442 +             Ok(())
443 +         } else {
444 +             use rsky_pds::schema::pds::backlink::dsl as BacklinkSchema;
445 +             self.db
446 +                 .run(move |conn| {
447 +                     insert_into(BacklinkSchema::backlink)
448 +                         .values(&backlinks)
449 +                         .on_conflict_do_nothing()
450 +                         .execute(conn)?;
451 +                     Ok(())
452 +                 })
453 +                 .await
751 454         }
752 -
753 -         self.db
754 -             .run(move |conn| {
755 -                 let backlink_values: Vec<_> = backlinks
756 -                     .into_iter()
757 -                     .map(|backlink| {
758 -                         (
759 -                             backlink::uri.eq(&backlink.uri),
760 -                             backlink::path.eq(&backlink.path),
761 -                             backlink::linkTo.eq(&backlink.linkTo),
762 -                         )
763 -                     })
764 -                     .collect();
765 -
766 -                 diesel::insert_into(backlink::table)
767 -                     .values(&backlink_values)
768 -                     .on_conflict_do_nothing()
769 -                     .execute(conn)
770 -                     .context("Failed to add backlinks")?;
771 -
772 -                 Ok(())
773 -             })
774 -             .await
775 455     }
776 456
777 457     /// Update the takedown status of a record.
··· 780 460 uri: &AtUri, 781 461 takedown: StatusAttr, 782 462 ) -> Result<()> { 783 - let uri_str = uri.to_string(); 784 - let did = self.did.clone(); 785 - let takedownRef = if takedown.applied { 786 - takedown 787 - .r#ref 788 - .or_else(|| Some(chrono::Utc::now().to_rfc3339())) 789 - } else { 790 - None 463 + use rsky_pds::schema::pds::record::dsl as RecordSchema; 464 + 465 + let takedown_ref: Option<String> = match takedown.applied { 466 + true => match takedown.r#ref { 467 + Some(takedown_ref) => Some(takedown_ref), 468 + None => Some(rsky_common::now()), 469 + }, 470 + false => None, 791 471 }; 472 + let uri_string = uri.to_string(); 792 473 793 474 self.db 794 475 .run(move |conn| { 795 - diesel::update(record::table) 796 - .filter(record::did.eq(&did)) 797 - .filter(record::uri.eq(&uri_str)) 798 - .set(record::takedownRef.eq(takedownRef)) 799 - .execute(conn) 800 - .context("Failed to update record takedown status")?; 801 - 476 + update(RecordSchema::record) 477 + .filter(RecordSchema::uri.eq(uri_string)) 478 + .set(RecordSchema::takedownRef.eq(takedown_ref)) 479 + .execute(conn)?; 802 480 Ok(()) 803 481 }) 804 482 .await 805 483 } 806 484 } 807 - 808 - /// Extract backlinks from a record. 809 - pub(super) fn get_backlinks(uri: &AtUri, record: &serde_json::Value) -> Result<Vec<Backlink>> { 810 - let mut backlinks = Vec::new(); 811 - 812 - // Check for record type 813 - if let Some(record_type) = record.get("$type").and_then(|t| t.as_str()) { 814 - // Handle follow and block records 815 - if record_type == "app.bsky.graph.follow" || record_type == "app.bsky.graph.block" { 816 - if let Some(subject) = record.get("subject").and_then(|s| s.as_str()) { 817 - // Verify it's a valid DID 818 - if subject.starts_with("did:") { 819 - backlinks.push(Backlink { 820 - uri: uri.to_string(), 821 - path: "subject".to_string(), 822 - linkTo: subject.to_string(), 823 - }); 824 - } 825 - } 826 - } 827 - // Handle like and repost records 828 - else if record_type == "app.bsky.feed.like" || record_type == "app.bsky.feed.repost" { 829 - if let Some(subject) = record.get("subject") { 830 - if let Some(subject_uri) = subject.get("uri").and_then(|u| u.as_str()) { 831 - // Verify it's a valid AT URI 832 - if subject_uri.starts_with("at://") { 833 - backlinks.push(Backlink { 834 - uri: uri.to_string(), 835 - path: "subject.uri".to_string(), 836 - linkTo: subject_uri.to_string(), 837 - }); 838 - } 839 - } 840 - } 841 - } 842 - } 843 - 844 - Ok(backlinks) 845 - }
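The record transactor above keeps the `backlink` table in sync: follows and blocks link to a DID in `subject`, while likes and reposts link to an AT-URI in `subject.uri`, and on update the old rows are dropped and re-extracted. For reference, the extraction rule that the removed local `get_backlinks` implemented (and that the `get_backlinks` now called here, presumably rsky's, still follows) boils down to the sketch below. It is written against `serde_json::Value` for illustration; the real code operates on rsky's `RepoRecord`.

```rust
// Minimal sketch of backlink extraction, mirroring the removed implementation.
// `Backlink` fields follow the pds schema (snake_case as in the new models).
use serde_json::Value;

pub struct Backlink {
    pub uri: String,     // record that contains the link
    pub path: String,    // JSON path of the link within the record
    pub link_to: String, // DID or AT-URI being linked to
}

pub fn extract_backlinks(uri: &str, record: &Value) -> Vec<Backlink> {
    let mut out = Vec::new();
    match record.get("$type").and_then(Value::as_str) {
        // Follows and blocks link to a DID in `subject`.
        Some("app.bsky.graph.follow") | Some("app.bsky.graph.block") => {
            if let Some(subject) = record.get("subject").and_then(Value::as_str) {
                if subject.starts_with("did:") {
                    out.push(Backlink {
                        uri: uri.to_string(),
                        path: "subject".to_string(),
                        link_to: subject.to_string(),
                    });
                }
            }
        }
        // Likes and reposts link to an AT-URI in `subject.uri`.
        Some("app.bsky.feed.like") | Some("app.bsky.feed.repost") => {
            if let Some(subject_uri) = record
                .pointer("/subject/uri")
                .and_then(Value::as_str)
                .filter(|u| u.starts_with("at://"))
            {
                out.push(Backlink {
                    uri: uri.to_string(),
                    path: "subject.uri".to_string(),
                    link_to: subject_uri.to_string(),
                });
            }
        }
        _ => {}
    }
    out
}
```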
-467
src/actor_store/repo.rs
··· 1 - //! Repository operations for actor store. 2 - 3 - use std::str::FromStr as _; 4 - use std::sync::Arc; 5 - 6 - use anyhow::{Context as _, Result}; 7 - use atrium_repo::Cid; 8 - use cidv10::Cid as CidV10; 9 - use diesel::prelude::*; 10 - use rsky_repo::{ 11 - block_map::BlockMap, 12 - cid_set::CidSet, 13 - repo::Repo, 14 - storage::{readable_blockstore::ReadableBlockstore as _, types::RepoStorage}, 15 - types::{ 16 - CommitAction, CommitData, CommitDataWithOps, CommitOp, PreparedBlobRef, PreparedWrite, 17 - WriteOpAction, write_to_op, 18 - }, 19 - util::format_data_key, 20 - }; 21 - use rsky_syntax::aturi::AtUri; 22 - use tokio::sync::RwLock; 23 - 24 - use super::{ 25 - ActorDb, 26 - blob::{BackgroundQueue, BlobHandler, BlobStorePlaceholder}, 27 - record::RecordHandler, 28 - }; 29 - use crate::SigningKey; 30 - 31 - use crate::actor_store::sql_repo::SqlRepoStorage; 32 - 33 - /// Data for sync events. 34 - pub(crate) struct SyncEventData { 35 - /// The CID of the repository root. 36 - pub cid: Cid, 37 - /// The revision of the repository. 38 - pub rev: String, 39 - /// The blocks in the repository. 40 - pub blocks: BlockMap, 41 - } 42 - 43 - /// Unified repository handler for the actor store with both read and write capabilities. 44 - pub(crate) struct RepoHandler { 45 - /// Actor DID 46 - pub did: String, 47 - /// Backend storage 48 - pub storage: Arc<RwLock<dyn RepoStorage>>, 49 - /// BlobReader for handling blob operations 50 - pub blob: BlobHandler, 51 - /// RecordHandler for handling record operations 52 - pub record: RecordHandler, 53 - /// BlobTransactor for handling blob writes 54 - pub blob_transactor: BlobHandler, 55 - /// RecordHandler for handling record writes 56 - pub record_transactor: RecordHandler, 57 - /// Signing keypair 58 - pub signing_key: Option<Arc<SigningKey>>, 59 - /// Background queue for async operations 60 - pub background_queue: BackgroundQueue, 61 - } 62 - 63 - impl RepoHandler { 64 - /// Create a new repository handler with read/write capabilities. 65 - pub(crate) fn new( 66 - db: ActorDb, 67 - blobstore: BlobStorePlaceholder, 68 - did: String, 69 - signing_key: Arc<SigningKey>, 70 - background_queue: BackgroundQueue, 71 - ) -> Self { 72 - // Create readers 73 - let blob = BlobHandler::new(db.clone(), blobstore.clone()); 74 - let record = RecordHandler::new(db.clone(), did.clone()); 75 - 76 - // Create storage backend with current timestamp 77 - let now = chrono::Utc::now().to_rfc3339(); 78 - let storage = SqlRepoStorage::new(did.clone(), db.clone(), Some(now)); 79 - 80 - // Create transactors 81 - let blob_transactor = 82 - BlobHandler::new(db.clone(), blobstore.clone(), background_queue.clone()); 83 - let record_transactor = RecordHandler::new(db.clone(), blobstore); 84 - 85 - Self { 86 - did, 87 - storage, 88 - blob, 89 - record, 90 - blob_transactor, 91 - record_transactor, 92 - signing_key: Some(signing_key), 93 - background_queue, 94 - } 95 - } 96 - 97 - /// Get event data for synchronization. 
98 - pub(crate) async fn get_sync_event_data(&self) -> Result<SyncEventData> { 99 - let root = self.storage.get_root_detailed().await?; 100 - let blocks = self 101 - .storage 102 - .get_blocks(vec![CidV10::from_str(&root.cid.to_string()).unwrap()]) 103 - .await?; 104 - 105 - Ok(SyncEventData { 106 - cid: root.cid, 107 - rev: root.rev, 108 - blocks: blocks.blocks, 109 - }) 110 - } 111 - 112 - /// Try to load repository 113 - pub(crate) async fn maybe_load_repo(&self) -> Result<Option<Repo>> { 114 - match self.storage.get_root().await { 115 - Some(cid) => { 116 - let repo = Repo::load(&self.storage, cid).await?; 117 - Ok(Some(repo)) 118 - } 119 - None => Ok(None), 120 - } 121 - } 122 - 123 - /// Create a new repository with prepared writes 124 - pub(crate) async fn create_repo( 125 - &self, 126 - writes: Vec<PreparedWrite>, 127 - ) -> Result<CommitDataWithOps> { 128 - let signing_key = self 129 - .signing_key 130 - .as_ref() 131 - .ok_or_else(|| anyhow::anyhow!("No signing key available for write operations"))?; 132 - 133 - // Convert writes to operations 134 - let ops = writes 135 - .iter() 136 - .map(|w| write_to_op(w)) 137 - .collect::<Result<Vec<_>>>()?; 138 - 139 - // Format the initial commit 140 - let commit = Repo::format_init_commit(&self.storage, &self.did, signing_key, ops).await?; 141 - 142 - // Apply the commit, index the writes, and process blobs in parallel 143 - let results = futures::future::join3( 144 - self.storage.apply_commit(commit.clone(), Some(true)), 145 - self.index_writes(&writes, &commit.rev), 146 - self.blob_transactor 147 - .process_write_blobs(&commit.rev, writes.clone()), 148 - ) 149 - .await; 150 - 151 - // Check for errors 152 - results.0.context("Failed to apply commit")?; 153 - results.1.context("Failed to index writes")?; 154 - results.2.context("Failed to process blobs")?; 155 - 156 - // Create commit operations 157 - let ops = writes 158 - .iter() 159 - .filter_map(|w| match w { 160 - PreparedWrite::Create(c) | PreparedWrite::Update(c) => { 161 - let uri = AtUri::from_str(&c.uri).ok()?; 162 - Some(CommitOp { 163 - action: CommitAction::Create, 164 - path: format_data_key(uri.get_collection(), uri.get_rkey()), 165 - cid: Some(c.cid), 166 - prev: None, 167 - }) 168 - } 169 - PreparedWrite::Delete(_) => None, 170 - }) 171 - .collect(); 172 - 173 - Ok(CommitDataWithOps { 174 - commit_data: commit, 175 - ops, 176 - prev_data: None, 177 - }) 178 - } 179 - 180 - /// Process writes to the repository 181 - pub(crate) async fn process_writes( 182 - &self, 183 - writes: Vec<PreparedWrite>, 184 - swap_commit_cid: Option<Cid>, 185 - ) -> Result<CommitDataWithOps> { 186 - // Check write limit 187 - if writes.len() > 200 { 188 - return Err(anyhow::anyhow!("Too many writes. Max: 200")); 189 - } 190 - 191 - // Format the commit 192 - let commit = self.format_commit(writes.clone(), swap_commit_cid).await?; 193 - 194 - // Check commit size limit (2MB) 195 - if commit.commit_data.relevant_blocks.byte_size()? > 2_000_000 { 196 - return Err(anyhow::anyhow!("Too many writes. 
Max event size: 2MB")); 197 - } 198 - 199 - // Apply the commit, index the writes, and process blobs in parallel 200 - let results = futures::future::join3( 201 - self.storage.apply_commit(commit.commit_data.clone(), None), 202 - self.index_writes(&writes, &commit.commit_data.rev), 203 - self.blob_transactor 204 - .process_write_blobs(&commit.commit_data.rev, writes), 205 - ) 206 - .await; 207 - 208 - // Check for errors 209 - results.0.context("Failed to apply commit")?; 210 - results.1.context("Failed to index writes")?; 211 - results.2.context("Failed to process blobs")?; 212 - 213 - Ok(commit) 214 - } 215 - 216 - /// Format a commit for writes 217 - pub(crate) async fn format_commit( 218 - &self, 219 - writes: Vec<PreparedWrite>, 220 - swap_commit_cid: Option<Cid>, 221 - ) -> Result<CommitDataWithOps> { 222 - // Ensure we have a signing key 223 - let signing_key = self 224 - .signing_key 225 - .as_ref() 226 - .ok_or_else(|| anyhow::anyhow!("No signing key available for write operations"))?; 227 - 228 - // Get current root 229 - let curr_root = self 230 - .storage 231 - .get_root_detailed() 232 - .await 233 - .context("Failed to get repository root")?; 234 - 235 - // Check commit swap if requested 236 - if let Some(swap) = swap_commit_cid { 237 - if curr_root.cid != swap { 238 - return Err(anyhow::anyhow!( 239 - "Bad commit swap: current={}, expected={}", 240 - curr_root.cid, 241 - swap 242 - )); 243 - } 244 - } 245 - 246 - // Cache the current revision for better performance 247 - self.storage.cache_rev(&curr_root.rev).await?; 248 - 249 - // Prepare collections for tracking changes 250 - let mut new_record_cids = Vec::new(); 251 - let mut del_and_update_uris = Vec::new(); 252 - let mut commit_ops = Vec::new(); 253 - 254 - // Process each write to build operations and gather info 255 - for write in &writes { 256 - match write { 257 - PreparedWrite::Create(w) => { 258 - new_record_cids.push(w.cid); 259 - let uri = AtUri::from_str(&w.uri)?; 260 - commit_ops.push(CommitOp { 261 - action: CommitAction::Create, 262 - path: format_data_key(uri.get_collection(), uri.get_rkey()), 263 - cid: Some(w.cid), 264 - prev: None, 265 - }); 266 - 267 - // Validate swap_cid conditions 268 - if w.swap_cid.is_some() && w.swap_cid != Some(None) { 269 - return Err(anyhow::anyhow!( 270 - "Bad record swap: there should be no current record for a create" 271 - )); 272 - } 273 - } 274 - PreparedWrite::Update(w) => { 275 - new_record_cids.push(w.cid); 276 - let uri = AtUri::from_str(&w.uri)?; 277 - del_and_update_uris.push(uri.clone()); 278 - 279 - // Get the current record if it exists 280 - let record = self.record.get_record(&uri, None, true).await?; 281 - let curr_record = record.as_ref().map(|r| Cid::from_str(&r.cid).unwrap()); 282 - 283 - commit_ops.push(CommitOp { 284 - action: CommitAction::Update, 285 - path: format_data_key(uri.get_collection(), uri.get_rkey()), 286 - cid: Some(w.cid), 287 - prev: curr_record, 288 - }); 289 - 290 - // Validate swap_cid conditions 291 - if w.swap_cid.is_some() { 292 - if w.swap_cid == Some(None) { 293 - return Err(anyhow::anyhow!( 294 - "Bad record swap: there should be a current record for an update" 295 - )); 296 - } 297 - 298 - if let Some(Some(swap)) = w.swap_cid { 299 - if curr_record.is_some() && curr_record != Some(swap) { 300 - return Err(anyhow::anyhow!( 301 - "Bad record swap: current={:?}, expected={}", 302 - curr_record, 303 - swap 304 - )); 305 - } 306 - } 307 - } 308 - } 309 - PreparedWrite::Delete(w) => { 310 - let uri = AtUri::from_str(&w.uri)?; 311 - 
del_and_update_uris.push(uri.clone()); 312 - 313 - // Get the current record if it exists 314 - let record = self.record.get_record(&uri, None, true).await?; 315 - let curr_record = record.as_ref().map(|r| Cid::from_str(&r.cid).unwrap()); 316 - 317 - commit_ops.push(CommitOp { 318 - action: CommitAction::Delete, 319 - path: format_data_key(uri.get_collection(), uri.get_rkey()), 320 - cid: None, 321 - prev: curr_record, 322 - }); 323 - 324 - // Validate swap_cid conditions 325 - if w.swap_cid.is_some() { 326 - if w.swap_cid == Some(None) { 327 - return Err(anyhow::anyhow!( 328 - "Bad record swap: there should be a current record for a delete" 329 - )); 330 - } 331 - 332 - if let Some(Some(swap)) = w.swap_cid { 333 - if curr_record.is_some() && curr_record != Some(swap) { 334 - return Err(anyhow::anyhow!( 335 - "Bad record swap: current={:?}, expected={}", 336 - curr_record, 337 - swap 338 - )); 339 - } 340 - } 341 - } 342 - } 343 - } 344 - } 345 - 346 - // Load repository 347 - let repo = Repo::load(&self.storage, curr_root.cid).await?; 348 - let prev_data = repo.commit.data.clone(); 349 - 350 - // Convert writes to repo operations 351 - let write_ops = writes 352 - .iter() 353 - .map(|w| write_to_op(w)) 354 - .collect::<Result<Vec<_>>>()?; 355 - 356 - // Format the commit with the repository 357 - let mut commit = repo.format_commit(write_ops, signing_key).await?; 358 - 359 - // Find blocks that would be deleted but are referenced by another record 360 - let dupe_record_cids = self 361 - .get_duplicate_record_cids(&commit.removed_cids.to_list(), &del_and_update_uris) 362 - .await?; 363 - 364 - // Remove duplicates from removed_cids 365 - for cid in &dupe_record_cids { 366 - commit.removed_cids.delete(*cid); 367 - } 368 - 369 - // Find blocks that are relevant to ops but not included in diff 370 - let new_record_blocks = commit.relevant_blocks.get_many(&new_record_cids)?; 371 - if !new_record_blocks.missing.is_empty() { 372 - let missing_blocks = self.storage.get_blocks(&new_record_blocks.missing).await?; 373 - commit.relevant_blocks.add_map(missing_blocks.blocks)?; 374 - } 375 - 376 - Ok(CommitDataWithOps { 377 - commit_data: commit, 378 - ops: commit_ops, 379 - prev_data: Some(prev_data), 380 - }) 381 - } 382 - 383 - /// Index writes to the database 384 - pub(crate) async fn index_writes(&self, writes: &[PreparedWrite], rev: &str) -> Result<()> { 385 - let timestamp = chrono::Utc::now().to_rfc3339(); 386 - 387 - for write in writes { 388 - match write { 389 - PreparedWrite::Create(w) => { 390 - let uri = AtUri::from_str(&w.uri)?; 391 - self.record_transactor 392 - .index_record( 393 - uri, 394 - w.cid, 395 - Some(&w.record), 396 - WriteOpAction::Create, 397 - rev, 398 - Some(timestamp.clone()), 399 - ) 400 - .await?; 401 - } 402 - PreparedWrite::Update(w) => { 403 - let uri = AtUri::from_str(&w.uri)?; 404 - self.record_transactor 405 - .index_record( 406 - uri, 407 - w.cid, 408 - Some(&w.record), 409 - WriteOpAction::Update, 410 - rev, 411 - Some(timestamp.clone()), 412 - ) 413 - .await?; 414 - } 415 - PreparedWrite::Delete(w) => { 416 - let uri = AtUri::from_str(&w.uri)?; 417 - self.record_transactor.delete_record(&uri).await?; 418 - } 419 - } 420 - } 421 - 422 - Ok(()) 423 - } 424 - 425 - /// Get record CIDs that are duplicated elsewhere in the repository 426 - pub(crate) async fn get_duplicate_record_cids( 427 - &self, 428 - cids: &[Cid], 429 - touched_uris: &[AtUri], 430 - ) -> Result<Vec<Cid>> { 431 - if touched_uris.is_empty() || cids.is_empty() { 432 - return Ok(Vec::new()); 433 - 
} 434 - 435 - // Convert URIs to strings for the query 436 - let uri_strings: Vec<String> = touched_uris.iter().map(|u| u.to_string()).collect(); 437 - 438 - // Convert CIDs to strings for the query 439 - let cid_strings: Vec<String> = cids.iter().map(|c| c.to_string()).collect(); 440 - 441 - let did = self.did.clone(); 442 - 443 - // Query for records with these CIDs that aren't in the touched URIs 444 - let duplicate_cids = self 445 - .storage 446 - .db 447 - .run(move |conn| { 448 - use rsky_pds::schema::pds::record::dsl::*; 449 - 450 - record 451 - .filter(did.eq(&did)) 452 - .filter(cid.eq_any(&cid_strings)) 453 - .filter(uri.ne_all(&uri_strings)) 454 - .select(cid) 455 - .load::<String>(conn) 456 - }) 457 - .await?; 458 - 459 - // Convert strings back to CIDs 460 - let cids = duplicate_cids 461 - .into_iter() 462 - .filter_map(|c| Cid::from_str(&c).ok()) 463 - .collect(); 464 - 465 - Ok(cids) 466 - } 467 - }
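`repo.rs` is removed wholesale here; the part worth keeping in mind is the optimistic-concurrency contract its `format_commit` enforced, since whatever replaces it still needs the same checks. A condensed sketch of the two swap rules (commit-level and record-level), with `&str` standing in for the CID type:

```rust
// Commit-level compare-and-swap: a write may pin the commit it expects to be
// current, and the repo rejects the write on mismatch.
fn check_swap_commit(current: &str, swap_commit: Option<&str>) -> anyhow::Result<()> {
    if let Some(expected) = swap_commit {
        if expected != current {
            anyhow::bail!("Bad commit swap: current={current}, expected={expected}");
        }
    }
    Ok(())
}

/// Record-level swap semantics, as in the removed code's `swap_cid` handling:
/// - `None`            => no check requested
/// - `Some(None)`      => caller expects no current record (creates)
/// - `Some(Some(cid))` => caller expects exactly this record CID (updates/deletes)
fn check_record_swap(
    current: Option<&str>,
    swap_cid: Option<Option<&str>>,
) -> anyhow::Result<()> {
    match swap_cid {
        None => Ok(()),
        Some(None) if current.is_some() => {
            anyhow::bail!("Bad record swap: there should be no current record")
        }
        Some(Some(expected)) if current != Some(expected) => {
            anyhow::bail!("Bad record swap: current={current:?}, expected={expected}")
        }
        _ => Ok(()),
    }
}
```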
+224
src/actor_store/sql_blob.rs
··· 1 + use std::{path::PathBuf, str::FromStr as _}; 2 + 3 + use anyhow::Result; 4 + use cidv10::Cid; 5 + use rsky_common::get_random_str; 6 + 7 + use crate::db::DatabaseConnection; 8 + 9 + /// Type for stream of blob data 10 + pub type BlobStream = Box<dyn std::io::Read + Send>; 11 + 12 + /// Placeholder implementation for blob store 13 + #[derive(Clone)] 14 + pub(crate) struct BlobStoreSql { 15 + client: DatabaseConnection, 16 + path: PathBuf, 17 + } 18 + 19 + impl BlobStoreSql { 20 + pub fn new(did: String, cfg: &SdkConfig) -> Self { 21 + // let client = aws_sdk_s3::Client::new(cfg); 22 + // BlobStorePlaceholder { 23 + // client, 24 + // bucket: did, 25 + // } 26 + todo!(); 27 + } 28 + 29 + pub fn creator(cfg: &SdkConfig) -> Box<dyn Fn(String) -> BlobStoreSql + '_> { 30 + Box::new(move |did: String| BlobStoreSql::new(did, cfg)) 31 + } 32 + 33 + fn gen_key(&self) -> String { 34 + get_random_str() 35 + } 36 + 37 + fn get_tmp_path(&self, key: &String) -> String { 38 + // format!("tmp/{0}/{1}", self.bucket, key) 39 + todo!(); 40 + } 41 + 42 + fn get_stored_path(&self, cid: Cid) -> String { 43 + // format!("blocks/{0}/{1}", self.bucket, cid) 44 + todo!(); 45 + } 46 + 47 + fn get_quarantined_path(&self, cid: Cid) -> String { 48 + // format!("quarantine/{0}/{1}", self.bucket, cid) 49 + todo!(); 50 + } 51 + 52 + pub async fn put_temp(&self, bytes: Vec<u8>) -> Result<String> { 53 + let key = self.gen_key(); 54 + // let body = ByteStream::from(bytes); 55 + // self.client 56 + // .put_object() 57 + // .body(body) 58 + // .bucket(&self.bucket) 59 + // .key(self.get_tmp_path(&key)) 60 + // .acl(ObjectCannedAcl::PublicRead) 61 + // .send() 62 + // .await?; 63 + // Ok(key) 64 + todo!(); 65 + } 66 + 67 + pub async fn make_permanent(&self, key: String, cid: Cid) -> Result<()> { 68 + // let already_has = self.has_stored(cid).await?; 69 + // if !already_has { 70 + // Ok(self 71 + // .move_object(MoveObject { 72 + // from: self.get_tmp_path(&key), 73 + // to: self.get_stored_path(cid), 74 + // }) 75 + // .await?) 76 + // } else { 77 + // // already saved, so we no-op & just delete the temp 78 + // Ok(self.delete_key(self.get_tmp_path(&key)).await?) 
79 + // } 80 + todo!(); 81 + } 82 + 83 + pub async fn put_permanent(&self, cid: Cid, bytes: Vec<u8>) -> Result<()> { 84 + // let body = ByteStream::from(bytes); 85 + // self.client 86 + // .put_object() 87 + // .body(body) 88 + // .bucket(&self.bucket) 89 + // .key(self.get_stored_path(cid)) 90 + // .acl(ObjectCannedAcl::PublicRead) 91 + // .send() 92 + // .await?; 93 + // Ok(()) 94 + todo!(); 95 + } 96 + 97 + pub async fn quarantine(&self, cid: Cid) -> Result<()> { 98 + // self.move_object(MoveObject { 99 + // from: self.get_stored_path(cid), 100 + // to: self.get_quarantined_path(cid), 101 + // }) 102 + // .await 103 + todo!(); 104 + } 105 + 106 + pub async fn unquarantine(&self, cid: Cid) -> Result<()> { 107 + // self.move_object(MoveObject { 108 + // from: self.get_quarantined_path(cid), 109 + // to: self.get_stored_path(cid), 110 + // }) 111 + // .await 112 + todo!(); 113 + } 114 + 115 + async fn get_object(&self, cid: Cid) -> Result<ByteStream> { 116 + // let res = self 117 + // .client 118 + // .get_object() 119 + // .bucket(&self.bucket) 120 + // .key(self.get_stored_path(cid)) 121 + // .send() 122 + // .await; 123 + // match res { 124 + // Ok(res) => Ok(res.body), 125 + // Err(SdkError::ServiceError(s)) => Err(anyhow::Error::new(s.into_err())), 126 + // Err(e) => Err(anyhow::Error::new(e.into_service_error())), 127 + // } 128 + todo!(); 129 + } 130 + 131 + pub async fn get_bytes(&self, cid: Cid) -> Result<Vec<u8>> { 132 + let res = self.get_object(cid).await?; 133 + let bytes = res.collect().await.map(|data| data.into_bytes())?; 134 + Ok(bytes.to_vec()) 135 + } 136 + 137 + pub async fn get_stream(&self, cid: Cid) -> Result<ByteStream> { 138 + self.get_object(cid).await 139 + } 140 + 141 + pub async fn delete(&self, cid: String) -> Result<()> { 142 + self.delete_key(self.get_stored_path(Cid::from_str(&cid)?)) 143 + .await 144 + } 145 + 146 + pub async fn delete_many(&self, cids: Vec<Cid>) -> Result<()> { 147 + let keys: Vec<String> = cids 148 + .into_iter() 149 + .map(|cid| self.get_stored_path(cid)) 150 + .collect(); 151 + self.delete_many_keys(keys).await 152 + } 153 + 154 + pub async fn has_stored(&self, cid: Cid) -> Result<bool> { 155 + Ok(self.has_key(self.get_stored_path(cid)).await) 156 + } 157 + 158 + pub async fn has_temp(&self, key: String) -> Result<bool> { 159 + Ok(self.has_key(self.get_tmp_path(&key)).await) 160 + } 161 + 162 + async fn has_key(&self, key: String) -> bool { 163 + // let res = self 164 + // .client 165 + // .head_object() 166 + // .bucket(&self.bucket) 167 + // .key(key) 168 + // .send() 169 + // .await; 170 + // res.is_ok() 171 + todo!(); 172 + } 173 + 174 + async fn delete_key(&self, key: String) -> Result<()> { 175 + // self.client 176 + // .delete_object() 177 + // .bucket(&self.bucket) 178 + // .key(key) 179 + // .send() 180 + // .await?; 181 + // Ok(()) 182 + todo!(); 183 + } 184 + 185 + async fn delete_many_keys(&self, keys: Vec<String>) -> Result<()> { 186 + // let objects: Vec<ObjectIdentifier> = keys 187 + // .into_iter() 188 + // .map(|key| Ok(ObjectIdentifier::builder().key(key).build()?)) 189 + // .collect::<Result<Vec<ObjectIdentifier>>>()?; 190 + // let deletes = Delete::builder().set_objects(Some(objects)).build()?; 191 + // self.client 192 + // .delete_objects() 193 + // .bucket(&self.bucket) 194 + // .delete(deletes) 195 + // .send() 196 + // .await?; 197 + // Ok(()) 198 + todo!(); 199 + } 200 + 201 + async fn move_object(&self, keys: MoveObject) -> Result<()> { 202 + // self.client 203 + // .copy_object() 204 + // 
.bucket(&self.bucket) 205 + // .copy_source(format!( 206 + // "{0}/{1}/{2}", 207 + // env_str("AWS_ENDPOINT_BUCKET").unwrap(), 208 + // self.bucket, 209 + // keys.from 210 + // )) 211 + // .key(keys.to) 212 + // .acl(ObjectCannedAcl::PublicRead) 213 + // .send() 214 + // .await?; 215 + // self.client 216 + // .delete_object() 217 + // .bucket(&self.bucket) 218 + // .key(keys.from) 219 + // .send() 220 + // .await?; 221 + // Ok(()) 222 + todo!(); 223 + } 224 + }
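`sql_blob.rs` is still a stub: the bodies are commented-out S3 calls (referencing S3-era types like `SdkConfig`, `ByteStream`, and `MoveObject`) behind `todo!()`, while the struct now carries a `DatabaseConnection` and a `PathBuf`. One plausible local-disk layout, mirroring the key scheme preserved in the comments (`tmp/…`, `blocks/…`, `quarantine/…`), could look like this purely hypothetical sketch, where `root` corresponds to `BlobStoreSql::path` and the DID plays the role the S3 bucket played:

```rust
// Hypothetical on-disk layout for a filesystem-backed blob store.
// Nothing here is in the PR yet; names and layout are assumptions.
use std::path::{Path, PathBuf};

fn tmp_path(root: &Path, did: &str, key: &str) -> PathBuf {
    root.join("tmp").join(did).join(key)
}

fn stored_path(root: &Path, did: &str, cid: &str) -> PathBuf {
    root.join("blocks").join(did).join(cid)
}

fn quarantined_path(root: &Path, did: &str, cid: &str) -> PathBuf {
    root.join("quarantine").join(did).join(cid)
}

// `make_permanent` then reduces to an atomic rename from tmp to blocks.
fn promote(root: &Path, did: &str, key: &str, cid: &str) -> std::io::Result<()> {
    let to = stored_path(root, did, cid);
    if let Some(parent) = to.parent() {
        std::fs::create_dir_all(parent)?;
    }
    std::fs::rename(tmp_path(root, did, key), to)
}
```

An atomic `rename` gives `make_permanent` the same promote-then-cleanup semantics the S3 version gets from copy-plus-delete, provided `tmp` and `blocks` live on the same filesystem.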
+327 -412
src/actor_store/sql_repo.rs
··· 1 - use anyhow::{Context as _, Result}; 2 - use atrium_repo::Cid; 3 - use atrium_repo::blockstore::{ 4 - AsyncBlockStoreRead, AsyncBlockStoreWrite, Error as BlockstoreError, 5 - }; 1 + //! Based on https://github.com/blacksky-algorithms/rsky/blob/main/rsky-pds/src/actor_store/repo/sql_repo.rs 2 + //! blacksky-algorithms/rsky is licensed under the Apache License 2.0 3 + //! 4 + //! Modified for SQLite backend 5 + 6 + use anyhow::Result; 7 + use cidv10::Cid; 8 + use diesel::dsl::sql; 6 9 use diesel::prelude::*; 7 - use diesel::r2d2::{self, ConnectionManager}; 8 - use diesel::sqlite::SqliteConnection; 10 + use diesel::sql_types::{Bool, Text}; 11 + use diesel::*; 9 12 use futures::{StreamExt, TryStreamExt, stream}; 10 - use rsky_pds::models::{RepoBlock, RepoRoot}; 13 + use rsky_pds::models; 14 + use rsky_pds::models::RepoBlock; 11 15 use rsky_repo::block_map::{BlockMap, BlocksAndMissing}; 12 16 use rsky_repo::car::blocks_to_car_file; 13 17 use rsky_repo::cid_set::CidSet; ··· 16 20 use rsky_repo::storage::readable_blockstore::ReadableBlockstore; 17 21 use rsky_repo::storage::types::RepoStorage; 18 22 use rsky_repo::types::CommitData; 19 - use sha2::{Digest, Sha256}; 20 - use std::future::Future; 21 23 use std::pin::Pin; 22 24 use std::str::FromStr; 23 25 use std::sync::Arc; 24 26 use tokio::sync::RwLock; 25 27 26 - use crate::actor_store::db::ActorDb; 28 + use super::ActorDb; 27 29 28 30 #[derive(Clone, Debug)] 29 - pub struct SqlRepoStorage { 30 - /// In-memory cache for blocks 31 + pub struct SqlRepoReader { 31 32 pub cache: Arc<RwLock<BlockMap>>, 32 - /// Database connection 33 33 pub db: ActorDb, 34 - /// DID of the actor 35 - pub did: String, 36 - /// Current timestamp 34 + pub root: Option<Cid>, 35 + pub rev: Option<String>, 37 36 pub now: String, 38 - } 39 - 40 - impl SqlRepoStorage { 41 - /// Create a new SQL repository storage 42 - pub fn new(did: String, db: ActorDb, now: Option<String>) -> Self { 43 - let now = now.unwrap_or_else(|| chrono::Utc::now().to_rfc3339()); 44 - 45 - Self { 46 - cache: Arc::new(RwLock::new(BlockMap::new())), 47 - db, 48 - did, 49 - now, 50 - } 51 - } 52 - 53 - /// Get the CAR stream for the repository 54 - pub async fn get_car_stream(&self, since: Option<String>) -> Result<Vec<u8>> { 55 - match self.get_root().await { 56 - None => Err(anyhow::Error::new(RepoRootNotFoundError)), 57 - Some(root) => { 58 - let mut car = BlockMap::new(); 59 - let mut cursor: Option<CidAndRev> = None; 60 - 61 - loop { 62 - let blocks = self.get_block_range(&since, &cursor).await?; 63 - if blocks.is_empty() { 64 - break; 65 - } 66 - 67 - // Add blocks to car 68 - for block in &blocks { 69 - car.set(Cid::from_str(&block.cid)?, block.content.clone()); 70 - } 71 - 72 - if let Some(last_block) = blocks.last() { 73 - cursor = Some(CidAndRev { 74 - cid: Cid::from_str(&last_block.cid)?, 75 - rev: last_block.repoRev.clone(), 76 - }); 77 - } else { 78 - break; 79 - } 80 - } 81 - 82 - blocks_to_car_file(Some(&root), car).await 83 - } 84 - } 85 - } 86 - 87 - /// Get a range of blocks from the database 88 - pub async fn get_block_range( 89 - &self, 90 - since: &Option<String>, 91 - cursor: &Option<CidAndRev>, 92 - ) -> Result<Vec<RepoBlock>> { 93 - let did = self.did.clone(); 94 - 95 - self.db 96 - .run(move |conn| { 97 - use rsky_pds::schema::pds::repo_block::dsl::*; 98 - 99 - let mut query = repo_block.filter(did.eq(&did)).limit(500).into_boxed(); 100 - 101 - if let Some(c) = cursor { 102 - query = query.filter( 103 - repoRev 104 - .lt(&c.rev) 105 - 
.or(repoRev.eq(&c.rev).and(cid.lt(&c.cid.to_string()))), 106 - ); 107 - } 108 - 109 - if let Some(s) = since { 110 - query = query.filter(repoRev.gt(s)); 111 - } 112 - 113 - query 114 - .order((repoRev.desc(), cid.desc())) 115 - .load::<RepoBlock>(conn) 116 - }) 117 - .await 118 - } 119 - 120 - /// Count total blocks for this repository 121 - pub async fn count_blocks(&self) -> Result<i64> { 122 - let did = self.did.clone(); 123 - 124 - self.db 125 - .run(move |conn| { 126 - use rsky_pds::schema::pds::repo_block::dsl::*; 127 - 128 - repo_block.filter(did.eq(&did)).count().get_result(conn) 129 - }) 130 - .await 131 - } 132 - 133 - /// Proactively cache blocks from a specific revision 134 - pub async fn cache_rev(&mut self, rev: &str) -> Result<()> { 135 - let did = self.did.clone(); 136 - let rev_string = rev.to_string(); 137 - 138 - let blocks = self 139 - .db 140 - .run(move |conn| { 141 - use rsky_pds::schema::pds::repo_block::dsl::*; 142 - 143 - repo_block 144 - .filter(did.eq(&did)) 145 - .filter(repoRev.eq(&rev_string)) 146 - .select((cid, content)) 147 - .limit(15) 148 - .load::<(String, Vec<u8>)>(conn) 149 - }) 150 - .await?; 151 - 152 - let mut cache_guard = self.cache.write().await; 153 - for (cid_str, content) in blocks { 154 - let cid = Cid::from_str(&cid_str)?; 155 - cache_guard.set(cid, content); 156 - } 157 - 158 - Ok(()) 159 - } 160 - 161 - /// Delete multiple blocks by their CIDs 162 - pub async fn delete_many(&self, cids: Vec<Cid>) -> Result<()> { 163 - if cids.is_empty() { 164 - return Ok(()); 165 - } 166 - 167 - let did = self.did.clone(); 168 - let cid_strings: Vec<String> = cids.into_iter().map(|c| c.to_string()).collect(); 169 - 170 - // Process in chunks to avoid too many parameters 171 - for chunk in cid_strings.chunks(100) { 172 - let chunk_vec = chunk.to_vec(); 173 - let did_clone = did.clone(); 174 - 175 - self.db 176 - .run(move |conn| { 177 - use rsky_pds::schema::pds::repo_block::dsl::*; 178 - 179 - diesel::delete(repo_block) 180 - .filter(did.eq(&did_clone)) 181 - .filter(cid.eq_any(&chunk_vec)) 182 - .execute(conn) 183 - }) 184 - .await?; 185 - } 186 - 187 - Ok(()) 188 - } 189 - 190 - /// Get the detailed root information 191 - pub async fn get_root_detailed(&self) -> Result<CidAndRev> { 192 - let did = self.did.clone(); 193 - 194 - let root = self 195 - .db 196 - .run(move |conn| { 197 - use rsky_pds::schema::pds::repo_root::dsl::*; 198 - 199 - repo_root 200 - .filter(did.eq(&did)) 201 - .first::<RepoRoot>(conn) 202 - .optional() 203 - }) 204 - .await?; 205 - 206 - match root { 207 - Some(r) => Ok(CidAndRev { 208 - cid: Cid::from_str(&r.cid)?, 209 - rev: r.rev, 210 - }), 211 - None => Err(anyhow::Error::new(RepoRootNotFoundError)), 212 - } 213 - } 37 + pub did: String, 214 38 } 215 39 216 - impl ReadableBlockstore for SqlRepoStorage { 40 + impl ReadableBlockstore for SqlRepoReader { 217 41 fn get_bytes<'a>( 218 42 &'a self, 219 43 cid: &'a Cid, 220 44 ) -> Pin<Box<dyn Future<Output = Result<Option<Vec<u8>>>> + Send + Sync + 'a>> { 221 - let did = self.did.clone(); 45 + let did: String = self.did.clone(); 46 + let db: ActorDb = self.db.clone(); 222 47 let cid = cid.clone(); 223 48 224 49 Box::pin(async move { 225 - // Check cache first 226 - { 50 + use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema; 51 + let cached = { 227 52 let cache_guard = self.cache.read().await; 228 - if let Some(cached) = cache_guard.get(cid) { 229 - return Ok(Some(cached.clone())); 230 - } 53 + cache_guard.get(cid).map(|v| v.clone()) 54 + }; 55 + if let Some(cached_result) 
= cached { 56 + return Ok(Some(cached_result.clone())); 231 57 } 232 58 233 - // Not in cache, query database 234 - let cid_str = cid.to_string(); 235 - let result = self 236 - .db 59 + let found: Option<Vec<u8>> = db 237 60 .run(move |conn| { 238 - use rsky_pds::schema::pds::repo_block::dsl::*; 239 - 240 - repo_block 241 - .filter(did.eq(&did)) 242 - .filter(cid.eq(&cid_str)) 243 - .select(content) 244 - .first::<Vec<u8>>(conn) 61 + RepoBlockSchema::repo_block 62 + .filter(RepoBlockSchema::cid.eq(cid.to_string())) 63 + .filter(RepoBlockSchema::did.eq(did)) 64 + .select(RepoBlockSchema::content) 65 + .first(conn) 245 66 .optional() 246 67 }) 247 68 .await?; 248 - 249 - // Update cache if found 250 - if let Some(content) = &result { 251 - let mut cache_guard = self.cache.write().await; 252 - cache_guard.set(cid, content.clone()); 69 + match found { 70 + None => Ok(None), 71 + Some(result) => { 72 + { 73 + let mut cache_guard = self.cache.write().await; 74 + cache_guard.set(cid, result.clone()); 75 + } 76 + Ok(Some(result)) 77 + } 253 78 } 254 - 255 - Ok(result) 256 79 }) 257 80 } 258 81 ··· 261 84 cid: Cid, 262 85 ) -> Pin<Box<dyn Future<Output = Result<bool>> + Send + Sync + 'a>> { 263 86 Box::pin(async move { 264 - let bytes = self.get_bytes(&cid).await?; 265 - Ok(bytes.is_some()) 87 + let got = <Self as ReadableBlockstore>::get_bytes(self, &cid).await?; 88 + Ok(got.is_some()) 266 89 }) 267 90 } 268 91 ··· 270 93 &'a self, 271 94 cids: Vec<Cid>, 272 95 ) -> Pin<Box<dyn Future<Output = Result<BlocksAndMissing>> + Send + Sync + 'a>> { 96 + let did: String = self.did.clone(); 97 + let db: ActorDb = self.db.clone(); 98 + 273 99 Box::pin(async move { 274 - // Check cache first 100 + use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema; 275 101 let cached = { 276 102 let mut cache_guard = self.cache.write().await; 277 - cache_guard.get_many(cids.clone())? 103 + cache_guard.get_many(cids)? 
278 104 }; 279 105 280 106 if cached.missing.is_empty() { 281 107 return Ok(cached); 282 108 } 283 - 284 - // Prepare data structures for missing blocks 285 109 let missing = CidSet::new(Some(cached.missing.clone())); 286 110 let missing_strings: Vec<String> = 287 - cached.missing.iter().map(|c| c.to_string()).collect(); 288 - let did = self.did.clone(); 111 + cached.missing.into_iter().map(|c| c.to_string()).collect(); 112 + 113 + let blocks = Arc::new(tokio::sync::Mutex::new(BlockMap::new())); 114 + let missing_set = Arc::new(tokio::sync::Mutex::new(missing)); 115 + 116 + let _: Vec<_> = stream::iter(missing_strings.chunks(500)) 117 + .then(|batch| { 118 + let this_db = db.clone(); 119 + let this_did = did.clone(); 120 + let blocks = Arc::clone(&blocks); 121 + let missing = Arc::clone(&missing_set); 122 + let batch = batch.to_vec(); // Convert to owned Vec 289 123 290 - // Create block map for results 291 - let mut blocks = BlockMap::new(); 292 - let mut missing_set = CidSet::new(Some(cached.missing.clone())); 124 + async move { 125 + // Database query 126 + let rows: Vec<(String, Vec<u8>)> = this_db 127 + .run(move |conn| { 128 + RepoBlockSchema::repo_block 129 + .filter(RepoBlockSchema::cid.eq_any(batch)) 130 + .filter(RepoBlockSchema::did.eq(this_did)) 131 + .select((RepoBlockSchema::cid, RepoBlockSchema::content)) 132 + .load(conn) 133 + }) 134 + .await?; 293 135 294 - // Query database in chunks 295 - for chunk in missing_strings.chunks(100) { 296 - let chunk_vec = chunk.to_vec(); 297 - let did_clone = did.clone(); 136 + // Process rows with locked access 137 + let mut blocks = blocks.lock().await; 138 + let mut missing = missing.lock().await; 298 139 299 - let rows = self 300 - .db 301 - .run(move |conn| { 302 - use rsky_pds::schema::pds::repo_block::dsl::*; 140 + for row in rows { 141 + let cid = Cid::from_str(&row.0)?; // Proper error handling 142 + blocks.set(cid, row.1); 143 + missing.delete(cid); 144 + } 303 145 304 - repo_block 305 - .filter(did.eq(&did_clone)) 306 - .filter(cid.eq_any(&chunk_vec)) 307 - .select((cid, content)) 308 - .load::<(String, Vec<u8>)>(conn) 309 - }) 310 - .await?; 146 + Ok::<(), anyhow::Error>(()) 147 + } 148 + }) 149 + .try_collect() 150 + .await?; 311 151 312 - // Process results 313 - for (cid_str, content) in rows { 314 - let block_cid = Cid::from_str(&cid_str)?; 315 - blocks.set(block_cid, content.clone()); 316 - missing_set.delete(block_cid); 152 + // Extract values from synchronization primitives 153 + let mut blocks = Arc::try_unwrap(blocks) 154 + .expect("Arc still has owners") 155 + .into_inner(); 156 + let missing = Arc::try_unwrap(missing_set) 157 + .expect("Arc still has owners") 158 + .into_inner(); 317 159 318 - // Update cache 319 - let mut cache_guard = self.cache.write().await; 320 - cache_guard.set(block_cid, content); 321 - } 160 + { 161 + let mut cache_guard = self.cache.write().await; 162 + cache_guard.add_map(blocks.clone())?; 322 163 } 323 164 324 - // Combine with cached blocks 325 165 blocks.add_map(cached.blocks)?; 326 166 327 167 Ok(BlocksAndMissing { 328 168 blocks, 329 - missing: missing_set.to_list(), 169 + missing: missing.to_list(), 330 170 }) 331 171 }) 332 172 } 333 173 } 334 174 335 - impl RepoStorage for SqlRepoStorage { 175 + impl RepoStorage for SqlRepoReader { 336 176 fn get_root<'a>(&'a self) -> Pin<Box<dyn Future<Output = Option<Cid>> + Send + Sync + 'a>> { 337 177 Box::pin(async move { 338 178 match self.get_root_detailed().await { ··· 348 188 bytes: Vec<u8>, 349 189 rev: String, 350 190 ) -> Pin<Box<dyn 
Future<Output = Result<()>> + Send + Sync + 'a>> { 351 - let did = self.did.clone(); 352 - let bytes_clone = bytes.clone(); 353 - 191 + let did: String = self.did.clone(); 192 + let db: ActorDb = self.db.clone(); 193 + let bytes_cloned = bytes.clone(); 354 194 Box::pin(async move { 355 - let cid_str = cid.to_string(); 356 - let size = bytes.len() as i32; 195 + use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema; 357 196 358 - self.db 359 - .run(move |conn| { 360 - use rsky_pds::schema::pds::repo_block::dsl::*; 361 - 362 - diesel::insert_into(repo_block) 363 - .values(( 364 - did.eq(&did), 365 - cid.eq(&cid_str), 366 - repoRev.eq(&rev), 367 - size.eq(size), 368 - content.eq(&bytes), 369 - )) 370 - .on_conflict_do_nothing() 371 - .execute(conn) 372 - }) 373 - .await?; 374 - 375 - // Update cache 376 - let mut cache_guard = self.cache.write().await; 377 - cache_guard.set(cid, bytes_clone); 378 - 197 + db.run(move |conn| { 198 + insert_into(RepoBlockSchema::repo_block) 199 + .values(( 200 + RepoBlockSchema::did.eq(did), 201 + RepoBlockSchema::cid.eq(cid.to_string()), 202 + RepoBlockSchema::repoRev.eq(rev), 203 + RepoBlockSchema::size.eq(bytes.len() as i32), 204 + RepoBlockSchema::content.eq(bytes), 205 + )) 206 + .execute(conn) 207 + }) 208 + .await?; 209 + { 210 + let mut cache_guard = self.cache.write().await; 211 + cache_guard.set(cid, bytes_cloned); 212 + } 379 213 Ok(()) 380 214 }) 381 215 } ··· 385 219 to_put: BlockMap, 386 220 rev: String, 387 221 ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + Sync + 'a>> { 388 - let did = self.did.clone(); 222 + let did: String = self.did.clone(); 223 + let db: ActorDb = self.db.clone(); 389 224 390 225 Box::pin(async move { 391 - if to_put.size() == 0 { 392 - return Ok(()); 393 - } 226 + use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema; 394 227 395 - // Prepare blocks for insertion 396 - let blocks: Vec<(String, String, String, i32, Vec<u8>)> = to_put 228 + let blocks: Vec<RepoBlock> = to_put 397 229 .map 398 230 .iter() 399 - .map(|(cid, bytes)| { 400 - ( 401 - did.clone(), 402 - cid.to_string(), 403 - rev.clone(), 404 - bytes.0.len() as i32, 405 - bytes.0.clone(), 406 - ) 231 + .map(|(cid, bytes)| RepoBlock { 232 + cid: cid.to_string(), 233 + did: did.clone(), 234 + repo_rev: rev.clone(), 235 + size: bytes.0.len() as i32, 236 + content: bytes.0.clone(), 407 237 }) 408 238 .collect(); 409 239 410 - // Process in chunks 411 - for chunk in blocks.chunks(50) { 412 - let chunk_vec = chunk.to_vec(); 240 + let chunks: Vec<Vec<RepoBlock>> = 241 + blocks.chunks(50).map(|chunk| chunk.to_vec()).collect(); 413 242 414 - self.db 415 - .run(move |conn| { 416 - use rsky_pds::schema::pds::repo_block::dsl::*; 417 - 418 - let values: Vec<_> = chunk_vec 419 - .iter() 420 - .map(|(did_val, cid_val, rev_val, size_val, content_val)| { 421 - ( 422 - did.eq(did_val), 423 - cid.eq(cid_val), 424 - repoRev.eq(rev_val), 425 - size.eq(*size_val), 426 - content.eq(content_val), 427 - ) 428 - }) 429 - .collect(); 430 - 431 - diesel::insert_into(repo_block) 432 - .values(&values) 433 - .on_conflict_do_nothing() 434 - .execute(conn) 435 - }) 436 - .await?; 437 - } 438 - 439 - // Update cache with all blocks 440 - { 441 - let mut cache_guard = self.cache.write().await; 442 - for (cid, bytes) in &to_put.map { 443 - cache_guard.set(*cid, bytes.0.clone()); 444 - } 445 - } 243 + let _: Vec<_> = stream::iter(chunks) 244 + .then(|batch| { 245 + let db = db.clone(); 246 + async move { 247 + db.run(move |conn| { 248 + insert_into(RepoBlockSchema::repo_block) 
249 + .values(batch) 250 + .on_conflict_do_nothing() 251 + .execute(conn) 252 + .map(|_| ()) 253 + }) 254 + .await 255 + .map_err(anyhow::Error::from) 256 + } 257 + }) 258 + .collect::<Vec<_>>() 259 + .await 260 + .into_iter() 261 + .collect::<Result<Vec<()>>>()?; 446 262 447 263 Ok(()) 448 264 }) 449 265 } 450 - 451 266 fn update_root<'a>( 452 267 &'a self, 453 268 cid: Cid, 454 269 rev: String, 455 270 is_create: Option<bool>, 456 271 ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + Sync + 'a>> { 457 - let did = self.did.clone(); 458 - let now = self.now.clone(); 459 - let is_create = is_create.unwrap_or(false); 272 + let did: String = self.did.clone(); 273 + let db: ActorDb = self.db.clone(); 274 + let now: String = self.now.clone(); 460 275 461 276 Box::pin(async move { 462 - let cid_str = cid.to_string(); 277 + use rsky_pds::schema::pds::repo_root::dsl as RepoRootSchema; 463 278 279 + let is_create = is_create.unwrap_or(false); 464 280 if is_create { 465 - // Insert new root 466 - self.db 467 - .run(move |conn| { 468 - use rsky_pds::schema::pds::repo_root::dsl::*; 469 - 470 - diesel::insert_into(repo_root) 471 - .values(( 472 - did.eq(&did), 473 - cid.eq(&cid_str), 474 - rev.eq(&rev), 475 - indexedAt.eq(&now), 476 - )) 477 - .execute(conn) 478 - }) 479 - .await?; 281 + db.run(move |conn| { 282 + insert_into(RepoRootSchema::repo_root) 283 + .values(( 284 + RepoRootSchema::did.eq(did), 285 + RepoRootSchema::cid.eq(cid.to_string()), 286 + RepoRootSchema::rev.eq(rev), 287 + RepoRootSchema::indexedAt.eq(now), 288 + )) 289 + .execute(conn) 290 + }) 291 + .await?; 480 292 } else { 481 - // Update existing root 482 - self.db 483 - .run(move |conn| { 484 - use rsky_pds::schema::pds::repo_root::dsl::*; 485 - 486 - diesel::update(repo_root) 487 - .filter(did.eq(&did)) 488 - .set((cid.eq(&cid_str), rev.eq(&rev), indexedAt.eq(&now))) 489 - .execute(conn) 490 - }) 491 - .await?; 293 + db.run(move |conn| { 294 + update(RepoRootSchema::repo_root) 295 + .filter(RepoRootSchema::did.eq(did)) 296 + .set(( 297 + RepoRootSchema::cid.eq(cid.to_string()), 298 + RepoRootSchema::rev.eq(rev), 299 + RepoRootSchema::indexedAt.eq(now), 300 + )) 301 + .execute(conn) 302 + }) 303 + .await?; 492 304 } 493 - 494 305 Ok(()) 495 306 }) 496 307 } ··· 501 312 is_create: Option<bool>, 502 313 ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + Sync + 'a>> { 503 314 Box::pin(async move { 504 - // Apply commit in three steps 505 315 self.update_root(commit.cid, commit.rev.clone(), is_create) 506 316 .await?; 507 317 self.put_many(commit.new_blocks, commit.rev).await?; 508 318 self.delete_many(commit.removed_cids.to_list()).await?; 509 - 510 319 Ok(()) 511 320 }) 512 321 } 513 322 } 514 323 515 - #[async_trait::async_trait] 516 - impl AsyncBlockStoreRead for SqlRepoStorage { 517 - async fn read_block(&mut self, cid: Cid) -> Result<Vec<u8>, BlockstoreError> { 518 - let bytes = self 519 - .get_bytes(&cid) 520 - .await 521 - .map_err(|e| BlockstoreError::Other(Box::new(e)))? 
522 - .ok_or(BlockstoreError::CidNotFound)?; 324 + // Basically handles getting ipld blocks from db 325 + impl SqlRepoReader { 326 + pub fn new(did: String, now: Option<String>, db: ActorDb) -> Self { 327 + let now = now.unwrap_or_else(rsky_common::now); 328 + SqlRepoReader { 329 + cache: Arc::new(RwLock::new(BlockMap::new())), 330 + root: None, 331 + rev: None, 332 + db, 333 + now, 334 + did, 335 + } 336 + } 523 337 524 - Ok(bytes) 338 + pub async fn get_car_stream(&self, since: Option<String>) -> Result<Vec<u8>> { 339 + match self.get_root().await { 340 + None => Err(anyhow::Error::new(RepoRootNotFoundError)), 341 + Some(root) => { 342 + let mut car = BlockMap::new(); 343 + let mut cursor: Option<CidAndRev> = None; 344 + let mut write_rows = |rows: Vec<RepoBlock>| -> Result<()> { 345 + for row in rows { 346 + car.set(Cid::from_str(&row.cid)?, row.content); 347 + } 348 + Ok(()) 349 + }; 350 + loop { 351 + let res = self.get_block_range(&since, &cursor).await?; 352 + write_rows(res.clone())?; 353 + if let Some(last_row) = res.last() { 354 + cursor = Some(CidAndRev { 355 + cid: Cid::from_str(&last_row.cid)?, 356 + rev: last_row.repo_rev.clone(), 357 + }); 358 + } else { 359 + break; 360 + } 361 + } 362 + blocks_to_car_file(Some(&root), car).await 363 + } 364 + } 525 365 } 526 366 527 - fn read_block_into( 528 - &mut self, 529 - cid: Cid, 530 - contents: &mut Vec<u8>, 531 - ) -> impl Future<Output = Result<(), BlockstoreError>> + Send { 532 - async move { 533 - let bytes = self.read_block(cid).await?; 534 - contents.clear(); 535 - contents.extend_from_slice(&bytes); 536 - Ok(()) 537 - } 367 + pub async fn get_block_range( 368 + &self, 369 + since: &Option<String>, 370 + cursor: &Option<CidAndRev>, 371 + ) -> Result<Vec<RepoBlock>> { 372 + let did: String = self.did.clone(); 373 + let db: ActorDb = self.db.clone(); 374 + let since = since.clone(); 375 + let cursor = cursor.clone(); 376 + use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema; 377 + 378 + Ok(db 379 + .run(move |conn| { 380 + let mut builder = RepoBlockSchema::repo_block 381 + .select(RepoBlock::as_select()) 382 + .order((RepoBlockSchema::repoRev.desc(), RepoBlockSchema::cid.desc())) 383 + .filter(RepoBlockSchema::did.eq(did)) 384 + .limit(500) 385 + .into_boxed(); 386 + 387 + if let Some(cursor) = cursor { 388 + // use this syntax to ensure we hit the index 389 + builder = builder.filter( 390 + sql::<Bool>("((") 391 + .bind(RepoBlockSchema::repoRev) 392 + .sql(", ") 393 + .bind(RepoBlockSchema::cid) 394 + .sql(") < (") 395 + .bind::<Text, _>(cursor.rev.clone()) 396 + .sql(", ") 397 + .bind::<Text, _>(cursor.cid.to_string()) 398 + .sql("))"), 399 + ); 400 + } 401 + if let Some(since) = since { 402 + builder = builder.filter(RepoBlockSchema::repoRev.gt(since)); 403 + } 404 + builder.load(conn) 405 + }) 406 + .await?) 
538 407 } 539 - } 540 408 541 - #[async_trait::async_trait] 542 - impl AsyncBlockStoreWrite for SqlRepoStorage { 543 - fn write_block( 544 - &mut self, 545 - codec: u64, 546 - hash: u64, 547 - contents: &[u8], 548 - ) -> impl Future<Output = Result<Cid, BlockstoreError>> + Send { 549 - let contents = contents.to_vec(); 550 - let rev = self.now.clone(); 409 + pub async fn count_blocks(&self) -> Result<i64> { 410 + let did: String = self.did.clone(); 411 + let db: ActorDb = self.db.clone(); 412 + use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema; 551 413 552 - async move { 553 - // Calculate digest based on hash algorithm 554 - let digest = match hash { 555 - atrium_repo::blockstore::SHA2_256 => sha2::Sha256::digest(&contents), 556 - _ => return Err(BlockstoreError::UnsupportedHash(hash)), 557 - }; 414 + let res = db 415 + .run(move |conn| { 416 + RepoBlockSchema::repo_block 417 + .filter(RepoBlockSchema::did.eq(did)) 418 + .count() 419 + .get_result(conn) 420 + }) 421 + .await?; 422 + Ok(res) 423 + } 558 424 559 - // Create multihash 560 - let multihash = atrium_repo::Multihash::wrap(hash, &digest) 561 - .map_err(|_| BlockstoreError::UnsupportedHash(hash))?; 425 + // Transactors 426 + // ------------------- 562 427 563 - // Create CID 564 - let cid = Cid::new_v1(codec, multihash); 428 + /// Proactively cache all blocks from a particular commit (to prevent multiple roundtrips) 429 + pub async fn cache_rev(&mut self, rev: String) -> Result<()> { 430 + let did: String = self.did.clone(); 431 + let db: ActorDb = self.db.clone(); 432 + use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema; 565 433 566 - // Store the block 567 - self.put_block(cid, contents, rev) 568 - .await 569 - .map_err(|e| BlockstoreError::Other(Box::new(e)))?; 434 + let res: Vec<(String, Vec<u8>)> = db 435 + .run(move |conn| { 436 + RepoBlockSchema::repo_block 437 + .filter(RepoBlockSchema::did.eq(did)) 438 + .filter(RepoBlockSchema::repoRev.eq(rev)) 439 + .select((RepoBlockSchema::cid, RepoBlockSchema::content)) 440 + .limit(15) 441 + .get_results::<(String, Vec<u8>)>(conn) 442 + }) 443 + .await?; 444 + for row in res { 445 + let mut cache_guard = self.cache.write().await; 446 + cache_guard.set(Cid::from_str(&row.0)?, row.1) 447 + } 448 + Ok(()) 449 + } 570 450 571 - Ok(cid) 451 + pub async fn delete_many(&self, cids: Vec<Cid>) -> Result<()> { 452 + if cids.is_empty() { 453 + return Ok(()); 572 454 } 455 + let did: String = self.did.clone(); 456 + let db: ActorDb = self.db.clone(); 457 + use rsky_pds::schema::pds::repo_block::dsl as RepoBlockSchema; 458 + 459 + let cid_strings: Vec<String> = cids.into_iter().map(|c| c.to_string()).collect(); 460 + db.run(move |conn| { 461 + delete(RepoBlockSchema::repo_block) 462 + .filter(RepoBlockSchema::did.eq(did)) 463 + .filter(RepoBlockSchema::cid.eq_any(cid_strings)) 464 + .execute(conn) 465 + }) 466 + .await?; 467 + Ok(()) 468 + } 469 + 470 + pub async fn get_root_detailed(&self) -> Result<CidAndRev> { 471 + let did: String = self.did.clone(); 472 + let db: ActorDb = self.db.clone(); 473 + use rsky_pds::schema::pds::repo_root::dsl as RepoRootSchema; 474 + 475 + let res = db 476 + .run(move |conn| { 477 + RepoRootSchema::repo_root 478 + .filter(RepoRootSchema::did.eq(did)) 479 + .select(models::RepoRoot::as_select()) 480 + .first(conn) 481 + }) 482 + .await?; 483 + 484 + Ok(CidAndRev { 485 + cid: Cid::from_str(&res.cid)?, 486 + rev: res.rev, 487 + }) 573 488 } 574 489 }
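The cursor filter in `get_block_range` is written as raw SQL so the `(repoRev, cid)` row-value comparison can use the composite index, as the inline comment notes. SQLite (3.15+) compares row values lexicographically, which is the same order Rust defines for tuples, so the keyset-pagination predicate is easy to sanity-check in plain Rust:

```rust
// The raw-SQL cursor compiles down to: ((repoRev, cid) < ($rev, $cid))
fn is_before_cursor(row: (&str, &str), cursor: (&str, &str)) -> bool {
    // Equivalent to: row.0 < cursor.0 OR (row.0 == cursor.0 AND row.1 < cursor.1)
    row < cursor
}

fn main() {
    let cursor = ("rev9", "cidB");
    assert!(is_before_cursor(("rev8", "cidZ"), cursor)); // earlier rev wins
    assert!(is_before_cursor(("rev9", "cidA"), cursor)); // rev ties, cid breaks it
    assert!(!is_before_cursor(cursor, cursor)); // equal rows are not "before"
}
```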
+7 -8
src/db/mod.rs
···
1 1 use anyhow::{Context, Result};
2 2 use diesel::connection::SimpleConnection;
3 - use diesel::prelude::*;
4 - use diesel::r2d2::{self, ConnectionManager, Pool, PooledConnection};
5 - use diesel::sqlite::{Sqlite, SqliteConnection};
3 + use diesel::r2d2::{ConnectionManager, Pool, PooledConnection};
4 + use diesel::sqlite::Sqlite;
5 + use diesel::*;
6 6 use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};
7 7 use std::path::Path;
8 - use std::sync::Arc;
9 8 use std::time::Duration;
10 9
11 10 pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!("migrations");
···
40 39     let manager = ConnectionManager::<SqliteConnection>::new(database_url);
41 40
42 41     // Create the connection pool with SQLite-specific configurations
43 -     let pool = r2d2::Pool::builder()
42 +     let pool = Pool::builder()
44 43         .max_size(10)
45 44         .connection_timeout(Duration::from_secs(30))
46 45         .test_on_check_out(true)
···
118 117
119 118     Err(anyhow::anyhow!(
120 119         "Max retries exceeded: {}",
121 -         last_error.unwrap_or_else(|| diesel::result::Error::RollbackTransaction)
120 +         last_error.unwrap_or_else(|| result::Error::RollbackTransaction)
122 121     ))
123 122 }
···
136 135     T: Send + 'static,
137 136 {
138 137     self.run(|conn| {
139 -         conn.transaction(|tx| f(tx).map_err(|e| diesel::result::Error::RollbackTransaction))
138 +         conn.transaction(|tx| f(tx).map_err(|_| result::Error::RollbackTransaction))
140 139     })
141 140     .await
142 141 }
···
144 143 /// Run a transaction with no retry logic
145 144 pub async fn transaction_no_retry<T, F>(&self, f: F) -> Result<T>
146 145 where
147 -     F: FnOnce(&mut SqliteConnection) -> std::result::Result<T, diesel::result::Error> + Send,
146 +     F: FnOnce(&mut SqliteConnection) -> std::result::Result<T, result::Error> + Send,
148 147     T: Send + 'static,
149 148 {
150 149     let mut conn = self
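Finally, `db/mod.rs` wraps the r2d2 SQLite pool with retrying transaction helpers, since concurrent writers on SQLite surface as transient "database is locked" errors. The retry shape, condensed into a self-contained synchronous sketch (the backoff schedule here is an assumption for illustration; the module's actual delay tuning is not shown in this diff):

```rust
// Self-contained sketch of the retry loop behind ActorDb::transaction.
use std::time::Duration;

fn with_retries<T, E: std::fmt::Display>(
    max_retries: u32,
    mut op: impl FnMut() -> Result<T, E>,
) -> anyhow::Result<T> {
    let mut last_error: Option<E> = None;
    for attempt in 0..max_retries {
        match op() {
            Ok(value) => return Ok(value),
            Err(e) => {
                // Treat the failure as transient (e.g. SQLITE_BUSY) and back
                // off a little longer on each attempt before retrying.
                last_error = Some(e);
                std::thread::sleep(Duration::from_millis(50 * u64::from(attempt + 1)));
            }
        }
    }
    Err(anyhow::anyhow!(
        "Max retries exceeded: {}",
        last_error.map(|e| e.to_string()).unwrap_or_default()
    ))
}
```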