Alternative ATProto PDS implementation

Prototype the com.atproto.repo.applyWrites endpoint

Changed files: +494 -43

src/endpoints/repo.rs  (+11 -43)
 //! PDS repository endpoints (/xrpc/com.atproto.repo.*)
+mod apply_writes;
+pub(crate) use apply_writes::apply_writes;
+
 use std::{collections::HashSet, str::FromStr};
 
 use anyhow::{Context as _, anyhow};
-use atrium_api::com::atproto::repo::apply_writes::{self, InputWritesItem, OutputResultsItem};
+use atrium_api::com::atproto::repo::apply_writes::{
+    self as atrium_apply_writes, InputWritesItem, OutputResultsItem,
+};
 use atrium_api::{
     com::atproto::repo::{self, defs::CommitMetaData},
     types::{
···
 use serde::Deserialize;
 use tokio::io::AsyncWriteExt as _;
 
+use crate::repo::block_map::cid_for_cbor;
+use crate::repo::types::PreparedCreateOrUpdate;
 use crate::{
     AppState, Db, Error, Result, SigningKey,
     actor_store::{ActorStore, ActorStoreReader, ActorStoreTransactor, ActorStoreWriter},
···
     Ok((did.to_owned(), handle.to_owned()))
 }
 
-/// Apply a batch transaction of repository creates, updates, and deletes. Requires auth, implemented by PDS.
-/// - POST /xrpc/com.atproto.repo.applyWrites
-/// ### Request Body
-/// - `repo`: `at-identifier` // The handle or DID of the repo (aka, current account).
-/// - `validate`: `boolean` // Can be set to 'false' to skip Lexicon schema validation of record data across all operations, 'true' to require it, or leave unset to validate only for known Lexicons.
-/// - `writes`: `object[]` // One of:
-/// - - com.atproto.repo.applyWrites.create
-/// - - com.atproto.repo.applyWrites.update
-/// - - com.atproto.repo.applyWrites.delete
-/// - `swap_commit`: `cid` // If provided, the entire operation will fail if the current repo commit CID does not match this value. Used to prevent conflicting repo mutations.
-async fn apply_writes(
-    user: AuthenticatedUser,
-    State(actor_store): State<ActorStore>,
-    State(skey): State<SigningKey>,
-    State(config): State<AppConfig>,
-    State(db): State<Db>,
-    State(fhp): State<FirehoseProducer>,
-    Json(input): Json<repo::apply_writes::Input>,
-) -> Result<Json<repo::apply_writes::Output>> {
-    // TODO: Implement validation when `input.validate` is set
-
-    // Ensure that we are updating the correct repository.
-    todo!();
-    // Convert ATProto writes to our internal format
-    todo!();
-    // Process the writes using the actor store
-    todo!();
-
-    // Update metrics
-    counter!(REPO_COMMITS).increment(1);
-    todo!();
-
-    // Send commit to firehose
-    todo!();
-
-    // Convert to API response format
-    todo!();
-}
-
 /// Create a single new repository record. Requires auth, implemented by PDS.
 /// - POST /xrpc/com.atproto.repo.createRecord
 /// ### Request Body
···
     State(fhp): State<FirehoseProducer>,
     Json(input): Json<repo::create_record::Input>,
 ) -> Result<Json<repo::create_record::Output>> {
-    let write_result = apply_writes(
+    let write_result = apply_writes::apply_writes(
         user,
         State(actor_store),
         State(skey),
···
     }
     .into();
 
-    let write_result = apply_writes(
+    let write_result = apply_writes::apply_writes(
         user,
         State(actor_store),
         State(skey),
···
 
     Ok(Json(
         repo::delete_record::OutputData {
-            commit: apply_writes(
+            commit: apply_writes::apply_writes(
                 user,
                 State(actor_store),
                 State(skey),
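
With the stub removed, `create_record`, `put_record`, and `delete_record` all funnel through the shared handler; the hunks above only show the renamed call, while the construction of the applyWrites input stays in the elided context. As a rough illustration of that delegation pattern (not the PR's exact code: the field names are atrium-api's generated `InputData`/`CreateData` types, and the surrounding handler variables are assumed), `create_record` can wrap its request into a single-write batch like this:

// Illustrative fragment inside create_record: wrap the createRecord input into a
// single-write applyWrites input, then call the shared handler.
// `input`, `user`, and the State(..) values are the handler arguments shown above.
let batched: atrium_apply_writes::Input = atrium_apply_writes::InputData {
    repo: input.repo.clone(),
    validate: input.validate,
    swap_commit: input.swap_commit.clone(),
    writes: vec![InputWritesItem::Create(Box::new(
        atrium_apply_writes::CreateData {
            collection: input.collection.clone(),
            rkey: input.rkey.clone(),
            value: input.record.clone(),
        }
        .into(),
    ))],
}
.into();

let write_result = apply_writes::apply_writes(
    user,
    State(actor_store),
    State(skey),
    State(config),
    State(db),
    State(fhp),
    Json(batched),
)
.await?;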
src/endpoints/repo/apply_writes.rs  (new file, +483)
//! Apply a batch transaction of repository creates, updates, and deletes. Requires auth, implemented by PDS.
use std::{collections::HashSet, str::FromStr};

use anyhow::{Context as _, anyhow};
use atrium_api::com::atproto::repo::apply_writes::{self, InputWritesItem, OutputResultsItem};
use atrium_api::{
    com::atproto::repo::{self, defs::CommitMetaData},
    types::{
        LimitedU32, Object, TryFromUnknown as _, TryIntoUnknown as _, Unknown,
        string::{AtIdentifier, Nsid, Tid},
    },
};
use atrium_repo::{Cid, blockstore::CarStore};
use axum::{
    Json, Router,
    body::Body,
    extract::{Query, Request, State},
    http::{self, StatusCode},
    routing::{get, post},
};
use constcat::concat;
use futures::TryStreamExt as _;
use metrics::counter;
use rsky_syntax::aturi::AtUri;
use serde::Deserialize;
use tokio::io::AsyncWriteExt as _;

use crate::repo::block_map::cid_for_cbor;
use crate::repo::types::PreparedCreateOrUpdate;
use crate::{
    AppState, Db, Error, Result, SigningKey,
    actor_store::{ActorStore, ActorStoreReader, ActorStoreTransactor, ActorStoreWriter},
    auth::AuthenticatedUser,
    config::AppConfig,
    error::ErrorMessage,
    firehose::{self, FirehoseProducer, RepoOp},
    metrics::{REPO_COMMITS, REPO_OP_CREATE, REPO_OP_DELETE, REPO_OP_UPDATE},
    repo::types::{PreparedWrite, WriteOpAction},
    storage,
};

use super::resolve_did;

/// Apply a batch transaction of repository creates, updates, and deletes. Requires auth, implemented by PDS.
/// - POST /xrpc/com.atproto.repo.applyWrites
/// ### Request Body
/// - `repo`: `at-identifier` // The handle or DID of the repo (aka, current account).
/// - `validate`: `boolean` // Can be set to 'false' to skip Lexicon schema validation of record data across all operations, 'true' to require it, or leave unset to validate only for known Lexicons.
/// - `writes`: `object[]` // One of:
/// - - com.atproto.repo.applyWrites.create
/// - - com.atproto.repo.applyWrites.update
/// - - com.atproto.repo.applyWrites.delete
/// - `swap_commit`: `cid` // If provided, the entire operation will fail if the current repo commit CID does not match this value. Used to prevent conflicting repo mutations.
pub(crate) async fn apply_writes(
    user: AuthenticatedUser,
    // The call sites in repo.rs still pass the actor store; accept it here (unused for now)
    // so the handler signature matches them.
    State(_actor_store): State<ActorStore>,
    State(skey): State<SigningKey>,
    State(config): State<AppConfig>,
    State(db): State<Db>,
    State(fhp): State<FirehoseProducer>,
    Json(input): Json<repo::apply_writes::Input>,
) -> Result<Json<repo::apply_writes::Output>> {
    // TODO: Implement validation when `input.validate` is set

    // Resolve DID from identifier
    let (target_did, _) = resolve_did(&db, &input.repo)
        .await
        .context("failed to resolve did")?;

    // Ensure that we are updating the correct repository
    if target_did.as_str() != user.did() {
        return Err(Error::with_status(
            StatusCode::BAD_REQUEST,
            anyhow!("repo did not match the authenticated user"),
        ));
    }

    // Validate writes count
    if input.writes.len() > 200 {
        return Err(Error::with_status(
            StatusCode::BAD_REQUEST,
            anyhow!("Too many writes. Max: 200"),
        ));
    }

    // Convert input writes to prepared format
    let mut prepared_writes = Vec::with_capacity(input.writes.len());
    for write in input.writes.iter() {
        match write {
            InputWritesItem::Create(create) => {
                let uri = AtUri::make(
                    user.did(),
                    &create.collection.as_str(),
                    create
                        .rkey
                        .as_deref()
                        .unwrap_or(&Tid::now(LimitedU32::MIN).to_string()),
                );

                let cid = match cid_for_cbor(&create.value) {
                    Ok(cid) => cid,
                    Err(e) => {
                        return Err(Error::with_status(
                            StatusCode::BAD_REQUEST,
                            anyhow!("Failed to encode record: {}", e),
                        ));
                    }
                };

                let blobs = scan_blobs(&create.value)
                    .unwrap_or_default()
                    .into_iter()
                    .map(|cid| {
                        // TODO: Create BlobRef from cid with proper metadata
                        BlobRef {
                            cid,
                            mime_type: "application/octet-stream".to_string(), // Default
                            size: 0, // Unknown at this point
                        }
                    })
                    .collect();

                prepared_writes.push(PreparedCreateOrUpdate {
                    action: WriteOpAction::Create,
                    uri: uri?.to_string(),
                    cid,
                    record: create.value.clone(),
                    blobs,
                    swap_cid: None,
                });
            }
            InputWritesItem::Update(update) => {
                let uri = AtUri::make(
                    user.did(),
                    Some(update.collection.to_string()),
                    Some(update.rkey.to_string()),
                );

                let cid = match cid_for_cbor(&update.value) {
                    Ok(cid) => cid,
                    Err(e) => {
                        return Err(Error::with_status(
                            StatusCode::BAD_REQUEST,
                            anyhow!("Failed to encode record: {}", e),
                        ));
                    }
                };

                let blobs = scan_blobs(&update.value)
                    .unwrap_or_default()
                    .into_iter()
                    .map(|cid| {
                        // TODO: Create BlobRef from cid with proper metadata
                        BlobRef {
                            cid,
                            mime_type: "application/octet-stream".to_string(),
                            size: 0,
                        }
                    })
                    .collect();

                prepared_writes.push(PreparedCreateOrUpdate {
                    action: WriteOpAction::Update,
                    uri: uri?.to_string(),
                    cid,
                    record: update.value.clone(),
                    blobs,
                    swap_cid: None,
                });
            }
            InputWritesItem::Delete(delete) => {
                let uri = AtUri::make(user.did(), &delete.collection.as_str(), &delete.rkey);

                prepared_writes.push(PreparedCreateOrUpdate {
                    action: WriteOpAction::Delete,
                    uri: uri?.to_string(),
                    cid: Cid::default(), // Not needed for delete
                    record: serde_json::Value::Null,
                    blobs: vec![],
                    swap_cid: None,
                });
            }
        }
    }

    // Get swap commit CID if provided
    let swap_commit_cid = input.swap_commit.as_ref().map(|cid| *cid.as_ref());

    let did_str = user.did();
    let mut repo = storage::open_repo_db(&config.repo, &db, did_str)
        .await
        .context("failed to open user repo")?;
    let orig_cid = repo.root();
    let orig_rev = repo.commit().rev();

    let mut blobs = vec![];
    let mut res = vec![];
    let mut ops = vec![];

    for write in &prepared_writes {
        let (builder, key) = match write.action {
            WriteOpAction::Create => {
                let key = format!("{}/{}", write.uri.collection, write.uri.rkey);
                let uri = format!("at://{}/{}", user.did(), key);

                let (builder, cid) = repo
                    .add_raw(&key, &write.record)
                    .await
                    .context("failed to add record")?;

                // Extract and track blobs
                if let Ok(new_blobs) = scan_blobs(&write.record) {
                    blobs.extend(
                        new_blobs
                            .into_iter()
                            .map(|blob_cid| (key.clone(), blob_cid)),
                    );
                }

                ops.push(RepoOp::Create {
                    cid,
                    path: key.clone(),
                });

                res.push(OutputResultsItem::CreateResult(Box::new(
                    apply_writes::CreateResultData {
                        cid: atrium_api::types::string::Cid::new(cid),
                        uri,
                        validation_status: None,
                    }
                    .into(),
                )));

                (builder, key)
            }
            WriteOpAction::Update => {
                let key = format!("{}/{}", write.uri.collection, write.uri.rkey);
                let uri = format!("at://{}/{}", user.did(), key);

                let prev = repo
                    .tree()
                    .get(&key)
                    .await
                    .context("failed to search MST")?;

                if prev.is_none() {
                    // No existing record, treat as create
                    let (create_builder, cid) = repo
                        .add_raw(&key, &write.record)
                        .await
                        .context("failed to add record")?;

                    if let Ok(new_blobs) = scan_blobs(&write.record) {
                        blobs.extend(
                            new_blobs
                                .into_iter()
                                .map(|blob_cid| (key.clone(), blob_cid)),
                        );
                    }

                    ops.push(RepoOp::Create {
                        cid,
                        path: key.clone(),
                    });

                    res.push(OutputResultsItem::CreateResult(Box::new(
                        apply_writes::CreateResultData {
                            cid: atrium_api::types::string::Cid::new(cid),
                            uri,
                            validation_status: None,
                        }
                        .into(),
                    )));

                    (create_builder, key)
                } else {
                    // Update existing record
                    let prev = prev.context("should be able to find previous record")?;
                    let (update_builder, cid) = repo
                        .update_raw(&key, &write.record)
                        .await
                        .context("failed to update record")?;

                    if let Ok(new_blobs) = scan_blobs(&write.record) {
                        blobs.extend(
                            new_blobs
                                .into_iter()
                                .map(|blob_cid| (key.clone(), blob_cid)),
                        );
                    }

                    ops.push(RepoOp::Update {
                        cid,
                        path: key.clone(),
                        prev,
                    });

                    res.push(OutputResultsItem::UpdateResult(Box::new(
                        apply_writes::UpdateResultData {
                            cid: atrium_api::types::string::Cid::new(cid),
                            uri,
                            validation_status: None,
                        }
                        .into(),
                    )));

                    (update_builder, key)
                }
            }
            WriteOpAction::Delete => {
                let key = format!("{}/{}", write.uri.collection, write.uri.rkey);

                let prev = repo
                    .tree()
                    .get(&key)
                    .await
                    .context("failed to search MST")?
                    .context("previous record does not exist")?;

                ops.push(RepoOp::Delete {
                    path: key.clone(),
                    prev,
                });

                res.push(OutputResultsItem::DeleteResult(Box::new(
                    apply_writes::DeleteResultData {}.into(),
                )));

                let builder = repo
                    .delete_raw(&key)
                    .await
                    .context("failed to delete record")?;

                (builder, key)
            }
        };

        let sig = skey
            .sign(&builder.bytes())
            .context("failed to sign commit")?;

        _ = builder
            .finalize(sig)
            .await
            .context("failed to write signed commit")?;
    }

    // Construct a firehose record
    let mut mem = Vec::new();
    let mut store = CarStore::create_with_roots(std::io::Cursor::new(&mut mem), [repo.root()])
        .await
        .context("failed to create temp store")?;

    // Extract the records out of the user's repository
    for write in &prepared_writes {
        let key = format!("{}/{}", write.uri.collection, write.uri.rkey);
        repo.extract_raw_into(&key, &mut store)
            .await
            .context("failed to extract key")?;
    }

    let mut tx = db.begin().await.context("failed to begin transaction")?;

    if !swap_commit(
        &mut *tx,
        repo.root(),
        repo.commit().rev(),
        swap_commit_cid,
        &user.did(),
    )
    .await
    .context("failed to swap commit")?
    {
        // The swap can only fail when a swap_commit was provided, so this should always succeed.
        let old = input
            .swap_commit
            .clone()
            .context("swap_commit should always be Some")?;

        // The swap failed. Return the old commit and do not update the repository.
        return Ok(Json(
            apply_writes::OutputData {
                results: None,
                commit: Some(
                    CommitMetaData {
                        cid: old,
                        rev: orig_rev,
                    }
                    .into(),
                ),
            }
            .into(),
        ));
    }

    // For updates and removals, unlink the old/deleted record from the blob_ref table
    for op in &ops {
        match op {
            &RepoOp::Update { ref path, .. } | &RepoOp::Delete { ref path, .. } => {
                // FIXME: This may cause issues if a user deletes more than one record referencing the same blob.
                _ = sqlx::query!(
                    r#"UPDATE blob_ref SET record = NULL WHERE did = ? AND record = ?"#,
                    did_str,
                    path
                )
                .execute(&mut *tx)
                .await
                .context("failed to remove blob_ref")?;
            }
            &RepoOp::Create { .. } => {}
        }
    }

    // Process blobs
    for (key, cid) in &blobs {
        let cid_str = cid.to_string();

        // Handle the case where a new record references an existing blob
        if sqlx::query!(
            r#"UPDATE blob_ref SET record = ? WHERE cid = ? AND did = ? AND record IS NULL"#,
            key,
            cid_str,
            did_str,
        )
        .execute(&mut *tx)
        .await
        .context("failed to update blob_ref")?
        .rows_affected()
            == 0
        {
            _ = sqlx::query!(
                r#"INSERT INTO blob_ref (record, cid, did) VALUES (?, ?, ?)"#,
                key,
                cid_str,
                did_str,
            )
            .execute(&mut *tx)
            .await
            .context("failed to insert blob_ref")?;
        }
    }

    tx.commit()
        .await
        .context("failed to commit blob ref to database")?;

    // Update counters
    counter!(REPO_COMMITS).increment(1);
    for op in &ops {
        match *op {
            RepoOp::Create { .. } => counter!(REPO_OP_CREATE).increment(1),
            RepoOp::Update { .. } => counter!(REPO_OP_UPDATE).increment(1),
            RepoOp::Delete { .. } => counter!(REPO_OP_DELETE).increment(1),
        }
    }

    // We've committed the transaction to the database, and the commit is now stored in the user's
    // canonical repository.
    // We can now broadcast this on the firehose.
    fhp.commit(firehose::Commit {
        car: mem,
        ops,
        cid: repo.root(),
        rev: repo.commit().rev().to_string(),
        did: atrium_api::types::string::Did::new(user.did()).expect("should be valid DID"),
        pcid: Some(orig_cid),
        blobs: blobs.into_iter().map(|(_, cid)| cid).collect::<Vec<_>>(),
    })
    .await;

    Ok(Json(
        apply_writes::OutputData {
            results: Some(res),
            commit: Some(
                CommitMetaData {
                    cid: atrium_api::types::string::Cid::new(repo.root()),
                    rev: repo.commit().rev(),
                }
                .into(),
            ),
        }
        .into(),
    ))
}
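
For a quick end-to-end check of the new handler, here is a client-side sketch (not part of the PR): it builds an applyWrites request body with serde_json and posts it with reqwest (json feature) under tokio. The PDS base URL, access token, DID, collection, and rkey values are placeholders; the body shape follows the com.atproto.repo.applyWrites lexicon described in the doc comment above.

use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Placeholder values; substitute a real PDS host, access JWT, and repo DID/handle.
    let pds = "https://pds.example.com";
    let token = "<access-jwt>";

    // One create and one delete applied as a single atomic commit.
    let body = json!({
        "repo": "did:plc:example",
        "validate": true,
        "writes": [
            {
                "$type": "com.atproto.repo.applyWrites#create",
                "collection": "app.bsky.feed.post",
                "value": {
                    "$type": "app.bsky.feed.post",
                    "text": "hello from applyWrites",
                    "createdAt": "2024-01-01T00:00:00Z"
                }
            },
            {
                "$type": "com.atproto.repo.applyWrites#delete",
                "collection": "app.bsky.feed.post",
                "rkey": "3kabcdefg2l2a"
            }
        ]
        // Optionally include "swapCommit": "<current repo root CID>" so the whole
        // batch fails if the repository advanced in the meantime.
    });

    let resp = reqwest::Client::new()
        .post(format!("{pds}/xrpc/com.atproto.repo.applyWrites"))
        .bearer_auth(token)
        .json(&body)
        .send()
        .await?
        .error_for_status()?;

    // The response carries the new commit cid/rev and one result entry per write.
    println!("{}", resp.text().await?);
    Ok(())
}

A successful response mirrors OutputData above: the new commit cid/rev plus per-write results; if the supplied swapCommit is stale, the handler instead returns the old commit and leaves the repository untouched.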