Alternative ATProto PDS implementation

prototype blob fs storage

Changed files
+43 -33
src
actor_store
+43 -33
src/actor_store/blob_fs.rs
··· 1 1 //! File system implementation of blob storage 2 2 //! Based on the S3 implementation but using local file system instead 3 3 use anyhow::Result; 4 - use std::str::FromStr; 5 4 use cidv10::Cid; 6 5 use rsky_common::get_random_str; 7 6 use rsky_repo::error::BlobError; 8 7 use std::path::{Path, PathBuf}; 8 + use std::str::FromStr; 9 9 use tokio::fs as async_fs; 10 10 use tokio::io::AsyncWriteExt; 11 11 use tracing::{debug, error, warn}; ··· 43 43 44 44 impl BlobStoreFs { 45 45 /// Create a new file system blob store for the given DID and base directory 46 - pub fn new(did: String, base_dir: PathBuf) -> Self { 46 + pub const fn new(did: String, base_dir: PathBuf) -> Self { 47 47 Self { base_dir, did } 48 48 } 49 49 50 50 /// Create a factory function for blob stores 51 - pub fn creator(base_dir: PathBuf) -> Box<dyn Fn(String) -> BlobStoreFs> { 52 - let base_dir_clone = base_dir.clone(); 53 - Box::new(move |did: String| BlobStoreFs::new(did, base_dir_clone.clone())) 51 + pub fn creator(base_dir: PathBuf) -> Box<dyn Fn(String) -> Self> { 52 + let base_dir_clone = base_dir; 53 + Box::new(move |did: String| Self::new(did, base_dir_clone.clone())) 54 54 } 55 55 56 56 /// Generate a random key for temporary storage ··· 66 66 /// Get path to the stored blob with appropriate sharding 67 67 fn get_stored_path(&self, cid: Cid) -> PathBuf { 68 68 let cid_str = cid.to_string(); 69 - 69 + 70 70 // Create two-level sharded structure based on CID 71 71 // First 10 chars for level 1, next 10 chars for level 2 72 72 let first_level = if cid_str.len() >= 10 { ··· 74 74 } else { 75 75 &cid_str 76 76 }; 77 - 77 + 78 78 let second_level = if cid_str.len() >= 20 { 79 79 &cid_str[10..20] 80 80 } else { 81 81 "default" 82 82 }; 83 - 83 + 84 84 self.base_dir 85 85 .join("blocks") 86 86 .join(&self.did) ··· 92 92 /// Get path to the quarantined blob 93 93 fn get_quarantined_path(&self, cid: Cid) -> PathBuf { 94 94 let cid_str = cid.to_string(); 95 - self.base_dir.join("quarantine").join(&self.did).join(&cid_str) 95 + self.base_dir 96 + .join("quarantine") 97 + .join(&self.did) 98 + .join(&cid_str) 96 99 } 97 100 98 101 /// Store a blob temporarily 99 102 pub async fn put_temp(&self, bytes: Vec<u8>) -> Result<String> { 100 103 let key = self.gen_key(); 101 104 let temp_path = self.get_tmp_path(&key); 102 - 105 + 103 106 // Ensure the directory exists 104 107 if let Some(parent) = temp_path.parent() { 105 108 async_fs::create_dir_all(parent).await?; 106 109 } 107 - 110 + 108 111 // Write the temporary blob 109 112 let mut file = async_fs::File::create(&temp_path).await?; 110 113 file.write_all(&bytes).await?; 111 114 file.flush().await?; 112 - 115 + 113 116 debug!("Stored temp blob at: {:?}", temp_path); 114 117 Ok(key) 115 118 } ··· 117 120 /// Make a temporary blob permanent by moving it to the blob store 118 121 pub async fn make_permanent(&self, key: String, cid: Cid) -> Result<()> { 119 122 let already_has = self.has_stored(cid).await?; 120 - 123 + 121 124 if !already_has { 122 125 // Move the temporary blob to permanent storage 123 126 self.move_object(MoveObject { 124 127 from: self.get_tmp_path(&key), 125 128 to: self.get_stored_path(cid), 126 - }).await?; 129 + }) 130 + .await?; 127 131 debug!("Moved temp blob to permanent: {} -> {}", key, cid); 128 132 } else { 129 133 // Already saved, so just delete the temp ··· 133 137 debug!("Deleted temp blob as permanent already exists: {}", key); 134 138 } 135 139 } 136 - 140 + 137 141 Ok(()) 138 142 } 139 143 140 144 /// Store a blob directly as permanent 141 145 pub async fn put_permanent(&self, cid: Cid, bytes: Vec<u8>) -> Result<()> { 142 146 let target_path = self.get_stored_path(cid); 143 - 147 + 144 148 // Ensure the directory exists 145 149 if let Some(parent) = target_path.parent() { 146 150 async_fs::create_dir_all(parent).await?; 147 151 } 148 - 152 + 149 153 // Write the blob 150 154 let mut file = async_fs::File::create(&target_path).await?; 151 155 file.write_all(&bytes).await?; 152 156 file.flush().await?; 153 - 157 + 154 158 debug!("Stored permanent blob: {}", cid); 155 159 Ok(()) 156 160 } ··· 160 164 self.move_object(MoveObject { 161 165 from: self.get_stored_path(cid), 162 166 to: self.get_quarantined_path(cid), 163 - }).await?; 164 - 167 + }) 168 + .await?; 169 + 165 170 debug!("Quarantined blob: {}", cid); 166 171 Ok(()) 167 172 } ··· 171 176 self.move_object(MoveObject { 172 177 from: self.get_quarantined_path(cid), 173 178 to: self.get_stored_path(cid), 174 - }).await?; 175 - 179 + }) 180 + .await?; 181 + 176 182 debug!("Unquarantined blob: {}", cid); 177 183 Ok(()) 178 184 } ··· 180 186 /// Get a blob as a stream 181 187 async fn get_object(&self, cid: Cid) -> Result<ByteStream> { 182 188 let blob_path = self.get_stored_path(cid); 183 - 189 + 184 190 match async_fs::read(&blob_path).await { 185 191 Ok(bytes) => Ok(ByteStream::new(bytes)), 186 192 Err(e) => { ··· 215 221 /// Delete multiple blobs by CID 216 222 pub async fn delete_many(&self, cids: Vec<Cid>) -> Result<()> { 217 223 let mut futures = Vec::with_capacity(cids.len()); 218 - 224 + 219 225 for cid in cids { 220 226 futures.push(self.delete_path(self.get_stored_path(cid))); 221 227 } 222 - 228 + 223 229 // Execute all delete operations concurrently 224 230 let results = futures::future::join_all(futures).await; 225 - 231 + 226 232 // Count errors but don't fail the operation 227 233 let error_count = results.iter().filter(|r| r.is_err()).count(); 228 234 if error_count > 0 { 229 - warn!("{} errors occurred while deleting {} blobs", error_count, results.len()); 235 + warn!( 236 + "{} errors occurred while deleting {} blobs", 237 + error_count, 238 + results.len() 239 + ); 230 240 } 231 - 241 + 232 242 Ok(()) 233 243 } 234 244 ··· 261 271 if !mov.from.exists() { 262 272 return Err(anyhow::Error::new(BlobError::BlobNotFoundError)); 263 273 } 264 - 274 + 265 275 // Ensure the target directory exists 266 276 if let Some(parent) = mov.to.parent() { 267 277 async_fs::create_dir_all(parent).await?; 268 278 } 269 - 279 + 270 280 // Copy first, then delete source after success 271 - async_fs::copy(&mov.from, &mov.to).await?; 281 + _ = async_fs::copy(&mov.from, &mov.to).await?; 272 282 async_fs::remove_file(&mov.from).await?; 273 - 283 + 274 284 debug!("Moved blob: {:?} -> {:?}", mov.from, mov.to); 275 285 Ok(()) 276 286 } 277 - } 287 + }