Monorepo for wisp.place — a static site hosting service built on top of the AT Protocol.

Add support for existing blob reuse in deployment process

+1
cli/.gitignore
··· 1 + test/ 1 2 .DS_STORE 2 3 jacquard/ 3 4 binaries/
+3
cli/Cargo.lock
··· 4385 4385 "jacquard-oauth", 4386 4386 "miette", 4387 4387 "mime_guess", 4388 + "multibase", 4389 + "multihash", 4388 4390 "reqwest", 4389 4391 "rustversion", 4390 4392 "serde", 4391 4393 "serde_json", 4394 + "sha2", 4392 4395 "shellexpand", 4393 4396 "tokio", 4394 4397 "walkdir",
+3
cli/Cargo.toml
··· 30 30 mime_guess = "2.0" 31 31 bytes = "1.10" 32 32 futures = "0.3.31" 33 + multihash = "0.19.3" 34 + multibase = "0.9" 35 + sha2 = "0.10"
+92
cli/src/blob_map.rs
··· 1 + use jacquard_common::types::blob::BlobRef; 2 + use jacquard_common::IntoStatic; 3 + use std::collections::HashMap; 4 + 5 + use crate::place_wisp::fs::{Directory, EntryNode}; 6 + 7 + /// Extract blob information from a directory tree 8 + /// Returns a map of file paths to their blob refs and CIDs 9 + /// 10 + /// This mirrors the TypeScript implementation in src/lib/wisp-utils.ts lines 275-302 11 + pub fn extract_blob_map( 12 + directory: &Directory, 13 + ) -> HashMap<String, (BlobRef<'static>, String)> { 14 + extract_blob_map_recursive(directory, String::new()) 15 + } 16 + 17 + fn extract_blob_map_recursive( 18 + directory: &Directory, 19 + current_path: String, 20 + ) -> HashMap<String, (BlobRef<'static>, String)> { 21 + let mut blob_map = HashMap::new(); 22 + 23 + for entry in &directory.entries { 24 + let full_path = if current_path.is_empty() { 25 + entry.name.to_string() 26 + } else { 27 + format!("{}/{}", current_path, entry.name) 28 + }; 29 + 30 + match &entry.node { 31 + EntryNode::File(file_node) => { 32 + // Extract CID from blob ref 33 + // BlobRef is an enum with Blob variant, which has a ref field (CidLink) 34 + let blob_ref = &file_node.blob; 35 + let cid_string = blob_ref.blob().r#ref.to_string(); 36 + 37 + // Store both normalized and full paths 38 + // Normalize by removing base folder prefix (e.g., "cobblemon/index.html" -> "index.html") 39 + let normalized_path = normalize_path(&full_path); 40 + 41 + blob_map.insert( 42 + normalized_path.clone(), 43 + (blob_ref.clone().into_static(), cid_string.clone()) 44 + ); 45 + 46 + // Also store the full path for matching 47 + if normalized_path != full_path { 48 + blob_map.insert( 49 + full_path, 50 + (blob_ref.clone().into_static(), cid_string) 51 + ); 52 + } 53 + } 54 + EntryNode::Directory(subdir) => { 55 + let sub_map = extract_blob_map_recursive(subdir, full_path); 56 + blob_map.extend(sub_map); 57 + } 58 + EntryNode::Unknown(_) => { 59 + // Skip unknown node types 60 + } 61 + } 62 + } 63 + 64 
+ blob_map 65 + } 66 + 67 + /// Normalize file path by removing base folder prefix 68 + /// Example: "cobblemon/index.html" -> "index.html" 69 + /// 70 + /// Mirrors TypeScript implementation at src/routes/wisp.ts line 291 71 + pub fn normalize_path(path: &str) -> String { 72 + // Remove base folder prefix (everything before first /) 73 + if let Some(idx) = path.find('/') { 74 + path[idx + 1..].to_string() 75 + } else { 76 + path.to_string() 77 + } 78 + } 79 + 80 + #[cfg(test)] 81 + mod tests { 82 + use super::*; 83 + 84 + #[test] 85 + fn test_normalize_path() { 86 + assert_eq!(normalize_path("index.html"), "index.html"); 87 + assert_eq!(normalize_path("cobblemon/index.html"), "index.html"); 88 + assert_eq!(normalize_path("folder/subfolder/file.txt"), "subfolder/file.txt"); 89 + assert_eq!(normalize_path("a/b/c/d.txt"), "b/c/d.txt"); 90 + } 91 + } 92 +
+66
cli/src/cid.rs
··· 1 + use jacquard_common::types::cid::IpldCid; 2 + use sha2::{Digest, Sha256}; 3 + 4 + /// Compute CID (Content Identifier) for blob content 5 + /// Uses the same algorithm as AT Protocol: CIDv1 with raw codec (0x55) and SHA-256 6 + /// 7 + /// CRITICAL: This must be called on BASE64-ENCODED GZIPPED content, not just gzipped content 8 + /// 9 + /// Based on @atproto/common/src/ipld.ts sha256RawToCid implementation 10 + pub fn compute_cid(content: &[u8]) -> String { 11 + // Use node crypto to compute sha256 hash (same as AT Protocol) 12 + let hash = Sha256::digest(content); 13 + 14 + // Create multihash (code 0x12 = sha2-256) 15 + let multihash = multihash::Multihash::wrap(0x12, &hash) 16 + .expect("SHA-256 hash should always fit in multihash"); 17 + 18 + // Create CIDv1 with raw codec (0x55) 19 + let cid = IpldCid::new_v1(0x55, multihash); 20 + 21 + // Convert to base32 string representation 22 + cid.to_string_of_base(multibase::Base::Base32Lower) 23 + .unwrap_or_else(|_| cid.to_string()) 24 + } 25 + 26 + #[cfg(test)] 27 + mod tests { 28 + use super::*; 29 + use base64::Engine; 30 + 31 + #[test] 32 + fn test_compute_cid() { 33 + // Test with a simple string: "hello" 34 + let content = b"hello"; 35 + let cid = compute_cid(content); 36 + 37 + // CID should start with 'baf' for raw codec base32 38 + assert!(cid.starts_with("baf")); 39 + } 40 + 41 + #[test] 42 + fn test_compute_cid_base64_encoded() { 43 + // Simulate the actual use case: gzipped then base64 encoded 44 + use flate2::write::GzEncoder; 45 + use flate2::Compression; 46 + use std::io::Write; 47 + 48 + let original = b"hello world"; 49 + 50 + // Gzip compress 51 + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); 52 + encoder.write_all(original).unwrap(); 53 + let gzipped = encoder.finish().unwrap(); 54 + 55 + // Base64 encode the gzipped data 56 + let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes(); 57 + 58 + // Compute CID on the base64 bytes 59 + let 
cid = compute_cid(&base64_bytes); 60 + 61 + // Should be a valid CID 62 + assert!(cid.starts_with("baf")); 63 + assert!(cid.len() > 10); 64 + } 65 + } 66 +
+121 -38
cli/src/main.rs
··· 1 1 mod builder_types; 2 2 mod place_wisp; 3 + mod cid; 4 + mod blob_map; 3 5 4 6 use clap::Parser; 5 7 use jacquard::CowStr; 6 - use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession}; 8 + use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession, AgentSession}; 7 9 use jacquard::oauth::client::OAuthClient; 8 10 use jacquard::oauth::loopback::LoopbackConfig; 9 11 use jacquard::prelude::IdentityResolver; ··· 11 13 use jacquard_common::types::blob::MimeType; 12 14 use miette::IntoDiagnostic; 13 15 use std::path::{Path, PathBuf}; 16 + use std::collections::HashMap; 14 17 use flate2::Compression; 15 18 use flate2::write::GzEncoder; 16 19 use std::io::Write; ··· 107 110 108 111 println!("Deploying site '{}'...", site_name); 109 112 110 - // Build directory tree 111 - let root_dir = build_directory(agent, &path).await?; 113 + // Try to fetch existing manifest for incremental updates 114 + let existing_blob_map: HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)> = { 115 + use jacquard_common::types::string::AtUri; 116 + 117 + // Get the DID for this session 118 + let session_info = agent.session_info().await; 119 + if let Some((did, _)) = session_info { 120 + // Construct the AT URI for the record 121 + let uri_string = format!("at://{}/place.wisp.fs/{}", did, site_name); 122 + if let Ok(uri) = AtUri::new(&uri_string) { 123 + match agent.get_record::<Fs>(&uri).await { 124 + Ok(response) => { 125 + match response.into_output() { 126 + Ok(record_output) => { 127 + let existing_manifest = record_output.value; 128 + let blob_map = blob_map::extract_blob_map(&existing_manifest.root); 129 + println!("Found existing manifest with {} files, checking for changes...", blob_map.len()); 130 + blob_map 131 + } 132 + Err(_) => { 133 + println!("No existing manifest found, uploading all files..."); 134 + HashMap::new() 135 + } 136 + } 137 + } 138 + Err(_) => { 139 + // Record doesn't exist yet - this 
is a new site 140 + println!("No existing manifest found, uploading all files..."); 141 + HashMap::new() 142 + } 143 + } 144 + } else { 145 + println!("No existing manifest found (invalid URI), uploading all files..."); 146 + HashMap::new() 147 + } 148 + } else { 149 + println!("No existing manifest found (could not get DID), uploading all files..."); 150 + HashMap::new() 151 + } 152 + }; 112 153 113 - // Count total files 114 - let file_count = count_files(&root_dir); 154 + // Build directory tree 155 + let (root_dir, total_files, reused_count) = build_directory(agent, &path, &existing_blob_map).await?; 156 + let uploaded_count = total_files - reused_count; 115 157 116 158 // Create the Fs record 117 159 let fs_record = Fs::new() 118 160 .site(CowStr::from(site_name.clone())) 119 161 .root(root_dir) 120 - .file_count(file_count as i64) 162 + .file_count(total_files as i64) 121 163 .created_at(Datetime::now()) 122 164 .build(); 123 165 ··· 132 174 .and_then(|s| s.split('/').next()) 133 175 .ok_or_else(|| miette::miette!("Failed to parse DID from URI"))?; 134 176 135 - println!("Deployed site '{}': {}", site_name, output.uri); 136 - println!("Available at: https://sites.wisp.place/{}/{}", did, site_name); 177 + println!("\n✓ Deployed site '{}': {}", site_name, output.uri); 178 + println!(" Total files: {} ({} reused, {} uploaded)", total_files, reused_count, uploaded_count); 179 + println!(" Available at: https://sites.wisp.place/{}/{}", did, site_name); 137 180 138 181 Ok(()) 139 182 } ··· 142 185 fn build_directory<'a>( 143 186 agent: &'a Agent<impl jacquard::client::AgentSession + IdentityResolver + 'a>, 144 187 dir_path: &'a Path, 145 - ) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<Directory<'static>>> + 'a>> 188 + existing_blobs: &'a HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>, 189 + ) -> std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<(Directory<'static>, usize, usize)>> + 'a>> 146 190 { 
147 191 Box::pin(async move { 148 192 // Collect all directory entries first ··· 177 221 } 178 222 179 223 // Process files concurrently with a limit of 5 180 - let file_entries: Vec<Entry> = stream::iter(file_tasks) 224 + let file_results: Vec<(Entry<'static>, bool)> = stream::iter(file_tasks) 181 225 .map(|(name, path)| async move { 182 - let file_node = process_file(agent, &path).await?; 183 - Ok::<_, miette::Report>(Entry::new() 226 + let (file_node, reused) = process_file(agent, &path, &name, existing_blobs).await?; 227 + let entry = Entry::new() 184 228 .name(CowStr::from(name)) 185 229 .node(EntryNode::File(Box::new(file_node))) 186 - .build()) 230 + .build(); 231 + Ok::<_, miette::Report>((entry, reused)) 187 232 }) 188 233 .buffer_unordered(5) 189 234 .collect::<Vec<_>>() 190 235 .await 191 236 .into_iter() 192 237 .collect::<miette::Result<Vec<_>>>()?; 238 + 239 + let mut file_entries = Vec::new(); 240 + let mut reused_count = 0; 241 + let mut total_files = 0; 242 + 243 + for (entry, reused) in file_results { 244 + file_entries.push(entry); 245 + total_files += 1; 246 + if reused { 247 + reused_count += 1; 248 + } 249 + } 193 250 194 251 // Process directories recursively (sequentially to avoid too much nesting) 195 252 let mut dir_entries = Vec::new(); 196 253 for (name, path) in dir_tasks { 197 - let subdir = build_directory(agent, &path).await?; 254 + let (subdir, sub_total, sub_reused) = build_directory(agent, &path, existing_blobs).await?; 198 255 dir_entries.push(Entry::new() 199 256 .name(CowStr::from(name)) 200 257 .node(EntryNode::Directory(Box::new(subdir))) 201 258 .build()); 259 + total_files += sub_total; 260 + reused_count += sub_reused; 202 261 } 203 262 204 263 // Combine file and directory entries 205 264 let mut entries = file_entries; 206 265 entries.extend(dir_entries); 207 266 208 - Ok(Directory::new() 267 + let directory = Directory::new() 209 268 .r#type(CowStr::from("directory")) 210 269 .entries(entries) 211 - .build()) 270 + 
.build(); 271 + 272 + Ok((directory, total_files, reused_count)) 212 273 }) 213 274 } 214 275 215 - /// Process a single file: gzip -> base64 -> upload blob 276 + /// Process a single file: gzip -> base64 -> upload blob (or reuse existing) 277 + /// Returns (File, reused: bool) 216 278 async fn process_file( 217 279 agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>, 218 280 file_path: &Path, 219 - ) -> miette::Result<File<'static>> 281 + file_name: &str, 282 + existing_blobs: &HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>, 283 + ) -> miette::Result<(File<'static>, bool)> 220 284 { 221 285 // Read file 222 286 let file_data = std::fs::read(file_path).into_diagnostic()?; ··· 234 298 // Base64 encode the gzipped data 235 299 let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes(); 236 300 237 - // Upload blob as octet-stream 301 + // Compute CID for this file (CRITICAL: on base64-encoded gzipped content) 302 + let file_cid = cid::compute_cid(&base64_bytes); 303 + 304 + // Normalize the file path for comparison 305 + let normalized_path = blob_map::normalize_path(file_name); 306 + 307 + // Check if we have an existing blob with the same CID 308 + let existing_blob = existing_blobs.get(&normalized_path) 309 + .or_else(|| existing_blobs.get(file_name)); 310 + 311 + if let Some((existing_blob_ref, existing_cid)) = existing_blob { 312 + if existing_cid == &file_cid { 313 + // CIDs match - reuse existing blob 314 + println!(" ✓ Reusing blob for {} (CID: {})", file_name, file_cid); 315 + return Ok(( 316 + File::new() 317 + .r#type(CowStr::from("file")) 318 + .blob(existing_blob_ref.clone()) 319 + .encoding(CowStr::from("gzip")) 320 + .mime_type(CowStr::from(original_mime)) 321 + .base64(true) 322 + .build(), 323 + true 324 + )); 325 + } 326 + } 327 + 328 + // File is new or changed - upload it 329 + println!(" ↑ Uploading {} ({} bytes, CID: {})", file_name, base64_bytes.len(), file_cid); 238 330 let 
blob = agent.upload_blob( 239 331 base64_bytes, 240 332 MimeType::new_static("application/octet-stream"), 241 333 ).await?; 242 334 243 - Ok(File::new() 244 - .r#type(CowStr::from("file")) 245 - .blob(blob) 246 - .encoding(CowStr::from("gzip")) 247 - .mime_type(CowStr::from(original_mime)) 248 - .base64(true) 249 - .build()) 335 + Ok(( 336 + File::new() 337 + .r#type(CowStr::from("file")) 338 + .blob(blob) 339 + .encoding(CowStr::from("gzip")) 340 + .mime_type(CowStr::from(original_mime)) 341 + .base64(true) 342 + .build(), 343 + false 344 + )) 250 345 } 251 346 252 - /// Count total files in a directory tree 253 - fn count_files(dir: &Directory) -> usize { 254 - let mut count = 0; 255 - for entry in &dir.entries { 256 - match &entry.node { 257 - EntryNode::File(_) => count += 1, 258 - EntryNode::Directory(subdir) => count += count_files(subdir), 259 - _ => {} // Unknown variants 260 - } 261 - } 262 - count 263 - }