Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol. wisp.place
96
fork

Configure Feed

Select the types of activity you want to include in your feed.

at e0857955536af5ac0682ae235ff53965d5f31364 857 lines 34 kB view raw
1mod builder_types; 2mod place_wisp; 3mod cid; 4mod blob_map; 5mod metadata; 6mod download; 7mod pull; 8mod serve; 9mod subfs_utils; 10mod redirects; 11mod ignore_patterns; 12 13use clap::{Parser, Subcommand}; 14use jacquard::CowStr; 15use jacquard::client::{Agent, FileAuthStore, AgentSessionExt, MemoryCredentialSession, AgentSession}; 16use jacquard::oauth::client::OAuthClient; 17use jacquard::oauth::loopback::LoopbackConfig; 18use jacquard::prelude::IdentityResolver; 19use jacquard_common::types::string::{Datetime, Rkey, RecordKey, AtUri}; 20use jacquard_common::types::blob::MimeType; 21use miette::IntoDiagnostic; 22use std::path::{Path, PathBuf}; 23use std::collections::HashMap; 24use flate2::Compression; 25use flate2::write::GzEncoder; 26use std::io::Write; 27use base64::Engine; 28use futures::stream::{self, StreamExt}; 29 30use place_wisp::fs::*; 31use place_wisp::settings::*; 32 33#[derive(Parser, Debug)] 34#[command(author, version, about = "wisp.place CLI tool")] 35struct Args { 36 #[command(subcommand)] 37 command: Option<Commands>, 38 39 // Deploy arguments (when no subcommand is specified) 40 /// Handle (e.g., alice.bsky.social), DID, or PDS URL 41 #[arg(global = true, conflicts_with = "command")] 42 input: Option<CowStr<'static>>, 43 44 /// Path to the directory containing your static site 45 #[arg(short, long, global = true, conflicts_with = "command")] 46 path: Option<PathBuf>, 47 48 /// Site name (defaults to directory name) 49 #[arg(short, long, global = true, conflicts_with = "command")] 50 site: Option<String>, 51 52 /// Path to auth store file 53 #[arg(long, global = true, conflicts_with = "command")] 54 store: Option<String>, 55 56 /// App Password for authentication 57 #[arg(long, global = true, conflicts_with = "command")] 58 password: Option<CowStr<'static>>, 59 60 /// Enable directory listing mode for paths without index files 61 #[arg(long, global = true, conflicts_with = "command")] 62 directory: bool, 63 64 /// Enable SPA mode (serve 
index.html for all routes) 65 #[arg(long, global = true, conflicts_with = "command")] 66 spa: bool, 67} 68 69#[derive(Subcommand, Debug)] 70enum Commands { 71 /// Deploy a static site to wisp.place (default command) 72 Deploy { 73 /// Handle (e.g., alice.bsky.social), DID, or PDS URL 74 input: CowStr<'static>, 75 76 /// Path to the directory containing your static site 77 #[arg(short, long, default_value = ".")] 78 path: PathBuf, 79 80 /// Site name (defaults to directory name) 81 #[arg(short, long)] 82 site: Option<String>, 83 84 /// Path to auth store file (will be created if missing, only used with OAuth) 85 #[arg(long, default_value = "/tmp/wisp-oauth-session.json")] 86 store: String, 87 88 /// App Password for authentication (alternative to OAuth) 89 #[arg(long)] 90 password: Option<CowStr<'static>>, 91 92 /// Enable directory listing mode for paths without index files 93 #[arg(long)] 94 directory: bool, 95 96 /// Enable SPA mode (serve index.html for all routes) 97 #[arg(long)] 98 spa: bool, 99 }, 100 /// Pull a site from the PDS to a local directory 101 Pull { 102 /// Handle (e.g., alice.bsky.social) or DID 103 input: CowStr<'static>, 104 105 /// Site name (record key) 106 #[arg(short, long)] 107 site: String, 108 109 /// Output directory for the downloaded site 110 #[arg(short, long, default_value = ".")] 111 output: PathBuf, 112 }, 113 /// Serve a site locally with real-time firehose updates 114 Serve { 115 /// Handle (e.g., alice.bsky.social) or DID 116 input: CowStr<'static>, 117 118 /// Site name (record key) 119 #[arg(short, long)] 120 site: String, 121 122 /// Output directory for the site files 123 #[arg(short, long, default_value = ".")] 124 output: PathBuf, 125 126 /// Port to serve on 127 #[arg(short, long, default_value = "8080")] 128 port: u16, 129 }, 130} 131 132#[tokio::main] 133async fn main() -> miette::Result<()> { 134 let args = Args::parse(); 135 136 let result = match args.command { 137 Some(Commands::Deploy { input, path, site, store, 
password, directory, spa }) => { 138 // Dispatch to appropriate authentication method 139 if let Some(password) = password { 140 run_with_app_password(input, password, path, site, directory, spa).await 141 } else { 142 run_with_oauth(input, store, path, site, directory, spa).await 143 } 144 } 145 Some(Commands::Pull { input, site, output }) => { 146 pull::pull_site(input, CowStr::from(site), output).await 147 } 148 Some(Commands::Serve { input, site, output, port }) => { 149 serve::serve_site(input, CowStr::from(site), output, port).await 150 } 151 None => { 152 // Legacy mode: if input is provided, assume deploy command 153 if let Some(input) = args.input { 154 let path = args.path.unwrap_or_else(|| PathBuf::from(".")); 155 let store = args.store.unwrap_or_else(|| "/tmp/wisp-oauth-session.json".to_string()); 156 157 // Dispatch to appropriate authentication method 158 if let Some(password) = args.password { 159 run_with_app_password(input, password, path, args.site, args.directory, args.spa).await 160 } else { 161 run_with_oauth(input, store, path, args.site, args.directory, args.spa).await 162 } 163 } else { 164 // No command and no input, show help 165 use clap::CommandFactory; 166 Args::command().print_help().into_diagnostic()?; 167 Ok(()) 168 } 169 } 170 }; 171 172 // Force exit to avoid hanging on background tasks/connections 173 match result { 174 Ok(_) => std::process::exit(0), 175 Err(e) => { 176 eprintln!("{:?}", e); 177 std::process::exit(1) 178 } 179 } 180} 181 182/// Run deployment with app password authentication 183async fn run_with_app_password( 184 input: CowStr<'static>, 185 password: CowStr<'static>, 186 path: PathBuf, 187 site: Option<String>, 188 directory: bool, 189 spa: bool, 190) -> miette::Result<()> { 191 let (session, auth) = 192 MemoryCredentialSession::authenticated(input, password, None, None).await?; 193 println!("Signed in as {}", auth.handle); 194 195 let agent: Agent<_> = Agent::from(session); 196 deploy_site(&agent, path, site, 
directory, spa).await 197} 198 199/// Run deployment with OAuth authentication 200async fn run_with_oauth( 201 input: CowStr<'static>, 202 store: String, 203 path: PathBuf, 204 site: Option<String>, 205 directory: bool, 206 spa: bool, 207) -> miette::Result<()> { 208 use jacquard::oauth::scopes::Scope; 209 use jacquard::oauth::atproto::AtprotoClientMetadata; 210 use jacquard::oauth::session::ClientData; 211 use url::Url; 212 213 // Request the necessary scopes for wisp.place (including settings) 214 let scopes = Scope::parse_multiple("atproto repo:place.wisp.fs repo:place.wisp.subfs repo:place.wisp.settings blob:*/*") 215 .map_err(|e| miette::miette!("Failed to parse scopes: {:?}", e))?; 216 217 // Create redirect URIs that match the loopback server (port 4000, path /oauth/callback) 218 let redirect_uris = vec![ 219 Url::parse("http://127.0.0.1:4000/oauth/callback").into_diagnostic()?, 220 Url::parse("http://[::1]:4000/oauth/callback").into_diagnostic()?, 221 ]; 222 223 // Create client metadata with matching redirect URIs and scopes 224 let client_data = ClientData { 225 keyset: None, 226 config: AtprotoClientMetadata::new_localhost( 227 Some(redirect_uris), 228 Some(scopes), 229 ), 230 }; 231 232 let oauth = OAuthClient::new(FileAuthStore::new(&store), client_data); 233 234 let session = oauth 235 .login_with_local_server(input, Default::default(), LoopbackConfig::default()) 236 .await?; 237 238 let agent: Agent<_> = Agent::from(session); 239 deploy_site(&agent, path, site, directory, spa).await 240} 241 242/// Deploy the site using the provided agent 243async fn deploy_site( 244 agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>, 245 path: PathBuf, 246 site: Option<String>, 247 directory_listing: bool, 248 spa_mode: bool, 249) -> miette::Result<()> { 250 // Verify the path exists 251 if !path.exists() { 252 return Err(miette::miette!("Path does not exist: {}", path.display())); 253 } 254 255 // Get site name 256 let site_name = 
site.unwrap_or_else(|| { 257 path 258 .file_name() 259 .and_then(|n| n.to_str()) 260 .unwrap_or("site") 261 .to_string() 262 }); 263 264 println!("Deploying site '{}'...", site_name); 265 266 // Try to fetch existing manifest for incremental updates 267 let (existing_blob_map, old_subfs_uris): (HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>, Vec<(String, String)>) = { 268 use jacquard_common::types::string::AtUri; 269 270 // Get the DID for this session 271 let session_info = agent.session_info().await; 272 if let Some((did, _)) = session_info { 273 // Construct the AT URI for the record 274 let uri_string = format!("at://{}/place.wisp.fs/{}", did, site_name); 275 if let Ok(uri) = AtUri::new(&uri_string) { 276 match agent.get_record::<Fs>(&uri).await { 277 Ok(response) => { 278 match response.into_output() { 279 Ok(record_output) => { 280 let existing_manifest = record_output.value; 281 let mut blob_map = blob_map::extract_blob_map(&existing_manifest.root); 282 println!("Found existing manifest with {} files in main record", blob_map.len()); 283 284 // Extract subfs URIs from main record 285 let subfs_uris = subfs_utils::extract_subfs_uris(&existing_manifest.root, String::new()); 286 287 if !subfs_uris.is_empty() { 288 println!("Found {} subfs records, fetching for blob reuse...", subfs_uris.len()); 289 290 // Merge blob maps from all subfs records 291 match subfs_utils::merge_subfs_blob_maps(agent, subfs_uris.clone(), &mut blob_map).await { 292 Ok(merged_count) => { 293 println!("Total blob map: {} files (main + {} from subfs)", blob_map.len(), merged_count); 294 } 295 Err(e) => { 296 eprintln!("⚠️ Failed to merge some subfs blob maps: {}", e); 297 } 298 } 299 300 (blob_map, subfs_uris) 301 } else { 302 (blob_map, Vec::new()) 303 } 304 } 305 Err(_) => { 306 println!("No existing manifest found, uploading all files..."); 307 (HashMap::new(), Vec::new()) 308 } 309 } 310 } 311 Err(_) => { 312 // Record doesn't exist yet - this is a new site 
313 println!("No existing manifest found, uploading all files..."); 314 (HashMap::new(), Vec::new()) 315 } 316 } 317 } else { 318 println!("No existing manifest found (invalid URI), uploading all files..."); 319 (HashMap::new(), Vec::new()) 320 } 321 } else { 322 println!("No existing manifest found (could not get DID), uploading all files..."); 323 (HashMap::new(), Vec::new()) 324 } 325 }; 326 327 // Build directory tree with ignore patterns 328 let ignore_matcher = ignore_patterns::IgnoreMatcher::new(&path)?; 329 let (root_dir, total_files, reused_count) = build_directory(agent, &path, &existing_blob_map, String::new(), &ignore_matcher).await?; 330 let uploaded_count = total_files - reused_count; 331 332 // Check if we need to split into subfs records 333 const MAX_MANIFEST_SIZE: usize = 140 * 1024; // 140KB (PDS limit is 150KB) 334 const FILE_COUNT_THRESHOLD: usize = 250; // Start splitting at this many files 335 const TARGET_FILE_COUNT: usize = 200; // Keep main manifest under this 336 337 let mut working_directory = root_dir; 338 let mut current_file_count = total_files; 339 let mut new_subfs_uris: Vec<(String, String)> = Vec::new(); 340 341 // Estimate initial manifest size 342 let mut manifest_size = subfs_utils::estimate_directory_size(&working_directory); 343 344 if total_files >= FILE_COUNT_THRESHOLD || manifest_size > MAX_MANIFEST_SIZE { 345 println!("\n⚠️ Large site detected ({} files, {:.1}KB manifest), splitting into subfs records...", 346 total_files, manifest_size as f64 / 1024.0); 347 348 let mut attempts = 0; 349 const MAX_SPLIT_ATTEMPTS: usize = 50; 350 351 while (manifest_size > MAX_MANIFEST_SIZE || current_file_count > TARGET_FILE_COUNT) && attempts < MAX_SPLIT_ATTEMPTS { 352 attempts += 1; 353 354 // Find large directories to split 355 let directories = subfs_utils::find_large_directories(&working_directory, String::new()); 356 357 if let Some(largest_dir) = directories.first() { 358 println!(" Split #{}: {} ({} files, {:.1}KB)", 359 attempts, 
largest_dir.path, largest_dir.file_count, largest_dir.size as f64 / 1024.0); 360 361 // Check if this directory is itself too large for a single subfs record 362 const MAX_SUBFS_SIZE: usize = 75 * 1024; // 75KB soft limit for safety 363 let mut subfs_uri = String::new(); 364 365 if largest_dir.size > MAX_SUBFS_SIZE { 366 // Need to split this directory into multiple chunks 367 println!(" → Directory too large, splitting into chunks..."); 368 let chunks = subfs_utils::split_directory_into_chunks(&largest_dir.directory, MAX_SUBFS_SIZE); 369 println!(" → Created {} chunks", chunks.len()); 370 371 // Upload each chunk as a subfs record 372 let mut chunk_uris = Vec::new(); 373 for (i, chunk) in chunks.iter().enumerate() { 374 use jacquard_common::types::string::Tid; 375 let chunk_tid = Tid::now_0(); 376 let chunk_rkey = chunk_tid.to_string(); 377 378 let chunk_file_count = subfs_utils::count_files_in_directory(chunk); 379 let chunk_size = subfs_utils::estimate_directory_size(chunk); 380 381 let chunk_manifest = crate::place_wisp::subfs::SubfsRecord::new() 382 .root(convert_fs_dir_to_subfs_dir(chunk.clone())) 383 .file_count(Some(chunk_file_count as i64)) 384 .created_at(Datetime::now()) 385 .build(); 386 387 println!(" → Uploading chunk {}/{} ({} files, {:.1}KB)...", 388 i + 1, chunks.len(), chunk_file_count, chunk_size as f64 / 1024.0); 389 390 let chunk_output = agent.put_record( 391 RecordKey::from(Rkey::new(&chunk_rkey).into_diagnostic()?), 392 chunk_manifest 393 ).await.into_diagnostic()?; 394 395 let chunk_uri = chunk_output.uri.to_string(); 396 chunk_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i))); 397 new_subfs_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i))); 398 } 399 400 // Create a parent subfs record that references all chunks 401 // Each chunk reference MUST have flat: true to merge chunk contents 402 println!(" → Creating parent subfs with {} chunk references...", chunk_uris.len()); 403 use 
jacquard_common::CowStr; 404 use crate::place_wisp::fs::{Subfs}; 405 406 // Convert to fs::Subfs (which has the 'flat' field) instead of subfs::Subfs 407 let parent_entries_fs: Vec<Entry> = chunk_uris.iter().enumerate().map(|(i, (uri, _))| { 408 let uri_string = uri.clone(); 409 let at_uri = AtUri::new_cow(CowStr::from(uri_string)).expect("valid URI"); 410 Entry::new() 411 .name(CowStr::from(format!("chunk{}", i))) 412 .node(EntryNode::Subfs(Box::new( 413 Subfs::new() 414 .r#type(CowStr::from("subfs")) 415 .subject(at_uri) 416 .flat(Some(true)) // EXPLICITLY TRUE - merge chunk contents 417 .build() 418 ))) 419 .build() 420 }).collect(); 421 422 let parent_root_fs = Directory::new() 423 .r#type(CowStr::from("directory")) 424 .entries(parent_entries_fs) 425 .build(); 426 427 // Convert to subfs::Directory for the parent subfs record 428 let parent_root_subfs = convert_fs_dir_to_subfs_dir(parent_root_fs); 429 430 use jacquard_common::types::string::Tid; 431 let parent_tid = Tid::now_0(); 432 let parent_rkey = parent_tid.to_string(); 433 434 let parent_manifest = crate::place_wisp::subfs::SubfsRecord::new() 435 .root(parent_root_subfs) 436 .file_count(Some(largest_dir.file_count as i64)) 437 .created_at(Datetime::now()) 438 .build(); 439 440 let parent_output = agent.put_record( 441 RecordKey::from(Rkey::new(&parent_rkey).into_diagnostic()?), 442 parent_manifest 443 ).await.into_diagnostic()?; 444 445 subfs_uri = parent_output.uri.to_string(); 446 println!(" ✅ Created parent subfs with chunks (flat=true on each chunk): {}", subfs_uri); 447 } else { 448 // Directory fits in a single subfs record 449 use jacquard_common::types::string::Tid; 450 let subfs_tid = Tid::now_0(); 451 let subfs_rkey = subfs_tid.to_string(); 452 453 let subfs_manifest = crate::place_wisp::subfs::SubfsRecord::new() 454 .root(convert_fs_dir_to_subfs_dir(largest_dir.directory.clone())) 455 .file_count(Some(largest_dir.file_count as i64)) 456 .created_at(Datetime::now()) 457 .build(); 458 459 // 
Upload subfs record 460 let subfs_output = agent.put_record( 461 RecordKey::from(Rkey::new(&subfs_rkey).into_diagnostic()?), 462 subfs_manifest 463 ).await.into_diagnostic()?; 464 465 subfs_uri = subfs_output.uri.to_string(); 466 println!(" ✅ Created subfs: {}", subfs_uri); 467 } 468 469 // Replace directory with subfs node (flat: false to preserve directory structure) 470 working_directory = subfs_utils::replace_directory_with_subfs( 471 working_directory, 472 &largest_dir.path, 473 &subfs_uri, 474 false // Preserve directory - the chunks inside have flat=true 475 )?; 476 477 new_subfs_uris.push((subfs_uri, largest_dir.path.clone())); 478 current_file_count -= largest_dir.file_count; 479 480 // Recalculate manifest size 481 manifest_size = subfs_utils::estimate_directory_size(&working_directory); 482 println!(" → Manifest now {:.1}KB with {} files ({} subfs total)", 483 manifest_size as f64 / 1024.0, current_file_count, new_subfs_uris.len()); 484 485 if manifest_size <= MAX_MANIFEST_SIZE && current_file_count <= TARGET_FILE_COUNT { 486 println!("✅ Manifest now fits within limits"); 487 break; 488 } 489 } else { 490 println!(" No more subdirectories to split - stopping"); 491 break; 492 } 493 } 494 495 if attempts >= MAX_SPLIT_ATTEMPTS { 496 return Err(miette::miette!( 497 "Exceeded maximum split attempts ({}). 
Manifest still too large: {:.1}KB with {} files", 498 MAX_SPLIT_ATTEMPTS, 499 manifest_size as f64 / 1024.0, 500 current_file_count 501 )); 502 } 503 504 println!("✅ Split complete: {} subfs records, {} files in main manifest, {:.1}KB", 505 new_subfs_uris.len(), current_file_count, manifest_size as f64 / 1024.0); 506 } else { 507 println!("Manifest created ({} files, {:.1}KB) - no splitting needed", 508 total_files, manifest_size as f64 / 1024.0); 509 } 510 511 // Create the final Fs record 512 let fs_record = Fs::new() 513 .site(CowStr::from(site_name.clone())) 514 .root(working_directory) 515 .file_count(current_file_count as i64) 516 .created_at(Datetime::now()) 517 .build(); 518 519 // Use site name as the record key 520 let rkey = Rkey::new(&site_name).map_err(|e| miette::miette!("Invalid rkey: {}", e))?; 521 let output = agent.put_record(RecordKey::from(rkey), fs_record).await?; 522 523 // Extract DID from the AT URI (format: at://did:plc:xxx/collection/rkey) 524 let uri_str = output.uri.to_string(); 525 let did = uri_str 526 .strip_prefix("at://") 527 .and_then(|s| s.split('/').next()) 528 .ok_or_else(|| miette::miette!("Failed to parse DID from URI"))?; 529 530 println!("\n✓ Deployed site '{}': {}", site_name, output.uri); 531 println!(" Total files: {} ({} reused, {} uploaded)", total_files, reused_count, uploaded_count); 532 println!(" Available at: https://sites.wisp.place/{}/{}", did, site_name); 533 534 // Clean up old subfs records 535 if !old_subfs_uris.is_empty() { 536 println!("\nCleaning up {} old subfs records...", old_subfs_uris.len()); 537 538 let mut deleted_count = 0; 539 let mut failed_count = 0; 540 541 for (uri, _path) in old_subfs_uris { 542 match subfs_utils::delete_subfs_record(agent, &uri).await { 543 Ok(_) => { 544 deleted_count += 1; 545 println!(" 🗑️ Deleted old subfs: {}", uri); 546 } 547 Err(e) => { 548 failed_count += 1; 549 eprintln!(" ⚠️ Failed to delete {}: {}", uri, e); 550 } 551 } 552 } 553 554 if failed_count > 0 { 555 
eprintln!("⚠️ Cleanup completed with {} deleted, {} failed", deleted_count, failed_count); 556 } else { 557 println!("✅ Cleanup complete: {} old subfs records deleted", deleted_count); 558 } 559 } 560 561 // Upload settings if either flag is set 562 if directory_listing || spa_mode { 563 // Validate mutual exclusivity 564 if directory_listing && spa_mode { 565 return Err(miette::miette!("Cannot enable both --directory and --SPA modes")); 566 } 567 568 println!("\n⚙️ Uploading site settings..."); 569 570 // Build settings record 571 let mut settings_builder = Settings::new(); 572 573 if directory_listing { 574 settings_builder = settings_builder.directory_listing(Some(true)); 575 println!(" • Directory listing: enabled"); 576 } 577 578 if spa_mode { 579 settings_builder = settings_builder.spa_mode(Some(CowStr::from("index.html"))); 580 println!(" • SPA mode: enabled (serving index.html for all routes)"); 581 } 582 583 let settings_record = settings_builder.build(); 584 585 // Upload settings record with same rkey as site 586 let rkey = Rkey::new(&site_name).map_err(|e| miette::miette!("Invalid rkey: {}", e))?; 587 match agent.put_record(RecordKey::from(rkey), settings_record).await { 588 Ok(settings_output) => { 589 println!("✅ Settings uploaded: {}", settings_output.uri); 590 } 591 Err(e) => { 592 eprintln!("⚠️ Failed to upload settings: {}", e); 593 eprintln!(" Site was deployed successfully, but settings may need to be configured manually."); 594 } 595 } 596 } 597 598 Ok(()) 599} 600 601/// Recursively build a Directory from a filesystem path 602/// current_path is the path from the root of the site (e.g., "" for root, "config" for config dir) 603fn build_directory<'a>( 604 agent: &'a Agent<impl jacquard::client::AgentSession + IdentityResolver + 'a>, 605 dir_path: &'a Path, 606 existing_blobs: &'a HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>, 607 current_path: String, 608 ignore_matcher: &'a ignore_patterns::IgnoreMatcher, 609) -> 
std::pin::Pin<Box<dyn std::future::Future<Output = miette::Result<(Directory<'static>, usize, usize)>> + 'a>> 610{ 611 Box::pin(async move { 612 // Collect all directory entries first 613 let dir_entries: Vec<_> = std::fs::read_dir(dir_path) 614 .into_diagnostic()? 615 .collect::<Result<Vec<_>, _>>() 616 .into_diagnostic()?; 617 618 // Separate files and directories 619 let mut file_tasks = Vec::new(); 620 let mut dir_tasks = Vec::new(); 621 622 for entry in dir_entries { 623 let path = entry.path(); 624 let name = entry.file_name(); 625 let name_str = name.to_str() 626 .ok_or_else(|| miette::miette!("Invalid filename: {:?}", name))? 627 .to_string(); 628 629 // Construct full path for ignore checking 630 let full_path = if current_path.is_empty() { 631 name_str.clone() 632 } else { 633 format!("{}/{}", current_path, name_str) 634 }; 635 636 // Skip files/directories that match ignore patterns 637 if ignore_matcher.is_ignored(&full_path) || ignore_matcher.is_filename_ignored(&name_str) { 638 continue; 639 } 640 641 let metadata = entry.metadata().into_diagnostic()?; 642 643 if metadata.is_file() { 644 // Construct full path for this file (for blob map lookup) 645 let full_path = if current_path.is_empty() { 646 name_str.clone() 647 } else { 648 format!("{}/{}", current_path, name_str) 649 }; 650 file_tasks.push((name_str, path, full_path)); 651 } else if metadata.is_dir() { 652 dir_tasks.push((name_str, path)); 653 } 654 } 655 656 // Process files concurrently with a limit of 5 657 let file_results: Vec<(Entry<'static>, bool)> = stream::iter(file_tasks) 658 .map(|(name, path, full_path)| async move { 659 let (file_node, reused) = process_file(agent, &path, &full_path, existing_blobs).await?; 660 let entry = Entry::new() 661 .name(CowStr::from(name)) 662 .node(EntryNode::File(Box::new(file_node))) 663 .build(); 664 Ok::<_, miette::Report>((entry, reused)) 665 }) 666 .buffer_unordered(5) 667 .collect::<Vec<_>>() 668 .await 669 .into_iter() 670 
.collect::<miette::Result<Vec<_>>>()?; 671 672 let mut file_entries = Vec::new(); 673 let mut reused_count = 0; 674 let mut total_files = 0; 675 676 for (entry, reused) in file_results { 677 file_entries.push(entry); 678 total_files += 1; 679 if reused { 680 reused_count += 1; 681 } 682 } 683 684 // Process directories recursively (sequentially to avoid too much nesting) 685 let mut dir_entries = Vec::new(); 686 for (name, path) in dir_tasks { 687 // Construct full path for subdirectory 688 let subdir_path = if current_path.is_empty() { 689 name.clone() 690 } else { 691 format!("{}/{}", current_path, name) 692 }; 693 let (subdir, sub_total, sub_reused) = build_directory(agent, &path, existing_blobs, subdir_path, ignore_matcher).await?; 694 dir_entries.push(Entry::new() 695 .name(CowStr::from(name)) 696 .node(EntryNode::Directory(Box::new(subdir))) 697 .build()); 698 total_files += sub_total; 699 reused_count += sub_reused; 700 } 701 702 // Combine file and directory entries 703 let mut entries = file_entries; 704 entries.extend(dir_entries); 705 706 let directory = Directory::new() 707 .r#type(CowStr::from("directory")) 708 .entries(entries) 709 .build(); 710 711 Ok((directory, total_files, reused_count)) 712 }) 713} 714 715/// Process a single file: gzip -> base64 -> upload blob (or reuse existing) 716/// Returns (File, reused: bool) 717/// file_path_key is the full path from the site root (e.g., "config/file.json") for blob map lookup 718/// 719/// Special handling: _redirects files are NOT compressed (uploaded as-is) 720async fn process_file( 721 agent: &Agent<impl jacquard::client::AgentSession + IdentityResolver>, 722 file_path: &Path, 723 file_path_key: &str, 724 existing_blobs: &HashMap<String, (jacquard_common::types::blob::BlobRef<'static>, String)>, 725) -> miette::Result<(File<'static>, bool)> 726{ 727 // Read file 728 let file_data = std::fs::read(file_path).into_diagnostic()?; 729 730 // Detect original MIME type 731 let original_mime = 
mime_guess::from_path(file_path) 732 .first_or_octet_stream() 733 .to_string(); 734 735 // Check if this is a _redirects file (don't compress it) 736 let is_redirects_file = file_path.file_name() 737 .and_then(|n| n.to_str()) 738 .map(|n| n == "_redirects") 739 .unwrap_or(false); 740 741 let (upload_bytes, encoding, is_base64) = if is_redirects_file { 742 // Don't compress _redirects - upload as-is 743 (file_data.clone(), None, false) 744 } else { 745 // Gzip compress 746 let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); 747 encoder.write_all(&file_data).into_diagnostic()?; 748 let gzipped = encoder.finish().into_diagnostic()?; 749 750 // Base64 encode the gzipped data 751 let base64_bytes = base64::prelude::BASE64_STANDARD.encode(&gzipped).into_bytes(); 752 (base64_bytes, Some("gzip"), true) 753 }; 754 755 // Compute CID for this file 756 let file_cid = cid::compute_cid(&upload_bytes); 757 758 // Check if we have an existing blob with the same CID 759 let existing_blob = existing_blobs.get(file_path_key); 760 761 if let Some((existing_blob_ref, existing_cid)) = existing_blob { 762 if existing_cid == &file_cid { 763 // CIDs match - reuse existing blob 764 println!(" ✓ Reusing blob for {} (CID: {})", file_path_key, file_cid); 765 let mut file_builder = File::new() 766 .r#type(CowStr::from("file")) 767 .blob(existing_blob_ref.clone()) 768 .mime_type(CowStr::from(original_mime)); 769 770 if let Some(enc) = encoding { 771 file_builder = file_builder.encoding(CowStr::from(enc)); 772 } 773 if is_base64 { 774 file_builder = file_builder.base64(true); 775 } 776 777 return Ok((file_builder.build(), true)); 778 } else { 779 // CID mismatch - file changed 780 println!(" → File changed: {} (old CID: {}, new CID: {})", file_path_key, existing_cid, file_cid); 781 } 782 } else { 783 // File not in existing blob map 784 if file_path_key.starts_with("imgs/") { 785 println!(" → New file (not in blob map): {}", file_path_key); 786 } 787 } 788 789 // File is new or 
changed - upload it 790 let mime_type = if is_redirects_file { 791 MimeType::new_static("text/plain") 792 } else { 793 MimeType::new_static("application/octet-stream") 794 }; 795 796 println!(" ↑ Uploading {} ({} bytes, CID: {})", file_path_key, upload_bytes.len(), file_cid); 797 let blob = agent.upload_blob(upload_bytes, mime_type).await?; 798 799 let mut file_builder = File::new() 800 .r#type(CowStr::from("file")) 801 .blob(blob) 802 .mime_type(CowStr::from(original_mime)); 803 804 if let Some(enc) = encoding { 805 file_builder = file_builder.encoding(CowStr::from(enc)); 806 } 807 if is_base64 { 808 file_builder = file_builder.base64(true); 809 } 810 811 Ok((file_builder.build(), false)) 812} 813 814/// Convert fs::Directory to subfs::Directory 815/// They have the same structure, but different types 816fn convert_fs_dir_to_subfs_dir(fs_dir: place_wisp::fs::Directory<'static>) -> place_wisp::subfs::Directory<'static> { 817 use place_wisp::subfs::{Directory as SubfsDirectory, Entry as SubfsEntry, EntryNode as SubfsEntryNode, File as SubfsFile}; 818 819 let subfs_entries: Vec<SubfsEntry> = fs_dir.entries.into_iter().map(|entry| { 820 let node = match entry.node { 821 place_wisp::fs::EntryNode::File(file) => { 822 SubfsEntryNode::File(Box::new(SubfsFile::new() 823 .r#type(file.r#type) 824 .blob(file.blob) 825 .encoding(file.encoding) 826 .mime_type(file.mime_type) 827 .base64(file.base64) 828 .build())) 829 } 830 place_wisp::fs::EntryNode::Directory(dir) => { 831 SubfsEntryNode::Directory(Box::new(convert_fs_dir_to_subfs_dir(*dir))) 832 } 833 place_wisp::fs::EntryNode::Subfs(subfs) => { 834 // Nested subfs in the directory we're converting 835 // Note: subfs::Subfs doesn't have the 'flat' field - that's only in fs::Subfs 836 SubfsEntryNode::Subfs(Box::new(place_wisp::subfs::Subfs::new() 837 .r#type(subfs.r#type) 838 .subject(subfs.subject) 839 .build())) 840 } 841 place_wisp::fs::EntryNode::Unknown(unknown) => { 842 SubfsEntryNode::Unknown(unknown) 843 } 844 }; 845 
846 SubfsEntry::new() 847 .name(entry.name) 848 .node(node) 849 .build() 850 }).collect(); 851 852 SubfsDirectory::new() 853 .r#type(fs_dir.r#type) 854 .entries(subfs_entries) 855 .build() 856} 857