Monorepo for wisp.place — a static site hosting service built on top of the AT Protocol.
96
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix subfs nesting

nekomimi.pet e3f99bc1 c544254d

verified
+340 -85
+115 -19
cli/src/main.rs
··· 15 use jacquard::oauth::client::OAuthClient; 16 use jacquard::oauth::loopback::LoopbackConfig; 17 use jacquard::prelude::IdentityResolver; 18 - use jacquard_common::types::string::{Datetime, Rkey, RecordKey}; 19 use jacquard_common::types::blob::MimeType; 20 use miette::IntoDiagnostic; 21 use std::path::{Path, PathBuf}; ··· 356 println!(" Split #{}: {} ({} files, {:.1}KB)", 357 attempts, largest_dir.path, largest_dir.file_count, largest_dir.size as f64 / 1024.0); 358 359 - // Create a subfs record for this directory 360 - use jacquard_common::types::string::Tid; 361 - let subfs_tid = Tid::now_0(); 362 - let subfs_rkey = subfs_tid.to_string(); 363 364 - let subfs_manifest = crate::place_wisp::subfs::SubfsRecord::new() 365 - .root(convert_fs_dir_to_subfs_dir(largest_dir.directory.clone())) 366 - .file_count(Some(largest_dir.file_count as i64)) 367 - .created_at(Datetime::now()) 368 - .build(); 369 370 - // Upload subfs record 371 - let subfs_output = agent.put_record( 372 - RecordKey::from(Rkey::new(&subfs_rkey).into_diagnostic()?), 373 - subfs_manifest 374 - ).await.into_diagnostic()?; 375 376 - let subfs_uri = subfs_output.uri.to_string(); 377 - println!(" ✅ Created subfs: {}", subfs_uri); 378 379 - // Replace directory with subfs node (flat: false to preserve structure) 380 working_directory = subfs_utils::replace_directory_with_subfs( 381 working_directory, 382 &largest_dir.path, 383 &subfs_uri, 384 - false // Preserve directory structure 385 )?; 386 387 new_subfs_uris.push((subfs_uri, largest_dir.path.clone())); ··· 729 } 730 731 return Ok((file_builder.build(), true)); 732 } 733 } 734
··· 15 use jacquard::oauth::client::OAuthClient; 16 use jacquard::oauth::loopback::LoopbackConfig; 17 use jacquard::prelude::IdentityResolver; 18 + use jacquard_common::types::string::{Datetime, Rkey, RecordKey, AtUri}; 19 use jacquard_common::types::blob::MimeType; 20 use miette::IntoDiagnostic; 21 use std::path::{Path, PathBuf}; ··· 356 println!(" Split #{}: {} ({} files, {:.1}KB)", 357 attempts, largest_dir.path, largest_dir.file_count, largest_dir.size as f64 / 1024.0); 358 359 + // Check if this directory is itself too large for a single subfs record 360 + const MAX_SUBFS_SIZE: usize = 75 * 1024; // 75KB soft limit for safety 361 + let mut subfs_uri = String::new(); 362 + 363 + if largest_dir.size > MAX_SUBFS_SIZE { 364 + // Need to split this directory into multiple chunks 365 + println!(" → Directory too large, splitting into chunks..."); 366 + let chunks = subfs_utils::split_directory_into_chunks(&largest_dir.directory, MAX_SUBFS_SIZE); 367 + println!(" → Created {} chunks", chunks.len()); 368 + 369 + // Upload each chunk as a subfs record 370 + let mut chunk_uris = Vec::new(); 371 + for (i, chunk) in chunks.iter().enumerate() { 372 + use jacquard_common::types::string::Tid; 373 + let chunk_tid = Tid::now_0(); 374 + let chunk_rkey = chunk_tid.to_string(); 375 + 376 + let chunk_file_count = subfs_utils::count_files_in_directory(chunk); 377 + let chunk_size = subfs_utils::estimate_directory_size(chunk); 378 379 + let chunk_manifest = crate::place_wisp::subfs::SubfsRecord::new() 380 + .root(convert_fs_dir_to_subfs_dir(chunk.clone())) 381 + .file_count(Some(chunk_file_count as i64)) 382 + .created_at(Datetime::now()) 383 + .build(); 384 + 385 + println!(" → Uploading chunk {}/{} ({} files, {:.1}KB)...", 386 + i + 1, chunks.len(), chunk_file_count, chunk_size as f64 / 1024.0); 387 + 388 + let chunk_output = agent.put_record( 389 + RecordKey::from(Rkey::new(&chunk_rkey).into_diagnostic()?), 390 + chunk_manifest 391 + ).await.into_diagnostic()?; 392 393 + let 
chunk_uri = chunk_output.uri.to_string(); 394 + chunk_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i))); 395 + new_subfs_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i))); 396 + } 397 398 + // Create a parent subfs record that references all chunks 399 + // Each chunk reference MUST have flat: true to merge chunk contents 400 + println!(" → Creating parent subfs with {} chunk references...", chunk_uris.len()); 401 + use jacquard_common::CowStr; 402 + use crate::place_wisp::fs::{Subfs}; 403 404 + // Convert to fs::Subfs (which has the 'flat' field) instead of subfs::Subfs 405 + let parent_entries_fs: Vec<Entry> = chunk_uris.iter().enumerate().map(|(i, (uri, _))| { 406 + let uri_string = uri.clone(); 407 + let at_uri = AtUri::new_cow(CowStr::from(uri_string)).expect("valid URI"); 408 + Entry::new() 409 + .name(CowStr::from(format!("chunk{}", i))) 410 + .node(EntryNode::Subfs(Box::new( 411 + Subfs::new() 412 + .r#type(CowStr::from("subfs")) 413 + .subject(at_uri) 414 + .flat(Some(true)) // EXPLICITLY TRUE - merge chunk contents 415 + .build() 416 + ))) 417 + .build() 418 + }).collect(); 419 + 420 + let parent_root_fs = Directory::new() 421 + .r#type(CowStr::from("directory")) 422 + .entries(parent_entries_fs) 423 + .build(); 424 + 425 + // Convert to subfs::Directory for the parent subfs record 426 + let parent_root_subfs = convert_fs_dir_to_subfs_dir(parent_root_fs); 427 + 428 + use jacquard_common::types::string::Tid; 429 + let parent_tid = Tid::now_0(); 430 + let parent_rkey = parent_tid.to_string(); 431 + 432 + let parent_manifest = crate::place_wisp::subfs::SubfsRecord::new() 433 + .root(parent_root_subfs) 434 + .file_count(Some(largest_dir.file_count as i64)) 435 + .created_at(Datetime::now()) 436 + .build(); 437 + 438 + let parent_output = agent.put_record( 439 + RecordKey::from(Rkey::new(&parent_rkey).into_diagnostic()?), 440 + parent_manifest 441 + ).await.into_diagnostic()?; 442 + 443 + subfs_uri = 
parent_output.uri.to_string(); 444 + println!(" ✅ Created parent subfs with chunks (flat=true on each chunk): {}", subfs_uri); 445 + } else { 446 + // Directory fits in a single subfs record 447 + use jacquard_common::types::string::Tid; 448 + let subfs_tid = Tid::now_0(); 449 + let subfs_rkey = subfs_tid.to_string(); 450 + 451 + let subfs_manifest = crate::place_wisp::subfs::SubfsRecord::new() 452 + .root(convert_fs_dir_to_subfs_dir(largest_dir.directory.clone())) 453 + .file_count(Some(largest_dir.file_count as i64)) 454 + .created_at(Datetime::now()) 455 + .build(); 456 + 457 + // Upload subfs record 458 + let subfs_output = agent.put_record( 459 + RecordKey::from(Rkey::new(&subfs_rkey).into_diagnostic()?), 460 + subfs_manifest 461 + ).await.into_diagnostic()?; 462 + 463 + subfs_uri = subfs_output.uri.to_string(); 464 + println!(" ✅ Created subfs: {}", subfs_uri); 465 + } 466 + 467 + // Replace directory with subfs node (flat: false to preserve directory structure) 468 working_directory = subfs_utils::replace_directory_with_subfs( 469 working_directory, 470 &largest_dir.path, 471 &subfs_uri, 472 + false // Preserve directory - the chunks inside have flat=true 473 )?; 474 475 new_subfs_uris.push((subfs_uri, largest_dir.path.clone())); ··· 817 } 818 819 return Ok((file_builder.build(), true)); 820 + } else { 821 + // CID mismatch - file changed 822 + println!(" → File changed: {} (old CID: {}, new CID: {})", file_path_key, existing_cid, file_cid); 823 + } 824 + } else { 825 + // File not in existing blob map 826 + if file_path_key.starts_with("imgs/") { 827 + println!(" → New file (not in blob map): {}", file_path_key); 828 } 829 } 830
+30 -32
cli/src/pull.rs
··· 35 let pds_url = resolver.pds_for_did(&did).await.into_diagnostic()?; 36 println!("Resolved PDS: {}", pds_url); 37 38 - // Fetch the place.wisp.fs record 39 - 40 println!("Fetching record from PDS..."); 41 let client = reqwest::Client::new(); 42 - 43 // Use com.atproto.repo.getRecord 44 use jacquard::api::com_atproto::repo::get_record::GetRecord; 45 use jacquard_common::types::string::Rkey as RkeyType; 46 let rkey_parsed = RkeyType::new(&rkey).into_diagnostic()?; 47 - 48 use jacquard_common::types::ident::AtIdentifier; 49 use jacquard_common::types::string::RecordKey; 50 let request = GetRecord::new() ··· 70 println!("Found site '{}' with {} files (in main record)", fs_record.site, file_count); 71 72 // Check for and expand subfs nodes 73 - let expanded_root = expand_subfs_in_pull(&fs_record.root, &pds_url, did.as_str()).await?; 74 let total_file_count = subfs_utils::count_files_in_directory(&expanded_root); 75 76 if total_file_count as i64 != fs_record.file_count.unwrap_or(0) { ··· 402 } 403 404 /// Expand subfs nodes in a directory tree by fetching and merging subfs records (RECURSIVELY) 405 - async fn expand_subfs_in_pull<'a>( 406 directory: &Directory<'a>, 407 pds_url: &Url, 408 - _did: &str, 409 ) -> miette::Result<Directory<'static>> { 410 use crate::place_wisp::subfs::SubfsRecord; 411 - use jacquard_common::types::value::from_data; 412 - use jacquard_common::IntoStatic; 413 414 - // Recursively fetch ALL subfs records (including nested ones) 415 let mut all_subfs_map: HashMap<String, crate::place_wisp::subfs::Directory> = HashMap::new(); 416 let mut to_fetch = subfs_utils::extract_subfs_uris(directory, String::new()); 417 ··· 420 } 421 422 println!("Found {} subfs records, fetching recursively...", to_fetch.len()); 423 - let client = reqwest::Client::new(); 424 425 - // Keep fetching until we've resolved all subfs (including nested ones) 426 let mut iteration = 0; 427 - const MAX_ITERATIONS: usize = 10; // Prevent infinite loops 428 429 while 
!to_fetch.is_empty() && iteration < MAX_ITERATIONS { 430 iteration += 1; ··· 437 let pds_url = pds_url.clone(); 438 439 fetch_tasks.push(async move { 440 let parts: Vec<&str> = uri.trim_start_matches("at://").split('/').collect(); 441 if parts.len() < 3 { 442 return Err(miette::miette!("Invalid subfs URI: {}", uri)); 443 } 444 445 - let _did = parts[0]; 446 let collection = parts[1]; 447 - let rkey = parts[2]; 448 449 if collection != "place.wisp.subfs" { 450 return Err(miette::miette!("Expected place.wisp.subfs collection, got: {}", collection)); 451 } 452 453 use jacquard::api::com_atproto::repo::get_record::GetRecord; 454 - use jacquard_common::types::string::Rkey as RkeyType; 455 use jacquard_common::types::ident::AtIdentifier; 456 - use jacquard_common::types::string::{RecordKey, Did as DidType}; 457 458 - let rkey_parsed = RkeyType::new(rkey).into_diagnostic()?; 459 - let did_parsed = DidType::new(_did).into_diagnostic()?; 460 461 let request = GetRecord::new() 462 .repo(AtIdentifier::Did(did_parsed)) ··· 472 473 let record_output = response.into_output().into_diagnostic()?; 474 let subfs_record: SubfsRecord = from_data(&record_output.value).into_diagnostic()?; 475 - let subfs_record_static = subfs_record.into_static(); 476 477 - Ok::<_, miette::Report>((path, subfs_record_static)) 478 }); 479 } 480 481 let results: Vec<_> = futures::future::join_all(fetch_tasks).await; 482 483 // Process results and find nested subfs 484 - let mut newly_fetched = Vec::new(); 485 for result in results { 486 match result { 487 Ok((path, record)) => { 488 println!(" ✓ Fetched subfs at {}", path); 489 490 - // Check for nested subfs in this record 491 - let nested_subfs = extract_subfs_from_subfs_dir(&record.root, path.clone()); 492 - newly_fetched.extend(nested_subfs); 493 494 all_subfs_map.insert(path, record.root); 495 } ··· 499 } 500 } 501 502 - // Update to_fetch with only the NEW subfs we haven't fetched yet 503 - to_fetch = newly_fetched 504 .into_iter() 505 - 
.filter(|(uri, _)| !all_subfs_map.iter().any(|(k, _)| k == uri)) 506 .collect(); 507 } 508 509 if iteration >= MAX_ITERATIONS { 510 - return Err(miette::miette!("Max iterations reached while fetching nested subfs")); 511 } 512 513 println!(" Total subfs records fetched: {}", all_subfs_map.len()); ··· 516 Ok(replace_subfs_with_content(directory.clone(), &all_subfs_map, String::new())) 517 } 518 519 - /// Extract subfs URIs from a subfs::Directory 520 - fn extract_subfs_from_subfs_dir( 521 directory: &crate::place_wisp::subfs::Directory, 522 current_path: String, 523 ) -> Vec<(String, String)> { ··· 535 uris.push((subfs_node.subject.to_string(), full_path.clone())); 536 } 537 crate::place_wisp::subfs::EntryNode::Directory(subdir) => { 538 - let nested = extract_subfs_from_subfs_dir(subdir, full_path); 539 uris.extend(nested); 540 } 541 _ => {}
··· 35 let pds_url = resolver.pds_for_did(&did).await.into_diagnostic()?; 36 println!("Resolved PDS: {}", pds_url); 37 38 + // Create a temporary agent for fetching records (no auth needed for public reads) 39 println!("Fetching record from PDS..."); 40 let client = reqwest::Client::new(); 41 + 42 // Use com.atproto.repo.getRecord 43 use jacquard::api::com_atproto::repo::get_record::GetRecord; 44 use jacquard_common::types::string::Rkey as RkeyType; 45 let rkey_parsed = RkeyType::new(&rkey).into_diagnostic()?; 46 + 47 use jacquard_common::types::ident::AtIdentifier; 48 use jacquard_common::types::string::RecordKey; 49 let request = GetRecord::new() ··· 69 println!("Found site '{}' with {} files (in main record)", fs_record.site, file_count); 70 71 // Check for and expand subfs nodes 72 + // Note: We use a custom expand function for pull since we don't have an Agent 73 + let expanded_root = expand_subfs_in_pull_with_client(&fs_record.root, &client, &pds_url).await?; 74 let total_file_count = subfs_utils::count_files_in_directory(&expanded_root); 75 76 if total_file_count as i64 != fs_record.file_count.unwrap_or(0) { ··· 402 } 403 404 /// Expand subfs nodes in a directory tree by fetching and merging subfs records (RECURSIVELY) 405 + /// Uses reqwest client directly for pull command (no agent needed) 406 + async fn expand_subfs_in_pull_with_client<'a>( 407 directory: &Directory<'a>, 408 + client: &reqwest::Client, 409 pds_url: &Url, 410 ) -> miette::Result<Directory<'static>> { 411 + use jacquard_common::IntoStatic; 412 + use jacquard_common::types::value::from_data; 413 use crate::place_wisp::subfs::SubfsRecord; 414 415 let mut all_subfs_map: HashMap<String, crate::place_wisp::subfs::Directory> = HashMap::new(); 416 let mut to_fetch = subfs_utils::extract_subfs_uris(directory, String::new()); 417 ··· 420 } 421 422 println!("Found {} subfs records, fetching recursively...", to_fetch.len()); 423 424 let mut iteration = 0; 425 + const MAX_ITERATIONS: usize = 10; 426 
427 while !to_fetch.is_empty() && iteration < MAX_ITERATIONS { 428 iteration += 1; ··· 435 let pds_url = pds_url.clone(); 436 437 fetch_tasks.push(async move { 438 + // Parse URI 439 let parts: Vec<&str> = uri.trim_start_matches("at://").split('/').collect(); 440 if parts.len() < 3 { 441 return Err(miette::miette!("Invalid subfs URI: {}", uri)); 442 } 443 444 + let did_str = parts[0]; 445 let collection = parts[1]; 446 + let rkey_str = parts[2]; 447 448 if collection != "place.wisp.subfs" { 449 return Err(miette::miette!("Expected place.wisp.subfs collection, got: {}", collection)); 450 } 451 452 + // Fetch using GetRecord 453 use jacquard::api::com_atproto::repo::get_record::GetRecord; 454 + use jacquard_common::types::string::{Rkey as RkeyType, Did as DidType, RecordKey}; 455 use jacquard_common::types::ident::AtIdentifier; 456 457 + let rkey_parsed = RkeyType::new(rkey_str).into_diagnostic()?; 458 + let did_parsed = DidType::new(did_str).into_diagnostic()?; 459 460 let request = GetRecord::new() 461 .repo(AtIdentifier::Did(did_parsed)) ··· 471 472 let record_output = response.into_output().into_diagnostic()?; 473 let subfs_record: SubfsRecord = from_data(&record_output.value).into_diagnostic()?; 474 475 + Ok::<_, miette::Report>((path, subfs_record.into_static())) 476 }); 477 } 478 479 let results: Vec<_> = futures::future::join_all(fetch_tasks).await; 480 481 // Process results and find nested subfs 482 + let mut newly_found_uris = Vec::new(); 483 for result in results { 484 match result { 485 Ok((path, record)) => { 486 println!(" ✓ Fetched subfs at {}", path); 487 488 + // Extract nested subfs URIs 489 + let nested_uris = extract_subfs_uris_from_subfs_dir(&record.root, path.clone()); 490 + newly_found_uris.extend(nested_uris); 491 492 all_subfs_map.insert(path, record.root); 493 } ··· 497 } 498 } 499 500 + // Filter out already-fetched paths 501 + to_fetch = newly_found_uris 502 .into_iter() 503 + .filter(|(_, path)| !all_subfs_map.contains_key(path)) 504 
.collect(); 505 } 506 507 if iteration >= MAX_ITERATIONS { 508 + eprintln!("⚠️ Max iterations reached while fetching nested subfs"); 509 } 510 511 println!(" Total subfs records fetched: {}", all_subfs_map.len()); ··· 514 Ok(replace_subfs_with_content(directory.clone(), &all_subfs_map, String::new())) 515 } 516 517 + /// Extract subfs URIs from a subfs::Directory (helper for pull) 518 + fn extract_subfs_uris_from_subfs_dir( 519 directory: &crate::place_wisp::subfs::Directory, 520 current_path: String, 521 ) -> Vec<(String, String)> { ··· 533 uris.push((subfs_node.subject.to_string(), full_path.clone())); 534 } 535 crate::place_wisp::subfs::EntryNode::Directory(subdir) => { 536 + let nested = extract_subfs_uris_from_subfs_dir(subdir, full_path); 537 uris.extend(nested); 538 } 539 _ => {}
+195 -34
cli/src/subfs_utils.rs
··· 72 Ok(record_output.value.into_static()) 73 } 74 75 - /// Merge blob maps from subfs records into the main blob map 76 - /// Returns the total number of blobs merged from all subfs records 77 - pub async fn merge_subfs_blob_maps( 78 agent: &Agent<impl AgentSession + IdentityResolver>, 79 - subfs_uris: Vec<(String, String)>, 80 - main_blob_map: &mut HashMap<String, (BlobRef<'static>, String)>, 81 - ) -> miette::Result<usize> { 82 - let mut total_merged = 0; 83 84 - println!("Fetching {} subfs records for blob reuse...", subfs_uris.len()); 85 86 - // Fetch all subfs records in parallel (but with some concurrency limit) 87 - use futures::stream::{self, StreamExt}; 88 89 - let subfs_results: Vec<_> = stream::iter(subfs_uris) 90 - .map(|(uri, mount_path)| async move { 91 - match fetch_subfs_record(agent, &uri).await { 92 - Ok(record) => Some((record, mount_path)), 93 - Err(e) => { 94 - eprintln!(" ⚠️ Failed to fetch subfs {}: {}", uri, e); 95 - None 96 } 97 } 98 - }) 99 - .buffer_unordered(5) 100 - .collect() 101 - .await; 102 103 - // Convert subfs Directory to fs Directory for blob extraction 104 - // Note: We need to extract blobs from the subfs record's root 105 - for result in subfs_results { 106 - if let Some((subfs_record, mount_path)) = result { 107 - // Extract blobs from this subfs record's root 108 - // The blob_map module works with fs::Directory, but subfs::Directory has the same structure 109 - // We need to convert or work directly with the entries 110 111 - let subfs_blob_map = extract_subfs_blobs(&subfs_record.root, mount_path.clone()); 112 - let count = subfs_blob_map.len(); 113 114 - for (path, blob_info) in subfs_blob_map { 115 - main_blob_map.insert(path, blob_info); 116 } 117 118 - total_merged += count; 119 - println!(" ✓ Merged {} blobs from subfs at {}", count, mount_path); 120 } 121 } 122 123 Ok(total_merged) ··· 334 335 Ok(()) 336 }
··· 72 Ok(record_output.value.into_static()) 73 } 74 75 + /// Recursively fetch all subfs records (including nested ones) 76 + /// Returns a list of (mount_path, SubfsRecord) tuples 77 + /// Note: Multiple records can have the same mount_path (for flat-merged chunks) 78 + pub async fn fetch_all_subfs_records_recursive( 79 agent: &Agent<impl AgentSession + IdentityResolver>, 80 + initial_uris: Vec<(String, String)>, 81 + ) -> miette::Result<Vec<(String, SubfsRecord<'static>)>> { 82 + use futures::stream::{self, StreamExt}; 83 84 + let mut all_subfs: Vec<(String, SubfsRecord<'static>)> = Vec::new(); 85 + let mut fetched_uris: std::collections::HashSet<String> = std::collections::HashSet::new(); 86 + let mut to_fetch = initial_uris; 87 88 + if to_fetch.is_empty() { 89 + return Ok(all_subfs); 90 + } 91 + 92 + println!("Found {} subfs records, fetching recursively...", to_fetch.len()); 93 + 94 + let mut iteration = 0; 95 + const MAX_ITERATIONS: usize = 10; 96 97 + while !to_fetch.is_empty() && iteration < MAX_ITERATIONS { 98 + iteration += 1; 99 + println!(" Iteration {}: fetching {} subfs records...", iteration, to_fetch.len()); 100 + 101 + let subfs_results: Vec<_> = stream::iter(to_fetch.clone()) 102 + .map(|(uri, mount_path)| async move { 103 + match fetch_subfs_record(agent, &uri).await { 104 + Ok(record) => Some((mount_path, record, uri)), 105 + Err(e) => { 106 + eprintln!(" ⚠️ Failed to fetch subfs {}: {}", uri, e); 107 + None 108 + } 109 } 110 + }) 111 + .buffer_unordered(5) 112 + .collect() 113 + .await; 114 + 115 + // Process results and find nested subfs 116 + let mut newly_found_uris = Vec::new(); 117 + for result in subfs_results { 118 + if let Some((mount_path, record, uri)) = result { 119 + println!(" ✓ Fetched subfs at {}", mount_path); 120 + 121 + // Extract nested subfs URIs from this record 122 + let nested_uris = extract_subfs_uris_from_subfs_dir(&record.root, mount_path.clone()); 123 + newly_found_uris.extend(nested_uris); 124 + 125 + 
all_subfs.push((mount_path, record)); 126 + fetched_uris.insert(uri); 127 } 128 + } 129 130 + // Filter out already-fetched URIs (based on URI, not path) 131 + to_fetch = newly_found_uris 132 + .into_iter() 133 + .filter(|(uri, _)| !fetched_uris.contains(uri)) 134 + .collect(); 135 + } 136 137 + if iteration >= MAX_ITERATIONS { 138 + eprintln!("⚠️ Max iterations reached while fetching nested subfs"); 139 + } 140 141 + println!(" Total subfs records fetched: {}", all_subfs.len()); 142 + 143 + Ok(all_subfs) 144 + } 145 + 146 + /// Extract subfs URIs from a subfs::Directory 147 + fn extract_subfs_uris_from_subfs_dir( 148 + directory: &crate::place_wisp::subfs::Directory, 149 + current_path: String, 150 + ) -> Vec<(String, String)> { 151 + let mut uris = Vec::new(); 152 + 153 + for entry in &directory.entries { 154 + match &entry.node { 155 + crate::place_wisp::subfs::EntryNode::Subfs(subfs_node) => { 156 + // Check if this is a chunk entry (chunk0, chunk1, etc.) 157 + // Chunks should be flat-merged, so use the parent's path 158 + let mount_path = if entry.name.starts_with("chunk") && 159 + entry.name.chars().skip(5).all(|c| c.is_ascii_digit()) { 160 + // This is a chunk - use parent's path for flat merge 161 + println!(" → Found chunk {} at {}, will flat-merge to {}", entry.name, current_path, current_path); 162 + current_path.clone() 163 + } else { 164 + // Normal subfs - append name to path 165 + if current_path.is_empty() { 166 + entry.name.to_string() 167 + } else { 168 + format!("{}/{}", current_path, entry.name) 169 + } 170 + }; 171 + 172 + uris.push((subfs_node.subject.to_string(), mount_path)); 173 } 174 + crate::place_wisp::subfs::EntryNode::Directory(subdir) => { 175 + let full_path = if current_path.is_empty() { 176 + entry.name.to_string() 177 + } else { 178 + format!("{}/{}", current_path, entry.name) 179 + }; 180 + let nested = extract_subfs_uris_from_subfs_dir(subdir, full_path); 181 + uris.extend(nested); 182 + } 183 + _ => {} 184 + } 185 + } 186 187 
+ uris 188 + } 189 + 190 + /// Merge blob maps from subfs records into the main blob map (RECURSIVE) 191 + /// Returns the total number of blobs merged from all subfs records 192 + pub async fn merge_subfs_blob_maps( 193 + agent: &Agent<impl AgentSession + IdentityResolver>, 194 + subfs_uris: Vec<(String, String)>, 195 + main_blob_map: &mut HashMap<String, (BlobRef<'static>, String)>, 196 + ) -> miette::Result<usize> { 197 + // Fetch all subfs records recursively 198 + let all_subfs = fetch_all_subfs_records_recursive(agent, subfs_uris).await?; 199 + 200 + let mut total_merged = 0; 201 + 202 + // Extract blobs from all fetched subfs records 203 + // Skip parent records that only contain chunk references (no actual files) 204 + for (mount_path, subfs_record) in all_subfs { 205 + // Check if this record only contains chunk subfs references (no files) 206 + let only_has_chunks = subfs_record.root.entries.iter().all(|e| { 207 + matches!(&e.node, crate::place_wisp::subfs::EntryNode::Subfs(_)) && 208 + e.name.starts_with("chunk") && 209 + e.name.chars().skip(5).all(|c| c.is_ascii_digit()) 210 + }); 211 + 212 + if only_has_chunks && !subfs_record.root.entries.is_empty() { 213 + // This is a parent containing only chunks - skip it, blobs are in the chunks 214 + println!(" → Skipping parent subfs at {} ({} chunks, no files)", mount_path, subfs_record.root.entries.len()); 215 + continue; 216 + } 217 + 218 + let subfs_blob_map = extract_subfs_blobs(&subfs_record.root, mount_path.clone()); 219 + let count = subfs_blob_map.len(); 220 + 221 + for (path, blob_info) in subfs_blob_map { 222 + main_blob_map.insert(path, blob_info); 223 } 224 + 225 + total_merged += count; 226 + println!(" ✓ Merged {} blobs from subfs at {}", count, mount_path); 227 } 228 229 Ok(total_merged) ··· 440 441 Ok(()) 442 } 443 + 444 + /// Split a large directory into multiple smaller chunks 445 + /// Returns a list of chunk directories, each small enough to fit in a subfs record 446 + pub fn 
split_directory_into_chunks( 447 + directory: &FsDirectory, 448 + max_size: usize, 449 + ) -> Vec<FsDirectory<'static>> { 450 + use jacquard_common::CowStr; 451 + 452 + let mut chunks = Vec::new(); 453 + let mut current_chunk_entries = Vec::new(); 454 + let mut current_chunk_size = 100; // Base size for directory structure 455 + 456 + for entry in &directory.entries { 457 + // Estimate the size of this entry 458 + let entry_size = estimate_entry_size(entry); 459 + 460 + // If adding this entry would exceed the max size, start a new chunk 461 + if !current_chunk_entries.is_empty() && (current_chunk_size + entry_size > max_size) { 462 + // Create a chunk from current entries 463 + let chunk = FsDirectory::new() 464 + .r#type(CowStr::from("directory")) 465 + .entries(current_chunk_entries.clone()) 466 + .build(); 467 + 468 + chunks.push(chunk); 469 + 470 + // Start new chunk 471 + current_chunk_entries.clear(); 472 + current_chunk_size = 100; 473 + } 474 + 475 + current_chunk_entries.push(entry.clone().into_static()); 476 + current_chunk_size += entry_size; 477 + } 478 + 479 + // Add the last chunk if it has any entries 480 + if !current_chunk_entries.is_empty() { 481 + let chunk = FsDirectory::new() 482 + .r#type(CowStr::from("directory")) 483 + .entries(current_chunk_entries) 484 + .build(); 485 + chunks.push(chunk); 486 + } 487 + 488 + chunks 489 + } 490 + 491 + /// Estimate the JSON size of a single entry 492 + fn estimate_entry_size(entry: &crate::place_wisp::fs::Entry) -> usize { 493 + match serde_json::to_string(entry) { 494 + Ok(json) => json.len(), 495 + Err(_) => 500, // Conservative estimate if serialization fails 496 + } 497 + }