cli/src/main.rs  (+115 -19)
···
  use jacquard::oauth::client::OAuthClient;
  use jacquard::oauth::loopback::LoopbackConfig;
  use jacquard::prelude::IdentityResolver;
- use jacquard_common::types::string::{Datetime, Rkey, RecordKey};
+ use jacquard_common::types::string::{Datetime, Rkey, RecordKey, AtUri};
  use jacquard_common::types::blob::MimeType;
  use miette::IntoDiagnostic;
  use std::path::{Path, PathBuf};
···
  println!(" Split #{}: {} ({} files, {:.1}KB)",
      attempts, largest_dir.path, largest_dir.file_count, largest_dir.size as f64 / 1024.0);

- // Create a subfs record for this directory
- use jacquard_common::types::string::Tid;
- let subfs_tid = Tid::now_0();
- let subfs_rkey = subfs_tid.to_string();
+ // Check if this directory is itself too large for a single subfs record
+ const MAX_SUBFS_SIZE: usize = 75 * 1024; // 75KB soft limit for safety
+ let mut subfs_uri = String::new();
+
+ if largest_dir.size > MAX_SUBFS_SIZE {
+     // Need to split this directory into multiple chunks
+     println!(" → Directory too large, splitting into chunks...");
+     let chunks = subfs_utils::split_directory_into_chunks(&largest_dir.directory, MAX_SUBFS_SIZE);
+     println!(" → Created {} chunks", chunks.len());
+
+     // Upload each chunk as a subfs record
+     let mut chunk_uris = Vec::new();
+     for (i, chunk) in chunks.iter().enumerate() {
+         use jacquard_common::types::string::Tid;
+         let chunk_tid = Tid::now_0();
+         let chunk_rkey = chunk_tid.to_string();
+
+         let chunk_file_count = subfs_utils::count_files_in_directory(chunk);
+         let chunk_size = subfs_utils::estimate_directory_size(chunk);

- let subfs_manifest = crate::place_wisp::subfs::SubfsRecord::new()
-     .root(convert_fs_dir_to_subfs_dir(largest_dir.directory.clone()))
-     .file_count(Some(largest_dir.file_count as i64))
-     .created_at(Datetime::now())
-     .build();
+         let chunk_manifest = crate::place_wisp::subfs::SubfsRecord::new()
+             .root(convert_fs_dir_to_subfs_dir(chunk.clone()))
+             .file_count(Some(chunk_file_count as i64))
+             .created_at(Datetime::now())
+             .build();
+
+         println!(" → Uploading chunk {}/{} ({} files, {:.1}KB)...",
+             i + 1, chunks.len(), chunk_file_count, chunk_size as f64 / 1024.0);
+
+         let chunk_output = agent.put_record(
+             RecordKey::from(Rkey::new(&chunk_rkey).into_diagnostic()?),
+             chunk_manifest
+         ).await.into_diagnostic()?;

- // Upload subfs record
- let subfs_output = agent.put_record(
-     RecordKey::from(Rkey::new(&subfs_rkey).into_diagnostic()?),
-     subfs_manifest
- ).await.into_diagnostic()?;
+         let chunk_uri = chunk_output.uri.to_string();
+         chunk_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i)));
+         new_subfs_uris.push((chunk_uri.clone(), format!("{}#{}", largest_dir.path, i)));
+     }

- let subfs_uri = subfs_output.uri.to_string();
- println!(" ✅ Created subfs: {}", subfs_uri);
+     // Create a parent subfs record that references all chunks
+     // Each chunk reference MUST have flat: true to merge chunk contents
+     println!(" → Creating parent subfs with {} chunk references...", chunk_uris.len());
+     use jacquard_common::CowStr;
+     use crate::place_wisp::fs::Subfs;

- // Replace directory with subfs node (flat: false to preserve structure)
+     // Convert to fs::Subfs (which has the 'flat' field) instead of subfs::Subfs
+     let parent_entries_fs: Vec<Entry> = chunk_uris.iter().enumerate().map(|(i, (uri, _))| {
+         let uri_string = uri.clone();
+         let at_uri = AtUri::new_cow(CowStr::from(uri_string)).expect("valid URI");
+         Entry::new()
+             .name(CowStr::from(format!("chunk{}", i)))
+             .node(EntryNode::Subfs(Box::new(
+                 Subfs::new()
+                     .r#type(CowStr::from("subfs"))
+                     .subject(at_uri)
+                     .flat(Some(true)) // EXPLICITLY TRUE - merge chunk contents
+                     .build()
+             )))
+             .build()
+     }).collect();
+
+     let parent_root_fs = Directory::new()
+         .r#type(CowStr::from("directory"))
+         .entries(parent_entries_fs)
+         .build();
+
+     // Convert to subfs::Directory for the parent subfs record
+     let parent_root_subfs = convert_fs_dir_to_subfs_dir(parent_root_fs);
+
+     use jacquard_common::types::string::Tid;
+     let parent_tid = Tid::now_0();
+     let parent_rkey = parent_tid.to_string();
+
+     let parent_manifest = crate::place_wisp::subfs::SubfsRecord::new()
+         .root(parent_root_subfs)
+         .file_count(Some(largest_dir.file_count as i64))
+         .created_at(Datetime::now())
+         .build();
+
+     let parent_output = agent.put_record(
+         RecordKey::from(Rkey::new(&parent_rkey).into_diagnostic()?),
+         parent_manifest
+     ).await.into_diagnostic()?;
+
+     subfs_uri = parent_output.uri.to_string();
+     println!(" ✅ Created parent subfs with chunks (flat=true on each chunk): {}", subfs_uri);
+ } else {
+     // Directory fits in a single subfs record
+     use jacquard_common::types::string::Tid;
+     let subfs_tid = Tid::now_0();
+     let subfs_rkey = subfs_tid.to_string();
+
+     let subfs_manifest = crate::place_wisp::subfs::SubfsRecord::new()
+         .root(convert_fs_dir_to_subfs_dir(largest_dir.directory.clone()))
+         .file_count(Some(largest_dir.file_count as i64))
+         .created_at(Datetime::now())
+         .build();
+
+     // Upload subfs record
+     let subfs_output = agent.put_record(
+         RecordKey::from(Rkey::new(&subfs_rkey).into_diagnostic()?),
+         subfs_manifest
+     ).await.into_diagnostic()?;
+
+     subfs_uri = subfs_output.uri.to_string();
+     println!(" ✅ Created subfs: {}", subfs_uri);
+ }
+
+ // Replace directory with subfs node (flat: false to preserve directory structure)
  working_directory = subfs_utils::replace_directory_with_subfs(
      working_directory,
      &largest_dir.path,
      &subfs_uri,
-     false // Preserve directory structure
+     false // Preserve directory - the chunks inside have flat=true
  )?;

  new_subfs_uris.push((subfs_uri, largest_dir.path.clone()));
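
Note: the two `flat` values above are the crux of the chunking scheme. The mount point in the main fs record keeps `flat: false` so the directory still appears under its own name, while every `chunkN` reference inside the parent record carries `flat: true` so chunk contents merge back into that directory. A minimal sketch of how a reader resolves paths under the two settings (hypothetical helper and illustrative paths, not the real traversal code):

```rust
// Hypothetical helper, for illustration only: where a file inside a subfs
// entry ends up, depending on the entry's `flat` flag.
fn resolved_path(mount: &str, entry_name: &str, file: &str, flat: bool) -> String {
    if flat {
        // flat: true — the entry's own name disappears; contents merge upward
        format!("{}/{}", mount, file)
    } else {
        // flat: false — the entry keeps its name as a directory level
        format!("{}/{}/{}", mount, entry_name, file)
    }
}

fn main() {
    // chunk refs use flat = true, so files keep their original paths:
    assert_eq!(resolved_path("static", "chunk1", "app.js", true), "static/app.js");
    // if chunks used flat = false, paths would gain a spurious "chunk1" level:
    assert_eq!(resolved_path("static", "chunk1", "app.js", false), "static/chunk1/app.js");
}
```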
···
  }

      return Ok((file_builder.build(), true));
+ } else {
+     // CID mismatch - file changed
+     println!(" → File changed: {} (old CID: {}, new CID: {})", file_path_key, existing_cid, file_cid);
+ }
+ } else {
+     // File not in existing blob map
+     if file_path_key.starts_with("imgs/") {
+         println!(" → New file (not in blob map): {}", file_path_key);
  }
  }
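
The branch structure above encodes the reuse rule; restated compactly (a hedged sketch with plain types, not the real builder flow): a blob is reused only when the path exists in the old blob map and its CID is unchanged.

```rust
use std::collections::HashMap;

// Sketch: blob_map maps file path -> previously uploaded CID (assumed shape).
fn needs_upload(blob_map: &HashMap<String, String>, path: &str, new_cid: &str) -> bool {
    match blob_map.get(path) {
        Some(old_cid) if old_cid == new_cid => false, // unchanged — reuse blob
        Some(_) => true, // CID mismatch — file changed, re-upload
        None => true,    // not in blob map — new file, upload
    }
}
```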
cli/src/pull.rs  (+30 -32)
···
  let pds_url = resolver.pds_for_did(&did).await.into_diagnostic()?;
  println!("Resolved PDS: {}", pds_url);

- // Fetch the place.wisp.fs record
-
+ // Create a plain HTTP client for fetching records (no auth needed for public reads)
  println!("Fetching record from PDS...");
  let client = reqwest::Client::new();

  // Use com.atproto.repo.getRecord
  use jacquard::api::com_atproto::repo::get_record::GetRecord;
  use jacquard_common::types::string::Rkey as RkeyType;
  let rkey_parsed = RkeyType::new(&rkey).into_diagnostic()?;

  use jacquard_common::types::ident::AtIdentifier;
  use jacquard_common::types::string::RecordKey;
  let request = GetRecord::new()
···
  println!("Found site '{}' with {} files (in main record)", fs_record.site, file_count);

  // Check for and expand subfs nodes
- let expanded_root = expand_subfs_in_pull(&fs_record.root, &pds_url, did.as_str()).await?;
+ // Note: We use a custom expand function for pull since we don't have an Agent
+ let expanded_root = expand_subfs_in_pull_with_client(&fs_record.root, &client, &pds_url).await?;
  let total_file_count = subfs_utils::count_files_in_directory(&expanded_root);

  if total_file_count as i64 != fs_record.file_count.unwrap_or(0) {
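
For context, the unauthenticated read that `GetRecord` performs here boils down to a plain XRPC GET against the PDS. A hedged reqwest sketch (`com.atproto.repo.getRecord` is the real endpoint; the URL joining and surrounding variables are assumptions based on the code above, and the typed client remains what the code actually uses):

```rust
// Equivalent raw call (sketch; shows why no session/auth is required):
let url = format!(
    "{}xrpc/com.atproto.repo.getRecord?repo={}&collection=place.wisp.fs&rkey={}",
    pds_url, did, rkey
);
let value: serde_json::Value = client
    .get(&url)
    .send().await.into_diagnostic()?
    .json().await.into_diagnostic()?;
```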
···
  }

  /// Expand subfs nodes in a directory tree by fetching and merging subfs records (RECURSIVELY)
- async fn expand_subfs_in_pull<'a>(
+ /// Uses reqwest client directly for pull command (no agent needed)
+ async fn expand_subfs_in_pull_with_client<'a>(
      directory: &Directory<'a>,
+     client: &reqwest::Client,
      pds_url: &Url,
-     _did: &str,
  ) -> miette::Result<Directory<'static>> {
+     use jacquard_common::IntoStatic;
+     use jacquard_common::types::value::from_data;
      use crate::place_wisp::subfs::SubfsRecord;
-     use jacquard_common::types::value::from_data;
-     use jacquard_common::IntoStatic;

-     // Recursively fetch ALL subfs records (including nested ones)
      let mut all_subfs_map: HashMap<String, crate::place_wisp::subfs::Directory> = HashMap::new();
      let mut to_fetch = subfs_utils::extract_subfs_uris(directory, String::new());

···
      }

      println!("Found {} subfs records, fetching recursively...", to_fetch.len());
-     let client = reqwest::Client::new();

-     // Keep fetching until we've resolved all subfs (including nested ones)
      let mut iteration = 0;
-     const MAX_ITERATIONS: usize = 10; // Prevent infinite loops
+     const MAX_ITERATIONS: usize = 10;

      while !to_fetch.is_empty() && iteration < MAX_ITERATIONS {
          iteration += 1;
···
          let pds_url = pds_url.clone();

          fetch_tasks.push(async move {
+             // Parse URI
              let parts: Vec<&str> = uri.trim_start_matches("at://").split('/').collect();
              if parts.len() < 3 {
                  return Err(miette::miette!("Invalid subfs URI: {}", uri));
              }

-             let _did = parts[0];
+             let did_str = parts[0];
              let collection = parts[1];
-             let rkey = parts[2];
+             let rkey_str = parts[2];

              if collection != "place.wisp.subfs" {
                  return Err(miette::miette!("Expected place.wisp.subfs collection, got: {}", collection));
              }

+             // Fetch using GetRecord
              use jacquard::api::com_atproto::repo::get_record::GetRecord;
-             use jacquard_common::types::string::Rkey as RkeyType;
+             use jacquard_common::types::string::{Rkey as RkeyType, Did as DidType, RecordKey};
              use jacquard_common::types::ident::AtIdentifier;
-             use jacquard_common::types::string::{RecordKey, Did as DidType};

-             let rkey_parsed = RkeyType::new(rkey).into_diagnostic()?;
-             let did_parsed = DidType::new(_did).into_diagnostic()?;
+             let rkey_parsed = RkeyType::new(rkey_str).into_diagnostic()?;
+             let did_parsed = DidType::new(did_str).into_diagnostic()?;

              let request = GetRecord::new()
                  .repo(AtIdentifier::Did(did_parsed))
···

              let record_output = response.into_output().into_diagnostic()?;
              let subfs_record: SubfsRecord = from_data(&record_output.value).into_diagnostic()?;
-             let subfs_record_static = subfs_record.into_static();

-             Ok::<_, miette::Report>((path, subfs_record_static))
+             Ok::<_, miette::Report>((path, subfs_record.into_static()))
          });
      }

      let results: Vec<_> = futures::future::join_all(fetch_tasks).await;

      // Process results and find nested subfs
-     let mut newly_fetched = Vec::new();
+     let mut newly_found_uris = Vec::new();
      for result in results {
          match result {
              Ok((path, record)) => {
                  println!(" ✓ Fetched subfs at {}", path);

-                 // Check for nested subfs in this record
-                 let nested_subfs = extract_subfs_from_subfs_dir(&record.root, path.clone());
-                 newly_fetched.extend(nested_subfs);
+                 // Extract nested subfs URIs
+                 let nested_uris = extract_subfs_uris_from_subfs_dir(&record.root, path.clone());
+                 newly_found_uris.extend(nested_uris);

                  all_subfs_map.insert(path, record.root);
              }
···
          }
      }

-     // Update to_fetch with only the NEW subfs we haven't fetched yet
-     to_fetch = newly_fetched
+     // Filter out already-fetched paths
+     to_fetch = newly_found_uris
          .into_iter()
-         .filter(|(uri, _)| !all_subfs_map.iter().any(|(k, _)| k == uri))
+         .filter(|(_, path)| !all_subfs_map.contains_key(path))
          .collect();
      }

      if iteration >= MAX_ITERATIONS {
-         return Err(miette::miette!("Max iterations reached while fetching nested subfs"));
+         eprintln!("⚠️ Max iterations reached while fetching nested subfs");
      }

      println!(" Total subfs records fetched: {}", all_subfs_map.len());
···
      Ok(replace_subfs_with_content(directory.clone(), &all_subfs_map, String::new()))
  }

- /// Extract subfs URIs from a subfs::Directory
- fn extract_subfs_from_subfs_dir(
+ /// Extract subfs URIs from a subfs::Directory (helper for pull)
+ fn extract_subfs_uris_from_subfs_dir(
      directory: &crate::place_wisp::subfs::Directory,
      current_path: String,
  ) -> Vec<(String, String)> {
···
              uris.push((subfs_node.subject.to_string(), full_path.clone()));
          }
          crate::place_wisp::subfs::EntryNode::Directory(subdir) => {
-             let nested = extract_subfs_from_subfs_dir(subdir, full_path);
+             let nested = extract_subfs_uris_from_subfs_dir(subdir, full_path);
              uris.extend(nested);
          }
          _ => {}
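
Both this function and its agent-based counterpart in subfs_utils.rs follow the same fetch-until-fixpoint shape. Distilled into a standalone generic sketch (illustrative only, not shared library code): repeatedly fetch pending URIs, collect nested references from each result, drop ones already seen, and stop once nothing new appears or the iteration cap is hit.

```rust
use std::collections::HashSet;

// Distilled control flow of the recursive subfs expansion loops.
async fn fetch_fixpoint<F, Fut>(mut pending: Vec<String>, mut fetch_nested: F) -> Vec<String>
where
    F: FnMut(String) -> Fut,
    Fut: std::future::Future<Output = Vec<String>>, // returns nested URIs
{
    const MAX_ITERATIONS: usize = 10; // same cap as the real loops
    let mut seen = HashSet::new();
    let mut fetched = Vec::new();
    let mut iteration = 0;
    while !pending.is_empty() && iteration < MAX_ITERATIONS {
        iteration += 1;
        let mut next = Vec::new();
        for uri in pending.drain(..) {
            if !seen.insert(uri.clone()) {
                continue; // already fetched — duplicates and cycles stop here
            }
            next.extend(fetch_nested(uri.clone()).await);
            fetched.push(uri);
        }
        pending = next;
    }
    fetched
}
```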
cli/src/subfs_utils.rs  (+195 -34)
···
  Ok(record_output.value.into_static())
  }

- /// Merge blob maps from subfs records into the main blob map
- /// Returns the total number of blobs merged from all subfs records
- pub async fn merge_subfs_blob_maps(
+ /// Recursively fetch all subfs records (including nested ones)
+ /// Returns a list of (mount_path, SubfsRecord) tuples
+ /// Note: Multiple records can have the same mount_path (for flat-merged chunks)
+ pub async fn fetch_all_subfs_records_recursive(
      agent: &Agent<impl AgentSession + IdentityResolver>,
-     subfs_uris: Vec<(String, String)>,
-     main_blob_map: &mut HashMap<String, (BlobRef<'static>, String)>,
- ) -> miette::Result<usize> {
-     let mut total_merged = 0;
+     initial_uris: Vec<(String, String)>,
+ ) -> miette::Result<Vec<(String, SubfsRecord<'static>)>> {
+     use futures::stream::{self, StreamExt};

-     println!("Fetching {} subfs records for blob reuse...", subfs_uris.len());
+     let mut all_subfs: Vec<(String, SubfsRecord<'static>)> = Vec::new();
+     let mut fetched_uris: std::collections::HashSet<String> = std::collections::HashSet::new();
+     let mut to_fetch = initial_uris;

-     // Fetch all subfs records in parallel (but with some concurrency limit)
-     use futures::stream::{self, StreamExt};
+     if to_fetch.is_empty() {
+         return Ok(all_subfs);
+     }
+
+     println!("Found {} subfs records, fetching recursively...", to_fetch.len());
+
+     let mut iteration = 0;
+     const MAX_ITERATIONS: usize = 10;

-     let subfs_results: Vec<_> = stream::iter(subfs_uris)
-         .map(|(uri, mount_path)| async move {
-             match fetch_subfs_record(agent, &uri).await {
-                 Ok(record) => Some((record, mount_path)),
-                 Err(e) => {
-                     eprintln!(" ⚠️ Failed to fetch subfs {}: {}", uri, e);
-                     None
-                 }
-             }
-         })
-         .buffer_unordered(5)
-         .collect()
-         .await;
+     while !to_fetch.is_empty() && iteration < MAX_ITERATIONS {
+         iteration += 1;
+         println!(" Iteration {}: fetching {} subfs records...", iteration, to_fetch.len());
+
+         let subfs_results: Vec<_> = stream::iter(to_fetch.clone())
+             .map(|(uri, mount_path)| async move {
+                 match fetch_subfs_record(agent, &uri).await {
+                     Ok(record) => Some((mount_path, record, uri)),
+                     Err(e) => {
+                         eprintln!(" ⚠️ Failed to fetch subfs {}: {}", uri, e);
+                         None
+                     }
+                 }
+             })
+             .buffer_unordered(5)
+             .collect()
+             .await;
+
+         // Process results and find nested subfs
+         let mut newly_found_uris = Vec::new();
+         for result in subfs_results {
+             if let Some((mount_path, record, uri)) = result {
+                 println!(" ✓ Fetched subfs at {}", mount_path);
+
+                 // Extract nested subfs URIs from this record
+                 let nested_uris = extract_subfs_uris_from_subfs_dir(&record.root, mount_path.clone());
+                 newly_found_uris.extend(nested_uris);
+
+                 all_subfs.push((mount_path, record));
+                 fetched_uris.insert(uri);
+             }
+         }

-     // Convert subfs Directory to fs Directory for blob extraction
-     // Note: We need to extract blobs from the subfs record's root
-     for result in subfs_results {
-         if let Some((subfs_record, mount_path)) = result {
-             // Extract blobs from this subfs record's root
-             // The blob_map module works with fs::Directory, but subfs::Directory has the same structure
-             // We need to convert or work directly with the entries
+         // Filter out already-fetched URIs (based on URI, not path)
+         to_fetch = newly_found_uris
+             .into_iter()
+             .filter(|(uri, _)| !fetched_uris.contains(uri))
+             .collect();
+     }

-             let subfs_blob_map = extract_subfs_blobs(&subfs_record.root, mount_path.clone());
-             let count = subfs_blob_map.len();
+     if iteration >= MAX_ITERATIONS {
+         eprintln!("⚠️ Max iterations reached while fetching nested subfs");
+     }

-             for (path, blob_info) in subfs_blob_map {
-                 main_blob_map.insert(path, blob_info);
-             }
+     println!(" Total subfs records fetched: {}", all_subfs.len());
+
+     Ok(all_subfs)
+ }
+
+ /// Extract subfs URIs from a subfs::Directory
+ fn extract_subfs_uris_from_subfs_dir(
+     directory: &crate::place_wisp::subfs::Directory,
+     current_path: String,
+ ) -> Vec<(String, String)> {
+     let mut uris = Vec::new();
+
+     for entry in &directory.entries {
+         match &entry.node {
+             crate::place_wisp::subfs::EntryNode::Subfs(subfs_node) => {
+                 // Check if this is a chunk entry (chunk0, chunk1, etc.)
+                 // Chunks should be flat-merged, so use the parent's path
+                 // (length guard: a bare "chunk" name must not match)
+                 let mount_path = if entry.name.len() > 5 && entry.name.starts_with("chunk") &&
+                     entry.name.chars().skip(5).all(|c| c.is_ascii_digit()) {
+                     // This is a chunk - use parent's path for flat merge
+                     println!(" → Found chunk {} at {}, will flat-merge to {}", entry.name, current_path, current_path);
+                     current_path.clone()
+                 } else {
+                     // Normal subfs - append name to path
+                     if current_path.is_empty() {
+                         entry.name.to_string()
+                     } else {
+                         format!("{}/{}", current_path, entry.name)
+                     }
+                 };
+
+                 uris.push((subfs_node.subject.to_string(), mount_path));
+             }
+             crate::place_wisp::subfs::EntryNode::Directory(subdir) => {
+                 let full_path = if current_path.is_empty() {
+                     entry.name.to_string()
+                 } else {
+                     format!("{}/{}", current_path, entry.name)
+                 };
+                 let nested = extract_subfs_uris_from_subfs_dir(subdir, full_path);
+                 uris.extend(nested);
+             }
+             _ => {}
+         }
+     }

-         total_merged += count;
-         println!(" ✓ Merged {} blobs from subfs at {}", count, mount_path);
+     uris
+ }
+
+ /// Merge blob maps from subfs records into the main blob map (RECURSIVE)
+ /// Returns the total number of blobs merged from all subfs records
+ pub async fn merge_subfs_blob_maps(
+     agent: &Agent<impl AgentSession + IdentityResolver>,
+     subfs_uris: Vec<(String, String)>,
+     main_blob_map: &mut HashMap<String, (BlobRef<'static>, String)>,
+ ) -> miette::Result<usize> {
+     // Fetch all subfs records recursively
+     let all_subfs = fetch_all_subfs_records_recursive(agent, subfs_uris).await?;
+
+     let mut total_merged = 0;
+
+     // Extract blobs from all fetched subfs records
+     // Skip parent records that only contain chunk references (no actual files)
+     for (mount_path, subfs_record) in all_subfs {
+         // Check if this record only contains chunk subfs references (no files)
+         let only_has_chunks = subfs_record.root.entries.iter().all(|e| {
+             matches!(&e.node, crate::place_wisp::subfs::EntryNode::Subfs(_)) &&
+                 e.name.len() > 5 && e.name.starts_with("chunk") &&
+                 e.name.chars().skip(5).all(|c| c.is_ascii_digit())
+         });
+
+         if only_has_chunks && !subfs_record.root.entries.is_empty() {
+             // This is a parent containing only chunks - skip it, blobs are in the chunks
+             println!(" → Skipping parent subfs at {} ({} chunks, no files)", mount_path, subfs_record.root.entries.len());
+             continue;
+         }
+
+         let subfs_blob_map = extract_subfs_blobs(&subfs_record.root, mount_path.clone());
+         let count = subfs_blob_map.len();
+
+         for (path, blob_info) in subfs_blob_map {
+             main_blob_map.insert(path, blob_info);
+         }
+
+         total_merged += count;
+         println!(" ✓ Merged {} blobs from subfs at {}", count, mount_path);
      }

      Ok(total_merged)
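
The chunkN name test now appears twice above (once in URI extraction, once in blob-map merging). A small helper would keep the two call sites in sync and makes the edge case explicit: without a length guard, `.all()` over an empty iterator is vacuously true, so an entry literally named `chunk` would be misdetected. Hypothetical refactor, not part of the diff:

```rust
/// True for "chunk0", "chunk42", ...; false for "chunk" (no digits) and
/// anything with a non-digit suffix. Hypothetical extraction of the inline test.
fn is_chunk_name(name: &str) -> bool {
    name.strip_prefix("chunk")
        .map_or(false, |digits| !digits.is_empty() && digits.chars().all(|c| c.is_ascii_digit()))
}

#[cfg(test)]
mod chunk_name_tests {
    use super::is_chunk_name;

    #[test]
    fn detects_chunks() {
        assert!(is_chunk_name("chunk0"));
        assert!(is_chunk_name("chunk42"));
        assert!(!is_chunk_name("chunk"));    // vacuous-all edge case
        assert!(!is_chunk_name("chunk1a"));  // non-digit suffix
        assert!(!is_chunk_name("mychunk1")); // wrong prefix
    }
}
```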
···

  Ok(())
  }
+
+ /// Split a large directory into multiple smaller chunks
+ /// Returns a list of chunk directories, each small enough to fit in a subfs record
+ pub fn split_directory_into_chunks(
+     directory: &FsDirectory,
+     max_size: usize,
+ ) -> Vec<FsDirectory<'static>> {
+     use jacquard_common::CowStr;
+
+     let mut chunks = Vec::new();
+     let mut current_chunk_entries = Vec::new();
+     let mut current_chunk_size = 100; // Base size for directory structure
+
+     for entry in &directory.entries {
+         // Estimate the size of this entry
+         let entry_size = estimate_entry_size(entry);
+
+         // If adding this entry would exceed the max size, start a new chunk
+         if !current_chunk_entries.is_empty() && (current_chunk_size + entry_size > max_size) {
+             // Create a chunk from current entries
+             let chunk = FsDirectory::new()
+                 .r#type(CowStr::from("directory"))
+                 .entries(current_chunk_entries.clone())
+                 .build();
+
+             chunks.push(chunk);
+
+             // Start new chunk
+             current_chunk_entries.clear();
+             current_chunk_size = 100;
+         }
+
+         current_chunk_entries.push(entry.clone().into_static());
+         current_chunk_size += entry_size;
+     }
+
+     // Add the last chunk if it has any entries
+     if !current_chunk_entries.is_empty() {
+         let chunk = FsDirectory::new()
+             .r#type(CowStr::from("directory"))
+             .entries(current_chunk_entries)
+             .build();
+         chunks.push(chunk);
+     }
+
+     chunks
+ }
+
+ /// Estimate the JSON size of a single entry
+ fn estimate_entry_size(entry: &crate::place_wisp::fs::Entry) -> usize {
+     match serde_json::to_string(entry) {
+         Ok(json) => json.len(),
+         Err(_) => 500, // Conservative estimate if serialization fails
+     }
+ }
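
Two properties of this greedy packer are worth stating: it never splits an individual entry, so a single entry larger than max_size still lands alone in an oversized chunk; and it preserves the original entry list exactly, partitioned in order. A test-shaped sketch of the second property (the `sample_dir()` fixture is hypothetical):

```rust
#[cfg(test)]
mod split_tests {
    use super::*;

    #[test]
    fn greedy_packing_preserves_entries() {
        let dir = sample_dir(); // hypothetical fixture returning an FsDirectory
        let chunks = split_directory_into_chunks(&dir, 75 * 1024);

        // No entry is lost or duplicated: the chunks partition the original list.
        let total: usize = chunks.iter().map(|c| c.entries.len()).sum();
        assert_eq!(total, dir.entries.len());

        // Chunks preserve the original entry order.
        let names: Vec<_> = chunks.iter()
            .flat_map(|c| c.entries.iter().map(|e| e.name.to_string()))
            .collect();
        let original: Vec<_> = dir.entries.iter().map(|e| e.name.to_string()).collect();
        assert_eq!(names, original);
    }
}
```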