// Snapshot of branch `main` (632 lines, 27 kB), taken from the raw file view.
1use super::types::{BlobInfo, BlobName}; 2use crate::{Frontmatter, NotebookContext}; 3use dashmap::DashMap; 4use jacquard::{ 5 client::{Agent, AgentSession, AgentSessionExt}, 6 prelude::IdentityResolver, 7 types::{ 8 ident::AtIdentifier, 9 string::{CowStr, Did, Handle}, 10 }, 11}; 12use markdown_weaver::{CowStr as MdCowStr, Tag, WeaverAttributes}; 13use std::{path::PathBuf, sync::Arc}; 14use yaml_rust2::Yaml; 15 16pub struct AtProtoPreprocessContext<A: AgentSession + IdentityResolver> { 17 // Vault information 18 pub(crate) vault_contents: Arc<[PathBuf]>, 19 pub(crate) current_path: PathBuf, 20 21 // AT Protocol agent 22 agent: Arc<Agent<A>>, 23 24 // Notebook metadata 25 pub(crate) notebook_title: CowStr<'static>, 26 pub(crate) creator_did: Option<Did<'static>>, 27 pub(crate) creator_handle: Option<Handle<'static>>, 28 29 // Blob tracking 30 blob_tracking: Arc<DashMap<BlobName<'static>, BlobInfo>>, 31 32 // Shared with static site 33 frontmatter: Arc<DashMap<PathBuf, Frontmatter>>, 34 titles: Arc<DashMap<PathBuf, MdCowStr<'static>>>, 35 reference_map: Arc<DashMap<MdCowStr<'static>, PathBuf>>, 36 37 // Recursion tracking for markdown embeds 38 embed_depth: usize, 39} 40 41impl<A: AgentSession + IdentityResolver> Clone for AtProtoPreprocessContext<A> { 42 fn clone(&self) -> Self { 43 Self { 44 vault_contents: self.vault_contents.clone(), 45 current_path: self.current_path.clone(), 46 agent: self.agent.clone(), 47 notebook_title: self.notebook_title.clone(), 48 creator_did: self.creator_did.clone(), 49 creator_handle: self.creator_handle.clone(), 50 blob_tracking: self.blob_tracking.clone(), 51 frontmatter: self.frontmatter.clone(), 52 titles: self.titles.clone(), 53 reference_map: self.reference_map.clone(), 54 embed_depth: self.embed_depth, 55 } 56 } 57} 58 59impl<A: AgentSession + IdentityResolver> AtProtoPreprocessContext<A> { 60 pub fn new( 61 vault_contents: Arc<[PathBuf]>, 62 notebook_title: impl Into<CowStr<'static>>, 63 agent: Arc<Agent<A>>, 64 ) -> Self { 
65 Self { 66 vault_contents, 67 current_path: PathBuf::new(), 68 agent, 69 notebook_title: notebook_title.into(), 70 creator_did: None, 71 creator_handle: None, 72 blob_tracking: Arc::new(DashMap::new()), 73 frontmatter: Arc::new(DashMap::new()), 74 titles: Arc::new(DashMap::new()), 75 reference_map: Arc::new(DashMap::new()), 76 embed_depth: 0, 77 } 78 } 79 80 pub fn with_creator(mut self, did: Did<'static>, handle: Handle<'static>) -> Self { 81 self.creator_did = Some(did); 82 self.creator_handle = Some(handle); 83 self 84 } 85 86 pub fn blobs(&self) -> Vec<BlobInfo> { 87 self.blob_tracking 88 .iter() 89 .map(|entry| entry.value().clone()) 90 .collect() 91 } 92 93 pub fn set_current_path(&mut self, path: PathBuf) { 94 self.current_path = path; 95 } 96 97 fn with_depth(&self, depth: usize) -> Self { 98 Self { 99 vault_contents: self.vault_contents.clone(), 100 current_path: self.current_path.clone(), 101 agent: self.agent.clone(), 102 notebook_title: self.notebook_title.clone(), 103 creator_did: self.creator_did.clone(), 104 creator_handle: self.creator_handle.clone(), 105 blob_tracking: self.blob_tracking.clone(), 106 frontmatter: self.frontmatter.clone(), 107 titles: self.titles.clone(), 108 reference_map: self.reference_map.clone(), 109 embed_depth: depth, 110 } 111 } 112} 113 114// Stub NotebookContext implementation 115impl<A: AgentSession + IdentityResolver> NotebookContext for AtProtoPreprocessContext<A> { 116 fn set_entry_title(&self, title: MdCowStr<'_>) { 117 let path = self.current_path.clone(); 118 self.titles 119 .insert(path.clone(), title.clone().into_static()); 120 self.frontmatter.get_mut(&path).map(|frontmatter| { 121 if let Ok(mut yaml) = frontmatter.yaml.write() { 122 if yaml.get(0).is_some_and(|y| y.is_hash()) { 123 let map = yaml.get_mut(0).unwrap().as_mut_hash().unwrap(); 124 map.insert( 125 Yaml::String("title".into()), 126 Yaml::String(title.into_static().into()), 127 ); 128 } 129 } 130 }); 131 } 132 133 fn entry_title(&self) -> 
MdCowStr<'_> { 134 self.titles 135 .get(&self.current_path) 136 .map(|t| t.value().clone()) 137 .unwrap_or_else(|| { 138 // Fall back to file stem if no explicit title set 139 let title = self 140 .current_path 141 .file_stem() 142 .and_then(|s| s.to_str()) 143 .map(|s| MdCowStr::Borrowed(s)) 144 .unwrap_or(MdCowStr::Borrowed("Untitled")); 145 146 // Cache the derived title 147 self.titles 148 .insert(self.current_path.clone(), title.clone().into_static()); 149 title 150 }) 151 } 152 153 fn frontmatter(&self) -> Frontmatter { 154 self.frontmatter 155 .get(&self.current_path) 156 .map(|f| f.value().clone()) 157 .unwrap_or_default() 158 } 159 160 fn set_frontmatter(&self, frontmatter: Frontmatter) { 161 self.frontmatter 162 .insert(self.current_path.clone(), frontmatter); 163 } 164 165 #[tracing::instrument(skip(self, link), fields(dest = ?link))] 166 async fn handle_link<'s>(&self, link: Tag<'s>) -> Tag<'s> { 167 use crate::utils::lookup_filename_in_vault; 168 use weaver_common::LinkUri; 169 170 match &link { 171 Tag::Link { 172 link_type, 173 dest_url, 174 title, 175 id, 176 } => { 177 // Resolve link using LinkUri helper 178 let resolved = LinkUri::resolve(dest_url.as_ref(), &*self.agent).await; 179 180 match resolved { 181 LinkUri::Path(path) => { 182 // Local wikilink - look up in vault 183 if let Some(file_path) = 184 lookup_filename_in_vault(path.as_ref(), &self.vault_contents) 185 { 186 let entry_title = file_path 187 .file_stem() 188 .and_then(|s| s.to_str()) 189 .unwrap_or("untitled"); 190 let normalized_title = normalize_title(entry_title); 191 192 let canonical_url = if let Some(handle) = &self.creator_handle { 193 format!( 194 "/{}/{}/{}", 195 handle.as_ref(), 196 self.notebook_title.as_ref(), 197 normalized_title 198 ) 199 } else { 200 format!("/{}/{}", self.notebook_title.as_ref(), normalized_title) 201 }; 202 203 return Tag::Link { 204 link_type: *link_type, 205 dest_url: MdCowStr::Boxed(canonical_url.into_boxed_str()), 206 title: title.clone(), 207 
id: id.clone(), 208 }; 209 } 210 } 211 LinkUri::AtIdent(did, _handle) => { 212 // Profile link - use at://did format 213 let at_uri = format!("at://{}", did.as_ref()); 214 return Tag::Link { 215 link_type: *link_type, 216 dest_url: MdCowStr::Boxed(at_uri.into_boxed_str()), 217 title: title.clone(), 218 id: id.clone(), 219 }; 220 } 221 LinkUri::AtRecord(uri) => { 222 // AT URI - keep as-is or convert to HTTP 223 // For now, keep the at:// URI 224 return Tag::Link { 225 link_type: *link_type, 226 dest_url: MdCowStr::Boxed(uri.as_str().into()), 227 title: title.clone(), 228 id: id.clone(), 229 }; 230 } 231 _ => {} 232 } 233 234 // Pass through other link types (web URLs, headings, etc.) 235 link.clone() 236 } 237 _ => link, 238 } 239 } 240 241 #[tracing::instrument(skip(self, image), fields(dest = ?image))] 242 async fn handle_image<'s>(&self, image: Tag<'s>) -> Tag<'s> { 243 use crate::utils::is_local_path; 244 use jacquard::bytes::Bytes; 245 use jacquard::types::blob::MimeType; 246 use mime_sniffer::MimeTypeSniffer; 247 use tokio::fs; 248 249 match &image { 250 Tag::Image { 251 link_type, 252 dest_url, 253 title, 254 id, 255 attrs, 256 } => { 257 if is_local_path(dest_url) { 258 // Read local file 259 let file_path = if dest_url.starts_with('/') { 260 PathBuf::from(dest_url.as_ref()) 261 } else { 262 self.current_path 263 .parent() 264 .unwrap_or(&self.current_path) 265 .join(dest_url.as_ref()) 266 }; 267 268 tracing::debug!("Reading image file: {}", file_path.display()); 269 if let Ok(image_data) = fs::read(&file_path).await { 270 tracing::debug!( 271 "Read {} bytes from {}", 272 image_data.len(), 273 file_path.display() 274 ); 275 // Derive blob name from filename 276 let filename = file_path 277 .file_stem() 278 .and_then(|s| s.to_str()) 279 .unwrap_or("image"); 280 let blob_name = BlobName::from_filename(filename); 281 282 // Sniff mime type from data 283 let bytes = Bytes::from(image_data.clone()); 284 let mime = MimeType::new_owned( 285 bytes 286 
.sniff_mime_type() 287 .unwrap_or("application/octet-stream"), 288 ); 289 290 // Upload blob (dereference Arc) 291 tracing::debug!( 292 "Uploading image blob: {} ({} bytes)", 293 file_path.display(), 294 bytes.len() 295 ); 296 if let Ok(blob) = (*self.agent).upload_blob(bytes, mime.clone()).await { 297 use jacquard::IntoStatic; 298 299 // Store blob info 300 let blob_info = BlobInfo { 301 name: blob_name.clone(), 302 blob: blob.into_static(), 303 alt: if title.is_empty() { 304 None 305 } else { 306 Some(CowStr::Owned(title.as_ref().into())) 307 }, 308 }; 309 self.blob_tracking.insert(blob_name.clone(), blob_info); 310 311 // Rewrite to canonical path 312 let canonical_url = format!( 313 "/{}/image/{}", 314 self.notebook_title.as_ref(), 315 blob_name.as_str() 316 ); 317 318 return Tag::Image { 319 link_type: *link_type, 320 dest_url: MdCowStr::Boxed(canonical_url.into_boxed_str()), 321 title: title.clone(), 322 id: id.clone(), 323 attrs: attrs.clone(), 324 }; 325 } 326 } 327 } 328 // If not local or upload failed, pass through 329 image 330 } 331 _ => image, 332 } 333 } 334 335 #[tracing::instrument(skip(self, embed), fields(dest = ?embed))] 336 async fn handle_embed<'s>(&self, embed: Tag<'s>) -> Tag<'s> { 337 use crate::utils::lookup_filename_in_vault; 338 use weaver_common::LinkUri; 339 340 match &embed { 341 Tag::Embed { 342 embed_type, 343 dest_url, 344 title, 345 id, 346 attrs, 347 } => { 348 // Resolve embed using LinkUri helper 349 let resolved = LinkUri::resolve(dest_url.as_ref(), &*self.agent).await; 350 351 match resolved { 352 LinkUri::Path(path) => { 353 // Entry embed - look up in vault 354 if let Some(file_path) = 355 lookup_filename_in_vault(path.as_ref(), &self.vault_contents) 356 { 357 let entry_title = file_path 358 .file_stem() 359 .and_then(|s| s.to_str()) 360 .unwrap_or("untitled"); 361 let normalized_title = normalize_title(entry_title); 362 363 let canonical_url = if let Some(handle) = &self.creator_handle { 364 format!( 365 "/{}/{}/{}", 366 
handle.as_ref(), 367 self.notebook_title.as_ref(), 368 normalized_title 369 ) 370 } else { 371 format!("/{}/{}", self.notebook_title.as_ref(), normalized_title) 372 }; 373 // Markdown embed - look up in vault and render 374 //use tokio::fs; 375 376 // Check depth limit 377 const MAX_DEPTH: usize = 1; 378 if self.embed_depth >= MAX_DEPTH { 379 eprintln!("Max embed depth reached for {}", path.as_ref()); 380 return Tag::Embed { 381 embed_type: *embed_type, 382 dest_url: MdCowStr::Boxed(canonical_url.into_boxed_str()), 383 title: title.clone(), 384 id: id.clone(), 385 attrs: attrs.clone(), 386 }; 387 } 388 // // Read the markdown file 389 // match fs::read_to_string(&file_path).await { 390 // Ok(markdown_content) => { 391 // // Create a child context with incremented depth 392 // let mut child_ctx = self.with_depth(self.embed_depth + 1); 393 // child_ctx.current_path = file_path.clone(); 394 395 // // Render the markdown through the processor 396 // // We'll use markdown_weaver to parse and render to HTML 397 // use markdown_weaver::{Options, Parser}; 398 // use markdown_weaver_escape::StrWrite; 399 400 // let parser = Parser::new_ext(&markdown_content, Options::all()); 401 // let mut html_output = String::new(); 402 403 // // Process events through context callbacks 404 // for event in parser { 405 // match event { 406 // markdown_weaver::Event::Start(tag) => { 407 // let processed = match tag { 408 // Tag::Link { .. } => { 409 // child_ctx.handle_link(tag).await 410 // } 411 // Tag::Image { .. } => { 412 // child_ctx.handle_image(tag).await 413 // } 414 // Tag::Embed { .. 
} => { 415 // child_ctx.handle_embed(tag).await 416 // } 417 // _ => tag, 418 // }; 419 // // Simple HTML writing (reuse escape logic) 420 // match processed { 421 // Tag::Paragraph => { 422 // html_output.write_str("<p>").ok() 423 // } 424 // _ => None, 425 // }; 426 // } 427 // markdown_weaver::Event::End(tag_end) => { 428 // match tag_end { 429 // markdown_weaver::TagEnd::Paragraph => { 430 // html_output.write_str("</p>\n").ok() 431 // } 432 // _ => None, 433 // }; 434 // } 435 // markdown_weaver::Event::Text(text) => { 436 // use markdown_weaver_escape::escape_html_body_text; 437 // escape_html_body_text(&mut html_output, &text).ok(); 438 // } 439 // _ => {} 440 // } 441 // } 442 443 // let mut new_attrs = 444 // attrs.clone().unwrap_or_else(|| WeaverAttributes { 445 // classes: vec![], 446 // attrs: vec![], 447 // }); 448 449 // new_attrs.attrs.push(("content".into(), html_output.into())); 450 451 // return Tag::Embed { 452 // embed_type: *embed_type, 453 // dest_url: MdCowStr::Boxed(canonical_url.into_boxed_str()), 454 // title: title.clone(), 455 // id: id.clone(), 456 // attrs: Some(new_attrs), 457 // }; 458 // } 459 // Err(e) => { 460 // eprintln!("Failed to read file {:?}: {}", file_path, e); 461 // } 462 // } 463 464 return Tag::Embed { 465 embed_type: *embed_type, 466 dest_url: MdCowStr::Boxed(canonical_url.into_boxed_str()), 467 title: title.clone(), 468 id: id.clone(), 469 attrs: attrs.clone(), 470 }; 471 } 472 } 473 LinkUri::AtIdent(did, _handle) => { 474 // Profile embed - fetch and render 475 use crate::atproto::fetch_and_render_profile; 476 use markdown_weaver::WeaverAttributes; 477 478 let at_uri = format!("at://{}", did.as_ref()); 479 480 tracing::debug!("Fetching profile embed: {}", did.as_ref()); 481 // Fetch and render the profile 482 let content = match fetch_and_render_profile( 483 &AtIdentifier::Did(did.clone()), 484 &*self.agent, 485 ) 486 .await 487 { 488 Ok(html) => Some(html), 489 Err(e) => { 490 eprintln!("Failed to fetch profile {}: 
{}", did.as_ref(), e); 491 None 492 } 493 }; 494 495 // Build or update attributes 496 let mut new_attrs = attrs.clone().unwrap_or_else(|| WeaverAttributes { 497 classes: vec![], 498 attrs: vec![], 499 }); 500 501 if let Some(content_html) = content { 502 new_attrs 503 .attrs 504 .push(("content".into(), content_html.into())); 505 } 506 507 return Tag::Embed { 508 embed_type: *embed_type, 509 dest_url: MdCowStr::Boxed(at_uri.into_boxed_str()), 510 title: title.clone(), 511 id: id.clone(), 512 attrs: Some(new_attrs), 513 }; 514 } 515 LinkUri::AtRecord(uri) => { 516 // AT URI embed - fetch and render 517 use crate::atproto::{fetch_and_render_generic, fetch_and_render_post}; 518 use markdown_weaver::WeaverAttributes; 519 520 tracing::debug!("Fetching record embed: {}", uri.as_ref()); 521 // Determine if this is a known type 522 let content = if let Some(collection) = uri.collection() { 523 match collection.as_ref() { 524 "app.bsky.feed.post" => { 525 // Bluesky post 526 match fetch_and_render_post(&uri, &*self.agent).await { 527 Ok(html) => Some(html), 528 Err(e) => { 529 eprintln!( 530 "Failed to fetch post {}: {}", 531 uri.as_ref(), 532 e 533 ); 534 None 535 } 536 } 537 } 538 _ => { 539 // Generic record 540 match fetch_and_render_generic(&uri, &*self.agent).await { 541 Ok(html) => Some(html), 542 Err(e) => { 543 eprintln!( 544 "Failed to fetch record {}: {}", 545 uri.as_ref(), 546 e 547 ); 548 None 549 } 550 } 551 } 552 } 553 } else { 554 None 555 }; 556 557 // Build or update attributes 558 let mut new_attrs = attrs.clone().unwrap_or_else(|| WeaverAttributes { 559 classes: vec![], 560 attrs: vec![], 561 }); 562 563 if let Some(content_html) = content { 564 new_attrs 565 .attrs 566 .push(("content".into(), content_html.into())); 567 } 568 569 return Tag::Embed { 570 embed_type: *embed_type, 571 dest_url: MdCowStr::Boxed(uri.as_str().into()), 572 title: title.clone(), 573 id: id.clone(), 574 attrs: Some(new_attrs), 575 }; 576 } 577 578 _ => {} 579 } 580 581 // Pass 
through other embed types 582 embed.clone() 583 } 584 Tag::Image { .. } => { 585 // Some embeds come through as explicit Tag::Image 586 // Delegate to handle_image for image-specific processing 587 self.handle_image(embed).await 588 } 589 _ => embed, 590 } 591 } 592 593 fn handle_reference(&self, reference: MdCowStr<'_>) -> MdCowStr<'_> { 594 reference.into_static() 595 } 596 597 fn add_reference(&self, reference: MdCowStr<'_>) { 598 self.reference_map 599 .insert(reference.into_static(), self.current_path.clone()); 600 } 601} 602 603/// Normalize entry title to URL-safe format 604fn normalize_title(title: &str) -> String { 605 let mut normalized = String::new(); 606 let mut last_was_space = false; 607 608 for c in title.chars() { 609 if c.is_ascii_alphanumeric() { 610 normalized.push(c); 611 last_was_space = false; 612 } else if c.is_whitespace() && !last_was_space && !normalized.is_empty() { 613 normalized.push('_'); 614 last_was_space = true; 615 } 616 } 617 618 // Remove trailing underscore if present 619 if normalized.ends_with('_') { 620 normalized.pop(); 621 } 622 623 normalized 624} 625 626#[cfg(test)] 627mod tests { 628 use super::*; 629 630 // Tests require an actual Agent instance, which needs authentication setup. 631 // These will be tested via integration tests instead. 632}