editor extraction core sorta done?

Orual a8be2ece 13db455b

+4690 -16
+7
Cargo.lock
··· 12125 12125 name = "weaver-editor-core" 12126 12126 version = "0.1.0" 12127 12127 dependencies = [ 12128 + "jacquard", 12129 + "markdown-weaver", 12130 + "markdown-weaver-escape", 12128 12131 "ropey", 12129 12132 "smol_str", 12133 + "syntect", 12130 12134 "thiserror 2.0.17", 12131 12135 "tracing", 12136 + "weaver-api", 12137 + "weaver-common", 12138 + "weaver-renderer", 12132 12139 "web-time", 12133 12140 ] 12134 12141
+7
crates/weaver-editor-core/Cargo.toml
··· 10 10 thiserror = { workspace = true } 11 11 tracing = { workspace = true } 12 12 web-time = "1.1" 13 + markdown-weaver = { workspace = true } 14 + markdown-weaver-escape = { workspace = true } 15 + weaver-common = { path = "../weaver-common" } 16 + weaver-renderer = { path = "../weaver-renderer" } 17 + weaver-api = { path = "../weaver-api" } 18 + jacquard = { workspace = true } 19 + syntect = { workspace = true } 13 20 14 21 [dev-dependencies]
+461
crates/weaver-editor-core/src/document.rs
··· 1 + //! Core editor document trait and implementations. 2 + //! 3 + //! Defines the `EditorDocument` trait for abstracting editor behavior, 4 + //! allowing different storage strategies (plain fields vs Signals) while 5 + //! sharing the core editing logic. 6 + 7 + use std::ops::Range; 8 + 9 + use smol_str::SmolStr; 10 + use web_time::Instant; 11 + 12 + use crate::text::TextBuffer; 13 + use crate::types::{BLOCK_SYNTAX_ZONE, CompositionState, CursorState, EditInfo, Selection}; 14 + use crate::undo::UndoManager; 15 + 16 + /// Core trait for editor documents. 17 + /// 18 + /// Defines the interface for any editor implementation. Different backends 19 + /// can implement this trait with different storage strategies: 20 + /// - `PlainEditor<T>`: Simple field-based storage 21 + /// - Reactive implementations: Use Signals/state management 22 + /// 23 + /// The trait is generic over the buffer type, which must implement both 24 + /// `TextBuffer` (for text operations) and `UndoManager` (for undo/redo). 25 + pub trait EditorDocument { 26 + /// The buffer type used for text storage and undo. 27 + type Buffer: TextBuffer + UndoManager; 28 + 29 + // === Required: Buffer access === 30 + 31 + /// Get a reference to the underlying buffer. 32 + fn buffer(&self) -> &Self::Buffer; 33 + 34 + /// Get a mutable reference to the underlying buffer. 35 + fn buffer_mut(&mut self) -> &mut Self::Buffer; 36 + 37 + // === Required: Cursor/selection state === 38 + 39 + /// Get the current cursor state. 40 + fn cursor(&self) -> CursorState; 41 + 42 + /// Set the cursor state. 43 + fn set_cursor(&mut self, cursor: CursorState); 44 + 45 + /// Get the current selection, if any. 46 + fn selection(&self) -> Option<Selection>; 47 + 48 + /// Set the selection. 49 + fn set_selection(&mut self, selection: Option<Selection>); 50 + 51 + // === Required: Edit tracking === 52 + 53 + /// Get the last edit info, if any. 54 + fn last_edit(&self) -> Option<EditInfo>; 55 + 56 + /// Set the last edit info. 
57 + fn set_last_edit(&mut self, edit: Option<EditInfo>); 58 + 59 + // === Required: Composition (IME) state === 60 + 61 + /// Get the current composition state. 62 + fn composition(&self) -> Option<CompositionState>; 63 + 64 + /// Set the composition state. 65 + fn set_composition(&mut self, composition: Option<CompositionState>); 66 + 67 + // === Provided: Convenience accessors === 68 + 69 + /// Get the cursor offset. 70 + fn cursor_offset(&self) -> usize { 71 + self.cursor().offset 72 + } 73 + 74 + /// Set just the cursor offset, preserving other cursor state. 75 + fn set_cursor_offset(&mut self, offset: usize) { 76 + let mut cursor = self.cursor(); 77 + cursor.offset = offset; 78 + self.set_cursor(cursor); 79 + } 80 + 81 + /// Get the full content as a String. 82 + fn content_string(&self) -> String { 83 + self.buffer().to_string() 84 + } 85 + 86 + /// Get length in characters. 87 + fn len_chars(&self) -> usize { 88 + self.buffer().len_chars() 89 + } 90 + 91 + /// Get length in bytes. 92 + fn len_bytes(&self) -> usize { 93 + self.buffer().len_bytes() 94 + } 95 + 96 + /// Check if document is empty. 97 + fn is_empty(&self) -> bool { 98 + self.buffer().len_chars() == 0 99 + } 100 + 101 + /// Get a slice of the content. 102 + fn slice(&self, range: Range<usize>) -> Option<SmolStr> { 103 + self.buffer().slice(range) 104 + } 105 + 106 + /// Get character at offset. 107 + fn char_at(&self, offset: usize) -> Option<char> { 108 + self.buffer().char_at(offset) 109 + } 110 + 111 + /// Convert char offset to byte offset. 112 + fn char_to_byte(&self, char_offset: usize) -> usize { 113 + self.buffer().char_to_byte(char_offset) 114 + } 115 + 116 + /// Convert byte offset to char offset. 117 + fn byte_to_char(&self, byte_offset: usize) -> usize { 118 + self.buffer().byte_to_char(byte_offset) 119 + } 120 + 121 + /// Get selected text, if any. 
122 + fn selected_text(&self) -> Option<SmolStr> { 123 + self.selection() 124 + .and_then(|sel| self.buffer().slice(sel.to_range())) 125 + } 126 + 127 + // === Provided: Text operations === 128 + 129 + /// Insert text at char offset, returning edit info. 130 + fn insert(&mut self, offset: usize, text: &str) -> EditInfo { 131 + let contains_newline = text.contains('\n'); 132 + let in_block_syntax_zone = self.is_in_block_syntax_zone(offset); 133 + 134 + self.buffer_mut().insert(offset, text); 135 + 136 + let inserted_len = text.chars().count(); 137 + self.set_cursor_offset(offset + inserted_len); 138 + 139 + let edit = EditInfo { 140 + edit_char_pos: offset, 141 + inserted_len, 142 + deleted_len: 0, 143 + contains_newline, 144 + in_block_syntax_zone, 145 + doc_len_after: self.buffer().len_chars(), 146 + timestamp: Instant::now(), 147 + }; 148 + 149 + self.set_last_edit(Some(edit.clone())); 150 + edit 151 + } 152 + 153 + /// Delete char range, returning edit info. 154 + fn delete(&mut self, range: Range<usize>) -> EditInfo { 155 + let deleted_text = self.buffer().slice(range.clone()); 156 + let contains_newline = deleted_text 157 + .as_ref() 158 + .map(|s| s.contains('\n')) 159 + .unwrap_or(false); 160 + let in_block_syntax_zone = self.is_in_block_syntax_zone(range.start); 161 + let deleted_len = range.end - range.start; 162 + 163 + self.buffer_mut().delete(range.clone()); 164 + self.set_cursor_offset(range.start); 165 + 166 + let edit = EditInfo { 167 + edit_char_pos: range.start, 168 + inserted_len: 0, 169 + deleted_len, 170 + contains_newline, 171 + in_block_syntax_zone, 172 + doc_len_after: self.buffer().len_chars(), 173 + timestamp: Instant::now(), 174 + }; 175 + 176 + self.set_last_edit(Some(edit.clone())); 177 + edit 178 + } 179 + 180 + /// Replace char range with text, returning edit info. 
181 + fn replace(&mut self, range: Range<usize>, text: &str) -> EditInfo { 182 + let deleted_text = self.buffer().slice(range.clone()); 183 + let deleted_contains_newline = deleted_text 184 + .as_ref() 185 + .map(|s| s.contains('\n')) 186 + .unwrap_or(false); 187 + let contains_newline = text.contains('\n') || deleted_contains_newline; 188 + let in_block_syntax_zone = self.is_in_block_syntax_zone(range.start); 189 + let deleted_len = range.end - range.start; 190 + 191 + self.buffer_mut().delete(range.clone()); 192 + self.buffer_mut().insert(range.start, text); 193 + 194 + let inserted_len = text.chars().count(); 195 + self.set_cursor_offset(range.start + inserted_len); 196 + 197 + let edit = EditInfo { 198 + edit_char_pos: range.start, 199 + inserted_len, 200 + deleted_len, 201 + contains_newline, 202 + in_block_syntax_zone, 203 + doc_len_after: self.buffer().len_chars(), 204 + timestamp: Instant::now(), 205 + }; 206 + 207 + self.set_last_edit(Some(edit.clone())); 208 + edit 209 + } 210 + 211 + /// Delete the current selection, if any. 212 + fn delete_selection(&mut self) -> Option<EditInfo> { 213 + let sel = self.selection()?; 214 + self.set_selection(None); 215 + if sel.is_collapsed() { 216 + return None; 217 + } 218 + Some(self.delete(sel.to_range())) 219 + } 220 + 221 + // === Provided: Undo/Redo === 222 + 223 + fn undo(&mut self) -> bool { 224 + self.buffer_mut().undo() 225 + } 226 + 227 + fn redo(&mut self) -> bool { 228 + self.buffer_mut().redo() 229 + } 230 + 231 + fn can_undo(&self) -> bool { 232 + self.buffer().can_undo() 233 + } 234 + 235 + fn can_redo(&self) -> bool { 236 + self.buffer().can_redo() 237 + } 238 + 239 + fn clear_history(&mut self) { 240 + self.buffer_mut().clear_history(); 241 + } 242 + 243 + // === Provided: Helpers === 244 + 245 + /// Check if offset is in the block-syntax zone (first ~6 chars of line). 
246 + fn is_in_block_syntax_zone(&self, offset: usize) -> bool { 247 + let mut line_start = offset; 248 + while line_start > 0 { 249 + if let Some('\n') = self.buffer().char_at(line_start - 1) { 250 + break; 251 + } 252 + line_start -= 1; 253 + } 254 + offset - line_start < BLOCK_SYNTAX_ZONE 255 + } 256 + } 257 + 258 + /// Simple field-based implementation of EditorDocument. 259 + /// 260 + /// Stores cursor, selection, and edit state as plain fields. 261 + /// Use this for non-reactive contexts or as a base for testing. 262 + #[derive(Clone)] 263 + pub struct PlainEditor<T: TextBuffer + UndoManager> { 264 + buffer: T, 265 + cursor: CursorState, 266 + selection: Option<Selection>, 267 + last_edit: Option<EditInfo>, 268 + composition: Option<CompositionState>, 269 + } 270 + 271 + impl<T: TextBuffer + UndoManager + Default> Default for PlainEditor<T> { 272 + fn default() -> Self { 273 + Self::new(T::default()) 274 + } 275 + } 276 + 277 + impl<T: TextBuffer + UndoManager> PlainEditor<T> { 278 + /// Create a new editor with the given buffer. 279 + pub fn new(buffer: T) -> Self { 280 + Self { 281 + buffer, 282 + cursor: CursorState::default(), 283 + selection: None, 284 + last_edit: None, 285 + composition: None, 286 + } 287 + } 288 + 289 + /// Get direct access to the inner buffer (bypasses trait). 290 + pub fn inner(&self) -> &T { 291 + &self.buffer 292 + } 293 + 294 + /// Get direct mutable access to the inner buffer (bypasses trait). 
295 + pub fn inner_mut(&mut self) -> &mut T { 296 + &mut self.buffer 297 + } 298 + } 299 + 300 + impl<T: TextBuffer + UndoManager> EditorDocument for PlainEditor<T> { 301 + type Buffer = T; 302 + 303 + fn buffer(&self) -> &Self::Buffer { 304 + &self.buffer 305 + } 306 + 307 + fn buffer_mut(&mut self) -> &mut Self::Buffer { 308 + &mut self.buffer 309 + } 310 + 311 + fn cursor(&self) -> CursorState { 312 + self.cursor.clone() 313 + } 314 + 315 + fn set_cursor(&mut self, cursor: CursorState) { 316 + self.cursor = cursor; 317 + } 318 + 319 + fn selection(&self) -> Option<Selection> { 320 + self.selection.clone() 321 + } 322 + 323 + fn set_selection(&mut self, selection: Option<Selection>) { 324 + self.selection = selection; 325 + } 326 + 327 + fn last_edit(&self) -> Option<EditInfo> { 328 + self.last_edit.clone() 329 + } 330 + 331 + fn set_last_edit(&mut self, edit: Option<EditInfo>) { 332 + self.last_edit = edit; 333 + } 334 + 335 + fn composition(&self) -> Option<CompositionState> { 336 + self.composition.clone() 337 + } 338 + 339 + fn set_composition(&mut self, composition: Option<CompositionState>) { 340 + self.composition = composition; 341 + } 342 + } 343 + 344 + #[cfg(test)] 345 + mod tests { 346 + use super::*; 347 + use crate::{EditorRope, UndoableBuffer}; 348 + 349 + type TestEditor = PlainEditor<UndoableBuffer<EditorRope>>; 350 + 351 + fn make_editor(content: &str) -> TestEditor { 352 + let rope = EditorRope::from_str(content); 353 + let buf = UndoableBuffer::new(rope, 100); 354 + PlainEditor::new(buf) 355 + } 356 + 357 + #[test] 358 + fn test_basic_insert() { 359 + let mut editor = make_editor("hello"); 360 + assert_eq!(editor.content_string(), "hello"); 361 + 362 + let edit = editor.insert(5, " world"); 363 + assert_eq!(editor.content_string(), "hello world"); 364 + assert_eq!(edit.inserted_len, 6); 365 + assert_eq!(editor.cursor_offset(), 11); 366 + } 367 + 368 + #[test] 369 + fn test_delete() { 370 + let mut editor = make_editor("hello world"); 371 + 372 
+ let edit = editor.delete(5..11); 373 + assert_eq!(editor.content_string(), "hello"); 374 + assert_eq!(edit.deleted_len, 6); 375 + assert_eq!(editor.cursor_offset(), 5); 376 + } 377 + 378 + #[test] 379 + fn test_replace() { 380 + let mut editor = make_editor("hello world"); 381 + 382 + let edit = editor.replace(6..11, "rust"); 383 + assert_eq!(editor.content_string(), "hello rust"); 384 + assert_eq!(edit.deleted_len, 5); 385 + assert_eq!(edit.inserted_len, 4); 386 + } 387 + 388 + #[test] 389 + fn test_undo_redo() { 390 + let mut editor = make_editor("hello"); 391 + 392 + editor.insert(5, " world"); 393 + assert_eq!(editor.content_string(), "hello world"); 394 + 395 + assert!(editor.undo()); 396 + assert_eq!(editor.content_string(), "hello"); 397 + 398 + assert!(editor.redo()); 399 + assert_eq!(editor.content_string(), "hello world"); 400 + } 401 + 402 + #[test] 403 + fn test_selection() { 404 + let mut editor = make_editor("hello world"); 405 + 406 + editor.set_selection(Some(Selection::new(0, 5))); 407 + assert_eq!(editor.selected_text(), Some("hello".into())); 408 + 409 + let edit = editor.delete_selection(); 410 + assert!(edit.is_some()); 411 + assert_eq!(editor.content_string(), " world"); 412 + assert!(editor.selection().is_none()); 413 + } 414 + 415 + #[test] 416 + fn test_block_syntax_zone() { 417 + let mut editor = make_editor("# heading\nparagraph"); 418 + 419 + // Position 0 is in block syntax zone 420 + let edit = editor.insert(0, "x"); 421 + assert!(edit.in_block_syntax_zone); 422 + 423 + // Position after newline (start of "paragraph") is also in zone 424 + // Original was "# heading\nparagraph", after insert "x# heading\nparagraph" 425 + // Position 11 is start of "paragraph" line 426 + let edit = editor.insert(11, "y"); 427 + assert!(edit.in_block_syntax_zone); 428 + } 429 + 430 + #[test] 431 + fn test_composition_state() { 432 + let mut editor = make_editor("hello"); 433 + 434 + assert!(editor.composition().is_none()); 435 + 436 + let comp = 
CompositionState::new(5, "わ".into()); 437 + editor.set_composition(Some(comp.clone())); 438 + 439 + assert_eq!(editor.composition(), Some(comp)); 440 + 441 + editor.set_composition(None); 442 + assert!(editor.composition().is_none()); 443 + } 444 + 445 + #[test] 446 + fn test_offset_conversions() { 447 + let editor = make_editor("héllo wörld"); // multi-byte chars 448 + 449 + // 'é' is 2 bytes, 'ö' is 2 bytes 450 + // chars: h é l l o w ö r l d 451 + // idx: 0 1 2 3 4 5 6 7 8 9 10 452 + 453 + assert_eq!(editor.len_chars(), 11); 454 + assert!(editor.len_bytes() > 11); // multi-byte chars 455 + 456 + // char 1 ('é') starts at byte 1 457 + assert_eq!(editor.char_to_byte(1), 1); 458 + // char 2 ('l') starts after 'é' (2 bytes) 459 + assert_eq!(editor.char_to_byte(2), 3); 460 + } 461 + }
+13 -3
crates/weaver-editor-core/src/lib.rs
··· 3 3 //! This crate provides: 4 4 //! - `TextBuffer` trait for text storage abstraction 5 5 //! - `EditorRope` - ropey-backed implementation 6 - //! - `EditorDocument<T>` - generic document with undo support 7 - //! - Rendering, actions, formatting - all generic over TextBuffer 6 + //! - `UndoableBuffer<T>` - TextBuffer wrapper with undo/redo 7 + //! - `EditorDocument` trait - interface for editor implementations 8 + //! - `PlainEditor<T>` - simple field-based EditorDocument impl 9 + //! - Rendering types and offset mapping utilities 8 10 11 + pub mod document; 9 12 pub mod offset_map; 10 13 pub mod paragraph; 14 + pub mod render; 11 15 pub mod syntax; 12 16 pub mod text; 13 17 pub mod types; 18 + pub mod undo; 14 19 pub mod visibility; 20 + pub mod writer; 15 21 16 22 pub use offset_map::{ 17 23 OffsetMapping, RenderResult, SnapDirection, SnappedPosition, find_mapping_for_byte, ··· 22 28 pub use syntax::{SyntaxSpanInfo, SyntaxType, classify_syntax}; 23 29 pub use text::{EditorRope, TextBuffer}; 24 30 pub use types::{ 25 - Affinity, CompositionState, CursorState, EditInfo, Selection, BLOCK_SYNTAX_ZONE, 31 + Affinity, CompositionState, CursorState, EditInfo, EditorImage, Selection, BLOCK_SYNTAX_ZONE, 26 32 }; 33 + pub use document::{EditorDocument, PlainEditor}; 34 + pub use render::{EmbedContentProvider, ImageResolver, WikilinkValidator}; 35 + pub use undo::{UndoManager, UndoableBuffer}; 27 36 pub use visibility::VisibilityState; 37 + pub use writer::{EditorImageResolver, EditorWriter, SegmentedWriter, WriterResult};
+15 -13
crates/weaver-editor-core/src/offset_map.rs
··· 4 4 //! and content gets split across nodes (syntax highlighting). Offset maps 5 5 //! track how source byte positions map to DOM node positions. 6 6 7 + use smol_str::SmolStr; 7 8 use std::ops::Range; 8 9 9 10 /// Result of rendering markdown with offset tracking. ··· 46 47 47 48 /// DOM node ID containing this content 48 49 /// For invisible content, this is the nearest visible container 49 - pub node_id: String, 50 + pub node_id: SmolStr, 50 51 51 52 /// Position within the node 52 53 /// - If child_index is Some: cursor at that child index in the element ··· 284 285 #[cfg(test)] 285 286 mod tests { 286 287 use super::*; 288 + use smol_str::ToSmolStr; 287 289 288 290 #[test] 289 291 fn test_find_mapping_by_byte() { ··· 291 293 OffsetMapping { 292 294 byte_range: 0..2, 293 295 char_range: 0..2, 294 - node_id: "n0".to_string(), 296 + node_id: "n0".to_smolstr(), 295 297 char_offset_in_node: 0, 296 298 child_index: None, 297 299 utf16_len: 0, // invisible ··· 299 301 OffsetMapping { 300 302 byte_range: 2..5, 301 303 char_range: 2..5, 302 - node_id: "n0".to_string(), 304 + node_id: "n0".to_smolstr(), 303 305 char_offset_in_node: 0, 304 306 child_index: None, 305 307 utf16_len: 3, ··· 307 309 OffsetMapping { 308 310 byte_range: 5..7, 309 311 char_range: 5..7, 310 - node_id: "n0".to_string(), 312 + node_id: "n0".to_smolstr(), 311 313 char_offset_in_node: 3, 312 314 child_index: None, 313 315 utf16_len: 0, // invisible ··· 336 338 OffsetMapping { 337 339 byte_range: 0..2, 338 340 char_range: 0..2, 339 - node_id: "n0".to_string(), 341 + node_id: "n0".to_smolstr(), 340 342 char_offset_in_node: 0, 341 343 child_index: None, 342 344 utf16_len: 0, // invisible ··· 344 346 OffsetMapping { 345 347 byte_range: 2..5, 346 348 char_range: 2..5, 347 - node_id: "n0".to_string(), 349 + node_id: "n0".to_smolstr(), 348 350 char_offset_in_node: 0, 349 351 child_index: None, 350 352 utf16_len: 3, ··· 352 354 OffsetMapping { 353 355 byte_range: 5..7, 354 356 char_range: 5..7, 355 - 
node_id: "n0".to_string(), 357 + node_id: "n0".to_smolstr(), 356 358 char_offset_in_node: 3, 357 359 child_index: None, 358 360 utf16_len: 0, // invisible ··· 380 382 let mapping = OffsetMapping { 381 383 byte_range: 10..20, 382 384 char_range: 10..20, 383 - node_id: "test".to_string(), 385 + node_id: "test".to_smolstr(), 384 386 char_offset_in_node: 0, 385 387 child_index: None, 386 388 utf16_len: 5, ··· 398 400 let mapping = OffsetMapping { 399 401 byte_range: 10..20, 400 402 char_range: 8..15, // emoji example: fewer chars than bytes 401 - node_id: "test".to_string(), 403 + node_id: "test".to_smolstr(), 402 404 char_offset_in_node: 0, 403 405 child_index: None, 404 406 utf16_len: 5, ··· 416 418 OffsetMapping { 417 419 byte_range: 0..2, 418 420 char_range: 0..2, 419 - node_id: "n0".to_string(), 421 + node_id: "n0".to_smolstr(), 420 422 char_offset_in_node: 0, 421 423 child_index: None, 422 424 utf16_len: 0, // invisible: "![" ··· 424 426 OffsetMapping { 425 427 byte_range: 2..5, 426 428 char_range: 2..5, 427 - node_id: "n0".to_string(), 429 + node_id: "n0".to_smolstr(), 428 430 char_offset_in_node: 0, 429 431 child_index: None, 430 432 utf16_len: 3, // visible: "alt" ··· 432 434 OffsetMapping { 433 435 byte_range: 5..15, 434 436 char_range: 5..15, 435 - node_id: "n0".to_string(), 437 + node_id: "n0".to_smolstr(), 436 438 char_offset_in_node: 3, 437 439 child_index: None, 438 440 utf16_len: 0, // invisible: "](url.png)" ··· 440 442 OffsetMapping { 441 443 byte_range: 15..20, 442 444 char_range: 15..20, 443 - node_id: "n0".to_string(), 445 + node_id: "n0".to_smolstr(), 444 446 char_offset_in_node: 3, 445 447 child_index: None, 446 448 utf16_len: 5, // visible: " text"
+192
crates/weaver-editor-core/src/render.rs
//! Rendering traits for the editor.
//!
//! Rendering needs answers to a few questions that only the host
//! application can give:
//! - what HTML should stand in for an embed URL,
//! - which real URL an image reference points at,
//! - whether a wikilink target exists.
//!
//! Each question is modelled as a small trait here; implementations are
//! provided by the consuming application (e.g., weaver-app).

/// Provides HTML content for embedded resources.
///
/// Consulted while rendering markdown embeds (e.g., `![[at://...]]`) to
/// obtain the pre-rendered HTML for the embed.
pub trait EmbedContentProvider {
    /// Look up the HTML for an embed URL.
    ///
    /// `Some(html)` means the content is available; `None` asks the renderer
    /// to emit a placeholder instead.
    fn get_embed_html(&self, url: &str) -> Option<&str>;
}

/// `()` knows about no embeds at all.
impl EmbedContentProvider for () {
    fn get_embed_html(&self, _url: &str) -> Option<&str> {
        None
    }
}

/// Resolves image URLs from markdown to actual paths.
///
/// Markdown may reference images by name (e.g., `/image/photo.jpg`);
/// implementors translate those to CDN URLs or data URLs.
pub trait ImageResolver {
    /// Translate a markdown image URL into the URL to actually load.
    ///
    /// `Some(resolved_url)` when the image is known; `None` leaves the
    /// original URL untouched.
    fn resolve_image_url(&self, url: &str) -> Option<String>;
}

/// `()` resolves nothing, so every URL is used as written.
impl ImageResolver for () {
    fn resolve_image_url(&self, _url: &str) -> Option<String> {
        None
    }
}

/// Validates wikilinks during rendering.
///
/// The answer is used to add CSS classes indicating whether a wikilink
/// target exists.
pub trait WikilinkValidator {
    /// Report whether the wikilink target exists.
    fn is_valid_link(&self, target: &str) -> bool;
}

/// `()` treats every link as valid.
impl WikilinkValidator for () {
    fn is_valid_link(&self, _target: &str) -> bool {
        true
    }
}

// Forwarding implementations for common wrappers: shared references delegate
// to the referent, and `Option` delegates to the inner value when present.

impl<T: EmbedContentProvider> EmbedContentProvider for &T {
    fn get_embed_html(&self, url: &str) -> Option<&str> {
        T::get_embed_html(*self, url)
    }
}

impl<T: ImageResolver> ImageResolver for &T {
    fn resolve_image_url(&self, url: &str) -> Option<String> {
        T::resolve_image_url(*self, url)
    }
}

impl<T: WikilinkValidator> WikilinkValidator for &T {
    fn is_valid_link(&self, target: &str) -> bool {
        T::is_valid_link(*self, target)
    }
}

impl<T: EmbedContentProvider> EmbedContentProvider for Option<T> {
    /// A missing provider has no content for any URL.
    fn get_embed_html(&self, url: &str) -> Option<&str> {
        match self {
            Some(provider) => provider.get_embed_html(url),
            None => None,
        }
    }
}

impl<T: ImageResolver> ImageResolver for Option<T> {
    /// A missing resolver leaves every URL unchanged.
    fn resolve_image_url(&self, url: &str) -> Option<String> {
        match self {
            Some(resolver) => resolver.resolve_image_url(url),
            None => None,
        }
    }
}

impl<T: WikilinkValidator> WikilinkValidator for Option<T> {
    /// A missing validator accepts every link.
    fn is_valid_link(&self, target: &str) -> bool {
        match self {
            Some(validator) => validator.is_valid_link(target),
            None => true,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    struct TestEmbedProvider;

    impl EmbedContentProvider for TestEmbedProvider {
        fn get_embed_html(&self, url: &str) -> Option<&str> {
            match url {
                "at://test/embed" => Some("<div>Test Embed</div>"),
                _ => None,
            }
        }
    }

    struct TestImageResolver;

    impl ImageResolver for TestImageResolver {
        fn resolve_image_url(&self, url: &str) -> Option<String> {
            if !url.starts_with("/image/") {
                return None;
            }
            Some(format!("https://cdn.example.com{}", url))
        }
    }

    struct TestWikilinkValidator {
        valid: Vec<String>,
    }

    impl WikilinkValidator for TestWikilinkValidator {
        fn is_valid_link(&self, target: &str) -> bool {
            self.valid.iter().any(|known| known == target)
        }
    }

    #[test]
    fn test_embed_provider() {
        let provider = TestEmbedProvider;
        assert_eq!(
            provider.get_embed_html("at://test/embed"),
            Some("<div>Test Embed</div>")
        );
        assert_eq!(provider.get_embed_html("at://other"), None);
    }

    #[test]
    fn test_image_resolver() {
        let resolver = TestImageResolver;
        assert_eq!(
            resolver.resolve_image_url("/image/photo.jpg"),
            Some("https://cdn.example.com/image/photo.jpg".to_string())
        );
        assert_eq!(resolver.resolve_image_url("https://other.com/img.png"), None);
    }

    #[test]
    fn test_wikilink_validator() {
        let validator = TestWikilinkValidator {
            valid: vec!["Home".to_string(), "About".to_string()],
        };
        assert!(validator.is_valid_link("Home"));
        assert!(validator.is_valid_link("About"));
        assert!(!validator.is_valid_link("Missing"));
    }

    #[test]
    fn test_unit_impls() {
        let embed: () = ();
        assert_eq!(embed.get_embed_html("anything"), None);

        let image: () = ();
        assert_eq!(image.resolve_image_url("anything"), None);

        let wiki: () = ();
        assert!(wiki.is_valid_link("anything")); // default true
    }

    #[test]
    fn test_option_impls() {
        let some_provider: Option<TestEmbedProvider> = Some(TestEmbedProvider);
        assert_eq!(
            some_provider.get_embed_html("at://test/embed"),
            Some("<div>Test Embed</div>")
        );

        let none_provider: Option<TestEmbedProvider> = None;
        assert_eq!(none_provider.get_embed_html("at://test/embed"), None);
    }
}
+13
crates/weaver-editor-core/src/types.rs
··· 3 3 //! These types are framework-agnostic and can be used with any text buffer implementation. 4 4 5 5 use std::ops::Range; 6 + 7 + use jacquard::types::string::AtUri; 8 + use weaver_api::sh_weaver::embed::images::Image; 6 9 use web_time::Instant; 10 + 11 + /// Image stored in the editor, with optional publish state tracking. 12 + #[derive(Clone, Debug)] 13 + pub struct EditorImage { 14 + /// The lexicon Image type (deserialized via from_json_value) 15 + pub image: Image<'static>, 16 + /// AT-URI of the PublishedBlob record (for cleanup on publish/delete). 17 + /// None for existing images that are already in an entry record. 18 + pub published_blob_uri: Option<AtUri<'static>>, 19 + } 7 20 8 21 /// Cursor state including position and affinity. 9 22 #[derive(Clone, Debug, Copy, PartialEq, Eq)]
+333
crates/weaver-editor-core/src/undo.rs
··· 1 + //! Undo/redo management for editor operations. 2 + //! 3 + //! Provides: 4 + //! - `UndoManager` trait for abstracting undo implementations 5 + //! - `UndoableBuffer<T>` - wraps a TextBuffer and provides undo/redo 6 + 7 + use std::ops::Range; 8 + 9 + use smol_str::{SmolStr, ToSmolStr}; 10 + 11 + use crate::text::TextBuffer; 12 + 13 + /// Trait for managing undo/redo operations. 14 + /// 15 + /// Implementations must actually perform the undo/redo, not just track state. 16 + /// For local editing, use `UndoableBuffer<T>` which wraps a TextBuffer. 17 + /// For Loro, wrap LoroText + loro::UndoManager together. 18 + pub trait UndoManager { 19 + /// Check if undo is available. 20 + fn can_undo(&self) -> bool; 21 + 22 + /// Check if redo is available. 23 + fn can_redo(&self) -> bool; 24 + 25 + /// Perform undo. Returns true if successful. 26 + fn undo(&mut self) -> bool; 27 + 28 + /// Perform redo. Returns true if successful. 29 + fn redo(&mut self) -> bool; 30 + 31 + /// Clear all undo/redo history. 32 + fn clear_history(&mut self); 33 + } 34 + 35 + /// A recorded edit operation for undo/redo. 36 + #[derive(Debug, Clone)] 37 + struct EditOperation { 38 + /// Character position where edit occurred 39 + pos: usize, 40 + /// Text that was deleted (empty for pure insertions) 41 + deleted: SmolStr, 42 + /// Text that was inserted (empty for pure deletions) 43 + inserted: SmolStr, 44 + } 45 + 46 + /// A TextBuffer wrapper that tracks edits and provides undo/redo. 47 + /// 48 + /// This is the standard way to get undo support for local editing. 49 + /// All mutations go through this wrapper, which records them for undo. 
50 + #[derive(Clone)] 51 + pub struct UndoableBuffer<T: TextBuffer> { 52 + buffer: T, 53 + undo_stack: Vec<EditOperation>, 54 + redo_stack: Vec<EditOperation>, 55 + max_steps: usize, 56 + } 57 + 58 + impl<T: TextBuffer> Default for UndoableBuffer<T> { 59 + fn default() -> Self { 60 + Self::new(T::default(), 100) 61 + } 62 + } 63 + 64 + impl<T: TextBuffer> UndoableBuffer<T> { 65 + /// Create a new undoable buffer wrapping the given buffer. 66 + pub fn new(buffer: T, max_steps: usize) -> Self { 67 + Self { 68 + buffer, 69 + undo_stack: Vec::new(), 70 + redo_stack: Vec::new(), 71 + max_steps, 72 + } 73 + } 74 + 75 + /// Get a reference to the inner buffer. 76 + pub fn inner(&self) -> &T { 77 + &self.buffer 78 + } 79 + 80 + /// Get a mutable reference to the inner buffer. 81 + /// WARNING: Edits made directly bypass undo tracking! 82 + pub fn inner_mut(&mut self) -> &mut T { 83 + &mut self.buffer 84 + } 85 + 86 + /// Record an operation (called internally by TextBuffer impl). 87 + fn record_op(&mut self, pos: usize, deleted: &str, inserted: &str) { 88 + // Clear redo stack on new edit 89 + self.redo_stack.clear(); 90 + 91 + let op = EditOperation { 92 + pos, 93 + deleted: deleted.to_smolstr(), 94 + inserted: inserted.to_smolstr(), 95 + }; 96 + 97 + self.undo_stack.push(op); 98 + 99 + // Trim if over max 100 + while self.undo_stack.len() > self.max_steps { 101 + self.undo_stack.remove(0); 102 + } 103 + } 104 + } 105 + 106 + // Implement TextBuffer by delegating to inner buffer + recording operations 107 + impl<T: TextBuffer> TextBuffer for UndoableBuffer<T> { 108 + fn len_bytes(&self) -> usize { 109 + self.buffer.len_bytes() 110 + } 111 + 112 + fn len_chars(&self) -> usize { 113 + self.buffer.len_chars() 114 + } 115 + 116 + fn insert(&mut self, char_offset: usize, text: &str) { 117 + self.record_op(char_offset, "", text); 118 + self.buffer.insert(char_offset, text); 119 + } 120 + 121 + fn delete(&mut self, char_range: Range<usize>) { 122 + // Get the text being deleted 
for undo 123 + let deleted = self 124 + .buffer 125 + .slice(char_range.clone()) 126 + .map(|s| s.to_string()) 127 + .unwrap_or_default(); 128 + self.record_op(char_range.start, &deleted, ""); 129 + self.buffer.delete(char_range); 130 + } 131 + 132 + fn slice(&self, char_range: Range<usize>) -> Option<SmolStr> { 133 + self.buffer.slice(char_range) 134 + } 135 + 136 + fn char_at(&self, char_offset: usize) -> Option<char> { 137 + self.buffer.char_at(char_offset) 138 + } 139 + 140 + fn to_string(&self) -> String { 141 + self.buffer.to_string() 142 + } 143 + 144 + fn char_to_byte(&self, char_offset: usize) -> usize { 145 + self.buffer.char_to_byte(char_offset) 146 + } 147 + 148 + fn byte_to_char(&self, byte_offset: usize) -> usize { 149 + self.buffer.byte_to_char(byte_offset) 150 + } 151 + } 152 + 153 + impl<T: TextBuffer> UndoManager for UndoableBuffer<T> { 154 + fn can_undo(&self) -> bool { 155 + !self.undo_stack.is_empty() 156 + } 157 + 158 + fn can_redo(&self) -> bool { 159 + !self.redo_stack.is_empty() 160 + } 161 + 162 + fn undo(&mut self) -> bool { 163 + let Some(op) = self.undo_stack.pop() else { 164 + return false; 165 + }; 166 + 167 + // Apply inverse: delete what was inserted, insert what was deleted 168 + let inserted_chars = op.inserted.chars().count(); 169 + if inserted_chars > 0 { 170 + self.buffer.delete(op.pos..op.pos + inserted_chars); 171 + } 172 + if !op.deleted.is_empty() { 173 + self.buffer.insert(op.pos, &op.deleted); 174 + } 175 + 176 + self.redo_stack.push(op); 177 + true 178 + } 179 + 180 + fn redo(&mut self) -> bool { 181 + let Some(op) = self.redo_stack.pop() else { 182 + return false; 183 + }; 184 + 185 + // Re-apply original: delete what was deleted, insert what was inserted 186 + let deleted_chars = op.deleted.chars().count(); 187 + if deleted_chars > 0 { 188 + self.buffer.delete(op.pos..op.pos + deleted_chars); 189 + } 190 + if !op.inserted.is_empty() { 191 + self.buffer.insert(op.pos, &op.inserted); 192 + } 193 + 194 + 
self.undo_stack.push(op); 195 + true 196 + } 197 + 198 + fn clear_history(&mut self) { 199 + self.undo_stack.clear(); 200 + self.redo_stack.clear(); 201 + } 202 + } 203 + 204 + #[cfg(test)] 205 + mod tests { 206 + use super::*; 207 + use crate::EditorRope; 208 + 209 + #[test] 210 + fn test_undoable_buffer_insert_undo() { 211 + let rope = EditorRope::from_str("hello"); 212 + let mut buf = UndoableBuffer::new(rope, 100); 213 + 214 + assert_eq!(buf.to_string(), "hello"); 215 + assert!(!buf.can_undo()); 216 + 217 + // Insert " world" 218 + buf.insert(5, " world"); 219 + assert_eq!(buf.to_string(), "hello world"); 220 + assert!(buf.can_undo()); 221 + 222 + // Undo 223 + assert!(buf.undo()); 224 + assert_eq!(buf.to_string(), "hello"); 225 + assert!(!buf.can_undo()); 226 + assert!(buf.can_redo()); 227 + 228 + // Redo 229 + assert!(buf.redo()); 230 + assert_eq!(buf.to_string(), "hello world"); 231 + assert!(buf.can_undo()); 232 + assert!(!buf.can_redo()); 233 + } 234 + 235 + #[test] 236 + fn test_undoable_buffer_delete_undo() { 237 + let rope = EditorRope::from_str("hello world"); 238 + let mut buf = UndoableBuffer::new(rope, 100); 239 + 240 + // Delete " world" 241 + buf.delete(5..11); 242 + assert_eq!(buf.to_string(), "hello"); 243 + assert!(buf.can_undo()); 244 + 245 + // Undo 246 + assert!(buf.undo()); 247 + assert_eq!(buf.to_string(), "hello world"); 248 + } 249 + 250 + #[test] 251 + fn test_undoable_buffer_replace_undo() { 252 + let rope = EditorRope::from_str("hello world"); 253 + let mut buf = UndoableBuffer::new(rope, 100); 254 + 255 + // Replace "world" with "rust" 256 + buf.delete(6..11); 257 + buf.insert(6, "rust"); 258 + assert_eq!(buf.to_string(), "hello rust"); 259 + 260 + // Undo insert 261 + assert!(buf.undo()); 262 + assert_eq!(buf.to_string(), "hello "); 263 + 264 + // Undo delete 265 + assert!(buf.undo()); 266 + assert_eq!(buf.to_string(), "hello world"); 267 + } 268 + 269 + #[test] 270 + fn test_new_edit_clears_redo() { 271 + let rope = 
EditorRope::from_str("abc"); 272 + let mut buf = UndoableBuffer::new(rope, 100); 273 + 274 + buf.insert(3, "d"); 275 + assert!(buf.undo()); 276 + assert!(buf.can_redo()); 277 + 278 + // New edit should clear redo 279 + buf.insert(3, "e"); 280 + assert!(!buf.can_redo()); 281 + } 282 + 283 + #[test] 284 + fn test_max_steps() { 285 + let rope = EditorRope::from_str(""); 286 + let mut buf = UndoableBuffer::new(rope, 3); 287 + 288 + buf.insert(0, "a"); 289 + buf.insert(1, "b"); 290 + buf.insert(2, "c"); 291 + buf.insert(3, "d"); // should evict "a" 292 + 293 + assert_eq!(buf.to_string(), "abcd"); 294 + 295 + // Should only be able to undo 3 times 296 + assert!(buf.undo()); // removes d 297 + assert!(buf.undo()); // removes c 298 + assert!(buf.undo()); // removes b 299 + assert!(!buf.undo()); // a was evicted 300 + 301 + assert_eq!(buf.to_string(), "a"); 302 + } 303 + 304 + #[test] 305 + fn test_multiple_undo_redo_cycles() { 306 + let rope = EditorRope::from_str(""); 307 + let mut buf = UndoableBuffer::new(rope, 100); 308 + 309 + buf.insert(0, "a"); 310 + buf.insert(1, "b"); 311 + buf.insert(2, "c"); 312 + assert_eq!(buf.to_string(), "abc"); 313 + 314 + // Undo all 315 + assert!(buf.undo()); 316 + assert!(buf.undo()); 317 + assert!(buf.undo()); 318 + assert_eq!(buf.to_string(), ""); 319 + 320 + // Redo all 321 + assert!(buf.redo()); 322 + assert!(buf.redo()); 323 + assert!(buf.redo()); 324 + assert_eq!(buf.to_string(), "abc"); 325 + 326 + // Partial undo then new edit 327 + assert!(buf.undo()); // "ab" 328 + assert!(buf.undo()); // "a" 329 + buf.insert(1, "x"); 330 + assert_eq!(buf.to_string(), "ax"); 331 + assert!(!buf.can_redo()); // redo cleared 332 + } 333 + }
+321
crates/weaver-editor-core/src/writer/embed.rs
··· 1 + //! Embed rendering and image resolution for EditorWriter. 2 + 3 + use core::fmt; 4 + use std::collections::HashMap; 5 + use std::ops::Range; 6 + 7 + use jacquard::IntoStatic; 8 + use jacquard::types::{ident::AtIdentifier, string::Rkey}; 9 + use markdown_weaver::{CowStr, EmbedType, Event}; 10 + use markdown_weaver_escape::{StrWrite, escape_html}; 11 + 12 + use crate::render::{EmbedContentProvider, ImageResolver, WikilinkValidator}; 13 + use crate::syntax::{SyntaxSpanInfo, SyntaxType}; 14 + use crate::types::EditorImage; 15 + 16 + use super::EditorWriter; 17 + 18 + /// Resolved image path type. 19 + #[derive(Clone, Debug)] 20 + enum ResolvedImage { 21 + /// Data URL for immediate preview (still uploading) 22 + Pending(String), 23 + /// Draft image: `/image/{ident}/draft/{blob_rkey}/{name}` 24 + Draft { 25 + blob_rkey: Rkey<'static>, 26 + ident: AtIdentifier<'static>, 27 + }, 28 + /// Published image: `/image/{ident}/{entry_rkey}/{name}` 29 + Published { 30 + entry_rkey: Rkey<'static>, 31 + ident: AtIdentifier<'static>, 32 + }, 33 + } 34 + 35 + /// Resolves image paths in the editor. 36 + /// 37 + /// Supports three states for images: 38 + /// - Pending: uses data URL for immediate preview while upload is in progress 39 + /// - Draft: uses path format `/image/{did}/draft/{blob_rkey}/{name}` 40 + /// - Published: uses path format `/image/{did}/{entry_rkey}/{name}` 41 + /// 42 + /// Image URLs in markdown use the format `/image/{name}`. 43 + #[derive(Clone, Default)] 44 + pub struct EditorImageResolver { 45 + /// All resolved images: name -> resolved path info 46 + images: HashMap<String, ResolvedImage>, 47 + } 48 + 49 + impl EditorImageResolver { 50 + pub fn new() -> Self { 51 + Self::default() 52 + } 53 + 54 + /// Add a pending image with a data URL for immediate preview. 
55 + pub fn add_pending(&mut self, name: String, data_url: String) { 56 + self.images.insert(name, ResolvedImage::Pending(data_url)); 57 + } 58 + 59 + /// Promote a pending image to uploaded (draft) status. 60 + pub fn promote_to_uploaded( 61 + &mut self, 62 + name: &str, 63 + blob_rkey: Rkey<'static>, 64 + ident: AtIdentifier<'static>, 65 + ) { 66 + self.images 67 + .insert(name.to_string(), ResolvedImage::Draft { blob_rkey, ident }); 68 + } 69 + 70 + /// Add an already-uploaded draft image. 71 + pub fn add_uploaded( 72 + &mut self, 73 + name: String, 74 + blob_rkey: Rkey<'static>, 75 + ident: AtIdentifier<'static>, 76 + ) { 77 + self.images 78 + .insert(name, ResolvedImage::Draft { blob_rkey, ident }); 79 + } 80 + 81 + /// Add a published image. 82 + pub fn add_published( 83 + &mut self, 84 + name: String, 85 + entry_rkey: Rkey<'static>, 86 + ident: AtIdentifier<'static>, 87 + ) { 88 + self.images 89 + .insert(name, ResolvedImage::Published { entry_rkey, ident }); 90 + } 91 + 92 + /// Check if an image is pending upload. 93 + pub fn is_pending(&self, name: &str) -> bool { 94 + matches!(self.images.get(name), Some(ResolvedImage::Pending(_))) 95 + } 96 + 97 + /// Build a resolver from editor images and user identifier. 98 + /// 99 + /// For draft mode (entry_rkey=None), only images with a `published_blob_uri` are included. 100 + /// For published mode (entry_rkey=Some), all images are included. 
101 + pub fn from_images<'a>( 102 + images: impl IntoIterator<Item = &'a EditorImage>, 103 + ident: AtIdentifier<'static>, 104 + entry_rkey: Option<Rkey<'static>>, 105 + ) -> Self { 106 + let mut resolver = Self::new(); 107 + for editor_image in images { 108 + // Get the name from the Image (use alt text as fallback if name is empty) 109 + let name = editor_image 110 + .image 111 + .name 112 + .as_ref() 113 + .map(|n| n.to_string()) 114 + .unwrap_or_else(|| editor_image.image.alt.to_string()); 115 + 116 + if name.is_empty() { 117 + continue; 118 + } 119 + 120 + match &entry_rkey { 121 + // Published mode: use entry rkey for all images 122 + Some(rkey) => { 123 + resolver.add_published(name, rkey.clone(), ident.clone()); 124 + } 125 + // Draft mode: use published_blob_uri rkey 126 + None => { 127 + let blob_rkey = match &editor_image.published_blob_uri { 128 + Some(uri) => match uri.rkey() { 129 + Some(rkey) => rkey.0.clone().into_static(), 130 + None => continue, 131 + }, 132 + None => continue, 133 + }; 134 + resolver.add_uploaded(name, blob_rkey, ident.clone()); 135 + } 136 + } 137 + } 138 + resolver 139 + } 140 + } 141 + 142 + impl ImageResolver for EditorImageResolver { 143 + fn resolve_image_url(&self, url: &str) -> Option<String> { 144 + // Extract image name from /image/{name} format 145 + let name = url.strip_prefix("/image/").unwrap_or(url); 146 + 147 + let resolved = self.images.get(name)?; 148 + match resolved { 149 + ResolvedImage::Pending(data_url) => Some(data_url.clone()), 150 + ResolvedImage::Draft { blob_rkey, ident } => { 151 + Some(format!("/image/{}/draft/{}/{}", ident, blob_rkey, name)) 152 + } 153 + ResolvedImage::Published { entry_rkey, ident } => { 154 + Some(format!("/image/{}/{}/{}", ident, entry_rkey, name)) 155 + } 156 + } 157 + } 158 + } 159 + 160 + // write_embed implementation 161 + impl<'a, I, E, R, W> EditorWriter<'a, I, E, R, W> 162 + where 163 + I: Iterator<Item = (Event<'a>, Range<usize>)>, 164 + E: EmbedContentProvider, 165 + R: 
ImageResolver, 166 + W: WikilinkValidator, 167 + { 168 + pub(crate) fn write_embed( 169 + &mut self, 170 + range: Range<usize>, 171 + _embed_type: EmbedType, 172 + dest_url: CowStr<'_>, 173 + title: CowStr<'_>, 174 + _id: CowStr<'_>, 175 + attrs: Option<markdown_weaver::WeaverAttributes<'_>>, 176 + ) -> Result<(), fmt::Error> { 177 + // Embed rendering: all syntax elements share one syn_id for visibility toggling 178 + // Structure: ![[ url-as-link ]] <embed-content> 179 + let raw_text = &self.source[range.clone()]; 180 + let syn_id = self.gen_syn_id(); 181 + let opening_char_start = self.last_char_offset; 182 + 183 + // Extract the URL from raw text (between ![[ and ]]) 184 + let url_text = if raw_text.starts_with("![[") && raw_text.ends_with("]]") { 185 + &raw_text[3..raw_text.len() - 2] 186 + } else { 187 + dest_url.as_ref() 188 + }; 189 + 190 + // Calculate char positions 191 + let url_char_len = url_text.chars().count(); 192 + let opening_char_end = opening_char_start + 3; // "![[" 193 + let url_char_start = opening_char_end; 194 + let url_char_end = url_char_start + url_char_len; 195 + let closing_char_start = url_char_end; 196 + let closing_char_end = closing_char_start + 2; // "]]" 197 + let formatted_range = opening_char_start..closing_char_end; 198 + 199 + // 1. Emit opening ![[ syntax span 200 + if raw_text.starts_with("![[") { 201 + write!( 202 + &mut self.writer, 203 + "<span class=\"md-syntax-inline\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">![[</span>", 204 + syn_id, opening_char_start, opening_char_end 205 + )?; 206 + 207 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 208 + syn_id: syn_id.clone(), 209 + char_range: opening_char_start..opening_char_end, 210 + syntax_type: SyntaxType::Inline, 211 + formatted_range: Some(formatted_range.clone()), 212 + }); 213 + 214 + self.record_mapping( 215 + range.start..range.start + 3, 216 + opening_char_start..opening_char_end, 217 + ); 218 + } 219 + 220 + // 2. 
Emit URL as a clickable link (same syn_id, shown/hidden with syntax) 221 + let url = dest_url.as_ref(); 222 + let link_href = if url.starts_with("at://") { 223 + format!("https://alpha.weaver.sh/record/{}", url) 224 + } else { 225 + url.to_string() 226 + }; 227 + 228 + write!( 229 + &mut self.writer, 230 + "<a class=\"image-alt embed-url\" href=\"{}\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" target=\"_blank\">", 231 + link_href, syn_id, url_char_start, url_char_end 232 + )?; 233 + escape_html(&mut self.writer, url_text)?; 234 + self.write("</a>")?; 235 + 236 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 237 + syn_id: syn_id.clone(), 238 + char_range: url_char_start..url_char_end, 239 + syntax_type: SyntaxType::Inline, 240 + formatted_range: Some(formatted_range.clone()), 241 + }); 242 + 243 + self.record_mapping(range.start + 3..range.end - 2, url_char_start..url_char_end); 244 + 245 + // 3. Emit closing ]] syntax span 246 + if raw_text.ends_with("]]") { 247 + write!( 248 + &mut self.writer, 249 + "<span class=\"md-syntax-inline\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">]]</span>", 250 + syn_id, closing_char_start, closing_char_end 251 + )?; 252 + 253 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 254 + syn_id: syn_id.clone(), 255 + char_range: closing_char_start..closing_char_end, 256 + syntax_type: SyntaxType::Inline, 257 + formatted_range: Some(formatted_range.clone()), 258 + }); 259 + 260 + self.record_mapping( 261 + range.end - 2..range.end, 262 + closing_char_start..closing_char_end, 263 + ); 264 + } 265 + 266 + // Collect AT URI for later resolution 267 + if url.starts_with("at://") || url.starts_with("did:") { 268 + self.ref_collector.add_at_embed( 269 + url, 270 + if title.is_empty() { 271 + None 272 + } else { 273 + Some(title.as_ref()) 274 + }, 275 + ); 276 + } 277 + 278 + // 4. 
Emit the actual embed content 279 + // Try to get content from attributes first 280 + let content_from_attrs = if let Some(ref attrs) = attrs { 281 + attrs 282 + .attrs 283 + .iter() 284 + .find(|(k, _)| k.as_ref() == "content") 285 + .map(|(_, v)| v.as_ref().to_string()) 286 + } else { 287 + None 288 + }; 289 + 290 + // If no content in attrs, try provider 291 + // Convert to owned to avoid borrow checker issues with self.write() 292 + // TODO: figure out a way to do this that doesn't involve cloning 293 + let content: Option<String> = if content_from_attrs.is_some() { 294 + content_from_attrs 295 + } else if let Some(ref provider) = self.embed_provider { 296 + provider.get_embed_html(url).map(|s| s.to_string()) 297 + } else { 298 + None 299 + }; 300 + 301 + if let Some(ref html_content) = content { 302 + // Write the pre-rendered content directly 303 + self.write(html_content)?; 304 + } else { 305 + // Fallback: render as placeholder div 306 + self.write("<div class=\"atproto-embed atproto-embed-placeholder\">")?; 307 + self.write("<span class=\"embed-loading\">Loading embed...</span>")?; 308 + self.write("</div>")?; 309 + } 310 + 311 + // Consume the text events for the URL (they're still in the iterator) 312 + // Use consume_until_end() since we already wrote the URL from source 313 + self.consume_until_end(); 314 + 315 + // Update offsets 316 + self.last_char_offset = closing_char_end; 317 + self.last_byte_offset = range.end; 318 + 319 + Ok(()) 320 + } 321 + }
+686
crates/weaver-editor-core/src/writer/events.rs
··· 1 + //! Event processing for EditorWriter - the main run loop and event dispatch. 2 + 3 + use core::fmt; 4 + use std::fmt::Write as _; 5 + use std::ops::Range; 6 + 7 + use markdown_weaver::{Event, TagEnd}; 8 + use markdown_weaver_escape::{escape_html, escape_html_body_text_with_char_count}; 9 + 10 + use crate::offset_map::OffsetMapping; 11 + use crate::render::{EmbedContentProvider, ImageResolver, WikilinkValidator}; 12 + use crate::syntax::{SyntaxSpanInfo, SyntaxType}; 13 + 14 + use super::{EditorWriter, WriterResult}; 15 + 16 + // Main run loop 17 + impl<'a, I, E, R, W> EditorWriter<'a, I, E, R, W> 18 + where 19 + I: Iterator<Item = (Event<'a>, Range<usize>)>, 20 + E: EmbedContentProvider, 21 + R: ImageResolver, 22 + W: WikilinkValidator, 23 + { 24 + /// Process markdown events and write HTML. 25 + /// 26 + /// Returns offset mappings and paragraph boundaries. The HTML is written 27 + /// to the writer passed in the constructor. 28 + pub fn run(mut self) -> Result<WriterResult, fmt::Error> { 29 + while let Some((event, range)) = self.events.next() { 30 + tracing::trace!( 31 + target: "weaver::writer", 32 + event = ?event, 33 + byte_range = ?range, 34 + "processing event" 35 + ); 36 + 37 + // For End events, emit any trailing content within the event's range 38 + // BEFORE calling end_tag (which calls end_node and clears current_node_id) 39 + // 40 + // EXCEPTION: For inline formatting tags (Strong, Emphasis, Strikethrough), 41 + // the closing syntax must be emitted AFTER the closing HTML tag, not before. 42 + // Otherwise the closing `**` span ends up INSIDE the <strong> element. 43 + // These tags handle their own closing syntax in end_tag(). 44 + // Image and Embed handle ALL their syntax in the Start event, so exclude them too. 
45 + let is_self_handled_end = matches!( 46 + &event, 47 + Event::End( 48 + TagEnd::Strong 49 + | TagEnd::Emphasis 50 + | TagEnd::Strikethrough 51 + | TagEnd::Image 52 + | TagEnd::Embed 53 + ) 54 + ); 55 + 56 + if matches!(&event, Event::End(_)) && !is_self_handled_end { 57 + // Emit gap from last_byte_offset to range.end 58 + self.emit_gap_before(range.end)?; 59 + } else if !matches!(&event, Event::End(_)) { 60 + // For other events, emit any gap before range.start 61 + // (emit_syntax handles char offset tracking) 62 + self.emit_gap_before(range.start)?; 63 + } 64 + // For inline format End events, gap is emitted inside end_tag() AFTER the closing HTML 65 + 66 + // Store last_byte before processing 67 + let last_byte_before = self.last_byte_offset; 68 + 69 + // Process the event (passing range for tag syntax) 70 + self.process_event(event, range.clone())?; 71 + 72 + // Update tracking - but don't override if start_tag manually updated it 73 + // (for inline formatting tags that emit opening syntax) 74 + if self.last_byte_offset == last_byte_before { 75 + // Event didn't update offset, so we update it 76 + self.last_byte_offset = range.end; 77 + } 78 + // else: Event updated offset (e.g. 
start_tag emitted opening syntax), keep that value 79 + } 80 + 81 + // Emit any trailing syntax 82 + self.emit_gap_before(self.source.len())?; 83 + 84 + // Handle unmapped trailing content (stripped by parser) 85 + // This includes trailing spaces that markdown ignores 86 + let doc_byte_len = self.source.len(); 87 + let doc_char_len = self.source_len_chars; 88 + 89 + if self.last_byte_offset < doc_byte_len || self.last_char_offset < doc_char_len { 90 + // Emit the trailing content as visible syntax 91 + if self.last_byte_offset < doc_byte_len { 92 + let trailing = &self.source[self.last_byte_offset..]; 93 + if !trailing.is_empty() { 94 + let char_start = self.last_char_offset; 95 + let trailing_char_len = trailing.chars().count(); 96 + 97 + let char_end = char_start + trailing_char_len; 98 + let syn_id = self.gen_syn_id(); 99 + 100 + write!( 101 + &mut self.writer, 102 + "<span class=\"md-placeholder\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">", 103 + syn_id, char_start, char_end 104 + )?; 105 + escape_html(&mut self.writer, trailing)?; 106 + self.write("</span>")?; 107 + 108 + // Record mapping if we have a node 109 + if let Some(ref node_id) = self.current_node.id { 110 + let mapping = OffsetMapping { 111 + byte_range: self.last_byte_offset..doc_byte_len, 112 + char_range: char_start..char_end, 113 + node_id: node_id.clone(), 114 + char_offset_in_node: self.current_node.char_offset, 115 + child_index: None, 116 + utf16_len: trailing_char_len, // visible 117 + }; 118 + self.current_para.offset_maps.push(mapping); 119 + self.current_node.char_offset += trailing_char_len; 120 + } 121 + 122 + self.last_char_offset = char_start + trailing_char_len; 123 + } 124 + } 125 + } 126 + 127 + // Add any remaining accumulated data for the last paragraph 128 + // (content that wasn't followed by a paragraph boundary) 129 + if !self.current_para.offset_maps.is_empty() 130 + || !self.current_para.syntax_spans.is_empty() 131 + || 
!self.ref_collector.refs.is_empty() 132 + { 133 + self.offset_maps_by_para 134 + .push(std::mem::take(&mut self.current_para.offset_maps)); 135 + self.syntax_spans_by_para 136 + .push(std::mem::take(&mut self.current_para.syntax_spans)); 137 + self.refs_by_para 138 + .push(std::mem::take(&mut self.ref_collector.refs)); 139 + } 140 + 141 + // Get HTML segments from writer 142 + let html_segments = self.writer.into_segments(); 143 + 144 + Ok(WriterResult { 145 + html_segments, 146 + offset_maps_by_paragraph: self.offset_maps_by_para, 147 + paragraph_ranges: self.paragraphs.ranges, 148 + syntax_spans_by_paragraph: self.syntax_spans_by_para, 149 + collected_refs_by_paragraph: self.refs_by_para, 150 + }) 151 + } 152 + 153 + fn process_event(&mut self, event: Event<'_>, range: Range<usize>) -> Result<(), fmt::Error> { 154 + use Event::*; 155 + 156 + match event { 157 + Start(tag) => self.start_tag(tag, range)?, 158 + End(tag) => self.end_tag(tag, range)?, 159 + Text(text) => { 160 + // If buffering code, append to buffer instead of writing 161 + if let Some((_, ref mut content)) = self.code_block.buffer { 162 + content.push_str(&text); 163 + 164 + // Track byte and char ranges for code block content 165 + let text_char_len = text.chars().count(); 166 + let text_byte_len = text.len(); 167 + if let Some(ref mut code_byte_range) = self.code_block.byte_range { 168 + // Extend existing ranges 169 + code_byte_range.end = range.end; 170 + if let Some(ref mut code_char_range) = self.code_block.char_range { 171 + code_char_range.end = self.last_char_offset + text_char_len; 172 + } 173 + } else { 174 + // First text in code block - start tracking 175 + self.code_block.byte_range = Some(range.clone()); 176 + self.code_block.char_range = 177 + Some(self.last_char_offset..self.last_char_offset + text_char_len); 178 + } 179 + // Update offsets so paragraph boundary is correct 180 + self.last_char_offset += text_char_len; 181 + self.last_byte_offset += text_byte_len; 182 + } else if 
!self.in_non_writing_block { 183 + // Escape HTML and count chars in one pass 184 + let char_start = self.last_char_offset; 185 + let text_char_len = 186 + escape_html_body_text_with_char_count(&mut self.writer, &text)?; 187 + let char_end = char_start + text_char_len; 188 + 189 + // Text becomes a text node child of the current container 190 + if text_char_len > 0 { 191 + self.current_node.child_count += 1; 192 + } 193 + 194 + // Record offset mapping 195 + self.record_mapping(range.clone(), char_start..char_end); 196 + 197 + // Update char offset tracking 198 + self.last_char_offset = char_end; 199 + self.end_newline = text.ends_with('\n'); 200 + } 201 + } 202 + Code(text) => { 203 + let format_start = self.last_char_offset; 204 + let raw_text = &self.source[range.clone()]; 205 + 206 + // Track opening span index so we can set formatted_range later 207 + let opening_span_idx = if raw_text.starts_with('`') { 208 + let syn_id = self.gen_syn_id(); 209 + let char_start = self.last_char_offset; 210 + let backtick_char_end = char_start + 1; 211 + write!( 212 + &mut self.writer, 213 + "<span class=\"md-syntax-inline\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">`</span>", 214 + syn_id, char_start, backtick_char_end 215 + )?; 216 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 217 + syn_id, 218 + char_range: char_start..backtick_char_end, 219 + syntax_type: SyntaxType::Inline, 220 + formatted_range: None, // Set after we know the full range 221 + }); 222 + self.last_char_offset += 1; 223 + Some(self.current_para.syntax_spans.len() - 1) 224 + } else { 225 + None 226 + }; 227 + 228 + self.write("<code>")?; 229 + 230 + // Track offset mapping for code content 231 + let content_char_start = self.last_char_offset; 232 + let text_char_len = 233 + escape_html_body_text_with_char_count(&mut self.writer, &text)?; 234 + let content_char_end = content_char_start + text_char_len; 235 + 236 + // Record offset mapping (code content is 
visible) 237 + self.record_mapping(range.clone(), content_char_start..content_char_end); 238 + self.last_char_offset = content_char_end; 239 + 240 + self.write("</code>")?; 241 + 242 + // Emit closing backtick and track it 243 + if raw_text.ends_with('`') { 244 + let syn_id = self.gen_syn_id(); 245 + let backtick_char_start = self.last_char_offset; 246 + let backtick_char_end = backtick_char_start + 1; 247 + write!( 248 + &mut self.writer, 249 + "<span class=\"md-syntax-inline\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">`</span>", 250 + syn_id, backtick_char_start, backtick_char_end 251 + )?; 252 + 253 + // Now we know the full formatted range 254 + let formatted_range = format_start..backtick_char_end; 255 + 256 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 257 + syn_id, 258 + char_range: backtick_char_start..backtick_char_end, 259 + syntax_type: SyntaxType::Inline, 260 + formatted_range: Some(formatted_range.clone()), 261 + }); 262 + 263 + // Update opening span with formatted_range 264 + if let Some(idx) = opening_span_idx { 265 + self.current_para.syntax_spans[idx].formatted_range = 266 + Some(formatted_range); 267 + } 268 + 269 + self.last_char_offset += 1; 270 + } 271 + } 272 + InlineMath(text) => { 273 + self.process_inline_math(&text, range)?; 274 + } 275 + DisplayMath(text) => { 276 + self.process_display_math(&text, range)?; 277 + } 278 + Html(html) => { 279 + // Track offset mapping for raw HTML 280 + let char_start = self.last_char_offset; 281 + let html_char_len = html.chars().count(); 282 + let char_end = char_start + html_char_len; 283 + 284 + self.write(&html)?; 285 + 286 + // Record mapping for inline HTML 287 + self.record_mapping(range.clone(), char_start..char_end); 288 + self.last_char_offset = char_end; 289 + } 290 + InlineHtml(html) => { 291 + // Track offset mapping for raw HTML 292 + let char_start = self.last_char_offset; 293 + let html_char_len = html.chars().count(); 294 + let char_end = 
char_start + html_char_len; 295 + self.write(r#"<span class="html-embed html-embed-inline">"#)?; 296 + self.write(&html)?; 297 + self.write("</span>")?; 298 + // Record mapping for inline HTML 299 + self.record_mapping(range.clone(), char_start..char_end); 300 + self.last_char_offset = char_end; 301 + } 302 + SoftBreak => { 303 + // Emit <br> for visual line break, plus a space for cursor positioning. 304 + // This space maps to the \n so the cursor can land here when navigating. 305 + let char_start = self.last_char_offset; 306 + 307 + // Emit <br> 308 + self.write("<br />")?; 309 + self.current_node.child_count += 1; 310 + 311 + // Emit space for cursor positioning - this gives the browser somewhere 312 + // to place the cursor when navigating to this line 313 + self.write("\u{200B}")?; 314 + self.current_node.child_count += 1; 315 + 316 + // Map the space to the newline position - cursor landing here means 317 + // we're at the end of the line (after the \n) 318 + if let Some(ref node_id) = self.current_node.id { 319 + let mapping = OffsetMapping { 320 + byte_range: range.clone(), 321 + char_range: char_start..char_start + 1, 322 + node_id: node_id.clone(), 323 + char_offset_in_node: self.current_node.char_offset, 324 + child_index: None, 325 + utf16_len: 1, // the space we emitted 326 + }; 327 + self.current_para.offset_maps.push(mapping); 328 + self.current_node.char_offset += 1; 329 + } 330 + 331 + self.last_char_offset = char_start + 1; // +1 for the \n 332 + } 333 + HardBreak => { 334 + // Emit the two spaces as visible (dimmed) text, then <br> 335 + let gap = &self.source[range.clone()]; 336 + if gap.ends_with('\n') { 337 + let spaces = &gap[..gap.len() - 1]; // everything except the \n 338 + let char_start = self.last_char_offset; 339 + let spaces_char_len = spaces.chars().count(); 340 + let char_end = char_start + spaces_char_len; 341 + 342 + // Emit and map the visible spaces 343 + let syn_id = self.gen_syn_id(); 344 + write!( 345 + &mut self.writer, 
346 + "<span class=\"md-placeholder\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">", 347 + syn_id, char_start, char_end 348 + )?; 349 + escape_html(&mut self.writer, spaces)?; 350 + self.write("</span>")?; 351 + 352 + // Count this span as a child 353 + self.current_node.child_count += 1; 354 + 355 + self.record_mapping( 356 + range.start..range.start + spaces.len(), 357 + char_start..char_end, 358 + ); 359 + 360 + // Now the actual line break <br> 361 + self.write("<br />")?; 362 + 363 + // Count the <br> as a child 364 + self.current_node.child_count += 1; 365 + 366 + // After <br>, emit plain zero-width space for cursor positioning 367 + self.write("\u{200B}")?; 368 + 369 + // Count the zero-width space text node as a child 370 + self.current_node.child_count += 1; 371 + 372 + // Map the newline position to the zero-width space text node 373 + if let Some(ref node_id) = self.current_node.id { 374 + let newline_char_offset = char_start + spaces_char_len; 375 + let mapping = OffsetMapping { 376 + byte_range: range.start + spaces.len()..range.end, 377 + char_range: newline_char_offset..newline_char_offset + 1, 378 + node_id: node_id.clone(), 379 + char_offset_in_node: self.current_node.char_offset, 380 + child_index: None, // text node - TreeWalker will find it 381 + utf16_len: 1, // zero-width space is 1 UTF-16 unit 382 + }; 383 + self.current_para.offset_maps.push(mapping); 384 + 385 + // Increment char offset - TreeWalker will encounter this text node 386 + self.current_node.char_offset += 1; 387 + } 388 + 389 + self.last_char_offset = char_start + spaces_char_len + 1; // +1 for \n 390 + } else { 391 + // Fallback: just <br> 392 + self.write("<br />")?; 393 + } 394 + } 395 + Rule => { 396 + if !self.end_newline { 397 + self.write("\n")?; 398 + } 399 + 400 + // Emit syntax span before the rendered element 401 + if range.start < range.end { 402 + let raw_text = &self.source[range]; 403 + let trimmed = raw_text.trim(); 404 + if 
!trimmed.is_empty() { 405 + let syn_id = self.gen_syn_id(); 406 + let char_start = self.last_char_offset; 407 + let char_len = trimmed.chars().count(); 408 + let char_end = char_start + char_len; 409 + 410 + write!( 411 + &mut self.writer, 412 + "<span class=\"md-syntax-block\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">", 413 + syn_id, char_start, char_end 414 + )?; 415 + escape_html(&mut self.writer, trimmed)?; 416 + self.write("</span>")?; 417 + 418 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 419 + syn_id, 420 + char_range: char_start..char_end, 421 + syntax_type: SyntaxType::Block, 422 + formatted_range: None, 423 + }); 424 + } 425 + } 426 + 427 + // Wrap <hr /> in toggle-block for future cursor-based toggling 428 + self.write("<div class=\"toggle-block\"><hr /></div>")?; 429 + } 430 + FootnoteReference(name) => { 431 + // Emit [^name] as styled (but NOT hidden) inline span 432 + let raw_text = &self.source[range.clone()]; 433 + let char_start = self.last_char_offset; 434 + let syntax_char_len = raw_text.chars().count(); 435 + let char_end = char_start + syntax_char_len; 436 + 437 + // Use footnote-ref class for styling, not md-syntax-inline (which hides) 438 + write!( 439 + &mut self.writer, 440 + "<span class=\"footnote-ref\" data-char-start=\"{}\" data-char-end=\"{}\" data-footnote=\"{}\">", 441 + char_start, char_end, name 442 + )?; 443 + escape_html(&mut self.writer, raw_text)?; 444 + self.write("</span>")?; 445 + 446 + // Record offset mapping 447 + self.record_mapping(range.clone(), char_start..char_end); 448 + 449 + // Count as child 450 + self.current_node.child_count += 1; 451 + 452 + // Update tracking 453 + self.last_char_offset = char_end; 454 + self.last_byte_offset = range.end; 455 + } 456 + TaskListMarker(checked) => { 457 + // Emit the [ ] or [x] syntax 458 + if range.start < range.end { 459 + let raw_text = &self.source[range]; 460 + if let Some(bracket_pos) = raw_text.find('[') { 461 + let 
end_pos = raw_text.find(']').map(|p| p + 1).unwrap_or(bracket_pos + 3); 462 + let syntax = &raw_text[bracket_pos..end_pos.min(raw_text.len())]; 463 + 464 + let syn_id = self.gen_syn_id(); 465 + let char_start = self.last_char_offset; 466 + let syntax_char_len = syntax.chars().count(); 467 + let char_end = char_start + syntax_char_len; 468 + 469 + write!( 470 + &mut self.writer, 471 + "<span class=\"md-syntax-inline\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">", 472 + syn_id, char_start, char_end 473 + )?; 474 + escape_html(&mut self.writer, syntax)?; 475 + self.write("</span> ")?; 476 + 477 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 478 + syn_id, 479 + char_range: char_start..char_end, 480 + syntax_type: SyntaxType::Inline, 481 + formatted_range: None, 482 + }); 483 + } 484 + } 485 + 486 + if checked { 487 + self.write("<input disabled=\"\" type=\"checkbox\" checked=\"\"/>")?; 488 + } else { 489 + self.write("<input disabled=\"\" type=\"checkbox\"/>")?; 490 + } 491 + } 492 + WeaverBlock(text) => { 493 + // Buffer WeaverBlock content for parsing on End 494 + self.weaver_block.buffer.push_str(&text); 495 + } 496 + } 497 + Ok(()) 498 + } 499 + 500 + /// Process inline math ($...$) 501 + fn process_inline_math(&mut self, text: &str, range: Range<usize>) -> Result<(), fmt::Error> { 502 + let raw_text = &self.source[range.clone()]; 503 + let syn_id = self.gen_syn_id(); 504 + let opening_char_start = self.last_char_offset; 505 + 506 + // Calculate char positions 507 + let text_char_len = text.chars().count(); 508 + let opening_char_end = opening_char_start + 1; // "$" 509 + let content_char_start = opening_char_end; 510 + let content_char_end = content_char_start + text_char_len; 511 + let closing_char_start = content_char_end; 512 + let closing_char_end = closing_char_start + 1; // "$" 513 + let formatted_range = opening_char_start..closing_char_end; 514 + 515 + // 1. 
Emit opening $ syntax span 516 + if raw_text.starts_with('$') { 517 + write!( 518 + &mut self.writer, 519 + "<span class=\"md-syntax-inline\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">$</span>", 520 + syn_id, opening_char_start, opening_char_end 521 + )?; 522 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 523 + syn_id: syn_id.clone(), 524 + char_range: opening_char_start..opening_char_end, 525 + syntax_type: SyntaxType::Inline, 526 + formatted_range: Some(formatted_range.clone()), 527 + }); 528 + self.record_mapping( 529 + range.start..range.start + 1, 530 + opening_char_start..opening_char_end, 531 + ); 532 + } 533 + 534 + // 2. Emit raw LaTeX content (hidden with syntax when cursor outside) 535 + write!( 536 + &mut self.writer, 537 + "<span class=\"math-source\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">", 538 + syn_id, content_char_start, content_char_end 539 + )?; 540 + escape_html(&mut self.writer, text)?; 541 + self.write("</span>")?; 542 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 543 + syn_id: syn_id.clone(), 544 + char_range: content_char_start..content_char_end, 545 + syntax_type: SyntaxType::Inline, 546 + formatted_range: Some(formatted_range.clone()), 547 + }); 548 + self.record_mapping( 549 + range.start + 1..range.end - 1, 550 + content_char_start..content_char_end, 551 + ); 552 + 553 + // 3. 
Emit closing $ syntax span 554 + if raw_text.ends_with('$') { 555 + write!( 556 + &mut self.writer, 557 + "<span class=\"md-syntax-inline\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">$</span>", 558 + syn_id, closing_char_start, closing_char_end 559 + )?; 560 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 561 + syn_id: syn_id.clone(), 562 + char_range: closing_char_start..closing_char_end, 563 + syntax_type: SyntaxType::Inline, 564 + formatted_range: Some(formatted_range.clone()), 565 + }); 566 + self.record_mapping( 567 + range.end - 1..range.end, 568 + closing_char_start..closing_char_end, 569 + ); 570 + } 571 + 572 + // 4. Emit rendered MathML (always visible, not tied to syn_id) 573 + // Include data-char-target so clicking moves cursor into the math region 574 + // contenteditable="false" so DOM walker skips this for offset counting 575 + match weaver_renderer::math::render_math(text, false) { 576 + weaver_renderer::math::MathResult::Success(mathml) => { 577 + write!( 578 + &mut self.writer, 579 + "<span class=\"math math-inline math-rendered math-clickable\" contenteditable=\"false\" data-char-target=\"{}\">{}</span>", 580 + content_char_start, mathml 581 + )?; 582 + } 583 + weaver_renderer::math::MathResult::Error { html, .. 
} => { 584 + // Show error indicator (also always visible) 585 + self.write(&html)?; 586 + } 587 + } 588 + 589 + self.last_char_offset = closing_char_end; 590 + Ok(()) 591 + } 592 + 593 + /// Process display math ($$...$$) 594 + fn process_display_math(&mut self, text: &str, range: Range<usize>) -> Result<(), fmt::Error> { 595 + let raw_text = &self.source[range.clone()]; 596 + let syn_id = self.gen_syn_id(); 597 + let opening_char_start = self.last_char_offset; 598 + 599 + // Calculate char positions 600 + let text_char_len = text.chars().count(); 601 + let opening_char_end = opening_char_start + 2; // "$$" 602 + let content_char_start = opening_char_end; 603 + let content_char_end = content_char_start + text_char_len; 604 + let closing_char_start = content_char_end; 605 + let closing_char_end = closing_char_start + 2; // "$$" 606 + let formatted_range = opening_char_start..closing_char_end; 607 + 608 + // 1. Emit opening $$ syntax span 609 + // Use Block syntax type so visibility is based on "cursor in same paragraph" 610 + if raw_text.starts_with("$$") { 611 + write!( 612 + &mut self.writer, 613 + "<span class=\"md-syntax-block\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">$$</span>", 614 + syn_id, opening_char_start, opening_char_end 615 + )?; 616 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 617 + syn_id: syn_id.clone(), 618 + char_range: opening_char_start..opening_char_end, 619 + syntax_type: SyntaxType::Block, 620 + formatted_range: Some(formatted_range.clone()), 621 + }); 622 + self.record_mapping( 623 + range.start..range.start + 2, 624 + opening_char_start..opening_char_end, 625 + ); 626 + } 627 + 628 + // 2. 
Emit raw LaTeX content (hidden with syntax when cursor outside) 629 + write!( 630 + &mut self.writer, 631 + "<span class=\"math-source\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">", 632 + syn_id, content_char_start, content_char_end 633 + )?; 634 + escape_html(&mut self.writer, text)?; 635 + self.write("</span>")?; 636 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 637 + syn_id: syn_id.clone(), 638 + char_range: content_char_start..content_char_end, 639 + syntax_type: SyntaxType::Block, 640 + formatted_range: Some(formatted_range.clone()), 641 + }); 642 + self.record_mapping( 643 + range.start + 2..range.end - 2, 644 + content_char_start..content_char_end, 645 + ); 646 + 647 + // 3. Emit closing $$ syntax span 648 + if raw_text.ends_with("$$") { 649 + write!( 650 + &mut self.writer, 651 + "<span class=\"md-syntax-block\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">$$</span>", 652 + syn_id, closing_char_start, closing_char_end 653 + )?; 654 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 655 + syn_id: syn_id.clone(), 656 + char_range: closing_char_start..closing_char_end, 657 + syntax_type: SyntaxType::Block, 658 + formatted_range: Some(formatted_range.clone()), 659 + }); 660 + self.record_mapping( 661 + range.end - 2..range.end, 662 + closing_char_start..closing_char_end, 663 + ); 664 + } 665 + 666 + // 4. 
Emit rendered MathML (always visible, not tied to syn_id) 667 + // Include data-char-target so clicking moves cursor into the math region 668 + // contenteditable="false" so DOM walker skips this for offset counting 669 + match weaver_renderer::math::render_math(text, true) { 670 + weaver_renderer::math::MathResult::Success(mathml) => { 671 + write!( 672 + &mut self.writer, 673 + "<span class=\"math math-display math-rendered math-clickable\" contenteditable=\"false\" data-char-target=\"{}\">{}</span>", 674 + content_char_start, mathml 675 + )?; 676 + } 677 + weaver_renderer::math::MathResult::Error { html, .. } => { 678 + // Show error indicator (also always visible) 679 + self.write(&html)?; 680 + } 681 + } 682 + 683 + self.last_char_offset = closing_char_end; 684 + Ok(()) 685 + } 686 + }
+459
crates/weaver-editor-core/src/writer/mod.rs
··· 1 + //! EditorWriter - HTML generation for markdown with visible formatting. 2 + //! 3 + //! Refactored to use grouped state structs for clarity. 4 + //! Generic over TextBuffer - works with ropey (local) or can be adapted for Loro (collab). 5 + 6 + mod embed; 7 + mod events; 8 + mod state; 9 + mod syntax; 10 + mod tags; 11 + 12 + pub use embed::EditorImageResolver; 13 + pub use state::*; 14 + 15 + use std::collections::HashMap; 16 + use std::fmt::{self, Write as FmtWrite}; 17 + use std::ops::Range; 18 + 19 + use markdown_weaver::Event; 20 + use smol_str::SmolStr; 21 + 22 + use crate::offset_map::OffsetMapping; 23 + use crate::render::{EmbedContentProvider, ImageResolver, WikilinkValidator}; 24 + use crate::syntax::SyntaxSpanInfo; 25 + 26 + /// Result of rendering with EditorWriter. 27 + #[derive(Debug, Clone, Default)] 28 + pub struct WriterResult { 29 + /// HTML segments, one per paragraph 30 + pub html_segments: Vec<String>, 31 + /// Offset mappings per paragraph 32 + pub offset_maps_by_paragraph: Vec<Vec<OffsetMapping>>, 33 + /// Paragraph boundaries: (byte_range, char_range) 34 + pub paragraph_ranges: Vec<(Range<usize>, Range<usize>)>, 35 + /// Syntax spans per paragraph 36 + pub syntax_spans_by_paragraph: Vec<Vec<SyntaxSpanInfo>>, 37 + /// Collected refs per paragraph 38 + pub collected_refs_by_paragraph: Vec<Vec<weaver_common::ExtractedRef>>, 39 + } 40 + 41 + /// Segmented HTML output writer. 
42 + #[derive(Debug, Clone, Default)] 43 + pub struct SegmentedWriter { 44 + segments: Vec<String>, 45 + current: String, 46 + } 47 + 48 + impl SegmentedWriter { 49 + pub fn new() -> Self { 50 + Self::default() 51 + } 52 + 53 + pub fn write_str(&mut self, s: &str) -> fmt::Result { 54 + self.current.push_str(s); 55 + Ok(()) 56 + } 57 + 58 + pub fn new_segment(&mut self) { 59 + if !self.current.is_empty() { 60 + self.segments.push(std::mem::take(&mut self.current)); 61 + } 62 + } 63 + 64 + pub fn into_segments(mut self) -> Vec<String> { 65 + self.new_segment(); 66 + self.segments 67 + } 68 + 69 + pub fn current_len(&self) -> usize { 70 + self.current.len() 71 + } 72 + } 73 + 74 + impl FmtWrite for SegmentedWriter { 75 + fn write_str(&mut self, s: &str) -> fmt::Result { 76 + self.current.push_str(s); 77 + Ok(()) 78 + } 79 + } 80 + 81 + impl markdown_weaver_escape::StrWrite for SegmentedWriter { 82 + type Error = fmt::Error; 83 + 84 + fn write_str(&mut self, s: &str) -> fmt::Result { 85 + self.current.push_str(s); 86 + Ok(()) 87 + } 88 + 89 + fn write_fmt(&mut self, args: fmt::Arguments<'_>) -> fmt::Result { 90 + std::fmt::Write::write_fmt(&mut self.current, args) 91 + } 92 + } 93 + 94 + /// HTML writer that preserves markdown formatting characters. 
95 + /// 96 + /// Generic over: 97 + /// - `I`: Iterator of markdown events with byte ranges 98 + /// - `E`: Embed content provider (optional) 99 + /// - `R`: Image resolver (optional) 100 + /// - `W`: Wikilink validator (optional) 101 + pub struct EditorWriter<'a, I, E = (), R = (), W = ()> 102 + where 103 + I: Iterator<Item = (Event<'a>, Range<usize>)>, 104 + { 105 + // === Input === 106 + source: &'a str, 107 + source_len_chars: usize, 108 + events: I, 109 + 110 + // === Output === 111 + writer: SegmentedWriter, 112 + 113 + // === Position tracking === 114 + last_byte_offset: usize, 115 + last_char_offset: usize, 116 + 117 + // === Rendering flags === 118 + end_newline: bool, 119 + in_non_writing_block: bool, 120 + 121 + // === Grouped state === 122 + pub(crate) table: TableContext, 123 + pub(crate) code_block: CodeBlockContext, 124 + pub(crate) node_ids: NodeIdGenerator, 125 + pub(crate) current_node: CurrentNodeState, 126 + pub(crate) paragraphs: ParagraphTracker, 127 + pub(crate) current_para: ParagraphBuildState, 128 + pub(crate) weaver_block: WeaverBlockContext, 129 + pub(crate) footnotes: FootnoteContext, 130 + pub(crate) utf16: Utf16Tracker, 131 + 132 + // === Per-paragraph results === 133 + offset_maps_by_para: Vec<Vec<OffsetMapping>>, 134 + syntax_spans_by_para: Vec<Vec<SyntaxSpanInfo>>, 135 + refs_by_para: Vec<Vec<weaver_common::ExtractedRef>>, 136 + 137 + // === External resolvers === 138 + embed_provider: Option<E>, 139 + image_resolver: Option<R>, 140 + wikilink_validator: Option<W>, 141 + entry_index: Option<&'a weaver_common::EntryIndex>, 142 + 143 + // === Misc === 144 + numbers: HashMap<String, usize>, 145 + pending_blockquote_range: Option<Range<usize>>, 146 + ref_collector: weaver_common::RefCollector, 147 + } 148 + 149 + impl<'a, I, E, R, W> EditorWriter<'a, I, E, R, W> 150 + where 151 + I: Iterator<Item = (Event<'a>, Range<usize>)>, 152 + { 153 + /// Create a new EditorWriter. 154 + /// 155 + /// `source` is the markdown source text. 
156 + /// `source_len_chars` is the length in Unicode chars (for bounds checking). 157 + /// `events` is the markdown parser event iterator. 158 + pub fn new(source: &'a str, source_len_chars: usize, events: I) -> Self { 159 + Self { 160 + source, 161 + source_len_chars, 162 + events, 163 + writer: SegmentedWriter::new(), 164 + last_byte_offset: 0, 165 + last_char_offset: 0, 166 + end_newline: true, 167 + in_non_writing_block: false, 168 + table: TableContext::default(), 169 + code_block: CodeBlockContext::default(), 170 + node_ids: NodeIdGenerator::default(), 171 + current_node: CurrentNodeState::default(), 172 + paragraphs: ParagraphTracker::default(), 173 + current_para: ParagraphBuildState::default(), 174 + weaver_block: WeaverBlockContext::default(), 175 + footnotes: FootnoteContext::default(), 176 + utf16: Utf16Tracker::new(), 177 + offset_maps_by_para: Vec::new(), 178 + syntax_spans_by_para: Vec::new(), 179 + refs_by_para: Vec::new(), 180 + embed_provider: None, 181 + image_resolver: None, 182 + wikilink_validator: None, 183 + entry_index: None, 184 + numbers: HashMap::new(), 185 + pending_blockquote_range: None, 186 + ref_collector: weaver_common::RefCollector::new(), 187 + } 188 + } 189 + 190 + /// Set a static node ID prefix for all paragraphs. 191 + pub fn with_node_id_prefix(mut self, prefix: &str) -> Self { 192 + self.node_ids.prefix = Some(SmolStr::new(prefix)); 193 + self.node_ids.next_node_id = 0; 194 + self 195 + } 196 + 197 + /// Use auto-incrementing paragraph prefixes starting from `base`. 198 + pub fn with_auto_incrementing_prefix(mut self, base: usize) -> Self { 199 + use smol_str::format_smolstr; 200 + self.node_ids.auto_increment_base = Some(base); 201 + self.node_ids.prefix = Some(format_smolstr!("p-{}", base)); 202 + self.node_ids.next_node_id = 0; 203 + self 204 + } 205 + 206 + /// Override prefix for a specific paragraph index. 
207 + pub fn with_static_prefix_at_index(mut self, index: usize, prefix: &str) -> Self { 208 + self.node_ids.static_override = Some((index, SmolStr::new(prefix))); 209 + if index == 0 { 210 + self.node_ids.prefix = Some(SmolStr::new(prefix)); 211 + self.node_ids.next_node_id = 0; 212 + } 213 + self 214 + } 215 + 216 + /// Set initial offsets (for rendering a subset of the document). 217 + pub fn with_offsets( 218 + mut self, 219 + byte_offset: usize, 220 + char_offset: usize, 221 + node_id_offset: usize, 222 + syn_id_offset: usize, 223 + ) -> Self { 224 + self.last_byte_offset = byte_offset; 225 + self.last_char_offset = char_offset; 226 + self.node_ids.next_node_id = node_id_offset; 227 + self.node_ids.next_syn_id = syn_id_offset; 228 + self 229 + } 230 + 231 + /// Set embed content provider. 232 + pub fn with_embed_provider<E2: EmbedContentProvider>( 233 + self, 234 + provider: E2, 235 + ) -> EditorWriter<'a, I, E2, R, W> { 236 + EditorWriter { 237 + source: self.source, 238 + source_len_chars: self.source_len_chars, 239 + events: self.events, 240 + writer: self.writer, 241 + last_byte_offset: self.last_byte_offset, 242 + last_char_offset: self.last_char_offset, 243 + end_newline: self.end_newline, 244 + in_non_writing_block: self.in_non_writing_block, 245 + table: self.table, 246 + code_block: self.code_block, 247 + node_ids: self.node_ids, 248 + current_node: self.current_node, 249 + paragraphs: self.paragraphs, 250 + current_para: self.current_para, 251 + weaver_block: self.weaver_block, 252 + footnotes: self.footnotes, 253 + utf16: self.utf16, 254 + offset_maps_by_para: self.offset_maps_by_para, 255 + syntax_spans_by_para: self.syntax_spans_by_para, 256 + refs_by_para: self.refs_by_para, 257 + embed_provider: Some(provider), 258 + image_resolver: self.image_resolver, 259 + wikilink_validator: self.wikilink_validator, 260 + entry_index: self.entry_index, 261 + numbers: self.numbers, 262 + pending_blockquote_range: self.pending_blockquote_range, 263 + 
ref_collector: self.ref_collector, 264 + } 265 + } 266 + 267 + /// Set image resolver. 268 + pub fn with_image_resolver<R2: ImageResolver>( 269 + self, 270 + resolver: R2, 271 + ) -> EditorWriter<'a, I, E, R2, W> { 272 + EditorWriter { 273 + source: self.source, 274 + source_len_chars: self.source_len_chars, 275 + events: self.events, 276 + writer: self.writer, 277 + last_byte_offset: self.last_byte_offset, 278 + last_char_offset: self.last_char_offset, 279 + end_newline: self.end_newline, 280 + in_non_writing_block: self.in_non_writing_block, 281 + table: self.table, 282 + code_block: self.code_block, 283 + node_ids: self.node_ids, 284 + current_node: self.current_node, 285 + paragraphs: self.paragraphs, 286 + current_para: self.current_para, 287 + weaver_block: self.weaver_block, 288 + footnotes: self.footnotes, 289 + utf16: self.utf16, 290 + offset_maps_by_para: self.offset_maps_by_para, 291 + syntax_spans_by_para: self.syntax_spans_by_para, 292 + refs_by_para: self.refs_by_para, 293 + embed_provider: self.embed_provider, 294 + image_resolver: Some(resolver), 295 + wikilink_validator: self.wikilink_validator, 296 + entry_index: self.entry_index, 297 + numbers: self.numbers, 298 + pending_blockquote_range: self.pending_blockquote_range, 299 + ref_collector: self.ref_collector, 300 + } 301 + } 302 + 303 + /// Set wikilink validator. 
304 + pub fn with_wikilink_validator<W2: WikilinkValidator>( 305 + self, 306 + validator: W2, 307 + ) -> EditorWriter<'a, I, E, R, W2> { 308 + EditorWriter { 309 + source: self.source, 310 + source_len_chars: self.source_len_chars, 311 + events: self.events, 312 + writer: self.writer, 313 + last_byte_offset: self.last_byte_offset, 314 + last_char_offset: self.last_char_offset, 315 + end_newline: self.end_newline, 316 + in_non_writing_block: self.in_non_writing_block, 317 + table: self.table, 318 + code_block: self.code_block, 319 + node_ids: self.node_ids, 320 + current_node: self.current_node, 321 + paragraphs: self.paragraphs, 322 + current_para: self.current_para, 323 + weaver_block: self.weaver_block, 324 + footnotes: self.footnotes, 325 + utf16: self.utf16, 326 + offset_maps_by_para: self.offset_maps_by_para, 327 + syntax_spans_by_para: self.syntax_spans_by_para, 328 + refs_by_para: self.refs_by_para, 329 + embed_provider: self.embed_provider, 330 + image_resolver: self.image_resolver, 331 + wikilink_validator: Some(validator), 332 + entry_index: self.entry_index, 333 + numbers: self.numbers, 334 + pending_blockquote_range: self.pending_blockquote_range, 335 + ref_collector: self.ref_collector, 336 + } 337 + } 338 + 339 + /// Set entry index for wikilink resolution. 340 + pub fn with_entry_index(mut self, index: &'a weaver_common::EntryIndex) -> Self { 341 + self.entry_index = Some(index); 342 + self 343 + } 344 + } 345 + 346 + // Core helper methods 347 + impl<'a, I, E, R, W> EditorWriter<'a, I, E, R, W> 348 + where 349 + I: Iterator<Item = (Event<'a>, Range<usize>)>, 350 + { 351 + /// Write a string to the output. 352 + #[inline] 353 + pub(crate) fn write(&mut self, s: &str) -> fmt::Result { 354 + if !s.is_empty() { 355 + self.end_newline = s.ends_with('\n'); 356 + } 357 + self.writer.write_str(s) 358 + } 359 + 360 + /// Write a newline. 
361 + #[inline] 362 + pub(crate) fn write_newline(&mut self) -> fmt::Result { 363 + self.end_newline = true; 364 + self.writer.write_str("\n") 365 + } 366 + 367 + /// Generate a unique node ID. 368 + pub(crate) fn gen_node_id(&mut self) -> SmolStr { 369 + self.node_ids.next_node() 370 + } 371 + 372 + /// Generate a unique syntax span ID. 373 + pub(crate) fn gen_syn_id(&mut self) -> SmolStr { 374 + self.node_ids.next_syn() 375 + } 376 + 377 + /// Start tracking a new text container node. 378 + pub(crate) fn begin_node(&mut self, node_id: SmolStr) { 379 + self.current_node.begin(node_id); 380 + } 381 + 382 + /// Stop tracking current node. 383 + pub(crate) fn end_node(&mut self) { 384 + self.current_node.end(); 385 + } 386 + 387 + /// Compute UTF-16 length for a text slice (fast path for ASCII). 388 + #[inline] 389 + pub(crate) fn utf16_len_for_slice(text: &str) -> usize { 390 + let byte_len = text.len(); 391 + let char_len = text.chars().count(); 392 + 393 + if byte_len == char_len { 394 + char_len 395 + } else { 396 + text.encode_utf16().count() 397 + } 398 + } 399 + 400 + /// Record an offset mapping. 
401 + pub(crate) fn record_mapping(&mut self, byte_range: Range<usize>, char_range: Range<usize>) { 402 + if let Some(ref node_id) = self.current_node.id { 403 + let text_slice = &self.source[byte_range.clone()]; 404 + let utf16_len = Self::utf16_len_for_slice(text_slice); 405 + 406 + // Record UTF-16 checkpoint 407 + let last = self.utf16.last(); 408 + let new_utf16 = last.1 + utf16_len; 409 + if char_range.end > last.0 { 410 + self.utf16.checkpoint(char_range.end, new_utf16); 411 + } 412 + 413 + let mapping = OffsetMapping { 414 + byte_range, 415 + char_range: char_range.clone(), 416 + node_id: node_id.clone(), 417 + char_offset_in_node: self.current_node.char_offset, 418 + child_index: None, 419 + utf16_len, 420 + }; 421 + self.current_para.offset_maps.push(mapping); 422 + self.current_node.char_offset += utf16_len; 423 + } 424 + } 425 + 426 + /// Finalize the current paragraph. 427 + pub(crate) fn finalize_paragraph( 428 + &mut self, 429 + byte_range: Range<usize>, 430 + char_range: Range<usize>, 431 + ) { 432 + self.paragraphs.ranges.push((byte_range, char_range)); 433 + 434 + let (maps, spans, refs) = self.current_para.take_all(); 435 + self.offset_maps_by_para.push(maps); 436 + self.syntax_spans_by_para.push(spans); 437 + self.refs_by_para.push(refs); 438 + 439 + self.node_ids.next_paragraph(); 440 + self.writer.new_segment(); 441 + } 442 + 443 + /// Consume events until End tag without writing. 444 + pub(crate) fn consume_until_end(&mut self) { 445 + let mut nest = 0; 446 + while let Some((event, _)) = self.events.next() { 447 + match event { 448 + Event::Start(_) => nest += 1, 449 + Event::End(_) => { 450 + if nest == 0 { 451 + break; 452 + } 453 + nest -= 1; 454 + } 455 + _ => {} 456 + } 457 + } 458 + } 459 + }
+358
crates/weaver-editor-core/src/writer/state.rs
··· 1 + //! State structures for EditorWriter, grouped by concern. 2 + 3 + use std::collections::HashMap; 4 + use std::ops::Range; 5 + 6 + use markdown_weaver::Alignment; 7 + use smol_str::{SmolStr, ToSmolStr, format_smolstr}; 8 + 9 + use crate::offset_map::OffsetMapping; 10 + use crate::syntax::{SyntaxSpanInfo, SyntaxType}; 11 + 12 + /// Table rendering state. 13 + #[derive(Debug, Clone, Default)] 14 + pub struct TableContext { 15 + pub state: TableState, 16 + pub alignments: Vec<Alignment>, 17 + pub cell_index: usize, 18 + pub render_as_markdown: bool, 19 + pub start_offset: Option<usize>, 20 + } 21 + 22 + #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] 23 + pub enum TableState { 24 + #[default] 25 + Head, 26 + Body, 27 + } 28 + 29 + /// Code block buffering state. 30 + #[derive(Debug, Clone, Default)] 31 + pub struct CodeBlockContext { 32 + /// (language, content) being buffered 33 + pub buffer: Option<(Option<SmolStr>, String)>, 34 + /// Byte range of buffered content 35 + pub byte_range: Option<Range<usize>>, 36 + /// Char range of buffered content 37 + pub char_range: Option<Range<usize>>, 38 + /// Char offset where code block started 39 + pub block_start: Option<usize>, 40 + /// Index of opening fence syntax span 41 + pub opening_span_idx: Option<usize>, 42 + } 43 + 44 + impl CodeBlockContext { 45 + pub fn is_active(&self) -> bool { 46 + self.buffer.is_some() 47 + } 48 + 49 + pub fn clear(&mut self) { 50 + *self = Self::default(); 51 + } 52 + } 53 + 54 + /// Node ID generation for DOM element IDs. 
55 + #[derive(Debug, Clone)] 56 + pub struct NodeIdGenerator { 57 + /// Paragraph ID prefix (e.g., "p-0") 58 + pub prefix: Option<SmolStr>, 59 + /// Auto-increment base for paragraph prefixes 60 + pub auto_increment_base: Option<usize>, 61 + /// Override for specific paragraph index 62 + pub static_override: Option<(usize, SmolStr)>, 63 + /// Current paragraph index (0-indexed) 64 + pub current_paragraph: usize, 65 + /// Next node ID counter within paragraph 66 + pub next_node_id: usize, 67 + /// Next syntax span ID counter 68 + pub next_syn_id: usize, 69 + } 70 + 71 + impl Default for NodeIdGenerator { 72 + fn default() -> Self { 73 + Self { 74 + prefix: None, 75 + auto_increment_base: None, 76 + static_override: None, 77 + current_paragraph: 0, 78 + next_node_id: 0, 79 + next_syn_id: 0, 80 + } 81 + } 82 + } 83 + 84 + impl NodeIdGenerator { 85 + /// Get the current paragraph prefix. 86 + pub fn current_prefix(&self) -> SmolStr { 87 + if let Some((idx, ref prefix)) = self.static_override { 88 + if idx == self.current_paragraph { 89 + return prefix.clone(); 90 + } 91 + } 92 + if let Some(base) = self.auto_increment_base { 93 + return format_smolstr!("p-{}", base + self.current_paragraph); 94 + } 95 + self.prefix.clone().unwrap_or_else(|| "p-0".to_smolstr()) 96 + } 97 + 98 + /// Generate a node ID (e.g., "p-0-n3") 99 + pub fn next_node(&mut self) -> SmolStr { 100 + let id = if let Some(ref prefix) = self.prefix { 101 + format_smolstr!("{}-n{}", prefix, self.next_node_id) 102 + } else { 103 + format_smolstr!("n{}", self.next_node_id) 104 + }; 105 + self.next_node_id += 1; 106 + SmolStr::new(id) 107 + } 108 + 109 + /// Generate a syntax span ID (e.g., "s5") 110 + pub fn next_syn(&mut self) -> SmolStr { 111 + let id = format_smolstr!("s{}", self.next_syn_id); 112 + self.next_syn_id += 1; 113 + SmolStr::new(id) 114 + } 115 + 116 + /// Advance to next paragraph. 
117 + pub fn next_paragraph(&mut self) { 118 + self.current_paragraph += 1; 119 + self.next_node_id = 0; 120 + 121 + // Update prefix for next paragraph 122 + if let Some((override_idx, ref override_prefix)) = self.static_override { 123 + if self.current_paragraph == override_idx { 124 + self.prefix = Some(override_prefix.clone()); 125 + } else if let Some(base) = self.auto_increment_base { 126 + self.prefix = Some(format_smolstr!("p-{}", base + self.current_paragraph)); 127 + } 128 + } else if let Some(base) = self.auto_increment_base { 129 + self.prefix = Some(format_smolstr!("p-{}", base + self.current_paragraph)); 130 + } 131 + } 132 + } 133 + 134 + /// Current DOM node tracking for offset mapping. 135 + #[derive(Debug, Clone, Default)] 136 + pub struct CurrentNodeState { 137 + /// Node ID for current text container 138 + pub id: Option<SmolStr>, 139 + /// UTF-16 offset within current node 140 + pub char_offset: usize, 141 + /// Number of child elements in current container 142 + pub child_count: usize, 143 + } 144 + 145 + impl CurrentNodeState { 146 + pub fn begin(&mut self, id: SmolStr) { 147 + self.id = Some(id); 148 + self.char_offset = 0; 149 + self.child_count = 0; 150 + } 151 + 152 + pub fn end(&mut self) { 153 + self.id = None; 154 + self.char_offset = 0; 155 + self.child_count = 0; 156 + } 157 + } 158 + 159 + /// Paragraph boundary tracking. 
160 + #[derive(Debug, Clone, Default)] 161 + pub struct ParagraphTracker { 162 + /// Completed paragraph ranges: (byte_range, char_range) 163 + pub ranges: Vec<(Range<usize>, Range<usize>)>, 164 + /// Start of current paragraph: (byte_offset, char_offset) 165 + pub current_start: Option<(usize, usize)>, 166 + /// List nesting depth (suppress paragraph boundaries inside lists) 167 + pub list_depth: usize, 168 + /// In footnote definition (suppress inner paragraph boundaries) 169 + pub in_footnote_def: bool, 170 + } 171 + 172 + impl ParagraphTracker { 173 + pub fn start_paragraph(&mut self, byte_offset: usize, char_offset: usize) { 174 + self.current_start = Some((byte_offset, char_offset)); 175 + } 176 + 177 + pub fn end_paragraph( 178 + &mut self, 179 + byte_offset: usize, 180 + char_offset: usize, 181 + ) -> Option<(Range<usize>, Range<usize>)> { 182 + if let Some((start_byte, start_char)) = self.current_start.take() { 183 + let ranges = (start_byte..byte_offset, start_char..char_offset); 184 + self.ranges.push(ranges.clone()); 185 + Some(ranges) 186 + } else { 187 + None 188 + } 189 + } 190 + 191 + pub fn in_list(&self) -> bool { 192 + self.list_depth > 0 193 + } 194 + 195 + pub fn should_track_boundaries(&self) -> bool { 196 + self.list_depth == 0 && !self.in_footnote_def 197 + } 198 + } 199 + 200 + /// Current paragraph build state (offset maps, syntax spans, refs). 
201 + #[derive(Debug, Clone, Default)] 202 + pub struct ParagraphBuildState { 203 + /// Offset mappings for current paragraph 204 + pub offset_maps: Vec<OffsetMapping>, 205 + /// Syntax spans for current paragraph 206 + pub syntax_spans: Vec<SyntaxSpanInfo>, 207 + /// Collected refs for current paragraph 208 + pub collected_refs: Vec<weaver_common::ExtractedRef>, 209 + /// Stack of pending inline formats: (syn_id, char_start) 210 + pub pending_inline_formats: Vec<(SmolStr, usize)>, 211 + } 212 + 213 + impl ParagraphBuildState { 214 + pub fn take_all( 215 + &mut self, 216 + ) -> ( 217 + Vec<OffsetMapping>, 218 + Vec<SyntaxSpanInfo>, 219 + Vec<weaver_common::ExtractedRef>, 220 + ) { 221 + ( 222 + std::mem::take(&mut self.offset_maps), 223 + std::mem::take(&mut self.syntax_spans), 224 + std::mem::take(&mut self.collected_refs), 225 + ) 226 + } 227 + 228 + /// Finalize a paired inline format (Strong, Emphasis, Strikethrough). 229 + pub fn finalize_paired_format(&mut self, last_char_offset: usize) { 230 + if let Some((opening_syn_id, format_start)) = self.pending_inline_formats.pop() { 231 + let formatted_range = format_start..last_char_offset; 232 + 233 + // Update opening span 234 + if let Some(span) = self 235 + .syntax_spans 236 + .iter_mut() 237 + .find(|s| s.syn_id == opening_syn_id) 238 + { 239 + span.formatted_range = Some(formatted_range.clone()); 240 + } 241 + 242 + // Update closing span (most recent) 243 + if let Some(closing) = self.syntax_spans.last_mut() { 244 + if closing.syntax_type == SyntaxType::Inline { 245 + closing.formatted_range = Some(formatted_range); 246 + } 247 + } 248 + } 249 + } 250 + } 251 + 252 + /// WeaverBlock prefix system state. 
253 + #[derive(Debug, Clone, Default)] 254 + pub struct WeaverBlockContext { 255 + /// Pending attrs to apply to next block element 256 + pub pending_attrs: Option<markdown_weaver::WeaverAttributes<'static>>, 257 + /// Type of wrapper element currently open 258 + pub active_wrapper: Option<WrapperElement>, 259 + /// Buffer for WeaverBlock text content 260 + pub buffer: String, 261 + /// Start char offset of current WeaverBlock 262 + pub char_start: Option<usize>, 263 + } 264 + 265 + #[derive(Debug, Clone, Copy, PartialEq, Eq)] 266 + pub enum WrapperElement { 267 + Aside, 268 + Div, 269 + } 270 + 271 + /// Footnote reference/definition linking state. 272 + #[derive(Debug, Clone, Default)] 273 + pub struct FootnoteContext { 274 + /// Maps footnote name -> (syntax_span_index, char_start) 275 + pub ref_spans: HashMap<String, (usize, usize)>, 276 + /// Current footnote def being processed: (name, span_idx, char_start) 277 + pub current_def: Option<(String, usize, usize)>, 278 + } 279 + 280 + /// UTF-16 offset checkpoints for incremental tracking. 281 + #[derive(Debug, Clone, Default)] 282 + pub struct Utf16Tracker { 283 + /// Checkpoints: (char_offset, utf16_offset) 284 + pub checkpoints: Vec<(usize, usize)>, 285 + } 286 + 287 + impl Utf16Tracker { 288 + pub fn new() -> Self { 289 + Self { 290 + checkpoints: vec![(0, 0)], 291 + } 292 + } 293 + 294 + /// Add a checkpoint. 295 + pub fn checkpoint(&mut self, char_offset: usize, utf16_offset: usize) { 296 + if self.checkpoints.last().map(|(c, _)| *c) != Some(char_offset) { 297 + self.checkpoints.push((char_offset, utf16_offset)); 298 + } 299 + } 300 + 301 + /// Get the last checkpoint. 
302 + pub fn last(&self) -> (usize, usize) { 303 + self.checkpoints.last().copied().unwrap_or((0, 0)) 304 + } 305 + } 306 + 307 + #[cfg(test)] 308 + mod tests { 309 + use super::*; 310 + 311 + #[test] 312 + fn test_node_id_generator() { 313 + let mut generator = NodeIdGenerator::default(); 314 + generator.prefix = Some("p-0".to_smolstr()); 315 + 316 + assert_eq!(generator.next_node().as_str(), "p-0-n0"); 317 + assert_eq!(generator.next_node().as_str(), "p-0-n1"); 318 + assert_eq!(generator.next_syn().as_str(), "s0"); 319 + assert_eq!(generator.next_syn().as_str(), "s1"); 320 + } 321 + 322 + #[test] 323 + fn test_node_id_generator_auto_increment() { 324 + let mut generator = NodeIdGenerator::default(); 325 + generator.auto_increment_base = Some(0); 326 + generator.prefix = Some("p-0".to_smolstr()); 327 + 328 + assert_eq!(generator.next_node().as_str(), "p-0-n0"); 329 + generator.next_paragraph(); 330 + assert_eq!(generator.prefix, Some("p-1".to_smolstr())); 331 + assert_eq!(generator.next_node().as_str(), "p-1-n0"); 332 + } 333 + 334 + #[test] 335 + fn test_paragraph_tracker() { 336 + let mut tracker = ParagraphTracker::default(); 337 + 338 + tracker.start_paragraph(0, 0); 339 + let ranges = tracker.end_paragraph(10, 10); 340 + assert_eq!(ranges, Some((0..10, 0..10))); 341 + 342 + tracker.list_depth = 1; 343 + assert!(tracker.in_list()); 344 + assert!(!tracker.should_track_boundaries()); 345 + } 346 + 347 + #[test] 348 + fn test_code_block_context() { 349 + let mut ctx = CodeBlockContext::default(); 350 + assert!(!ctx.is_active()); 351 + 352 + ctx.buffer = Some((Some("rust".to_smolstr()), String::new())); 353 + assert!(ctx.is_active()); 354 + 355 + ctx.clear(); 356 + assert!(!ctx.is_active()); 357 + } 358 + }
+196
crates/weaver-editor-core/src/writer/syntax.rs
··· 1 + //! Syntax span emission methods for EditorWriter. 2 + 3 + use core::fmt; 4 + use std::ops::Range; 5 + 6 + use markdown_weaver::Event; 7 + use markdown_weaver_escape::{StrWrite, escape_html}; 8 + 9 + use crate::render::{EmbedContentProvider, ImageResolver, WikilinkValidator}; 10 + use crate::syntax::{SyntaxSpanInfo, SyntaxType, classify_syntax}; 11 + 12 + use super::EditorWriter; 13 + 14 + impl<'a, I, E, R, W> EditorWriter<'a, I, E, R, W> 15 + where 16 + I: Iterator<Item = (Event<'a>, Range<usize>)>, 17 + E: EmbedContentProvider, 18 + R: ImageResolver, 19 + W: WikilinkValidator, 20 + { 21 + /// Emit syntax span for a given range and record offset mapping. 22 + pub(crate) fn emit_syntax(&mut self, range: Range<usize>) -> Result<(), fmt::Error> { 23 + if range.start < range.end { 24 + let syntax = &self.source[range.clone()]; 25 + if !syntax.is_empty() { 26 + let char_start = self.last_char_offset; 27 + let syntax_char_len = syntax.chars().count(); 28 + let char_end = char_start + syntax_char_len; 29 + 30 + tracing::trace!( 31 + target: "weaver::writer", 32 + byte_range = ?range, 33 + char_range = ?(char_start..char_end), 34 + syntax = %syntax.escape_debug(), 35 + "emit_syntax" 36 + ); 37 + 38 + // Whitespace-only content (trailing spaces, newlines) should be emitted 39 + // as plain text, not wrapped in a hideable syntax span 40 + let is_whitespace_only = syntax.trim().is_empty(); 41 + 42 + if is_whitespace_only { 43 + // Emit as plain text with tracking span (not hideable) 44 + let created_node = if self.current_node.id.is_none() { 45 + let node_id = self.gen_node_id(); 46 + write!(&mut self.writer, "<span id=\"{}\">", node_id)?; 47 + self.begin_node(node_id); 48 + true 49 + } else { 50 + false 51 + }; 52 + 53 + escape_html(&mut self.writer, syntax)?; 54 + 55 + // Record offset mapping BEFORE end_node (which clears current_node.id) 56 + self.record_mapping(range.clone(), char_start..char_end); 57 + self.last_char_offset = char_end; 58 + 
self.last_byte_offset = range.end; 59 + 60 + if created_node { 61 + self.write("</span>")?; 62 + self.end_node(); 63 + } 64 + } else { 65 + // Real syntax - wrap in hideable span 66 + let syntax_type = classify_syntax(syntax); 67 + let class = match syntax_type { 68 + SyntaxType::Inline => "md-syntax-inline", 69 + SyntaxType::Block => "md-syntax-block", 70 + }; 71 + 72 + // Generate unique ID for this syntax span 73 + let syn_id = self.gen_syn_id(); 74 + 75 + // If we're outside any node, create a wrapper span for tracking 76 + let created_node = if self.current_node.id.is_none() { 77 + let node_id = self.gen_node_id(); 78 + write!( 79 + &mut self.writer, 80 + "<span id=\"{}\" class=\"{}\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">", 81 + node_id, class, syn_id, char_start, char_end 82 + )?; 83 + self.begin_node(node_id); 84 + true 85 + } else { 86 + write!( 87 + &mut self.writer, 88 + "<span class=\"{}\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">", 89 + class, syn_id, char_start, char_end 90 + )?; 91 + false 92 + }; 93 + 94 + escape_html(&mut self.writer, syntax)?; 95 + self.write("</span>")?; 96 + 97 + // Record syntax span info for visibility toggling 98 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 99 + syn_id, 100 + char_range: char_start..char_end, 101 + syntax_type, 102 + formatted_range: None, 103 + }); 104 + 105 + // Record offset mapping for this syntax 106 + self.record_mapping(range.clone(), char_start..char_end); 107 + self.last_char_offset = char_end; 108 + self.last_byte_offset = range.end; 109 + 110 + // Close wrapper if we created one 111 + if created_node { 112 + self.write("</span>")?; 113 + self.end_node(); 114 + } 115 + } 116 + } 117 + } 118 + Ok(()) 119 + } 120 + 121 + /// Emit syntax span inside current node with full offset tracking. 122 + /// 123 + /// Use this for syntax markers that appear inside block elements (headings, lists, 124 + /// blockquotes, code fences). 
Unlike `emit_syntax` which is for gaps and creates 125 + /// wrapper nodes, this assumes we're already inside a tracked node. 126 + /// 127 + /// - Writes `<span class="md-syntax-{class}">{syntax}</span>` 128 + /// - Records offset mapping (for cursor positioning) 129 + /// - Updates both `last_char_offset` and `last_byte_offset` 130 + pub(crate) fn emit_inner_syntax( 131 + &mut self, 132 + syntax: &str, 133 + byte_start: usize, 134 + syntax_type: SyntaxType, 135 + ) -> Result<(), fmt::Error> { 136 + if syntax.is_empty() { 137 + return Ok(()); 138 + } 139 + 140 + let char_start = self.last_char_offset; 141 + let syntax_char_len = syntax.chars().count(); 142 + let char_end = char_start + syntax_char_len; 143 + let byte_end = byte_start + syntax.len(); 144 + 145 + let class_str = match syntax_type { 146 + SyntaxType::Inline => "md-syntax-inline", 147 + SyntaxType::Block => "md-syntax-block", 148 + }; 149 + 150 + // Generate unique ID for this syntax span 151 + let syn_id = self.gen_syn_id(); 152 + 153 + write!( 154 + &mut self.writer, 155 + "<span class=\"{}\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">", 156 + class_str, syn_id, char_start, char_end 157 + )?; 158 + escape_html(&mut self.writer, syntax)?; 159 + self.write("</span>")?; 160 + 161 + // Record syntax span info for visibility toggling 162 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 163 + syn_id, 164 + char_range: char_start..char_end, 165 + syntax_type, 166 + formatted_range: None, 167 + }); 168 + 169 + // Record offset mapping for cursor positioning 170 + self.record_mapping(byte_start..byte_end, char_start..char_end); 171 + 172 + self.last_char_offset = char_end; 173 + self.last_byte_offset = byte_end; 174 + 175 + Ok(()) 176 + } 177 + 178 + /// Emit any gap between last position and next offset. 
179 + pub(crate) fn emit_gap_before(&mut self, next_offset: usize) -> Result<(), fmt::Error> { 180 + // Skip gap emission if we're inside a table being rendered as markdown 181 + if self.table.start_offset.is_some() && self.table.render_as_markdown { 182 + return Ok(()); 183 + } 184 + 185 + // Skip gap emission if we're buffering code block content 186 + // The code block handler manages its own syntax emission 187 + if self.code_block.is_active() { 188 + return Ok(()); 189 + } 190 + 191 + if next_offset > self.last_byte_offset { 192 + self.emit_syntax(self.last_byte_offset..next_offset)?; 193 + } 194 + Ok(()) 195 + } 196 + }
+1629
crates/weaver-editor-core/src/writer/tags.rs
··· 1 + //! Tag handling for EditorWriter - start_tag and end_tag methods. 2 + 3 + use core::fmt; 4 + use std::ops::Range; 5 + 6 + use markdown_weaver::{Alignment, BlockQuoteKind, CodeBlockKind, EmbedType, Event, LinkType, Tag}; 7 + use markdown_weaver_escape::{StrWrite, escape_href, escape_html, escape_html_body_text}; 8 + 9 + use crate::offset_map::OffsetMapping; 10 + use crate::render::{EmbedContentProvider, ImageResolver, WikilinkValidator}; 11 + use crate::syntax::{SyntaxSpanInfo, SyntaxType, classify_syntax}; 12 + 13 + use super::{EditorWriter, TableState}; 14 + 15 + impl<'a, I, E, R, W> EditorWriter<'a, I, E, R, W> 16 + where 17 + I: Iterator<Item = (Event<'a>, Range<usize>)>, 18 + E: EmbedContentProvider, 19 + R: ImageResolver, 20 + W: WikilinkValidator, 21 + { 22 + /// Detect text direction by scanning source from a byte offset. 23 + /// Looks for the first strong directional character. 24 + /// Returns Some("rtl") for RTL scripts, Some("ltr") for LTR, None if no strong char found. 25 + fn detect_paragraph_direction(&self, start_byte: usize) -> Option<&'static str> { 26 + if start_byte >= self.source.len() { 27 + return None; 28 + } 29 + 30 + // Scan from start_byte through the source looking for first strong directional char 31 + let text = &self.source[start_byte..]; 32 + weaver_renderer::utils::detect_text_direction(text) 33 + } 34 + 35 + pub(crate) fn start_tag( 36 + &mut self, 37 + tag: Tag<'_>, 38 + range: Range<usize>, 39 + ) -> Result<(), fmt::Error> { 40 + // Check if this is a block-level tag that should have syntax inside 41 + let is_block_tag = matches!(tag, Tag::Heading { .. 
} | Tag::BlockQuote(_)); 42 + 43 + // For inline tags, emit syntax before tag 44 + if !is_block_tag && range.start < range.end { 45 + let raw_text = &self.source[range.clone()]; 46 + let opening_syntax = match &tag { 47 + Tag::Strong => { 48 + if raw_text.starts_with("**") { 49 + Some("**") 50 + } else if raw_text.starts_with("__") { 51 + Some("__") 52 + } else { 53 + None 54 + } 55 + } 56 + Tag::Emphasis => { 57 + if raw_text.starts_with("*") { 58 + Some("*") 59 + } else if raw_text.starts_with("_") { 60 + Some("_") 61 + } else { 62 + None 63 + } 64 + } 65 + Tag::Strikethrough => { 66 + if raw_text.starts_with("~~") { 67 + Some("~~") 68 + } else { 69 + None 70 + } 71 + } 72 + Tag::Link { link_type, .. } => { 73 + if matches!(link_type, LinkType::WikiLink { .. }) { 74 + if raw_text.starts_with("[[") { 75 + Some("[[") 76 + } else { 77 + None 78 + } 79 + } else if raw_text.starts_with('[') { 80 + Some("[") 81 + } else { 82 + None 83 + } 84 + } 85 + // Note: Tag::Image and Tag::Embed handle their own syntax spans 86 + // in their respective handlers, so don't emit here 87 + _ => None, 88 + }; 89 + 90 + if let Some(syntax) = opening_syntax { 91 + let syntax_type = classify_syntax(syntax); 92 + let class = match syntax_type { 93 + SyntaxType::Inline => "md-syntax-inline", 94 + SyntaxType::Block => "md-syntax-block", 95 + }; 96 + 97 + let char_start = self.last_char_offset; 98 + let syntax_char_len = syntax.chars().count(); 99 + let char_end = char_start + syntax_char_len; 100 + let syntax_byte_len = syntax.len(); 101 + 102 + // Generate unique ID for this syntax span 103 + let syn_id = self.gen_syn_id(); 104 + 105 + write!( 106 + &mut self.writer, 107 + "<span class=\"{}\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">", 108 + class, syn_id, char_start, char_end 109 + )?; 110 + escape_html(&mut self.writer, syntax)?; 111 + self.write("</span>")?; 112 + 113 + // Record syntax span info for visibility toggling 114 + 
self.current_para.syntax_spans.push(SyntaxSpanInfo { 115 + syn_id: syn_id.clone(), 116 + char_range: char_start..char_end, 117 + syntax_type, 118 + formatted_range: None, // Will be updated when closing tag is emitted 119 + }); 120 + 121 + // Record offset mapping for cursor positioning 122 + // This is critical - without it, current_node_char_offset is wrong 123 + // and all subsequent cursor positions are shifted 124 + let byte_start = range.start; 125 + let byte_end = range.start + syntax_byte_len; 126 + self.record_mapping(byte_start..byte_end, char_start..char_end); 127 + 128 + // For paired inline syntax, track opening span for formatted_range 129 + if matches!( 130 + tag, 131 + Tag::Strong | Tag::Emphasis | Tag::Strikethrough | Tag::Link { .. } 132 + ) { 133 + self.current_para 134 + .pending_inline_formats 135 + .push((syn_id, char_start)); 136 + } 137 + 138 + // Update tracking - we've consumed this opening syntax 139 + self.last_char_offset = char_end; 140 + self.last_byte_offset = range.start + syntax_byte_len; 141 + } 142 + } 143 + 144 + // Emit the opening tag 145 + match tag { 146 + // HTML blocks get their own paragraph to try and corral them better 147 + Tag::HtmlBlock => { 148 + // Record paragraph start for boundary tracking 149 + // Skip if inside a list or footnote def - they own their paragraph boundary 150 + if self.paragraphs.list_depth == 0 && !self.paragraphs.in_footnote_def { 151 + self.paragraphs.current_start = 152 + Some((self.last_byte_offset, self.last_char_offset)); 153 + } 154 + let node_id = self.gen_node_id(); 155 + 156 + if self.end_newline { 157 + write!( 158 + &mut self.writer, 159 + r#"<p id="{}" class="html-embed html-embed-block">"#, 160 + node_id 161 + )?; 162 + } else { 163 + write!( 164 + &mut self.writer, 165 + r#"<p id="{}" class="html-embed html-embed-block">"#, 166 + node_id 167 + )?; 168 + } 169 + self.begin_node(node_id.clone()); 170 + 171 + // Map the start position of the paragraph (before any content) 172 + // 
This allows cursor to be placed at the very beginning 173 + let para_start_char = self.last_char_offset; 174 + let mapping = OffsetMapping { 175 + byte_range: range.start..range.start, 176 + char_range: para_start_char..para_start_char, 177 + node_id, 178 + char_offset_in_node: 0, 179 + child_index: Some(0), // position before first child 180 + utf16_len: 0, 181 + }; 182 + self.current_para.offset_maps.push(mapping); 183 + 184 + Ok(()) 185 + } 186 + Tag::Paragraph(_) => { 187 + // Handle wrapper before block 188 + self.emit_wrapper_start()?; 189 + 190 + // Record paragraph start for boundary tracking 191 + // Skip if inside a list or footnote def - they own their paragraph boundary 192 + if self.paragraphs.list_depth == 0 && !self.paragraphs.in_footnote_def { 193 + self.paragraphs.current_start = 194 + Some((self.last_byte_offset, self.last_char_offset)); 195 + } 196 + 197 + let node_id = self.gen_node_id(); 198 + 199 + // Detect text direction for this paragraph 200 + let dir = self.detect_paragraph_direction(self.last_byte_offset); 201 + 202 + if self.end_newline { 203 + if let Some(dir_value) = dir { 204 + write!( 205 + &mut self.writer, 206 + "<p id=\"{}\" dir=\"{}\">", 207 + node_id, dir_value 208 + )?; 209 + } else { 210 + write!(&mut self.writer, "<p id=\"{}\">", node_id)?; 211 + } 212 + } else { 213 + if let Some(dir_value) = dir { 214 + write!( 215 + &mut self.writer, 216 + "<p id=\"{}\" dir=\"{}\">", 217 + node_id, dir_value 218 + )?; 219 + } else { 220 + write!(&mut self.writer, "<p id=\"{}\">", node_id)?; 221 + } 222 + } 223 + self.begin_node(node_id.clone()); 224 + 225 + // Map the start position of the paragraph (before any content) 226 + // This allows cursor to be placed at the very beginning 227 + let para_start_char = self.last_char_offset; 228 + let mapping = OffsetMapping { 229 + byte_range: range.start..range.start, 230 + char_range: para_start_char..para_start_char, 231 + node_id, 232 + char_offset_in_node: 0, 233 + child_index: Some(0), // 
position before first child 234 + utf16_len: 0, 235 + }; 236 + self.current_para.offset_maps.push(mapping); 237 + 238 + // Emit > syntax if we're inside a blockquote 239 + if let Some(bq_range) = self.pending_blockquote_range.take() { 240 + if bq_range.start < bq_range.end { 241 + let raw_text = &self.source[bq_range.clone()]; 242 + if let Some(gt_pos) = raw_text.find('>') { 243 + // Extract > [!NOTE] or just > 244 + let after_gt = &raw_text[gt_pos + 1..]; 245 + let syntax_end = if after_gt.trim_start().starts_with("[!") { 246 + // Find the closing ] 247 + if let Some(close_bracket) = after_gt.find(']') { 248 + gt_pos + 1 + close_bracket + 1 249 + } else { 250 + gt_pos + 1 251 + } 252 + } else { 253 + // Just > and maybe a space 254 + (gt_pos + 1).min(raw_text.len()) 255 + }; 256 + 257 + let syntax = &raw_text[gt_pos..syntax_end]; 258 + let syntax_byte_start = bq_range.start + gt_pos; 259 + self.emit_inner_syntax(syntax, syntax_byte_start, SyntaxType::Block)?; 260 + } 261 + } 262 + } 263 + Ok(()) 264 + } 265 + Tag::Heading { 266 + level, 267 + id, 268 + classes, 269 + attrs, 270 + } => { 271 + // Emit wrapper if pending (but don't close on heading end - wraps following block too) 272 + self.emit_wrapper_start()?; 273 + 274 + // Record paragraph start for boundary tracking 275 + // Treat headings as paragraph-level blocks 276 + self.paragraphs.current_start = 277 + Some((self.last_byte_offset, self.last_char_offset)); 278 + 279 + if !self.end_newline { 280 + self.write_newline()?; 281 + } 282 + 283 + // Generate node ID for offset tracking 284 + let node_id = self.gen_node_id(); 285 + 286 + // Detect text direction for this heading 287 + let dir = self.detect_paragraph_direction(self.last_byte_offset); 288 + 289 + self.write("<")?; 290 + write!(&mut self.writer, "{}", level)?; 291 + 292 + // Add our tracking ID as data attribute (preserve user's id if present) 293 + self.write(" data-node-id=\"")?; 294 + self.write(&node_id)?; 295 + self.write("\"")?; 296 + 297 + if 
let Some(id) = id { 298 + self.write(" id=\"")?; 299 + escape_html(&mut self.writer, &id)?; 300 + self.write("\"")?; 301 + } 302 + if !classes.is_empty() { 303 + self.write(" class=\"")?; 304 + for (i, class) in classes.iter().enumerate() { 305 + if i > 0 { 306 + self.write(" ")?; 307 + } 308 + escape_html(&mut self.writer, class)?; 309 + } 310 + self.write("\"")?; 311 + } 312 + 313 + // Add dir attribute if text direction was detected 314 + if let Some(dir_value) = dir { 315 + self.write(" dir=\"")?; 316 + self.write(dir_value)?; 317 + self.write("\"")?; 318 + } 319 + 320 + for (attr, value) in attrs { 321 + self.write(" ")?; 322 + escape_html(&mut self.writer, &attr)?; 323 + if let Some(val) = value { 324 + self.write("=\"")?; 325 + escape_html(&mut self.writer, &val)?; 326 + self.write("\"")?; 327 + } else { 328 + self.write("=\"\"")?; 329 + } 330 + } 331 + self.write(">")?; 332 + 333 + // Begin node tracking for offset mapping 334 + self.begin_node(node_id.clone()); 335 + 336 + // Map the start position of the heading (before any content) 337 + // This allows cursor to be placed at the very beginning 338 + let heading_start_char = self.last_char_offset; 339 + let mapping = OffsetMapping { 340 + byte_range: range.start..range.start, 341 + char_range: heading_start_char..heading_start_char, 342 + node_id: node_id.clone(), 343 + char_offset_in_node: 0, 344 + child_index: Some(0), // position before first child 345 + utf16_len: 0, 346 + }; 347 + self.current_para.offset_maps.push(mapping); 348 + 349 + // Emit # syntax inside the heading tag 350 + if range.start < range.end { 351 + let raw_text = &self.source[range.clone()]; 352 + let count = level as usize; 353 + let pattern = "#".repeat(count); 354 + 355 + // Find where the # actually starts (might have leading whitespace) 356 + if let Some(hash_pos) = raw_text.find(&pattern) { 357 + // Extract "# " or "## " etc 358 + let syntax_end = (hash_pos + count + 1).min(raw_text.len()); 359 + let syntax = 
&raw_text[hash_pos..syntax_end]; 360 + let syntax_byte_start = range.start + hash_pos; 361 + 362 + self.emit_inner_syntax(syntax, syntax_byte_start, SyntaxType::Block)?; 363 + } 364 + } 365 + Ok(()) 366 + } 367 + Tag::Table(alignments) => { 368 + if self.table.render_as_markdown { 369 + // Store start offset and skip HTML rendering 370 + self.table.start_offset = Some(range.start); 371 + self.in_non_writing_block = true; // Suppress content output 372 + Ok(()) 373 + } else { 374 + self.emit_wrapper_start()?; 375 + self.table.alignments = alignments; 376 + self.write("<table>") 377 + } 378 + } 379 + Tag::TableHead => { 380 + if self.table.render_as_markdown { 381 + Ok(()) // Skip HTML rendering 382 + } else { 383 + self.table.state = TableState::Head; 384 + self.table.cell_index = 0; 385 + self.write("<thead><tr>") 386 + } 387 + } 388 + Tag::TableRow => { 389 + if self.table.render_as_markdown { 390 + Ok(()) // Skip HTML rendering 391 + } else { 392 + self.table.cell_index = 0; 393 + self.write("<tr>") 394 + } 395 + } 396 + Tag::TableCell => { 397 + if self.table.render_as_markdown { 398 + Ok(()) // Skip HTML rendering 399 + } else { 400 + match self.table.state { 401 + TableState::Head => self.write("<th")?, 402 + TableState::Body => self.write("<td")?, 403 + } 404 + match self.table.alignments.get(self.table.cell_index) { 405 + Some(&Alignment::Left) => self.write(" style=\"text-align: left\">"), 406 + Some(&Alignment::Center) => self.write(" style=\"text-align: center\">"), 407 + Some(&Alignment::Right) => self.write(" style=\"text-align: right\">"), 408 + _ => self.write(">"), 409 + } 410 + } 411 + } 412 + Tag::BlockQuote(kind) => { 413 + self.emit_wrapper_start()?; 414 + 415 + let class_str = match kind { 416 + None => "", 417 + Some(BlockQuoteKind::Note) => " class=\"markdown-alert-note\"", 418 + Some(BlockQuoteKind::Tip) => " class=\"markdown-alert-tip\"", 419 + Some(BlockQuoteKind::Important) => " class=\"markdown-alert-important\"", 420 + 
Some(BlockQuoteKind::Warning) => " class=\"markdown-alert-warning\"", 421 + Some(BlockQuoteKind::Caution) => " class=\"markdown-alert-caution\"", 422 + }; 423 + if self.end_newline { 424 + write!(&mut self.writer, "<blockquote{}>", class_str)?; 425 + } else { 426 + write!(&mut self.writer, "<blockquote{}>", class_str)?; 427 + } 428 + 429 + // Store range for emitting > inside the next paragraph 430 + self.pending_blockquote_range = Some(range); 431 + Ok(()) 432 + } 433 + Tag::CodeBlock(info) => { 434 + self.emit_wrapper_start()?; 435 + 436 + // Track code block as paragraph-level block 437 + self.paragraphs.current_start = 438 + Some((self.last_byte_offset, self.last_char_offset)); 439 + 440 + if !self.end_newline { 441 + self.write_newline()?; 442 + } 443 + 444 + // Generate node ID for code block 445 + let node_id = self.gen_node_id(); 446 + 447 + match info { 448 + CodeBlockKind::Fenced(info) => { 449 + // Emit opening ```language and track both char and byte offsets 450 + if range.start < range.end { 451 + let raw_text = &self.source[range.clone()]; 452 + if let Some(fence_pos) = raw_text.find("```") { 453 + let fence_end = (fence_pos + 3 + info.len()).min(raw_text.len()); 454 + let syntax = &raw_text[fence_pos..fence_end]; 455 + let syntax_char_len = syntax.chars().count() + 1; // +1 for newline 456 + let syntax_byte_len = syntax.len() + 1; // +1 for newline 457 + 458 + let syn_id = self.gen_syn_id(); 459 + let char_start = self.last_char_offset; 460 + let char_end = char_start + syntax_char_len; 461 + 462 + write!( 463 + &mut self.writer, 464 + "<span class=\"md-syntax-block\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">", 465 + syn_id, char_start, char_end 466 + )?; 467 + escape_html(&mut self.writer, syntax)?; 468 + self.write("</span>")?; 469 + 470 + // Track opening span index for formatted_range update later 471 + self.code_block.opening_span_idx = 472 + Some(self.current_para.syntax_spans.len()); 473 + 
self.code_block.block_start = Some(char_start); 474 + 475 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 476 + syn_id, 477 + char_range: char_start..char_end, 478 + syntax_type: SyntaxType::Block, 479 + formatted_range: None, // Will be set in TagEnd::CodeBlock 480 + }); 481 + 482 + self.last_char_offset += syntax_char_len; 483 + self.last_byte_offset = range.start + fence_pos + syntax_byte_len; 484 + } 485 + } 486 + 487 + let lang = info.split(' ').next().unwrap(); 488 + let lang_opt = if lang.is_empty() { 489 + None 490 + } else { 491 + Some(smol_str::SmolStr::new(lang)) 492 + }; 493 + // Start buffering 494 + self.code_block.buffer = Some((lang_opt, String::new())); 495 + 496 + // Begin node tracking for offset mapping 497 + self.begin_node(node_id); 498 + Ok(()) 499 + } 500 + CodeBlockKind::Indented => { 501 + // Ignore indented code blocks (as per executive decision) 502 + self.code_block.buffer = Some((None, String::new())); 503 + 504 + // Begin node tracking for offset mapping 505 + self.begin_node(node_id); 506 + Ok(()) 507 + } 508 + } 509 + } 510 + Tag::List(Some(1)) => { 511 + self.emit_wrapper_start()?; 512 + // Track list as paragraph-level block 513 + self.paragraphs.current_start = 514 + Some((self.last_byte_offset, self.last_char_offset)); 515 + self.paragraphs.list_depth += 1; 516 + if self.end_newline { 517 + self.write("<ol>") 518 + } else { 519 + self.write("<ol>") 520 + } 521 + } 522 + Tag::List(Some(start)) => { 523 + self.emit_wrapper_start()?; 524 + // Track list as paragraph-level block 525 + self.paragraphs.current_start = 526 + Some((self.last_byte_offset, self.last_char_offset)); 527 + self.paragraphs.list_depth += 1; 528 + if self.end_newline { 529 + self.write("<ol start=\"")?; 530 + } else { 531 + self.write("<ol start=\"")?; 532 + } 533 + write!(&mut self.writer, "{}", start)?; 534 + self.write("\">") 535 + } 536 + Tag::List(None) => { 537 + self.emit_wrapper_start()?; 538 + // Track list as paragraph-level block 539 + 
self.paragraphs.current_start = 540 + Some((self.last_byte_offset, self.last_char_offset)); 541 + self.paragraphs.list_depth += 1; 542 + if self.end_newline { 543 + self.write("<ul>") 544 + } else { 545 + self.write("<ul>") 546 + } 547 + } 548 + Tag::Item => { 549 + // Generate node ID for list item 550 + let node_id = self.gen_node_id(); 551 + 552 + if self.end_newline { 553 + write!(&mut self.writer, "<li data-node-id=\"{}\">", node_id)?; 554 + } else { 555 + write!(&mut self.writer, "<li data-node-id=\"{}\">", node_id)?; 556 + } 557 + 558 + // Begin node tracking 559 + self.begin_node(node_id); 560 + 561 + // Emit list marker syntax inside the <li> tag and track both offsets 562 + if range.start < range.end { 563 + let raw_text = &self.source[range.clone()]; 564 + 565 + // Try to find the list marker (-, *, or digit.) 566 + let trimmed = raw_text.trim_start(); 567 + let leading_ws_bytes = raw_text.len() - trimmed.len(); 568 + let leading_ws_chars = raw_text.chars().count() - trimmed.chars().count(); 569 + 570 + if let Some(marker) = trimmed.chars().next() { 571 + if marker == '-' || marker == '*' { 572 + // Unordered list: extract "- " or "* " 573 + let marker_end = trimmed 574 + .find(|c: char| c != '-' && c != '*') 575 + .map(|pos| pos + 1) 576 + .unwrap_or(1); 577 + let syntax = &trimmed[..marker_end.min(trimmed.len())]; 578 + let char_start = self.last_char_offset; 579 + let syntax_char_len = leading_ws_chars + syntax.chars().count(); 580 + let syntax_byte_len = leading_ws_bytes + syntax.len(); 581 + let char_end = char_start + syntax_char_len; 582 + 583 + let syn_id = self.gen_syn_id(); 584 + write!( 585 + &mut self.writer, 586 + "<span class=\"md-syntax-block\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">", 587 + syn_id, char_start, char_end 588 + )?; 589 + escape_html(&mut self.writer, syntax)?; 590 + self.write("</span>")?; 591 + 592 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 593 + syn_id, 594 + 
char_range: char_start..char_end, 595 + syntax_type: SyntaxType::Block, 596 + formatted_range: None, 597 + }); 598 + 599 + // Record offset mapping for cursor positioning 600 + self.record_mapping( 601 + range.start..range.start + syntax_byte_len, 602 + char_start..char_end, 603 + ); 604 + self.last_char_offset = char_end; 605 + self.last_byte_offset = range.start + syntax_byte_len; 606 + } else if marker.is_ascii_digit() { 607 + // Ordered list: extract "1. " or similar (including trailing space) 608 + if let Some(dot_pos) = trimmed.find('.') { 609 + let syntax_end = (dot_pos + 2).min(trimmed.len()); 610 + let syntax = &trimmed[..syntax_end]; 611 + let char_start = self.last_char_offset; 612 + let syntax_char_len = leading_ws_chars + syntax.chars().count(); 613 + let syntax_byte_len = leading_ws_bytes + syntax.len(); 614 + let char_end = char_start + syntax_char_len; 615 + 616 + let syn_id = self.gen_syn_id(); 617 + write!( 618 + &mut self.writer, 619 + "<span class=\"md-syntax-block\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">", 620 + syn_id, char_start, char_end 621 + )?; 622 + escape_html(&mut self.writer, syntax)?; 623 + self.write("</span>")?; 624 + 625 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 626 + syn_id, 627 + char_range: char_start..char_end, 628 + syntax_type: SyntaxType::Block, 629 + formatted_range: None, 630 + }); 631 + 632 + // Record offset mapping for cursor positioning 633 + self.record_mapping( 634 + range.start..range.start + syntax_byte_len, 635 + char_start..char_end, 636 + ); 637 + self.last_char_offset = char_end; 638 + self.last_byte_offset = range.start + syntax_byte_len; 639 + } 640 + } 641 + } 642 + } 643 + Ok(()) 644 + } 645 + Tag::DefinitionList => { 646 + self.emit_wrapper_start()?; 647 + if self.end_newline { 648 + self.write("<dl>") 649 + } else { 650 + self.write("<dl>") 651 + } 652 + } 653 + Tag::DefinitionListTitle => { 654 + let node_id = self.gen_node_id(); 655 + 656 + if 
self.end_newline { 657 + write!(&mut self.writer, "<dt data-node-id=\"{}\">", node_id)?; 658 + } else { 659 + write!(&mut self.writer, "<dt data-node-id=\"{}\">", node_id)?; 660 + } 661 + 662 + self.begin_node(node_id); 663 + Ok(()) 664 + } 665 + Tag::DefinitionListDefinition => { 666 + let node_id = self.gen_node_id(); 667 + 668 + if self.end_newline { 669 + write!(&mut self.writer, "<dd data-node-id=\"{}\">", node_id)?; 670 + } else { 671 + write!(&mut self.writer, "<dd data-node-id=\"{}\">", node_id)?; 672 + } 673 + 674 + self.begin_node(node_id); 675 + Ok(()) 676 + } 677 + Tag::Subscript => self.write("<sub>"), 678 + Tag::Superscript => self.write("<sup>"), 679 + Tag::Emphasis => self.write("<em>"), 680 + Tag::Strong => self.write("<strong>"), 681 + Tag::Strikethrough => self.write("<s>"), 682 + Tag::Link { 683 + link_type: LinkType::Email, 684 + dest_url, 685 + title, 686 + .. 687 + } => { 688 + self.write("<a href=\"mailto:")?; 689 + escape_href(&mut self.writer, &dest_url)?; 690 + if !title.is_empty() { 691 + self.write("\" title=\"")?; 692 + escape_html(&mut self.writer, &title)?; 693 + } 694 + self.write("\">") 695 + } 696 + Tag::Link { 697 + link_type, 698 + dest_url, 699 + title, 700 + .. 701 + } => { 702 + // Collect refs for later resolution 703 + let url = dest_url.as_ref(); 704 + if matches!(link_type, LinkType::WikiLink { .. }) { 705 + let (target, fragment) = weaver_common::EntryIndex::parse_wikilink(url); 706 + self.ref_collector.add_wikilink(target, fragment, None); 707 + } else if url.starts_with("at://") { 708 + self.ref_collector.add_at_link(url); 709 + } 710 + 711 + // Determine link validity class for wikilinks 712 + let validity_class = if matches!(link_type, LinkType::WikiLink { .. 
}) { 713 + if let Some(index) = &self.entry_index { 714 + if index.resolve(dest_url.as_ref()).is_some() { 715 + " link-valid" 716 + } else { 717 + " link-broken" 718 + } 719 + } else { 720 + "" 721 + } 722 + } else { 723 + "" 724 + }; 725 + 726 + self.write("<a class=\"link")?; 727 + self.write(validity_class)?; 728 + self.write("\" href=\"")?; 729 + escape_href(&mut self.writer, &dest_url)?; 730 + if !title.is_empty() { 731 + self.write("\" title=\"")?; 732 + escape_html(&mut self.writer, &title)?; 733 + } 734 + self.write("\">") 735 + } 736 + Tag::Image { 737 + link_type, 738 + dest_url, 739 + title, 740 + id, 741 + attrs, 742 + } => { 743 + // Check if this is actually an AT embed disguised as a wikilink image 744 + // (markdown-weaver parses ![[at://...]] as Image with WikiLink link_type) 745 + let url = dest_url.as_ref(); 746 + if matches!(link_type, LinkType::WikiLink { .. }) 747 + && (url.starts_with("at://") || url.starts_with("did:")) 748 + { 749 + return self.write_embed( 750 + range, 751 + EmbedType::Other, // AT embeds - disambiguated via NSID later 752 + dest_url, 753 + title, 754 + id, 755 + attrs, 756 + ); 757 + } 758 + 759 + // Image rendering: all syntax elements share one syn_id for visibility toggling 760 + // Structure: ![ alt text ](url) <img> cursor-landing 761 + let raw_text = &self.source[range.clone()]; 762 + let syn_id = self.gen_syn_id(); 763 + let opening_char_start = self.last_char_offset; 764 + 765 + // Find the alt text and closing syntax positions 766 + let paren_pos = raw_text.rfind("](").unwrap_or(raw_text.len()); 767 + let alt_text = if raw_text.starts_with("![") && paren_pos > 2 { 768 + &raw_text[2..paren_pos] 769 + } else { 770 + "" 771 + }; 772 + let closing_syntax = if paren_pos < raw_text.len() { 773 + &raw_text[paren_pos..] 
774 + } else { 775 + "" 776 + }; 777 + 778 + // Calculate char positions 779 + let alt_char_len = alt_text.chars().count(); 780 + let closing_char_len = closing_syntax.chars().count(); 781 + let opening_char_end = opening_char_start + 2; // "![" 782 + let alt_char_start = opening_char_end; 783 + let alt_char_end = alt_char_start + alt_char_len; 784 + let closing_char_start = alt_char_end; 785 + let closing_char_end = closing_char_start + closing_char_len; 786 + let formatted_range = opening_char_start..closing_char_end; 787 + 788 + // 1. Emit opening ![ syntax span 789 + if raw_text.starts_with("![") { 790 + write!( 791 + &mut self.writer, 792 + "<span class=\"md-syntax-inline\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">![</span>", 793 + syn_id, opening_char_start, opening_char_end 794 + )?; 795 + 796 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 797 + syn_id: syn_id.clone(), 798 + char_range: opening_char_start..opening_char_end, 799 + syntax_type: SyntaxType::Inline, 800 + formatted_range: Some(formatted_range.clone()), 801 + }); 802 + 803 + // Record offset mapping for ![ 804 + self.record_mapping( 805 + range.start..range.start + 2, 806 + opening_char_start..opening_char_end, 807 + ); 808 + } 809 + 810 + // 2. 
Emit alt text span (same syn_id, editable when visible) 811 + if !alt_text.is_empty() { 812 + write!( 813 + &mut self.writer, 814 + "<span class=\"image-alt\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">", 815 + syn_id, alt_char_start, alt_char_end 816 + )?; 817 + escape_html(&mut self.writer, alt_text)?; 818 + self.write("</span>")?; 819 + 820 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 821 + syn_id: syn_id.clone(), 822 + char_range: alt_char_start..alt_char_end, 823 + syntax_type: SyntaxType::Inline, 824 + formatted_range: Some(formatted_range.clone()), 825 + }); 826 + 827 + // Record offset mapping for alt text 828 + self.record_mapping( 829 + range.start + 2..range.start + 2 + alt_text.len(), 830 + alt_char_start..alt_char_end, 831 + ); 832 + } 833 + 834 + // 3. Emit closing ](url) syntax span 835 + if !closing_syntax.is_empty() { 836 + write!( 837 + &mut self.writer, 838 + "<span class=\"md-syntax-inline\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">", 839 + syn_id, closing_char_start, closing_char_end 840 + )?; 841 + escape_html(&mut self.writer, closing_syntax)?; 842 + self.write("</span>")?; 843 + 844 + self.current_para.syntax_spans.push(SyntaxSpanInfo { 845 + syn_id: syn_id.clone(), 846 + char_range: closing_char_start..closing_char_end, 847 + syntax_type: SyntaxType::Inline, 848 + formatted_range: Some(formatted_range.clone()), 849 + }); 850 + 851 + // Record offset mapping for ](url) 852 + self.record_mapping( 853 + range.start + paren_pos..range.end, 854 + closing_char_start..closing_char_end, 855 + ); 856 + } 857 + 858 + // 4. 
Emit <img> element (no syn_id - always visible) 859 + self.write("<img src=\"")?; 860 + let resolved_url = self 861 + .image_resolver 862 + .as_ref() 863 + .and_then(|r| r.resolve_image_url(&dest_url)); 864 + if let Some(ref cdn_url) = resolved_url { 865 + escape_href(&mut self.writer, cdn_url)?; 866 + } else { 867 + escape_href(&mut self.writer, &dest_url)?; 868 + } 869 + self.write("\" alt=\"")?; 870 + escape_html(&mut self.writer, alt_text)?; 871 + self.write("\"")?; 872 + if !title.is_empty() { 873 + self.write(" title=\"")?; 874 + escape_html(&mut self.writer, &title)?; 875 + self.write("\"")?; 876 + } 877 + if let Some(attrs) = attrs { 878 + if !attrs.classes.is_empty() { 879 + self.write(" class=\"")?; 880 + for (i, class) in attrs.classes.iter().enumerate() { 881 + if i > 0 { 882 + self.write(" ")?; 883 + } 884 + escape_html(&mut self.writer, class)?; 885 + } 886 + self.write("\"")?; 887 + } 888 + for (attr, value) in &attrs.attrs { 889 + self.write(" ")?; 890 + escape_html(&mut self.writer, attr)?; 891 + self.write("=\"")?; 892 + escape_html(&mut self.writer, value)?; 893 + self.write("\"")?; 894 + } 895 + } 896 + self.write(" />")?; 897 + 898 + // Consume the text events for alt (they're still in the iterator) 899 + // Use consume_until_end() since we already wrote alt text from source 900 + self.consume_until_end(); 901 + 902 + // Update offsets 903 + self.last_char_offset = closing_char_end; 904 + self.last_byte_offset = range.end; 905 + 906 + Ok(()) 907 + } 908 + Tag::Embed { 909 + embed_type, 910 + dest_url, 911 + title, 912 + id, 913 + attrs, 914 + } => self.write_embed(range, embed_type, dest_url, title, id, attrs), 915 + Tag::WeaverBlock(_, attrs) => { 916 + self.in_non_writing_block = true; 917 + self.weaver_block.buffer.clear(); 918 + self.weaver_block.char_start = Some(self.last_char_offset); 919 + // Store attrs from Start tag, will merge with parsed text on End 920 + if !attrs.classes.is_empty() || !attrs.attrs.is_empty() { 921 + 
self.weaver_block.pending_attrs = Some(attrs.into_static()); 922 + } 923 + Ok(()) 924 + } 925 + Tag::FootnoteDefinition(name) => { 926 + // Track as paragraph-level block for incremental rendering 927 + self.paragraphs.current_start = 928 + Some((self.last_byte_offset, self.last_char_offset)); 929 + // Suppress inner paragraph boundaries (footnote def owns its paragraph) 930 + self.paragraphs.in_footnote_def = true; 931 + 932 + if !self.end_newline { 933 + self.write_newline()?; 934 + } 935 + 936 + // Generate node ID for cursor tracking 937 + let node_id = self.gen_node_id(); 938 + 939 + // Emit wrapper div with NEW class (not footnote-definition which has order:9999) 940 + // This keeps footnotes in-place instead of reordering to bottom 941 + write!( 942 + &mut self.writer, 943 + "<div class=\"footnote-def-editor\" data-node-id=\"{}\">", 944 + node_id 945 + )?; 946 + 947 + // Begin node tracking BEFORE emitting prefix 948 + self.begin_node(node_id.clone()); 949 + 950 + // Map the start position (before any content) 951 + let fn_start_char = self.last_char_offset; 952 + let mapping = OffsetMapping { 953 + byte_range: range.start..range.start, 954 + char_range: fn_start_char..fn_start_char, 955 + node_id, 956 + char_offset_in_node: 0, 957 + child_index: Some(0), 958 + utf16_len: 0, 959 + }; 960 + self.current_para.offset_maps.push(mapping); 961 + 962 + // Extract ACTUAL prefix from source (not constructed string) 963 + // This ensures byte offsets match reality 964 + let raw_text = &self.source[range.clone()]; 965 + let prefix_end = raw_text 966 + .find("]:") 967 + .map(|p| { 968 + // Include ]: and any single trailing space 969 + let after_colon = p + 2; 970 + if raw_text.get(after_colon..after_colon + 1) == Some(" ") { 971 + after_colon + 1 972 + } else { 973 + after_colon 974 + } 975 + }) 976 + .unwrap_or(0); 977 + let prefix = &raw_text[..prefix_end]; 978 + let prefix_byte_len = prefix.len(); 979 + let prefix_char_len = prefix.chars().count(); 980 + 981 + let 
char_start = self.last_char_offset;
            let char_end = char_start + prefix_char_len;

            write!(
                &mut self.writer,
                "<span class=\"footnote-def-syntax\" data-char-start=\"{}\" data-char-end=\"{}\">",
                char_start, char_end
            )?;
            escape_html(&mut self.writer, prefix)?;
            self.write("</span>")?;

            // Store the definition info (no longer tracking syntax spans for hide/show)
            self.footnotes.current_def = Some((name.to_string(), 0, char_start));

            // Record offset mapping for the prefix
            self.record_mapping(
                range.start..range.start + prefix_byte_len,
                char_start..char_end,
            );

            // Update tracking for the prefix
            self.last_char_offset = char_end;
            self.last_byte_offset = range.start + prefix_byte_len;

            Ok(())
        }
        Tag::MetadataBlock(_) => {
            self.in_non_writing_block = true;
            Ok(())
        }
    }
}

/// Handle an `End` event: write the closing HTML for `tag` and update editor bookkeeping.
///
/// Besides the closing markup, this ends node tracking, finalizes paragraph
/// boundaries (used for incremental rendering), closes any active weaver wrapper,
/// and emits the closing syntax spans for inline formatting, wiki links, code
/// fences and weaver blocks.
///
/// * `tag` - the end tag reported by the parser.
/// * `range` - byte range into `self.source` reported for the element being closed.
///
/// # Errors
///
/// Returns `fmt::Error` when writing to the underlying writer fails.
pub(crate) fn end_tag(
    &mut self,
    tag: markdown_weaver::TagEnd,
    range: Range<usize>,
) -> Result<(), fmt::Error> {
    use markdown_weaver::TagEnd;

    // Note: Closing syntax for inline formatting tags (Strong, Emphasis, Strikethrough)
    // is handled INSIDE their respective match arms below, AFTER writing the closing HTML.
    // This ensures the closing syntax span appears OUTSIDE the formatted element.
    // Other End events have their closing syntax emitted by emit_gap_before() in the main loop.
    match tag {
        TagEnd::HtmlBlock => {
            // Capture paragraph boundary info BEFORE writing closing HTML
            // Skip if inside a list or footnote def - they own their paragraph boundary
            let para_boundary =
                if self.paragraphs.list_depth == 0 && !self.paragraphs.in_footnote_def {
                    self.take_paragraph_boundary()
                } else {
                    None
                };

            // Write closing HTML to current segment
            // NOTE(review): this closes with </p>; presumably start_tag opens HTML blocks
            // with <p> as well - confirm against the HtmlBlock arm of start_tag.
            self.end_node();
            self.write("</p>")?;

            // Now finalize paragraph (starts new segment)
            if let Some((byte_range, char_range)) = para_boundary {
                self.finalize_paragraph(byte_range, char_range);
            }
            Ok(())
        }
        TagEnd::Paragraph(_) => {
            // Capture paragraph boundary info BEFORE writing closing HTML
            // Skip if inside a list or footnote def - they own their paragraph boundary
            let para_boundary =
                if self.paragraphs.list_depth == 0 && !self.paragraphs.in_footnote_def {
                    self.take_paragraph_boundary()
                } else {
                    None
                };

            // Write closing HTML to current segment
            self.end_node();
            self.write("</p>")?;
            self.close_wrapper()?;

            // Now finalize paragraph (starts new segment)
            if let Some((byte_range, char_range)) = para_boundary {
                self.finalize_paragraph(byte_range, char_range);
            }
            Ok(())
        }
        TagEnd::Heading(level) => {
            // Capture paragraph boundary info BEFORE writing closing HTML
            let para_boundary = self.take_paragraph_boundary();

            // Write closing HTML to current segment
            self.end_node();
            self.write("</")?;
            write!(&mut self.writer, "{}", level)?;
            self.write(">")?;
            // Note: Don't close wrapper here - headings typically go with following block

            // Now finalize paragraph (starts new segment)
            if let Some((byte_range, char_range)) = para_boundary {
                self.finalize_paragraph(byte_range, char_range);
            }
            Ok(())
        }
        TagEnd::Table => {
            if self.table.render_as_markdown {
                // Emit the raw markdown table
                if let Some(start) = self.table.start_offset.take() {
                    let table_text = &self.source[start..range.end];
                    self.in_non_writing_block = false;

                    // Wrap in a pre or div for styling
                    self.write("<pre class=\"table-markdown\">")?;
                    escape_html(&mut self.writer, table_text)?;
                    self.write("</pre>")?;
                }
                Ok(())
            } else {
                self.write("</tbody></table>")
            }
        }
        TagEnd::TableHead => {
            if self.table.render_as_markdown {
                Ok(()) // Skip HTML rendering
            } else {
                self.write("</tr></thead><tbody>")?;
                self.table.state = TableState::Body;
                Ok(())
            }
        }
        TagEnd::TableRow => {
            if self.table.render_as_markdown {
                Ok(()) // Skip HTML rendering
            } else {
                self.write("</tr>")
            }
        }
        TagEnd::TableCell => {
            if self.table.render_as_markdown {
                Ok(()) // Skip HTML rendering
            } else {
                match self.table.state {
                    TableState::Head => self.write("</th>")?,
                    TableState::Body => self.write("</td>")?,
                }
                self.table.cell_index += 1;
                Ok(())
            }
        }
        TagEnd::BlockQuote(_) => {
            // If pending_blockquote_range is still set, the blockquote was empty
            // (no paragraph inside). Emit the > as its own minimal paragraph.
            let mut para_boundary = None;
            if let Some(bq_range) = self.pending_blockquote_range.take() {
                if bq_range.start < bq_range.end {
                    let raw_text = &self.source[bq_range.clone()];
                    if let Some(gt_pos) = raw_text.find('>') {
                        let para_byte_start = bq_range.start + gt_pos;
                        let para_char_start = self.last_char_offset;

                        // Create a minimal paragraph for the empty blockquote.
                        // The start tag must be terminated with `>` here: nothing else
                        // closes it before the syntax span is written, and an
                        // unterminated `<div id="..."` would swallow that span's markup.
                        let node_id = self.gen_node_id();
                        write!(&mut self.writer, "<div id=\"{}\">", node_id)?;

                        // Record start-of-node mapping for cursor positioning
                        self.current_para.offset_maps.push(OffsetMapping {
                            byte_range: para_byte_start..para_byte_start,
                            char_range: para_char_start..para_char_start,
                            node_id: node_id.clone(),
                            char_offset_in_node: gt_pos,
                            child_index: Some(0),
                            utf16_len: 0,
                        });

                        // Emit the > as block syntax
                        let syntax = &raw_text[gt_pos..gt_pos + 1];
                        self.emit_inner_syntax(syntax, para_byte_start, SyntaxType::Block)?;

                        self.write("</div>")?;
                        self.end_node();

                        // Capture paragraph boundary for later finalization
                        let byte_range = para_byte_start..bq_range.end;
                        let char_range = para_char_start..self.last_char_offset;
                        para_boundary = Some((byte_range, char_range));
                    }
                }
            }
            self.write("</blockquote>")?;
            self.close_wrapper()?;

            // Now finalize paragraph if we had one
            if let Some((byte_range, char_range)) = para_boundary {
                self.finalize_paragraph(byte_range, char_range);
            }
            Ok(())
        }
        TagEnd::CodeBlock => {
            use std::sync::LazyLock;
            use syntect::parsing::SyntaxSet;
            // Loaded once and shared by every code block that gets highlighted.
            static SYNTAX_SET: LazyLock<SyntaxSet> =
                LazyLock::new(|| SyntaxSet::load_defaults_newlines());

            if let Some((lang, buffer)) = self.code_block.buffer.take() {
                // Create offset mapping for code block content if we tracked ranges
                if let (Some(code_byte_range), Some(code_char_range)) = (
                    self.code_block.byte_range.take(),
                    self.code_block.char_range.take(),
                ) {
                    // Record mapping before writing HTML
                    // (current_node.id should be set by start_tag for CodeBlock)
                    self.record_mapping(code_byte_range, code_char_range);
                }

                // Get node_id for data-node-id attribute (needed for cursor positioning)
                let node_id = self.current_node.id.clone();

                if let Some(ref lang_str) = lang {
                    // Use a temporary String buffer for syntect
                    let mut temp_output = String::new();
                    match weaver_renderer::code_pretty::highlight(
                        &SYNTAX_SET,
                        Some(lang_str),
                        &buffer,
                        &mut temp_output,
                    ) {
                        Ok(_) => {
                            // Inject data-node-id into the <pre> tag for cursor positioning
                            if let Some(ref nid) = node_id {
                                let injected = temp_output.replacen(
                                    "<pre>",
                                    &format!("<pre data-node-id=\"{}\">", nid),
                                    1,
                                );
                                self.write(&injected)?;
                            } else {
                                self.write(&temp_output)?;
                            }
                        }
                        Err(_) => {
                            // Fallback to plain code block
                            if let Some(ref nid) = node_id {
                                write!(
                                    &mut self.writer,
                                    "<pre data-node-id=\"{}\"><code class=\"language-",
                                    nid
                                )?;
                            } else {
                                self.write("<pre><code class=\"language-")?;
                            }
                            escape_html(&mut self.writer, lang_str)?;
                            self.write("\">")?;
                            escape_html_body_text(&mut self.writer, &buffer)?;
                            self.write("</code></pre>")?;
                        }
                    }
                } else {
                    if let Some(ref nid) = node_id {
                        write!(&mut self.writer, "<pre data-node-id=\"{}\"><code>", nid)?;
                    } else {
                        self.write("<pre><code>")?;
                    }
                    escape_html_body_text(&mut self.writer, &buffer)?;
                    self.write("</code></pre>")?;
                }

                // End node tracking
                self.end_node();
            } else {
                self.write("</code></pre>")?;
            }

            // Emit closing ``` (emit_gap_before is skipped while buffering)
            // Track the opening span index and char start before we potentially clear them
            let opening_span_idx = self.code_block.opening_span_idx.take();
            let code_block_start = self.code_block.block_start.take();

            if range.start < range.end {
                let raw_text = &self.source[range.clone()];
                if let Some(fence_line) = raw_text.lines().last() {
                    // NOTE(review): only backtick fences are recognised here; a block
                    // closed with `~~~` gets no closing syntax span - confirm intended.
                    if fence_line.trim().starts_with("```") {
                        let fence = fence_line.trim();
                        let fence_char_len = fence.chars().count();

                        let syn_id = self.gen_syn_id();
                        let char_start = self.last_char_offset;
                        let char_end = char_start + fence_char_len;

                        write!(
                            &mut self.writer,
                            "<span class=\"md-syntax-block\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">",
                            syn_id, char_start, char_end
                        )?;
                        escape_html(&mut self.writer, fence)?;
                        self.write("</span>")?;

                        self.last_char_offset += fence_char_len;
                        self.last_byte_offset += fence.len();

                        // Compute formatted_range for entire code block (opening fence to closing fence)
                        let formatted_range =
                            code_block_start.map(|start| start..self.last_char_offset);

                        // Update opening fence span with formatted_range
                        if let (Some(idx), Some(fr)) =
                            (opening_span_idx, formatted_range.as_ref())
                        {
                            if let Some(span) = self.current_para.syntax_spans.get_mut(idx) {
                                span.formatted_range = Some(fr.clone());
                            }
                        }

                        // Push closing fence span with formatted_range
                        self.current_para.syntax_spans.push(SyntaxSpanInfo {
                            syn_id,
                            char_range: char_start..char_end,
                            syntax_type: SyntaxType::Block,
                            formatted_range,
                        });
                    }
                }
            }

            // Finalize code block paragraph
            if let Some((byte_range, char_range)) = self.take_paragraph_boundary() {
                self.finalize_paragraph(byte_range, char_range);
            }

            Ok(())
        }
        TagEnd::List(ordered) => {
            self.paragraphs.list_depth = self.paragraphs.list_depth.saturating_sub(1);
            // Capture paragraph boundary BEFORE writing closing HTML
            let para_boundary = self.take_paragraph_boundary();

            // Ordered and unordered lists differ only in the closing tag
            self.write(if ordered { "</ol>" } else { "</ul>" })?;
            self.close_wrapper()?;

            // Finalize paragraph after closing HTML
            if let Some((byte_range, char_range)) = para_boundary {
                self.finalize_paragraph(byte_range, char_range);
            }
            Ok(())
        }
        TagEnd::Item => {
            self.end_node();
            self.write("</li>")
        }
        TagEnd::DefinitionList => {
            self.write("</dl>")?;
            self.close_wrapper()
        }
        TagEnd::DefinitionListTitle => {
            self.end_node();
            self.write("</dt>")
        }
        TagEnd::DefinitionListDefinition => {
            self.end_node();
            self.write("</dd>")
        }
        TagEnd::Emphasis => {
            // Write closing tag FIRST, then emit closing syntax OUTSIDE the tag
            self.write("</em>")?;
            self.emit_gap_before(range.end)?;
            self.current_para
                .finalize_paired_format(self.last_char_offset);
            Ok(())
        }
        TagEnd::Superscript => self.write("</sup>"),
        TagEnd::Subscript => self.write("</sub>"),
        TagEnd::Strong => {
            // Write closing tag FIRST, then emit closing syntax OUTSIDE the tag
            self.write("</strong>")?;
            self.emit_gap_before(range.end)?;
            self.current_para
                .finalize_paired_format(self.last_char_offset);
            Ok(())
        }
        TagEnd::Strikethrough => {
            // Write closing tag FIRST, then emit closing syntax OUTSIDE the tag
            self.write("</s>")?;
            self.emit_gap_before(range.end)?;
            self.current_para
                .finalize_paired_format(self.last_char_offset);
            Ok(())
        }
        TagEnd::Link => {
            self.write("</a>")?;
            // Check if this is a wiki link (ends with ]]) vs regular link (ends with ))
            let raw_text = &self.source[range.clone()];
            if raw_text.ends_with("]]") {
                // WikiLink: emit ]] as closing syntax
                let syn_id = self.gen_syn_id();
                let char_start = self.last_char_offset;
                let char_end = char_start + 2;

                write!(
                    &mut self.writer,
                    "<span class=\"md-syntax-inline\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">]]</span>",
                    syn_id, char_start, char_end
                )?;

                self.current_para.syntax_spans.push(SyntaxSpanInfo {
                    syn_id,
                    char_range: char_start..char_end,
                    syntax_type: SyntaxType::Inline,
                    formatted_range: None, // Will be set by finalize
                });

                self.last_char_offset = char_end;
                self.last_byte_offset = range.end;
            } else {
                self.emit_gap_before(range.end)?;
            }
            self.current_para
                .finalize_paired_format(self.last_char_offset);
            Ok(())
        }
        // No-op: the Image arm of start_tag writes the whole image and then calls
        // consume_until_end() to drain the remaining image events.
        TagEnd::Image => Ok(()),
        TagEnd::Embed => Ok(()),
        TagEnd::WeaverBlock(_) => {
            self.in_non_writing_block = false;

            // Emit the { content } as a hideable syntax span
            if let Some(char_start) = self.weaver_block.char_start.take() {
                // Build the full syntax text: { buffered_content }
                let syntax_text = format!("{{{}}}", self.weaver_block.buffer);
                let syntax_char_len = syntax_text.chars().count();
                let char_end = char_start + syntax_char_len;

                let syn_id = self.gen_syn_id();

                write!(
                    &mut self.writer,
                    "<span class=\"md-syntax-block\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\" spellcheck=\"false\">",
                    syn_id, char_start, char_end
                )?;
                escape_html(&mut self.writer, &syntax_text)?;
                self.write("</span>")?;

                // Track the syntax span
                self.current_para.syntax_spans.push(SyntaxSpanInfo {
                    syn_id,
                    char_range: char_start..char_end,
                    syntax_type: SyntaxType::Block,
                    formatted_range: None,
                });

                // Record offset mapping for the syntax span
                self.record_mapping(range.clone(), char_start..char_end);

                // Update tracking
                self.last_char_offset = char_end;
                self.last_byte_offset = range.end;
            }

            // Parse the buffered text for attrs and store for next block
            if !self.weaver_block.buffer.is_empty() {
                let parsed = Self::parse_weaver_attrs(&self.weaver_block.buffer);
                self.weaver_block.buffer.clear();
                // Merge with any existing pending attrs or set new
                if let Some(ref mut existing) = self.weaver_block.pending_attrs {
                    existing.classes.extend(parsed.classes);
                    existing.attrs.extend(parsed.attrs);
                } else {
                    self.weaver_block.pending_attrs = Some(parsed);
                }
            }

            Ok(())
        }
        TagEnd::FootnoteDefinition => {
            // End node tracking (inner paragraphs may have already cleared it)
            self.end_node();
            self.write("</div>")?;

            // Clear footnote tracking
            self.footnotes.current_def.take();
            self.paragraphs.in_footnote_def = false;

            // Finalize paragraph boundary for incremental rendering
            if let Some((byte_range, char_range)) = self.take_paragraph_boundary() {
                self.finalize_paragraph(byte_range, char_range);
            }

            Ok(())
        }
        TagEnd::MetadataBlock(_) => {
            self.in_non_writing_block = false;
            Ok(())
        }
    }
}

/// Take the pending paragraph start, if any, and pair it with the current byte and
/// char offsets to form the `(byte_range, char_range)` of the paragraph being closed.
fn take_paragraph_boundary(&mut self) -> Option<(Range<usize>, Range<usize>)> {
    let (byte_start, char_start) = self.paragraphs.current_start.take()?;
    Some((
        byte_start..self.last_byte_offset,
        char_start..self.last_char_offset,
    ))
}

/// Emit wrapper start if pending attributes require it.
1542 + fn emit_wrapper_start(&mut self) -> Result<(), fmt::Error> { 1543 + if let Some(attrs) = self.weaver_block.pending_attrs.take() { 1544 + let wrapper = if attrs.classes.iter().any(|c| c.as_ref() == "aside") { 1545 + super::WrapperElement::Aside 1546 + } else { 1547 + super::WrapperElement::Div 1548 + }; 1549 + 1550 + match wrapper { 1551 + super::WrapperElement::Aside => { 1552 + self.write("<aside")?; 1553 + } 1554 + super::WrapperElement::Div => { 1555 + self.write("<div")?; 1556 + } 1557 + } 1558 + 1559 + // Emit classes (excluding "aside" which is the wrapper itself) 1560 + let classes: Vec<_> = attrs 1561 + .classes 1562 + .iter() 1563 + .filter(|c| c.as_ref() != "aside") 1564 + .collect(); 1565 + if !classes.is_empty() { 1566 + self.write(" class=\"")?; 1567 + for (i, class) in classes.iter().enumerate() { 1568 + if i > 0 { 1569 + self.write(" ")?; 1570 + } 1571 + escape_html(&mut self.writer, class)?; 1572 + } 1573 + self.write("\"")?; 1574 + } 1575 + 1576 + // Emit other attributes 1577 + for (attr, value) in &attrs.attrs { 1578 + self.write(" ")?; 1579 + escape_html(&mut self.writer, attr)?; 1580 + self.write("=\"")?; 1581 + escape_html(&mut self.writer, value)?; 1582 + self.write("\"")?; 1583 + } 1584 + 1585 + self.write(">")?; 1586 + self.weaver_block.active_wrapper = Some(wrapper); 1587 + } 1588 + Ok(()) 1589 + } 1590 + 1591 + /// Close wrapper if one is active. 1592 + fn close_wrapper(&mut self) -> Result<(), fmt::Error> { 1593 + if let Some(wrapper) = self.weaver_block.active_wrapper.take() { 1594 + match wrapper { 1595 + super::WrapperElement::Aside => self.write("</aside>")?, 1596 + super::WrapperElement::Div => self.write("</div>")?, 1597 + } 1598 + } 1599 + Ok(()) 1600 + } 1601 + 1602 + /// Parse weaver block text into attributes. 
1603 + fn parse_weaver_attrs(text: &str) -> markdown_weaver::WeaverAttributes<'static> { 1604 + let mut classes = Vec::new(); 1605 + let mut attrs = Vec::new(); 1606 + 1607 + for part in text.split_whitespace() { 1608 + if part.starts_with('.') { 1609 + // Class: .classname 1610 + classes.push(markdown_weaver::CowStr::from(part[1..].to_string())); 1611 + } else if part.starts_with('#') { 1612 + // ID: #idname -> id="idname" 1613 + attrs.push(( 1614 + markdown_weaver::CowStr::from("id".to_string()), 1615 + markdown_weaver::CowStr::from(part[1..].to_string()), 1616 + )); 1617 + } else if let Some((key, value)) = part.split_once('=') { 1618 + // Key=value attribute 1619 + let value = value.trim_matches('"').trim_matches('\''); 1620 + attrs.push(( 1621 + markdown_weaver::CowStr::from(key.to_string()), 1622 + markdown_weaver::CowStr::from(value.to_string()), 1623 + )); 1624 + } 1625 + } 1626 + 1627 + markdown_weaver::WeaverAttributes { classes, attrs } 1628 + } 1629 + }