bunch of bug fixes + refactor in editor renderer

Orual 6483830a b95a826f

+1034 -564
+81 -8
crates/weaver-app/src/components/editor/beforeinput.rs
··· 283 // === Insertion === 284 InputType::InsertText => { 285 if let Some(text) = ctx.data { 286 - let action = EditorAction::Insert { text, range }; 287 execute_action(doc, &action); 288 - BeforeInputResult::Handled 289 } else { 290 BeforeInputResult::PassThrough 291 } ··· 338 }; 339 } 340 341 - let action = EditorAction::DeleteBackward { range }; 342 - execute_action(doc, &action); 343 - BeforeInputResult::Handled 344 } 345 346 InputType::DeleteContentForward => { 347 - let action = EditorAction::DeleteForward { range }; 348 - execute_action(doc, &action); 349 - BeforeInputResult::Handled 350 } 351 352 InputType::DeleteWordBackward | InputType::DeleteEntireWordBackward => { ··· 486 let start_offset = static_range.startOffset() as usize; 487 let end_container = static_range.endContainer(); 488 let end_offset = static_range.endOffset() as usize; 489 490 let start = dom_position_to_text_offset( 491 &dom_document,
··· 283 // === Insertion === 284 InputType::InsertText => { 285 if let Some(text) = ctx.data { 286 + // Simple text insert - update model, let browser handle DOM 287 + // This mirrors the simple delete handling: we track in model, 288 + // browser handles visual update, DOM sync skips innerHTML for 289 + // cursor paragraph when syntax is unchanged 290 + let action = EditorAction::Insert { 291 + text: text.clone(), 292 + range, 293 + }; 294 execute_action(doc, &action); 295 + tracing::trace!( 296 + text_len = text.len(), 297 + range_start = range.start, 298 + range_end = range.end, 299 + cursor_after = doc.cursor.read().offset, 300 + "insertText: updated model, PassThrough to browser" 301 + ); 302 + BeforeInputResult::PassThrough 303 } else { 304 BeforeInputResult::PassThrough 305 } ··· 352 }; 353 } 354 355 + // Check if this delete requires special handling (newlines, zero-width chars) 356 + // If not, let browser handle DOM while we just track in model 357 + let needs_special_handling = if !range.is_caret() { 358 + // Selection delete - we handle to ensure consistency 359 + true 360 + } else if range.start == 0 { 361 + // At start of document, nothing to delete 362 + false 363 + } else { 364 + // Check what char we're deleting 365 + let prev_char = super::input::get_char_at(doc.loro_text(), range.start - 1); 366 + matches!(prev_char, Some('\n') | Some('\u{200C}') | Some('\u{200B}')) 367 + }; 368 + 369 + if needs_special_handling { 370 + // Complex delete - we handle everything, prevent browser default 371 + let action = EditorAction::DeleteBackward { range }; 372 + execute_action(doc, &action); 373 + BeforeInputResult::Handled 374 + } else { 375 + // Simple single-char delete - track in model, let browser handle DOM 376 + tracing::debug!( 377 + range_start = range.start, 378 + "deleteContentBackward: simple delete, will PassThrough to browser" 379 + ); 380 + if range.start > 0 { 381 + let _ = doc.remove_tracked(range.start - 1, 1); 382 + doc.cursor.write().offset 
= range.start - 1; 383 + doc.selection.set(None); 384 + } 385 + tracing::debug!("deleteContentBackward: after model update, returning PassThrough"); 386 + BeforeInputResult::PassThrough 387 + } 388 } 389 390 InputType::DeleteContentForward => { 391 + // Check if this delete requires special handling 392 + let needs_special_handling = if !range.is_caret() { 393 + true 394 + } else if range.start >= doc.len_chars() { 395 + false 396 + } else { 397 + let next_char = super::input::get_char_at(doc.loro_text(), range.start); 398 + matches!(next_char, Some('\n') | Some('\u{200C}') | Some('\u{200B}')) 399 + }; 400 + 401 + if needs_special_handling { 402 + let action = EditorAction::DeleteForward { range }; 403 + execute_action(doc, &action); 404 + BeforeInputResult::Handled 405 + } else { 406 + // Simple single-char delete - track in model, let browser handle DOM 407 + if range.start < doc.len_chars() { 408 + let _ = doc.remove_tracked(range.start, 1); 409 + doc.selection.set(None); 410 + } 411 + BeforeInputResult::PassThrough 412 + } 413 } 414 415 InputType::DeleteWordBackward | InputType::DeleteEntireWordBackward => { ··· 549 let start_offset = static_range.startOffset() as usize; 550 let end_container = static_range.endContainer(); 551 let end_offset = static_range.endOffset() as usize; 552 + 553 + // Log raw DOM position for debugging 554 + let start_node_name = start_container.node_name(); 555 + let start_text = start_container.text_content().unwrap_or_default(); 556 + tracing::trace!( 557 + start_node_name = %start_node_name, 558 + start_offset, 559 + start_text_preview = %start_text.chars().take(20).collect::<String>(), 560 + "get_target_range_from_event: raw DOM position" 561 + ); 562 563 let start = dom_position_to_text_offset( 564 &dom_document,
+34 -28
crates/weaver-app/src/components/editor/component.rs
··· 101 if let Some(did) = fetcher.current_did().await { 102 let ident = jacquard::types::ident::AtIdentifier::Did(did); 103 match fetcher.get_notebook(ident, title.clone()).await { 104 - Ok(Some(notebook_data)) => { 105 - Some(notebook_data.0.uri.to_smolstr()) 106 - } 107 Ok(None) | Err(_) => { 108 tracing::debug!("Could not resolve notebook '{}' to URI", title); 109 None ··· 472 resolved.embed_content.len() 473 ); 474 475 let (paras, new_cache, refs) = render::render_paragraphs_incremental( 476 doc_for_memo.loro_text(), 477 Some(&cache), 478 edit.as_ref(), 479 Some(&resolver), 480 entry_index_for_memo.as_ref(), ··· 677 update_paragraph_dom(editor_id, &prev, &new_paras, cursor_offset, false); 678 679 // Only restore cursor if we actually re-rendered the paragraph it's in 680 - if cursor_para_updated { 681 - use wasm_bindgen::JsCast; 682 - use wasm_bindgen::prelude::*; 683 684 - // Read and consume pending snap direction 685 - let snap_direction = doc_for_dom.pending_snap.write().take(); 686 687 - // Use requestAnimationFrame to wait for browser paint 688 - if let Some(window) = web_sys::window() { 689 - let closure = Closure::once(move || { 690 - if let Err(e) = super::cursor::restore_cursor_position( 691 - cursor_offset, 692 - &map, 693 - editor_id, 694 - snap_direction, 695 - ) { 696 - tracing::warn!("Cursor restoration failed: {:?}", e); 697 - } 698 - }); 699 700 - let _ = window.request_animation_frame(closure.as_ref().unchecked_ref()); 701 - closure.forget(); 702 - } 703 - } 704 705 // Store for next comparison AND for event handlers (write-only, no reactive read) 706 cached_paragraphs.set(new_paras.clone()); ··· 1314 && matches!(evt.key(), Key::Character(ref c) if c == "a"); 1315 1316 if navigation || select_all { 1317 let paras = cached_paragraphs(); 1318 if let Some(dir) = direction_hint { 1319 sync_cursor_from_dom_with_direction(&mut doc, editor_id, &paras, Some(dir)); ··· 1336 onselect: { 1337 let mut doc = document.clone(); 1338 move |_evt| { 1339 - 
tracing::trace!("onselect fired"); 1340 let paras = cached_paragraphs(); 1341 sync_cursor_from_dom(&mut doc, editor_id, &paras); 1342 let spans = syntax_spans(); ··· 1354 onselectstart: { 1355 let mut doc = document.clone(); 1356 move |_evt| { 1357 - tracing::trace!("onselectstart fired"); 1358 let paras = cached_paragraphs(); 1359 sync_cursor_from_dom(&mut doc, editor_id, &paras); 1360 let spans = syntax_spans(); ··· 1372 onselectionchange: { 1373 let mut doc = document.clone(); 1374 move |_evt| { 1375 - tracing::trace!("onselectionchange fired"); 1376 let paras = cached_paragraphs(); 1377 sync_cursor_from_dom(&mut doc, editor_id, &paras); 1378 let spans = syntax_spans(); ··· 1390 onclick: { 1391 let mut doc = document.clone(); 1392 move |evt| { 1393 - tracing::trace!("onclick fired"); 1394 let paras = cached_paragraphs(); 1395 1396 // Check if click target is a math-clickable element
··· 101 if let Some(did) = fetcher.current_did().await { 102 let ident = jacquard::types::ident::AtIdentifier::Did(did); 103 match fetcher.get_notebook(ident, title.clone()).await { 104 + Ok(Some(notebook_data)) => Some(notebook_data.0.uri.to_smolstr()), 105 Ok(None) | Err(_) => { 106 tracing::debug!("Could not resolve notebook '{}' to URI", title); 107 None ··· 470 resolved.embed_content.len() 471 ); 472 473 + let cursor_offset = doc_for_memo.cursor.read().offset; 474 let (paras, new_cache, refs) = render::render_paragraphs_incremental( 475 doc_for_memo.loro_text(), 476 Some(&cache), 477 + cursor_offset, 478 edit.as_ref(), 479 Some(&resolver), 480 entry_index_for_memo.as_ref(), ··· 677 update_paragraph_dom(editor_id, &prev, &new_paras, cursor_offset, false); 678 679 // Only restore cursor if we actually re-rendered the paragraph it's in 680 + // if cursor_para_updated { 681 + // use wasm_bindgen::JsCast; 682 + // use wasm_bindgen::prelude::*; 683 684 + // // Read and consume pending snap direction 685 + // let snap_direction = doc_for_dom.pending_snap.write().take(); 686 687 + // // Use requestAnimationFrame to wait for browser paint 688 + // if let Some(window) = web_sys::window() { 689 + // let closure = Closure::once(move || { 690 + // if let Err(e) = super::cursor::restore_cursor_position( 691 + // cursor_offset, 692 + // &map, 693 + // editor_id, 694 + // snap_direction, 695 + // ) { 696 + // tracing::warn!("Cursor restoration failed: {:?}", e); 697 + // } 698 + // }); 699 700 + // let _ = window.request_animation_frame(closure.as_ref().unchecked_ref()); 701 + // closure.forget(); 702 + // } 703 + // } 704 705 // Store for next comparison AND for event handlers (write-only, no reactive read) 706 cached_paragraphs.set(new_paras.clone()); ··· 1314 && matches!(evt.key(), Key::Character(ref c) if c == "a"); 1315 1316 if navigation || select_all { 1317 + tracing::debug!( 1318 + key = ?evt.key(), 1319 + navigation, 1320 + select_all, 1321 + "onkeyup navigation - 
syncing cursor from DOM" 1322 + ); 1323 let paras = cached_paragraphs(); 1324 if let Some(dir) = direction_hint { 1325 sync_cursor_from_dom_with_direction(&mut doc, editor_id, &paras, Some(dir)); ··· 1342 onselect: { 1343 let mut doc = document.clone(); 1344 move |_evt| { 1345 + tracing::debug!("onselect fired - syncing cursor from DOM"); 1346 let paras = cached_paragraphs(); 1347 sync_cursor_from_dom(&mut doc, editor_id, &paras); 1348 let spans = syntax_spans(); ··· 1360 onselectstart: { 1361 let mut doc = document.clone(); 1362 move |_evt| { 1363 + tracing::debug!("onselectstart fired - syncing cursor from DOM"); 1364 let paras = cached_paragraphs(); 1365 sync_cursor_from_dom(&mut doc, editor_id, &paras); 1366 let spans = syntax_spans(); ··· 1378 onselectionchange: { 1379 let mut doc = document.clone(); 1380 move |_evt| { 1381 + tracing::debug!("onselectionchange fired - syncing cursor from DOM"); 1382 let paras = cached_paragraphs(); 1383 sync_cursor_from_dom(&mut doc, editor_id, &paras); 1384 let spans = syntax_spans(); ··· 1396 onclick: { 1397 let mut doc = document.clone(); 1398 move |evt| { 1399 + tracing::debug!("onclick fired - syncing cursor from DOM"); 1400 let paras = cached_paragraphs(); 1401 1402 // Check if click target is a math-clickable element
+34 -2
crates/weaver-app/src/components/editor/document.rs
··· 844 /// Call this after OUR edits where we know the new cursor position. 845 pub fn sync_loro_cursor(&mut self) { 846 let offset = self.cursor.read().offset; 847 self.loro_cursor = self.content.get_cursor(offset, Side::default()); 848 } 849 ··· 853 pub fn sync_cursor_from_loro(&mut self) -> Option<usize> { 854 let loro_cursor = self.loro_cursor.as_ref()?; 855 let result = self.doc.get_cursor_pos(loro_cursor).ok()?; 856 let new_offset = result.current.pos.min(self.len_chars()); 857 self.cursor.with_mut(|c| c.offset = new_offset); 858 Some(new_offset) 859 } ··· 865 866 /// Set the Loro cursor (used when restoring from storage). 867 pub fn set_loro_cursor(&mut self, cursor: Option<Cursor>) { 868 self.loro_cursor = cursor; 869 // Sync cursor.offset from the restored Loro cursor 870 if self.loro_cursor.is_some() { ··· 997 /// Import updates from a PDS diff blob. 998 /// Used when loading edit history from the PDS. 999 pub fn import_updates(&mut self, updates: &[u8]) -> LoroResult<()> { 1000 self.doc.import(updates)?; 1001 - // Trigger re-render after importing remote changes 1002 - self.last_edit.set(None); 1003 Ok(()) 1004 } 1005
··· 844 /// Call this after OUR edits where we know the new cursor position. 845 pub fn sync_loro_cursor(&mut self) { 846 let offset = self.cursor.read().offset; 847 + tracing::debug!(offset, "sync_loro_cursor: saving cursor position to Loro"); 848 self.loro_cursor = self.content.get_cursor(offset, Side::default()); 849 } 850 ··· 854 pub fn sync_cursor_from_loro(&mut self) -> Option<usize> { 855 let loro_cursor = self.loro_cursor.as_ref()?; 856 let result = self.doc.get_cursor_pos(loro_cursor).ok()?; 857 + let old_offset = self.cursor.read().offset; 858 let new_offset = result.current.pos.min(self.len_chars()); 859 + let jump = if new_offset > old_offset { new_offset - old_offset } else { old_offset - new_offset }; 860 + if jump > 100 { 861 + tracing::warn!( 862 + old_offset, 863 + new_offset, 864 + jump, 865 + "sync_cursor_from_loro: LARGE CURSOR JUMP detected" 866 + ); 867 + } 868 + tracing::debug!(old_offset, new_offset, "sync_cursor_from_loro: updating cursor from Loro"); 869 self.cursor.with_mut(|c| c.offset = new_offset); 870 Some(new_offset) 871 } ··· 877 878 /// Set the Loro cursor (used when restoring from storage). 879 pub fn set_loro_cursor(&mut self, cursor: Option<Cursor>) { 880 + tracing::debug!(has_cursor = cursor.is_some(), "set_loro_cursor called"); 881 self.loro_cursor = cursor; 882 // Sync cursor.offset from the restored Loro cursor 883 if self.loro_cursor.is_some() { ··· 1010 /// Import updates from a PDS diff blob. 1011 /// Used when loading edit history from the PDS. 
1012 pub fn import_updates(&mut self, updates: &[u8]) -> LoroResult<()> { 1013 + let len_before = self.content.len_unicode(); 1014 + let vv_before = self.doc.oplog_vv(); 1015 + 1016 self.doc.import(updates)?; 1017 + 1018 + let len_after = self.content.len_unicode(); 1019 + let vv_after = self.doc.oplog_vv(); 1020 + let vv_changed = vv_before != vv_after; 1021 + let len_changed = len_before != len_after; 1022 + 1023 + tracing::debug!( 1024 + len_before, 1025 + len_after, 1026 + len_changed, 1027 + vv_changed, 1028 + "import_updates: merge result" 1029 + ); 1030 + 1031 + // Only trigger re-render if something actually changed 1032 + if vv_changed { 1033 + self.last_edit.set(None); 1034 + } 1035 Ok(()) 1036 } 1037
+192 -41
crates/weaver-app/src/components/editor/dom_sync.rs
··· 5 6 use dioxus::prelude::*; 7 8 use super::document::{EditorDocument, Selection}; 9 use super::offset_map::{SnapDirection, find_nearest_valid_position, is_valid_cursor_position}; 10 use super::paragraph::ParagraphRender; ··· 90 91 match (anchor_rope, focus_rope) { 92 (Some(anchor), Some(focus)) => { 93 doc.cursor.write().offset = focus; 94 if anchor != focus { 95 doc.selection.set(Some(Selection { ··· 144 .or_else(|| element.get_attribute("data-node-id")); 145 146 if let Some(id) = id { 147 - if id.starts_with('n') && id[1..].parse::<usize>().is_ok() { 148 break Some(id); 149 } 150 } ··· 206 } 207 } 208 209 for para in paragraphs { 210 for mapping in &para.offset_map { 211 if mapping.node_id == node_id { 212 let mapping_start = mapping.char_offset_in_node; 213 let mapping_end = mapping.char_offset_in_node + mapping.utf16_len; 214 215 if utf16_offset_in_container >= mapping_start 216 && utf16_offset_in_container <= mapping_end 217 { ··· 267 ) { 268 } 269 270 - /// Update paragraph DOM elements incrementally. 271 /// 272 - /// Only modifies paragraphs that changed (by comparing source_hash). 273 - /// Browser preserves cursor naturally in unchanged paragraphs. 274 /// 275 /// Returns true if the paragraph containing the cursor was updated. 
276 #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] ··· 281 cursor_offset: usize, 282 force: bool, 283 ) -> bool { 284 use wasm_bindgen::JsCast; 285 286 let window = match web_sys::window() { ··· 298 None => return false, 299 }; 300 301 - // Find which paragraph contains cursor 302 - // Use end-inclusive matching: cursor at position N belongs to paragraph (0..N) 303 - // This handles typing at end of paragraph, which is the common case 304 - // The empty paragraph at document end catches any trailing cursor positions 305 - let cursor_para_idx = new_paragraphs 306 .iter() 307 - .position(|p| p.char_range.start <= cursor_offset && cursor_offset <= p.char_range.end); 308 309 - let mut cursor_para_updated = false; 310 311 - for (idx, new_para) in new_paragraphs.iter().enumerate() { 312 - let para_id = format!("para-{}", idx); 313 314 - if let Some(old_para) = old_paragraphs.get(idx) { 315 - if force || new_para.source_hash != old_para.source_hash { 316 - // Changed - clear and update innerHTML 317 - // We clear first to ensure any browser-added content (from IME composition, 318 - // contenteditable quirks, etc.) 
is fully removed before setting new content 319 - if let Some(elem) = document.get_element_by_id(&para_id) { 320 - if force && cursor_para_idx.is_some() { 321 - // skip re-rendering where the cursor is if we're forcing it 322 - // we don't want to fuck up what the user is doing 323 } else { 324 - elem.set_text_content(None); // Clear completely 325 - elem.set_inner_html(&new_para.html); 326 } 327 - } 328 329 - if !force { 330 - if Some(idx) == cursor_para_idx { 331 cursor_para_updated = true; 332 } 333 } 334 } 335 } else { 336 if let Ok(div) = document.create_element("div") { 337 - div.set_id(&para_id); 338 div.set_inner_html(&new_para.html); 339 - let _ = editor.append_child(&div); 340 } 341 342 - if Some(idx) == cursor_para_idx { 343 cursor_para_updated = true; 344 } 345 } 346 } 347 348 - // Remove extra paragraphs if document got shorter 349 - // Also mark cursor as needing restoration since structure changed 350 - if new_paragraphs.len() < old_paragraphs.len() { 351 - cursor_para_updated = true; 352 - } 353 - // TODO: i think this is the cause of a number of bits of cursor jank 354 - for idx in new_paragraphs.len()..old_paragraphs.len() { 355 - let para_id = format!("para-{}", idx); 356 - if let Some(elem) = document.get_element_by_id(&para_id) { 357 - let _ = elem.remove(); 358 - } 359 } 360 361 cursor_para_updated
··· 5 6 use dioxus::prelude::*; 7 8 + use super::cursor::restore_cursor_position; 9 use super::document::{EditorDocument, Selection}; 10 use super::offset_map::{SnapDirection, find_nearest_valid_position, is_valid_cursor_position}; 11 use super::paragraph::ParagraphRender; ··· 91 92 match (anchor_rope, focus_rope) { 93 (Some(anchor), Some(focus)) => { 94 + let old_offset = doc.cursor.read().offset; 95 + // Warn if cursor is jumping a large distance - likely a bug 96 + let jump = if focus > old_offset { focus - old_offset } else { old_offset - focus }; 97 + if jump > 100 { 98 + tracing::warn!( 99 + old_offset, 100 + new_offset = focus, 101 + jump, 102 + "sync_cursor_from_dom: LARGE CURSOR JUMP detected" 103 + ); 104 + } 105 doc.cursor.write().offset = focus; 106 if anchor != focus { 107 doc.selection.set(Some(Selection { ··· 156 .or_else(|| element.get_attribute("data-node-id")); 157 158 if let Some(id) = id { 159 + // Match both old-style "n0" and paragraph-prefixed "p-2-n0" node IDs 160 + let is_node_id = id.starts_with('n') || id.contains("-n"); 161 + if is_node_id { 162 break Some(id); 163 } 164 } ··· 220 } 221 } 222 223 + // Log what we're looking for 224 + tracing::trace!( 225 + node_id = %node_id, 226 + utf16_offset = utf16_offset_in_container, 227 + num_paragraphs = paragraphs.len(), 228 + "dom_position_to_text_offset: looking up mapping" 229 + ); 230 + 231 for para in paragraphs { 232 for mapping in &para.offset_map { 233 if mapping.node_id == node_id { 234 let mapping_start = mapping.char_offset_in_node; 235 let mapping_end = mapping.char_offset_in_node + mapping.utf16_len; 236 237 + tracing::trace!( 238 + mapping_node_id = %mapping.node_id, 239 + mapping_start, 240 + mapping_end, 241 + char_range_start = mapping.char_range.start, 242 + char_range_end = mapping.char_range.end, 243 + "dom_position_to_text_offset: found matching node_id" 244 + ); 245 + 246 if utf16_offset_in_container >= mapping_start 247 && utf16_offset_in_container <= mapping_end 248 { ··· 
298 ) { 299 } 300 301 + /// Update paragraph DOM elements incrementally using pool-based surgical diffing. 302 /// 303 + /// Uses stable content-based paragraph IDs for efficient DOM reconciliation: 304 + /// - Unchanged paragraphs (same ID + hash) are not touched 305 + /// - Changed paragraphs (same ID, different hash) get innerHTML updated 306 + /// - New paragraphs get created and inserted at correct position 307 + /// - Removed paragraphs get deleted 308 /// 309 /// Returns true if the paragraph containing the cursor was updated. 310 #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] ··· 315 cursor_offset: usize, 316 force: bool, 317 ) -> bool { 318 + use std::collections::HashMap; 319 use wasm_bindgen::JsCast; 320 321 let window = match web_sys::window() { ··· 333 None => return false, 334 }; 335 336 + let mut cursor_para_updated = false; 337 + 338 + // Build lookup for old paragraphs by ID (for syntax span comparison) 339 + let old_para_map: HashMap<&str, &ParagraphRender> = old_paragraphs 340 .iter() 341 + .map(|p| (p.id.as_str(), p)) 342 + .collect(); 343 + 344 + // Build pool of existing DOM elements by ID 345 + let mut old_elements: HashMap<String, web_sys::Element> = HashMap::new(); 346 + let mut child_opt = editor.first_element_child(); 347 + while let Some(child) = child_opt { 348 + if let Some(id) = child.get_attribute("id") { 349 + let next = child.next_element_sibling(); 350 + old_elements.insert(id, child); 351 + child_opt = next; 352 + } else { 353 + child_opt = child.next_element_sibling(); 354 + } 355 + } 356 + 357 + // Track position for insertBefore - starts at first element child 358 + // (use first_element_child to skip any stray text nodes) 359 + let mut cursor_node: Option<web_sys::Node> = 360 + editor.first_element_child().map(|e| e.into()); 361 + 362 + // Single pass through new paragraphs 363 + for new_para in new_paragraphs.iter() { 364 + let para_id = &new_para.id; 365 + let new_hash = format!("{:x}", new_para.source_hash); 
366 + let is_cursor_para = 367 + new_para.char_range.start <= cursor_offset && cursor_offset <= new_para.char_range.end; 368 369 + if let Some(existing_elem) = old_elements.remove(para_id) { 370 + // Element exists - check if it needs updating 371 + let old_hash = existing_elem.get_attribute("data-hash").unwrap_or_default(); 372 + let needs_update = force || old_hash != new_hash; 373 374 + // Check if element is at correct position (compare as nodes) 375 + let existing_as_node: &web_sys::Node = existing_elem.as_ref(); 376 + let at_correct_position = cursor_node 377 + .as_ref() 378 + .map(|c| c == existing_as_node) 379 + .unwrap_or(false); 380 + 381 + if !at_correct_position { 382 + tracing::warn!( 383 + para_id, 384 + is_cursor_para, 385 + "update_paragraph_dom: element not at correct position, moving" 386 + ); 387 + let _ = editor.insert_before(existing_as_node, cursor_node.as_ref()); 388 + if is_cursor_para { 389 + cursor_para_updated = true; 390 + } 391 + } else { 392 + // Use next_element_sibling to skip any stray text nodes 393 + cursor_node = existing_elem.next_element_sibling().map(|e| e.into()); 394 + } 395 + 396 + if needs_update { 397 + // TESTING: Force innerHTML update to measure timing cost 398 + // TODO: Remove this flag after benchmarking 399 + const FORCE_INNERHTML_UPDATE: bool = true; 400 + 401 + // For cursor paragraph: only update if syntax/formatting changed 402 + // This prevents destroying browser selection during fast typing 403 + // 404 + // HOWEVER: we must verify browser actually updated the DOM. 405 + // PassThrough assumes browser handles edit, but sometimes it doesn't. 
406 + let should_skip_cursor_update = !FORCE_INNERHTML_UPDATE && is_cursor_para && !force && { 407 + let old_para = old_para_map.get(para_id.as_str()); 408 + let syntax_unchanged = old_para 409 + .map(|old| old.syntax_spans == new_para.syntax_spans) 410 + .unwrap_or(false); 411 412 + // Verify DOM content length matches expected - if not, browser didn't handle it 413 + // NOTE: Get inner element (the <p>) not outer div, to avoid counting 414 + // the newline from </p>\n in the HTML 415 + let dom_matches_expected = if syntax_unchanged { 416 + let inner_elem = existing_elem.first_element_child(); 417 + let dom_text = inner_elem 418 + .as_ref() 419 + .and_then(|e| e.text_content()) 420 + .unwrap_or_default(); 421 + let expected_len = new_para.byte_range.end - new_para.byte_range.start; 422 + let dom_len = dom_text.len(); 423 + let matches = dom_len == expected_len; 424 + // Always log for debugging 425 + tracing::debug!( 426 + para_id = %para_id, 427 + dom_len, 428 + expected_len, 429 + matches, 430 + dom_text = %dom_text, 431 + "DOM sync check" 432 + ); 433 + matches 434 } else { 435 + false 436 + }; 437 + 438 + syntax_unchanged && dom_matches_expected 439 + }; 440 + 441 + if should_skip_cursor_update { 442 + tracing::trace!( 443 + para_id, 444 + "update_paragraph_dom: skipping cursor para innerHTML (syntax unchanged, DOM verified)" 445 + ); 446 + // Update hash - browser native editing has the correct content 447 + let _ = existing_elem.set_attribute("data-hash", &new_hash); 448 + } else { 449 + // Timing instrumentation for innerHTML update cost 450 + let start = web_sys::window() 451 + .and_then(|w| w.performance()) 452 + .map(|p| p.now()); 453 + 454 + existing_elem.set_inner_html(&new_para.html); 455 + let _ = existing_elem.set_attribute("data-hash", &new_hash); 456 + 457 + if let Some(start_time) = start { 458 + if let Some(end_time) = web_sys::window() 459 + .and_then(|w| w.performance()) 460 + .map(|p| p.now()) 461 + { 462 + let elapsed_ms = end_time - 
start_time; 463 + tracing::debug!( 464 + para_id, 465 + is_cursor_para, 466 + elapsed_ms, 467 + html_len = new_para.html.len(), 468 + old_hash = %old_hash, 469 + new_hash = %new_hash, 470 + "update_paragraph_dom: innerHTML update timing" 471 + ); 472 + } 473 } 474 475 + if is_cursor_para { 476 + // Restore cursor synchronously - don't wait for rAF 477 + // This prevents race conditions with fast typing 478 + if let Err(e) = restore_cursor_position( 479 + cursor_offset, 480 + &new_para.offset_map, 481 + editor_id, 482 + None, 483 + ) { 484 + tracing::warn!("Synchronous cursor restore failed: {:?}", e); 485 + } 486 cursor_para_updated = true; 487 } 488 } 489 } 490 } else { 491 + // New element - create and insert at current position 492 if let Ok(div) = document.create_element("div") { 493 + div.set_id(para_id); 494 div.set_inner_html(&new_para.html); 495 + let _ = div.set_attribute("data-hash", &new_hash); 496 + let div_node: &web_sys::Node = div.as_ref(); 497 + let _ = editor.insert_before(div_node, cursor_node.as_ref()); 498 } 499 500 + if is_cursor_para { 501 cursor_para_updated = true; 502 } 503 } 504 } 505 506 + // Remove stale elements (still in pool = not in new paragraphs) 507 + for (_, elem) in old_elements { 508 + let _ = elem.remove(); 509 + cursor_para_updated = true; // Structure changed, cursor may need restoration 510 } 511 512 cursor_para_updated
+9
crates/weaver-app/src/components/editor/paragraph.rs
··· 11 /// A rendered paragraph with its source range and offset mappings. 12 #[derive(Debug, Clone, PartialEq)] 13 pub struct ParagraphRender { 14 /// Source byte range in the rope 15 pub byte_range: Range<usize>, 16 ··· 38 let mut hasher = DefaultHasher::new(); 39 text.hash(&mut hasher); 40 hasher.finish() 41 } 42 43 /// Extract substring from LoroText as String
··· 11 /// A rendered paragraph with its source range and offset mappings. 12 #[derive(Debug, Clone, PartialEq)] 13 pub struct ParagraphRender { 14 + /// Stable content-based ID for DOM diffing (format: `p-{hash_prefix}-{collision_idx}`) 15 + pub id: String, 16 + 17 /// Source byte range in the rope 18 pub byte_range: Range<usize>, 19 ··· 41 let mut hasher = DefaultHasher::new(); 42 text.hash(&mut hasher); 43 hasher.finish() 44 + } 45 + 46 + /// Generate a paragraph ID from monotonic counter. 47 + /// IDs are stable across content changes - only position/cursor determines identity. 48 + pub fn make_paragraph_id(id: usize) -> String { 49 + format!("p-{}", id) 50 } 51 52 /// Extract substring from LoroText as String
+395 -232
crates/weaver-app/src/components/editor/render.rs
··· 6 7 use super::document::EditInfo; 8 use super::offset_map::{OffsetMapping, RenderResult}; 9 - use super::paragraph::{ParagraphRender, hash_source, text_slice_to_string}; 10 use super::writer::{EditorImageResolver, EditorWriter, ImageResolver, SyntaxSpanInfo}; 11 use loro::LoroText; 12 use markdown_weaver::Parser; 13 use std::ops::Range; 14 use weaver_common::{EntryIndex, ResolvedContent}; 15 ··· 23 pub next_node_id: usize, 24 /// Next available syntax span ID for fresh renders 25 pub next_syn_id: usize, 26 } 27 28 /// A cached paragraph render that can be reused if source hasn't changed. 29 #[derive(Clone, Debug)] 30 pub struct CachedParagraph { 31 /// Hash of paragraph source text for change detection 32 pub source_hash: u64, 33 /// Byte range in source document ··· 72 } 73 } 74 75 /// Render markdown with incremental caching. 76 /// 77 /// Uses cached paragraph renders when possible, only re-rendering changed paragraphs. 78 - /// For "safe" edits (no boundary changes), skips boundary rediscovery entirely. 
79 /// 80 /// # Parameters 81 /// - `entry_index`: Optional index for wikilink validation (adds link-valid/link-broken classes) 82 /// - `resolved_content`: Pre-resolved embed content for sync rendering 83 /// ··· 86 pub fn render_paragraphs_incremental( 87 text: &LoroText, 88 cache: Option<&RenderCache>, 89 edit: Option<&EditInfo>, 90 image_resolver: Option<&EditorImageResolver>, 91 entry_index: Option<&EntryIndex>, ··· 102 if source.is_empty() { 103 let empty_node_id = "n0".to_string(); 104 let empty_html = format!(r#"<span id="{}">{}</span>"#, empty_node_id, '\u{200B}'); 105 106 let para = ParagraphRender { 107 byte_range: 0..0, 108 char_range: 0..0, 109 html: empty_html.clone(), ··· 114 115 let new_cache = RenderCache { 116 paragraphs: vec![CachedParagraph { 117 source_hash: 0, 118 byte_range: 0..0, 119 char_range: 0..0, ··· 124 }], 125 next_node_id: 1, 126 next_syn_id: 0, 127 }; 128 129 return (vec![para], new_cache, vec![]); ··· 132 // Determine if we can use fast path (skip boundary discovery) 133 // Need cache and non-boundary-affecting edit info (for edit position) 134 let current_len = text.len_unicode(); 135 136 let use_fast_path = cache.is_some() && edit.is_some() && !is_boundary_affecting(edit.unwrap()); 137 ··· 156 157 // Compute delta from actual length difference, not edit info 158 // This handles stale edits gracefully (delta = 0 if lengths match) 159 - let cached_len = cache 160 .paragraphs 161 .last() 162 - .map(|p| p.char_range.end) 163 - .unwrap_or(0); 164 let char_delta = current_len as isize - cached_len as isize; 165 166 // Adjust each cached paragraph's range 167 cache ··· 173 (p.byte_range.clone(), p.char_range.clone()) 174 } else if p.char_range.start > edit_pos { 175 // After edit - shift by delta (edit is strictly before this paragraph) 176 - // Calculate byte delta (approximation: assume 1 byte per char for ASCII) 177 - // This is imprecise but boundaries are rediscovered on slow path anyway 178 - let byte_delta = char_delta; // TODO: 
proper byte calculation 179 ( 180 apply_delta(p.byte_range.start, byte_delta) 181 ..apply_delta(p.byte_range.end, byte_delta), ··· 185 } else { 186 // Edit is at or within this paragraph - expand its end 187 ( 188 - p.byte_range.start..apply_delta(p.byte_range.end, char_delta), 189 p.char_range.start..apply_delta(p.char_range.end, char_delta), 190 ) 191 } ··· 196 }; 197 198 // Validate fast path results - if any ranges are invalid, use slow path 199 - let paragraph_ranges = if !paragraph_ranges.is_empty() { 200 let all_valid = paragraph_ranges 201 .iter() 202 .all(|(_, char_range)| char_range.start <= char_range.end); 203 - if all_valid { 204 - paragraph_ranges 205 - } else { 206 tracing::debug!( 207 target: "weaver::render", 208 "fast path produced invalid ranges, falling back to slow path" 209 ); 210 - vec![] // Trigger slow path 211 } 212 } else { 213 - paragraph_ranges 214 }; 215 216 - // Slow path: run boundary-only pass to discover paragraph boundaries 217 - let paragraph_ranges = if paragraph_ranges.is_empty() { 218 - let boundary_start = crate::perf::now(); 219 - let parser = 220 - Parser::new_ext(&source, weaver_renderer::default_md_options()).into_offset_iter(); 221 - let mut scratch_output = String::new(); 222 223 - let result = match EditorWriter::<_, _, ()>::new_boundary_only( 224 - &source, 225 - text, 226 - parser, 227 - &mut scratch_output, 228 - ) 229 - .run() 230 - { 231 - Ok(result) => result.paragraph_ranges, 232 - Err(_) => return (Vec::new(), RenderCache::default(), vec![]), 233 }; 234 - let boundary_ms = crate::perf::now() - boundary_start; 235 - tracing::debug!(boundary_ms, paragraphs = result.len(), "boundary discovery (slow path)"); 236 - result 237 - } else { 238 - paragraph_ranges 239 }; 240 241 // Log discovered paragraphs 242 for (i, (byte_range, char_range)) in paragraph_ranges.iter().enumerate() { ··· 254 ); 255 } 256 257 - // Render paragraphs, reusing cache where possible 258 - let render_loop_start = crate::perf::now(); 259 let 
mut paragraphs = Vec::with_capacity(paragraph_ranges.len()); 260 let mut new_cached = Vec::with_capacity(paragraph_ranges.len()); 261 let mut all_refs: Vec<weaver_common::ExtractedRef> = Vec::new(); 262 - let mut node_id_offset = cache.map(|c| c.next_node_id).unwrap_or(0); 263 - let mut syn_id_offset = cache.map(|c| c.next_syn_id).unwrap_or(0); 264 - let mut cache_hits = 0usize; 265 - let mut cache_misses = 0usize; 266 - let mut fresh_render_ms = 0.0f64; 267 268 for (idx, (byte_range, char_range)) in paragraph_ranges.iter().enumerate() { 269 let para_source = text_slice_to_string(text, char_range.clone()); 270 let source_hash = hash_source(&para_source); 271 272 - // Check if we have a cached render with matching hash 273 - let cached_match = 274 - cache.and_then(|c| c.paragraphs.iter().find(|p| p.source_hash == source_hash)); 275 276 - let (html, offset_map, syntax_spans, para_refs) = if let Some(cached) = cached_match { 277 - cache_hits += 1; 278 - // Reuse cached HTML, offset map, and syntax spans (adjusted for position) 279 - let char_delta = char_range.start as isize - cached.char_range.start as isize; 280 - let byte_delta = byte_range.start as isize - cached.byte_range.start as isize; 281 - 282 - let mut adjusted_map = cached.offset_map.clone(); 283 - for mapping in &mut adjusted_map { 284 - mapping.char_range.start = 285 - (mapping.char_range.start as isize + char_delta) as usize; 286 - mapping.char_range.end = (mapping.char_range.end as isize + char_delta) as usize; 287 - mapping.byte_range.start = 288 - (mapping.byte_range.start as isize + byte_delta) as usize; 289 - mapping.byte_range.end = (mapping.byte_range.end as isize + byte_delta) as usize; 290 } 291 - 292 - let mut adjusted_syntax = cached.syntax_spans.clone(); 293 - for span in &mut adjusted_syntax { 294 - span.adjust_positions(char_delta); 295 - } 296 - 297 - // Include cached refs in all_refs 298 - all_refs.extend(cached.collected_refs.clone()); 299 - 300 - ( 301 - cached.html.clone(), 302 - 
adjusted_map, 303 - adjusted_syntax, 304 - cached.collected_refs.clone(), 305 - ) 306 } else { 307 - cache_misses += 1; 308 - let para_render_start = crate::perf::now(); 309 - // Fresh render needed - create detached LoroDoc for this paragraph 310 - let para_doc = loro::LoroDoc::new(); 311 - let para_text = para_doc.get_text("content"); 312 - let _ = para_text.insert(0, &para_source); 313 314 - let parser = Parser::new_ext(&para_source, weaver_renderer::default_md_options()) 315 - .into_offset_iter(); 316 - let mut output = String::new(); 317 318 - // Use provided resolver or empty default 319 - let resolver = image_resolver.cloned().unwrap_or_default(); 320 - 321 - // Build writer with optional entry index for wikilink validation 322 - // Pass paragraph's document-level offsets so all embedded char/byte positions are absolute 323 - let mut writer = 324 - EditorWriter::<_, _, &ResolvedContent, &EditorImageResolver>::new_with_all_offsets( 325 - &para_source, 326 - &para_text, 327 - parser, 328 - &mut output, 329 - node_id_offset, 330 - syn_id_offset, 331 - char_range.start, 332 - byte_range.start, 333 - ) 334 - .with_image_resolver(&resolver) 335 - .with_embed_provider(resolved_content); 336 - 337 - if let Some(idx) = entry_index { 338 - writer = writer.with_entry_index(idx); 339 - } 340 - 341 - let (mut offset_map, mut syntax_spans, para_refs) = match writer.run() { 342 - Ok(result) => { 343 - // Update node ID offset 344 - let max_node_id = result 345 - .offset_maps 346 - .iter() 347 - .filter_map(|m| { 348 - m.node_id 349 - .strip_prefix("n") 350 - .and_then(|s| s.parse::<usize>().ok()) 351 - }) 352 - .max() 353 - .unwrap_or(node_id_offset); 354 - node_id_offset = max_node_id + 1; 355 - 356 - // Update syn ID offset 357 - let max_syn_id = result 358 - .syntax_spans 359 - .iter() 360 - .filter_map(|s| { 361 - s.syn_id 362 - .strip_prefix("s") 363 - .and_then(|id| id.parse::<usize>().ok()) 364 - }) 365 - .max() 366 - .unwrap_or(syn_id_offset.saturating_sub(1)); 367 
- syn_id_offset = max_syn_id + 1; 368 - 369 - // Collect refs from this paragraph 370 - let para_refs = result.collected_refs; 371 - all_refs.extend(para_refs.clone()); 372 - 373 - (result.offset_maps, result.syntax_spans, para_refs) 374 - } 375 - Err(_) => (Vec::new(), Vec::new(), Vec::new()), 376 - }; 377 - 378 - // Offsets are already document-absolute since we pass char_range.start/byte_range.start 379 - // to the writer constructor 380 - fresh_render_ms += crate::perf::now() - para_render_start; 381 - (output, offset_map, syntax_spans, para_refs) 382 - }; 383 384 // Store in cache 385 new_cached.push(CachedParagraph { 386 source_hash, 387 byte_range: byte_range.clone(), 388 char_range: char_range.clone(), 389 html: html.clone(), 390 offset_map: offset_map.clone(), 391 syntax_spans: syntax_spans.clone(), 392 - collected_refs: para_refs, 393 }); 394 395 paragraphs.push(ParagraphRender { 396 byte_range: byte_range.clone(), 397 char_range: char_range.clone(), 398 html, ··· 402 }); 403 } 404 405 - // Insert gap paragraphs for EXTRA whitespace between blocks. 406 - // Standard paragraph break is 2 newlines (\n\n) - no gap needed for that. 407 - // Gaps are only for whitespace BEYOND the minimum, giving cursor a landing spot. 
408 - const MIN_PARAGRAPH_BREAK_INCR: usize = 2; // \n\n 409 - 410 - let mut paragraphs_with_gaps = Vec::with_capacity(paragraphs.len() * 2); 411 - let mut prev_end_char = 0usize; 412 - let mut prev_end_byte = 0usize; 413 - 414 - for para in paragraphs { 415 - // Check for gap before this paragraph - only if MORE than minimum break 416 - let gap_size = para.char_range.start.saturating_sub(prev_end_char); 417 - if gap_size > MIN_PARAGRAPH_BREAK_INCR { 418 - // Visible gap element covers EXTRA whitespace beyond minimum break 419 - let gap_start_char = prev_end_char + MIN_PARAGRAPH_BREAK_INCR; 420 - let gap_end_char = para.char_range.start; 421 - let gap_start_byte = prev_end_byte + MIN_PARAGRAPH_BREAK_INCR; 422 - let gap_end_byte = para.byte_range.start; 423 - 424 - // Position-based ID: deterministic, stable across cache states 425 - let gap_node_id = format!("gap-{}-{}", gap_start_char, gap_end_char); 426 - let gap_html = format!(r#"<span id="{}">{}</span>"#, gap_node_id, '\u{200B}'); 427 - 428 - // Gap paragraph covers ALL whitespace (like trailing gaps do) 429 - // so cursor anywhere in the inter-paragraph zone triggers restoration 430 - paragraphs_with_gaps.push(ParagraphRender { 431 - byte_range: prev_end_byte..gap_end_byte, 432 - char_range: prev_end_char..gap_end_char, 433 - html: gap_html, 434 - offset_map: vec![OffsetMapping { 435 - byte_range: prev_end_byte..gap_end_byte, 436 - char_range: prev_end_char..gap_end_char, 437 - node_id: gap_node_id, 438 - char_offset_in_node: 0, 439 - child_index: None, 440 - utf16_len: 1, 441 - }], 442 - syntax_spans: vec![], 443 - source_hash: hash_source(&text_slice_to_string(text, gap_start_char..gap_end_char)), 444 - }); 445 - } 446 - 447 - prev_end_char = para.char_range.end; 448 - prev_end_byte = para.byte_range.end; 449 - paragraphs_with_gaps.push(para); 450 - } 451 - 452 - // Add trailing gap if needed 453 - let has_trailing_newlines = source.ends_with("\n\n") || source.ends_with("\n"); 454 - if has_trailing_newlines 
{ 455 - let doc_end_char = text.len_unicode(); 456 - let doc_end_byte = text.len_utf8(); 457 - 458 - if doc_end_char > prev_end_char { 459 - // Position-based ID for trailing gap 460 - let trailing_node_id = format!("gap-{}-{}", prev_end_char, doc_end_char); 461 - let trailing_html = format!(r#"<span id="{}">{}</span>"#, trailing_node_id, '\u{200B}'); 462 463 - paragraphs_with_gaps.push(ParagraphRender { 464 - byte_range: prev_end_byte..doc_end_byte, 465 - char_range: prev_end_char..doc_end_char, 466 - html: trailing_html, 467 - offset_map: vec![OffsetMapping { 468 - byte_range: prev_end_byte..doc_end_byte, 469 - char_range: prev_end_char..doc_end_char, 470 - node_id: trailing_node_id, 471 - char_offset_in_node: 0, 472 - child_index: None, 473 - utf16_len: 1, 474 - }], 475 - syntax_spans: vec![], 476 - source_hash: 0, 477 - }); 478 - } 479 - } 480 481 let new_cache = RenderCache { 482 paragraphs: new_cached, 483 - next_node_id: node_id_offset, 484 - next_syn_id: syn_id_offset, 485 }; 486 487 - let render_loop_ms = crate::perf::now() - render_loop_start; 488 let total_ms = crate::perf::now() - fn_start; 489 tracing::debug!( 490 total_ms, 491 - render_loop_ms, 492 - fresh_render_ms, 493 - cache_hits, 494 - cache_misses, 495 paragraphs = paragraphs_with_gaps.len(), 496 - use_fast_path, 497 "render_paragraphs_incremental timing" 498 ); 499
··· 6 7 use super::document::EditInfo; 8 use super::offset_map::{OffsetMapping, RenderResult}; 9 + use super::paragraph::{ParagraphRender, hash_source, make_paragraph_id, text_slice_to_string}; 10 use super::writer::{EditorImageResolver, EditorWriter, ImageResolver, SyntaxSpanInfo}; 11 use loro::LoroText; 12 use markdown_weaver::Parser; 13 + use std::collections::HashMap; 14 use std::ops::Range; 15 use weaver_common::{EntryIndex, ResolvedContent}; 16 ··· 24 pub next_node_id: usize, 25 /// Next available syntax span ID for fresh renders 26 pub next_syn_id: usize, 27 + /// Next available paragraph ID (monotonic counter) 28 + pub next_para_id: usize, 29 } 30 31 /// A cached paragraph render that can be reused if source hasn't changed. 32 #[derive(Clone, Debug)] 33 pub struct CachedParagraph { 34 + /// Stable monotonic ID for DOM element identity 35 + pub id: String, 36 /// Hash of paragraph source text for change detection 37 pub source_hash: u64, 38 /// Byte range in source document ··· 77 } 78 } 79 80 + /// Insert gap paragraphs for extra whitespace between blocks. 
81 + fn add_gap_paragraphs( 82 + paragraphs: Vec<ParagraphRender>, 83 + text: &LoroText, 84 + source: &str, 85 + ) -> Vec<ParagraphRender> { 86 + const MIN_PARAGRAPH_BREAK_INCR: usize = 2; // \n\n 87 + 88 + let mut paragraphs_with_gaps = Vec::with_capacity(paragraphs.len() * 2); 89 + let mut prev_end_char = 0usize; 90 + let mut prev_end_byte = 0usize; 91 + 92 + for para in paragraphs { 93 + let gap_size = para.char_range.start.saturating_sub(prev_end_char); 94 + if gap_size > MIN_PARAGRAPH_BREAK_INCR { 95 + let gap_start_char = prev_end_char + MIN_PARAGRAPH_BREAK_INCR; 96 + let gap_end_char = para.char_range.start; 97 + let gap_start_byte = prev_end_byte + MIN_PARAGRAPH_BREAK_INCR; 98 + let gap_end_byte = para.byte_range.start; 99 + 100 + let gap_node_id = format!("gap-{}-{}", gap_start_char, gap_end_char); 101 + let gap_html = format!(r#"<span id="{}">{}</span>"#, gap_node_id, '\u{200B}'); 102 + 103 + paragraphs_with_gaps.push(ParagraphRender { 104 + id: gap_node_id.clone(), 105 + byte_range: prev_end_byte..gap_end_byte, 106 + char_range: prev_end_char..gap_end_char, 107 + html: gap_html, 108 + offset_map: vec![OffsetMapping { 109 + byte_range: prev_end_byte..gap_end_byte, 110 + char_range: prev_end_char..gap_end_char, 111 + node_id: gap_node_id, 112 + char_offset_in_node: 0, 113 + child_index: None, 114 + utf16_len: 1, 115 + }], 116 + syntax_spans: vec![], 117 + source_hash: hash_source(&text_slice_to_string(text, gap_start_char..gap_end_char)), 118 + }); 119 + } 120 + 121 + prev_end_char = para.char_range.end; 122 + prev_end_byte = para.byte_range.end; 123 + paragraphs_with_gaps.push(para); 124 + } 125 + 126 + // Add trailing gap if needed 127 + let has_trailing_newlines = source.ends_with("\n\n") || source.ends_with("\n"); 128 + if has_trailing_newlines { 129 + let doc_end_char = text.len_unicode(); 130 + let doc_end_byte = text.len_utf8(); 131 + 132 + if doc_end_char > prev_end_char { 133 + let trailing_node_id = format!("gap-{}-{}", prev_end_char, 
doc_end_char); 134 + let trailing_html = format!(r#"<span id="{}">{}</span>"#, trailing_node_id, '\u{200B}'); 135 + 136 + paragraphs_with_gaps.push(ParagraphRender { 137 + id: trailing_node_id.clone(), 138 + byte_range: prev_end_byte..doc_end_byte, 139 + char_range: prev_end_char..doc_end_char, 140 + html: trailing_html, 141 + offset_map: vec![OffsetMapping { 142 + byte_range: prev_end_byte..doc_end_byte, 143 + char_range: prev_end_char..doc_end_char, 144 + node_id: trailing_node_id, 145 + char_offset_in_node: 0, 146 + child_index: None, 147 + utf16_len: 1, 148 + }], 149 + syntax_spans: vec![], 150 + source_hash: 0, 151 + }); 152 + } 153 + } 154 + 155 + paragraphs_with_gaps 156 + } 157 + 158 /// Render markdown with incremental caching. 159 /// 160 /// Uses cached paragraph renders when possible, only re-rendering changed paragraphs. 161 /// 162 /// # Parameters 163 + /// - `cursor_offset`: Current cursor position (for finding which NEW paragraph is the cursor para) 164 + /// - `edit`: Edit info for stable ID assignment. Uses `edit_char_pos` to find which OLD cached 165 + /// paragraph to reuse the ID from (since cursor may have moved after the edit). 
166 /// - `entry_index`: Optional index for wikilink validation (adds link-valid/link-broken classes) 167 /// - `resolved_content`: Pre-resolved embed content for sync rendering 168 /// ··· 171 pub fn render_paragraphs_incremental( 172 text: &LoroText, 173 cache: Option<&RenderCache>, 174 + cursor_offset: usize, 175 edit: Option<&EditInfo>, 176 image_resolver: Option<&EditorImageResolver>, 177 entry_index: Option<&EntryIndex>, ··· 188 if source.is_empty() { 189 let empty_node_id = "n0".to_string(); 190 let empty_html = format!(r#"<span id="{}">{}</span>"#, empty_node_id, '\u{200B}'); 191 + let para_id = make_paragraph_id(0); 192 193 let para = ParagraphRender { 194 + id: para_id.clone(), 195 byte_range: 0..0, 196 char_range: 0..0, 197 html: empty_html.clone(), ··· 202 203 let new_cache = RenderCache { 204 paragraphs: vec![CachedParagraph { 205 + id: para_id, 206 source_hash: 0, 207 byte_range: 0..0, 208 char_range: 0..0, ··· 213 }], 214 next_node_id: 1, 215 next_syn_id: 0, 216 + next_para_id: 1, 217 }; 218 219 return (vec![para], new_cache, vec![]); ··· 222 // Determine if we can use fast path (skip boundary discovery) 223 // Need cache and non-boundary-affecting edit info (for edit position) 224 let current_len = text.len_unicode(); 225 + let current_byte_len = text.len_utf8(); 226 227 let use_fast_path = cache.is_some() && edit.is_some() && !is_boundary_affecting(edit.unwrap()); 228 ··· 247 248 // Compute delta from actual length difference, not edit info 249 // This handles stale edits gracefully (delta = 0 if lengths match) 250 + let (cached_len, cached_byte_len) = cache 251 .paragraphs 252 .last() 253 + .map(|p| (p.char_range.end, p.byte_range.end)) 254 + .unwrap_or((0, 0)); 255 let char_delta = current_len as isize - cached_len as isize; 256 + let byte_delta = current_byte_len as isize - cached_byte_len as isize; 257 258 // Adjust each cached paragraph's range 259 cache ··· 265 (p.byte_range.clone(), p.char_range.clone()) 266 } else if p.char_range.start > 
edit_pos { 267 // After edit - shift by delta (edit is strictly before this paragraph) 268 ( 269 apply_delta(p.byte_range.start, byte_delta) 270 ..apply_delta(p.byte_range.end, byte_delta), ··· 274 } else { 275 // Edit is at or within this paragraph - expand its end 276 ( 277 + p.byte_range.start..apply_delta(p.byte_range.end, byte_delta), 278 p.char_range.start..apply_delta(p.char_range.end, char_delta), 279 ) 280 } ··· 285 }; 286 287 // Validate fast path results - if any ranges are invalid, use slow path 288 + let use_fast_path = if !paragraph_ranges.is_empty() { 289 let all_valid = paragraph_ranges 290 .iter() 291 .all(|(_, char_range)| char_range.start <= char_range.end); 292 + if !all_valid { 293 tracing::debug!( 294 target: "weaver::render", 295 "fast path produced invalid ranges, falling back to slow path" 296 ); 297 + false 298 + } else { 299 + true 300 } 301 } else { 302 + false 303 }; 304 305 + // ============ FAST PATH ============ 306 + // Reuse cached paragraphs with offset adjustment, only re-render cursor paragraph 307 + if use_fast_path { 308 + let fast_start = crate::perf::now(); 309 + let cache = cache.unwrap(); 310 + let edit = edit.unwrap(); 311 + let edit_pos = edit.edit_char_pos; 312 + 313 + // Compute deltas 314 + let (cached_len, cached_byte_len) = cache 315 + .paragraphs 316 + .last() 317 + .map(|p| (p.char_range.end, p.byte_range.end)) 318 + .unwrap_or((0, 0)); 319 + let char_delta = current_len as isize - cached_len as isize; 320 + let byte_delta = current_byte_len as isize - cached_byte_len as isize; 321 + 322 + // Find cursor paragraph index 323 + let cursor_para_idx = cache 324 + .paragraphs 325 + .iter() 326 + .position(|p| p.char_range.start <= edit_pos && edit_pos <= p.char_range.end); 327 328 + let mut paragraphs = Vec::with_capacity(cache.paragraphs.len()); 329 + let mut new_cached = Vec::with_capacity(cache.paragraphs.len()); 330 + let mut all_refs: Vec<weaver_common::ExtractedRef> = Vec::new(); 331 + 332 + for (idx, 
cached_para) in cache.paragraphs.iter().enumerate() { 333 + let is_cursor_para = Some(idx) == cursor_para_idx; 334 + 335 + // Adjust ranges based on position relative to edit 336 + let (byte_range, char_range) = if cached_para.char_range.end < edit_pos { 337 + // Before edit - no change 338 + (cached_para.byte_range.clone(), cached_para.char_range.clone()) 339 + } else if cached_para.char_range.start > edit_pos { 340 + // After edit - shift by delta 341 + ( 342 + apply_delta(cached_para.byte_range.start, byte_delta) 343 + ..apply_delta(cached_para.byte_range.end, byte_delta), 344 + apply_delta(cached_para.char_range.start, char_delta) 345 + ..apply_delta(cached_para.char_range.end, char_delta), 346 + ) 347 + } else { 348 + // Contains edit - expand end 349 + ( 350 + cached_para.byte_range.start..apply_delta(cached_para.byte_range.end, byte_delta), 351 + cached_para.char_range.start..apply_delta(cached_para.char_range.end, char_delta), 352 + ) 353 + }; 354 + 355 + let para_source = text_slice_to_string(text, char_range.clone()); 356 + let source_hash = hash_source(&para_source); 357 + 358 + if is_cursor_para { 359 + // Re-render cursor paragraph for fresh syntax detection 360 + let resolver = image_resolver.cloned().unwrap_or_default(); 361 + let parser = Parser::new_ext(&para_source, weaver_renderer::default_md_options()) 362 + .into_offset_iter(); 363 + 364 + let para_doc = loro::LoroDoc::new(); 365 + let para_text = para_doc.get_text("content"); 366 + let _ = para_text.insert(0, &para_source); 367 + 368 + let mut writer = EditorWriter::<_, &ResolvedContent, &EditorImageResolver>::new( 369 + &para_source, 370 + &para_text, 371 + parser, 372 + ) 373 + .with_image_resolver(&resolver) 374 + .with_embed_provider(resolved_content); 375 + 376 + if let Some(idx) = entry_index { 377 + writer = writer.with_entry_index(idx); 378 + } 379 + 380 + let (html, offset_map, syntax_spans, para_refs) = match writer.run() { 381 + Ok(result) => { 382 + // Adjust offsets to be 
document-absolute 383 + let mut offset_map = result.offset_maps_by_paragraph.into_iter().next().unwrap_or_default(); 384 + for m in &mut offset_map { 385 + m.char_range.start += char_range.start; 386 + m.char_range.end += char_range.start; 387 + m.byte_range.start += byte_range.start; 388 + m.byte_range.end += byte_range.start; 389 + } 390 + let mut syntax_spans = result.syntax_spans_by_paragraph.into_iter().next().unwrap_or_default(); 391 + for s in &mut syntax_spans { 392 + s.adjust_positions(char_range.start as isize); 393 + } 394 + let para_refs = result.collected_refs_by_paragraph.into_iter().next().unwrap_or_default(); 395 + let html = result.html_segments.into_iter().next().unwrap_or_default(); 396 + (html, offset_map, syntax_spans, para_refs) 397 + } 398 + Err(_) => (String::new(), Vec::new(), Vec::new(), Vec::new()), 399 + }; 400 + 401 + all_refs.extend(para_refs.clone()); 402 + 403 + new_cached.push(CachedParagraph { 404 + id: cached_para.id.clone(), 405 + source_hash, 406 + byte_range: byte_range.clone(), 407 + char_range: char_range.clone(), 408 + html: html.clone(), 409 + offset_map: offset_map.clone(), 410 + syntax_spans: syntax_spans.clone(), 411 + collected_refs: para_refs.clone(), 412 + }); 413 + 414 + paragraphs.push(ParagraphRender { 415 + id: cached_para.id.clone(), 416 + byte_range, 417 + char_range, 418 + html, 419 + offset_map, 420 + syntax_spans, 421 + source_hash, 422 + }); 423 + } else { 424 + // Reuse cached with adjusted offsets 425 + let mut offset_map = cached_para.offset_map.clone(); 426 + let mut syntax_spans = cached_para.syntax_spans.clone(); 427 + 428 + if cached_para.char_range.start > edit_pos { 429 + // After edit - adjust offsets 430 + for m in &mut offset_map { 431 + m.char_range.start = apply_delta(m.char_range.start, char_delta); 432 + m.char_range.end = apply_delta(m.char_range.end, char_delta); 433 + m.byte_range.start = apply_delta(m.byte_range.start, byte_delta); 434 + m.byte_range.end = apply_delta(m.byte_range.end, 
byte_delta); 435 + } 436 + for s in &mut syntax_spans { 437 + s.adjust_positions(char_delta); 438 + } 439 + } 440 + 441 + all_refs.extend(cached_para.collected_refs.clone()); 442 + 443 + new_cached.push(CachedParagraph { 444 + id: cached_para.id.clone(), 445 + source_hash, 446 + byte_range: byte_range.clone(), 447 + char_range: char_range.clone(), 448 + html: cached_para.html.clone(), 449 + offset_map: offset_map.clone(), 450 + syntax_spans: syntax_spans.clone(), 451 + collected_refs: cached_para.collected_refs.clone(), 452 + }); 453 + 454 + paragraphs.push(ParagraphRender { 455 + id: cached_para.id.clone(), 456 + byte_range, 457 + char_range, 458 + html: cached_para.html.clone(), 459 + offset_map, 460 + syntax_spans, 461 + source_hash, 462 + }); 463 + } 464 + } 465 + 466 + // Add gaps (reuse gap logic from below) 467 + let paragraphs_with_gaps = add_gap_paragraphs(paragraphs, text, &source); 468 + 469 + let new_cache = RenderCache { 470 + paragraphs: new_cached, 471 + next_node_id: 0, 472 + next_syn_id: 0, 473 + next_para_id: cache.next_para_id, 474 }; 475 + 476 + let fast_ms = crate::perf::now() - fast_start; 477 + tracing::debug!( 478 + fast_ms, 479 + paragraphs = paragraphs_with_gaps.len(), 480 + cursor_para_idx, 481 + "fast path render timing" 482 + ); 483 + 484 + return (paragraphs_with_gaps, new_cache, all_refs); 485 + } 486 + 487 + // ============ SLOW PATH ============ 488 + // Full render when boundaries might have changed 489 + let render_start = crate::perf::now(); 490 + let parser = 491 + Parser::new_ext(&source, weaver_renderer::default_md_options()).into_offset_iter(); 492 + 493 + // Use provided resolver or empty default 494 + let resolver = image_resolver.cloned().unwrap_or_default(); 495 + 496 + // Build writer with all resolvers 497 + let mut writer = EditorWriter::<_, &ResolvedContent, &EditorImageResolver>::new( 498 + &source, 499 + text, 500 + parser, 501 + ) 502 + .with_image_resolver(&resolver) 503 + .with_embed_provider(resolved_content); 
504 + 505 + if let Some(idx) = entry_index { 506 + writer = writer.with_entry_index(idx); 507 + } 508 + 509 + let writer_result = match writer.run() { 510 + Ok(result) => result, 511 + Err(_) => return (Vec::new(), RenderCache::default(), vec![]), 512 }; 513 + 514 + let render_ms = crate::perf::now() - render_start; 515 + 516 + let paragraph_ranges = writer_result.paragraph_ranges.clone(); 517 518 // Log discovered paragraphs 519 for (i, (byte_range, char_range)) in paragraph_ranges.iter().enumerate() { ··· 531 ); 532 } 533 534 + // Build paragraphs from full render segments 535 + let build_start = crate::perf::now(); 536 let mut paragraphs = Vec::with_capacity(paragraph_ranges.len()); 537 let mut new_cached = Vec::with_capacity(paragraph_ranges.len()); 538 let mut all_refs: Vec<weaver_common::ExtractedRef> = Vec::new(); 539 + let mut next_para_id = cache.map(|c| c.next_para_id).unwrap_or(0); 540 + 541 + // Find which paragraph contains cursor (for stable ID assignment) 542 + let cursor_para_idx = paragraph_ranges.iter().position(|(_, char_range)| { 543 + char_range.start <= cursor_offset && cursor_offset <= char_range.end 544 + }); 545 + 546 + tracing::debug!( 547 + cursor_offset, 548 + ?cursor_para_idx, 549 + edit_char_pos = ?edit.map(|e| e.edit_char_pos), 550 + "ID assignment: cursor and edit info" 551 + ); 552 + 553 + // Build hash->cached_para lookup for non-cursor matching 554 + let cached_by_hash: HashMap<u64, &CachedParagraph> = cache 555 + .map(|c| c.paragraphs.iter().map(|p| (p.source_hash, p)).collect()) 556 + .unwrap_or_default(); 557 558 for (idx, (byte_range, char_range)) in paragraph_ranges.iter().enumerate() { 559 let para_source = text_slice_to_string(text, char_range.clone()); 560 let source_hash = hash_source(&para_source); 561 + let is_cursor_para = Some(idx) == cursor_para_idx; 562 563 + // ID assignment: cursor paragraph matches by edit position, others match by hash 564 + let para_id = if is_cursor_para { 565 + let edit_in_this_para = edit 
566 + .map(|e| char_range.start <= e.edit_char_pos && e.edit_char_pos <= char_range.end) 567 + .unwrap_or(false); 568 + let lookup_pos = if edit_in_this_para { 569 + edit.map(|e| e.edit_char_pos).unwrap_or(cursor_offset) 570 + } else { 571 + cursor_offset 572 + }; 573 + let found_cached = cache.and_then(|c| { 574 + c.paragraphs 575 + .iter() 576 + .find(|p| p.char_range.start <= lookup_pos && lookup_pos <= p.char_range.end) 577 + }); 578 579 + if let Some(cached) = found_cached { 580 + tracing::debug!( 581 + lookup_pos, 582 + edit_in_this_para, 583 + cursor_offset, 584 + cached_id = %cached.id, 585 + cached_range = ?cached.char_range, 586 + "cursor para: reusing cached ID" 587 + ); 588 + cached.id.clone() 589 + } else { 590 + let id = make_paragraph_id(next_para_id); 591 + next_para_id += 1; 592 + id 593 } 594 } else { 595 + // Non-cursor: match by content hash 596 + cached_by_hash 597 + .get(&source_hash) 598 + .map(|p| p.id.clone()) 599 + .unwrap_or_else(|| { 600 + let id = make_paragraph_id(next_para_id); 601 + next_para_id += 1; 602 + id 603 + }) 604 + }; 605 606 + // Get data from full render segments 607 + let html = writer_result.html_segments.get(idx).cloned().unwrap_or_default(); 608 + let offset_map = writer_result.offset_maps_by_paragraph.get(idx).cloned().unwrap_or_default(); 609 + let syntax_spans = writer_result.syntax_spans_by_paragraph.get(idx).cloned().unwrap_or_default(); 610 + let para_refs = writer_result.collected_refs_by_paragraph.get(idx).cloned().unwrap_or_default(); 611 612 + all_refs.extend(para_refs.clone()); 613 614 // Store in cache 615 new_cached.push(CachedParagraph { 616 + id: para_id.clone(), 617 source_hash, 618 byte_range: byte_range.clone(), 619 char_range: char_range.clone(), 620 html: html.clone(), 621 offset_map: offset_map.clone(), 622 syntax_spans: syntax_spans.clone(), 623 + collected_refs: para_refs.clone(), 624 }); 625 626 paragraphs.push(ParagraphRender { 627 + id: para_id, 628 byte_range: byte_range.clone(), 629 
char_range: char_range.clone(), 630 html, ··· 634 }); 635 } 636 637 + let build_ms = crate::perf::now() - build_start; 638 + tracing::debug!( 639 + render_ms, 640 + build_ms, 641 + paragraphs = paragraph_ranges.len(), 642 + "single-pass render timing" 643 + ); 644 645 + let paragraphs_with_gaps = add_gap_paragraphs(paragraphs, text, &source); 646 647 let new_cache = RenderCache { 648 paragraphs: new_cached, 649 + next_node_id: 0, // Not used in single-pass mode 650 + next_syn_id: 0, // Not used in single-pass mode 651 + next_para_id, 652 }; 653 654 let total_ms = crate::perf::now() - fn_start; 655 tracing::debug!( 656 total_ms, 657 + render_ms, 658 + build_ms, 659 paragraphs = paragraphs_with_gaps.len(), 660 "render_paragraphs_incremental timing" 661 ); 662
+7 -5
crates/weaver-app/src/components/editor/tests.rs
··· 59 let text = doc.get_text("content"); 60 text.insert(0, input).unwrap(); 61 let (paragraphs, _cache, _refs) = 62 - render_paragraphs_incremental(&text, None, None, None, None, &ResolvedContent::default()); 63 paragraphs.iter().map(TestParagraph::from).collect() 64 } 65 ··· 648 // Initial state: "#" is a valid empty heading 649 text.insert(0, "#").unwrap(); 650 let (paras1, cache1, _refs1) = 651 - render_paragraphs_incremental(&text, None, None, None, None, &ResolvedContent::default()); 652 653 eprintln!("State 1 ('#'): {}", paras1[0].html); 654 assert!(paras1[0].html.contains("<h1"), "# alone should be heading"); ··· 662 let (paras2, _cache2, _refs2) = render_paragraphs_incremental( 663 &text, 664 Some(&cache1), 665 None, 666 None, 667 None, ··· 776 let text = doc.get_text("content"); 777 text.insert(0, input).unwrap(); 778 let (paragraphs, _cache, _refs) = 779 - render_paragraphs_incremental(&text, None, None, None, None, &ResolvedContent::default()); 780 781 // With standard \n\n break, we expect 2 paragraphs (no gap element) 782 // Paragraph ranges include some trailing whitespace from markdown parsing ··· 806 let text = doc.get_text("content"); 807 text.insert(0, input).unwrap(); 808 let (paragraphs, _cache, _refs) = 809 - render_paragraphs_incremental(&text, None, None, None, None, &ResolvedContent::default()); 810 811 // With extra newlines, we expect 3 elements: para, gap, para 812 assert_eq!( ··· 907 text.insert(0, input).unwrap(); 908 909 let (paras1, cache1, _refs1) = 910 - render_paragraphs_incremental(&text, None, None, None, None, &ResolvedContent::default()); 911 assert!(!cache1.paragraphs.is_empty(), "Cache should be populated"); 912 913 // Second render with same content should reuse cache 914 let (paras2, _cache2, _refs2) = render_paragraphs_incremental( 915 &text, 916 Some(&cache1), 917 None, 918 None, 919 None,
··· 59 let text = doc.get_text("content"); 60 text.insert(0, input).unwrap(); 61 let (paragraphs, _cache, _refs) = 62 + render_paragraphs_incremental(&text, None, 0, None, None, None, &ResolvedContent::default()); 63 paragraphs.iter().map(TestParagraph::from).collect() 64 } 65 ··· 648 // Initial state: "#" is a valid empty heading 649 text.insert(0, "#").unwrap(); 650 let (paras1, cache1, _refs1) = 651 + render_paragraphs_incremental(&text, None, 0, None, None, None, &ResolvedContent::default()); 652 653 eprintln!("State 1 ('#'): {}", paras1[0].html); 654 assert!(paras1[0].html.contains("<h1"), "# alone should be heading"); ··· 662 let (paras2, _cache2, _refs2) = render_paragraphs_incremental( 663 &text, 664 Some(&cache1), 665 + 0, 666 None, 667 None, 668 None, ··· 777 let text = doc.get_text("content"); 778 text.insert(0, input).unwrap(); 779 let (paragraphs, _cache, _refs) = 780 + render_paragraphs_incremental(&text, None, 0, None, None, None, &ResolvedContent::default()); 781 782 // With standard \n\n break, we expect 2 paragraphs (no gap element) 783 // Paragraph ranges include some trailing whitespace from markdown parsing ··· 807 let text = doc.get_text("content"); 808 text.insert(0, input).unwrap(); 809 let (paragraphs, _cache, _refs) = 810 + render_paragraphs_incremental(&text, None, 0, None, None, None, &ResolvedContent::default()); 811 812 // With extra newlines, we expect 3 elements: para, gap, para 813 assert_eq!( ··· 908 text.insert(0, input).unwrap(); 909 910 let (paras1, cache1, _refs1) = 911 + render_paragraphs_incremental(&text, None, 0, None, None, None, &ResolvedContent::default()); 912 assert!(!cache1.paragraphs.is_empty(), "Cache should be populated"); 913 914 // Second render with same content should reuse cache 915 let (paras2, _cache2, _refs2) = render_paragraphs_incremental( 916 &text, 917 Some(&cache1), 918 + 0, 919 None, 920 None, 921 None,
+1
crates/weaver-app/src/components/editor/visibility.rs
··· 276 277 fn make_para(start: usize, end: usize, syntax_spans: Vec<SyntaxSpanInfo>) -> ParagraphRender { 278 ParagraphRender { 279 byte_range: start..end, 280 char_range: start..end, 281 html: String::new(),
··· 276 277 fn make_para(start: usize, end: usize, syntax_spans: Vec<SyntaxSpanInfo>) -> ParagraphRender { 278 ParagraphRender { 279 + id: format!("test-{}-{}", start, end), 280 byte_range: start..end, 281 char_range: start..end, 282 html: String::new(),
+281 -248
crates/weaver-app/src/components/editor/writer.rs
··· 17 escape_html_body_text_with_char_count, 18 }; 19 use std::collections::HashMap; 20 use std::ops::Range; 21 use weaver_common::{EntryIndex, ResolvedContent}; 22 23 /// Result of rendering with the EditorWriter. 24 #[derive(Debug, Clone)] 25 pub struct WriterResult { 26 - /// Offset mappings from source to DOM positions 27 - pub offset_maps: Vec<OffsetMapping>, 28 29 /// Paragraph boundaries in source: (byte_range, char_range) 30 /// These are extracted during rendering by tracking Tag::Paragraph events 31 pub paragraph_ranges: Vec<(Range<usize>, Range<usize>)>, 32 33 - /// Syntax spans that can be conditionally hidden 34 - pub syntax_spans: Vec<SyntaxSpanInfo>, 35 36 - /// Refs (wikilinks, AT embeds) collected during this render pass 37 - pub collected_refs: Vec<weaver_common::ExtractedRef>, 38 } 39 40 /// Classification of markdown syntax characters ··· 312 /// 313 /// This writer processes offset-iter events to detect gaps (consumed formatting) 314 /// and emits them as styled spans for visibility in the editor. 315 - pub struct EditorWriter< 316 - 'a, 317 - I: Iterator<Item = (Event<'a>, Range<usize>)>, 318 - W: StrWrite, 319 - E = (), 320 - R = (), 321 - > { 322 source: &'a str, 323 source_text: &'a LoroText, 324 events: I, 325 - writer: W, 326 last_byte_offset: usize, 327 last_char_offset: usize, 328 ··· 350 render_tables_as_markdown: bool, 351 table_start_offset: Option<usize>, // track start of table for markdown rendering 352 353 - // Offset mapping tracking 354 offset_maps: Vec<OffsetMapping>, 355 next_node_id: usize, 356 current_node_id: Option<String>, // node ID for current text container 357 current_node_char_offset: usize, // UTF-16 offset within current node ··· 367 current_paragraph_start: Option<(usize, usize)>, // (byte_offset, char_offset) 368 list_depth: usize, // Track nesting depth to avoid paragraph boundary override inside lists 369 370 - /// When true, skip HTML generation and only track paragraph boundaries. 
371 - /// Used for fast boundary discovery in incremental rendering. 372 - boundary_only: bool, 373 - 374 - // Syntax span tracking for conditional visibility 375 syntax_spans: Vec<SyntaxSpanInfo>, 376 next_syn_id: usize, 377 /// Stack of pending inline formats: (syn_id of opening span, char start of region) 378 /// Used to set formatted_range when closing paired inline markers 379 pending_inline_formats: Vec<(String, usize)>, 380 381 - /// Collected refs (wikilinks, AT embeds, AT links) during this render pass 382 ref_collector: weaver_common::RefCollector, 383 384 _phantom: std::marker::PhantomData<&'a ()>, 385 } 386 ··· 390 Body, 391 } 392 393 - impl< 394 - 'a, 395 - I: Iterator<Item = (Event<'a>, Range<usize>)>, 396 - W: StrWrite, 397 - E: EmbedContentProvider, 398 - R: ImageResolver, 399 - > EditorWriter<'a, I, W, E, R> 400 { 401 - pub fn new(source: &'a str, source_text: &'a LoroText, events: I, writer: W) -> Self { 402 - Self::new_with_node_offset(source, source_text, events, writer, 0) 403 - } 404 - 405 - pub fn new_with_node_offset( 406 - source: &'a str, 407 - source_text: &'a LoroText, 408 - events: I, 409 - writer: W, 410 - node_id_offset: usize, 411 - ) -> Self { 412 - Self::new_with_offsets(source, source_text, events, writer, node_id_offset, 0) 413 - } 414 - 415 - pub fn new_with_offsets( 416 - source: &'a str, 417 - source_text: &'a LoroText, 418 - events: I, 419 - writer: W, 420 - node_id_offset: usize, 421 - syn_id_offset: usize, 422 - ) -> Self { 423 - Self::new_with_all_offsets( 424 - source, 425 - source_text, 426 - events, 427 - writer, 428 - node_id_offset, 429 - syn_id_offset, 430 - 0, 431 - 0, 432 - ) 433 } 434 435 pub fn new_with_all_offsets( 436 source: &'a str, 437 source_text: &'a LoroText, 438 events: I, 439 - writer: W, 440 node_id_offset: usize, 441 syn_id_offset: usize, 442 char_offset_base: usize, ··· 446 source, 447 source_text, 448 events, 449 - writer, 450 last_byte_offset: byte_offset_base, 451 last_char_offset: 
char_offset_base, 452 end_newline: true, ··· 464 code_block_char_start: None, 465 code_block_opening_span_idx: None, 466 pending_blockquote_range: None, 467 - render_tables_as_markdown: true, // Default to markdown rendering 468 table_start_offset: None, 469 offset_maps: Vec::new(), 470 next_node_id: node_id_offset, 471 current_node_id: None, 472 current_node_char_offset: 0, ··· 475 paragraph_ranges: Vec::new(), 476 current_paragraph_start: None, 477 list_depth: 0, 478 - boundary_only: false, 479 syntax_spans: Vec::new(), 480 next_syn_id: syn_id_offset, 481 pending_inline_formats: Vec::new(), 482 ref_collector: weaver_common::RefCollector::new(), 483 - _phantom: std::marker::PhantomData, 484 - } 485 - } 486 - 487 - /// Create a writer that only tracks paragraph boundaries without generating HTML. 488 - /// Used for fast boundary discovery in incremental rendering. 489 - pub fn new_boundary_only( 490 - source: &'a str, 491 - source_text: &'a LoroText, 492 - events: I, 493 - writer: W, 494 - ) -> Self { 495 - Self { 496 - source, 497 - source_text, 498 - events, 499 - writer, 500 - last_byte_offset: 0, 501 - last_char_offset: 0, 502 - end_newline: true, 503 - in_non_writing_block: false, 504 - table_state: TableState::Head, 505 - table_alignments: vec![], 506 - table_cell_index: 0, 507 - numbers: HashMap::new(), 508 - embed_provider: None, 509 - image_resolver: None, 510 - entry_index: None, 511 - code_buffer: None, 512 - code_buffer_byte_range: None, 513 - code_buffer_char_range: None, 514 - code_block_char_start: None, 515 - code_block_opening_span_idx: None, 516 - pending_blockquote_range: None, 517 - render_tables_as_markdown: true, 518 - table_start_offset: None, 519 - offset_maps: Vec::new(), 520 - next_node_id: 0, 521 - current_node_id: None, 522 - current_node_char_offset: 0, 523 - current_node_child_count: 0, 524 - utf16_checkpoints: vec![(0, 0)], 525 - syntax_spans: Vec::new(), 526 - next_syn_id: 0, 527 - pending_inline_formats: Vec::new(), 528 - 
ref_collector: weaver_common::RefCollector::new(), 529 - paragraph_ranges: Vec::new(), 530 - current_paragraph_start: None, 531 - list_depth: 0, 532 - boundary_only: true, 533 _phantom: std::marker::PhantomData, 534 } 535 } 536 537 /// Add an embed content provider 538 - pub fn with_embed_provider(self, provider: E) -> EditorWriter<'a, I, W, E, R> { 539 - EditorWriter { 540 - source: self.source, 541 - source_text: self.source_text, 542 - events: self.events, 543 - writer: self.writer, 544 - last_byte_offset: self.last_byte_offset, 545 - last_char_offset: self.last_char_offset, 546 - end_newline: self.end_newline, 547 - in_non_writing_block: self.in_non_writing_block, 548 - table_state: self.table_state, 549 - table_alignments: self.table_alignments, 550 - table_cell_index: self.table_cell_index, 551 - numbers: self.numbers, 552 - embed_provider: Some(provider), 553 - image_resolver: self.image_resolver, 554 - entry_index: self.entry_index, 555 - code_buffer: self.code_buffer, 556 - code_buffer_byte_range: self.code_buffer_byte_range, 557 - code_buffer_char_range: self.code_buffer_char_range, 558 - code_block_char_start: self.code_block_char_start, 559 - code_block_opening_span_idx: self.code_block_opening_span_idx, 560 - pending_blockquote_range: self.pending_blockquote_range, 561 - render_tables_as_markdown: self.render_tables_as_markdown, 562 - table_start_offset: self.table_start_offset, 563 - offset_maps: self.offset_maps, 564 - next_node_id: self.next_node_id, 565 - current_node_id: self.current_node_id, 566 - current_node_char_offset: self.current_node_char_offset, 567 - current_node_child_count: self.current_node_child_count, 568 - utf16_checkpoints: self.utf16_checkpoints, 569 - paragraph_ranges: self.paragraph_ranges, 570 - current_paragraph_start: self.current_paragraph_start, 571 - list_depth: self.list_depth, 572 - boundary_only: self.boundary_only, 573 - syntax_spans: self.syntax_spans, 574 - next_syn_id: self.next_syn_id, 575 - 
pending_inline_formats: self.pending_inline_formats, 576 - ref_collector: self.ref_collector, 577 - _phantom: std::marker::PhantomData, 578 - } 579 } 580 581 /// Add an image resolver for mapping markdown image URLs to CDN URLs 582 pub fn with_image_resolver<R2: ImageResolver>( 583 self, 584 resolver: R2, 585 - ) -> EditorWriter<'a, I, W, E, R2> { 586 EditorWriter { 587 source: self.source, 588 source_text: self.source_text, ··· 608 render_tables_as_markdown: self.render_tables_as_markdown, 609 table_start_offset: self.table_start_offset, 610 offset_maps: self.offset_maps, 611 next_node_id: self.next_node_id, 612 current_node_id: self.current_node_id, 613 current_node_char_offset: self.current_node_char_offset, ··· 616 paragraph_ranges: self.paragraph_ranges, 617 current_paragraph_start: self.current_paragraph_start, 618 list_depth: self.list_depth, 619 - boundary_only: self.boundary_only, 620 syntax_spans: self.syntax_spans, 621 next_syn_id: self.next_syn_id, 622 pending_inline_formats: self.pending_inline_formats, 623 ref_collector: self.ref_collector, 624 _phantom: std::marker::PhantomData, 625 } 626 } ··· 631 self 632 } 633 634 #[inline] 635 - fn write_newline(&mut self) -> Result<(), W::Error> { 636 self.end_newline = true; 637 - if self.boundary_only { 638 - return Ok(()); 639 - } 640 self.writer.write_str("\n") 641 } 642 643 #[inline] 644 - fn write(&mut self, s: &str) -> Result<(), W::Error> { 645 if !s.is_empty() { 646 self.end_newline = s.ends_with('\n'); 647 - } 648 - if self.boundary_only { 649 - return Ok(()); 650 } 651 self.writer.write_str(s) 652 } ··· 691 } 692 693 /// Emit syntax span for a given range and record offset mapping 694 - fn emit_syntax(&mut self, range: Range<usize>) -> Result<(), W::Error> { 695 if range.start < range.end { 696 let syntax = &self.source[range.clone()]; 697 if !syntax.is_empty() { ··· 706 syntax = %syntax.escape_debug(), 707 "emit_syntax" 708 ); 709 - 710 - // In boundary_only mode, just update offsets without HTML 711 
- if self.boundary_only { 712 - self.last_char_offset = char_end; 713 - self.last_byte_offset = range.end; 714 - return Ok(()); 715 - } 716 717 // Whitespace-only content (trailing spaces, newlines) should be emitted 718 // as plain text, not wrapped in a hideable syntax span ··· 811 syntax: &str, 812 byte_start: usize, 813 syntax_type: SyntaxType, 814 - ) -> Result<(), W::Error> { 815 if syntax.is_empty() { 816 return Ok(()); 817 } ··· 821 let char_end = char_start + syntax_char_len; 822 let byte_end = byte_start + syntax.len(); 823 824 - // In boundary_only mode, just update offsets 825 - if self.boundary_only { 826 - self.last_char_offset = char_end; 827 - self.last_byte_offset = byte_end; 828 - return Ok(()); 829 - } 830 - 831 let class_str = match syntax_type { 832 SyntaxType::Inline => "md-syntax-inline", 833 SyntaxType::Block => "md-syntax-block", ··· 862 } 863 864 /// Emit any gap between last position and next offset 865 - fn emit_gap_before(&mut self, next_offset: usize) -> Result<(), W::Error> { 866 // Skip gap emission if we're inside a table being rendered as markdown 867 if self.table_start_offset.is_some() && self.render_tables_as_markdown { 868 return Ok(()); ··· 880 Ok(()) 881 } 882 883 - /// Generate a unique node ID 884 fn gen_node_id(&mut self) -> String { 885 - let id = format!("n{}", self.next_node_id); 886 self.next_node_id += 1; 887 id 888 } ··· 954 /// 955 /// Returns offset mappings and paragraph boundaries. The HTML is written 956 /// to the writer passed in the constructor. 
957 - pub fn run(mut self) -> Result<WriterResult, W::Error> { 958 while let Some((event, range)) = self.events.next() { 959 tracing::trace!( 960 target: "weaver::writer", ··· 1062 } 1063 } 1064 1065 Ok(WriterResult { 1066 - offset_maps: self.offset_maps, 1067 paragraph_ranges: self.paragraph_ranges, 1068 - syntax_spans: self.syntax_spans, 1069 - collected_refs: self.ref_collector.take(), 1070 }) 1071 } 1072 1073 // Consume raw text events until end tag, for alt attributes 1074 - fn raw_text(&mut self) -> Result<(), W::Error> { 1075 use Event::*; 1076 let mut nest = 0; 1077 while let Some((event, _range)) = self.events.next() { ··· 1135 } 1136 } 1137 1138 - fn process_event(&mut self, event: Event<'_>, range: Range<usize>) -> Result<(), W::Error> { 1139 use Event::*; 1140 1141 match event { ··· 1650 Ok(()) 1651 } 1652 1653 - fn start_tag(&mut self, tag: Tag<'_>, range: Range<usize>) -> Result<(), W::Error> { 1654 // Check if this is a block-level tag that should have syntax inside 1655 let is_block_tag = matches!(tag, Tag::Heading { .. 
} | Tag::BlockQuote(_)); 1656 ··· 1768 if self.end_newline { 1769 write!( 1770 &mut self.writer, 1771 - r#"<p id="{}", class="html-embed html-embed-block">"#, 1772 node_id 1773 )?; 1774 } else { 1775 write!( 1776 &mut self.writer, 1777 - r#"\n<p id="{}", class="html-embed html-embed-block">"#, 1778 node_id 1779 )?; 1780 } ··· 2500 &mut self, 2501 tag: markdown_weaver::TagEnd, 2502 range: Range<usize>, 2503 - ) -> Result<(), W::Error> { 2504 use markdown_weaver::TagEnd; 2505 2506 // Emit tag HTML first 2507 let result = match tag { 2508 TagEnd::HtmlBlock => { 2509 - // Record paragraph end for boundary tracking 2510 - // BUT skip if inside a list - list owns the paragraph boundary 2511 - if self.list_depth == 0 { 2512 - if let Some((byte_start, char_start)) = self.current_paragraph_start.take() { 2513 - let byte_range = byte_start..self.last_byte_offset; 2514 - let char_range = char_start..self.last_char_offset; 2515 - self.paragraph_ranges.push((byte_range, char_range)); 2516 - } 2517 - } 2518 2519 self.end_node(); 2520 - self.write("</p>\n") 2521 } 2522 TagEnd::Paragraph => { 2523 - // Record paragraph end for boundary tracking 2524 - // BUT skip if inside a list - list owns the paragraph boundary 2525 - if self.list_depth == 0 { 2526 - if let Some((byte_start, char_start)) = self.current_paragraph_start.take() { 2527 - let byte_range = byte_start..self.last_byte_offset; 2528 - let char_range = char_start..self.last_char_offset; 2529 - self.paragraph_ranges.push((byte_range, char_range)); 2530 - } 2531 - } 2532 2533 self.end_node(); 2534 - self.write("</p>\n") 2535 } 2536 TagEnd::Heading(level) => { 2537 - // Record paragraph end for boundary tracking 2538 - if let Some((byte_start, char_start)) = self.current_paragraph_start.take() { 2539 - let byte_range = byte_start..self.last_byte_offset; 2540 - let char_range = char_start..self.last_char_offset; 2541 - self.paragraph_ranges.push((byte_range, char_range)); 2542 - } 2543 2544 self.end_node(); 2545 
self.write("</")?; 2546 write!(&mut self.writer, "{}", level)?; 2547 - self.write(">\n") 2548 } 2549 TagEnd::Table => { 2550 if self.render_tables_as_markdown { ··· 2594 TagEnd::BlockQuote(_) => { 2595 // If pending_blockquote_range is still set, the blockquote was empty 2596 // (no paragraph inside). Emit the > as its own minimal paragraph. 2597 if let Some(bq_range) = self.pending_blockquote_range.take() { 2598 if bq_range.start < bq_range.end { 2599 let raw_text = &self.source[bq_range.clone()]; ··· 2604 // Create a minimal paragraph for the empty blockquote 2605 let node_id = self.gen_node_id(); 2606 write!(&mut self.writer, "<div id=\"{}\"", node_id)?; 2607 - // self.begin_node(node_id.clone()); 2608 2609 - // // Record start-of-node mapping for cursor positioning 2610 self.offset_maps.push(OffsetMapping { 2611 byte_range: para_byte_start..para_byte_start, 2612 char_range: para_char_start..para_char_start, ··· 2623 self.write("</div>\n")?; 2624 self.end_node(); 2625 2626 - // Record paragraph boundary for incremental rendering 2627 let byte_range = para_byte_start..bq_range.end; 2628 let char_range = para_char_start..self.last_char_offset; 2629 - self.paragraph_ranges.push((byte_range, char_range)); 2630 } 2631 } 2632 } 2633 - self.write("</blockquote>\n") 2634 } 2635 TagEnd::CodeBlock => { 2636 use std::sync::LazyLock; ··· 2758 } 2759 } 2760 2761 - // Record code block end for paragraph boundary tracking 2762 if let Some((byte_start, char_start)) = self.current_paragraph_start.take() { 2763 let byte_range = byte_start..self.last_byte_offset; 2764 let char_range = char_start..self.last_char_offset; 2765 - self.paragraph_ranges.push((byte_range, char_range)); 2766 } 2767 2768 Ok(()) 2769 } 2770 TagEnd::List(true) => { 2771 self.list_depth = self.list_depth.saturating_sub(1); 2772 - // Record list end for paragraph boundary tracking 2773 - if let Some((byte_start, char_start)) = self.current_paragraph_start.take() { 2774 - let byte_range = 
byte_start..self.last_byte_offset; 2775 - let char_range = char_start..self.last_char_offset; 2776 - self.paragraph_ranges.push((byte_range, char_range)); 2777 } 2778 - self.write("</ol>\n") 2779 } 2780 TagEnd::List(false) => { 2781 self.list_depth = self.list_depth.saturating_sub(1); 2782 - // Record list end for paragraph boundary tracking 2783 - if let Some((byte_start, char_start)) = self.current_paragraph_start.take() { 2784 - let byte_range = byte_start..self.last_byte_offset; 2785 - let char_range = char_start..self.last_char_offset; 2786 - self.paragraph_ranges.push((byte_range, char_range)); 2787 } 2788 - self.write("</ul>\n") 2789 } 2790 TagEnd::Item => { 2791 self.end_node(); ··· 2878 } 2879 } 2880 2881 - impl< 2882 - 'a, 2883 - I: Iterator<Item = (Event<'a>, Range<usize>)>, 2884 - W: StrWrite, 2885 - E: EmbedContentProvider, 2886 - R: ImageResolver, 2887 - > EditorWriter<'a, I, W, E, R> 2888 { 2889 fn write_embed( 2890 &mut self, ··· 2894 title: CowStr<'_>, 2895 id: CowStr<'_>, 2896 attrs: Option<markdown_weaver::WeaverAttributes<'_>>, 2897 - ) -> Result<(), W::Error> { 2898 // Embed rendering: all syntax elements share one syn_id for visibility toggling 2899 // Structure: ![[ url-as-link ]] <embed-content> 2900 let raw_text = &self.source[range.clone()];
··· 17 escape_html_body_text_with_char_count, 18 }; 19 use std::collections::HashMap; 20 + use std::fmt; 21 use std::ops::Range; 22 use weaver_common::{EntryIndex, ResolvedContent}; 23 24 + /// Writer that segments output by paragraph boundaries. 25 + /// 26 + /// Each paragraph's HTML is written to a separate String in the segments Vec. 27 + /// Call `new_segment()` at paragraph boundaries to start a new segment. 28 + #[derive(Debug, Clone, Default)] 29 + pub struct SegmentedWriter { 30 + segments: Vec<String>, 31 + } 32 + 33 + impl SegmentedWriter { 34 + pub fn new() -> Self { 35 + Self { 36 + segments: vec![String::new()], 37 + } 38 + } 39 + 40 + /// Start a new segment for the next paragraph. 41 + pub fn new_segment(&mut self) { 42 + self.segments.push(String::new()); 43 + } 44 + 45 + /// Get the completed segments. 46 + pub fn into_segments(self) -> Vec<String> { 47 + self.segments 48 + } 49 + 50 + /// Get current segment count. 51 + pub fn segment_count(&self) -> usize { 52 + self.segments.len() 53 + } 54 + } 55 + 56 + impl StrWrite for SegmentedWriter { 57 + type Error = fmt::Error; 58 + 59 + #[inline] 60 + fn write_str(&mut self, s: &str) -> Result<(), Self::Error> { 61 + if let Some(segment) = self.segments.last_mut() { 62 + segment.push_str(s); 63 + } 64 + Ok(()) 65 + } 66 + 67 + #[inline] 68 + fn write_fmt(&mut self, args: fmt::Arguments) -> Result<(), Self::Error> { 69 + if let Some(segment) = self.segments.last_mut() { 70 + fmt::Write::write_fmt(segment, args)?; 71 + } 72 + Ok(()) 73 + } 74 + } 75 + 76 + impl fmt::Write for SegmentedWriter { 77 + fn write_str(&mut self, s: &str) -> fmt::Result { 78 + <Self as StrWrite>::write_str(self, s) 79 + } 80 + 81 + fn write_fmt(&mut self, args: fmt::Arguments<'_>) -> fmt::Result { 82 + <Self as StrWrite>::write_fmt(self, args) 83 + } 84 + } 85 + 86 /// Result of rendering with the EditorWriter. 
87 #[derive(Debug, Clone)] 88 pub struct WriterResult { 89 + /// HTML segments, one per paragraph (parallel to paragraph_ranges) 90 + pub html_segments: Vec<String>, 91 + 92 + /// Offset mappings from source to DOM positions, grouped by paragraph 93 + /// Each inner Vec corresponds to a paragraph in html_segments 94 + pub offset_maps_by_paragraph: Vec<Vec<OffsetMapping>>, 95 96 /// Paragraph boundaries in source: (byte_range, char_range) 97 /// These are extracted during rendering by tracking Tag::Paragraph events 98 pub paragraph_ranges: Vec<(Range<usize>, Range<usize>)>, 99 100 + /// Syntax spans that can be conditionally hidden, grouped by paragraph 101 + pub syntax_spans_by_paragraph: Vec<Vec<SyntaxSpanInfo>>, 102 103 + /// Refs (wikilinks, AT embeds) collected during this render pass, grouped by paragraph 104 + pub collected_refs_by_paragraph: Vec<Vec<weaver_common::ExtractedRef>>, 105 } 106 107 /// Classification of markdown syntax characters ··· 379 /// 380 /// This writer processes offset-iter events to detect gaps (consumed formatting) 381 /// and emits them as styled spans for visibility in the editor. 382 + /// 383 + /// Output is segmented by paragraph boundaries - each paragraph's HTML goes into 384 + /// a separate String in the output segments Vec. 
385 + pub struct EditorWriter<'a, I: Iterator<Item = (Event<'a>, Range<usize>)>, E = (), R = ()> { 386 source: &'a str, 387 source_text: &'a LoroText, 388 events: I, 389 + writer: SegmentedWriter, 390 last_byte_offset: usize, 391 last_char_offset: usize, 392 ··· 414 render_tables_as_markdown: bool, 415 table_start_offset: Option<usize>, // track start of table for markdown rendering 416 417 + // Offset mapping tracking - current paragraph 418 offset_maps: Vec<OffsetMapping>, 419 + node_id_prefix: Option<String>, // paragraph ID prefix for stable node IDs 420 next_node_id: usize, 421 current_node_id: Option<String>, // node ID for current text container 422 current_node_char_offset: usize, // UTF-16 offset within current node ··· 432 current_paragraph_start: Option<(usize, usize)>, // (byte_offset, char_offset) 433 list_depth: usize, // Track nesting depth to avoid paragraph boundary override inside lists 434 435 + // Syntax span tracking for conditional visibility - current paragraph 436 syntax_spans: Vec<SyntaxSpanInfo>, 437 next_syn_id: usize, 438 /// Stack of pending inline formats: (syn_id of opening span, char start of region) 439 /// Used to set formatted_range when closing paired inline markers 440 pending_inline_formats: Vec<(String, usize)>, 441 442 + /// Collected refs (wikilinks, AT embeds, AT links) for current paragraph 443 ref_collector: weaver_common::RefCollector, 444 445 + // Per-paragraph accumulated results (completed paragraphs) 446 + offset_maps_by_para: Vec<Vec<OffsetMapping>>, 447 + syntax_spans_by_para: Vec<Vec<SyntaxSpanInfo>>, 448 + refs_by_para: Vec<Vec<weaver_common::ExtractedRef>>, 449 + 450 _phantom: std::marker::PhantomData<&'a ()>, 451 } 452 ··· 456 Body, 457 } 458 459 + impl<'a, I: Iterator<Item = (Event<'a>, Range<usize>)>, E: EmbedContentProvider, R: ImageResolver> 460 + EditorWriter<'a, I, E, R> 461 { 462 + pub fn new(source: &'a str, source_text: &'a LoroText, events: I) -> Self { 463 + Self::new_with_all_offsets(source, 
source_text, events, 0, 0, 0, 0) 464 } 465 466 pub fn new_with_all_offsets( 467 source: &'a str, 468 source_text: &'a LoroText, 469 events: I, 470 node_id_offset: usize, 471 syn_id_offset: usize, 472 char_offset_base: usize, ··· 476 source, 477 source_text, 478 events, 479 + writer: SegmentedWriter::new(), 480 last_byte_offset: byte_offset_base, 481 last_char_offset: char_offset_base, 482 end_newline: true, ··· 494 code_block_char_start: None, 495 code_block_opening_span_idx: None, 496 pending_blockquote_range: None, 497 + render_tables_as_markdown: true, 498 table_start_offset: None, 499 offset_maps: Vec::new(), 500 + node_id_prefix: None, 501 next_node_id: node_id_offset, 502 current_node_id: None, 503 current_node_char_offset: 0, ··· 506 paragraph_ranges: Vec::new(), 507 current_paragraph_start: None, 508 list_depth: 0, 509 syntax_spans: Vec::new(), 510 next_syn_id: syn_id_offset, 511 pending_inline_formats: Vec::new(), 512 ref_collector: weaver_common::RefCollector::new(), 513 + offset_maps_by_para: Vec::new(), 514 + syntax_spans_by_para: Vec::new(), 515 + refs_by_para: Vec::new(), 516 _phantom: std::marker::PhantomData, 517 } 518 } 519 520 /// Add an embed content provider 521 + pub fn with_embed_provider(mut self, provider: E) -> EditorWriter<'a, I, E, R> { 522 + self.embed_provider = Some(provider); 523 + self 524 } 525 526 /// Add an image resolver for mapping markdown image URLs to CDN URLs 527 pub fn with_image_resolver<R2: ImageResolver>( 528 self, 529 resolver: R2, 530 + ) -> EditorWriter<'a, I, E, R2> { 531 EditorWriter { 532 source: self.source, 533 source_text: self.source_text, ··· 553 render_tables_as_markdown: self.render_tables_as_markdown, 554 table_start_offset: self.table_start_offset, 555 offset_maps: self.offset_maps, 556 + node_id_prefix: self.node_id_prefix, 557 next_node_id: self.next_node_id, 558 current_node_id: self.current_node_id, 559 current_node_char_offset: self.current_node_char_offset, ··· 562 paragraph_ranges: 
self.paragraph_ranges, 563 current_paragraph_start: self.current_paragraph_start, 564 list_depth: self.list_depth, 565 syntax_spans: self.syntax_spans, 566 next_syn_id: self.next_syn_id, 567 pending_inline_formats: self.pending_inline_formats, 568 ref_collector: self.ref_collector, 569 + offset_maps_by_para: self.offset_maps_by_para, 570 + syntax_spans_by_para: self.syntax_spans_by_para, 571 + refs_by_para: self.refs_by_para, 572 _phantom: std::marker::PhantomData, 573 } 574 } ··· 579 self 580 } 581 582 + /// Set a prefix for node IDs (typically the paragraph ID). 583 + /// This makes node IDs paragraph-scoped and stable across re-renders. 584 + pub fn with_node_id_prefix(mut self, prefix: &str) -> Self { 585 + self.node_id_prefix = Some(prefix.to_string()); 586 + self.next_node_id = 0; // Reset counter since each paragraph is independent 587 + self 588 + } 589 + 590 + /// Finalize the current paragraph: move accumulated items to per-para vectors, 591 + /// start a new output segment for the next paragraph. 
592 + fn finalize_paragraph(&mut self, byte_range: Range<usize>, char_range: Range<usize>) { 593 + // Record paragraph boundary 594 + self.paragraph_ranges.push((byte_range, char_range)); 595 + 596 + // Move current paragraph's data to per-para vectors 597 + self.offset_maps_by_para 598 + .push(std::mem::take(&mut self.offset_maps)); 599 + self.syntax_spans_by_para 600 + .push(std::mem::take(&mut self.syntax_spans)); 601 + self.refs_by_para 602 + .push(std::mem::take(&mut self.ref_collector.refs)); 603 + 604 + // Start new output segment for next paragraph 605 + self.writer.new_segment(); 606 + } 607 + 608 #[inline] 609 + fn write_newline(&mut self) -> fmt::Result { 610 self.end_newline = true; 611 self.writer.write_str("\n") 612 } 613 614 #[inline] 615 + fn write(&mut self, s: &str) -> fmt::Result { 616 if !s.is_empty() { 617 self.end_newline = s.ends_with('\n'); 618 } 619 self.writer.write_str(s) 620 } ··· 659 } 660 661 /// Emit syntax span for a given range and record offset mapping 662 + fn emit_syntax(&mut self, range: Range<usize>) -> Result<(), fmt::Error> { 663 if range.start < range.end { 664 let syntax = &self.source[range.clone()]; 665 if !syntax.is_empty() { ··· 674 syntax = %syntax.escape_debug(), 675 "emit_syntax" 676 ); 677 678 // Whitespace-only content (trailing spaces, newlines) should be emitted 679 // as plain text, not wrapped in a hideable syntax span ··· 772 syntax: &str, 773 byte_start: usize, 774 syntax_type: SyntaxType, 775 + ) -> Result<(), fmt::Error> { 776 if syntax.is_empty() { 777 return Ok(()); 778 } ··· 782 let char_end = char_start + syntax_char_len; 783 let byte_end = byte_start + syntax.len(); 784 785 let class_str = match syntax_type { 786 SyntaxType::Inline => "md-syntax-inline", 787 SyntaxType::Block => "md-syntax-block", ··· 816 } 817 818 /// Emit any gap between last position and next offset 819 + fn emit_gap_before(&mut self, next_offset: usize) -> Result<(), fmt::Error> { 820 // Skip gap emission if we're inside a table 
being rendered as markdown 821 if self.table_start_offset.is_some() && self.render_tables_as_markdown { 822 return Ok(()); ··· 834 Ok(()) 835 } 836 837 + /// Generate a unique node ID. 838 + /// If a prefix is set (paragraph ID), produces `{prefix}-n{counter}`. 839 + /// Otherwise produces `n{counter}` for backwards compatibility. 840 fn gen_node_id(&mut self) -> String { 841 + let id = if let Some(ref prefix) = self.node_id_prefix { 842 + format!("{}-n{}", prefix, self.next_node_id) 843 + } else { 844 + format!("n{}", self.next_node_id) 845 + }; 846 self.next_node_id += 1; 847 id 848 } ··· 914 /// 915 /// Returns offset mappings and paragraph boundaries. The HTML is written 916 /// to the writer passed in the constructor. 917 + pub fn run(mut self) -> Result<WriterResult, fmt::Error> { 918 while let Some((event, range)) = self.events.next() { 919 tracing::trace!( 920 target: "weaver::writer", ··· 1022 } 1023 } 1024 1025 + // Add any remaining accumulated data for the last paragraph 1026 + // (content that wasn't followed by a paragraph boundary) 1027 + if !self.offset_maps.is_empty() 1028 + || !self.syntax_spans.is_empty() 1029 + || !self.ref_collector.refs.is_empty() 1030 + { 1031 + self.offset_maps_by_para.push(self.offset_maps); 1032 + self.syntax_spans_by_para.push(self.syntax_spans); 1033 + self.refs_by_para.push(self.ref_collector.refs); 1034 + } 1035 + 1036 + // Get HTML segments from writer 1037 + let html_segments = self.writer.into_segments(); 1038 + 1039 Ok(WriterResult { 1040 + html_segments, 1041 + offset_maps_by_paragraph: self.offset_maps_by_para, 1042 paragraph_ranges: self.paragraph_ranges, 1043 + syntax_spans_by_paragraph: self.syntax_spans_by_para, 1044 + collected_refs_by_paragraph: self.refs_by_para, 1045 }) 1046 } 1047 1048 // Consume raw text events until end tag, for alt attributes 1049 + fn raw_text(&mut self) -> Result<(), fmt::Error> { 1050 use Event::*; 1051 let mut nest = 0; 1052 while let Some((event, _range)) = self.events.next() { 
··· 1110 } 1111 } 1112 1113 + fn process_event(&mut self, event: Event<'_>, range: Range<usize>) -> Result<(), fmt::Error> { 1114 use Event::*; 1115 1116 match event { ··· 1625 Ok(()) 1626 } 1627 1628 + fn start_tag(&mut self, tag: Tag<'_>, range: Range<usize>) -> Result<(), fmt::Error> { 1629 // Check if this is a block-level tag that should have syntax inside 1630 let is_block_tag = matches!(tag, Tag::Heading { .. } | Tag::BlockQuote(_)); 1631 ··· 1743 if self.end_newline { 1744 write!( 1745 &mut self.writer, 1746 + r#"<p id="{}" class="html-embed html-embed-block">"#, 1747 node_id 1748 )?; 1749 } else { 1750 write!( 1751 &mut self.writer, 1752 + "\n<p id=\"{}\" class=\"html-embed html-embed-block\">", 1753 node_id 1754 )?; 1755 } ··· 2475 &mut self, 2476 tag: markdown_weaver::TagEnd, 2477 range: Range<usize>, 2478 + ) -> Result<(), fmt::Error> { 2479 use markdown_weaver::TagEnd; 2480 2481 // Emit tag HTML first 2482 let result = match tag { 2483 TagEnd::HtmlBlock => { 2484 + // Capture paragraph boundary info BEFORE writing closing HTML 2485 + // Skip if inside a list - list owns the paragraph boundary 2486 + let para_boundary = if self.list_depth == 0 { 2487 + self.current_paragraph_start 2488 + .take() 2489 + .map(|(byte_start, char_start)| { 2490 + ( 2491 + byte_start..self.last_byte_offset, 2492 + char_start..self.last_char_offset, 2493 + ) 2494 + }) 2495 + } else { 2496 + None 2497 + }; 2498 2499 + // Write closing HTML to current segment 2500 self.end_node(); 2501 + self.write("</p>\n")?; 2502 + 2503 + // Now finalize paragraph (starts new segment) 2504 + if let Some((byte_range, char_range)) = para_boundary { 2505 + self.finalize_paragraph(byte_range, char_range); 2506 + } 2507 + Ok(()) 2508 } 2509 TagEnd::Paragraph => { 2510 + // Capture paragraph boundary info BEFORE writing closing HTML 2511 + // Skip if inside a list - list owns the paragraph boundary 2512 + let para_boundary = if self.list_depth == 0 { 2513 + self.current_paragraph_start 2514 +
.take() 2515 + .map(|(byte_start, char_start)| { 2516 + ( 2517 + byte_start..self.last_byte_offset, 2518 + char_start..self.last_char_offset, 2519 + ) 2520 + }) 2521 + } else { 2522 + None 2523 + }; 2524 2525 + // Write closing HTML to current segment 2526 self.end_node(); 2527 + self.write("</p>\n")?; 2528 + 2529 + // Now finalize paragraph (starts new segment) 2530 + if let Some((byte_range, char_range)) = para_boundary { 2531 + self.finalize_paragraph(byte_range, char_range); 2532 + } 2533 + Ok(()) 2534 } 2535 TagEnd::Heading(level) => { 2536 + // Capture paragraph boundary info BEFORE writing closing HTML 2537 + let para_boundary = 2538 + self.current_paragraph_start 2539 + .take() 2540 + .map(|(byte_start, char_start)| { 2541 + ( 2542 + byte_start..self.last_byte_offset, 2543 + char_start..self.last_char_offset, 2544 + ) 2545 + }); 2546 2547 + // Write closing HTML to current segment 2548 self.end_node(); 2549 self.write("</")?; 2550 write!(&mut self.writer, "{}", level)?; 2551 + self.write(">\n")?; 2552 + 2553 + // Now finalize paragraph (starts new segment) 2554 + if let Some((byte_range, char_range)) = para_boundary { 2555 + self.finalize_paragraph(byte_range, char_range); 2556 + } 2557 + Ok(()) 2558 } 2559 TagEnd::Table => { 2560 if self.render_tables_as_markdown { ··· 2604 TagEnd::BlockQuote(_) => { 2605 // If pending_blockquote_range is still set, the blockquote was empty 2606 // (no paragraph inside). Emit the > as its own minimal paragraph. 
2607 + let mut para_boundary = None; 2608 if let Some(bq_range) = self.pending_blockquote_range.take() { 2609 if bq_range.start < bq_range.end { 2610 let raw_text = &self.source[bq_range.clone()]; ··· 2615 // Create a minimal paragraph for the empty blockquote 2616 let node_id = self.gen_node_id(); 2617 write!(&mut self.writer, "<div id=\"{}\"", node_id)?; 2618 2619 + // Record start-of-node mapping for cursor positioning 2620 self.offset_maps.push(OffsetMapping { 2621 byte_range: para_byte_start..para_byte_start, 2622 char_range: para_char_start..para_char_start, ··· 2633 self.write("</div>\n")?; 2634 self.end_node(); 2635 2636 + // Capture paragraph boundary for later finalization 2637 let byte_range = para_byte_start..bq_range.end; 2638 let char_range = para_char_start..self.last_char_offset; 2639 + para_boundary = Some((byte_range, char_range)); 2640 } 2641 } 2642 } 2643 + self.write("</blockquote>\n")?; 2644 + 2645 + // Now finalize paragraph if we had one 2646 + if let Some((byte_range, char_range)) = para_boundary { 2647 + self.finalize_paragraph(byte_range, char_range); 2648 + } 2649 + Ok(()) 2650 } 2651 TagEnd::CodeBlock => { 2652 use std::sync::LazyLock; ··· 2774 } 2775 } 2776 2777 + // Finalize code block paragraph 2778 if let Some((byte_start, char_start)) = self.current_paragraph_start.take() { 2779 let byte_range = byte_start..self.last_byte_offset; 2780 let char_range = char_start..self.last_char_offset; 2781 + self.finalize_paragraph(byte_range, char_range); 2782 } 2783 2784 Ok(()) 2785 } 2786 TagEnd::List(true) => { 2787 self.list_depth = self.list_depth.saturating_sub(1); 2788 + // Capture paragraph boundary BEFORE writing closing HTML 2789 + let para_boundary = 2790 + self.current_paragraph_start 2791 + .take() 2792 + .map(|(byte_start, char_start)| { 2793 + ( 2794 + byte_start..self.last_byte_offset, 2795 + char_start..self.last_char_offset, 2796 + ) 2797 + }); 2798 + 2799 + self.write("</ol>\n")?; 2800 + 2801 + // Finalize paragraph after 
closing HTML 2802 + if let Some((byte_range, char_range)) = para_boundary { 2803 + self.finalize_paragraph(byte_range, char_range); 2804 } 2805 + Ok(()) 2806 } 2807 TagEnd::List(false) => { 2808 self.list_depth = self.list_depth.saturating_sub(1); 2809 + // Capture paragraph boundary BEFORE writing closing HTML 2810 + let para_boundary = 2811 + self.current_paragraph_start 2812 + .take() 2813 + .map(|(byte_start, char_start)| { 2814 + ( 2815 + byte_start..self.last_byte_offset, 2816 + char_start..self.last_char_offset, 2817 + ) 2818 + }); 2819 + 2820 + self.write("</ul>\n")?; 2821 + 2822 + // Finalize paragraph after closing HTML 2823 + if let Some((byte_range, char_range)) = para_boundary { 2824 + self.finalize_paragraph(byte_range, char_range); 2825 } 2826 + Ok(()) 2827 } 2828 TagEnd::Item => { 2829 self.end_node(); ··· 2916 } 2917 } 2918 2919 + impl<'a, I: Iterator<Item = (Event<'a>, Range<usize>)>, E: EmbedContentProvider, R: ImageResolver> 2920 + EditorWriter<'a, I, E, R> 2921 { 2922 fn write_embed( 2923 &mut self, ··· 2927 title: CowStr<'_>, 2928 id: CowStr<'_>, 2929 attrs: Option<markdown_weaver::WeaverAttributes<'_>>, 2930 + ) -> Result<(), fmt::Error> { 2931 // Embed rendering: all syntax elements share one syn_id for visibility toggling 2932 // Structure: ![[ url-as-link ]] <embed-content> 2933 let raw_text = &self.source[range.clone()];