commit 03aa2553ae2ac82172f03f5f4fcb82249cde047f · nonbinary.computer/weaver

+25 -2

crates/weaver-app/assets/styling/editor.css

.md-syntax-inline {
    color: var(--color-muted);
    opacity: 0.6;
}

/*
 * Inline syntax markers that carry the `hidden` attribute.
 *
 * The selector used to be written `.md-syntax-inline:[hidden]`. A colon must
 * be followed by a pseudo-class name, so that form is a syntax error and the
 * whole rule was discarded by the CSS parser. The attribute selector below is
 * the valid spelling.
 *
 * NOTE(review): `width` has no effect on non-replaced inline boxes - confirm
 * these markers are rendered as inline-block/block if `width: 0` matters.
 */
.md-syntax-inline[hidden] {
    color: var(--color-muted);
    opacity: 0.6;
    width: 0;
    user-select: none; /* idk if we want this when its hidden or not */
}

/* Markdown syntax characters - block level (#, >, -, etc) */
.md-syntax-block {
    color: var(--color-muted);
    opacity: 0.7;
    width: 0;
    font-weight: normal;
}

/*
 * Block syntax markers that carry the `hidden` attribute.
 *
 * Same selector fix as above (`:[hidden]` -> `[hidden]`); this rule was
 * previously dropped as invalid and now actually applies.
 *
 * NOTE(review): `display: inline-block` overrides the user-agent
 * `[hidden] { display: none }` rule, so matching elements become visible.
 * NOTE(review): `content` only generates text on ::before / ::after (and
 * ::marker); on a real element `attr(data-syntax)` renders nothing. If the
 * intent is to show the marker text, this probably wants to be
 * `.md-syntax-block[hidden]::before` - confirm before relying on it.
 */
.md-syntax-block[hidden] {
    content: attr(data-syntax);
    display: inline-block;
    margin-right: 4px;
    user-select: none; /* idk if we want this when its hidden or not */
}

/*
 * Cursor positioning helper (presumably emitted after a line break, going by
 * the class name): a zero-width, zero-font-size inline-block that still has
 * height, so the caret has a box to sit in.
 */
.br-cursor {
    display: inline-block;
    font-size: 0;
    width: 0;
    height: 1em; /* force height so cursor is visible */
    line-height: 1em;
    vertical-align: baseline;
}

/* Future: contextual hiding based on cursor position */

+13 -1

crates/weaver-app/src/components/editor/cursor.rs

··· 48 48 let (mapping, should_snap) = find_mapping_for_char(offset_map, char_offset) 49 49 .ok_or("no mapping found for cursor offset")?; 50 50 51 + tracing::info!("[CURSOR] Restoring cursor at offset {}", char_offset); 52 + tracing::info!("[CURSOR] found mapping: char_range {:?}, node_id '{}', char_offset_in_node {}", 53 + mapping.char_range, mapping.node_id, mapping.char_offset_in_node); 54 + 51 55 // If cursor is in invisible content, snap to next visible position 52 56 // For now, we'll still use the mapping but this is a future enhancement 53 57 if should_snap { ··· 58 62 let window = web_sys::window().ok_or("no window")?; 59 63 let document = window.document().ok_or("no document")?; 60 64 61 - // Get the container element by node ID 65 + // Get the container element by node ID (try id attribute first, then data-node-id) 62 66 let container = document 63 67 .get_element_by_id(&mapping.node_id) 68 + .or_else(|| { 69 + let selector = format!("[data-node-id='{}']", mapping.node_id); 70 + document.query_selector(&selector).ok().flatten() 71 + }) 64 72 .ok_or_else(|| format!("element not found: {}", mapping.node_id))?; 65 73 66 74 // Set selection using Range API ··· 116 124 let mut accumulated_utf16 = 0; 117 125 let mut last_node: Option<web_sys::Node> = None; 118 126 127 + tracing::info!("[CURSOR] Walking text nodes, target_utf16_offset = {}", target_utf16_offset); 119 128 while let Some(node) = walker.next_node()? 
{ 120 129 last_node = Some(node.clone()); 121 130 122 131 if let Some(text) = node.text_content() { 123 132 let text_len = text.encode_utf16().count(); 133 + tracing::info!("[CURSOR] text node: '{}' (utf16_len {}), accumulated = {}", 134 + text.chars().take(20).collect::<String>(), text_len, accumulated_utf16); 124 135 125 136 // Found the node containing target offset 126 137 if accumulated_utf16 + text_len >= target_utf16_offset { 127 138 let offset_in_node = target_utf16_offset - accumulated_utf16; 139 + tracing::info!("[CURSOR] -> FOUND at offset {} in this node", offset_in_node); 128 140 return Ok((node, offset_in_node)); 129 141 } 130 142

+430 -73

crates/weaver-app/src/components/editor/mod.rs

··· 8 8 mod document; 9 9 mod formatting; 10 10 mod offset_map; 11 - mod offsets; 11 + mod paragraph; 12 12 mod render; 13 13 mod rope_writer; 14 14 mod storage; ··· 18 18 pub use document::{Affinity, CompositionState, CursorState, EditorDocument, Selection}; 19 19 pub use formatting::{FormatAction, apply_formatting, find_word_boundaries}; 20 20 pub use offset_map::{OffsetMapping, RenderResult, find_mapping_for_byte}; 21 - pub use render::render_markdown_simple; 21 + pub use paragraph::ParagraphRender; 22 + pub use render::{render_markdown_simple, render_paragraphs}; 22 23 pub use rope_writer::RopeWriter; 23 24 pub use storage::{EditorSnapshot, clear_storage, load_from_storage, save_to_storage}; 24 25 pub use toolbar::EditorToolbar; 26 + pub use writer::WriterResult; 25 27 26 28 use dioxus::prelude::*; 27 29 ··· 58 60 let mut document = use_signal(|| EditorDocument::new(restored())); 59 61 let editor_id = "markdown-editor"; 60 62 61 - // Render markdown to HTML with offset mappings 62 - let render_result = use_memo(move || render::render_markdown_simple(&document().to_string())); 63 - let rendered_html = use_memo(move || render_result.read().html.clone()); 64 - let offset_map = use_memo(move || render_result.read().offset_map.clone()); 63 + // Render paragraphs for incremental updates 64 + let paragraphs = use_memo(move || render::render_paragraphs(&document().rope)); 65 + 66 + // Flatten offset maps from all paragraphs 67 + let offset_map = use_memo(move || { 68 + paragraphs() 69 + .iter() 70 + .flat_map(|p| p.offset_map.iter().cloned()) 71 + .collect::<Vec<_>>() 72 + }); 73 + 74 + // Track previous paragraphs for change detection (outside effect so it persists) 75 + let mut prev_paragraphs = use_signal(|| Vec::<ParagraphRender>::new()); 76 + 77 + // Update DOM when paragraphs change (incremental rendering) 78 + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] 79 + use_effect(move || { 80 + let new_paras = paragraphs(); 81 + let cursor_offset = 
document().cursor.offset; 82 + 83 + // Use peek() to avoid creating reactive dependency on prev_paragraphs 84 + let prev = prev_paragraphs.peek().clone(); 85 + 86 + let cursor_para_updated = update_paragraph_dom(editor_id, &prev, &new_paras, cursor_offset); 87 + 88 + // Only restore cursor if we actually re-rendered the paragraph it's in 89 + if cursor_para_updated { 90 + use wasm_bindgen::JsCast; 91 + use wasm_bindgen::prelude::*; 92 + 93 + let rope = document().rope.clone(); 94 + let map = offset_map(); 95 + 96 + // Use requestAnimationFrame to wait for browser paint 97 + if let Some(window) = web_sys::window() { 98 + let closure = Closure::once(move || { 99 + if let Err(e) = 100 + cursor::restore_cursor_position(&rope, cursor_offset, &map, editor_id) 101 + { 102 + tracing::warn!("Cursor restoration failed: {:?}", e); 103 + } 104 + }); 105 + 106 + let _ = window.request_animation_frame(closure.as_ref().unchecked_ref()); 107 + closure.forget(); 108 + } 109 + } 110 + 111 + // Store for next comparison (write-only, no reactive read) 112 + prev_paragraphs.set(new_paras); 113 + }); 65 114 66 115 // Auto-save with debounce 67 116 #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] ··· 75 124 timer.forget(); 76 125 }); 77 126 78 - // Restore cursor after re-render 79 - #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] 80 - use_effect(move || { 81 - use wasm_bindgen::prelude::*; 82 - use wasm_bindgen::JsCast; 83 - 84 - let cursor_offset = document().cursor.offset; 85 - let rope = document().rope.clone(); 86 - let map = offset_map.read().clone(); 87 - 88 - // Use requestAnimationFrame to wait for browser paint 89 - let window = web_sys::window().expect("no window"); 90 - 91 - let closure = Closure::once(move || { 92 - if let Err(e) = cursor::restore_cursor_position(&rope, cursor_offset, &map, editor_id) { 93 - tracing::warn!("Cursor restoration failed: {:?}", e); 94 - } 95 - }); 96 - 97 - let _ = 
window.request_animation_frame(closure.as_ref().unchecked_ref()); 98 - closure.forget(); 99 - }); 100 - 101 127 rsx! { 102 128 Stylesheet { href: asset!("/assets/styling/editor.css") } 103 129 div { class: "markdown-editor-container", ··· 111 137 id: "{editor_id}", 112 138 class: "editor-content", 113 139 contenteditable: "true", 114 - dangerous_inner_html: "{rendered_html}", 140 + // DOM populated via web-sys in use_effect for incremental updates 115 141 116 142 onkeydown: move |evt| { 117 - evt.prevent_default(); 118 - handle_keydown(evt, &mut document); 143 + // Only prevent default for operations that modify content 144 + // Let browser handle arrow keys, Home/End naturally 145 + if should_intercept_key(&evt) { 146 + evt.prevent_default(); 147 + handle_keydown(evt, &mut document); 148 + } 149 + }, 150 + 151 + onkeyup: move |evt| { 152 + // After any key (including arrow keys), sync cursor from DOM 153 + sync_cursor_from_dom(&mut document, editor_id); 154 + }, 155 + 156 + onclick: move |_evt| { 157 + // After mouse click, sync cursor from DOM 158 + sync_cursor_from_dom(&mut document, editor_id); 119 159 }, 120 160 121 161 onpaste: move |evt| { 122 162 evt.prevent_default(); 123 163 handle_paste(evt, &mut document); 124 164 }, 125 - 126 - // Phase 1: Accept that cursor position will jump 127 - // Phase 2: Restore cursor properly 128 165 } 129 166 130 167 ··· 141 178 } 142 179 } 143 180 181 + /// Check if we need to intercept this key event 182 + /// Returns true for content-modifying operations, false for navigation 183 + fn should_intercept_key(evt: &Event<KeyboardData>) -> bool { 184 + use dioxus::prelude::keyboard_types::Key; 185 + 186 + let key = evt.key(); 187 + let mods = evt.modifiers(); 188 + 189 + // Intercept shortcuts 190 + if mods.ctrl() || mods.meta() { 191 + return true; 192 + } 193 + 194 + // Intercept content modifications 195 + matches!( 196 + key, 197 + Key::Character(_) | Key::Backspace | Key::Delete | Key::Enter | Key::Tab 198 + ) 199 + } 200 
+ 201 + /// Sync internal cursor state from browser DOM selection 202 + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] 203 + fn sync_cursor_from_dom(document: &mut Signal<EditorDocument>, editor_id: &str) { 204 + use wasm_bindgen::JsCast; 205 + 206 + let window = match web_sys::window() { 207 + Some(w) => w, 208 + None => return, 209 + }; 210 + 211 + let dom_document = match window.document() { 212 + Some(d) => d, 213 + None => return, 214 + }; 215 + 216 + // Get editor element as boundary for search 217 + let editor_element = match dom_document.get_element_by_id(editor_id) { 218 + Some(e) => e, 219 + None => return, 220 + }; 221 + 222 + let selection = match window.get_selection() { 223 + Ok(Some(sel)) => sel, 224 + _ => return, 225 + }; 226 + 227 + // Get cursor position from selection 228 + let focus_node = match selection.focus_node() { 229 + Some(node) => node, 230 + None => return, 231 + }; 232 + 233 + let focus_offset = selection.focus_offset() as usize; 234 + 235 + // Find the text node's containing element with an ID (from offset map) 236 + // Walk up but stop at editor boundary to avoid escaping the editor 237 + let mut current_node = focus_node.clone(); 238 + let node_id = loop { 239 + if let Some(element) = current_node.dyn_ref::<web_sys::Element>() { 240 + // Stop if we've reached the editor boundary 241 + if element == &editor_element { 242 + break None; 243 + } 244 + 245 + // Check both id and data-node-id attributes 246 + // (paragraphs use id, headings use data-node-id to preserve user heading IDs) 247 + let id = element 248 + .get_attribute("id") 249 + .or_else(|| element.get_attribute("data-node-id")); 250 + 251 + if let Some(id) = id { 252 + // Look for node IDs like "n0", "n1", etc (from offset map) 253 + if id.starts_with('n') && id[1..].parse::<usize>().is_ok() { 254 + break Some(id); 255 + } 256 + } 257 + } 258 + 259 + current_node = match current_node.parent_node() { 260 + Some(parent) => parent, 261 + None => break None, 262 + 
}; 263 + }; 264 + 265 + let node_id = match node_id { 266 + Some(id) => id, 267 + None => { 268 + tracing::warn!("Could not find node_id for cursor position"); 269 + return; 270 + } 271 + }; 272 + 273 + let container = match dom_document.get_element_by_id(&node_id).or_else(|| { 274 + let selector = format!("[data-node-id='{}']", node_id); 275 + dom_document.query_selector(&selector).ok().flatten() 276 + }) { 277 + Some(e) => e, 278 + None => return, 279 + }; 280 + 281 + // Calculate UTF-16 offset from start of container to focus position 282 + let mut utf16_offset_in_container = 0; 283 + 284 + // Create tree walker for text nodes in container 285 + if let Ok(walker) = dom_document.create_tree_walker_with_what_to_show(&container, 4) { 286 + while let Ok(Some(node)) = walker.next_node() { 287 + if node == focus_node { 288 + // Found the exact text node, add the offset within it 289 + utf16_offset_in_container += focus_offset; 290 + break; 291 + } 292 + 293 + // Accumulate length of previous text nodes 294 + if let Some(text) = node.text_content() { 295 + utf16_offset_in_container += text.encode_utf16().count(); 296 + } 297 + } 298 + } 299 + 300 + // Now look up this position in the offset map 301 + // We need to find the mapping with this node_id and calculate rope offset 302 + document.with_mut(|doc| { 303 + // Render to get current offset maps 304 + let paragraphs = render::render_paragraphs(&doc.rope); 305 + 306 + tracing::debug!("[SYNC] Looking for node_id: {}, utf16_offset_in_container: {}", node_id, utf16_offset_in_container); 307 + 308 + // Find mapping with this node_id 309 + for para in paragraphs { 310 + for mapping in para.offset_map { 311 + if mapping.node_id == node_id { 312 + // Check if our utf16 offset falls within this mapping's range 313 + // End-INCLUSIVE to allow cursor at the end of text nodes 314 + let mapping_start = mapping.char_offset_in_node; 315 + let mapping_end = mapping.char_offset_in_node + mapping.utf16_len; 316 + 317 + if 
utf16_offset_in_container >= mapping_start && utf16_offset_in_container <= mapping_end { 318 + // Calculate rope offset 319 + let offset_in_mapping = utf16_offset_in_container - mapping_start; 320 + let rope_offset = mapping.char_range.start + offset_in_mapping; 321 + 322 + tracing::debug!("[SYNC] -> MATCHED! rope_offset: {} (was {})", rope_offset, doc.cursor.offset); 323 + doc.cursor.offset = rope_offset; 324 + return; 325 + } 326 + } 327 + } 328 + } 329 + 330 + tracing::warn!("Could not map DOM cursor position to rope offset"); 331 + }); 332 + } 333 + 334 + #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] 335 + fn sync_cursor_from_dom(_document: &mut Signal<EditorDocument>, _editor_id: &str) { 336 + // No-op on non-wasm 337 + } 338 + 144 339 /// Handle paste events and insert text at cursor 145 340 fn handle_paste(evt: Event<ClipboardData>, document: &mut Signal<EditorDocument>) { 146 341 // Downcast to web_sys event to get clipboard data ··· 213 408 doc.cursor.offset = start; 214 409 doc.selection = None; 215 410 } else if doc.cursor.offset > 0 { 216 - // Delete previous char 217 - let prev = doc.cursor.offset - 1; 218 - doc.rope.remove(prev..doc.cursor.offset); 219 - doc.cursor.offset = prev; 411 + // Check if we're about to delete a newline 412 + let prev_char = get_char_at(&doc.rope, doc.cursor.offset - 1); 413 + 414 + if prev_char == Some('\n') { 415 + let newline_pos = doc.cursor.offset - 1; 416 + let mut delete_start = newline_pos; 417 + let mut delete_end = doc.cursor.offset; 418 + 419 + // Check if there's another newline before this one (empty paragraph) 420 + // If so, delete both newlines to merge paragraphs 421 + if newline_pos > 0 { 422 + let prev_prev_char = get_char_at(&doc.rope, newline_pos - 1); 423 + if prev_prev_char == Some('\n') { 424 + // Empty paragraph case: delete both newlines 425 + delete_start = newline_pos - 1; 426 + } 427 + } 428 + 429 + // Also check if there's a zero-width char after cursor (inserted by Shift+Enter) 
430 + if let Some(ch) = get_char_at(&doc.rope, delete_end) { 431 + if ch == '\u{200C}' || ch == '\u{200B}' { 432 + delete_end += 1; 433 + } 434 + } 435 + 436 + // Scan backwards through whitespace before the newline(s) 437 + while delete_start > 0 { 438 + let ch = get_char_at(&doc.rope, delete_start - 1); 439 + match ch { 440 + Some(' ') | Some('\t') | Some('\u{200C}') | Some('\u{200B}') => { 441 + delete_start -= 1; 442 + } 443 + Some('\n') => break, // stop at another newline 444 + _ => break, // stop at actual content 445 + } 446 + } 447 + 448 + // Delete from where we stopped to end (including any trailing zero-width) 449 + doc.rope.remove(delete_start..delete_end); 450 + doc.cursor.offset = delete_start; 451 + } else { 452 + // Normal backspace - delete one char 453 + let prev = doc.cursor.offset - 1; 454 + doc.rope.remove(prev..doc.cursor.offset); 455 + doc.cursor.offset = prev; 456 + } 220 457 } 221 458 } 222 459 ··· 233 470 } 234 471 } 235 472 236 - Key::ArrowLeft => { 237 - if mods.ctrl() { 238 - // Word boundary (implement later) 239 - if doc.cursor.offset > 0 { 240 - doc.cursor.offset -= 1; 241 - } 242 - } else if doc.cursor.offset > 0 { 243 - doc.cursor.offset -= 1; 244 - } 245 - doc.selection = None; 246 - } 247 - 248 - Key::ArrowRight => { 249 - if mods.ctrl() { 250 - // Word boundary (implement later) 251 - if doc.cursor.offset < doc.len_chars() { 252 - doc.cursor.offset += 1; 253 - } 254 - } else if doc.cursor.offset < doc.len_chars() { 255 - doc.cursor.offset += 1; 256 - } 257 - doc.selection = None; 473 + // Arrow keys handled by browser, synced in onkeyup 474 + Key::ArrowLeft | Key::ArrowRight | Key::ArrowUp | Key::ArrowDown => { 475 + // Browser handles these naturally 258 476 } 259 477 260 478 Key::Enter => { ··· 265 483 doc.cursor.offset = start; 266 484 doc.selection = None; 267 485 } 268 - // Insert two spaces + newline for hard line break 269 - doc.rope.insert(doc.cursor.offset, " \n"); 270 - doc.cursor.offset += 3; 271 - } 272 486 273 - 
Key::Home => { 274 - let line_start = find_line_start(&doc.rope, doc.cursor.offset); 275 - doc.cursor.offset = line_start; 276 - doc.selection = None; 487 + if mods.shift() { 488 + // Shift+Enter: hard line break (soft break) 489 + doc.rope.insert(doc.cursor.offset, " \n\u{200C}"); 490 + doc.cursor.offset += 3; 491 + } else { 492 + // Enter: paragraph break (much cleaner, less jank) 493 + tracing::info!( 494 + "[ENTER] Before insert - cursor at {}, rope len {}", 495 + doc.cursor.offset, 496 + doc.len_chars() 497 + ); 498 + doc.rope.insert(doc.cursor.offset, "\n\n"); 499 + doc.cursor.offset += 2; 500 + tracing::info!( 501 + "[ENTER] After insert - cursor at {}, rope len {}", 502 + doc.cursor.offset, 503 + doc.len_chars() 504 + ); 505 + } 277 506 } 278 507 279 - Key::End => { 280 - let line_end = find_line_end(&doc.rope, doc.cursor.offset); 281 - doc.cursor.offset = line_end; 282 - doc.selection = None; 508 + // Home/End handled by browser, synced in onkeyup 509 + Key::Home | Key::End => { 510 + // Browser handles these naturally 283 511 } 284 512 285 513 _ => {} ··· 287 515 }); 288 516 } 289 517 518 + /// Get character at the given offset in the rope 519 + fn get_char_at(rope: &jumprope::JumpRopeBuf, offset: usize) -> Option<char> { 520 + if offset >= rope.len_chars() { 521 + return None; 522 + } 523 + 524 + let rope = rope.borrow(); 525 + let mut current = 0; 526 + for substr in rope.slice_substrings(offset..offset + 1) { 527 + for c in substr.chars() { 528 + if current == 0 { 529 + return Some(c); 530 + } 531 + current += 1; 532 + } 533 + } 534 + None 535 + } 536 + 290 537 /// Find start of line containing offset 291 538 fn find_line_start(rope: &jumprope::JumpRopeBuf, offset: usize) -> usize { 292 539 // Search backwards from cursor for newline ··· 326 573 327 574 rope.len_chars() 328 575 } 576 + 577 + /// Update paragraph DOM elements incrementally. 578 + /// 579 + /// Only modifies paragraphs that changed (by comparing source_hash). 
580 + /// Browser preserves cursor naturally in unchanged paragraphs. 581 + /// 582 + /// Returns true if the paragraph containing the cursor was updated. 583 + #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] 584 + fn update_paragraph_dom( 585 + editor_id: &str, 586 + old_paragraphs: &[ParagraphRender], 587 + new_paragraphs: &[ParagraphRender], 588 + cursor_offset: usize, 589 + ) -> bool { 590 + use wasm_bindgen::JsCast; 591 + 592 + let window = match web_sys::window() { 593 + Some(w) => w, 594 + None => return false, 595 + }; 596 + 597 + let document = match window.document() { 598 + Some(d) => d, 599 + None => return false, 600 + }; 601 + 602 + let editor = match document.get_element_by_id(editor_id) { 603 + Some(e) => e, 604 + None => return false, 605 + }; 606 + 607 + // Find which paragraph contains cursor 608 + // Use end-inclusive matching: cursor at position N belongs to paragraph (0..N) 609 + // This handles typing at end of paragraph, which is the common case 610 + // The empty paragraph at document end catches any trailing cursor positions 611 + let cursor_para_idx = new_paragraphs 612 + .iter() 613 + .position(|p| p.char_range.start <= cursor_offset && cursor_offset <= p.char_range.end); 614 + 615 + tracing::info!( 616 + "[DOM] cursor_offset = {}, cursor_para_idx = {:?}", 617 + cursor_offset, 618 + cursor_para_idx 619 + ); 620 + for (idx, para) in new_paragraphs.iter().enumerate() { 621 + let matches = 622 + para.char_range.start <= cursor_offset && cursor_offset <= para.char_range.end; 623 + tracing::info!( 624 + "[DOM] para {}: char_range {:?}, matches cursor? 
{}", 625 + idx, 626 + para.char_range, 627 + matches 628 + ); 629 + } 630 + 631 + let mut cursor_para_updated = false; 632 + 633 + // Update or create paragraphs 634 + for (idx, new_para) in new_paragraphs.iter().enumerate() { 635 + let para_id = format!("para-{}", idx); 636 + 637 + if let Some(old_para) = old_paragraphs.get(idx) { 638 + // Paragraph exists - check if changed 639 + if new_para.source_hash != old_para.source_hash { 640 + // Changed - update innerHTML 641 + if let Some(elem) = document.get_element_by_id(&para_id) { 642 + elem.set_inner_html(&new_para.html); 643 + } 644 + 645 + // Track if we updated the cursor's paragraph 646 + if Some(idx) == cursor_para_idx { 647 + cursor_para_updated = true; 648 + } 649 + } 650 + // Unchanged - do nothing, browser preserves cursor 651 + } else { 652 + // New paragraph - create div 653 + if let Ok(div) = document.create_element("div") { 654 + div.set_id(&para_id); 655 + div.set_inner_html(&new_para.html); 656 + let _ = editor.append_child(&div); 657 + } 658 + 659 + // Track if we created the cursor's paragraph 660 + if Some(idx) == cursor_para_idx { 661 + cursor_para_updated = true; 662 + } 663 + } 664 + } 665 + 666 + // Remove extra paragraphs if document got shorter 667 + for idx in new_paragraphs.len()..old_paragraphs.len() { 668 + let para_id = format!("para-{}", idx); 669 + if let Some(elem) = document.get_element_by_id(&para_id) { 670 + let _ = elem.remove(); 671 + } 672 + } 673 + 674 + cursor_para_updated 675 + } 676 + 677 + #[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))] 678 + fn update_paragraph_dom( 679 + _editor_id: &str, 680 + _old_paragraphs: &[ParagraphRender], 681 + _new_paragraphs: &[ParagraphRender], 682 + _cursor_offset: usize, 683 + ) -> bool { 684 + false 685 + }

-134

crates/weaver-app/src/components/editor/offsets.rs

··· 1 - //! Offset conversion utilities for converting between different offset systems. 2 - //! 3 - //! The editor deals with multiple offset systems: 4 - //! 1. **JumpRope**: Unicode scalar values (Rust `char` count) 5 - //! 2. **markdown-weaver**: UTF-8 byte offsets 6 - //! 3. **Rust strings**: UTF-8 byte indexing 7 - //! 4. **JavaScript DOM**: UTF-16 code units (Phase 2+) 8 - //! 9 - //! # Performance Notes 10 - //! 11 - //! **Prefer JumpRope's built-in methods:** 12 - //! - `rope.len_chars()` - O(1) character count 13 - //! - `rope.len_bytes()` - O(1) byte count 14 - //! - `rope.len_wchars()` - O(1) UTF-16 code unit count (Phase 2 with wchar_conversion) 15 - //! 16 - //! **Only use these conversion functions when:** 17 - //! - Converting markdown-weaver byte offsets to char offsets 18 - //! - Converting char offsets to byte offsets for markdown parsing 19 - //! 20 - //! For Phase 2+, use JumpRope's O(log n) UTF-16 conversions via the helpers below: 21 - //! - `char_to_utf16()` - O(log n) 22 - //! - `utf16_to_char()` - O(log n) 23 - 24 - /// Convert JumpRope char offset to UTF-8 byte offset. 25 - /// 26 - /// This is O(n) but acceptable for Phase 1 since we only render once per keystroke. 27 - /// For Phase 2+, we can optimize by caching or using string-offsets crate. 28 - /// 29 - /// # Example 30 - /// ``` 31 - /// let text = "Hello 🐻‍❄️ World"; 32 - /// // "Hello " = 6 chars, 6 bytes 33 - /// // "🐻‍❄️" = 4 chars, 13 bytes 34 - /// // Total at char 6 = byte 6 35 - /// assert_eq!(char_to_byte(text, 6), 6); 36 - /// // Total at char 10 (after emoji) = byte 19 37 - /// assert_eq!(char_to_byte(text, 10), 19); 38 - /// ``` 39 - pub fn char_to_byte(text: &str, char_offset: usize) -> usize { 40 - text.char_indices() 41 - .nth(char_offset) 42 - .map(|(byte_idx, _)| byte_idx) 43 - .unwrap_or(text.len()) 44 - } 45 - 46 - /// Convert UTF-8 byte offset to JumpRope char offset. 47 - /// 48 - /// Used when we need to map markdown-weaver byte offsets back to rope positions. 
49 - /// 50 - /// # Example 51 - /// ``` 52 - /// let text = "Hello 🐻‍❄️ World"; 53 - /// assert_eq!(byte_to_char(text, 6), 6); 54 - /// assert_eq!(byte_to_char(text, 19), 10); 55 - /// ``` 56 - pub fn byte_to_char(text: &str, byte_offset: usize) -> usize { 57 - text.char_indices() 58 - .take_while(|(idx, _)| *idx < byte_offset) 59 - .count() 60 - } 61 - 62 - /// Convert JumpRope char offset to UTF-16 code units (for DOM Selection API). 63 - /// 64 - /// O(log n) - uses JumpRope's internal index. 65 - /// 66 - /// # Example 67 - /// ``` 68 - /// let rope = JumpRopeBuf::from("🐻‍❄️"); 69 - /// // Polar bear is 4 chars, 5 UTF-16 code units 70 - /// assert_eq!(char_to_utf16(&rope, 0), 0); 71 - /// assert_eq!(char_to_utf16(&rope, 4), 5); 72 - /// ``` 73 - pub fn char_to_utf16(rope: &jumprope::JumpRopeBuf, char_offset: usize) -> usize { 74 - rope.borrow().chars_to_wchars(char_offset) 75 - } 76 - 77 - /// Convert UTF-16 code units (from DOM) to JumpRope char offset. 78 - /// 79 - /// O(log n) - uses JumpRope's internal index. 
80 - /// 81 - /// # Example 82 - /// ``` 83 - /// let rope = JumpRopeBuf::from("🐻‍❄️"); 84 - /// assert_eq!(utf16_to_char(&rope, 0), 0); 85 - /// assert_eq!(utf16_to_char(&rope, 5), 4); 86 - /// ``` 87 - pub fn utf16_to_char(rope: &jumprope::JumpRopeBuf, utf16_offset: usize) -> usize { 88 - rope.borrow().wchars_to_chars(utf16_offset) 89 - } 90 - 91 - #[cfg(test)] 92 - mod tests { 93 - use super::*; 94 - 95 - #[test] 96 - fn test_ascii() { 97 - let text = "hello"; 98 - assert_eq!(char_to_byte(text, 0), 0); 99 - assert_eq!(char_to_byte(text, 2), 2); 100 - assert_eq!(byte_to_char(text, 0), 0); 101 - assert_eq!(byte_to_char(text, 2), 2); 102 - } 103 - 104 - #[test] 105 - fn test_emoji() { 106 - // Polar bear: 4 chars, 13 bytes 107 - let text = "🐻‍❄️"; 108 - assert_eq!(text.chars().count(), 4); 109 - assert_eq!(text.len(), 13); 110 - 111 - assert_eq!(char_to_byte(text, 0), 0); 112 - assert_eq!(char_to_byte(text, 4), 13); 113 - 114 - assert_eq!(byte_to_char(text, 0), 0); 115 - assert_eq!(byte_to_char(text, 13), 4); 116 - } 117 - 118 - #[test] 119 - fn test_mixed() { 120 - let text = "Hello 🐻‍❄️ World"; 121 - // "Hello " = 6 chars, 6 bytes 122 - // "🐻‍❄️" = 4 chars, 13 bytes 123 - // " World" = 6 chars, 6 bytes 124 - // Total: 16 chars, 25 bytes 125 - 126 - assert_eq!(text.chars().count(), 16); 127 - assert_eq!(text.len(), 25); 128 - 129 - // Char 6 is start of emoji (byte 6) 130 - assert_eq!(char_to_byte(text, 6), 6); 131 - // Char 10 is after emoji (byte 19) 132 - assert_eq!(char_to_byte(text, 10), 19); 133 - } 134 - }

+50

crates/weaver-app/src/components/editor/paragraph.rs

··· 1 + //! Paragraph-level rendering for incremental updates. 2 + //! 3 + //! Paragraphs are discovered during markdown rendering by tracking 4 + //! Tag::Paragraph events. This allows updating only changed paragraphs in the DOM. 5 + 6 + use super::offset_map::OffsetMapping; 7 + use jumprope::JumpRopeBuf; 8 + use std::ops::Range; 9 + 10 + /// A rendered paragraph with its source range and offset mappings. 11 + #[derive(Debug, Clone, PartialEq)] 12 + pub struct ParagraphRender { 13 + /// Source byte range in the rope 14 + pub byte_range: Range<usize>, 15 + 16 + /// Source char range in the rope 17 + pub char_range: Range<usize>, 18 + 19 + /// Rendered HTML content (without wrapper div) 20 + pub html: String, 21 + 22 + /// Offset mappings for this paragraph 23 + pub offset_map: Vec<OffsetMapping>, 24 + 25 + /// Hash of source text for quick change detection 26 + pub source_hash: u64, 27 + } 28 + 29 + /// Simple hash function for source text comparison 30 + pub fn hash_source(text: &str) -> u64 { 31 + use std::collections::hash_map::DefaultHasher; 32 + use std::hash::{Hash, Hasher}; 33 + 34 + let mut hasher = DefaultHasher::new(); 35 + text.hash(&mut hasher); 36 + hasher.finish() 37 + } 38 + 39 + /// Extract substring from rope as String 40 + pub fn rope_slice_to_string(rope: &JumpRopeBuf, range: Range<usize>) -> String { 41 + let rope_borrow = rope.borrow(); 42 + let mut result = String::new(); 43 + 44 + for substr in rope_borrow.slice_substrings(range) { 45 + result.push_str(substr); 46 + } 47 + 48 + result 49 + } 50 +

+155 -13

crates/weaver-app/src/components/editor/render.rs

··· 1 1 //! Markdown rendering for the editor. 2 2 //! 3 - //! Phase 2: Full-document rendering with formatting characters visible as styled spans. 4 - //! Future: Incremental paragraph rendering and contextual formatting visibility. 3 + //! Phase 2: Paragraph-level incremental rendering with formatting characters visible. 5 4 //! 6 5 //! Uses EditorWriter which tracks gaps in offset_iter to preserve formatting characters. 7 6 8 - use markdown_weaver::Parser; 9 - use super::offset_map::RenderResult; 7 + use super::offset_map::{OffsetMapping, RenderResult}; 8 + use super::paragraph::{ParagraphRender, hash_source, rope_slice_to_string}; 10 9 use super::writer::EditorWriter; 10 + use jumprope::JumpRopeBuf; 11 + use markdown_weaver::Parser; 11 12 12 13 /// Render markdown to HTML with visible formatting characters and offset mappings. 13 14 /// ··· 24 25 /// - Offset map generation for cursor restoration 25 26 /// - Full document re-render (fast enough for current needs) 26 27 /// 27 - /// # Future improvements 28 - /// - Paragraph-level incremental rendering 29 - /// - Contextual formatting hiding based on cursor position 28 + /// # Deprecated: Use `render_paragraphs()` for incremental rendering 30 29 pub fn render_markdown_simple(source: &str) -> RenderResult { 31 - use jumprope::JumpRopeBuf; 32 - 33 30 let source_rope = JumpRopeBuf::from(source); 34 - let parser = Parser::new_ext(source, weaver_renderer::default_md_options()) 35 - .into_offset_iter(); 31 + let parser = Parser::new_ext(source, weaver_renderer::default_md_options()).into_offset_iter(); 36 32 let mut output = String::new(); 37 33 38 34 match EditorWriter::<_, _, ()>::new(source, &source_rope, parser, &mut output).run() { 39 - Ok(offset_map) => RenderResult { 35 + Ok(result) => RenderResult { 40 36 html: output, 41 - offset_map, 37 + offset_map: result.offset_maps, 42 38 }, 43 39 Err(_) => { 44 40 // Fallback to empty result on error ··· 49 45 } 50 46 } 51 47 } 48 + 49 + /// Render markdown in paragraph 
chunks for incremental DOM updates. 50 + /// 51 + /// First renders the whole document to discover paragraph boundaries via 52 + /// markdown events (Tag::Paragraph), then re-renders each paragraph separately. 53 + /// This allows updating only changed paragraphs in the DOM, preserving cursor 54 + /// position naturally. 55 + /// 56 + /// # Returns 57 + /// 58 + /// A vector of `ParagraphRender` structs, each containing: 59 + /// - Source byte and char ranges 60 + /// - Rendered HTML (without wrapper div) 61 + /// - Offset mappings for that paragraph 62 + /// - Source hash for change detection 63 + /// 64 + /// # Phase 2 Benefits 65 + /// - Only re-render changed paragraphs 66 + /// - Browser preserves cursor in unchanged paragraphs naturally 67 + /// - Faster for large documents 68 + /// - No manual cursor restoration needed for most edits 69 + pub fn render_paragraphs(rope: &JumpRopeBuf) -> Vec<ParagraphRender> { 70 + let source = rope.to_string(); 71 + 72 + // Handle empty rope - return single empty paragraph for cursor positioning 73 + if source.is_empty() { 74 + let empty_node_id = "n0".to_string(); 75 + let empty_html = format!(r#"{}"#, empty_node_id, '\u{200B}'); 76 + 77 + return vec![ParagraphRender { 78 + byte_range: 0..0, 79 + char_range: 0..0, 80 + html: empty_html, 81 + offset_map: vec![], 82 + source_hash: 0, 83 + }]; 84 + } 85 + 86 + // First pass: render whole document to get paragraph boundaries 87 + // TODO: CACHE THIS! 
88 + let parser = Parser::new_ext(&source, weaver_renderer::default_md_options()).into_offset_iter(); 89 + let mut scratch_output = String::new(); 90 + 91 + let paragraph_ranges = 92 + match EditorWriter::<_, _, ()>::new(&source, rope, parser, &mut scratch_output).run() { 93 + Ok(result) => result.paragraph_ranges, 94 + Err(_) => return Vec::new(), 95 + }; 96 + 97 + // Second pass: render each paragraph separately 98 + let mut paragraphs = Vec::with_capacity(paragraph_ranges.len()); 99 + let mut node_id_offset = 0; // Track total nodes used so far for unique IDs 100 + 101 + tracing::info!("[RENDER] Rendering {} paragraphs", paragraph_ranges.len()); 102 + for (idx, (byte_range, char_range)) in paragraph_ranges.iter().enumerate() { 103 + tracing::info!("[RENDER] Paragraph {}: char_range {:?}", idx, char_range); 104 + // Extract paragraph source 105 + let para_source = rope_slice_to_string(rope, char_range.clone()); 106 + let source_hash = hash_source(&para_source); 107 + 108 + // Render this paragraph with unique node IDs 109 + let para_rope = JumpRopeBuf::from(para_source.as_str()); 110 + let parser = 111 + Parser::new_ext(&para_source, weaver_renderer::default_md_options()).into_offset_iter(); 112 + let mut output = String::new(); 113 + 114 + let mut offset_map = match EditorWriter::<_, _, ()>::new_with_node_offset( 115 + &para_source, 116 + &para_rope, 117 + parser, 118 + &mut output, 119 + node_id_offset, 120 + ) 121 + .run() 122 + { 123 + Ok(result) => { 124 + // Update node ID offset for next paragraph 125 + // Count how many unique node IDs were used in this paragraph 126 + let max_node_id = result 127 + .offset_maps 128 + .iter() 129 + .filter_map(|m| { 130 + m.node_id 131 + .strip_prefix("n") 132 + .and_then(|s| s.parse::<usize>().ok()) 133 + }) 134 + .max() 135 + .unwrap_or(node_id_offset); 136 + node_id_offset = max_node_id + 1; 137 + 138 + result.offset_maps 139 + } 140 + Err(_) => Vec::new(), 141 + }; 142 + 143 + // Adjust offset map to be relative to 
document, not paragraph 144 + // Each mapping's ranges need to be shifted by paragraph start 145 + let para_char_start = char_range.start; 146 + let para_byte_start = byte_range.start; 147 + 148 + for mapping in &mut offset_map { 149 + mapping.byte_range.start += para_byte_start; 150 + mapping.byte_range.end += para_byte_start; 151 + mapping.char_range.start += para_char_start; 152 + mapping.char_range.end += para_char_start; 153 + } 154 + 155 + paragraphs.push(ParagraphRender { 156 + byte_range: byte_range.clone(), 157 + char_range: char_range.clone(), 158 + html: output, 159 + offset_map, 160 + source_hash, 161 + }); 162 + } 163 + 164 + // Check if rope ends with trailing newlines (empty paragraph at end) 165 + // If so, add an empty paragraph div for cursor positioning 166 + let source = rope.to_string(); 167 + let has_trailing_newlines = source.ends_with("\n\n") || source.ends_with("\n"); 168 + 169 + if has_trailing_newlines { 170 + let doc_end_char = rope.len_chars(); 171 + let doc_end_byte = rope.len_bytes(); 172 + 173 + let empty_node_id = format!("n{}", node_id_offset); 174 + let empty_html = format!(r#"{}"#, empty_node_id, '\u{200B}'); 175 + 176 + paragraphs.push(ParagraphRender { 177 + byte_range: doc_end_byte..doc_end_byte, 178 + char_range: doc_end_char..doc_end_char + 1, // range for the zero-width space 179 + html: empty_html, 180 + offset_map: vec![OffsetMapping { 181 + byte_range: doc_end_byte..doc_end_byte, 182 + char_range: doc_end_char..doc_end_char + 1, 183 + node_id: empty_node_id, 184 + char_offset_in_node: 0, 185 + child_index: None, 186 + utf16_len: 1, // zero-width space is 1 UTF-16 code unit 187 + }], 188 + source_hash: 0, // always render this paragraph 189 + }); 190 + } 191 + 192 + paragraphs 193 + }

+177 -126

crates/weaver-app/src/components/editor/writer.rs

··· 7 7 //! represent consumed formatting characters. 8 8 9 9 use super::offset_map::{OffsetMapping, RenderResult}; 10 - use super::offsets::{byte_to_char, char_to_byte}; 11 10 use jumprope::JumpRopeBuf; 12 11 use markdown_weaver::{ 13 12 Alignment, BlockQuoteKind, CodeBlockKind, CowStr, EmbedType, Event, LinkType, Tag, ··· 18 17 }; 19 18 use std::collections::HashMap; 20 19 use std::ops::Range; 20 + 21 + /// Result of rendering with the EditorWriter. 22 + #[derive(Debug, Clone)] 23 + pub struct WriterResult { 24 + /// Offset mappings from source to DOM positions 25 + pub offset_maps: Vec<OffsetMapping>, 26 + 27 + /// Paragraph boundaries in source: (byte_range, char_range) 28 + /// These are extracted during rendering by tracking Tag::Paragraph events 29 + pub paragraph_ranges: Vec<(Range<usize>, Range<usize>)>, 30 + } 21 31 22 32 /// Classification of markdown syntax characters 23 33 #[derive(Debug, Clone, Copy, PartialEq)] ··· 100 110 101 111 code_buffer: Option<(Option<String>, String)>, // (lang, content) 102 112 code_buffer_byte_range: Option<Range<usize>>, // byte range of buffered code content 113 + code_buffer_char_range: Option<Range<usize>>, // char range of buffered code content 103 114 pending_blockquote_range: Option<Range<usize>>, // range for emitting > inside next paragraph 104 115 105 116 // Table rendering mode ··· 113 124 current_node_char_offset: usize, // UTF-16 offset within current node 114 125 current_node_child_count: usize, // number of child elements/text nodes in current container 115 126 127 + // Paragraph boundary tracking for incremental rendering 128 + paragraph_ranges: Vec<(Range<usize>, Range<usize>)>, // (byte_range, char_range) 129 + current_paragraph_start: Option<(usize, usize)>, // (byte_offset, char_offset) 130 + 116 131 _phantom: std::marker::PhantomData<&'a ()>, 117 132 } 118 133 ··· 126 141 EditorWriter<'a, I, W, E> 127 142 { 128 143 pub fn new(source: &'a str, source_rope: &'a JumpRopeBuf, events: I, writer: W) -> Self { 
144 + Self::new_with_node_offset(source, source_rope, events, writer, 0) 145 + } 146 + 147 + pub fn new_with_node_offset( 148 + source: &'a str, 149 + source_rope: &'a JumpRopeBuf, 150 + events: I, 151 + writer: W, 152 + node_id_offset: usize, 153 + ) -> Self { 129 154 Self { 130 155 source, 131 156 source_rope, ··· 142 167 embed_provider: None, 143 168 code_buffer: None, 144 169 code_buffer_byte_range: None, 170 + code_buffer_char_range: None, 145 171 pending_blockquote_range: None, 146 172 render_tables_as_markdown: true, // Default to markdown rendering 147 173 table_start_offset: None, 148 174 offset_maps: Vec::new(), 149 - next_node_id: 0, 175 + next_node_id: node_id_offset, 150 176 current_node_id: None, 151 177 current_node_char_offset: 0, 152 178 current_node_child_count: 0, 179 + paragraph_ranges: Vec::new(), 180 + current_paragraph_start: None, 153 181 _phantom: std::marker::PhantomData, 154 182 } 155 183 } ··· 172 200 embed_provider: Some(provider), 173 201 code_buffer: self.code_buffer, 174 202 code_buffer_byte_range: self.code_buffer_byte_range, 203 + code_buffer_char_range: self.code_buffer_char_range, 175 204 pending_blockquote_range: self.pending_blockquote_range, 176 205 render_tables_as_markdown: self.render_tables_as_markdown, 177 206 table_start_offset: self.table_start_offset, ··· 180 209 current_node_id: self.current_node_id, 181 210 current_node_char_offset: self.current_node_char_offset, 182 211 current_node_child_count: self.current_node_child_count, 212 + paragraph_ranges: self.paragraph_ranges, 213 + current_paragraph_start: self.current_paragraph_start, 183 214 _phantom: std::marker::PhantomData, 184 215 } 185 216 } ··· 211 242 let char_start = self.last_char_offset; 212 243 let syntax_char_len = syntax.chars().count(); 213 244 214 - tracing::debug!( 215 - "emit_syntax: range={:?}, chars={}..{}, syntax={:?}", 216 - range, 217 - char_start, 218 - char_start + syntax_char_len, 219 - syntax 220 - ); 221 - 222 245 // If we're outside any 
node, create a wrapper span for tracking 223 246 let created_node = if self.current_node_id.is_none() { 224 247 let node_id = self.gen_node_id(); ··· 241 264 242 265 // Record offset mapping for this syntax 243 266 self.record_mapping(range.clone(), char_start..char_start + syntax_char_len); 244 - self.last_char_offset = char_start + syntax_char_len; 245 - self.last_byte_offset = range.end; // Mark bytes as processed 267 + let new_char = char_start + syntax_char_len; 268 + let new_byte = range.end; 269 + tracing::debug!("[EMIT_SYNTAX] Updating offsets: last_char {} -> {}, last_byte {} -> {}", 270 + self.last_char_offset, new_char, self.last_byte_offset, new_byte); 271 + self.last_char_offset = new_char; 272 + self.last_byte_offset = new_byte; // Mark bytes as processed 246 273 247 274 // Close wrapper if we created one 248 275 if created_node { ··· 300 327 let utf16_len = wchar_end - wchar_start; 301 328 302 329 let mapping = OffsetMapping { 303 - byte_range, 304 - char_range, 330 + byte_range: byte_range.clone(), 331 + char_range: char_range.clone(), 305 332 node_id: node_id.clone(), 306 333 char_offset_in_node: self.current_node_char_offset, 307 334 child_index: None, // text-based position ··· 309 336 }; 310 337 self.offset_maps.push(mapping); 311 338 self.current_node_char_offset += utf16_len; 339 + } else { 340 + tracing::warn!("[RECORD_MAPPING] SKIPPED - current_node_id is None!"); 312 341 } 313 342 } 314 343 315 344 /// Process markdown events and write HTML. 316 345 /// 317 - /// Returns the offset mappings. The HTML is written to the writer 318 - /// passed in the constructor. 319 - pub fn run(mut self) -> Result<Vec<OffsetMapping>, W::Error> { 346 + /// Returns offset mappings and paragraph boundaries. The HTML is written 347 + /// to the writer passed in the constructor. 
348 + pub fn run(mut self) -> Result<WriterResult, W::Error> { 320 349 while let Some((event, range)) = self.events.next() { 350 + // Log events for debugging 351 + tracing::debug!("[WRITER] Event: {:?}, range: {:?}, last_byte: {}, last_char: {}", 352 + match &event { 353 + Event::Start(tag) => format!("Start({:?})", tag), 354 + Event::End(tag) => format!("End({:?})", tag), 355 + Event::Text(t) => format!("Text('{}')", t), 356 + Event::Code(t) => format!("Code('{}')", t), 357 + Event::Html(t) => format!("Html('{}')", t), 358 + Event::InlineHtml(t) => format!("InlineHtml('{}')", t), 359 + Event::FootnoteReference(t) => format!("FootnoteReference('{}')", t), 360 + Event::SoftBreak => "SoftBreak".to_string(), 361 + Event::HardBreak => "HardBreak".to_string(), 362 + Event::Rule => "Rule".to_string(), 363 + Event::TaskListMarker(b) => format!("TaskListMarker({})", b), 364 + Event::WeaverBlock(t) => format!("WeaverBlock('{}')", t), 365 + Event::InlineMath(t) => format!("InlineMath('{}')", t), 366 + Event::DisplayMath(t) => format!("DisplayMath('{}')", t), 367 + }, 368 + &range, 369 + self.last_byte_offset, 370 + self.last_char_offset 371 + ); 372 + 321 373 // For End events, emit any trailing content within the event's range 322 374 // BEFORE calling end_tag (which calls end_node and clears current_node_id) 323 375 if matches!(&event, Event::End(_)) { ··· 330 382 self.emit_gap_before(range.start)?; 331 383 } 332 384 385 + // Store last_byte before processing 386 + let last_byte_before = self.last_byte_offset; 387 + 333 388 // Process the event (passing range for tag syntax) 334 389 self.process_event(event, range.clone())?; 335 390 336 - // Update tracking 337 - self.last_byte_offset = range.end; 391 + // Update tracking - but don't override if start_tag manually updated it 392 + // (for inline formatting tags that emit opening syntax) 393 + if self.last_byte_offset == last_byte_before { 394 + // Event didn't update offset, so we update it 395 + self.last_byte_offset = 
range.end; 396 + } 397 + // else: Event updated offset (e.g. start_tag emitted opening syntax), keep that value 338 398 } 339 399 340 400 // Emit any trailing syntax ··· 346 406 let doc_char_len = self.source_rope.len_chars(); 347 407 348 408 if self.last_byte_offset < doc_byte_len || self.last_char_offset < doc_char_len { 349 - tracing::debug!( 350 - "Unmapped trailing content: bytes {}..{}, chars {}..{}", 351 - self.last_byte_offset, 352 - doc_byte_len, 353 - self.last_char_offset, 354 - doc_char_len 355 - ); 356 - 357 409 // Emit the trailing content as visible syntax 358 410 if self.last_byte_offset < doc_byte_len { 359 411 let trailing = &self.source[self.last_byte_offset..]; ··· 384 436 } 385 437 } 386 438 387 - Ok(self.offset_maps) 439 + Ok(WriterResult { 440 + offset_maps: self.offset_maps, 441 + paragraph_ranges: self.paragraph_ranges, 442 + }) 388 443 } 389 444 390 445 // Consume raw text events until end tag, for alt attributes ··· 436 491 fn process_event(&mut self, event: Event<'_>, range: Range<usize>) -> Result<(), W::Error> { 437 492 use Event::*; 438 493 439 - tracing::debug!( 440 - "Event: {:?}, range: {:?}", 441 - match &event { 442 - Start(tag) => format!("Start({:?})", tag), 443 - End(tag) => format!("End({:?})", tag), 444 - Text(t) => format!("Text({:?})", &t[..t.len().min(20)]), 445 - _ => format!("{:?}", event), 446 - }, 447 - range 448 - ); 449 494 match event { 450 495 Start(tag) => self.start_tag(tag, range)?, 451 496 End(tag) => self.end_tag(tag, range)?, ··· 454 499 if let Some((_, ref mut buffer)) = self.code_buffer { 455 500 buffer.push_str(&text); 456 501 457 - // Track byte range for code block content 458 - if let Some(ref mut code_range) = self.code_buffer_byte_range { 459 - // Extend existing range 460 - code_range.end = range.end; 502 + // Track byte and char ranges for code block content 503 + let text_char_len = text.chars().count(); 504 + if let Some(ref mut code_byte_range) = self.code_buffer_byte_range { 505 + // Extend 
existing ranges 506 + code_byte_range.end = range.end; 507 + if let Some(ref mut code_char_range) = self.code_buffer_char_range { 508 + code_char_range.end = self.last_char_offset + text_char_len; 509 + } 461 510 } else { 462 511 // First text in code block - start tracking 463 512 self.code_buffer_byte_range = Some(range.clone()); 513 + self.code_buffer_char_range = Some(self.last_char_offset..self.last_char_offset + text_char_len); 464 514 } 465 515 } else if !self.in_non_writing_block { 466 516 // Escape HTML and count chars in one pass ··· 468 518 let text_char_len = 469 519 escape_html_body_text_with_char_count(&mut self.writer, &text)?; 470 520 let char_end = char_start + text_char_len; 471 - 472 - tracing::debug!( 473 - "Text event: range={:?}, chars={}..{}, text={:?}", 474 - range, 475 - char_start, 476 - char_end, 477 - &text[..text.len().min(40)] 478 - ); 479 521 480 522 // Text becomes a text node child of the current container 481 523 if text_char_len > 0 { ··· 580 622 let gap = &self.source[range.clone()]; 581 623 if gap.ends_with('\n') { 582 624 let spaces = &gap[..gap.len() - 1]; // everything except the \n 583 - let char_start = byte_to_char(self.source, range.start); 625 + let char_start = self.last_char_offset; 584 626 let spaces_char_len = spaces.chars().count(); 585 627 586 628 // Emit and map the visible spaces ··· 602 644 // Count the as a child 603 645 self.current_node_child_count += 1; 604 646 605 - // Map the newline to an element-based position (after the ) 606 - // The binary search is end-inclusive, so cursor at position N+1 607 - // will match a mapping with range N..N+1 647 + // After , emit plain zero-width space for cursor positioning 648 + self.write("\u{200B}")?; 649 + 650 + // Count the zero-width space text node as a child 651 + self.current_node_child_count += 1; 652 + 653 + // Map the newline position to the zero-width space text node 608 654 if let Some(ref node_id) = self.current_node_id { 609 655 let newline_char_offset = 
char_start + spaces_char_len; 610 656 let mapping = OffsetMapping { 611 657 byte_range: range.start + spaces.len()..range.end, 612 658 char_range: newline_char_offset..newline_char_offset + 1, 613 659 node_id: node_id.clone(), 614 - char_offset_in_node: 0, 615 - child_index: Some(self.current_node_child_count), 616 - utf16_len: 0, 660 + char_offset_in_node: self.current_node_char_offset, 661 + child_index: None, // text node - TreeWalker will find it 662 + utf16_len: 1, // zero-width space is 1 UTF-16 unit 617 663 }; 618 664 self.offset_maps.push(mapping); 665 + 666 + // Increment char offset - TreeWalker will encounter this text node 667 + self.current_node_char_offset += 1; 619 668 } 620 669 670 + // DO NOT increment last_char_offset - zero-width space is not in source 671 + // The \n itself IS in source, so we already accounted for it 621 672 self.last_char_offset = char_start + spaces_char_len + 1; // +1 for \n 622 673 } else { 623 674 // Fallback: just ··· 724 775 SyntaxClass::Inline => "md-syntax-inline", 725 776 SyntaxClass::Block => "md-syntax-block", 726 777 }; 778 + 779 + let char_start = self.last_char_offset; 780 + let syntax_char_len = syntax.chars().count(); 781 + let syntax_byte_len = syntax.len(); 782 + 727 783 self.write("")?; 730 786 escape_html(&mut self.writer, syntax)?; 731 787 self.write("")?; 788 + 789 + // Update tracking - we've consumed this opening syntax 790 + tracing::debug!("[START_TAG] Opening syntax '{}': last_char {} -> {}, last_byte {} -> {}", 791 + syntax, self.last_char_offset, char_start + syntax_char_len, 792 + self.last_byte_offset, range.start + syntax_byte_len); 793 + self.last_char_offset = char_start + syntax_char_len; 794 + self.last_byte_offset = range.start + syntax_byte_len; 732 795 } 733 796 } 734 797 ··· 736 799 match tag { 737 800 Tag::HtmlBlock => Ok(()), 738 801 Tag::Paragraph => { 802 + // Record paragraph start for boundary tracking 803 + self.current_paragraph_start = Some((self.last_byte_offset, 
self.last_char_offset)); 804 + 739 805 let node_id = self.gen_node_id(); 740 806 if self.end_newline { 741 807 write!(&mut self.writer, "", node_id)?; ··· 791 857 classes, 792 858 attrs, 793 859 } => { 860 + // Record paragraph start for boundary tracking 861 + // Treat headings as paragraph-level blocks 862 + self.current_paragraph_start = Some((self.last_byte_offset, self.last_char_offset)); 863 + 794 864 if !self.end_newline { 795 865 self.write("\n")?; 796 866 } ··· 835 905 self.write(">")?; 836 906 837 907 // Begin node tracking for offset mapping 838 - self.begin_node(node_id); 908 + self.begin_node(node_id.clone()); 909 + 910 + // Map the start position of the heading (before any content) 911 + // This allows cursor to be placed at the very beginning 912 + let heading_start_char = self.last_char_offset; 913 + let mapping = OffsetMapping { 914 + byte_range: range.start..range.start, 915 + char_range: heading_start_char..heading_start_char, 916 + node_id: node_id.clone(), 917 + char_offset_in_node: 0, 918 + child_index: Some(0), // position before first child 919 + utf16_len: 0, 920 + }; 921 + self.offset_maps.push(mapping); 839 922 840 923 // Emit # syntax inside the heading tag 841 924 if range.start < range.end { 842 - let raw_text = &self.source[range]; 925 + let raw_text = &self.source[range.clone()]; 843 926 let count = level as usize; 844 927 let pattern = "#".repeat(count); 845 928 ··· 849 932 let syntax_start = hash_pos; 850 933 let syntax_end = (hash_pos + count + 1).min(raw_text.len()); 851 934 let syntax = &raw_text[syntax_start..syntax_end]; 935 + let syntax_char_len = syntax.chars().count(); 936 + 937 + // Calculate byte range for this syntax in the source 938 + let syntax_byte_start = range.start + syntax_start; 939 + let syntax_byte_end = range.start + syntax_end; 940 + let char_start = self.last_char_offset; 852 941 853 942 self.write("")?; 854 943 escape_html(&mut self.writer, syntax)?; 855 944 self.write("")?; 945 + 946 + // Record offset 
mapping and update char tracking 947 + // Note: last_byte_offset is managed by the main event loop 948 + self.record_mapping( 949 + syntax_byte_start..syntax_byte_end, 950 + char_start..char_start + syntax_char_len 951 + ); 952 + self.last_char_offset = char_start + syntax_char_len; 856 953 } 857 954 } 858 955 Ok(()) ··· 1193 1290 let result = match tag { 1194 1291 TagEnd::HtmlBlock => Ok(()), 1195 1292 TagEnd::Paragraph => { 1293 + // Record paragraph end for boundary tracking 1294 + if let Some((byte_start, char_start)) = self.current_paragraph_start.take() { 1295 + let byte_range = byte_start..self.last_byte_offset; 1296 + let char_range = char_start..self.last_char_offset; 1297 + self.paragraph_ranges.push((byte_range, char_range)); 1298 + } 1299 + 1196 1300 self.end_node(); 1197 1301 self.write("\n") 1198 1302 } 1199 1303 TagEnd::Heading(level) => { 1304 + // Record paragraph end for boundary tracking 1305 + if let Some((byte_start, char_start)) = self.current_paragraph_start.take() { 1306 + let byte_range = byte_start..self.last_byte_offset; 1307 + let char_range = char_start..self.last_char_offset; 1308 + self.paragraph_ranges.push((byte_range, char_range)); 1309 + } 1310 + 1200 1311 self.end_node(); 1201 1312 self.write("</")?; 1202 1313 write!(&mut self.writer, "{}", level)?; ··· 1255 1366 LazyLock::new(|| SyntaxSet::load_defaults_newlines()); 1256 1367 1257 1368 if let Some((lang, buffer)) = self.code_buffer.take() { 1258 - // Create offset mapping for code block content if we tracked a range 1259 - if let Some(code_byte_range) = self.code_buffer_byte_range.take() { 1260 - // Calculate char range from the tracked byte range 1261 - let char_start = byte_to_char(self.source, code_byte_range.start); 1262 - let char_end = byte_to_char(self.source, code_byte_range.end); 1263 - let char_range = char_start..char_end; 1264 - 1369 + // Create offset mapping for code block content if we tracked ranges 1370 + if let (Some(code_byte_range), Some(code_char_range)) = 
1371 + (self.code_buffer_byte_range.take(), self.code_buffer_char_range.take()) { 1265 1372 // Record mapping before writing HTML 1266 1373 // (current_node_id should be set by start_tag for CodeBlock) 1267 - self.record_mapping(code_byte_range, char_range); 1374 + self.record_mapping(code_byte_range, code_char_range); 1268 1375 } 1269 1376 1270 1377 if let Some(ref lang_str) = lang { ··· 1348 1455 1349 1456 result?; 1350 1457 1351 - // Extract and emit closing syntax based on tag type 1352 - if range.start < range.end { 1353 - let raw_text = &self.source[range]; 1354 - let closing_syntax = match &tag { 1355 - TagEnd::Strong => { 1356 - if raw_text.ends_with("**") { 1357 - Some("**") 1358 - } else if raw_text.ends_with("__") { 1359 - Some("__") 1360 - } else { 1361 - None 1362 - } 1363 - } 1364 - TagEnd::Emphasis => { 1365 - if raw_text.ends_with("*") { 1366 - Some("*") 1367 - } else if raw_text.ends_with("_") { 1368 - Some("_") 1369 - } else { 1370 - None 1371 - } 1372 - } 1373 - TagEnd::Strikethrough => { 1374 - if raw_text.ends_with("~~") { 1375 - Some("~~") 1376 - } else { 1377 - None 1378 - } 1379 - } 1380 - TagEnd::Link => { 1381 - // Extract ](url) part 1382 - if let Some(idx) = raw_text.rfind("](") { 1383 - Some(&raw_text[idx..]) 1384 - } else { 1385 - None 1386 - } 1387 - } 1388 - TagEnd::CodeBlock => { 1389 - if raw_text.ends_with("```") { 1390 - raw_text.lines().last() 1391 - } else { 1392 - None 1393 - } 1394 - } 1395 - _ => None, 1396 - }; 1397 - 1398 - if let Some(syntax) = closing_syntax { 1399 - let class = match classify_syntax(syntax) { 1400 - SyntaxClass::Inline => "md-syntax-inline", 1401 - SyntaxClass::Block => "md-syntax-block", 1402 - }; 1403 - self.write("")?; 1406 - escape_html(&mut self.writer, syntax)?; 1407 - self.write("")?; 1408 - } 1409 - } 1458 + // Note: Closing syntax for inline tags (Strong, Emphasis, etc.) is now handled 1459 + // by emit_gap_before(range.end) which is called before end_tag() in the main loop. 
1460 + // No need for manual emission here anymore. 1410 1461 1411 1462 Ok(()) 1412 1463 }