···33use super::offset_map::{OffsetMapping, find_mapping_for_char};
44use super::paragraph::ParagraphRender;
55use super::render::render_paragraphs_incremental;
66-use jumprope::JumpRopeBuf;
66+use loro::LoroDoc;
77use serde::Serialize;
8899/// Serializable version of ParagraphRender for snapshot testing.
···54545555/// Helper: render markdown and convert to serializable test output.
5656fn render_test(input: &str) -> Vec<TestParagraph> {
5757- let rope = JumpRopeBuf::from(input);
5858- let (paragraphs, _cache) = render_paragraphs_incremental(&rope, None, None);
5757+ let doc = LoroDoc::new();
5858+ let text = doc.get_text("content");
5959+ text.insert(0, input).unwrap();
6060+ let (paragraphs, _cache) = render_paragraphs_incremental(&text, None, None);
5961 paragraphs.iter().map(TestParagraph::from).collect()
6062}
6163···434436 // cursor snaps to adjacent paragraphs for standard breaks.
435437 // Only EXTRA whitespace beyond \n\n gets gap elements.
436438 let input = "Hello\n\nWorld";
437437- let rope = JumpRopeBuf::from(input);
438438- let (paragraphs, _cache) = render_paragraphs_incremental(&rope, None, None);
439439+ let doc = LoroDoc::new();
440440+ let text = doc.get_text("content");
441441+ text.insert(0, input).unwrap();
442442+ let (paragraphs, _cache) = render_paragraphs_incremental(&text, None, None);
439443440444 // With standard \n\n break, we expect 2 paragraphs (no gap element)
441445 // Paragraph ranges include some trailing whitespace from markdown parsing
···453457 // Extra whitespace beyond MIN_PARAGRAPH_BREAK (2) gets gap elements
454458 // Plain paragraphs don't consume trailing newlines like headings do
455459 let input = "Hello\n\n\n\nWorld"; // 4 newlines = gap of 4 > 2
456456- let rope = JumpRopeBuf::from(input);
457457- let (paragraphs, _cache) = render_paragraphs_incremental(&rope, None, None);
460460+ let doc = LoroDoc::new();
461461+ let text = doc.get_text("content");
462462+ text.insert(0, input).unwrap();
463463+ let (paragraphs, _cache) = render_paragraphs_incremental(&text, None, None);
458464459465 // With extra newlines, we expect 3 elements: para, gap, para
460466 assert_eq!(paragraphs.len(), 3, "Expected 3 elements with extra whitespace");
···542548fn test_incremental_cache_reuse() {
543549 // Verify cache is populated and can be reused
544550 let input = "First para\n\nSecond para";
545545- let rope = JumpRopeBuf::from(input);
551551+ let doc = LoroDoc::new();
552552+ let text = doc.get_text("content");
553553+ text.insert(0, input).unwrap();
546554547547- let (paras1, cache1) = render_paragraphs_incremental(&rope, None, None);
555555+ let (paras1, cache1) = render_paragraphs_incremental(&text, None, None);
548556 assert!(!cache1.paragraphs.is_empty(), "Cache should be populated");
549557550558 // Second render with same content should reuse cache
551551- let (paras2, _cache2) = render_paragraphs_incremental(&rope, Some(&cache1), None);
559559+ let (paras2, _cache2) = render_paragraphs_incremental(&text, Some(&cache1), None);
552560553561 // Should produce identical output
554562 assert_eq!(paras1.len(), paras2.len());
···3333 let mut visible = HashSet::new();
34343535 for span in syntax_spans {
3636+ // Find the paragraph containing this span for boundary clamping
3737+ let para_bounds = find_paragraph_bounds(&span.char_range, paragraphs);
3838+3639 let should_show = match span.syntax_type {
3740 SyntaxType::Inline => {
3841 // Show if cursor within formatted span content OR adjacent to markers
3939- // "Adjacent" means within 1 char of the syntax boundaries
4040- let extended_range = span.char_range.start.saturating_sub(1)
4141- ..span.char_range.end.saturating_add(1);
4242+ // "Adjacent" means within 1 char of the syntax boundaries,
4343+ // clamped to paragraph bounds (paragraphs are split by newlines,
4444+ // so clamping to para bounds prevents cross-line extension)
4545+ let extended_start =
4646+ safe_extend_left(span.char_range.start, 1, para_bounds.as_ref());
4747+ let extended_end =
4848+ safe_extend_right(span.char_range.end, 1, para_bounds.as_ref());
4949+ let extended_range = extended_start..extended_end;
42504351 // Also show if cursor is anywhere in the formatted_range
4452 // (the region between paired opening/closing markers)
5353+ // Extend by 1 char on BOTH sides for symmetric "approaching" behavior,
5454+ // clamped to paragraph bounds.
4555 let in_formatted_region = span
4656 .formatted_range
4757 .as_ref()
4848- .map(|r| r.contains(&cursor_offset))
5858+ .map(|r| {
5959+ let ext_start = safe_extend_left(r.start, 1, para_bounds.as_ref());
6060+ let ext_end = safe_extend_right(r.end, 1, para_bounds.as_ref());
6161+ cursor_offset >= ext_start && cursor_offset <= ext_end
6262+ })
4963 .unwrap_or(false);
50645151- extended_range.contains(&cursor_offset)
6565+ let in_extended = extended_range.contains(&cursor_offset);
6666+ let result = in_extended
5267 || in_formatted_region
5368 || selection_overlaps(selection, &span.char_range)
5469 || span
5570 .formatted_range
5671 .as_ref()
5772 .map(|r| selection_overlaps(selection, r))
5858- .unwrap_or(false)
7373+ .unwrap_or(false);
7474+7575+ tracing::debug!(
7676+ "[VISIBILITY] span {} char_range {:?} formatted_range {:?} cursor {} -> in_extended={} in_formatted={} visible={}",
7777+ span.syn_id,
7878+ span.char_range,
7979+ span.formatted_range,
8080+ cursor_offset,
8181+ in_extended,
8282+ in_formatted_region,
8383+ result
8484+ );
8585+8686+ result
5987 }
6088 SyntaxType::Block => {
6189 // Show if cursor anywhere in same paragraph
···116144 false
117145}
118146147147+/// Find the paragraph bounds containing a syntax span.
148148+fn find_paragraph_bounds(
149149+ syntax_range: &Range<usize>,
150150+ paragraphs: &[ParagraphRender],
151151+) -> Option<Range<usize>> {
152152+ for para in paragraphs {
153153+ // Skip gap paragraphs
154154+ if para.syntax_spans.is_empty() && !para.char_range.is_empty() {
155155+ continue;
156156+ }
157157+158158+ if para.char_range.start <= syntax_range.start && syntax_range.end <= para.char_range.end {
159159+ return Some(para.char_range.clone());
160160+ }
161161+ }
162162+ None
163163+}
/// Move `pos` left by `amount` chars without crossing the paragraph start.
///
/// The subtraction saturates at 0. When `para_bounds` is given, the result is
/// raised to at least `para_bounds.start`; with no bounds, only the saturation
/// at 0 applies. Because paragraphs are already split on newlines, clamping to
/// the paragraph start also keeps the extension from crossing a line boundary.
fn safe_extend_left(pos: usize, amount: usize, para_bounds: Option<&Range<usize>>) -> usize {
    let shifted = pos.saturating_sub(amount);
    match para_bounds {
        Some(bounds) => shifted.max(bounds.start),
        None => shifted,
    }
}
/// Move `pos` right by `amount` chars without crossing the paragraph end.
///
/// The addition saturates at `usize::MAX`. When `para_bounds` is given, the
/// result is lowered to at most `para_bounds.end`; with no bounds, only the
/// saturation applies. Because paragraphs are already split on newlines,
/// clamping to the paragraph end also keeps the extension from crossing a
/// line boundary.
fn safe_extend_right(pos: usize, amount: usize, para_bounds: Option<&Range<usize>>) -> usize {
    let shifted = pos.saturating_add(amount);
    match para_bounds {
        Some(bounds) => shifted.min(bounds.end),
        None => shifted,
    }
}
182182+119183#[cfg(test)]
120184mod tests {
121185 use super::*;
···197261198262 #[test]
199263 fn test_inline_visibility_cursor_adjacent() {
264264+ // "test **bold** after"
265265+ // 5 7
200266 let spans = vec![
201267 make_span("s0", 5, 7, SyntaxType::Inline), // ** at positions 5-6
202268 ];
203203- let paras = vec![make_para(0, 20, spans.clone())];
269269+ let paras = vec![make_para(0, 19, spans.clone())];
204270205271 // Cursor at position 4 (one before ** which starts at 5)
206272 let vis = VisibilityState::calculate(4, None, &spans, ¶s);
···216282 let spans = vec![
217283 make_span("s0", 10, 12, SyntaxType::Inline),
218284 ];
219219- let paras = vec![make_para(0, 30, spans.clone())];
285285+ let paras = vec![make_para(0, 33, spans.clone())];
220286221287 // Cursor at position 0 (far from **)
222288 let vis = VisibilityState::calculate(0, None, &spans, ¶s);
···259325 let spans = vec![
260326 make_span("s0", 5, 7, SyntaxType::Inline),
261327 ];
262262- let paras = vec![make_para(0, 20, spans.clone())];
328328+ let paras = vec![make_para(0, 24, spans.clone())];
263329264330 // Selection overlaps the syntax span
265331 let selection = Selection { anchor: 3, head: 10 };
266332 let vis = VisibilityState::calculate(10, Some(&selection), &spans, ¶s);
267333 assert!(vis.is_visible("s0"), "** should be visible when selection overlaps");
334334+ }
#[test]
fn test_paragraph_boundary_blocks_extension() {
    // A cursor in paragraph 2 must NOT reveal syntax in paragraph 1, even when
    // it sits only one char past the paragraph boundary: the "adjacent"
    // extension is clamped to the bounds of the paragraph owning the span.
    let spans = vec![
        make_span_with_range("s0", 0, 2, SyntaxType::Inline, 0..8), // opening **
        make_span_with_range("s1", 6, 8, SyntaxType::Inline, 0..8), // closing **
    ];
    let paras = vec![
        make_para(0, 8, spans.clone()), // "**bold**"
        make_para(9, 13, vec![]),       // "text" (after newline)
    ];

    // Cursor at position 9 (start of the second paragraph): the closing **
    // ends at 8, and the paragraph clamp keeps the extension from reaching 9.
    let vis = VisibilityState::calculate(9, None, &spans, &paras);
    assert!(!vis.is_visible("s1"), "closing ** should NOT be visible when cursor is in next paragraph");
}
#[test]
fn test_extension_clamps_to_paragraph() {
    // Syntax at the very start of a paragraph: the leftward extension has
    // nowhere to go and must stop at the paragraph start.
    let spans = vec![
        make_span_with_range("s0", 0, 2, SyntaxType::Inline, 0..8),
    ];
    let paras = vec![make_para(0, 8, spans.clone())];

    // Cursor at position 0 is inside the span itself, so the opening ** shows.
    let vis = VisibilityState::calculate(0, None, &spans, &paras);
    assert!(vis.is_visible("s0"), "** at start should be visible when cursor at position 0");
}
269368}
+95-26
crates/weaver-app/src/components/editor/writer.rs
···77//! represent consumed formatting characters.
8899use super::offset_map::{OffsetMapping, RenderResult};
1010-use jumprope::JumpRopeBuf;
1010+use loro::LoroText;
1111use markdown_weaver::{
1212 Alignment, BlockQuoteKind, CodeBlockKind, CowStr, EmbedType, Event, LinkType, Tag,
1313};
···109109/// and emits them as styled spans for visibility in the editor.
110110pub struct EditorWriter<'a, I: Iterator<Item = (Event<'a>, Range<usize>)>, W: StrWrite, E = ()> {
111111 source: &'a str,
112112- source_rope: &'a JumpRopeBuf,
112112+ source_text: &'a LoroText,
113113 events: I,
114114 writer: W,
115115 last_byte_offset: usize,
···141141 current_node_id: Option<String>, // node ID for current text container
142142 current_node_char_offset: usize, // UTF-16 offset within current node
143143 current_node_child_count: usize, // number of child elements/text nodes in current container
144144+145145+ // Incremental UTF-16 offset tracking (replaces rope.chars_to_wchars)
146146+ // Maps char_offset -> utf16_offset at checkpoints we've traversed.
147147+ // Can be reused for future lookups or passed to subsequent writers.
148148+ utf16_checkpoints: Vec<(usize, usize)>, // (char_offset, utf16_offset)
144149145150 // Paragraph boundary tracking for incremental rendering
146151 paragraph_ranges: Vec<(Range<usize>, Range<usize>)>, // (byte_range, char_range)
···170175impl<'a, I: Iterator<Item = (Event<'a>, Range<usize>)>, W: StrWrite, E: EmbedContentProvider>
171176 EditorWriter<'a, I, W, E>
172177{
173173- pub fn new(source: &'a str, source_rope: &'a JumpRopeBuf, events: I, writer: W) -> Self {
174174- Self::new_with_node_offset(source, source_rope, events, writer, 0)
178178+ pub fn new(source: &'a str, source_text: &'a LoroText, events: I, writer: W) -> Self {
179179+ Self::new_with_node_offset(source, source_text, events, writer, 0)
175180 }
176181177182 pub fn new_with_node_offset(
178183 source: &'a str,
179179- source_rope: &'a JumpRopeBuf,
184184+ source_text: &'a LoroText,
180185 events: I,
181186 writer: W,
182187 node_id_offset: usize,
183188 ) -> Self {
184184- Self::new_with_offsets(source, source_rope, events, writer, node_id_offset, 0)
189189+ Self::new_with_offsets(source, source_text, events, writer, node_id_offset, 0)
185190 }
186191187192 pub fn new_with_offsets(
188193 source: &'a str,
189189- source_rope: &'a JumpRopeBuf,
194194+ source_text: &'a LoroText,
190195 events: I,
191196 writer: W,
192197 node_id_offset: usize,
···194199 ) -> Self {
195200 Self {
196201 source,
197197- source_rope,
202202+ source_text,
198203 events,
199204 writer,
200205 last_byte_offset: 0,
···217222 current_node_id: None,
218223 current_node_char_offset: 0,
219224 current_node_child_count: 0,
225225+ utf16_checkpoints: vec![(0, 0)],
220226 paragraph_ranges: Vec::new(),
221227 current_paragraph_start: None,
222228 list_depth: 0,
···232238 /// Used for fast boundary discovery in incremental rendering.
233239 pub fn new_boundary_only(
234240 source: &'a str,
235235- source_rope: &'a JumpRopeBuf,
241241+ source_text: &'a LoroText,
236242 events: I,
237243 writer: W,
238244 ) -> Self {
239245 Self {
240246 source,
241241- source_rope,
247247+ source_text,
242248 events,
243249 writer,
244250 last_byte_offset: 0,
···261267 current_node_id: None,
262268 current_node_char_offset: 0,
263269 current_node_child_count: 0,
270270+ utf16_checkpoints: vec![(0, 0)],
264271 syntax_spans: Vec::new(),
265272 next_syn_id: 0,
266273 pending_inline_formats: Vec::new(),
···276283 pub fn with_embed_provider(self, provider: E) -> EditorWriter<'a, I, W, E> {
277284 EditorWriter {
278285 source: self.source,
279279- source_rope: self.source_rope,
286286+ source_text: self.source_text,
280287 events: self.events,
281288 writer: self.writer,
282289 last_byte_offset: self.last_byte_offset,
···299306 current_node_id: self.current_node_id,
300307 current_node_char_offset: self.current_node_char_offset,
301308 current_node_child_count: self.current_node_child_count,
309309+ utf16_checkpoints: self.utf16_checkpoints,
302310 paragraph_ranges: self.paragraph_ranges,
303311 current_paragraph_start: self.current_paragraph_start,
304312 list_depth: self.list_depth,
···343351 let format_end = self.last_char_offset;
344352 let formatted_range = format_start..format_end;
345353354354+ tracing::debug!(
355355+ "[FINALIZE_PAIRED] Setting formatted_range {:?} for opening '{}' and closing (last span)",
356356+ formatted_range,
357357+ opening_syn_id
358358+ );
359359+346360 // Update the opening span's formatted_range
347361 if let Some(opening_span) = self
348362 .syntax_spans
···350364 .find(|s| s.syn_id == opening_syn_id)
351365 {
352366 opening_span.formatted_range = Some(formatted_range.clone());
367367+ tracing::debug!("[FINALIZE_PAIRED] Updated opening span {}", opening_syn_id);
368368+ } else {
369369+ tracing::warn!("[FINALIZE_PAIRED] Could not find opening span {}", opening_syn_id);
353370 }
354371355372 // Update the closing span's formatted_range (the most recent one)
···358375 // Only update if it's an inline span (closing syntax should be inline)
359376 if closing_span.syntax_type == SyntaxType::Inline {
360377 closing_span.formatted_range = Some(formatted_range);
378378+ tracing::debug!("[FINALIZE_PAIRED] Updated closing span {}", closing_span.syn_id);
361379 }
362380 }
363381 }
···544562 self.current_node_child_count = 0;
545563 }
/// Compute the UTF-16 length (number of 16-bit code units) of a text slice.
///
/// ASCII-only text takes a fast path: every ASCII byte is exactly one UTF-16
/// code unit, so the byte length is the answer. Otherwise the code units are
/// summed per char in a single pass (1 for BMP chars, 2 for chars that need a
/// surrogate pair), which avoids decoding the string twice.
#[inline]
fn utf16_len_for_slice(text: &str) -> usize {
    if text.is_ascii() {
        text.len()
    } else {
        text.chars().map(char::len_utf16).sum()
    }
}
579579+547580 /// Record an offset mapping for the given byte and char ranges.
548581 ///
549549- /// Computes UTF-16 length efficiently using the rope's internal indexing.
582582+ /// Builds up utf16_checkpoints incrementally for efficient lookups.
550583 fn record_mapping(&mut self, byte_range: Range<usize>, char_range: Range<usize>) {
551584 if let Some(ref node_id) = self.current_node_id {
552552- // Use rope to convert char offsets to UTF-16 (wchar) offsets - O(log n)
553553- let rope = self.source_rope.borrow();
554554- let wchar_start = rope.chars_to_wchars(char_range.start);
555555- let wchar_end = rope.chars_to_wchars(char_range.end);
556556- let utf16_len = wchar_end - wchar_start;
585585+ // Get UTF-16 length using fast path
586586+ let text_slice = &self.source[byte_range.clone()];
587587+ let utf16_len = Self::utf16_len_for_slice(text_slice);
588588+589589+ // Record checkpoint at end of this range for future lookups
590590+ let last_checkpoint = self.utf16_checkpoints.last().copied().unwrap_or((0, 0));
591591+ let new_utf16_offset = last_checkpoint.1 + utf16_len;
592592+593593+ // Only add checkpoint if we've advanced
594594+ if char_range.end > last_checkpoint.0 {
595595+ self.utf16_checkpoints.push((char_range.end, new_utf16_offset));
596596+ }
557597558598 let mapping = OffsetMapping {
559599 byte_range: byte_range.clone(),
···601641602642 // For End events, emit any trailing content within the event's range
603643 // BEFORE calling end_tag (which calls end_node and clears current_node_id)
604604- if matches!(&event, Event::End(_)) {
644644+ //
645645+ // EXCEPTION: For inline formatting tags (Strong, Emphasis, Strikethrough),
646646+ // the closing syntax must be emitted AFTER the closing HTML tag, not before.
647647+ // Otherwise the closing `**` span ends up INSIDE the <strong> element.
648648+ // These tags handle their own closing syntax in end_tag().
649649+ use markdown_weaver::TagEnd;
650650+ let is_inline_format_end = matches!(
651651+ &event,
652652+ Event::End(TagEnd::Strong | TagEnd::Emphasis | TagEnd::Strikethrough)
653653+ );
654654+655655+ if matches!(&event, Event::End(_)) && !is_inline_format_end {
605656 // Emit gap from last_byte_offset to range.end
606657 // (emit_syntax handles char offset tracking)
607658 self.emit_gap_before(range.end)?;
608608- } else {
659659+ } else if !matches!(&event, Event::End(_)) {
609660 // For other events, emit any gap before range.start
610661 // (emit_syntax handles char offset tracking)
611662 self.emit_gap_before(range.start)?;
612663 }
664664+ // For inline format End events, gap is emitted inside end_tag() AFTER the closing HTML
613665614666 // Store last_byte before processing
615667 let last_byte_before = self.last_byte_offset;
···632684 // Handle unmapped trailing content (stripped by parser)
633685 // This includes trailing spaces that markdown ignores
634686 let doc_byte_len = self.source.len();
635635- let doc_char_len = self.source_rope.len_chars();
687687+ let doc_char_len = self.source_text.len_unicode();
636688637689 if self.last_byte_offset < doc_byte_len || self.last_char_offset < doc_char_len {
638690 // Emit the trailing content as visible syntax
···11731225 syntax_type,
11741226 formatted_range: None, // Will be updated when closing tag is emitted
11751227 });
12281228+12291229+ // Record offset mapping for cursor positioning
12301230+ // This is critical - without it, current_node_char_offset is wrong
12311231+ // and all subsequent cursor positions are shifted
12321232+ let byte_start = range.start;
12331233+ let byte_end = range.start + syntax_byte_len;
12341234+ self.record_mapping(byte_start..byte_end, char_start..char_end);
1176123511771236 // For paired inline syntax (Strong, Emphasis, Strikethrough),
11781237 // track the opening span so we can set formatted_range when closing
···19902049 self.write("</dd>\n")
19912050 }
19922051 TagEnd::Emphasis => {
20522052+ // Write closing tag FIRST, then emit closing syntax OUTSIDE the tag
20532053+ self.write("</em>")?;
20542054+ self.emit_gap_before(range.end)?;
19932055 self.finalize_paired_inline_format();
19941994- self.write("</em>")
20562056+ Ok(())
19952057 }
19962058 TagEnd::Superscript => self.write("</sup>"),
19972059 TagEnd::Subscript => self.write("</sub>"),
19982060 TagEnd::Strong => {
20612061+ // Write closing tag FIRST, then emit closing syntax OUTSIDE the tag
20622062+ self.write("</strong>")?;
20632063+ self.emit_gap_before(range.end)?;
19992064 self.finalize_paired_inline_format();
20002000- self.write("</strong>")
20652065+ Ok(())
20012066 }
20022067 TagEnd::Strikethrough => {
20682068+ // Write closing tag FIRST, then emit closing syntax OUTSIDE the tag
20692069+ self.write("</s>")?;
20702070+ self.emit_gap_before(range.end)?;
20032071 self.finalize_paired_inline_format();
20042004- self.write("</s>")
20722072+ Ok(())
20052073 }
20062074 TagEnd::Link => self.write("</a>"),
20072075 TagEnd::Image => Ok(()), // No-op: raw_text() already consumed the End(Image) event
···2019208720202088 result?;
2021208920222022- // Note: Closing syntax for inline tags (Strong, Emphasis, etc.) is now handled
20232023- // by emit_gap_before(range.end) which is called before end_tag() in the main loop.
20242024- // No need for manual emission here anymore.
20902090+ // Note: Closing syntax for inline formatting tags (Strong, Emphasis, Strikethrough)
20912091+ // is handled INSIDE their respective match arms above, AFTER writing the closing HTML.
20922092+ // This ensures the closing syntax span appears OUTSIDE the formatted element.
20932093+ // Other End events have their closing syntax emitted by emit_gap_before() in the main loop.
2025209420262095 Ok(())
20272096 }