···426426}
427427428428// =============================================================================
429429+// Syntax Span Edge Case Tests
430430+// =============================================================================
/// `#text` (no space after the hash) is not a valid heading: it must render as a
/// single plain-text paragraph, and the `#` must not be wrapped in a block syntax span.
#[test]
fn test_invalid_heading_no_space() {
    let result = render_test("#text");

    // Exactly one paragraph is expected for this single-line input.
    assert_eq!(result.len(), 1, "Should have 1 paragraph");
    let html = &result[0].html;

    // The `#` must not be marked up as block-level syntax.
    assert!(
        !html.contains("md-syntax-block"),
        "Invalid heading '#text' should NOT have block syntax span. HTML: {}",
        html
    );

    // The literal text, including the `#`, must be present in the output.
    // The previous version OR-ed two identical `contains("#text")` checks, so the
    // second operand could never change the result; a single check is equivalent.
    // NOTE(review): if the second operand was meant to match an HTML-escaped form
    // of `#`, restore it with that escaped literal.
    assert!(
        html.contains("#text"),
        "The '#text' should appear as regular text. HTML: {}",
        html
    );
}
/// `# Heading` (hash followed by a space) is a valid heading.
#[test]
fn test_valid_heading_with_space() {
    let rendered = render_test("# Heading");
    let html = &rendered[0].html;

    // The heading marker is expected to carry the block syntax span class.
    let has_block_span = html.contains("md-syntax-block");
    assert!(
        has_block_span,
        "Valid heading should have block syntax span. HTML: {}",
        html
    );

    // Matched by tag prefix, so any attributes on the element do not matter.
    let is_h1 = html.contains("<h1");
    assert!(is_h1, "Valid heading should render as h1. HTML: {}", html);
}
/// A `#` that appears mid-line is ordinary text and must not be treated as heading syntax.
#[test]
fn test_hash_in_middle_of_text() {
    let rendered = render_test("Some #hashtag here");
    let html = &rendered[0].html;

    let wrapped_as_block_syntax = html.contains("md-syntax-block");
    assert!(
        !wrapped_as_block_syntax,
        "# in middle of text should NOT be block syntax. HTML: {}",
        html
    );
}
/// An opening `**` with no closing `**` must stay plain text rather than bold.
#[test]
fn test_unclosed_bold() {
    let rendered = render_test("**unclosed bold");
    let html = &rendered[0].html;

    // No `<strong>` element may be emitted for the dangling delimiter.
    let has_strong = html.contains("<strong>");
    assert!(
        !has_strong,
        "Unclosed ** should NOT render as bold. HTML: {}",
        html
    );
}
/// An opening `*` with no closing `*` must stay plain text rather than italic.
#[test]
fn test_unclosed_italic() {
    let rendered = render_test("*unclosed italic");
    let html = &rendered[0].html;

    // No `<em>` element may be emitted for the dangling delimiter.
    let has_em = html.contains("<em>");
    assert!(
        !has_em,
        "Unclosed * should NOT render as italic. HTML: {}",
        html
    );
}
/// A lone `*` with spaces on both sides (as in arithmetic) is not emphasis.
#[test]
fn test_asterisk_not_emphasis() {
    let rendered = render_test("5 * 3 = 15");
    let html = &rendered[0].html;

    // The multiplication sign must not open an `<em>` element.
    let has_em = html.contains("<em>");
    assert!(
        !has_em,
        "Math expression with * should NOT be italic. HTML: {}",
        html
    );
}
/// `-text` (dash with no following space) is not a list item.
#[test]
fn test_list_marker_needs_space() {
    let rendered = render_test("-not-a-list");
    let html = &rendered[0].html;

    // Neither a list item nor an unordered-list element may appear.
    let has_list_markup = html.contains("<li>") || html.contains("<ul>");
    assert!(
        !has_list_markup,
        "'-text' without space should NOT be a list. HTML: {}",
        html
    );
}
/// `- text` (dash followed by a space) is a valid list item.
#[test]
fn test_valid_list_with_space() {
    let rendered = render_test("- List item");
    let html = &rendered[0].html;

    // Either list element is accepted as evidence of list markup.
    let has_list_markup = html.contains("<li>") || html.contains("<ul>");
    assert!(
        has_list_markup,
        "Valid list should have list markup. HTML: {}",
        html
    );

    // The list marker itself is expected to carry the block syntax span class.
    let has_block_span = html.contains("md-syntax-block");
    assert!(
        has_block_span,
        "List marker should have block syntax span. HTML: {}",
        html
    );
}
/// `1.text` (number and dot with no following space) is not an ordered list.
#[test]
fn test_number_dot_needs_space() {
    let rendered = render_test("1.not-a-list");
    let html = &rendered[0].html;

    // No `<ol>` element may be emitted.
    let has_ordered_list = html.contains("<ol>");
    assert!(
        !has_ordered_list,
        "'1.text' without space should NOT be ordered list. HTML: {}",
        html
    );
}
/// `#` followed by U+200B (zero-width space) is not a heading: the zero-width
/// character does not count as the space a heading marker requires.
#[test]
fn test_hash_with_zero_width_char() {
    let rendered = render_test("#\u{200B}text");
    let html = &rendered[0].html;

    // Dump the rendered output to ease debugging when the assertion fails.
    eprintln!("HTML for '#\\u{{200B}}text': {}", html);

    let is_h1 = html.contains("<h1");
    assert!(
        !is_h1,
        "# followed by zero-width space should NOT be h1. HTML: {}",
        html
    );
}
/// `#` followed directly by a zero-width joiner or non-joiner is not a heading.
///
/// The earlier version was named and commented for the zero-width joiner but only
/// fed U+200C (ZERO WIDTH NON-JOINER) to the renderer. Both characters are covered
/// here so the test matches its name: U+200C (ZWNJ) and U+200D (ZWJ).
#[test]
fn test_hash_with_zwj() {
    let cases = [("ZWNJ", "#\u{200C}text"), ("ZWJ", "#\u{200D}text")];

    for (label, input) in cases {
        let result = render_test(input);
        let html = &result[0].html;

        // `{:?}` prints the invisible character as an escape so the log is readable.
        eprintln!("HTML for {:?} ({}): {}", input, label, html);

        assert!(
            !html.contains("<h1"),
            "# followed by {} should NOT be h1. HTML: {}",
            label,
            html
        );
    }
}
/// Exploratory test for `# \u{200B}`: a heading marker and a space, followed only
/// by a zero-width space as content.
///
/// The rendered HTML and offset map are printed for inspection. The only hard
/// requirement is that the renderer returns at least one paragraph.
#[test]
fn test_hash_space_then_zero_width() {
    let result = render_test("# \u{200B}");

    // State the requirement explicitly instead of relying on an index panic below.
    assert!(
        !result.is_empty(),
        "Heading with zero-width content should produce at least one paragraph"
    );
    let first = &result[0];

    eprintln!("HTML for '# \\u{{200B}}': {}", first.html);
    // The value printed is the paragraph's offset map (the label used to read
    // "Syntax spans", which did not match the field being dumped).
    eprintln!("Offset map: {:?}", first.offset_map);

    // This IS a valid heading (has space after #), even if content is "invisible".
    // Open question: should the # syntax be hidden in this case?
}
/// A bare `#` at end of line is a valid empty heading (standard CommonMark behavior).
#[test]
fn test_hash_alone() {
    let rendered = render_test("#");
    let html = &rendered[0].html;
    eprintln!("HTML for '#': {}", html);

    // Empty headings are valid, so an h1 element is expected.
    let is_h1 = html.contains("<h1");
    assert!(is_h1, "'#' alone IS a valid empty h1. HTML: {}", html);
}
/// Simulates typing: the document starts as `#` (an empty heading) and then gains
/// a `t`, becoming `#t` (not a heading). The second render reuses the cache from the
/// first one, so this checks that heading markup and its syntax span are dropped
/// when the content changes.
#[test]
fn test_heading_to_non_heading_transition() {
    use super::render::render_paragraphs_incremental;
    use loro::LoroDoc;

    let doc = LoroDoc::new();
    let text = doc.get_text("content");

    // Step 1: "#" on its own is a valid empty heading.
    text.insert(0, "#").unwrap();
    let (before, first_cache) = render_paragraphs_incremental(&text, None, None);
    let before_html = &before[0].html;

    eprintln!("State 1 ('#'): {}", before_html);
    assert!(before_html.contains("<h1"), "# alone should be heading");
    assert!(
        before_html.contains("md-syntax-block"),
        "# should have syntax span"
    );

    // Step 2: appending "t" yields "#t", which is no longer a heading.
    text.insert(1, "t").unwrap();
    let (after, _second_cache) = render_paragraphs_incremental(&text, Some(&first_cache), None);
    let after_html = &after[0].html;

    eprintln!("State 2 ('#t'): {}", after_html);
    assert!(
        !after_html.contains("<h1"),
        "#t should NOT be heading. HTML: {}",
        after_html
    );
    assert!(
        !after_html.contains("md-syntax-block"),
        "#t should NOT have block syntax span. HTML: {}",
        after_html
    );
}
/// Exploratory test for `# ` (hash and a space, no content).
///
/// Whether this counts as a heading is left open; the rendered HTML is printed so
/// the actual behavior can be inspected. The only hard requirement is that the
/// renderer returns at least one paragraph.
#[test]
fn test_hash_space_alone() {
    let result = render_test("# ");

    // State the requirement explicitly instead of relying on an index panic below.
    assert!(
        !result.is_empty(),
        "Hash followed by a space should produce at least one paragraph"
    );

    eprintln!("HTML for '# ': {}", result[0].html);
}
/// A lone `>` is an empty blockquote. It must still yield at least one paragraph
/// so the `>` can be rendered and edited; zero paragraphs would make it invisible.
#[test]
fn test_empty_blockquote() {
    let paragraphs = render_test(">");

    // Dump every paragraph to ease debugging when the assertion fails.
    eprintln!("Paragraphs for '>': {:?}", paragraphs.len());
    for (idx, para) in paragraphs.iter().enumerate() {
        eprintln!(
            " Para {}: html={}, char_range={:?}",
            idx, para.html, para.char_range
        );
    }

    let produced_nothing = paragraphs.is_empty();
    assert!(
        !produced_nothing,
        "Empty blockquote should produce at least one paragraph, got 0"
    );
}
693693+694694+#[test]
695695+fn test_blockquote_needs_space_or_newline() {
696696+ // ">quote" has no space between the blockquote marker and the text.
697697+ // The inline snapshot below pins the renderer's actual output for that input.
698698+ let result = render_test(">quote");
699699+700700+ // The recorded HTML wraps the text in <blockquote>, with ">" inside an md-syntax-block span.
701701+ insta::assert_yaml_snapshot!(result, @r#"
702702+ - byte_range:
703703+ - 6
704704+ - 6
705705+ char_range:
706706+ - 0
707707+ - 6
708708+ html: "<blockquote>\n<p id=\"n0\"><span class=\"md-syntax-block\" data-syn-id=\"s0\" data-char-start=\"0\" data-char-end=\"1\">></span>quote</p>\n</blockquote>\n"
709709+ offset_map:
710710+ - byte_range:
711711+ - 7
712712+ - 7
713713+ char_range:
714714+ - 0
715715+ - 0
716716+ node_id: n0
717717+ char_offset_in_node: 0
718718+ child_index: 0
719719+ utf16_len: 0
720720+ - byte_range:
721721+ - 6
722722+ - 7
723723+ char_range:
724724+ - 0
725725+ - 1
726726+ node_id: n0
727727+ char_offset_in_node: 0
728728+ child_index: ~
729729+ utf16_len: 1
730730+ - byte_range:
731731+ - 7
732732+ - 12
733733+ char_range:
734734+ - 1
735735+ - 6
736736+ node_id: n0
737737+ char_offset_in_node: 1
738738+ child_index: ~
739739+ utf16_len: 5
740740+ source_hash: 6279293067953035109
741741+ "#);
742742+}
743743+744744+// =============================================================================
429745// Char Range Coverage Tests
430746// =============================================================================
431747
+136-61
crates/weaver-app/src/components/editor/writer.rs
···359359 {
360360 opening_span.formatted_range = Some(formatted_range.clone());
361361 } else {
362362- tracing::warn!("[FINALIZE_PAIRED] Could not find opening span {}", opening_syn_id);
362362+ tracing::warn!(
363363+ "[FINALIZE_PAIRED] Could not find opening span {}",
364364+ opening_syn_id
365365+ );
363366 }
364367365368 // Update the closing span's formatted_range (the most recent one)
···397400 return Ok(());
398401 }
399402400400- let syntax_type = classify_syntax(syntax);
401401- let class = match syntax_type {
402402- SyntaxType::Inline => "md-syntax-inline",
403403- SyntaxType::Block => "md-syntax-block",
404404- };
403403+ // Whitespace-only content (trailing spaces, newlines) should be emitted
404404+ // as plain text, not wrapped in a hideable syntax span
405405+ let is_whitespace_only = syntax.trim().is_empty();
406406+407407+ if is_whitespace_only {
408408+ // Emit as plain text with tracking span (not hideable)
409409+ let created_node = if self.current_node_id.is_none() {
410410+ let node_id = self.gen_node_id();
411411+ write!(&mut self.writer, "<span id=\"{}\">", node_id)?;
412412+ self.begin_node(node_id);
413413+ true
414414+ } else {
415415+ false
416416+ };
417417+418418+ escape_html(&mut self.writer, syntax)?;
405419406406- // Generate unique ID for this syntax span
407407- let syn_id = self.gen_syn_id();
420420+ if created_node {
421421+ self.write("</span>")?;
422422+ self.end_node();
423423+ }
408424409409- // If we're outside any node, create a wrapper span for tracking
410410- let created_node = if self.current_node_id.is_none() {
411411- let node_id = self.gen_node_id();
412412- write!(
413413- &mut self.writer,
414414- "<span id=\"{}\" class=\"{}\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">",
415415- node_id, class, syn_id, char_start, char_end
416416- )?;
417417- self.begin_node(node_id);
418418- true
425425+ // Record offset mapping but no syntax span info
426426+ self.record_mapping(range.clone(), char_start..char_end);
427427+ self.last_char_offset = char_end;
428428+ self.last_byte_offset = range.end;
419429 } else {
420420- write!(
421421- &mut self.writer,
422422- "<span class=\"{}\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">",
423423- class, syn_id, char_start, char_end
424424- )?;
425425- false
426426- };
430430+ // Real syntax - wrap in hideable span
431431+ let syntax_type = classify_syntax(syntax);
432432+ let class = match syntax_type {
433433+ SyntaxType::Inline => "md-syntax-inline",
434434+ SyntaxType::Block => "md-syntax-block",
435435+ };
436436+437437+ // Generate unique ID for this syntax span
438438+ let syn_id = self.gen_syn_id();
439439+440440+ // If we're outside any node, create a wrapper span for tracking
441441+ let created_node = if self.current_node_id.is_none() {
442442+ let node_id = self.gen_node_id();
443443+ write!(
444444+ &mut self.writer,
445445+ "<span id=\"{}\" class=\"{}\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">",
446446+ node_id, class, syn_id, char_start, char_end
447447+ )?;
448448+ self.begin_node(node_id);
449449+ true
450450+ } else {
451451+ write!(
452452+ &mut self.writer,
453453+ "<span class=\"{}\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">",
454454+ class, syn_id, char_start, char_end
455455+ )?;
456456+ false
457457+ };
427458428428- escape_html(&mut self.writer, syntax)?;
429429- self.write("</span>")?;
459459+ escape_html(&mut self.writer, syntax)?;
460460+ self.write("</span>")?;
430461431431- // Record syntax span info for visibility toggling
432432- self.syntax_spans.push(SyntaxSpanInfo {
433433- syn_id,
434434- char_range: char_start..char_end,
435435- syntax_type,
436436- formatted_range: None,
437437- });
462462+ // Record syntax span info for visibility toggling
463463+ self.syntax_spans.push(SyntaxSpanInfo {
464464+ syn_id,
465465+ char_range: char_start..char_end,
466466+ syntax_type,
467467+ formatted_range: None,
468468+ });
438469439439- // Record offset mapping for this syntax
440440- self.record_mapping(range.clone(), char_start..char_end);
441441- self.last_char_offset = char_end;
442442- self.last_byte_offset = range.end; // Mark bytes as processed
470470+ // Record offset mapping for this syntax
471471+ self.record_mapping(range.clone(), char_start..char_end);
472472+ self.last_char_offset = char_end;
473473+ self.last_byte_offset = range.end;
443474444444- // Close wrapper if we created one
445445- if created_node {
446446- self.write("</span>")?;
447447- self.end_node();
475475+ // Close wrapper if we created one
476476+ if created_node {
477477+ self.write("</span>")?;
478478+ self.end_node();
479479+ }
448480 }
449481 }
450482 }
···585617586618 // Only add checkpoint if we've advanced
587619 if char_range.end > last_checkpoint.0 {
588588- self.utf16_checkpoints.push((char_range.end, new_utf16_offset));
620620+ self.utf16_checkpoints
621621+ .push((char_range.end, new_utf16_offset));
589622 }
590623591624 let mapping = OffsetMapping {
···675708676709 write!(
677710 &mut self.writer,
678678- "<span class=\"md-syntax-inline\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">",
711711+ "<span class=\"md-placeholder\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">",
679712 syn_id, char_start, char_end
680713 )?;
681714 escape_html(&mut self.writer, trailing)?;
682715 self.write("</span>")?;
683716684717 // Record syntax span info
685685- self.syntax_spans.push(SyntaxSpanInfo {
686686- syn_id,
687687- char_range: char_start..char_end,
688688- syntax_type: SyntaxType::Inline,
689689- formatted_range: None,
690690- });
718718+ // self.syntax_spans.push(SyntaxSpanInfo {
719719+ // syn_id,
720720+ // char_range: char_start..char_end,
721721+ // syntax_type: SyntaxType::Inline,
722722+ // formatted_range: None,
723723+ // });
691724692725 // Record mapping if we have a node
693726 if let Some(ref node_id) = self.current_node_id {
···9841017 let syn_id = self.gen_syn_id();
9851018 write!(
9861019 &mut self.writer,
987987- "<span class=\"md-syntax-inline\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">",
10201020+ "<span class=\"md-placeholder\" data-syn-id=\"{}\" data-char-start=\"{}\" data-char-end=\"{}\">",
9881021 syn_id, char_start, char_end
9891022 )?;
9901023 escape_html(&mut self.writer, spaces)?;
9911024 self.write("</span>")?;
99210259931026 // Record syntax span info
994994- self.syntax_spans.push(SyntaxSpanInfo {
995995- syn_id,
996996- char_range: char_start..char_end,
997997- syntax_type: SyntaxType::Inline,
998998- formatted_range: None,
999999- });
10271027+ // self.syntax_spans.push(SyntaxSpanInfo {
10281028+ // syn_id,
10291029+ // char_range: char_start..char_end,
10301030+ // syntax_type: SyntaxType::Inline,
10311031+ // formatted_range: None,
10321032+ // });
1000103310011034 // Count this span as a child
10021035 self.current_node_child_count += 1;
···10131046 self.current_node_child_count += 1;
1014104710151048 // After <br>, emit plain zero-width space for cursor positioning
10161016- self.write("\u{200B}")?;
10491049+ self.write(" ")?;
10501050+ //self.write("\u{200B}")?;
1017105110181052 // Count the zero-width space text node as a child
10191053 self.current_node_child_count += 1;
···12281262 // Record paragraph start for boundary tracking
12291263 // BUT skip if inside a list - list owns the paragraph boundary
12301264 if self.list_depth == 0 {
12311231- self.current_paragraph_start = Some((self.last_byte_offset, self.last_char_offset));
12651265+ self.current_paragraph_start =
12661266+ Some((self.last_byte_offset, self.last_char_offset));
12321267 }
1233126812341269 let node_id = self.gen_node_id();
···12681303 }
12691304 } else {
12701305 // Just > and maybe a space
12711271- (gt_pos + 2).min(raw_text.len())
13061306+ (gt_pos + 1).min(raw_text.len())
12721307 };
1273130812741309 let syntax = &raw_text[gt_pos..syntax_end];
···18901925 Ok(())
18911926 }
18921927 }
18931893- TagEnd::BlockQuote(_) => self.write("</blockquote>\n"),
19281928+ TagEnd::BlockQuote(_) => {
19291929+ // If pending_blockquote_range is still set, the blockquote was empty
19301930+ // (no paragraph inside). Emit the > as its own minimal paragraph.
19311931+ if let Some(bq_range) = self.pending_blockquote_range.take() {
19321932+ if bq_range.start < bq_range.end {
19331933+ let raw_text = &self.source[bq_range.clone()];
19341934+ if let Some(gt_pos) = raw_text.find('>') {
19351935+ let para_byte_start = bq_range.start + gt_pos;
19361936+ let para_char_start = self.last_char_offset;
19371937+19381938+ // Create a minimal paragraph for the empty blockquote
19391939+ // let node_id = self.gen_node_id();
19401940+ // write!(&mut self.writer, "<p id=\"{}\"", node_id)?;
19411941+ // self.begin_node(node_id.clone());
19421942+19431943+ // // Record start-of-node mapping for cursor positioning
19441944+ // self.offset_maps.push(OffsetMapping {
19451945+ // byte_range: para_byte_start..para_byte_start,
19461946+ // char_range: para_char_start..para_char_start,
19471947+ // node_id: node_id.clone(),
19481948+ // char_offset_in_node: 0,
19491949+ // child_index: Some(0),
19501950+ // utf16_len: 0,
19511951+ // });
19521952+19531953+ // Emit the > as block syntax
19541954+ let syntax = &raw_text[gt_pos..gt_pos + 1];
19551955+ self.emit_inner_syntax(syntax, para_byte_start, SyntaxType::Block)?;
19561956+19571957+ // self.write("</p>\n")?;
19581958+ // self.end_node();
19591959+19601960+ // Record paragraph boundary for incremental rendering
19611961+ let byte_range = para_byte_start..bq_range.end;
19621962+ let char_range = para_char_start..self.last_char_offset;
19631963+ self.paragraph_ranges.push((byte_range, char_range));
19641964+ }
19651965+ }
19661966+ }
19671967+ self.write("</blockquote>\n")
19681968+ }
18941969 TagEnd::CodeBlock => {
18951970 use std::sync::LazyLock;
18961971 use syntect::parsing::SyntaxSet;