crates/weaver-app/src/components/editor/tests.rs at main · nonbinary.computer/weaver

atproto blogging
weaver / crates / weaver-app / src / components / editor / tests.rs
at main 1188 lines 35 kB view raw
   1//! Snapshot tests for the markdown editor rendering pipeline.
   2
   3use serde::Serialize;
   4use weaver_common::ResolvedContent;
   5use weaver_editor_core::ParagraphRender;
   6use weaver_editor_core::{
   7    EditorImageResolver, OffsetMapping, TextBuffer, find_mapping_for_char,
   8    render_paragraphs_incremental,
   9};
  10use weaver_editor_crdt::LoroTextBuffer;
  11
  12/// Serializable version of ParagraphRender for snapshot testing.
  13#[derive(Debug, Serialize)]
  14struct TestParagraph {
  15    byte_range: (usize, usize),
  16    char_range: (usize, usize),
  17    html: String,
  18    offset_map: Vec<TestOffsetMapping>,
  19    source_hash: u64,
  20}
  21
  22impl From<&ParagraphRender> for TestParagraph {
  23    fn from(p: &ParagraphRender) -> Self {
  24        TestParagraph {
  25            byte_range: (p.byte_range.start, p.byte_range.end),
  26            char_range: (p.char_range.start, p.char_range.end),
  27            html: p.html.clone(),
  28            offset_map: p.offset_map.iter().map(TestOffsetMapping::from).collect(),
  29            source_hash: p.source_hash,
  30        }
  31    }
  32}
  33
  34/// Serializable version of OffsetMapping for snapshot testing.
  35#[derive(Debug, Serialize)]
  36struct TestOffsetMapping {
  37    byte_range: (usize, usize),
  38    char_range: (usize, usize),
  39    node_id: String,
  40    char_offset_in_node: usize,
  41    child_index: Option<usize>,
  42    utf16_len: usize,
  43}
  44
  45impl From<&OffsetMapping> for TestOffsetMapping {
  46    fn from(m: &OffsetMapping) -> Self {
  47        TestOffsetMapping {
  48            byte_range: (m.byte_range.start, m.byte_range.end),
  49            char_range: (m.char_range.start, m.char_range.end),
  50            node_id: m.node_id.to_string(),
  51            char_offset_in_node: m.char_offset_in_node,
  52            child_index: m.child_index,
  53            utf16_len: m.utf16_len,
  54        }
  55    }
  56}
  57
  58/// Helper: render markdown and convert to serializable test output.
  59fn render_test(input: &str) -> Vec<TestParagraph> {
  60    let mut buffer = LoroTextBuffer::new();
  61    buffer.insert(0, input);
  62    let result = render_paragraphs_incremental(
  63        &buffer,
  64        None,
  65        0,
  66        None,
  67        None::<&EditorImageResolver>,
  68        None,
  69        &ResolvedContent::default(),
  70    );
  71    result.paragraphs.iter().map(TestParagraph::from).collect()
  72}
  73
  74// =============================================================================
  75// Basic Paragraph Tests
  76// =============================================================================
  77
  78#[test]
  79fn test_single_paragraph() {
  80    let result = render_test("Hello world");
  81    insta::assert_yaml_snapshot!(result);
  82}
  83
  84#[test]
  85fn test_two_paragraphs() {
  86    let result = render_test("First paragraph.\n\nSecond paragraph.");
  87    insta::assert_yaml_snapshot!(result);
  88}
  89
  90#[test]
  91fn test_three_paragraphs() {
  92    let result = render_test("One.\n\nTwo.\n\nThree.");
  93    insta::assert_yaml_snapshot!(result);
  94}
  95
  96// =============================================================================
  97// Block Element Tests
  98// =============================================================================
  99
 100#[test]
 101fn test_heading_h1() {
 102    let result = render_test("# Heading 1");
 103    insta::assert_yaml_snapshot!(result);
 104}
 105
 106#[test]
 107fn test_heading_levels() {
 108    let result = render_test("# H1\n\n## H2\n\n### H3\n\n#### H4");
 109    insta::assert_yaml_snapshot!(result);
 110}
 111
 112#[test]
 113fn test_code_block_fenced() {
 114    let result = render_test("```rust\nfn main() {}\n```");
 115    insta::assert_yaml_snapshot!(result);
 116}
 117
 118#[test]
 119fn test_unordered_list() {
 120    let result = render_test("- Item 1\n- Item 2\n- Item 3");
 121    insta::assert_yaml_snapshot!(result);
 122}
 123
 124#[test]
 125fn test_ordered_list() {
 126    let result = render_test("1. First\n2. Second\n3. Third");
 127    insta::assert_yaml_snapshot!(result);
 128}
 129
 130#[test]
 131fn test_nested_list() {
 132    let result = render_test("- Parent\n  - Child 1\n  - Child 2\n- Another parent");
 133    insta::assert_yaml_snapshot!(result);
 134}
 135
 136#[test]
 137fn test_blockquote() {
 138    let result = render_test("> This is a quote\n>\n> With multiple lines");
 139    insta::assert_yaml_snapshot!(result);
 140}
 141
 142// =============================================================================
 143// Inline Formatting Tests
 144// =============================================================================
 145
 146#[test]
 147fn test_bold() {
 148    let result = render_test("Some **bold** text");
 149    insta::assert_yaml_snapshot!(result);
 150}
 151
 152#[test]
 153fn test_italic() {
 154    let result = render_test("Some *italic* text");
 155    insta::assert_yaml_snapshot!(result);
 156}
 157
 158#[test]
 159fn test_inline_code() {
 160    let result = render_test("Some `code` here");
 161    insta::assert_yaml_snapshot!(result);
 162}
 163
 164#[test]
 165fn test_bold_italic() {
 166    let result = render_test("Some ***bold italic*** text");
 167    insta::assert_yaml_snapshot!(result);
 168}
 169
 170#[test]
 171fn test_multiple_inline_formats() {
 172    let result = render_test("**Bold** and *italic* and `code`");
 173    insta::assert_yaml_snapshot!(result);
 174}
 175
 176// =============================================================================
 177// Gap Paragraph Tests
 178// =============================================================================
 179
 180#[test]
 181fn test_gap_between_blocks() {
 182    // Verify gap paragraphs are inserted for whitespace between blocks
 183    let result = render_test("# Heading\n\nParagraph below");
 184    // Should have: heading, gap for \n\n, paragraph
 185    insta::assert_yaml_snapshot!(result);
 186}
 187
 188#[test]
 189fn test_multiple_blank_lines() {
 190    let result = render_test("First\n\n\n\nSecond");
 191    // Extra blank lines should be captured in gap paragraphs
 192    insta::assert_yaml_snapshot!(result);
 193}
 194
 195// =============================================================================
 196// Edge Case Tests
 197// =============================================================================
 198
 199#[test]
 200fn test_empty_document() {
 201    let result = render_test("");
 202    insta::assert_yaml_snapshot!(result);
 203}
 204
 205#[test]
 206fn test_only_newlines() {
 207    let result = render_test("\n\n\n");
 208    insta::assert_yaml_snapshot!(result);
 209}
 210
 211#[test]
 212fn test_trailing_single_newline() {
 213    let result = render_test("Hello\n");
 214    insta::assert_yaml_snapshot!(result);
 215}
 216
 217#[test]
 218fn test_trailing_double_newline() {
 219    let result = render_test("Hello\n\n");
 220    insta::assert_yaml_snapshot!(result);
 221}
 222
 223#[test]
 224fn test_hard_break() {
 225    // Two trailing spaces + newline = hard break
 226    let result = render_test("Line one  \nLine two");
 227    insta::assert_yaml_snapshot!(result);
 228}
 229
 230#[test]
 231fn test_unicode_emoji() {
 232    let result = render_test("Hello 🎉 world");
 233    insta::assert_yaml_snapshot!(result);
 234}
 235
 236#[test]
 237fn test_unicode_cjk() {
 238    let result = render_test("你好世界");
 239    insta::assert_yaml_snapshot!(result);
 240}
 241
 242#[test]
 243fn test_mixed_unicode_ascii() {
 244    let result = render_test("Hello 你好 world 🎉");
 245    insta::assert_yaml_snapshot!(result);
 246}
 247
 248// =============================================================================
 249// Offset Map Lookup Tests
 250// =============================================================================
 251
 252#[test]
 253fn test_find_mapping_exact_start() {
 254    let mappings = vec![OffsetMapping {
 255        byte_range: 0..5,
 256        char_range: 0..5,
 257        node_id: "n0".into(),
 258        char_offset_in_node: 0,
 259        child_index: None,
 260        utf16_len: 5,
 261    }];
 262
 263    let result = find_mapping_for_char(&mappings, 0);
 264    assert!(result.is_some());
 265    let (mapping, _) = result.unwrap();
 266    assert_eq!(mapping.char_range, 0..5);
 267}
 268
 269#[test]
 270fn test_find_mapping_exact_end_inclusive() {
 271    // Bug #1 regression: cursor at end of range should match
 272    let mappings = vec![OffsetMapping {
 273        byte_range: 0..5,
 274        char_range: 0..5,
 275        node_id: "n0".into(),
 276        char_offset_in_node: 0,
 277        child_index: None,
 278        utf16_len: 5,
 279    }];
 280
 281    // Position 5 should match the range 0..5 (end-inclusive for cursor)
 282    let result = find_mapping_for_char(&mappings, 5);
 283    assert!(result.is_some(), "cursor at end of range should match");
 284}
 285
 286#[test]
 287fn test_find_mapping_middle() {
 288    let mappings = vec![OffsetMapping {
 289        byte_range: 0..10,
 290        char_range: 0..10,
 291        node_id: "n0".into(),
 292        char_offset_in_node: 0,
 293        child_index: None,
 294        utf16_len: 10,
 295    }];
 296
 297    let result = find_mapping_for_char(&mappings, 5);
 298    assert!(result.is_some());
 299}
 300
 301#[test]
 302fn test_find_mapping_before_first() {
 303    let mappings = vec![OffsetMapping {
 304        byte_range: 5..10,
 305        char_range: 5..10,
 306        node_id: "n0".into(),
 307        char_offset_in_node: 0,
 308        child_index: None,
 309        utf16_len: 5,
 310    }];
 311
 312    // Position 2 is before the first mapping
 313    let result = find_mapping_for_char(&mappings, 2);
 314    assert!(result.is_none());
 315}
 316
 317#[test]
 318fn test_find_mapping_after_last() {
 319    let mappings = vec![OffsetMapping {
 320        byte_range: 0..5,
 321        char_range: 0..5,
 322        node_id: "n0".into(),
 323        char_offset_in_node: 0,
 324        child_index: None,
 325        utf16_len: 5,
 326    }];
 327
 328    // Position 10 is after the last mapping
 329    let result = find_mapping_for_char(&mappings, 10);
 330    assert!(result.is_none());
 331}
 332
 333#[test]
 334fn test_find_mapping_empty() {
 335    let mappings: Vec<OffsetMapping> = vec![];
 336    let result = find_mapping_for_char(&mappings, 0);
 337    assert!(result.is_none());
 338}
 339
 340#[test]
 341fn test_find_mapping_invisible_snaps() {
 342    // Invisible content should flag should_snap=true
 343    let mappings = vec![OffsetMapping {
 344        byte_range: 0..2,
 345        char_range: 0..2,
 346        node_id: "n0".into(),
 347        char_offset_in_node: 0,
 348        child_index: None,
 349        utf16_len: 0, // invisible
 350    }];
 351
 352    let result = find_mapping_for_char(&mappings, 1);
 353    assert!(result.is_some());
 354    let (_, should_snap) = result.unwrap();
 355    assert!(should_snap, "invisible content should trigger snap");
 356}
 357
 358// =============================================================================
 359// Regression Tests (from status doc bugs)
 360// =============================================================================
 361
 362#[test]
 363fn regression_bug6_heading_as_paragraph_boundary() {
 364    // Bug #6: Headings should be tracked as paragraph boundaries
 365    let result = render_test("# Heading\n\nParagraph");
 366
 367    // Should have at least 2 content paragraphs (heading + paragraph)
 368    // Plus potential gap paragraphs
 369    assert!(
 370        result.len() >= 2,
 371        "heading should create separate paragraph"
 372    );
 373
 374    // First paragraph should contain heading
 375    assert!(
 376        result[0].html.contains("<h1>") || result[0].html.contains("Heading"),
 377        "first paragraph should be heading"
 378    );
 379}
 380
 381#[test]
 382fn regression_bug8_inline_formatting_no_double_syntax() {
 383    // Bug #8: Inline formatting should not produce double **
 384    let result = render_test("some **bold** text");
 385
 386    // Count occurrences of ** in HTML
 387    let html = &result[0].html;
 388    let double_star_count = html.matches("**").count();
 389
 390    // Should have exactly 2 occurrences (opening and closing, wrapped in spans)
 391    // The bug was producing 4 (doubled emission)
 392    assert!(
 393        double_star_count <= 2,
 394        "should not have double ** syntax: found {} in {}",
 395        double_star_count,
 396        html
 397    );
 398}
 399
 400#[test]
 401fn regression_bug9_lists_as_paragraph_boundary() {
 402    // Bug #9: Lists should be tracked as paragraph boundaries
 403    let result = render_test("Before\n\n- Item 1\n- Item 2\n\nAfter");
 404
 405    // Should have paragraphs for: Before, list, After (plus gaps)
 406    let has_list = result
 407        .iter()
 408        .any(|p| p.html.contains("<li>") || p.html.contains("<ul>"));
 409    assert!(has_list, "list should be present in rendered output");
 410}
 411
 412#[test]
 413fn regression_bug9_code_blocks_as_paragraph_boundary() {
 414    // Bug #9: Code blocks should be tracked as paragraph boundaries
 415    let result = render_test("Before\n\n```\ncode\n```\n\nAfter");
 416
 417    let has_code = result
 418        .iter()
 419        .any(|p| p.html.contains("<pre>") || p.html.contains("<code>"));
 420    assert!(has_code, "code block should be present in rendered output");
 421}
 422
// Disabled because the paragraph-spacing behaviour is being changed.
 424// #[test]
 425// fn regression_bug11_gap_paragraphs_for_whitespace() {
 426//     // Bug #11: Gap paragraphs should be created for EXTRA inter-block whitespace
 427//     // Note: Headings consume trailing newline, so need 4 newlines total for gap > MIN_PARAGRAPH_BREAK
 428
 429//     // Test with extra whitespace (4 newlines = heading eats 1, leaves 3, gap = 3 > 2)
 430//     let result = render_test("# Title\n\n\n\nContent"); // 4 newlines
 431//     assert_eq!(result.len(), 3, "Expected 3 elements with extra whitespace");
 432//     assert!(
 433//         result[1].html.contains("gap-"),
 434//         "Middle element should be a gap"
 435//     );
 436
 437//     // Test standard break (3 newlines = heading eats 1, leaves 2, gap = 2 = MIN, no gap element)
 438//     let result2 = render_test("# Title\n\n\nContent"); // 3 newlines
 439//     assert_eq!(
 440//         result2.len(),
 441//         2,
 442//         "Expected 2 elements with standard break equivalent"
 443//     );
 444// }
 445
 446// =============================================================================
 447// Syntax Span Edge Case Tests
 448// =============================================================================
 449
 450#[test]
 451fn test_invalid_heading_no_space() {
 452    // "#text" without space is NOT a valid heading - should be plain text
 453    // The '#' should NOT be wrapped in a syntax span
 454    let result = render_test("#text");
 455
 456    // Should be a single paragraph with plain text
 457    assert_eq!(result.len(), 1, "Should have 1 paragraph");
 458
 459    // HTML should NOT contain md-syntax-block for the #
 460    assert!(
 461        !result[0].html.contains("md-syntax-block"),
 462        "Invalid heading '#text' should NOT have block syntax span. HTML: {}",
 463        result[0].html
 464    );
 465
 466    // The # should be visible as regular text content
 467    assert!(
 468        result[0].html.contains("#text") || result[0].html.contains("&num;text"),
 469        "The '#text' should appear as regular text. HTML: {}",
 470        result[0].html
 471    );
 472}
 473
 474#[test]
 475fn test_valid_heading_with_space() {
 476    // "# text" WITH space IS a valid heading
 477    let result = render_test("# Heading");
 478
 479    // Should have heading syntax span
 480    assert!(
 481        result[0].html.contains("md-syntax-block"),
 482        "Valid heading should have block syntax span. HTML: {}",
 483        result[0].html
 484    );
 485
 486    // Should have <h1> tag
 487    assert!(
 488        result[0].html.contains("<h1"),
 489        "Valid heading should render as h1. HTML: {}",
 490        result[0].html
 491    );
 492}
 493
 494#[test]
 495fn test_hash_in_middle_of_text() {
 496    // "#" in middle of text should not be treated as heading syntax
 497    let result = render_test("Some #hashtag here");
 498
 499    assert!(
 500        !result[0].html.contains("md-syntax-block"),
 501        "# in middle of text should NOT be block syntax. HTML: {}",
 502        result[0].html
 503    );
 504}
 505
 506#[test]
 507fn test_unclosed_bold() {
 508    // "**text" without closing ** should be plain text, not bold
 509    let result = render_test("**unclosed bold");
 510
 511    // Should NOT have <strong> tag
 512    assert!(
 513        !result[0].html.contains("<strong>"),
 514        "Unclosed ** should NOT render as bold. HTML: {}",
 515        result[0].html
 516    );
 517}
 518
 519#[test]
 520fn test_unclosed_italic() {
 521    // "*text" without closing * should be plain text, not italic
 522    let result = render_test("*unclosed italic");
 523
 524    // Should NOT have <em> tag
 525    assert!(
 526        !result[0].html.contains("<em>"),
 527        "Unclosed * should NOT render as italic. HTML: {}",
 528        result[0].html
 529    );
 530}
 531
 532#[test]
 533fn test_asterisk_not_emphasis() {
 534    // Single * surrounded by spaces is not emphasis
 535    let result = render_test("5 * 3 = 15");
 536
 537    // Should NOT have <em> tag
 538    assert!(
 539        !result[0].html.contains("<em>"),
 540        "Math expression with * should NOT be italic. HTML: {}",
 541        result[0].html
 542    );
 543}
 544
 545#[test]
 546fn test_list_marker_needs_space() {
 547    // "-text" without space is NOT a list item
 548    let result = render_test("-not-a-list");
 549
 550    // Should NOT have <li> or <ul> tags
 551    assert!(
 552        !result[0].html.contains("<li>") && !result[0].html.contains("<ul>"),
 553        "'-text' without space should NOT be a list. HTML: {}",
 554        result[0].html
 555    );
 556}
 557
 558#[test]
 559fn test_valid_list_with_space() {
 560    // "- text" WITH space IS a valid list item
 561    let result = render_test("- List item");
 562
 563    // Should have list markup
 564    assert!(
 565        result[0].html.contains("<li>") || result[0].html.contains("<ul>"),
 566        "Valid list should have list markup. HTML: {}",
 567        result[0].html
 568    );
 569
 570    // Should have block syntax span for the marker
 571    assert!(
 572        result[0].html.contains("md-syntax-block"),
 573        "List marker should have block syntax span. HTML: {}",
 574        result[0].html
 575    );
 576}
 577
 578#[test]
 579fn test_number_dot_needs_space() {
 580    // "1.text" without space is NOT an ordered list
 581    let result = render_test("1.not-a-list");
 582
 583    // Should NOT have <ol> tag
 584    assert!(
 585        !result[0].html.contains("<ol>"),
 586        "'1.text' without space should NOT be ordered list. HTML: {}",
 587        result[0].html
 588    );
 589}
 590
 591#[test]
 592fn test_hash_with_zero_width_char() {
 593    // "#\u{200B}text" - zero-width space after # should NOT make it a valid heading
 594    let result = render_test("#\u{200B}text");
 595
 596    // Debug: print what we got
 597    eprintln!("HTML for '#\\u{{200B}}text': {}", result[0].html);
 598
 599    // Should NOT be a heading - zero-width space is not a real space
 600    assert!(
 601        !result[0].html.contains("<h1"),
 602        "# followed by zero-width space should NOT be h1. HTML: {}",
 603        result[0].html
 604    );
 605}
 606
 607#[test]
 608fn test_hash_with_zwj() {
 609    // Test with zero-width joiner
 610    let result = render_test("#\u{200C}text");
 611
 612    eprintln!("HTML for '#\\u{{200C}}text': {}", result[0].html);
 613
 614    assert!(
 615        !result[0].html.contains("<h1"),
 616        "# followed by ZWNJ should NOT be h1. HTML: {}",
 617        result[0].html
 618    );
 619}
 620
 621#[test]
 622fn test_hash_space_then_zero_width() {
 623    // "# \u{200B}" - valid heading marker, but content is just zero-width
 624    let result = render_test("# \u{200B}");
 625
 626    eprintln!("HTML for '# \\u{{200B}}': {}", result[0].html);
 627    eprintln!("Syntax spans: {:?}", result[0].offset_map);
 628
 629    // This IS a valid heading (has space after #), even if content is "invisible"
 630    // The question is: should we hide the # syntax in this case?
 631}
 632
 633#[test]
 634fn test_hash_alone() {
 635    // Just "#" at EOL IS a valid empty heading (standard CommonMark behavior)
 636    let result = render_test("#");
 637    eprintln!("HTML for '#': {}", result[0].html);
 638
 639    // This IS a heading - empty headings are valid
 640    assert!(
 641        result[0].html.contains("<h1"),
 642        "'#' alone IS a valid empty h1. HTML: {}",
 643        result[0].html
 644    );
 645}
 646
 647#[test]
 648fn test_heading_to_non_heading_transition() {
 649    // Simulates typing: start with "#" (heading), then add "t" to make "#t" (not heading)
 650    // This tests that the syntax spans are correctly updated on content change.
 651    use weaver_editor_core::render_paragraphs_incremental;
 652
 653    let mut buffer = LoroTextBuffer::new();
 654
 655    // Initial state: "#" is a valid empty heading
 656    buffer.insert(0, "#");
 657    let result1 = render_paragraphs_incremental(
 658        &buffer,
 659        None,
 660        0,
 661        None,
 662        None::<&EditorImageResolver>,
 663        None,
 664        &ResolvedContent::default(),
 665    );
 666    let paras1 = result1.paragraphs;
 667    let cache1 = result1.cache;
 668
 669    eprintln!("State 1 ('#'): {}", paras1[0].html);
 670    assert!(paras1[0].html.contains("<h1"), "# alone should be heading");
 671    assert!(
 672        paras1[0].html.contains("md-syntax-block"),
 673        "# should have syntax span"
 674    );
 675
 676    // Transition: add "t" to make "#t" - no longer a heading
 677    buffer.insert(1, "t");
 678    let result2 = render_paragraphs_incremental(
 679        &buffer,
 680        Some(&cache1),
 681        0,
 682        None,
 683        None::<&EditorImageResolver>,
 684        None,
 685        &ResolvedContent::default(),
 686    );
 687    let paras2 = result2.paragraphs;
 688
 689    eprintln!("State 2 ('#t'): {}", paras2[0].html);
 690    assert!(
 691        !paras2[0].html.contains("<h1"),
 692        "#t should NOT be heading. HTML: {}",
 693        paras2[0].html
 694    );
 695    assert!(
 696        !paras2[0].html.contains("md-syntax-block"),
 697        "#t should NOT have block syntax span. HTML: {}",
 698        paras2[0].html
 699    );
 700}
 701
 702#[test]
 703fn test_hash_space_alone() {
 704    // "# " (hash + space, no content) - IS this a heading?
 705    let result = render_test("# ");
 706    eprintln!("HTML for '# ': {}", result[0].html);
 707
 708    // Document actual behavior - this determines if empty headings are valid
 709}
 710
 711#[test]
 712fn test_empty_blockquote() {
 713    // Just ">" alone - empty blockquote
 714    // BUG: Currently produces 0 paragraphs, making the > invisible!
 715    let result = render_test(">");
 716    eprintln!("Paragraphs for '>': {:?}", result.len());
 717    for (i, p) in result.iter().enumerate() {
 718        eprintln!(
 719            "  Para {}: html={}, char_range={:?}",
 720            i, p.html, p.char_range
 721        );
 722    }
 723
 724    // Empty blockquote should still produce at least one paragraph
 725    // containing the > syntax so it can be rendered and edited
 726    assert!(
 727        !result.is_empty(),
 728        "Empty blockquote should produce at least one paragraph, got 0"
 729    );
 730}
 731
 732#[test]
 733fn test_blockquote_needs_space_or_newline() {
 734    // ">text" directly attached might not be a blockquote depending on parser
 735    // This test documents expected behavior
 736    let result = render_test(">quote");
 737
 738    // Whether this is a blockquote depends on the parser - document actual behavior
 739    insta::assert_yaml_snapshot!(result, @r#"
 740    - byte_range:
 741        - 6
 742        - 6
 743      char_range:
 744        - 0
 745        - 6
 746      html: "<blockquote><p id=\"p-0-n0\"><span class=\"md-syntax-block\" data-syn-id=\"s0\" data-char-start=\"0\" data-char-end=\"1\">&gt;</span>quote</p>"
 747      offset_map:
 748        - byte_range:
 749            - 1
 750            - 1
 751          char_range:
 752            - 0
 753            - 0
 754          node_id: p-0-n0
 755          char_offset_in_node: 0
 756          child_index: 0
 757          utf16_len: 0
 758        - byte_range:
 759            - 0
 760            - 1
 761          char_range:
 762            - 0
 763            - 1
 764          node_id: p-0-n0
 765          char_offset_in_node: 0
 766          child_index: ~
 767          utf16_len: 1
 768        - byte_range:
 769            - 1
 770            - 6
 771          char_range:
 772            - 1
 773            - 6
 774          node_id: p-0-n0
 775          char_offset_in_node: 1
 776          child_index: ~
 777          utf16_len: 5
 778      source_hash: 6279293067953035109
 779    "#);
 780}
 781
 782// =============================================================================
 783// Char Range Coverage Tests
 784// =============================================================================
 785
 786#[test]
 787fn test_char_range_coverage_allows_paragraph_breaks() {
 788    // Verify char ranges cover document content, allowing standard \n\n breaks
 789    // The MIN_PARAGRAPH_BREAK zone (2 chars) is intentionally not covered -
 790    // cursor snaps to adjacent paragraphs for standard breaks.
 791    // Only EXTRA whitespace beyond \n\n gets gap elements.
 792    let input = "Hello\n\nWorld";
 793    let mut buffer = LoroTextBuffer::new();
 794    buffer.insert(0, input);
 795    let result = render_paragraphs_incremental(
 796        &buffer,
 797        None,
 798        0,
 799        None,
 800        None::<&EditorImageResolver>,
 801        None,
 802        &ResolvedContent::default(),
 803    );
 804    let paragraphs = result.paragraphs;
 805
 806    // With standard \n\n break, we expect 2 paragraphs (no gap element)
 807    // Paragraph ranges include some trailing whitespace from markdown parsing
 808    assert_eq!(
 809        paragraphs.len(),
 810        2,
 811        "Expected 2 paragraphs for standard break"
 812    );
 813
 814    // First paragraph ends before second starts, with gap for \n\n
 815    let gap_start = paragraphs[0].char_range.end;
 816    let gap_end = paragraphs[1].char_range.start;
 817    let gap_size = gap_end - gap_start;
 818    assert!(
 819        gap_size <= 2,
 820        "Gap should be at most MIN_PARAGRAPH_BREAK (2), got {}",
 821        gap_size
 822    );
 823}
 824
// Disabled: this test describes the old gap behaviour and needs to be re-checked.
 826// #[test]
 827// fn test_char_range_coverage_with_extra_whitespace() {
 828//     // Extra whitespace beyond MIN_PARAGRAPH_BREAK (2) gets gap elements
 829//     // Plain paragraphs don't consume trailing newlines like headings do
 830//     let input = "Hello\n\n\n\nWorld"; // 4 newlines = gap of 4 > 2
 831//     let mut buffer = LoroTextBuffer::new();
 832//     buffer.insert(0, input);
 833//     let (paragraphs, _cache, _refs) = render_paragraphs_incremental(
 834//         &buffer,
 835//         None,
 836//         0,
 837//         None,
 838//         None,
 839//         None,
 840//         &ResolvedContent::default(),
 841//     );
 842
 843//     // With extra newlines, we expect 3 elements: para, gap, para
 844//     assert_eq!(
 845//         paragraphs.len(),
 846//         3,
 847//         "Expected 3 elements with extra whitespace"
 848//     );
 849
 850//     // Gap element should exist and cover whitespace zone
 851//     let gap = &paragraphs[1];
 852//     assert!(gap.html.contains("gap-"), "Second element should be a gap");
 853
 854//     // Gap should cover ALL whitespace (not just extra)
 855//     assert_eq!(
 856//         gap.char_range.start, paragraphs[0].char_range.end,
 857//         "Gap should start where first paragraph ends"
 858//     );
 859//     assert_eq!(
 860//         gap.char_range.end, paragraphs[2].char_range.start,
 861//         "Gap should end where second paragraph starts"
 862//     );
 863// }
 864
 865#[test]
 866fn test_node_ids_unique_across_paragraphs() {
 867    // Verify HTML id attributes are unique across paragraphs
 868    let result = render_test("# Heading\n\nParagraph with **bold**\n\n- List item");
 869
 870    // Print rendered output for debugging failures
 871    for (i, para) in result.iter().enumerate() {
 872        eprintln!("--- Paragraph {} ---", i);
 873        eprintln!("char_range: {:?}", para.char_range);
 874        eprintln!("html: {}", para.html);
 875        eprintln!(
 876            "offset_map node_ids: {:?}",
 877            para.offset_map
 878                .iter()
 879                .map(|m| &m.node_id)
 880                .collect::<Vec<_>>()
 881        );
 882    }
 883
 884    // Extract all id and data-node-id attributes from HTML
 885    let id_regex = regex::Regex::new(r#"(?:id|data-node-id)="([^"]+)""#).unwrap();
 886
 887    let mut all_html_ids = std::collections::HashSet::new();
 888    for (para_idx, para) in result.iter().enumerate() {
 889        for cap in id_regex.captures_iter(&para.html) {
 890            let id = cap.get(1).unwrap().as_str();
 891            assert!(
 892                all_html_ids.insert(id.to_string()),
 893                "Duplicate HTML id '{}' in paragraph {}",
 894                id,
 895                para_idx
 896            );
 897        }
 898    }
 899}
 900
 901#[test]
 902fn test_offset_mappings_reference_own_paragraph() {
 903    // Verify offset mappings only reference node IDs that exist in their paragraph's HTML
 904    let result = render_test("# Heading\n\nParagraph with **bold**\n\n- List item");
 905
 906    let id_regex = regex::Regex::new(r#"(?:id|data-node-id)="([^"]+)""#).unwrap();
 907
 908    for (para_idx, para) in result.iter().enumerate() {
 909        // Collect all node IDs in this paragraph's HTML
 910        let html_ids: std::collections::HashSet<_> = id_regex
 911            .captures_iter(&para.html)
 912            .map(|cap| cap.get(1).unwrap().as_str().to_string())
 913            .collect();
 914
 915        // Verify each offset mapping references a node in this paragraph
 916        for mapping in &para.offset_map {
 917            assert!(
 918                html_ids.contains(&mapping.node_id),
 919                "Paragraph {} has offset mapping referencing '{}' but HTML only has {:?}\nHTML: {}",
 920                para_idx,
 921                mapping.node_id,
 922                html_ids,
 923                para.html
 924            );
 925        }
 926    }
 927}
 928
 929// =============================================================================
 930// Incremental Rendering Tests
 931// =============================================================================
 932
 933#[test]
 934fn test_incremental_cache_reuse() {
 935    // Verify cache is populated and can be reused
 936    let input = "First para\n\nSecond para";
 937    let mut buffer = LoroTextBuffer::new();
 938    buffer.insert(0, input);
 939
 940    let result1 = render_paragraphs_incremental(
 941        &buffer,
 942        None,
 943        0,
 944        None,
 945        None::<&EditorImageResolver>,
 946        None,
 947        &ResolvedContent::default(),
 948    );
 949    let paras1 = result1.paragraphs;
 950    let cache1 = result1.cache;
 951    assert!(!cache1.paragraphs.is_empty(), "Cache should be populated");
 952
 953    // Second render with same content should reuse cache
 954    let result2 = render_paragraphs_incremental(
 955        &buffer,
 956        Some(&cache1),
 957        0,
 958        None,
 959        None::<&EditorImageResolver>,
 960        None,
 961        &ResolvedContent::default(),
 962    );
 963    let paras2 = result2.paragraphs;
 964
 965    // Should produce identical output
 966    assert_eq!(paras1.len(), paras2.len());
 967    for (p1, p2) in paras1.iter().zip(paras2.iter()) {
 968        assert_eq!(p1.html, p2.html);
 969    }
 970}
 971
 972// =============================================================================
 973// Loro CRDT API Spike Tests
 974// =============================================================================
 975
 976#[test]
 977fn test_loro_basic_text_operations() {
 978    use loro::LoroDoc;
 979
 980    let doc = LoroDoc::new();
 981    let text = doc.get_text("content");
 982
 983    // Insert
 984    text.insert(0, "Hello").unwrap();
 985    assert_eq!(text.to_string(), "Hello");
 986    assert_eq!(text.len_unicode(), 5);
 987
 988    // Insert at position
 989    text.insert(5, " world").unwrap();
 990    assert_eq!(text.to_string(), "Hello world");
 991    assert_eq!(text.len_unicode(), 11);
 992
 993    // Delete
 994    text.delete(5, 6).unwrap(); // delete " world"
 995    assert_eq!(text.to_string(), "Hello");
 996    assert_eq!(text.len_unicode(), 5);
 997}
 998
 999#[test]
1000fn test_loro_unicode_handling() {
1001    use loro::LoroDoc;
1002
1003    let doc = LoroDoc::new();
1004    let text = doc.get_text("content");
1005
1006    // Insert unicode
1007    text.insert(0, "Hello 🎉 世界").unwrap();
1008
1009    // Check lengths
1010    let content = text.to_string();
1011    assert_eq!(content, "Hello 🎉 世界");
1012
1013    // Unicode length (chars)
1014    assert_eq!(text.len_unicode(), 10); // H e l l o   🎉   世 界
1015
1016    // UTF-16 length (for DOM)
1017    // 🎉 is a surrogate pair (2 UTF-16 units), rest are 1 each
1018    assert_eq!(text.len_utf16(), 11); // 6 + 2 + 1 + 2 = 11
1019
1020    // UTF-8 length (bytes)
1021    assert_eq!(text.len_utf8(), content.len());
1022}
1023
#[test]
fn test_loro_undo_redo() {
    let document = loro::LoroDoc::new();
    let text = document.get_text("content");
    let mut history = loro::UndoManager::new(&document);

    // Two edits, each committed on its own so that each can be undone separately.
    for &(position, chunk) in [(0, "Hello"), (5, " world")].iter() {
        text.insert(position, chunk).unwrap();
        document.commit();
    }
    assert_eq!(text.to_string(), "Hello world");

    // Undoing once removes the second edit.
    assert!(history.can_undo());
    history.undo().unwrap();
    assert_eq!(text.to_string(), "Hello");

    // Undoing again removes the first edit, leaving the text empty.
    history.undo().unwrap();
    assert_eq!(text.to_string(), "");

    // Redo replays the edits in their original order.
    assert!(history.can_redo());
    history.redo().unwrap();
    assert_eq!(text.to_string(), "Hello");

    history.redo().unwrap();
    assert_eq!(text.to_string(), "Hello world");
}
1058
#[test]
fn test_loro_char_to_utf16_conversion() {
    use loro::{LoroDoc, LoroText};

    // Stand-in for the char -> UTF-16 offset conversion used for cursor
    // positioning: given a char offset, return the matching UTF-16 offset.
    fn char_to_utf16(text: &LoroText, char_pos: usize) -> usize {
        if char_pos == 0 {
            return 0;
        }
        // Fast path: equal char and UTF-16 lengths mean no char needs a
        // surrogate pair, so the two offsets coincide.
        if text.len_unicode() == text.len_utf16() {
            return char_pos;
        }
        // Slow path: count the UTF-16 units in the prefix before `char_pos`.
        // A failed slice falls back to 0.
        text.slice(0, char_pos)
            .map(|prefix| prefix.encode_utf16().count())
            .unwrap_or(0)
    }

    let document = LoroDoc::new();
    let text = document.get_text("content");
    text.insert(0, "Hello 🎉 世界").unwrap();

    // "Hello 🎉 世界"
    // char:   H(0) e(1) l(2) l(3) o(4) ' '(5) 🎉(6) ' '(7) 世(8) 界(9)
    // UTF-16: 0    1    2    3    4    5      6,7   8      9     10
    let expectations = [
        (0, 0),   // start of text
        (6, 6),   // just before the emoji
        (7, 8),   // just after the emoji, which spans 2 UTF-16 units
        (10, 11), // end of text
    ];
    for &(char_pos, utf16_pos) in expectations.iter() {
        assert_eq!(char_to_utf16(&text, char_pos), utf16_pos);
    }
}
1094
#[test]
fn test_loro_ascii_fast_path() {
    use loro::{LoroDoc, LoroText};

    // Same conversion as used for cursor positioning; for this input only the
    // early returns are expected to run.
    fn char_to_utf16(text: &LoroText, char_pos: usize) -> usize {
        if char_pos == 0 {
            return 0;
        }
        if text.len_unicode() == text.len_utf16() {
            // Fast path: offsets are identical.
            return char_pos;
        }
        match text.slice(0, char_pos) {
            Ok(prefix) => prefix.encode_utf16().count(),
            Err(_) => 0,
        }
    }

    let document = LoroDoc::new();
    let text = document.get_text("content");

    // Content made of ASCII characters only.
    text.insert(0, "Hello world, this is a test!").unwrap();

    // Precondition for the fast path: char, UTF-8 and UTF-16 lengths all agree.
    let char_count = text.len_unicode();
    assert_eq!(char_count, text.len_utf8());
    assert_eq!(char_count, text.len_utf16());

    // Every offset, including the one past the last char, maps to itself.
    (0..=char_count).for_each(|pos| {
        assert_eq!(
            char_to_utf16(&text, pos),
            pos,
            "ASCII fast path failed at pos {}",
            pos
        );
    });
}
1132
1133// =============================================================================
1134// Text Direction Tests
1135// =============================================================================
1136
#[test]
fn test_paragraph_dir_ltr() {
    // Latin-script text: the first rendered paragraph must carry dir="ltr".
    let paragraphs = render_test("Hello world");
    let html = &paragraphs[0].html;
    assert!(html.contains("dir=\"ltr\""));
}
1143
#[test]
fn test_paragraph_dir_rtl_hebrew() {
    // Hebrew text: the first rendered paragraph must carry dir="rtl".
    let paragraphs = render_test("שלום עולם");
    let html = &paragraphs[0].html;
    assert!(html.contains("dir=\"rtl\""));
}
1150
#[test]
fn test_paragraph_dir_rtl_arabic() {
    // Arabic text: the first rendered paragraph must carry dir="rtl".
    let paragraphs = render_test("مرحبا بالعالم");
    let html = &paragraphs[0].html;
    assert!(html.contains("dir=\"rtl\""));
}
1157
#[test]
fn test_paragraph_dir_mixed_leading_neutrals() {
    // Digits and punctuation ahead of the Hebrew word must not decide the
    // direction; the paragraph is still expected to be dir="rtl".
    let paragraphs = render_test("123... שלום");
    let html = &paragraphs[0].html;
    assert!(html.contains("dir=\"rtl\""));
}
1164
#[test]
fn test_heading_dir_rtl() {
    // A heading whose text is Hebrew must be rendered with dir="rtl".
    let rendered = render_test("# שלום");
    let heading_html = &rendered[0].html;
    assert!(heading_html.contains("dir=\"rtl\""));
}
1171
#[test]
fn test_heading_dir_ltr() {
    // A heading whose text is Latin-script must be rendered with dir="ltr".
    let rendered = render_test("# Hello");
    let heading_html = &rendered[0].html;
    assert!(heading_html.contains("dir=\"ltr\""));
}
1178
#[test]
fn test_multiple_paragraphs_different_directions() {
    // Direction is checked per paragraph: English, Hebrew, English again.
    let paragraphs = render_test("Hello world\n\nשלום עולם\n\nBack to English");

    // Expected `dir` attribute for paragraphs 0, 1 and 2, in order.
    let expected_attrs = ["dir=\"ltr\"", "dir=\"rtl\"", "dir=\"ltr\""];

    // Index directly so that a missing paragraph still fails the test.
    for (index, attr) in expected_attrs.iter().enumerate() {
        assert!(paragraphs[index].html.contains(*attr));
    }
}