// Repository viewer header (not part of the source): at main, 526 lines, 18 kB, view raw
1//! Offset mapping between source text and rendered DOM. 2//! 3//! When rendering markdown to HTML, some characters disappear (table pipes) 4//! and content gets split across nodes (syntax highlighting). Offset maps 5//! track how source byte positions map to DOM node positions. 6 7use smol_str::SmolStr; 8use std::ops::Range; 9 10/// Result of rendering markdown with offset tracking. 11#[derive(Debug, Clone, PartialEq)] 12pub struct RenderResult { 13 /// Rendered HTML string 14 pub html: String, 15 16 /// Mappings from source bytes to DOM positions 17 pub offset_map: Vec<OffsetMapping>, 18} 19 20/// Maps a source range to a position in the rendered DOM. 21/// 22/// # Example 23/// 24/// Source: `| foo | bar |` 25/// Bytes: 0 2-5 7-10 12 26/// Chars: 0 2-5 7-10 12 (ASCII, so same) 27/// 28/// Rendered: 29/// ```html 30/// <table id="t0"> 31/// <tr><td id="t0-c0">foo</td><td id="t0-c1">bar</td></tr> 32/// </table> 33/// ``` 34/// 35/// Mappings: 36/// - `{ byte_range: 0..2, char_range: 0..2, node_id: "t0-c0", char_offset_in_node: 0, utf16_len: 0 }` - "| " invisible 37/// - `{ byte_range: 2..5, char_range: 2..5, node_id: "t0-c0", char_offset_in_node: 0, utf16_len: 3 }` - "foo" visible 38/// - `{ byte_range: 5..7, char_range: 5..7, node_id: "t0-c0", char_offset_in_node: 3, utf16_len: 0 }` - " |" invisible 39/// - etc. 
40#[derive(Debug, Clone, PartialEq)] 41pub struct OffsetMapping { 42 /// Source byte range (UTF-8 bytes, from parser) 43 pub byte_range: Range<usize>, 44 45 /// Source char range (Unicode scalar values, for rope indexing) 46 pub char_range: Range<usize>, 47 48 /// DOM node ID containing this content 49 /// For invisible content, this is the nearest visible container 50 pub node_id: SmolStr, 51 52 /// Position within the node 53 /// - If child_index is Some: cursor at that child index in the element 54 /// - If child_index is None: UTF-16 offset in text content 55 pub char_offset_in_node: usize, 56 57 /// If Some, position cursor at this child index in the element (not in text) 58 /// Used for positions after <br /> or at empty lines 59 pub child_index: Option<usize>, 60 61 /// Length of this mapping in UTF-16 chars in DOM 62 /// If 0, these source bytes aren't rendered (table pipes, etc) 63 pub utf16_len: usize, 64} 65 66impl OffsetMapping { 67 /// Check if this mapping contains the given byte offset 68 pub fn contains_byte(&self, byte_offset: usize) -> bool { 69 self.byte_range.contains(&byte_offset) 70 } 71 72 /// Check if this mapping contains the given char offset 73 pub fn contains_char(&self, char_offset: usize) -> bool { 74 self.char_range.contains(&char_offset) 75 } 76 77 /// Check if this mapping represents invisible content 78 pub fn is_invisible(&self) -> bool { 79 self.utf16_len == 0 80 } 81} 82 83/// Find the offset mapping containing the given byte offset. 84/// 85/// Returns the mapping and whether the cursor should snap to the next 86/// visible position (for invisible content). 
87pub fn find_mapping_for_byte( 88 offset_map: &[OffsetMapping], 89 byte_offset: usize, 90) -> Option<(&OffsetMapping, bool)> { 91 // Binary search for the mapping 92 // Note: We allow cursor at the end boundary of a mapping (cursor after text) 93 let idx = offset_map 94 .binary_search_by(|mapping| { 95 if mapping.byte_range.end < byte_offset { 96 std::cmp::Ordering::Less 97 } else if mapping.byte_range.start > byte_offset { 98 std::cmp::Ordering::Greater 99 } else { 100 std::cmp::Ordering::Equal 101 } 102 }) 103 .ok()?; 104 105 let mapping = &offset_map[idx]; 106 let should_snap = mapping.is_invisible(); 107 108 Some((mapping, should_snap)) 109} 110 111/// Find the offset mapping containing the given char offset. 112/// 113/// This is the primary lookup method for cursor restoration, since 114/// cursor positions are tracked as char offsets in the rope. 115/// 116/// Returns the mapping and whether the cursor should snap to the next 117/// visible position (for invisible content). 118pub fn find_mapping_for_char( 119 offset_map: &[OffsetMapping], 120 char_offset: usize, 121) -> Option<(&OffsetMapping, bool)> { 122 // Binary search for the mapping 123 // Rust ranges are end-exclusive, so range 0..10 covers positions 0-9. 124 // When cursor is exactly at a boundary (e.g., position 10 between 0..10 and 10..20), 125 // prefer the NEXT mapping so cursor goes "down" to new content. 
126 let result = offset_map.binary_search_by(|mapping| { 127 if mapping.char_range.end <= char_offset { 128 // Cursor is at or after end of this mapping - look forward 129 std::cmp::Ordering::Less 130 } else if mapping.char_range.start > char_offset { 131 // Cursor is before this mapping 132 std::cmp::Ordering::Greater 133 } else { 134 // Cursor is within [start, end) 135 std::cmp::Ordering::Equal 136 } 137 }); 138 139 let mapping = match result { 140 Ok(idx) => &offset_map[idx], 141 Err(idx) => { 142 // No exact match - cursor is at boundary between mappings (or past end) 143 // If cursor is exactly at end of previous mapping, return that mapping 144 // This handles cursor at end of document or end of last mapping 145 if idx > 0 && offset_map[idx - 1].char_range.end == char_offset { 146 &offset_map[idx - 1] 147 } else { 148 return None; 149 } 150 } 151 }; 152 153 let should_snap = mapping.is_invisible(); 154 Some((mapping, should_snap)) 155} 156 157/// Direction hint for cursor snapping. 158#[derive(Debug, Clone, Copy, PartialEq, Eq)] 159pub enum SnapDirection { 160 Backward, 161 Forward, 162} 163 164/// Result of finding a valid cursor position. 165#[derive(Debug, Clone)] 166pub struct SnappedPosition<'a> { 167 pub mapping: &'a OffsetMapping, 168 pub offset_in_mapping: usize, 169 pub snapped: Option<SnapDirection>, 170} 171 172impl SnappedPosition<'_> { 173 /// Get the absolute char offset for this position. 174 pub fn char_offset(&self) -> usize { 175 self.mapping.char_range.start + self.offset_in_mapping 176 } 177} 178 179/// Find the nearest valid cursor position to a char offset. 180/// 181/// A valid position is one that maps to visible content (utf16_len > 0). 182/// If the position is already valid, returns it directly. Otherwise, 183/// searches in the preferred direction first, falling back to the other 184/// direction if needed. 
185pub fn find_nearest_valid_position( 186 offset_map: &[OffsetMapping], 187 char_offset: usize, 188 preferred_direction: Option<SnapDirection>, 189) -> Option<SnappedPosition<'_>> { 190 if offset_map.is_empty() { 191 return None; 192 } 193 194 // Try exact match first 195 if let Some((mapping, should_snap)) = find_mapping_for_char(offset_map, char_offset) { 196 if !should_snap { 197 // Position is valid, return it directly 198 let offset_in_mapping = char_offset.saturating_sub(mapping.char_range.start); 199 return Some(SnappedPosition { 200 mapping, 201 offset_in_mapping, 202 snapped: None, 203 }); 204 } 205 } 206 207 // Position is invalid or not found - search for nearest valid 208 let search_order = match preferred_direction { 209 Some(SnapDirection::Backward) => [SnapDirection::Backward, SnapDirection::Forward], 210 Some(SnapDirection::Forward) | None => [SnapDirection::Forward, SnapDirection::Backward], 211 }; 212 213 for direction in search_order { 214 if let Some(pos) = find_valid_in_direction(offset_map, char_offset, direction) { 215 return Some(pos); 216 } 217 } 218 219 None 220} 221 222/// Search for a valid position in a specific direction. 
223fn find_valid_in_direction( 224 offset_map: &[OffsetMapping], 225 char_offset: usize, 226 direction: SnapDirection, 227) -> Option<SnappedPosition<'_>> { 228 match direction { 229 SnapDirection::Forward => { 230 // Find first visible mapping at or after char_offset 231 for mapping in offset_map { 232 if mapping.char_range.start >= char_offset && !mapping.is_invisible() { 233 return Some(SnappedPosition { 234 mapping, 235 offset_in_mapping: 0, 236 snapped: Some(SnapDirection::Forward), 237 }); 238 } 239 // Also check if char_offset falls within this visible mapping 240 if mapping.char_range.contains(&char_offset) && !mapping.is_invisible() { 241 let offset_in_mapping = char_offset - mapping.char_range.start; 242 return Some(SnappedPosition { 243 mapping, 244 offset_in_mapping, 245 snapped: Some(SnapDirection::Forward), 246 }); 247 } 248 } 249 None 250 } 251 SnapDirection::Backward => { 252 // Find last visible mapping at or before char_offset 253 for mapping in offset_map.iter().rev() { 254 if mapping.char_range.end <= char_offset && !mapping.is_invisible() { 255 // Snap to end of this mapping 256 let offset_in_mapping = mapping.char_range.len(); 257 return Some(SnappedPosition { 258 mapping, 259 offset_in_mapping, 260 snapped: Some(SnapDirection::Backward), 261 }); 262 } 263 // Also check if char_offset falls within this visible mapping 264 if mapping.char_range.contains(&char_offset) && !mapping.is_invisible() { 265 let offset_in_mapping = char_offset - mapping.char_range.start; 266 return Some(SnappedPosition { 267 mapping, 268 offset_in_mapping, 269 snapped: Some(SnapDirection::Backward), 270 }); 271 } 272 } 273 None 274 } 275 } 276} 277 278/// Check if a char offset is at a valid (non-invisible) cursor position. 
279pub fn is_valid_cursor_position(offset_map: &[OffsetMapping], char_offset: usize) -> bool { 280 find_mapping_for_char(offset_map, char_offset) 281 .map(|(m, should_snap)| !should_snap && m.utf16_len > 0) 282 .unwrap_or(false) 283} 284 285#[cfg(test)] 286mod tests { 287 use super::*; 288 use smol_str::ToSmolStr; 289 290 #[test] 291 fn test_find_mapping_by_byte() { 292 let mappings = vec![ 293 OffsetMapping { 294 byte_range: 0..2, 295 char_range: 0..2, 296 node_id: "n0".to_smolstr(), 297 char_offset_in_node: 0, 298 child_index: None, 299 utf16_len: 0, // invisible 300 }, 301 OffsetMapping { 302 byte_range: 2..5, 303 char_range: 2..5, 304 node_id: "n0".to_smolstr(), 305 char_offset_in_node: 0, 306 child_index: None, 307 utf16_len: 3, 308 }, 309 OffsetMapping { 310 byte_range: 5..7, 311 char_range: 5..7, 312 node_id: "n0".to_smolstr(), 313 char_offset_in_node: 3, 314 child_index: None, 315 utf16_len: 0, // invisible 316 }, 317 ]; 318 319 // Byte 0 (invisible) 320 let (mapping, should_snap) = find_mapping_for_byte(&mappings, 0).unwrap(); 321 assert_eq!(mapping.byte_range, 0..2); 322 assert!(should_snap); 323 324 // Byte 3 (visible) 325 let (mapping, should_snap) = find_mapping_for_byte(&mappings, 3).unwrap(); 326 assert_eq!(mapping.byte_range, 2..5); 327 assert!(!should_snap); 328 329 // Byte 6 (invisible) 330 let (mapping, should_snap) = find_mapping_for_byte(&mappings, 6).unwrap(); 331 assert_eq!(mapping.byte_range, 5..7); 332 assert!(should_snap); 333 } 334 335 #[test] 336 fn test_find_mapping_by_char() { 337 let mappings = vec![ 338 OffsetMapping { 339 byte_range: 0..2, 340 char_range: 0..2, 341 node_id: "n0".to_smolstr(), 342 char_offset_in_node: 0, 343 child_index: None, 344 utf16_len: 0, // invisible 345 }, 346 OffsetMapping { 347 byte_range: 2..5, 348 char_range: 2..5, 349 node_id: "n0".to_smolstr(), 350 char_offset_in_node: 0, 351 child_index: None, 352 utf16_len: 3, 353 }, 354 OffsetMapping { 355 byte_range: 5..7, 356 char_range: 5..7, 357 node_id: 
"n0".to_smolstr(), 358 char_offset_in_node: 3, 359 child_index: None, 360 utf16_len: 0, // invisible 361 }, 362 ]; 363 364 // Char 0 (invisible) 365 let (mapping, should_snap) = find_mapping_for_char(&mappings, 0).unwrap(); 366 assert_eq!(mapping.char_range, 0..2); 367 assert!(should_snap); 368 369 // Char 3 (visible) 370 let (mapping, should_snap) = find_mapping_for_char(&mappings, 3).unwrap(); 371 assert_eq!(mapping.char_range, 2..5); 372 assert!(!should_snap); 373 374 // Char 6 (invisible) 375 let (mapping, should_snap) = find_mapping_for_char(&mappings, 6).unwrap(); 376 assert_eq!(mapping.char_range, 5..7); 377 assert!(should_snap); 378 } 379 380 #[test] 381 fn test_contains_byte() { 382 let mapping = OffsetMapping { 383 byte_range: 10..20, 384 char_range: 10..20, 385 node_id: "test".to_smolstr(), 386 char_offset_in_node: 0, 387 child_index: None, 388 utf16_len: 5, 389 }; 390 391 assert!(!mapping.contains_byte(9)); 392 assert!(mapping.contains_byte(10)); 393 assert!(mapping.contains_byte(15)); 394 assert!(mapping.contains_byte(19)); 395 assert!(!mapping.contains_byte(20)); 396 } 397 398 #[test] 399 fn test_contains_char() { 400 let mapping = OffsetMapping { 401 byte_range: 10..20, 402 char_range: 8..15, // emoji example: fewer chars than bytes 403 node_id: "test".to_smolstr(), 404 char_offset_in_node: 0, 405 child_index: None, 406 utf16_len: 5, 407 }; 408 409 assert!(!mapping.contains_char(7)); 410 assert!(mapping.contains_char(8)); 411 assert!(mapping.contains_char(12)); 412 assert!(mapping.contains_char(14)); 413 assert!(!mapping.contains_char(15)); 414 } 415 416 fn make_test_mappings() -> Vec<OffsetMapping> { 417 vec![ 418 OffsetMapping { 419 byte_range: 0..2, 420 char_range: 0..2, 421 node_id: "n0".to_smolstr(), 422 char_offset_in_node: 0, 423 child_index: None, 424 utf16_len: 0, // invisible: "![" 425 }, 426 OffsetMapping { 427 byte_range: 2..5, 428 char_range: 2..5, 429 node_id: "n0".to_smolstr(), 430 char_offset_in_node: 0, 431 child_index: None, 432 
utf16_len: 3, // visible: "alt" 433 }, 434 OffsetMapping { 435 byte_range: 5..15, 436 char_range: 5..15, 437 node_id: "n0".to_smolstr(), 438 char_offset_in_node: 3, 439 child_index: None, 440 utf16_len: 0, // invisible: "](url.png)" 441 }, 442 OffsetMapping { 443 byte_range: 15..20, 444 char_range: 15..20, 445 node_id: "n0".to_smolstr(), 446 char_offset_in_node: 3, 447 child_index: None, 448 utf16_len: 5, // visible: " text" 449 }, 450 ] 451 } 452 453 #[test] 454 fn test_find_nearest_valid_position_exact_match() { 455 let mappings = make_test_mappings(); 456 457 // Position 3 is in visible mapping (2..5) 458 let pos = find_nearest_valid_position(&mappings, 3, None).unwrap(); 459 assert_eq!(pos.char_offset(), 3); 460 assert!(pos.snapped.is_none()); 461 } 462 463 #[test] 464 fn test_find_nearest_valid_position_snap_forward() { 465 let mappings = make_test_mappings(); 466 467 // Position 0 is invisible, should snap forward to 2 468 let pos = find_nearest_valid_position(&mappings, 0, Some(SnapDirection::Forward)).unwrap(); 469 assert_eq!(pos.char_offset(), 2); 470 assert_eq!(pos.snapped, Some(SnapDirection::Forward)); 471 } 472 473 #[test] 474 fn test_find_nearest_valid_position_snap_backward() { 475 let mappings = make_test_mappings(); 476 477 // Position 10 is invisible (in 5..15), prefer backward to end of "alt" (position 5) 478 let pos = 479 find_nearest_valid_position(&mappings, 10, Some(SnapDirection::Backward)).unwrap(); 480 assert_eq!(pos.char_offset(), 5); // end of "alt" mapping 481 assert_eq!(pos.snapped, Some(SnapDirection::Backward)); 482 } 483 484 #[test] 485 fn test_find_nearest_valid_position_default_forward() { 486 let mappings = make_test_mappings(); 487 488 // Position 0 is invisible, None direction defaults to forward 489 let pos = find_nearest_valid_position(&mappings, 0, None).unwrap(); 490 assert_eq!(pos.char_offset(), 2); 491 assert_eq!(pos.snapped, Some(SnapDirection::Forward)); 492 } 493 494 #[test] 495 fn 
test_find_nearest_valid_position_snap_forward_from_invisible() { 496 let mappings = make_test_mappings(); 497 498 // Position 10 is in invisible range (5..15), forward finds visible (15..20) 499 let pos = find_nearest_valid_position(&mappings, 10, Some(SnapDirection::Forward)).unwrap(); 500 assert_eq!(pos.char_offset(), 15); 501 assert_eq!(pos.snapped, Some(SnapDirection::Forward)); 502 } 503 504 #[test] 505 fn test_is_valid_cursor_position() { 506 let mappings = make_test_mappings(); 507 508 // Invisible positions 509 assert!(!is_valid_cursor_position(&mappings, 0)); 510 assert!(!is_valid_cursor_position(&mappings, 1)); 511 assert!(!is_valid_cursor_position(&mappings, 10)); 512 513 // Visible positions 514 assert!(is_valid_cursor_position(&mappings, 2)); 515 assert!(is_valid_cursor_position(&mappings, 3)); 516 assert!(is_valid_cursor_position(&mappings, 4)); 517 assert!(is_valid_cursor_position(&mappings, 15)); 518 assert!(is_valid_cursor_position(&mappings, 17)); 519 } 520 521 #[test] 522 fn test_find_nearest_valid_position_empty() { 523 let mappings: Vec<OffsetMapping> = vec![]; 524 assert!(find_nearest_valid_position(&mappings, 0, None).is_none()); 525 } 526}