atproto blogging
1//! Offset mapping between source text and rendered DOM.
2//!
3//! When rendering markdown to HTML, some characters disappear (table pipes)
4//! and content gets split across nodes (syntax highlighting). Offset maps
5//! track how source byte positions map to DOM node positions.
6
7use smol_str::SmolStr;
8use std::ops::Range;
9
/// Result of rendering markdown with offset tracking.
///
/// Bundles the generated HTML with the table that relates positions in the
/// markdown source to positions in the rendered DOM.
#[derive(Debug, Clone, PartialEq)]
pub struct RenderResult {
    /// Rendered HTML string
    pub html: String,

    /// Mappings from source bytes to DOM positions.
    ///
    /// The lookup helpers in this module binary-search a slice of mappings,
    /// so they presumably expect this list ordered by source position
    /// (TODO confirm against the renderer that fills it).
    pub offset_map: Vec<OffsetMapping>,
}
19
/// Maps a source range to a position in the rendered DOM.
///
/// # Example
///
/// Source: `| foo | bar |`
///
/// ```text
/// text:   | foo | bar |
/// bytes:  0 2-5   8-11 12
/// chars:  0 2-5   8-11 12   (ASCII, so same as bytes)
/// ```
///
/// Rendered:
/// ```html
/// <table id="t0">
///   <tr><td id="t0-c0">foo</td><td id="t0-c1">bar</td></tr>
/// </table>
/// ```
///
/// Mappings:
/// - `{ byte_range: 0..2, char_range: 0..2, node_id: "t0-c0", char_offset_in_node: 0, child_index: None, utf16_len: 0 }` - "| " invisible
/// - `{ byte_range: 2..5, char_range: 2..5, node_id: "t0-c0", char_offset_in_node: 0, child_index: None, utf16_len: 3 }` - "foo" visible
/// - `{ byte_range: 5..7, char_range: 5..7, node_id: "t0-c0", char_offset_in_node: 3, child_index: None, utf16_len: 0 }` - " |" invisible
/// - etc.
#[derive(Debug, Clone, PartialEq)]
pub struct OffsetMapping {
    /// Source byte range (UTF-8 bytes, from parser).
    /// End-exclusive, like every `Range`.
    pub byte_range: Range<usize>,

    /// Source char range (Unicode scalar values, for rope indexing).
    /// End-exclusive, like every `Range`.
    pub char_range: Range<usize>,

    /// DOM node ID containing this content.
    /// For invisible content, this is the nearest visible container.
    pub node_id: SmolStr,

    /// Position within the node:
    /// - If `child_index` is `Some`: cursor at that child index in the element
    /// - If `child_index` is `None`: UTF-16 offset in text content
    pub char_offset_in_node: usize,

    /// If `Some`, position cursor at this child index in the element (not in text).
    /// Used for positions after `<br />` or at empty lines.
    pub child_index: Option<usize>,

    /// Length of this mapping in UTF-16 chars in DOM.
    /// If 0, these source bytes aren't rendered (table pipes, etc);
    /// `is_invisible` tests exactly this.
    pub utf16_len: usize,
}
65
66impl OffsetMapping {
67 /// Check if this mapping contains the given byte offset
68 pub fn contains_byte(&self, byte_offset: usize) -> bool {
69 self.byte_range.contains(&byte_offset)
70 }
71
72 /// Check if this mapping contains the given char offset
73 pub fn contains_char(&self, char_offset: usize) -> bool {
74 self.char_range.contains(&char_offset)
75 }
76
77 /// Check if this mapping represents invisible content
78 pub fn is_invisible(&self) -> bool {
79 self.utf16_len == 0
80 }
81}
82
83/// Find the offset mapping containing the given byte offset.
84///
85/// Returns the mapping and whether the cursor should snap to the next
86/// visible position (for invisible content).
87pub fn find_mapping_for_byte(
88 offset_map: &[OffsetMapping],
89 byte_offset: usize,
90) -> Option<(&OffsetMapping, bool)> {
91 // Binary search for the mapping
92 // Note: We allow cursor at the end boundary of a mapping (cursor after text)
93 let idx = offset_map
94 .binary_search_by(|mapping| {
95 if mapping.byte_range.end < byte_offset {
96 std::cmp::Ordering::Less
97 } else if mapping.byte_range.start > byte_offset {
98 std::cmp::Ordering::Greater
99 } else {
100 std::cmp::Ordering::Equal
101 }
102 })
103 .ok()?;
104
105 let mapping = &offset_map[idx];
106 let should_snap = mapping.is_invisible();
107
108 Some((mapping, should_snap))
109}
110
111/// Find the offset mapping containing the given char offset.
112///
113/// This is the primary lookup method for cursor restoration, since
114/// cursor positions are tracked as char offsets in the rope.
115///
116/// Returns the mapping and whether the cursor should snap to the next
117/// visible position (for invisible content).
118pub fn find_mapping_for_char(
119 offset_map: &[OffsetMapping],
120 char_offset: usize,
121) -> Option<(&OffsetMapping, bool)> {
122 // Binary search for the mapping
123 // Rust ranges are end-exclusive, so range 0..10 covers positions 0-9.
124 // When cursor is exactly at a boundary (e.g., position 10 between 0..10 and 10..20),
125 // prefer the NEXT mapping so cursor goes "down" to new content.
126 let result = offset_map.binary_search_by(|mapping| {
127 if mapping.char_range.end <= char_offset {
128 // Cursor is at or after end of this mapping - look forward
129 std::cmp::Ordering::Less
130 } else if mapping.char_range.start > char_offset {
131 // Cursor is before this mapping
132 std::cmp::Ordering::Greater
133 } else {
134 // Cursor is within [start, end)
135 std::cmp::Ordering::Equal
136 }
137 });
138
139 let mapping = match result {
140 Ok(idx) => &offset_map[idx],
141 Err(idx) => {
142 // No exact match - cursor is at boundary between mappings (or past end)
143 // If cursor is exactly at end of previous mapping, return that mapping
144 // This handles cursor at end of document or end of last mapping
145 if idx > 0 && offset_map[idx - 1].char_range.end == char_offset {
146 &offset_map[idx - 1]
147 } else {
148 return None;
149 }
150 }
151 };
152
153 let should_snap = mapping.is_invisible();
154 Some((mapping, should_snap))
155}
156
/// Direction hint for cursor snapping.
///
/// Passed to `find_nearest_valid_position` as the preferred search direction
/// and reported back in `SnappedPosition::snapped` to say which way the
/// cursor was moved.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SnapDirection {
    /// Toward smaller char offsets; snapping lands on the end of an earlier
    /// visible mapping.
    Backward,
    /// Toward larger char offsets; snapping lands on the start of a later
    /// visible mapping.
    Forward,
}
163
/// Result of finding a valid cursor position.
///
/// Borrows the chosen mapping from the offset map it was found in.
#[derive(Debug, Clone)]
pub struct SnappedPosition<'a> {
    /// The visible mapping the cursor ends up in.
    pub mapping: &'a OffsetMapping,
    /// Char offset relative to `mapping.char_range.start`; may equal the
    /// range length when the cursor sits at the end of the mapping.
    pub offset_in_mapping: usize,
    /// `None` when the requested offset was already valid, otherwise the
    /// direction of the search that produced this position.
    pub snapped: Option<SnapDirection>,
}
171
172impl SnappedPosition<'_> {
173 /// Get the absolute char offset for this position.
174 pub fn char_offset(&self) -> usize {
175 self.mapping.char_range.start + self.offset_in_mapping
176 }
177}
178
179/// Find the nearest valid cursor position to a char offset.
180///
181/// A valid position is one that maps to visible content (utf16_len > 0).
182/// If the position is already valid, returns it directly. Otherwise,
183/// searches in the preferred direction first, falling back to the other
184/// direction if needed.
185pub fn find_nearest_valid_position(
186 offset_map: &[OffsetMapping],
187 char_offset: usize,
188 preferred_direction: Option<SnapDirection>,
189) -> Option<SnappedPosition<'_>> {
190 if offset_map.is_empty() {
191 return None;
192 }
193
194 // Try exact match first
195 if let Some((mapping, should_snap)) = find_mapping_for_char(offset_map, char_offset) {
196 if !should_snap {
197 // Position is valid, return it directly
198 let offset_in_mapping = char_offset.saturating_sub(mapping.char_range.start);
199 return Some(SnappedPosition {
200 mapping,
201 offset_in_mapping,
202 snapped: None,
203 });
204 }
205 }
206
207 // Position is invalid or not found - search for nearest valid
208 let search_order = match preferred_direction {
209 Some(SnapDirection::Backward) => [SnapDirection::Backward, SnapDirection::Forward],
210 Some(SnapDirection::Forward) | None => [SnapDirection::Forward, SnapDirection::Backward],
211 };
212
213 for direction in search_order {
214 if let Some(pos) = find_valid_in_direction(offset_map, char_offset, direction) {
215 return Some(pos);
216 }
217 }
218
219 None
220}
221
222/// Search for a valid position in a specific direction.
223fn find_valid_in_direction(
224 offset_map: &[OffsetMapping],
225 char_offset: usize,
226 direction: SnapDirection,
227) -> Option<SnappedPosition<'_>> {
228 match direction {
229 SnapDirection::Forward => {
230 // Find first visible mapping at or after char_offset
231 for mapping in offset_map {
232 if mapping.char_range.start >= char_offset && !mapping.is_invisible() {
233 return Some(SnappedPosition {
234 mapping,
235 offset_in_mapping: 0,
236 snapped: Some(SnapDirection::Forward),
237 });
238 }
239 // Also check if char_offset falls within this visible mapping
240 if mapping.char_range.contains(&char_offset) && !mapping.is_invisible() {
241 let offset_in_mapping = char_offset - mapping.char_range.start;
242 return Some(SnappedPosition {
243 mapping,
244 offset_in_mapping,
245 snapped: Some(SnapDirection::Forward),
246 });
247 }
248 }
249 None
250 }
251 SnapDirection::Backward => {
252 // Find last visible mapping at or before char_offset
253 for mapping in offset_map.iter().rev() {
254 if mapping.char_range.end <= char_offset && !mapping.is_invisible() {
255 // Snap to end of this mapping
256 let offset_in_mapping = mapping.char_range.len();
257 return Some(SnappedPosition {
258 mapping,
259 offset_in_mapping,
260 snapped: Some(SnapDirection::Backward),
261 });
262 }
263 // Also check if char_offset falls within this visible mapping
264 if mapping.char_range.contains(&char_offset) && !mapping.is_invisible() {
265 let offset_in_mapping = char_offset - mapping.char_range.start;
266 return Some(SnappedPosition {
267 mapping,
268 offset_in_mapping,
269 snapped: Some(SnapDirection::Backward),
270 });
271 }
272 }
273 None
274 }
275 }
276}
277
278/// Check if a char offset is at a valid (non-invisible) cursor position.
279pub fn is_valid_cursor_position(offset_map: &[OffsetMapping], char_offset: usize) -> bool {
280 find_mapping_for_char(offset_map, char_offset)
281 .map(|(m, should_snap)| !should_snap && m.utf16_len > 0)
282 .unwrap_or(false)
283}
284
#[cfg(test)]
mod tests {
    use super::*;
    use smol_str::ToSmolStr;

    /// Build a mapping on node "n0" whose byte and char ranges coincide
    /// (ASCII source), with no child index.
    fn ascii_mapping(
        range: std::ops::Range<usize>,
        char_offset_in_node: usize,
        utf16_len: usize,
    ) -> OffsetMapping {
        OffsetMapping {
            byte_range: range.clone(),
            char_range: range,
            node_id: "n0".to_smolstr(),
            char_offset_in_node,
            child_index: None,
            utf16_len,
        }
    }

    /// Table-cell style fixture: invisible, visible, invisible.
    fn make_cell_mappings() -> Vec<OffsetMapping> {
        vec![
            ascii_mapping(0..2, 0, 0), // invisible
            ascii_mapping(2..5, 0, 3), // visible
            ascii_mapping(5..7, 3, 0), // invisible
        ]
    }

    #[test]
    fn test_find_mapping_by_byte() {
        let mappings = make_cell_mappings();

        // Byte 0 (invisible)
        let (mapping, should_snap) = find_mapping_for_byte(&mappings, 0).unwrap();
        assert_eq!(mapping.byte_range, 0..2);
        assert!(should_snap);

        // Byte 3 (visible)
        let (mapping, should_snap) = find_mapping_for_byte(&mappings, 3).unwrap();
        assert_eq!(mapping.byte_range, 2..5);
        assert!(!should_snap);

        // Byte 6 (invisible)
        let (mapping, should_snap) = find_mapping_for_byte(&mappings, 6).unwrap();
        assert_eq!(mapping.byte_range, 5..7);
        assert!(should_snap);
    }

    #[test]
    fn test_find_mapping_by_char() {
        let mappings = make_cell_mappings();

        // Char 0 (invisible)
        let (mapping, should_snap) = find_mapping_for_char(&mappings, 0).unwrap();
        assert_eq!(mapping.char_range, 0..2);
        assert!(should_snap);

        // Char 3 (visible)
        let (mapping, should_snap) = find_mapping_for_char(&mappings, 3).unwrap();
        assert_eq!(mapping.char_range, 2..5);
        assert!(!should_snap);

        // Char 6 (invisible)
        let (mapping, should_snap) = find_mapping_for_char(&mappings, 6).unwrap();
        assert_eq!(mapping.char_range, 5..7);
        assert!(should_snap);
    }

    #[test]
    fn test_contains_byte() {
        let mapping = OffsetMapping {
            byte_range: 10..20,
            char_range: 10..20,
            node_id: "test".to_smolstr(),
            char_offset_in_node: 0,
            child_index: None,
            utf16_len: 5,
        };

        assert!(!mapping.contains_byte(9));
        assert!(mapping.contains_byte(10));
        assert!(mapping.contains_byte(15));
        assert!(mapping.contains_byte(19));
        assert!(!mapping.contains_byte(20));
    }

    #[test]
    fn test_contains_char() {
        let mapping = OffsetMapping {
            byte_range: 10..20,
            char_range: 8..15, // emoji example: fewer chars than bytes
            node_id: "test".to_smolstr(),
            char_offset_in_node: 0,
            child_index: None,
            utf16_len: 5,
        };

        assert!(!mapping.contains_char(7));
        assert!(mapping.contains_char(8));
        assert!(mapping.contains_char(12));
        assert!(mapping.contains_char(14));
        assert!(!mapping.contains_char(15));
    }

    /// Snapping fixture: invisible prefix, visible "alt", a ten-char invisible
    /// run, then visible " text".
    fn make_test_mappings() -> Vec<OffsetMapping> {
        vec![
            ascii_mapping(0..2, 0, 0),   // invisible prefix
            ascii_mapping(2..5, 0, 3),   // visible: "alt"
            ascii_mapping(5..15, 3, 0),  // invisible run
            ascii_mapping(15..20, 3, 5), // visible: " text"
        ]
    }

    #[test]
    fn test_find_nearest_valid_position_exact_match() {
        let mappings = make_test_mappings();

        // Position 3 is in visible mapping (2..5)
        let pos = find_nearest_valid_position(&mappings, 3, None).unwrap();
        assert_eq!(pos.char_offset(), 3);
        assert!(pos.snapped.is_none());
    }

    #[test]
    fn test_find_nearest_valid_position_snap_forward() {
        let mappings = make_test_mappings();

        // Position 0 is invisible, should snap forward to 2
        let pos = find_nearest_valid_position(&mappings, 0, Some(SnapDirection::Forward)).unwrap();
        assert_eq!(pos.char_offset(), 2);
        assert_eq!(pos.snapped, Some(SnapDirection::Forward));
    }

    #[test]
    fn test_find_nearest_valid_position_snap_backward() {
        let mappings = make_test_mappings();

        // Position 10 is invisible (in 5..15), prefer backward to end of "alt" (position 5)
        let pos =
            find_nearest_valid_position(&mappings, 10, Some(SnapDirection::Backward)).unwrap();
        assert_eq!(pos.char_offset(), 5); // end of "alt" mapping
        assert_eq!(pos.snapped, Some(SnapDirection::Backward));
    }

    #[test]
    fn test_find_nearest_valid_position_default_forward() {
        let mappings = make_test_mappings();

        // Position 0 is invisible, None direction defaults to forward
        let pos = find_nearest_valid_position(&mappings, 0, None).unwrap();
        assert_eq!(pos.char_offset(), 2);
        assert_eq!(pos.snapped, Some(SnapDirection::Forward));
    }

    #[test]
    fn test_find_nearest_valid_position_snap_forward_from_invisible() {
        let mappings = make_test_mappings();

        // Position 10 is in invisible range (5..15), forward finds visible (15..20)
        let pos = find_nearest_valid_position(&mappings, 10, Some(SnapDirection::Forward)).unwrap();
        assert_eq!(pos.char_offset(), 15);
        assert_eq!(pos.snapped, Some(SnapDirection::Forward));
    }

    #[test]
    fn test_is_valid_cursor_position() {
        let mappings = make_test_mappings();

        // Invisible positions
        assert!(!is_valid_cursor_position(&mappings, 0));
        assert!(!is_valid_cursor_position(&mappings, 1));
        assert!(!is_valid_cursor_position(&mappings, 10));

        // Visible positions
        assert!(is_valid_cursor_position(&mappings, 2));
        assert!(is_valid_cursor_position(&mappings, 3));
        assert!(is_valid_cursor_position(&mappings, 4));
        assert!(is_valid_cursor_position(&mappings, 15));
        assert!(is_valid_cursor_position(&mappings, 17));
    }

    #[test]
    fn test_find_nearest_valid_position_empty() {
        let mappings: Vec<OffsetMapping> = vec![];
        assert!(find_nearest_valid_position(&mappings, 0, None).is_none());
    }
}