atproto blogging
1//! Text navigation and analysis helpers.
2//!
3//! These functions work with the `EditorDocument` trait to provide
4//! common text operations like finding line boundaries and word boundaries.
5
6use crate::document::EditorDocument;
7
8/// Find start of line containing offset.
9pub fn find_line_start<D: EditorDocument>(doc: &D, offset: usize) -> usize {
10 if offset == 0 {
11 return 0;
12 }
13
14 let mut pos = offset;
15 while pos > 0 {
16 if let Some('\n') = doc.char_at(pos - 1) {
17 return pos;
18 }
19 pos -= 1;
20 }
21 0
22}
23
24/// Find end of line containing offset (position of newline or end of doc).
25pub fn find_line_end<D: EditorDocument>(doc: &D, offset: usize) -> usize {
26 let len = doc.len_chars();
27 if offset >= len {
28 return len;
29 }
30
31 let mut pos = offset;
32 while pos < len {
33 if let Some('\n') = doc.char_at(pos) {
34 return pos;
35 }
36 pos += 1;
37 }
38 len
39}
40
41/// Find word boundary backward from cursor.
42pub fn find_word_boundary_backward<D: EditorDocument>(doc: &D, cursor: usize) -> usize {
43 if cursor == 0 {
44 return 0;
45 }
46
47 let mut pos = cursor;
48
49 // Skip any whitespace/punctuation immediately before cursor.
50 while pos > 0 {
51 match doc.char_at(pos - 1) {
52 Some(c) if c.is_alphanumeric() || c == '_' => break,
53 Some(_) => pos -= 1,
54 None => break,
55 }
56 }
57
58 // Skip the word characters.
59 while pos > 0 {
60 match doc.char_at(pos - 1) {
61 Some(c) if c.is_alphanumeric() || c == '_' => pos -= 1,
62 _ => break,
63 }
64 }
65
66 pos
67}
68
69/// Find word boundary forward from cursor.
70pub fn find_word_boundary_forward<D: EditorDocument>(doc: &D, cursor: usize) -> usize {
71 let len = doc.len_chars();
72 if cursor >= len {
73 return len;
74 }
75
76 let mut pos = cursor;
77
78 // Skip word characters first.
79 while pos < len {
80 match doc.char_at(pos) {
81 Some(c) if c.is_alphanumeric() || c == '_' => pos += 1,
82 _ => break,
83 }
84 }
85
86 // Then skip whitespace/punctuation.
87 while pos < len {
88 match doc.char_at(pos) {
89 Some(c) if c.is_alphanumeric() || c == '_' => break,
90 Some(_) => pos += 1,
91 None => break,
92 }
93 }
94
95 pos
96}
97
/// Describes what kind of list item the cursor is in, if any.
///
/// Values can be compared with `==`, which lets callers and tests assert on
/// a whole context (indentation included) instead of pattern-matching
/// individual fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ListContext {
    /// Unordered list item.
    ///
    /// `marker` is the bullet character (`'-'` or `'*'`) and `indent` is the
    /// indentation text that precedes it on the line.
    Unordered { indent: String, marker: char },
    /// Ordered list item.
    ///
    /// `number` is the item's current number and `indent` is the indentation
    /// text that precedes it on the line.
    Ordered { indent: String, number: usize },
}
106
107/// Detect if cursor is in a list item and return context for continuation.
108pub fn detect_list_context<D: EditorDocument>(doc: &D, cursor_offset: usize) -> Option<ListContext> {
109 let line_start = find_line_start(doc, cursor_offset);
110 let line_end = find_line_end(doc, cursor_offset);
111
112 if line_start >= line_end {
113 return None;
114 }
115
116 let line = doc.slice(line_start..line_end)?;
117
118 // Parse indentation.
119 let indent: String = line
120 .chars()
121 .take_while(|c| *c == ' ' || *c == '\t')
122 .collect();
123 let trimmed = &line[indent.len()..];
124
125 // Check for unordered list marker: "- " or "* ".
126 if trimmed.starts_with("- ") {
127 return Some(ListContext::Unordered {
128 indent,
129 marker: '-',
130 });
131 }
132 if trimmed.starts_with("* ") {
133 return Some(ListContext::Unordered {
134 indent,
135 marker: '*',
136 });
137 }
138
139 // Check for ordered list marker: "1. ", "2. ", etc.
140 if let Some(dot_pos) = trimmed.find(". ") {
141 let num_part = &trimmed[..dot_pos];
142 if !num_part.is_empty() && num_part.chars().all(|c| c.is_ascii_digit()) {
143 if let Ok(number) = num_part.parse::<usize>() {
144 return Some(ListContext::Ordered { indent, number });
145 }
146 }
147 }
148
149 None
150}
151
152/// Check if the current list item is empty (just the marker, no content).
153pub fn is_list_item_empty<D: EditorDocument>(
154 doc: &D,
155 cursor_offset: usize,
156 ctx: &ListContext,
157) -> bool {
158 let line_start = find_line_start(doc, cursor_offset);
159 let line_end = find_line_end(doc, cursor_offset);
160
161 let line = match doc.slice(line_start..line_end) {
162 Some(s) => s,
163 None => return false,
164 };
165
166 // Calculate expected marker length.
167 let marker_len = match ctx {
168 ListContext::Unordered { indent, .. } => indent.len() + 2, // "- "
169 ListContext::Ordered { indent, number } => {
170 indent.len() + number.to_string().len() + 2 // "1. "
171 }
172 };
173
174 line.len() <= marker_len
175}
176
177/// Count leading zero-width characters before offset.
178pub fn count_leading_zero_width<D: EditorDocument>(doc: &D, offset: usize) -> usize {
179 let mut count = 0;
180 let mut pos = offset;
181
182 while pos > 0 {
183 match doc.char_at(pos - 1) {
184 Some('\u{200C}') | Some('\u{200B}') => {
185 count += 1;
186 pos -= 1;
187 }
188 _ => break,
189 }
190 }
191
192 count
193}
194
195/// Check if character at offset is a zero-width character.
196pub fn is_zero_width_char<D: EditorDocument>(doc: &D, offset: usize) -> bool {
197 matches!(doc.char_at(offset), Some('\u{200C}') | Some('\u{200B}'))
198}
199
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{EditorRope, PlainEditor, UndoableBuffer};

    /// Concrete editor type passed to the helpers under test.
    type TestEditor = PlainEditor<UndoableBuffer<EditorRope>>;

    /// Builds an editor whose buffer initially holds `content`.
    fn make_editor(content: &str) -> TestEditor {
        PlainEditor::new(UndoableBuffer::new(EditorRope::from_str(content), 100))
    }

    #[test]
    fn test_find_line_start() {
        let editor = make_editor("hello\nworld\ntest");
        let line_start = |offset| find_line_start(&editor, offset);

        // First line, including the position of its trailing newline.
        assert_eq!(line_start(0), 0);
        assert_eq!(line_start(3), 0);
        assert_eq!(line_start(5), 0);
        // Second line: "world".
        assert_eq!(line_start(6), 6);
        assert_eq!(line_start(8), 6);
        // Third line: "test".
        assert_eq!(line_start(12), 12);
    }

    #[test]
    fn test_find_line_end() {
        let editor = make_editor("hello\nworld\ntest");
        let line_end = |offset| find_line_end(&editor, offset);

        assert_eq!(line_end(0), 5);
        assert_eq!(line_end(3), 5);
        assert_eq!(line_end(6), 11);
        // The last line has no newline, so it ends at the document length.
        assert_eq!(line_end(12), 16);
    }

    #[test]
    fn test_find_word_boundary_backward() {
        let editor = make_editor("hello world test");
        let word_start = |cursor| find_word_boundary_backward(&editor, cursor);

        assert_eq!(word_start(16), 12); // end of document -> start of "test"
        assert_eq!(word_start(12), 6); // start of "test" -> start of "world"
        assert_eq!(word_start(11), 6); // end of "world" -> start of "world"
        assert_eq!(word_start(5), 0); // end of "hello" -> start of "hello"
    }

    #[test]
    fn test_find_word_boundary_forward() {
        let editor = make_editor("hello world test");
        let next_word = |cursor| find_word_boundary_forward(&editor, cursor);

        assert_eq!(next_word(0), 6); // start of "hello" -> start of "world"
        assert_eq!(next_word(6), 12); // start of "world" -> start of "test"
        assert_eq!(next_word(12), 16); // start of "test" -> end of document
    }

    #[test]
    fn test_detect_list_context_unordered() {
        let editor = make_editor("- item one\n- item two");

        // One cursor position inside each of the two items.
        assert!(matches!(
            detect_list_context(&editor, 5),
            Some(ListContext::Unordered { marker: '-', .. })
        ));
        assert!(matches!(
            detect_list_context(&editor, 15),
            Some(ListContext::Unordered { marker: '-', .. })
        ));
    }

    #[test]
    fn test_detect_list_context_ordered() {
        let editor = make_editor("1. first\n2. second");

        assert!(matches!(
            detect_list_context(&editor, 5),
            Some(ListContext::Ordered { number: 1, .. })
        ));
        assert!(matches!(
            detect_list_context(&editor, 12),
            Some(ListContext::Ordered { number: 2, .. })
        ));
    }

    #[test]
    fn test_is_list_item_empty() {
        let editor = make_editor("- \n- item");
        let is_empty_at = |offset: usize| {
            let ctx = detect_list_context(&editor, offset).unwrap();
            is_list_item_empty(&editor, offset, &ctx)
        };

        assert!(is_empty_at(1)); // "- " holds only the marker
        assert!(!is_empty_at(5)); // "- item" has content
    }
}
286}