// atproto blogging
//! Text buffer abstraction for editor storage.
//!
//! The `TextBuffer` trait provides a common interface for text storage,
//! allowing the editor to work with different backends (ropey for local
//! editing, Loro for CRDT collaboration).
6
7use smol_str::{SmolStr, ToSmolStr};
8use std::ops::Range;
9use web_time::Instant;
10
11use crate::types::{EditInfo, BLOCK_SYNTAX_ZONE};
12
13/// A text buffer that supports efficient editing and offset conversion.
14///
15/// All offsets are in Unicode scalar values (chars), not bytes or UTF-16.
16pub trait TextBuffer {
17 /// Total length in bytes (UTF-8).
18 fn len_bytes(&self) -> usize;
19
20 /// Total length in chars (Unicode scalar values).
21 fn len_chars(&self) -> usize;
22
23 /// Check if empty.
24 fn is_empty(&self) -> bool {
25 self.len_chars() == 0
26 }
27
28 /// Insert text at char offset.
29 fn insert(&mut self, char_offset: usize, text: &str);
30
31 /// Append text at end.
32 ///
33 /// Default implementation calls insert at len_chars(). Override if
34 /// the underlying buffer has a more efficient append operation.
35 fn push(&mut self, text: &str) {
36 self.insert(self.len_chars(), text);
37 }
38
39 /// Delete char range.
40 fn delete(&mut self, char_range: Range<usize>);
41
42 /// Replace char range with text.
43 fn replace(&mut self, char_range: Range<usize>, text: &str) {
44 self.delete(char_range.clone());
45 self.insert(char_range.start, text);
46 }
47
48 /// Get a slice as SmolStr. Returns None if range is invalid.
49 ///
50 /// SmolStr is used for efficiency: strings ≤23 bytes are stored inline
51 /// (no heap allocation), longer strings are Arc'd (cheap to clone).
52 fn slice(&self, char_range: Range<usize>) -> Option<SmolStr>;
53
54 /// Get character at offset. Returns None if out of bounds.
55 fn char_at(&self, char_offset: usize) -> Option<char>;
56
57 /// Convert entire buffer to String.
58 fn to_string(&self) -> String;
59
60 /// Convert char offset to byte offset.
61 fn char_to_byte(&self, char_offset: usize) -> usize;
62
63 /// Convert byte offset to char offset.
64 fn byte_to_char(&self, byte_offset: usize) -> usize;
65
66 /// Get info about the last edit operation, if any.
67 fn last_edit(&self) -> Option<EditInfo>;
68
69 /// Check if a char offset is in the block-syntax zone (first few chars of a line).
70 fn is_in_block_syntax_zone(&self, offset: usize) -> bool {
71 if offset <= BLOCK_SYNTAX_ZONE {
72 return true;
73 }
74
75 // Get slice of the search range and look for newline.
76 let search_start = offset.saturating_sub(BLOCK_SYNTAX_ZONE + 1);
77 match self.slice(search_start..offset) {
78 Some(s) => match s.rfind('\n') {
79 Some(pos) => {
80 // Distance from character after newline to current offset
81 let newline_abs_pos = search_start + pos;
82 let dist = offset.saturating_sub(newline_abs_pos + 1);
83 dist <= BLOCK_SYNTAX_ZONE
84 }
85 None => false, // No newline in range, offset > BLOCK_SYNTAX_ZONE.
86 },
87 None => false,
88 }
89 }
90}
91
92/// Ropey-backed text buffer for local editing.
93///
94/// Provides O(log n) editing operations and offset conversions.
95#[derive(Clone)]
96pub struct EditorRope {
97 rope: ropey::Rope,
98 last_edit: Option<EditInfo>,
99}
100
101impl Default for EditorRope {
102 fn default() -> Self {
103 Self {
104 rope: ropey::Rope::default(),
105 last_edit: None,
106 }
107 }
108}
109
110impl EditorRope {
111 /// Create a new empty rope.
112 pub fn new() -> Self {
113 Self::default()
114 }
115
116 /// Create from string.
117 pub fn from_str(s: &str) -> Self {
118 Self {
119 rope: ropey::Rope::from_str(s),
120 last_edit: None,
121 }
122 }
123
124 /// Get a reference to the underlying rope (for advanced operations).
125 pub fn rope(&self) -> &ropey::Rope {
126 &self.rope
127 }
128
129 /// Get a rope slice for zero-copy iteration over chunks.
130 ///
131 /// Use this when you need to iterate over the text without allocating,
132 /// e.g., for hashing or character-by-character processing.
133 pub fn rope_slice(&self, char_range: Range<usize>) -> Option<ropey::RopeSlice<'_>> {
134 if char_range.end > self.rope.len_chars() {
135 return None;
136 }
137 Some(self.rope.slice(char_range))
138 }
139}
140
141impl TextBuffer for EditorRope {
142 fn len_bytes(&self) -> usize {
143 self.rope.len_bytes()
144 }
145
146 fn len_chars(&self) -> usize {
147 self.rope.len_chars()
148 }
149
150 fn insert(&mut self, char_offset: usize, text: &str) {
151 let in_block_syntax_zone = self.is_in_block_syntax_zone(char_offset);
152 let contains_newline = text.contains('\n');
153
154 self.rope.insert(char_offset, text);
155
156 self.last_edit = Some(EditInfo {
157 edit_char_pos: char_offset,
158 inserted_len: text.chars().count(),
159 deleted_len: 0,
160 contains_newline,
161 in_block_syntax_zone,
162 doc_len_after: self.rope.len_chars(),
163 timestamp: Instant::now(),
164 });
165 }
166
167 // Ropey's insert is O(log n) regardless of position, so push is the same.
168 // Override for consistency with trait.
169 fn push(&mut self, text: &str) {
170 self.insert(self.rope.len_chars(), text);
171 }
172
173 fn delete(&mut self, char_range: Range<usize>) {
174 let in_block_syntax_zone = self.is_in_block_syntax_zone(char_range.start);
175 let contains_newline = self
176 .slice(char_range.clone())
177 .map(|s| s.contains('\n'))
178 .unwrap_or(false);
179 let deleted_len = char_range.len();
180
181 self.rope.remove(char_range.clone());
182
183 self.last_edit = Some(EditInfo {
184 edit_char_pos: char_range.start,
185 inserted_len: 0,
186 deleted_len,
187 contains_newline,
188 in_block_syntax_zone,
189 doc_len_after: self.rope.len_chars(),
190 timestamp: Instant::now(),
191 });
192 }
193
194 fn slice(&self, char_range: Range<usize>) -> Option<SmolStr> {
195 if char_range.end > self.len_chars() {
196 return None;
197 }
198 Some(self.rope.slice(char_range).to_smolstr())
199 }
200
201 fn char_at(&self, char_offset: usize) -> Option<char> {
202 if char_offset >= self.len_chars() {
203 return None;
204 }
205 Some(self.rope.char(char_offset))
206 }
207
208 fn to_string(&self) -> String {
209 self.rope.to_string()
210 }
211
212 fn char_to_byte(&self, char_offset: usize) -> usize {
213 self.rope.char_to_byte(char_offset)
214 }
215
216 fn byte_to_char(&self, byte_offset: usize) -> usize {
217 self.rope.byte_to_char(byte_offset)
218 }
219
220 fn last_edit(&self) -> Option<EditInfo> {
221 self.last_edit
222 }
223
224 fn is_in_block_syntax_zone(&self, offset: usize) -> bool {
225 if offset > self.rope.len_chars() {
226 return false;
227 }
228 let line_num = self.rope.char_to_line(offset);
229 let line_start = self.rope.line_to_char(line_num);
230 (offset - line_start) <= BLOCK_SYNTAX_ZONE
231 }
232}
233
234impl From<&str> for EditorRope {
235 fn from(s: &str) -> Self {
236 Self::from_str(s)
237 }
238}
239
240impl From<String> for EditorRope {
241 fn from(s: String) -> Self {
242 Self::from_str(&s)
243 }
244}
245
#[cfg(test)]
mod tests {
    use super::*;

    /// An insert followed by the matching delete restores the original text.
    #[test]
    fn test_basic_operations() {
        let mut buf = EditorRope::from_str("hello world");
        assert_eq!(buf.len_chars(), 11);
        assert_eq!(buf.to_string(), "hello world");

        buf.insert(5, " beautiful");
        assert_eq!(buf.to_string(), "hello beautiful world");

        // The inserted " beautiful" occupies chars 5..15 (10 chars).
        buf.delete(5..15);
        assert_eq!(buf.to_string(), "hello world");
    }

    /// `char_at` yields the char inside the buffer and `None` at the end.
    #[test]
    fn test_char_at() {
        let buf = EditorRope::from_str("hello");
        let cases = [(0, Some('h')), (4, Some('o')), (5, None)];
        for (offset, expected) in cases {
            assert_eq!(buf.char_at(offset), expected);
        }
    }

    /// `slice` returns the requested text, or `None` past the end.
    #[test]
    fn test_slice() {
        let buf = EditorRope::from_str("hello world");
        let cases = [
            (0..5, Some("hello")),
            (6..11, Some("world")),
            (0..100, None),
        ];
        for (range, expected) in cases {
            assert_eq!(buf.slice(range).as_deref(), expected);
        }
    }

    /// Char and byte offsets diverge after a multi-byte char.
    #[test]
    fn test_offset_conversion() {
        // The emoji is a single char encoded as 4 bytes in UTF-8.
        let buf = EditorRope::from_str("hello 🌍");
        assert_eq!(buf.len_chars(), 7); // "hello " is 6 chars, plus the emoji
        assert_eq!(buf.len_bytes(), 10); // 6 + 4

        // Offset 6 sits just before the emoji, offset 7 just after it.
        assert_eq!(buf.char_to_byte(6), 6);
        assert_eq!(buf.char_to_byte(7), 10);
        assert_eq!(buf.byte_to_char(6), 6);
        assert_eq!(buf.byte_to_char(10), 7);
    }

    /// `replace` swaps the given char range for the new text.
    #[test]
    fn test_replace() {
        let mut buf = EditorRope::from_str("hello world");
        buf.replace(6..11, "rust");
        assert_eq!(buf.to_string(), "hello rust");
    }
}
299}