at main 299 lines 9.0 kB view raw
1//! Text buffer abstraction for editor storage. 2//! 3//! The `TextBuffer` trait provides a common interface for text storage, 4//! allowing the editor to work with different backends (ropey for local, 5//! Loro for CRDT collaboration). 6 7use smol_str::{SmolStr, ToSmolStr}; 8use std::ops::Range; 9use web_time::Instant; 10 11use crate::types::{EditInfo, BLOCK_SYNTAX_ZONE}; 12 13/// A text buffer that supports efficient editing and offset conversion. 14/// 15/// All offsets are in Unicode scalar values (chars), not bytes or UTF-16. 16pub trait TextBuffer { 17 /// Total length in bytes (UTF-8). 18 fn len_bytes(&self) -> usize; 19 20 /// Total length in chars (Unicode scalar values). 21 fn len_chars(&self) -> usize; 22 23 /// Check if empty. 24 fn is_empty(&self) -> bool { 25 self.len_chars() == 0 26 } 27 28 /// Insert text at char offset. 29 fn insert(&mut self, char_offset: usize, text: &str); 30 31 /// Append text at end. 32 /// 33 /// Default implementation calls insert at len_chars(). Override if 34 /// the underlying buffer has a more efficient append operation. 35 fn push(&mut self, text: &str) { 36 self.insert(self.len_chars(), text); 37 } 38 39 /// Delete char range. 40 fn delete(&mut self, char_range: Range<usize>); 41 42 /// Replace char range with text. 43 fn replace(&mut self, char_range: Range<usize>, text: &str) { 44 self.delete(char_range.clone()); 45 self.insert(char_range.start, text); 46 } 47 48 /// Get a slice as SmolStr. Returns None if range is invalid. 49 /// 50 /// SmolStr is used for efficiency: strings ≤23 bytes are stored inline 51 /// (no heap allocation), longer strings are Arc'd (cheap to clone). 52 fn slice(&self, char_range: Range<usize>) -> Option<SmolStr>; 53 54 /// Get character at offset. Returns None if out of bounds. 55 fn char_at(&self, char_offset: usize) -> Option<char>; 56 57 /// Convert entire buffer to String. 58 fn to_string(&self) -> String; 59 60 /// Convert char offset to byte offset. 61 fn char_to_byte(&self, char_offset: usize) -> usize; 62 63 /// Convert byte offset to char offset. 64 fn byte_to_char(&self, byte_offset: usize) -> usize; 65 66 /// Get info about the last edit operation, if any. 67 fn last_edit(&self) -> Option<EditInfo>; 68 69 /// Check if a char offset is in the block-syntax zone (first few chars of a line). 70 fn is_in_block_syntax_zone(&self, offset: usize) -> bool { 71 if offset <= BLOCK_SYNTAX_ZONE { 72 return true; 73 } 74 75 // Get slice of the search range and look for newline. 76 let search_start = offset.saturating_sub(BLOCK_SYNTAX_ZONE + 1); 77 match self.slice(search_start..offset) { 78 Some(s) => match s.rfind('\n') { 79 Some(pos) => { 80 // Distance from character after newline to current offset 81 let newline_abs_pos = search_start + pos; 82 let dist = offset.saturating_sub(newline_abs_pos + 1); 83 dist <= BLOCK_SYNTAX_ZONE 84 } 85 None => false, // No newline in range, offset > BLOCK_SYNTAX_ZONE. 86 }, 87 None => false, 88 } 89 } 90} 91 92/// Ropey-backed text buffer for local editing. 93/// 94/// Provides O(log n) editing operations and offset conversions. 95#[derive(Clone)] 96pub struct EditorRope { 97 rope: ropey::Rope, 98 last_edit: Option<EditInfo>, 99} 100 101impl Default for EditorRope { 102 fn default() -> Self { 103 Self { 104 rope: ropey::Rope::default(), 105 last_edit: None, 106 } 107 } 108} 109 110impl EditorRope { 111 /// Create a new empty rope. 112 pub fn new() -> Self { 113 Self::default() 114 } 115 116 /// Create from string. 117 pub fn from_str(s: &str) -> Self { 118 Self { 119 rope: ropey::Rope::from_str(s), 120 last_edit: None, 121 } 122 } 123 124 /// Get a reference to the underlying rope (for advanced operations). 125 pub fn rope(&self) -> &ropey::Rope { 126 &self.rope 127 } 128 129 /// Get a rope slice for zero-copy iteration over chunks. 130 /// 131 /// Use this when you need to iterate over the text without allocating, 132 /// e.g., for hashing or character-by-character processing. 133 pub fn rope_slice(&self, char_range: Range<usize>) -> Option<ropey::RopeSlice<'_>> { 134 if char_range.end > self.rope.len_chars() { 135 return None; 136 } 137 Some(self.rope.slice(char_range)) 138 } 139} 140 141impl TextBuffer for EditorRope { 142 fn len_bytes(&self) -> usize { 143 self.rope.len_bytes() 144 } 145 146 fn len_chars(&self) -> usize { 147 self.rope.len_chars() 148 } 149 150 fn insert(&mut self, char_offset: usize, text: &str) { 151 let in_block_syntax_zone = self.is_in_block_syntax_zone(char_offset); 152 let contains_newline = text.contains('\n'); 153 154 self.rope.insert(char_offset, text); 155 156 self.last_edit = Some(EditInfo { 157 edit_char_pos: char_offset, 158 inserted_len: text.chars().count(), 159 deleted_len: 0, 160 contains_newline, 161 in_block_syntax_zone, 162 doc_len_after: self.rope.len_chars(), 163 timestamp: Instant::now(), 164 }); 165 } 166 167 // Ropey's insert is O(log n) regardless of position, so push is the same. 168 // Override for consistency with trait. 169 fn push(&mut self, text: &str) { 170 self.insert(self.rope.len_chars(), text); 171 } 172 173 fn delete(&mut self, char_range: Range<usize>) { 174 let in_block_syntax_zone = self.is_in_block_syntax_zone(char_range.start); 175 let contains_newline = self 176 .slice(char_range.clone()) 177 .map(|s| s.contains('\n')) 178 .unwrap_or(false); 179 let deleted_len = char_range.len(); 180 181 self.rope.remove(char_range.clone()); 182 183 self.last_edit = Some(EditInfo { 184 edit_char_pos: char_range.start, 185 inserted_len: 0, 186 deleted_len, 187 contains_newline, 188 in_block_syntax_zone, 189 doc_len_after: self.rope.len_chars(), 190 timestamp: Instant::now(), 191 }); 192 } 193 194 fn slice(&self, char_range: Range<usize>) -> Option<SmolStr> { 195 if char_range.end > self.len_chars() { 196 return None; 197 } 198 Some(self.rope.slice(char_range).to_smolstr()) 199 } 200 201 fn char_at(&self, char_offset: usize) -> Option<char> { 202 if char_offset >= self.len_chars() { 203 return None; 204 } 205 Some(self.rope.char(char_offset)) 206 } 207 208 fn to_string(&self) -> String { 209 self.rope.to_string() 210 } 211 212 fn char_to_byte(&self, char_offset: usize) -> usize { 213 self.rope.char_to_byte(char_offset) 214 } 215 216 fn byte_to_char(&self, byte_offset: usize) -> usize { 217 self.rope.byte_to_char(byte_offset) 218 } 219 220 fn last_edit(&self) -> Option<EditInfo> { 221 self.last_edit 222 } 223 224 fn is_in_block_syntax_zone(&self, offset: usize) -> bool { 225 if offset > self.rope.len_chars() { 226 return false; 227 } 228 let line_num = self.rope.char_to_line(offset); 229 let line_start = self.rope.line_to_char(line_num); 230 (offset - line_start) <= BLOCK_SYNTAX_ZONE 231 } 232} 233 234impl From<&str> for EditorRope { 235 fn from(s: &str) -> Self { 236 Self::from_str(s) 237 } 238} 239 240impl From<String> for EditorRope { 241 fn from(s: String) -> Self { 242 Self::from_str(&s) 243 } 244} 245 246#[cfg(test)] 247mod tests { 248 use super::*; 249 250 #[test] 251 fn test_basic_operations() { 252 let mut rope = EditorRope::from_str("hello world"); 253 assert_eq!(rope.len_chars(), 11); 254 assert_eq!(rope.to_string(), "hello world"); 255 256 rope.insert(5, " beautiful"); 257 assert_eq!(rope.to_string(), "hello beautiful world"); 258 259 // " beautiful" is 10 chars at positions 5..15 260 rope.delete(5..15); 261 assert_eq!(rope.to_string(), "hello world"); 262 } 263 264 #[test] 265 fn test_char_at() { 266 let rope = EditorRope::from_str("hello"); 267 assert_eq!(rope.char_at(0), Some('h')); 268 assert_eq!(rope.char_at(4), Some('o')); 269 assert_eq!(rope.char_at(5), None); 270 } 271 272 #[test] 273 fn test_slice() { 274 let rope = EditorRope::from_str("hello world"); 275 assert_eq!(rope.slice(0..5).as_deref(), Some("hello")); 276 assert_eq!(rope.slice(6..11).as_deref(), Some("world")); 277 assert_eq!(rope.slice(0..100), None); 278 } 279 280 #[test] 281 fn test_offset_conversion() { 282 // "hello 🌍" - emoji is 4 bytes, 1 char 283 let rope = EditorRope::from_str("hello 🌍"); 284 assert_eq!(rope.len_chars(), 7); // h e l l o 🌍 285 assert_eq!(rope.len_bytes(), 10); // 6 + 4 286 287 assert_eq!(rope.char_to_byte(6), 6); // before emoji 288 assert_eq!(rope.char_to_byte(7), 10); // after emoji 289 assert_eq!(rope.byte_to_char(6), 6); 290 assert_eq!(rope.byte_to_char(10), 7); 291 } 292 293 #[test] 294 fn test_replace() { 295 let mut rope = EditorRope::from_str("hello world"); 296 rope.replace(6..11, "rust"); 297 assert_eq!(rope.to_string(), "hello rust"); 298 } 299}