A gap buffer implementation in Go.
at main 489 lines 13 kB view raw
1// SPDX-FileCopyrightText: Copyright 2024 Roland Csaszar 2// SPDX-License-Identifier: MIT 3// 4// Project: go-gap-buffer 5// File: gap-buffer.go 6// Date: 07.Feb.2024 7// 8// ============================================================================= 9 10// This library implements a gap buffer, which is a data structure to be used as 11// the container of the text for a (simple or not so simple) text editor. 12// 13// Movements to the left or right and deletion to the left and right work on 14// Unicode runes. It supports line movements (up and down a line from the current 15// one), it splits lines based on the newline character '\n'. 16// 17// Caveats: 18// - Only line feed '\n' line endings are supported by this library, so 19// Windows-style CR-LF (`\r\n`) line endings will not work with movements 20// up and down a line. Single Unicode rune movements are going to work, but 21// the CR '\r' character is handled as a "normal" character, as part of the 22// string. 23// - This implementation is not thread save 24// - A gap buffer is not ideal for using multiple cursors, as that would 25// involve multiple jumps and copying of data in the gap buffer 26// 27// A gap buffer is an array with a "gap" - unused elements in the array - at the 28// cursor position, where text is to be inserted and deleted. 29// 30// The string "Hello world!" with the cursor at the end of "Hello" - 31// "Hello| world!" - looks like this in a gap buffer array: 32// 33// Hello|< gap start, the cursor position gap end >| world! 34// 35// ['H', 'e', 'l', 'l', 'o', 0, 0, 0, 0, 0, ' ', 'w', 'o', 'r', 'l', 'd', '!'] 36// 0 1 2 3 4 | gap | 5 6 7 8 9 10 11 37// 38// Movement in the gap buffer works by moving the start and end of the gap, same 39// with deletion of unicode runes in both directions. 40// 41// Moving the cursor two runes to the left: 42// 43// Hel|< gap start, the cursor position gap end >|lo world! 44// 45// ['H', 'e', 'l', 0, 0, 0, 0, 0, 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!'] 46// 0 1 2 | gap | 3 4 5 6 7 8 9 10 11 47// 48// Deleting three runes to the left: 49// 50// |< gap start, the cursor position gap end >|lo world! 51// 52// ['H', 'e', 'l', 0, 0, 0, 0, 0, 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!'] 53// | gap | 1 2 3 4 5 6 7 8 9 54// 55// Insertion happens at the cursor position by appending at the start of the gap 56// and moving the start of the gap accordingly. 57// 58// New|< gap start, the cursor position gap end >|lo world! 59// 60// ['N', 'e', 'w', 0, 0, 0, 0, 0, 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!'] 61// 0 1 2 | gap | 3 4 5 6 7 8 9 10 11 62// 63// Implementation Detail: 64// The line lengths are stored in a another gap buffer, which is synchronized 65// with the gap buffer of the text itself. 66package gapbuffer 67 68import ( 69 "strings" 70 "unicode/utf8" 71) 72 73// GapBuffer represents a gap buffer. 74type GapBuffer struct { 75 // The index in the gap buffer `GapBuffer.data` of the start of the gap. 76 // The position of the cursor. 77 start int 78 79 // The index in the gap buffer `GapBuffer.data` of the end of the gap. 80 // The position of the first unicode scalar point after the cursor. 81 end int 82 83 // `wantsCol` is the rune column (not byte column!) the cursor wants to hold 84 // when going up or down. 85 wantsCol int 86 87 // The lineBuffer that stores the line length information of the gap buffer. 88 // 89 // See [lineBuffer]. 90 lines lineBuffer 91 92 // The data of the gap buffer. 93 data []byte 94} 95 96const ( 97 defaultCapacity = 1024 // The default size of a gap buffer in bytes. 98 99 // 1/10th of the capacity of the gap buffer, default: 102. 100 lineCapFactor = 10 101 102 // Minimum size in int of the line buffer `GapBuffer.lines`. A lineBuffer 103 // has at least this size, even if the [lineCapFactor] would yield a smaller 104 // one. 105 minLineCap = 10 106 107 // The factor by which to grow the gap buffer and line buffer, if needed. 108 growFactor = 2 109) 110 111// Return the contents of the gap buffer as a string. 112func (g *GapBuffer) String() string { 113 var builder strings.Builder 114 115 builder.Grow(len(g.data) - (g.end - g.start)) 116 builder.Write(g.data[:g.start]) 117 builder.Write(g.data[g.end:]) 118 119 return builder.String() 120} 121 122// Return the contents of the gap buffer as two strings. The part to the left of 123// the cursor is returned in `left` and the part to the right of the cursor is 124// returned in `right`. 125func (g *GapBuffer) StringPair() (left string, right string) { 126 return string(g.data[:g.start]), string(g.data[g.end:]) 127} 128 129// Return the length in bytes of the contents of the gap buffer. 130func (g *GapBuffer) StringLength() int { 131 return len(g.data) - (g.end - g.start) 132} 133 134// Construct a new GapBuffer from a capacity. The capacity is the number of 135// bytes the gap buffer can hold without a resize. 136// 137// The default size is 1024 bytes, if you know that you need less or more space, 138// you can set the initial size to something more appropriate. 139// 140// See also [New], [NewStr], [NewStrCap]. 141func NewCap(size int) *GapBuffer { 142 return &GapBuffer{ 143 start: 0, 144 end: size, 145 wantsCol: 0, 146 data: make([]byte, size), 147 lines: *newLineBuf(size), 148 } 149} 150 151// Construct a new, empty GapBuffer with the default capacity. 152// 153// See also [NewCap], [NewStr], [NewStrCap]. 154func New() *GapBuffer { 155 return NewCap(defaultCapacity) 156} 157 158// Construct a new GapBuffer from a string and a capacity. The cursor position 159// is set to the end of the string. The capacity is the number of bytes the gap 160// buffer can hold without a resize. 161// 162// The default size is 1024 bytes, if you know that you need less or more space, 163// you can set the initial size to something more appropriate. 164// 165// See also [New], [NewCap], [NewStr], [GapBuffer.Size]. 166func NewStrCap(str string, c int) *GapBuffer { 167 if str == "" { 168 return NewCap(c) 169 } 170 171 size := len(str) * growFactor 172 if size < c { 173 size = c 174 } 175 176 dat := make([]byte, size) 177 sIdx := copy(dat, str) 178 lines := newLineBufStr(str, size) 179 runeCol := 0 180 lineStart := lines.curLineStart() 181 182 if lineStart < sIdx { 183 runeCol = utf8.RuneCount(dat[lineStart:sIdx]) 184 } 185 186 return &GapBuffer{ 187 start: sIdx, 188 end: size, 189 wantsCol: runeCol, 190 data: dat, 191 lines: *lines, 192 } 193} 194 195// Construct a new GapBuffer from a string. The cursor position is set to the 196// end of the string. 197// 198// See also [New], [NewCap], [NewStrCap]. 199func NewStr(s string) *GapBuffer { 200 return NewStrCap(s, defaultCapacity) 201} 202 203// Return the current number of bytes in the buffer, including the "empty" space 204// in the gap. 205func (g *GapBuffer) Size() int { 206 return len(g.data) 207} 208 209// Return the byte column of the cursor, the number of bytes from the start of 210// the line to the cursor. 211// 212// Numbering starts from 1. 213// 214// See also [GapBuffer.RuneCol], [GapBuffer.LineCol], [GapBuffer.LineRuneCol]. 215func (g *GapBuffer) Col() int { 216 return g.start - g.lines.curLineStart() 217} 218 219// Return the rune column of the cursor, the number of unicode runes from the 220// start of the line to the cursor. 221// 222// Numbering starts from 1. 223// 224// See also [GapBuffer.Col], [GapBuffer.LineCol], [GapBuffer.LineRuneCol]. 225func (g *GapBuffer) RuneCol() int { 226 return utf8.RuneCount(g.data[g.lines.curLineStart():g.start]) 227} 228 229// Return the length of the current line the cursor is in, in bytes. 230// This returns the "whole" line length, including the part to the right of the 231// cursor but without the newline character at the end of the line. 232// 233// See also [GapBuffer.Col], [GapBuffer.RuneCol], which is the length to the 234// left of the cursor. 235func (g *GapBuffer) LineLength() int { 236 if g.lines.isLastLine() { 237 return g.lines.curLineLength() 238 } 239 240 return g.lines.curLineLength() - 1 241} 242 243// Return the line number of the current line the cursor is in. 244// 245// Numbering starts from 1. 246// 247// See also [GapBuffer.Col], [GapBuffer.RuneCol], [GapBuffer.LineRuneCol]. 248func (g *GapBuffer) Line() int { 249 return g.lines.curLine() 250} 251 252// Return the line and byte column of the cursor. Byte column means the number 253// of bytes from the start of the line to the cursor. 254// 255// Numbering starts from 1 for both the line number and the column number. 256// 257// See also [GapBuffer.Col], [GapBuffer.RuneCol], [GapBuffer.LineRuneCol], 258// [GapBuffer.Line]. 259func (g *GapBuffer) LineCol() (line int, col int) { 260 return g.lines.curLine(), g.Col() 261} 262 263// Return the line and rune column of the cursor. Rune column means the number 264// of unicode runes from the start of the line to the cursor. 265// 266// Numbering starts from 1 for both the line number and the column number. 267// 268// See also [GapBuffer.Line], [GapBuffer.Col], [GapBuffer.RuneCol], [GapBuffer.LineCol]. 269func (g *GapBuffer) LineRuneCol() (line int, runeCol int) { 270 return g.lines.curLine(), g.RuneCol() 271} 272 273// Delete the unicode rune to the left of the cursor. Like the "backspace" key. 274// 275// See also [GapBuffer.RightDel], [GapBuffer.LeftMv], [GapBuffer.RightMv], 276// [GapBuffer.UpMv], [GapBuffer.DownMv]. 277func (g *GapBuffer) LeftDel() { 278 if g.start < 1 { 279 return 280 } 281 282 r, rSize := utf8.DecodeLastRune(g.data[:g.start]) 283 g.start -= rSize 284 285 if r == '\n' { 286 g.lines.upDel() 287 } else { 288 g.lines.del(rSize) 289 } 290 291 g.wantsCol = g.RuneCol() 292} 293 294// Delete the unicode rune to the right of the cursor. Like the "delete" key. 295// 296// See also [GapBuffer.LeftDel], [GapBuffer.RightMv], [GapBuffer.LeftMv], 297// [GapBuffer.UpMv], [GapBuffer.DownMv]. 298func (g *GapBuffer) RightDel() { 299 if g.end > len(g.data)-1 { 300 return 301 } 302 303 r, rSize := utf8.DecodeRune(g.data[g.end:]) 304 g.end += rSize 305 306 if r == '\n' { 307 g.lines.downDel() 308 } else { 309 g.lines.del(rSize) 310 } 311} 312 313// Move the cursor one unicode rune to the left. 314// 315// See also [GapBuffer.RightMv], [GapBuffer.LeftDel], [GapBuffer.RightDel], 316// [GapBuffer.UpMv], [GapBuffer.DownMv]. 317func (g *GapBuffer) LeftMv() { 318 if g.start < 1 { 319 return 320 } 321 322 rChar, d := utf8.DecodeLastRune(g.data[:g.start]) 323 g.end -= d 324 325 _ = copy(g.data[g.end:], g.data[g.start-d:g.start]) 326 g.start -= d 327 328 if rChar == '\n' { 329 g.lines.up() 330 } 331 332 g.wantsCol = g.RuneCol() 333} 334 335// Move the cursor one unicode rune to the right. 336// 337// See also [GapBuffer.LeftMv], [GapBuffer.LeftDel], [GapBuffer.RightDel], 338// [GapBuffer.UpMv], [GapBuffer.DownMv]. 339func (g *GapBuffer) RightMv() { 340 if g.end > len(g.data)-1 { 341 return 342 } 343 344 r, d := utf8.DecodeRune(g.data[g.end:]) 345 _ = copy(g.data[g.start:], g.data[g.end:g.end+d]) 346 g.start += d 347 g.end += d 348 349 if r == '\n' { 350 g.lines.down() 351 } 352 353 g.wantsCol = g.RuneCol() 354} 355 356// Move the cursor up one line. 357// 358// The cursor "tries" to hold the current position in the new line, like we are 359// used to in text editors. 360// 361// Before the moves: 362// 363// Some text 364// No 365// More |text 366// 367// After the first move: 368// 369// Some text 370// No| 371// More text 372// 373// After the second move: 374// 375// Some |text 376// No 377// More text 378// 379// See also [GapBuffer.DownMv], [GapBuffer.LeftMv], [GapBuffer.RightMv], 380// [GapBuffer.LeftDel], [GapBuffer.RightDel]. 381func (g *GapBuffer) UpMv() { 382 if g.lines.curLine() == 1 { 383 return 384 } 385 386 g.lines.up() 387 lineStart := g.lines.curLineStart() 388 newStart := lineStart 389 maxRune := g.lines.curLineEnd() 390 runeCnt := 0 391 392 for idx := lineStart; idx < maxRune+1; { 393 newStart = idx 394 395 if runeCnt == g.wantsCol { 396 break 397 } 398 399 _, d := utf8.DecodeRune(g.data[idx:]) 400 idx += d 401 runeCnt++ 402 } 403 404 g.end -= (g.start - newStart) 405 _ = copy(g.data[g.end:], g.data[newStart:g.start]) 406 g.start = newStart 407} 408 409// Move the cursor down one line. 410// 411// The cursor "tries" to hold the current position in the new line, like we are 412// used to in text editors. 413// 414// Before the moves: 415// 416// Some |text 417// No 418// More text 419// 420// After the first move: 421// 422// Some text 423// No| 424// More text 425// 426// After the second move: 427// 428// Some text 429// No 430// More |text 431// 432// See also [GapBuffer.UpMv], [GapBuffer.LeftMv], [GapBuffer.RightMv], 433// [GapBuffer.LeftDel], [GapBuffer.RightDel]. 434func (g *GapBuffer) DownMv() { 435 if g.lines.isLastLine() { 436 return 437 } 438 439 newLine := g.lines.curLineEnd() + 1 - g.start 440 idx := newLine 441 runeCnt := 0 442 443 for g.end+idx < len(g.data) && g.data[g.end+idx] != '\n' { 444 if runeCnt == g.wantsCol { 445 break 446 } 447 448 _, d := utf8.DecodeRune(g.data[g.end+idx:]) 449 idx += d 450 runeCnt++ 451 452 if g.end+idx > len(g.data)-1 { 453 break 454 } 455 } 456 457 _ = copy(g.data[g.start:], g.data[g.end:g.end+idx]) 458 g.start += idx 459 g.end += idx 460 461 g.lines.down() 462} 463 464// grow resizes the gap buffer by `growFactor` times its current size and copies 465// the existing data. 466func (g *GapBuffer) grow() { 467 tmp := make([]byte, len(g.data)*growFactor) 468 _ = copy(tmp, g.data[:g.start]) 469 nE := len(tmp) - (len(g.data) - g.end) 470 _ = copy(tmp[nE:], g.data[g.end:]) 471 g.end = nE 472 g.data = tmp 473} 474 475// Insert inserts the given string at the current cursor position. 476// The string can be a single unicode scalar point or text of arbitrary size and 477// anything in between (like a single unicode rune). 478// 479// The cursor is moved to the end of the inserted text. 480func (g *GapBuffer) Insert(str string) { 481 if g.end-g.start < len(str)+1 { 482 g.grow() 483 } 484 485 g.lines.insert(str, g.start) 486 l := copy(g.data[g.start:], str) 487 g.start += l 488 g.wantsCol = g.RuneCol() 489}