A gap buffer implementation in Go.
1// SPDX-FileCopyrightText: Copyright 2024 Roland Csaszar
2// SPDX-License-Identifier: MIT
3//
4// Project: go-gap-buffer
5// File: gap-buffer.go
6// Date: 07.Feb.2024
7//
8// =============================================================================
9
10// This library implements a gap buffer, which is a data structure to be used as
11// the container of the text for a (simple or not so simple) text editor.
12//
13// Movements to the left or right and deletion to the left and right work on
14// Unicode runes. It supports line movements (up and down a line from the current
15// one), it splits lines based on the newline character '\n'.
16//
17// Caveats:
18// - Only line feed '\n' line endings are supported by this library, so
19// Windows-style CR-LF (`\r\n`) line endings will not work with movements
20// up and down a line. Single Unicode rune movements are going to work, but
21// the CR '\r' character is handled as a "normal" character, as part of the
22// string.
23// - This implementation is not thread save
24// - A gap buffer is not ideal for using multiple cursors, as that would
25// involve multiple jumps and copying of data in the gap buffer
26//
27// A gap buffer is an array with a "gap" - unused elements in the array - at the
28// cursor position, where text is to be inserted and deleted.
29//
30// The string "Hello world!" with the cursor at the end of "Hello" -
31// "Hello| world!" - looks like this in a gap buffer array:
32//
33// Hello|< gap start, the cursor position gap end >| world!
34//
35// ['H', 'e', 'l', 'l', 'o', 0, 0, 0, 0, 0, ' ', 'w', 'o', 'r', 'l', 'd', '!']
36// 0 1 2 3 4 | gap | 5 6 7 8 9 10 11
37//
38// Movement in the gap buffer works by moving the start and end of the gap, same
39// with deletion of unicode runes in both directions.
40//
41// Moving the cursor two runes to the left:
42//
43// Hel|< gap start, the cursor position gap end >|lo world!
44//
45// ['H', 'e', 'l', 0, 0, 0, 0, 0, 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!']
46// 0 1 2 | gap | 3 4 5 6 7 8 9 10 11
47//
48// Deleting three runes to the left:
49//
50// |< gap start, the cursor position gap end >|lo world!
51//
52// ['H', 'e', 'l', 0, 0, 0, 0, 0, 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!']
53// | gap | 1 2 3 4 5 6 7 8 9
54//
55// Insertion happens at the cursor position by appending at the start of the gap
56// and moving the start of the gap accordingly.
57//
58// New|< gap start, the cursor position gap end >|lo world!
59//
60// ['N', 'e', 'w', 0, 0, 0, 0, 0, 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!']
61// 0 1 2 | gap | 3 4 5 6 7 8 9 10 11
62//
63// Implementation Detail:
64// The line lengths are stored in a another gap buffer, which is synchronized
65// with the gap buffer of the text itself.
66package gapbuffer
67
68import (
69 "strings"
70 "unicode/utf8"
71)
72
73// GapBuffer represents a gap buffer.
74type GapBuffer struct {
75 // The index in the gap buffer `GapBuffer.data` of the start of the gap.
76 // The position of the cursor.
77 start int
78
79 // The index in the gap buffer `GapBuffer.data` of the end of the gap.
80 // The position of the first unicode scalar point after the cursor.
81 end int
82
83 // `wantsCol` is the rune column (not byte column!) the cursor wants to hold
84 // when going up or down.
85 wantsCol int
86
87 // The lineBuffer that stores the line length information of the gap buffer.
88 //
89 // See [lineBuffer].
90 lines lineBuffer
91
92 // The data of the gap buffer.
93 data []byte
94}
95
96const (
97 defaultCapacity = 1024 // The default size of a gap buffer in bytes.
98
99 // 1/10th of the capacity of the gap buffer, default: 102.
100 lineCapFactor = 10
101
102 // Minimum size in int of the line buffer `GapBuffer.lines`. A lineBuffer
103 // has at least this size, even if the [lineCapFactor] would yield a smaller
104 // one.
105 minLineCap = 10
106
107 // The factor by which to grow the gap buffer and line buffer, if needed.
108 growFactor = 2
109)
110
111// Return the contents of the gap buffer as a string.
112func (g *GapBuffer) String() string {
113 var builder strings.Builder
114
115 builder.Grow(len(g.data) - (g.end - g.start))
116 builder.Write(g.data[:g.start])
117 builder.Write(g.data[g.end:])
118
119 return builder.String()
120}
121
122// Return the contents of the gap buffer as two strings. The part to the left of
123// the cursor is returned in `left` and the part to the right of the cursor is
124// returned in `right`.
125func (g *GapBuffer) StringPair() (left string, right string) {
126 return string(g.data[:g.start]), string(g.data[g.end:])
127}
128
129// Return the length in bytes of the contents of the gap buffer.
130func (g *GapBuffer) StringLength() int {
131 return len(g.data) - (g.end - g.start)
132}
133
134// Construct a new GapBuffer from a capacity. The capacity is the number of
135// bytes the gap buffer can hold without a resize.
136//
137// The default size is 1024 bytes, if you know that you need less or more space,
138// you can set the initial size to something more appropriate.
139//
140// See also [New], [NewStr], [NewStrCap].
141func NewCap(size int) *GapBuffer {
142 return &GapBuffer{
143 start: 0,
144 end: size,
145 wantsCol: 0,
146 data: make([]byte, size),
147 lines: *newLineBuf(size),
148 }
149}
150
151// Construct a new, empty GapBuffer with the default capacity.
152//
153// See also [NewCap], [NewStr], [NewStrCap].
154func New() *GapBuffer {
155 return NewCap(defaultCapacity)
156}
157
158// Construct a new GapBuffer from a string and a capacity. The cursor position
159// is set to the end of the string. The capacity is the number of bytes the gap
160// buffer can hold without a resize.
161//
162// The default size is 1024 bytes, if you know that you need less or more space,
163// you can set the initial size to something more appropriate.
164//
165// See also [New], [NewCap], [NewStr], [GapBuffer.Size].
166func NewStrCap(str string, c int) *GapBuffer {
167 if str == "" {
168 return NewCap(c)
169 }
170
171 size := len(str) * growFactor
172 if size < c {
173 size = c
174 }
175
176 dat := make([]byte, size)
177 sIdx := copy(dat, str)
178 lines := newLineBufStr(str, size)
179 runeCol := 0
180 lineStart := lines.curLineStart()
181
182 if lineStart < sIdx {
183 runeCol = utf8.RuneCount(dat[lineStart:sIdx])
184 }
185
186 return &GapBuffer{
187 start: sIdx,
188 end: size,
189 wantsCol: runeCol,
190 data: dat,
191 lines: *lines,
192 }
193}
194
195// Construct a new GapBuffer from a string. The cursor position is set to the
196// end of the string.
197//
198// See also [New], [NewCap], [NewStrCap].
199func NewStr(s string) *GapBuffer {
200 return NewStrCap(s, defaultCapacity)
201}
202
203// Return the current number of bytes in the buffer, including the "empty" space
204// in the gap.
205func (g *GapBuffer) Size() int {
206 return len(g.data)
207}
208
209// Return the byte column of the cursor, the number of bytes from the start of
210// the line to the cursor.
211//
212// Numbering starts from 1.
213//
214// See also [GapBuffer.RuneCol], [GapBuffer.LineCol], [GapBuffer.LineRuneCol].
215func (g *GapBuffer) Col() int {
216 return g.start - g.lines.curLineStart()
217}
218
219// Return the rune column of the cursor, the number of unicode runes from the
220// start of the line to the cursor.
221//
222// Numbering starts from 1.
223//
224// See also [GapBuffer.Col], [GapBuffer.LineCol], [GapBuffer.LineRuneCol].
225func (g *GapBuffer) RuneCol() int {
226 return utf8.RuneCount(g.data[g.lines.curLineStart():g.start])
227}
228
229// Return the length of the current line the cursor is in, in bytes.
230// This returns the "whole" line length, including the part to the right of the
231// cursor but without the newline character at the end of the line.
232//
233// See also [GapBuffer.Col], [GapBuffer.RuneCol], which is the length to the
234// left of the cursor.
235func (g *GapBuffer) LineLength() int {
236 if g.lines.isLastLine() {
237 return g.lines.curLineLength()
238 }
239
240 return g.lines.curLineLength() - 1
241}
242
243// Return the line number of the current line the cursor is in.
244//
245// Numbering starts from 1.
246//
247// See also [GapBuffer.Col], [GapBuffer.RuneCol], [GapBuffer.LineRuneCol].
248func (g *GapBuffer) Line() int {
249 return g.lines.curLine()
250}
251
252// Return the line and byte column of the cursor. Byte column means the number
253// of bytes from the start of the line to the cursor.
254//
255// Numbering starts from 1 for both the line number and the column number.
256//
257// See also [GapBuffer.Col], [GapBuffer.RuneCol], [GapBuffer.LineRuneCol],
258// [GapBuffer.Line].
259func (g *GapBuffer) LineCol() (line int, col int) {
260 return g.lines.curLine(), g.Col()
261}
262
263// Return the line and rune column of the cursor. Rune column means the number
264// of unicode runes from the start of the line to the cursor.
265//
266// Numbering starts from 1 for both the line number and the column number.
267//
268// See also [GapBuffer.Line], [GapBuffer.Col], [GapBuffer.RuneCol], [GapBuffer.LineCol].
269func (g *GapBuffer) LineRuneCol() (line int, runeCol int) {
270 return g.lines.curLine(), g.RuneCol()
271}
272
273// Delete the unicode rune to the left of the cursor. Like the "backspace" key.
274//
275// See also [GapBuffer.RightDel], [GapBuffer.LeftMv], [GapBuffer.RightMv],
276// [GapBuffer.UpMv], [GapBuffer.DownMv].
277func (g *GapBuffer) LeftDel() {
278 if g.start < 1 {
279 return
280 }
281
282 r, rSize := utf8.DecodeLastRune(g.data[:g.start])
283 g.start -= rSize
284
285 if r == '\n' {
286 g.lines.upDel()
287 } else {
288 g.lines.del(rSize)
289 }
290
291 g.wantsCol = g.RuneCol()
292}
293
294// Delete the unicode rune to the right of the cursor. Like the "delete" key.
295//
296// See also [GapBuffer.LeftDel], [GapBuffer.RightMv], [GapBuffer.LeftMv],
297// [GapBuffer.UpMv], [GapBuffer.DownMv].
298func (g *GapBuffer) RightDel() {
299 if g.end > len(g.data)-1 {
300 return
301 }
302
303 r, rSize := utf8.DecodeRune(g.data[g.end:])
304 g.end += rSize
305
306 if r == '\n' {
307 g.lines.downDel()
308 } else {
309 g.lines.del(rSize)
310 }
311}
312
313// Move the cursor one unicode rune to the left.
314//
315// See also [GapBuffer.RightMv], [GapBuffer.LeftDel], [GapBuffer.RightDel],
316// [GapBuffer.UpMv], [GapBuffer.DownMv].
317func (g *GapBuffer) LeftMv() {
318 if g.start < 1 {
319 return
320 }
321
322 rChar, d := utf8.DecodeLastRune(g.data[:g.start])
323 g.end -= d
324
325 _ = copy(g.data[g.end:], g.data[g.start-d:g.start])
326 g.start -= d
327
328 if rChar == '\n' {
329 g.lines.up()
330 }
331
332 g.wantsCol = g.RuneCol()
333}
334
335// Move the cursor one unicode rune to the right.
336//
337// See also [GapBuffer.LeftMv], [GapBuffer.LeftDel], [GapBuffer.RightDel],
338// [GapBuffer.UpMv], [GapBuffer.DownMv].
339func (g *GapBuffer) RightMv() {
340 if g.end > len(g.data)-1 {
341 return
342 }
343
344 r, d := utf8.DecodeRune(g.data[g.end:])
345 _ = copy(g.data[g.start:], g.data[g.end:g.end+d])
346 g.start += d
347 g.end += d
348
349 if r == '\n' {
350 g.lines.down()
351 }
352
353 g.wantsCol = g.RuneCol()
354}
355
356// Move the cursor up one line.
357//
358// The cursor "tries" to hold the current position in the new line, like we are
359// used to in text editors.
360//
361// Before the moves:
362//
363// Some text
364// No
365// More |text
366//
367// After the first move:
368//
369// Some text
370// No|
371// More text
372//
373// After the second move:
374//
375// Some |text
376// No
377// More text
378//
379// See also [GapBuffer.DownMv], [GapBuffer.LeftMv], [GapBuffer.RightMv],
380// [GapBuffer.LeftDel], [GapBuffer.RightDel].
381func (g *GapBuffer) UpMv() {
382 if g.lines.curLine() == 1 {
383 return
384 }
385
386 g.lines.up()
387 lineStart := g.lines.curLineStart()
388 newStart := lineStart
389 maxRune := g.lines.curLineEnd()
390 runeCnt := 0
391
392 for idx := lineStart; idx < maxRune+1; {
393 newStart = idx
394
395 if runeCnt == g.wantsCol {
396 break
397 }
398
399 _, d := utf8.DecodeRune(g.data[idx:])
400 idx += d
401 runeCnt++
402 }
403
404 g.end -= (g.start - newStart)
405 _ = copy(g.data[g.end:], g.data[newStart:g.start])
406 g.start = newStart
407}
408
409// Move the cursor down one line.
410//
411// The cursor "tries" to hold the current position in the new line, like we are
412// used to in text editors.
413//
414// Before the moves:
415//
416// Some |text
417// No
418// More text
419//
420// After the first move:
421//
422// Some text
423// No|
424// More text
425//
426// After the second move:
427//
428// Some text
429// No
430// More |text
431//
432// See also [GapBuffer.UpMv], [GapBuffer.LeftMv], [GapBuffer.RightMv],
433// [GapBuffer.LeftDel], [GapBuffer.RightDel].
434func (g *GapBuffer) DownMv() {
435 if g.lines.isLastLine() {
436 return
437 }
438
439 newLine := g.lines.curLineEnd() + 1 - g.start
440 idx := newLine
441 runeCnt := 0
442
443 for g.end+idx < len(g.data) && g.data[g.end+idx] != '\n' {
444 if runeCnt == g.wantsCol {
445 break
446 }
447
448 _, d := utf8.DecodeRune(g.data[g.end+idx:])
449 idx += d
450 runeCnt++
451
452 if g.end+idx > len(g.data)-1 {
453 break
454 }
455 }
456
457 _ = copy(g.data[g.start:], g.data[g.end:g.end+idx])
458 g.start += idx
459 g.end += idx
460
461 g.lines.down()
462}
463
464// grow resizes the gap buffer by `growFactor` times its current size and copies
465// the existing data.
466func (g *GapBuffer) grow() {
467 tmp := make([]byte, len(g.data)*growFactor)
468 _ = copy(tmp, g.data[:g.start])
469 nE := len(tmp) - (len(g.data) - g.end)
470 _ = copy(tmp[nE:], g.data[g.end:])
471 g.end = nE
472 g.data = tmp
473}
474
475// Insert inserts the given string at the current cursor position.
476// The string can be a single unicode scalar point or text of arbitrary size and
477// anything in between (like a single unicode rune).
478//
479// The cursor is moved to the end of the inserted text.
480func (g *GapBuffer) Insert(str string) {
481 if g.end-g.start < len(str)+1 {
482 g.grow()
483 }
484
485 g.lines.insert(str, g.start)
486 l := copy(g.data[g.start:], str)
487 g.start += l
488 g.wantsCol = g.RuneCol()
489}