fork of go-gitdiff with jj support

Add LineReaderAt interface and implementation

This is a line-oriented parallel to io.ReaderAt, meant for text applies.
While the mapping isn't quite as clean as in the binary case, a text
apply still reads a fixed chunk of lines starting at a specific line
number and modifies them. This also allows a consistent interface for
strict and fuzzy applies.

The implementation wraps an io.ReaderAt and reads data in chunks,
indexing line boundaries as it goes. This is probably not the most
efficient way to implement this interface, but it works and allows file
application to take a consistent interface.

Changed files
+132
gitdiff
+132
gitdiff/io.go
··· 2 2 3 3 import ( 4 4 "bufio" 5 + "errors" 5 6 "fmt" 6 7 "io" 7 8 ) ··· 77 78 panic(fmt.Sprintf("%T does not implement io.Reader and is not a gitdiff wrapper", lr)) 78 79 } 79 80 } 81 + 82 + // LineReaderAt is the interface that wraps the ReadLinesAt method. 83 + // 84 + // ReadLinesAt reads up to len(lines) lines into lines starting at line offset in the 85 + // input source. It returns the number of full lines read (0 <= n <= len(lines)) 86 + // and any error encountered. Line numbers are zero-indexed. 87 + // 88 + // If n < len(lines), ReadLinesAt returns a non-nil error explaining why more 89 + // lines were not returned. 90 + // 91 + // Each full line includes the line ending character(s). If the last line of 92 + // the input does not have a line ending character, ReadLinesAt returns the 93 + // content of the line and io.EOF. 94 + // 95 + // If the content of the input source changes after the first call to 96 + // ReadLinesAt, the behavior of future calls is undefined. 97 + type LineReaderAt interface { 98 + ReadLinesAt(lines [][]byte, offset int64) (n int, err error) 99 + } 100 + 101 + // NewLineReaderAt creates a LineReaderAt from an io.ReaderAt.
102 + func NewLineReaderAt(r io.ReaderAt) LineReaderAt { 103 + return &lineReaderAt{r: r} 104 + } 105 + 106 + type lineReaderAt struct { 107 + r io.ReaderAt 108 + index []int64 109 + eof bool 110 + } 111 + 112 + func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err error) { 113 + // TODO(bkeyes): revisit variable names 114 + // - it's generally not clear when something is bytes vs lines 115 + // - offset is a good example of this 116 + 117 + if len(lines) == 0 { 118 + return 0, nil 119 + } 120 + 121 + endLine := offset + int64(len(lines)) 122 + if endLine > int64(len(r.index)) && !r.eof { 123 + if err := r.indexTo(endLine); err != nil { 124 + return 0, err 125 + } 126 + } 127 + if offset > int64(len(r.index)) { 128 + return 0, io.EOF 129 + } 130 + 131 + // TODO(bkeyes): check usage of int / int64 132 + // - interface uses int64 for arbitrarily large files 133 + // - implementation is limited to int lines by index array 134 + 135 + // offset <= len(r.index) means that it must fit in int without loss 136 + size, readOffset := lookupLines(r.index, int(offset), len(lines)) 137 + 138 + b := make([]byte, size) 139 + if _, err := r.r.ReadAt(b, readOffset); err != nil { 140 + if err == io.EOF { 141 + err = errors.New("ReadLinesAt: corrupt line index or changed source data") 142 + } 143 + return 0, err 144 + } 145 + 146 + for n = 0; n < len(lines) && offset+int64(n) < int64(len(r.index)); n++ { 147 + i := int(offset) + n 148 + start, end := readOffset, r.index[i] 149 + if i > 0 { 150 + start = r.index[i-1] 151 + } 152 + lines[n] = b[start-readOffset : end-readOffset] 153 + } 154 + 155 + if n < len(lines) || b[size-1] != '\n' { 156 + return n, io.EOF 157 + } 158 + return n, nil 159 + } 160 + 161 + // indexTo reads data and computes the line index until there is information 162 + // for line or a read returns io.EOF. It returns an error if and only if there 163 + // is an error reading data. 
164 + func (r *lineReaderAt) indexTo(line int64) error { 165 + var buf [1024]byte 166 + 167 + var offset int64 168 + if len(r.index) > 0 { 169 + offset = r.index[len(r.index)-1] 170 + } 171 + 172 + for int64(len(r.index)) < line { 173 + n, err := r.r.ReadAt(buf[:], offset) 174 + if err != nil && err != io.EOF { 175 + return err 176 + } 177 + for _, b := range buf[:n] { 178 + offset++ 179 + if b == '\n' { 180 + r.index = append(r.index, offset) 181 + } 182 + } 183 + if err == io.EOF { 184 + if n > 0 && buf[n-1] != '\n' { 185 + r.index = append(r.index, offset) 186 + } 187 + r.eof = true 188 + break 189 + } 190 + } 191 + return nil 192 + } 193 + 194 + // lookupLines gets the byte offset and size of a range of lines from an index 195 + // where the value at n is the offset of the first byte after line number n. 196 + func lookupLines(index []int64, start, n int) (size int64, offset int64) { 197 + if start > len(index) { 198 + offset = index[len(index)-1] 199 + } else if start > 0 { 200 + offset = index[start-1] 201 + } 202 + if n > 0 { 203 + // TODO(bkeyes): check types for overflow 204 + if start+n > len(index) { 205 + size = index[len(index)-1] - offset 206 + } else { 207 + size = index[start+n-1] - offset 208 + } 209 + } 210 + return 211 + }