1// Copyright 2021 The Gitea Authors. All rights reserved.
2// SPDX-License-Identifier: MIT
3
4package gitdiff
5
6import (
7 "encoding/csv"
8 "errors"
9 "io"
10)
11
12const (
13 unmappedColumn = -1
14 maxRowsToInspect int = 10
15 minRatioToMatch float32 = 0.8
16)
17
18// TableDiffCellType represents the type of a TableDiffCell.
19type TableDiffCellType uint8
20
21// TableDiffCellType possible values.
22const (
23 TableDiffCellUnchanged TableDiffCellType = iota + 1
24 TableDiffCellChanged
25 TableDiffCellAdd
26 TableDiffCellDel
27 TableDiffCellMovedUnchanged
28 TableDiffCellMovedChanged
29)
30
31// TableDiffCell represents a cell of a TableDiffRow
32type TableDiffCell struct {
33 LeftCell string
34 RightCell string
35 Type TableDiffCellType
36}
37
38// TableDiffRow represents a row of a TableDiffSection.
39type TableDiffRow struct {
40 RowIdx int
41 Cells []*TableDiffCell
42}
43
44// TableDiffSection represents a section of a DiffFile.
45type TableDiffSection struct {
46 Rows []*TableDiffRow
47}
48
49// csvReader wraps a csv.Reader which buffers the first rows.
50type csvReader struct {
51 reader *csv.Reader
52 buffer [][]string
53 line int
54 eof bool
55}
56
57// ErrorUndefinedCell is for when a row, column coordinates do not exist in the CSV
58var ErrorUndefinedCell = errors.New("undefined cell")
59
60// createCsvReader creates a csvReader and fills the buffer
61func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) {
62 csv := &csvReader{reader: reader}
63 csv.buffer = make([][]string, bufferRowCount)
64 for i := 0; i < bufferRowCount && !csv.eof; i++ {
65 row, err := csv.readNextRow()
66 if err != nil {
67 return nil, err
68 }
69 csv.buffer[i] = row
70 }
71 csv.line = bufferRowCount
72 return csv, nil
73}
74
75// GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned.
76func (csv *csvReader) GetRow(row int) ([]string, error) {
77 if row < len(csv.buffer) && row >= 0 {
78 return csv.buffer[row], nil
79 }
80 if csv.eof {
81 return nil, nil
82 }
83 for {
84 fields, err := csv.readNextRow()
85 if err != nil {
86 return nil, err
87 }
88 if csv.eof {
89 return nil, nil
90 }
91 csv.line++
92 if csv.line-1 == row {
93 return fields, nil
94 }
95 }
96}
97
98func (csv *csvReader) readNextRow() ([]string, error) {
99 if csv.eof {
100 return nil, nil
101 }
102 row, err := csv.reader.Read()
103 if err != nil {
104 if err != io.EOF {
105 return nil, err
106 }
107 csv.eof = true
108 }
109 return row, nil
110}
111
112// CreateCsvDiff creates a tabular diff based on two CSV readers.
113func CreateCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) {
114 if baseReader != nil && headReader != nil {
115 return createCsvDiff(diffFile, baseReader, headReader)
116 }
117
118 if baseReader != nil {
119 return createCsvDiffSingle(baseReader, TableDiffCellDel)
120 }
121 return createCsvDiffSingle(headReader, TableDiffCellAdd)
122}
123
124// createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted.
125func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) {
126 var rows []*TableDiffRow
127 i := 1
128 for {
129 row, err := reader.Read()
130 if err != nil {
131 if err == io.EOF {
132 break
133 }
134 return nil, err
135 }
136 cells := make([]*TableDiffCell, len(row))
137 for j := 0; j < len(row); j++ {
138 if celltype == TableDiffCellDel {
139 cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype}
140 } else {
141 cells[j] = &TableDiffCell{RightCell: row[j], Type: celltype}
142 }
143 }
144 rows = append(rows, &TableDiffRow{RowIdx: i, Cells: cells})
145 i++
146 }
147
148 return []*TableDiffSection{{Rows: rows}}, nil
149}
150
151func createCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) {
152 // Given the baseReader and headReader, we are going to create CSV Reader for each, baseCSVReader and b respectively
153 baseCSVReader, err := createCsvReader(baseReader, maxRowsToInspect)
154 if err != nil {
155 return nil, err
156 }
157 headCSVReader, err := createCsvReader(headReader, maxRowsToInspect)
158 if err != nil {
159 return nil, err
160 }
161
162 // Initializing the mappings of base to head (a2bColMap) and head to base (b2aColMap) columns
163 a2bColMap, b2aColMap := getColumnMapping(baseCSVReader, headCSVReader)
164
165 // Determines how many cols there will be in the diff table, which includes deleted columns from base and added columns to base
166 numDiffTableCols := len(a2bColMap) + countUnmappedColumns(b2aColMap)
167 if len(a2bColMap) < len(b2aColMap) {
168 numDiffTableCols = len(b2aColMap) + countUnmappedColumns(a2bColMap)
169 }
170
171 // createDiffTableRow takes the row # of the `a` line and `b` line of a diff (starting from 1), 0 if the line doesn't exist (undefined)
172 // in the base or head respectively.
173 // Returns a TableDiffRow which has the row index
174 createDiffTableRow := func(aLineNum, bLineNum int) (*TableDiffRow, error) {
175 // diffTableCells is a row of the diff table. It will have a cells for added, deleted, changed, and unchanged content, thus either
176 // the same size as the head table or bigger
177 diffTableCells := make([]*TableDiffCell, numDiffTableCols)
178 var bRow *[]string
179 if bLineNum > 0 {
180 row, err := headCSVReader.GetRow(bLineNum - 1)
181 if err != nil {
182 return nil, err
183 }
184 bRow = &row
185 }
186 var aRow *[]string
187 if aLineNum > 0 {
188 row, err := baseCSVReader.GetRow(aLineNum - 1)
189 if err != nil {
190 return nil, err
191 }
192 aRow = &row
193 }
194 if aRow == nil && bRow == nil {
195 // No content
196 return nil, nil
197 }
198
199 aIndex := 0 // tracks where we are in the a2bColMap
200 bIndex := 0 // tracks where we are in the b2aColMap
201 colsAdded := 0 // incremented whenever we found a column was added
202 colsDeleted := 0 // incrememted whenever a column was deleted
203
204 // We loop until both the aIndex and bIndex are greater than their col map, which then we are done
205 for aIndex < len(a2bColMap) || bIndex < len(b2aColMap) {
206 // Starting from where aIndex is currently pointing, we see if the map is -1 (dleeted) and if is, create column to note that, increment, and look at the next aIndex
207 for aIndex < len(a2bColMap) && a2bColMap[aIndex] == -1 && (bIndex >= len(b2aColMap) || aIndex <= bIndex) {
208 var aCell string
209 if aRow != nil {
210 if cell, err := getCell(*aRow, aIndex); err != nil {
211 if err != ErrorUndefinedCell {
212 return nil, err
213 }
214 } else {
215 aCell = cell
216 }
217 }
218 diffTableCells[bIndex+colsDeleted] = &TableDiffCell{LeftCell: aCell, Type: TableDiffCellDel}
219 aIndex++
220 colsDeleted++
221 }
222
223 // aIndex is now pointing to a column that also exists in b, or is at the end of a2bColMap. If the former,
224 // we can just increment aIndex until it points to a -1 column or one greater than the current bIndex
225 for aIndex < len(a2bColMap) && a2bColMap[aIndex] != -1 {
226 aIndex++
227 }
228
229 // Starting from where bIndex is currently pointing, we see if the map is -1 (added) and if is, create column to note that, increment, and look at the next aIndex
230 for bIndex < len(b2aColMap) && b2aColMap[bIndex] == -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
231 var bCell string
232 cellType := TableDiffCellAdd
233 if bRow != nil {
234 if cell, err := getCell(*bRow, bIndex); err != nil {
235 if err != ErrorUndefinedCell {
236 return nil, err
237 }
238 } else {
239 bCell = cell
240 }
241 } else {
242 cellType = TableDiffCellDel
243 }
244 diffTableCells[bIndex+colsDeleted] = &TableDiffCell{RightCell: bCell, Type: cellType}
245 bIndex++
246 colsAdded++
247 }
248
249 // aIndex is now pointing to a column that also exists in a, or is at the end of b2aColMap. If the former,
250 // we get the a col and b col values (if they exist), figure out if they are the same or not, and if the column moved, and add it to the diff table
251 for bIndex < len(b2aColMap) && b2aColMap[bIndex] != -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
252 var diffTableCell TableDiffCell
253
254 var aCell *string
255 // get the aCell value if the aRow exists
256 if aRow != nil {
257 if cell, err := getCell(*aRow, b2aColMap[bIndex]); err != nil {
258 if err != ErrorUndefinedCell {
259 return nil, err
260 }
261 } else {
262 aCell = &cell
263 diffTableCell.LeftCell = cell
264 }
265 } else {
266 diffTableCell.Type = TableDiffCellAdd
267 }
268
269 var bCell *string
270 // get the bCell value if the bRow exists
271 if bRow != nil {
272 if cell, err := getCell(*bRow, bIndex); err != nil {
273 if err != ErrorUndefinedCell {
274 return nil, err
275 }
276 } else {
277 bCell = &cell
278 diffTableCell.RightCell = cell
279 }
280 } else {
281 diffTableCell.Type = TableDiffCellDel
282 }
283
284 // if both a and b have a row that exists, compare the value and determine if the row has moved
285 if aCell != nil && bCell != nil {
286 moved := ((bIndex + colsDeleted) != (b2aColMap[bIndex] + colsAdded))
287 if *aCell != *bCell {
288 if moved {
289 diffTableCell.Type = TableDiffCellMovedChanged
290 } else {
291 diffTableCell.Type = TableDiffCellChanged
292 }
293 } else {
294 if moved {
295 diffTableCell.Type = TableDiffCellMovedUnchanged
296 } else {
297 diffTableCell.Type = TableDiffCellUnchanged
298 }
299 diffTableCell.LeftCell = ""
300 }
301 }
302
303 // Add the diff column to the diff row
304 diffTableCells[bIndex+colsDeleted] = &diffTableCell
305 bIndex++
306 }
307 }
308
309 return &TableDiffRow{RowIdx: bLineNum, Cells: diffTableCells}, nil
310 }
311
312 // diffTableSections are TableDiffSections which represent the diffTableSections we get when doing a diff, each will be its own table in the view
313 var diffTableSections []*TableDiffSection
314
315 for i, section := range diffFile.Sections {
316 // Each section has multiple diffTableRows
317 var diffTableRows []*TableDiffRow
318 lines := tryMergeLines(section.Lines)
319 // Loop through the merged lines to get each row of the CSV diff table for this section
320 for j, line := range lines {
321 if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) {
322 diffTableRow, err := createDiffTableRow(1, 1)
323 if err != nil {
324 return nil, err
325 }
326 if diffTableRow != nil {
327 diffTableRows = append(diffTableRows, diffTableRow)
328 }
329 }
330 diffTableRow, err := createDiffTableRow(line[0], line[1])
331 if err != nil {
332 return nil, err
333 }
334 if diffTableRow != nil {
335 diffTableRows = append(diffTableRows, diffTableRow)
336 }
337 }
338
339 if len(diffTableRows) > 0 {
340 diffTableSections = append(diffTableSections, &TableDiffSection{Rows: diffTableRows})
341 }
342 }
343
344 return diffTableSections, nil
345}
346
347// getColumnMapping creates a mapping of columns between a and b
348func getColumnMapping(baseCSVReader, headCSVReader *csvReader) ([]int, []int) {
349 baseRow, _ := baseCSVReader.GetRow(0)
350 headRow, _ := headCSVReader.GetRow(0)
351
352 base2HeadColMap := []int{}
353 head2BaseColMap := []int{}
354
355 if baseRow != nil {
356 base2HeadColMap = make([]int, len(baseRow))
357 }
358 if headRow != nil {
359 head2BaseColMap = make([]int, len(headRow))
360 }
361
362 // Initializes all head2base mappings to be unmappedColumn (-1)
363 for i := 0; i < len(head2BaseColMap); i++ {
364 head2BaseColMap[i] = unmappedColumn
365 }
366
367 // Loops through the baseRow and see if there is a match in the head row
368 for i := 0; i < len(baseRow); i++ {
369 base2HeadColMap[i] = unmappedColumn
370 baseCell, err := getCell(baseRow, i)
371 if err == nil {
372 for j := 0; j < len(headRow); j++ {
373 if head2BaseColMap[j] == -1 {
374 headCell, err := getCell(headRow, j)
375 if err == nil && baseCell == headCell {
376 base2HeadColMap[i] = j
377 head2BaseColMap[j] = i
378 break
379 }
380 }
381 }
382 }
383 }
384
385 tryMapColumnsByContent(baseCSVReader, base2HeadColMap, headCSVReader, head2BaseColMap)
386 tryMapColumnsByContent(headCSVReader, head2BaseColMap, baseCSVReader, base2HeadColMap)
387
388 return base2HeadColMap, head2BaseColMap
389}
390
391// tryMapColumnsByContent tries to map missing columns by the content of the first lines.
392func tryMapColumnsByContent(baseCSVReader *csvReader, base2HeadColMap []int, headCSVReader *csvReader, head2BaseColMap []int) {
393 for i := 0; i < len(base2HeadColMap); i++ {
394 headStart := 0
395 for base2HeadColMap[i] == unmappedColumn && headStart < len(head2BaseColMap) {
396 if head2BaseColMap[headStart] == unmappedColumn {
397 rows := min(maxRowsToInspect, max(0, min(len(baseCSVReader.buffer), len(headCSVReader.buffer))-1))
398 same := 0
399 for j := 1; j <= rows; j++ {
400 baseCell, baseErr := getCell(baseCSVReader.buffer[j], i)
401 headCell, headErr := getCell(headCSVReader.buffer[j], headStart)
402 if baseErr == nil && headErr == nil && baseCell == headCell {
403 same++
404 }
405 }
406 if (float32(same) / float32(rows)) > minRatioToMatch {
407 base2HeadColMap[i] = headStart
408 head2BaseColMap[headStart] = i
409 }
410 }
411 headStart++
412 }
413 }
414}
415
416// getCell returns the specific cell or nil if not present.
417func getCell(row []string, column int) (string, error) {
418 if column < len(row) {
419 return row[column], nil
420 }
421 return "", ErrorUndefinedCell
422}
423
424// countUnmappedColumns returns the count of unmapped columns.
425func countUnmappedColumns(mapping []int) int {
426 count := 0
427 for i := 0; i < len(mapping); i++ {
428 if mapping[i] == unmappedColumn {
429 count++
430 }
431 }
432 return count
433}
434
435// tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered.
436func tryMergeLines(lines []*DiffLine) [][2]int {
437 ids := make([][2]int, len(lines))
438
439 i := 0
440 for _, line := range lines {
441 if line.Type != DiffLineSection {
442 ids[i][0] = line.LeftIdx
443 ids[i][1] = line.RightIdx
444 i++
445 }
446 }
447
448 ids = ids[:i]
449
450 result := make([][2]int, len(ids))
451
452 j := 0
453 for i = 0; i < len(ids); i++ {
454 if ids[i][0] == 0 {
455 if j > 0 && result[j-1][1] == 0 {
456 temp := j
457 for temp > 0 && result[temp-1][1] == 0 {
458 temp--
459 }
460 result[temp][1] = ids[i][1]
461 continue
462 }
463 }
464 result[j] = ids[i]
465 j++
466 }
467
468 return result[:j]
469}