Serenity Operating System
1/*
2 * Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include "Generator.h"
8
9namespace Diff {
10
11Vector<Hunk> from_text(StringView old_text, StringView new_text)
12{
13 auto old_lines = old_text.lines();
14 auto new_lines = new_text.lines();
15
16 /**
17 * This is a simple implementation of the Longest Common Subsequence algorithm (over
18 * the lines of the text as opposed to the characters). A Dynamic programming approach
19 * is used here.
20 */
21
22 enum class Direction {
23 Down, // Added a new line
24 Right, // Removed a line
25 Diagonal, // Line remained the same
26 };
27
28 // A single cell in the DP-matrix. Cell (i, j) represents the longest common
29 // sub-sequence of lines between old_lines[0 : i] and new_lines[0 : j].
30 struct Cell {
31 size_t length;
32 Direction direction;
33 };
34
35 auto dp_matrix = Vector<Cell>();
36 dp_matrix.resize((old_lines.size() + 1) * (new_lines.size() + 1));
37
38 auto dp = [&dp_matrix, width = old_lines.size() + 1](size_t i, size_t j) -> Cell& {
39 return dp_matrix[i + width * j];
40 };
41
42 // Initialize the first row and column
43 for (size_t i = 0; i <= old_lines.size(); ++i)
44 dp(i, new_lines.size()) = { 0, Direction::Right };
45
46 for (size_t j = 0; j <= new_lines.size(); ++j)
47 dp(old_lines.size(), 0) = { 0, Direction::Down };
48
49 // Fill in the rest of the DP table
50 for (int i = old_lines.size() - 1; i >= 0; --i) {
51 for (int j = new_lines.size() - 1; j >= 0; --j) {
52 if (old_lines[i] == new_lines[j]) {
53 dp(i, j) = { dp(i + 1, j + 1).length + 1, Direction::Diagonal };
54 } else {
55 auto down = dp(i, j + 1).length;
56 auto right = dp(i + 1, j).length;
57 if (down > right)
58 dp(i, j) = { down, Direction::Down };
59 else
60 dp(i, j) = { right, Direction::Right };
61 }
62 }
63 }
64
65 Vector<Hunk> hunks;
66 Hunk cur_hunk;
67 bool in_hunk = false;
68
69 auto update_hunk = [&](size_t i, size_t j, Direction direction) {
70 if (!in_hunk) {
71 in_hunk = true;
72 cur_hunk = { i, j, {}, {} };
73 }
74 if (direction == Direction::Down) {
75 cur_hunk.added_lines.append(new_lines[j]);
76 } else if (direction == Direction::Right) {
77 cur_hunk.removed_lines.append(old_lines[i]);
78 }
79 };
80
81 auto flush_hunk = [&]() {
82 if (in_hunk) {
83 if (cur_hunk.added_lines.size() > 0)
84 cur_hunk.target_start_line++;
85 if (cur_hunk.removed_lines.size() > 0)
86 cur_hunk.original_start_line++;
87 hunks.append(cur_hunk);
88 in_hunk = false;
89 }
90 };
91
92 size_t i = 0;
93 size_t j = 0;
94
95 while (i < old_lines.size() && j < new_lines.size()) {
96 auto& cell = dp(i, j);
97 if (cell.direction == Direction::Down) {
98 update_hunk(i, j, cell.direction);
99 ++j;
100 } else if (cell.direction == Direction::Right) {
101 update_hunk(i, j, cell.direction);
102 ++i;
103 } else {
104 ++i;
105 ++j;
106 flush_hunk();
107 }
108 }
109
110 while (i < old_lines.size()) {
111 update_hunk(i, new_lines.is_empty() ? 0 : new_lines.size() - 1, Direction::Right); // Remove a line
112 ++i;
113 }
114 while (j < new_lines.size()) {
115 update_hunk(old_lines.is_empty() ? 0 : old_lines.size() - 1, j, Direction::Down); // Add a line
116 ++j;
117 }
118
119 flush_hunk();
120
121 return hunks;
122}
123
124}