Serenity Operating System
at master 124 lines 3.6 kB view raw
1/* 2 * Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include "Generator.h" 8 9namespace Diff { 10 11Vector<Hunk> from_text(StringView old_text, StringView new_text) 12{ 13 auto old_lines = old_text.lines(); 14 auto new_lines = new_text.lines(); 15 16 /** 17 * This is a simple implementation of the Longest Common Subsequence algorithm (over 18 * the lines of the text as opposed to the characters). A Dynamic programming approach 19 * is used here. 20 */ 21 22 enum class Direction { 23 Down, // Added a new line 24 Right, // Removed a line 25 Diagonal, // Line remained the same 26 }; 27 28 // A single cell in the DP-matrix. Cell (i, j) represents the longest common 29 // sub-sequence of lines between old_lines[0 : i] and new_lines[0 : j]. 30 struct Cell { 31 size_t length; 32 Direction direction; 33 }; 34 35 auto dp_matrix = Vector<Cell>(); 36 dp_matrix.resize((old_lines.size() + 1) * (new_lines.size() + 1)); 37 38 auto dp = [&dp_matrix, width = old_lines.size() + 1](size_t i, size_t j) -> Cell& { 39 return dp_matrix[i + width * j]; 40 }; 41 42 // Initialize the first row and column 43 for (size_t i = 0; i <= old_lines.size(); ++i) 44 dp(i, new_lines.size()) = { 0, Direction::Right }; 45 46 for (size_t j = 0; j <= new_lines.size(); ++j) 47 dp(old_lines.size(), 0) = { 0, Direction::Down }; 48 49 // Fill in the rest of the DP table 50 for (int i = old_lines.size() - 1; i >= 0; --i) { 51 for (int j = new_lines.size() - 1; j >= 0; --j) { 52 if (old_lines[i] == new_lines[j]) { 53 dp(i, j) = { dp(i + 1, j + 1).length + 1, Direction::Diagonal }; 54 } else { 55 auto down = dp(i, j + 1).length; 56 auto right = dp(i + 1, j).length; 57 if (down > right) 58 dp(i, j) = { down, Direction::Down }; 59 else 60 dp(i, j) = { right, Direction::Right }; 61 } 62 } 63 } 64 65 Vector<Hunk> hunks; 66 Hunk cur_hunk; 67 bool in_hunk = false; 68 69 auto update_hunk = [&](size_t i, size_t j, Direction direction) { 70 if (!in_hunk) { 71 in_hunk = true; 72 cur_hunk = { i, j, {}, {} }; 73 } 74 if (direction == Direction::Down) { 75 cur_hunk.added_lines.append(new_lines[j]); 76 } else if (direction == Direction::Right) { 77 cur_hunk.removed_lines.append(old_lines[i]); 78 } 79 }; 80 81 auto flush_hunk = [&]() { 82 if (in_hunk) { 83 if (cur_hunk.added_lines.size() > 0) 84 cur_hunk.target_start_line++; 85 if (cur_hunk.removed_lines.size() > 0) 86 cur_hunk.original_start_line++; 87 hunks.append(cur_hunk); 88 in_hunk = false; 89 } 90 }; 91 92 size_t i = 0; 93 size_t j = 0; 94 95 while (i < old_lines.size() && j < new_lines.size()) { 96 auto& cell = dp(i, j); 97 if (cell.direction == Direction::Down) { 98 update_hunk(i, j, cell.direction); 99 ++j; 100 } else if (cell.direction == Direction::Right) { 101 update_hunk(i, j, cell.direction); 102 ++i; 103 } else { 104 ++i; 105 ++j; 106 flush_hunk(); 107 } 108 } 109 110 while (i < old_lines.size()) { 111 update_hunk(i, new_lines.is_empty() ? 0 : new_lines.size() - 1, Direction::Right); // Remove a line 112 ++i; 113 } 114 while (j < new_lines.size()) { 115 update_hunk(old_lines.is_empty() ? 0 : old_lines.size() - 1, j, Direction::Down); // Add a line 116 ++j; 117 } 118 119 flush_hunk(); 120 121 return hunks; 122} 123 124}