Neovim sign gutter, designed to be mostly VCS agnostic
-- Module table; the public functions (`intra_diff`, `compute_diff`) are
-- attached to it further down and it is returned at the end of the file.
local M = {}
2
3local util = require "vcsigns.util"
4
--- Token-level differences inside a single hunk.
---
--- Each field maps a 1-based line index (relative to the start of that side of
--- the hunk) to a list of `{ start_col, end_col }` pairs, as built by
--- `_extract_intervals`. Columns are 0-based byte offsets within the line, with
--- `end_col` being the offset just past the changed token. Lines without any
--- changed token have no entry, so the tables may be sparse.
---@class IntraHunkDiff
---@field minus_intervals table<integer, integer[][]> Intervals of the minus side
---@field plus_intervals table<integer, integer[][]> Intervals of the plus side
local IntraHunkDiff = {}
9
--- A contiguous block of changed lines between the old and the new content.
---
--- The start/count values are taken verbatim from the quads returned by the
--- diff (`{ minus_start, minus_count, plus_start, plus_count }`).
---@class Hunk
---@field minus_start integer Start of the minus side
---@field minus_count integer Count of the minus side
---@field minus_lines string[] Lines in the minus side
---@field plus_start integer Start of the plus side
---@field plus_count integer Count of the plus side
---@field plus_lines string[] Lines in the plus side
---@field intra_diff IntraHunkDiff? Fine grained diff within the hunk (nil until computed by `M.intra_diff`)
local Hunk = {}
19
--- Split a string into tokens.
---
--- Tokenization strategy:
---  * Runs of alphanumeric characters (`%w`) are grouped into one word token.
---  * Every other character is a token of its own. UTF-8 continuation bytes
---    (0x80-0xBF) stay attached to the byte they follow, so a multibyte
---    character is never split across tokens (which would otherwise let a
---    diff interval start or end in the middle of a character).
---  * A "\0" token is appended after every line to mark the line break.
---@param text string The text to split into tokens; lines are separated by "\n".
---@return string[] The list of tokens (never contains an empty string).
local function _tokenize(text)
  -- An optional word followed by at most one non-word character together with
  -- its UTF-8 continuation bytes.
  local pattern = "([%w]*)([^%w]?[\128-\191]*)"

  local res = {}
  for _, line in ipairs(vim.split(text, "\n", { plain = true })) do
    -- The pattern can match the empty string (it does so at the end of every
    -- line), so both captures must be checked before they are stored.
    for word, sep in string.gmatch(line, pattern) do
      if word ~= "" then
        res[#res + 1] = word
      end
      if sep ~= "" then
        res[#res + 1] = sep
      end
    end
    res[#res + 1] = "\0"
  end
  return res
end
47
--- Map diffed token ranges back onto per-line character intervals.
---@param parts string[] Token list for one side; a "\0" entry marks a line break.
---@param hunk_side any Iterator usable in a generic `for`, yielding `{ start, count }` token ranges (1-based start).
---@return table<integer, integer[][]> For each affected line (1-based), the list of `{ start_col, end_col }` byte offsets of changed tokens.
local function _extract_intervals(parts, hunk_side)
  -- boundaries[i] is the (row, col) cursor just before parts[i], and
  -- boundaries[i + 1] the cursor just after it. Rows and cols are 0-based.
  local boundaries = { { 0, 0 } }
  local row, col = 0, 0
  for idx = 1, #parts do
    local token = parts[idx]
    if token == "\0" then
      -- Line break: move to the start of the next row.
      row, col = row + 1, 0
    else
      -- Word or separator: advance by its byte length.
      col = col + #token
    end
    boundaries[idx + 1] = { row, col }
  end

  local by_line = {}
  for range in hunk_side do
    local first = range[1]
    local last = first + range[2] - 1

    -- TODO(algmyr): Optimize this by merging adjacent intervals?
    for tok = first, last do
      local from, to = boundaries[tok], boundaries[tok + 1]
      -- A token whose boundaries sit on different rows is a line break marker;
      -- it has no extent within a line, so it is skipped.
      if from[1] == to[1] then
        local key = from[1] + 1
        local bucket = by_line[key]
        if not bucket then
          bucket = {}
          by_line[key] = bucket
        end
        bucket[#bucket + 1] = { from[2], to[2] }
      end
    end
  end
  return by_line
end
86
--- Concatenate lines into a single string in which every line is terminated
--- by a newline.
---@param lines string[] The lines to join.
---@return string The joined text; the empty string when `lines` is empty.
local function join_lines(lines)
  local joined = table.concat(lines, "\n")
  if #lines > 0 then
    return joined .. "\n"
  end
  return ""
end
93
--- Compute the diff between two sets of tokens.
--- This is hacking around the fact that vim.text.diff
--- takes a string rather than a list of strings: every token is passed to the
--- diff as one line.
--- Note: The strings in the lists must not contain newlines!
---@param old_tokens string[] The old tokens.
---@param new_tokens string[] The new tokens.
---@param diff_opts table? Options for the diff algorithm; nil means no extra options. The table is copied, not modified.
---@return integer[][] The diff as a list of quads.
local function _vim_diff(old_tokens, new_tokens, diff_opts)
  if #old_tokens == 0 and #new_tokens == 1 and new_tokens[1] == "" then
    -- Special case for non-existent old file with empty new file.
    return {}
  end
  local opts = vim.deepcopy(diff_opts) or {}
  -- Always request index quads, whatever the caller's options say.
  opts.result_type = "indices"
  -- Fallback for older Neovim versions: those only provide vim.diff and may
  -- not have a vim.text table at all, so the lookup itself must be guarded.
  local vim_diff_impl = (vim.text and vim.text.diff) or vim.diff
  local result =
    vim_diff_impl(join_lines(old_tokens), join_lines(new_tokens), opts)
  ---@cast result integer[][]?
  if not result then
    error("Failed to compute diff: " .. vim.inspect(result))
  end
  return result
end
118
--- Compute finer grained diffs within a hunk.
---
--- Both sides are tokenized, the token lists are diffed against each other and
--- the changed token ranges are translated back into per-line intervals.
---@param minus_lines string[] The lines in the minus side of the hunk.
---@param plus_lines string[] The lines in the plus side of the hunk.
---@param diff_opts table Options for the diff algorithm.
---@return IntraHunkDiff The fine grained diffs.
local function _compute_intra_hunk_diff(minus_lines, plus_lines, diff_opts)
  local old_tokens = _tokenize(table.concat(minus_lines, "\n"))
  local new_tokens = _tokenize(table.concat(plus_lines, "\n"))
  local token_quads = _vim_diff(old_tokens, new_tokens, diff_opts)

  -- Project every quad onto one side as a { start, count } pair.
  local function side_ranges(start_idx, count_idx)
    return vim.iter(token_quads):map(function(quad)
      return { quad[start_idx], quad[count_idx] }
    end)
  end

  local minus_intervals = _extract_intervals(old_tokens, side_ranges(1, 2))
  local plus_intervals = _extract_intervals(new_tokens, side_ranges(3, 4))

  return {
    minus_intervals = minus_intervals,
    plus_intervals = plus_intervals,
  }
end
147
--- Return the fine grained diff of a hunk, computing it on first use.
---
--- The result is stored in `hunk.intra_diff`, so later calls for the same hunk
--- return the stored value. Diff options are read from
--- `vim.g.vcsigns_fine_diff_opts`.
---@param hunk Hunk The hunk to inspect; its `intra_diff` field is filled in as a side effect.
---@return IntraHunkDiff The fine grained diff for the hunk.
function M.intra_diff(hunk)
  local cached = hunk.intra_diff
  if cached then
    return cached
  end

  local computed = _compute_intra_hunk_diff(
    hunk.minus_lines,
    hunk.plus_lines,
    vim.g.vcsigns_fine_diff_opts
  )
  hunk.intra_diff = computed
  return computed
end
158
--- Convert a hunk quad to a Hunk.
---@param hunk_quad integer[] `{ minus_start, minus_count, plus_start, plus_count }`.
---@param old_lines string[] The old lines of the file.
---@param new_lines string[] The new lines of the file.
---@return Hunk The hunk, with its lines extracted via `util.slice(lines, start, count)`.
local function _quad_to_hunk(hunk_quad, old_lines, new_lines)
  local minus_start, minus_count = hunk_quad[1], hunk_quad[2]
  local plus_start, plus_count = hunk_quad[3], hunk_quad[4]

  local hunk = {
    minus_start = minus_start,
    minus_count = minus_count,
    minus_lines = util.slice(old_lines, minus_start, minus_count),
    plus_start = plus_start,
    plus_count = plus_count,
    plus_lines = util.slice(new_lines, plus_start, plus_count),
  }
  -- `intra_diff` is deliberately left unset: it will be lazily computed as
  -- needed.
  return hunk
end
178
---Compute the diff between two contents.
---
--- Diff options are read from `vim.g.vcsigns_diff_opts`. If either side has
--- more lines than `vim.g.vcsigns_diff_max_lines`, diffing is skipped and an
--- empty list is returned; an unset limit means "no limit".
---@param old_lines string[] The old lines.
---@param new_lines string[] The new lines.
---@return Hunk[] The computed hunks.
function M.compute_diff(old_lines, new_lines)
  local diff_opts = vim.g.vcsigns_diff_opts
  -- Read the limit once. Comparing a number with nil raises an error, so an
  -- unset limit has to be checked for explicitly.
  local max_lines = vim.g.vcsigns_diff_max_lines

  -- If file is too large, skip diffing.
  if max_lines and (#old_lines > max_lines or #new_lines > max_lines) then
    util.verbose "Too many lines, skipping diff."
    return {}
  end

  local hunks = {}
  for i, quad in ipairs(_vim_diff(old_lines, new_lines, diff_opts)) do
    hunks[i] = _quad_to_hunk(quad, old_lines, new_lines)
  end
  return hunks
end
202
203return M