@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.)
hq.recaptime.dev/wiki/Phorge
phorge
phabricator
1<?php
2
/**
 * Rebuilds derived information on Differential changesets: "generated code"
 * attributes, the effect hash stored in changeset metadata, and the
 * "copy:lines" metadata describing lines which were copied or moved from
 * elsewhere in the same set of changesets.
 */
final class DifferentialChangesetEngine extends Phobject {

  private $viewer;

  /**
   * Set the viewer used when loading file objects.
   *
   * @param PhabricatorUser $viewer Viewing user.
   * @return $this
   */
  public function setViewer(PhabricatorUser $viewer) {
    $this->viewer = $viewer;

    return $this;
  }

  /**
   * Get the viewer previously provided to @{method:setViewer}.
   *
   * @return PhabricatorUser|null Viewing user, or null if none was set.
   */
  public function getViewer() {
    return $this->viewer;
  }

  /**
   * Rebuild generated-code attributes, effect hashes and copied-line
   * metadata for a list of changesets.
   *
   * Changesets which reference a new file object that can not be loaded are
   * dropped before any of the rebuild steps run.
   *
   * @param array<DifferentialChangeset> $changesets
   */
  public function rebuildChangesets(array $changesets) {
    assert_instances_of($changesets, DifferentialChangeset::class);

    $loaded = $this->loadChangesetFiles($changesets);

    foreach ($loaded as $changeset) {
      $this->detectGeneratedCode($changeset);
      $this->computeHashes($changeset);
    }

    $this->detectCopiedCode($loaded);
  }

  /**
   * Attach new file objects to the changesets which reference one.
   *
   * @param array<DifferentialChangeset> $changesets
   * @return array<DifferentialChangeset> The input changesets, with original
   *   keys preserved, minus any changeset whose referenced file could not be
   *   loaded.
   */
  private function loadChangesetFiles(array $changesets) {
    $phids = array();
    foreach ($changesets as $changeset) {
      $phid = $changeset->getNewFileObjectPHID();
      if ($phid === null) {
        continue;
      }
      $phids[] = $phid;
    }

    $file_map = array();
    if ($phids) {
      $file_map = id(new PhabricatorFileQuery())
        ->setViewer($this->getViewer())
        ->withPHIDs($phids)
        ->execute();
      $file_map = mpull($file_map, null, 'getPHID');
    }

    $results = array();
    foreach ($changesets as $key => $changeset) {
      $phid = $changeset->getNewFileObjectPHID();

      if ($phid !== null) {
        $file = idx($file_map, $phid);
        if (!$file) {
          // The changeset references a file which the query did not return:
          // leave it out of the result.
          continue;
        }

        $changeset->attachNewFileObject($file);
      }

      $results[$key] = $changeset;
    }

    return $results;
  }


/* -( Generated Code )----------------------------------------------------- */


  /**
   * Mark a changeset as generated, recording the trusted (path
   * configuration) and untrusted (file content) signals separately.
   *
   * @param DifferentialChangeset $changeset Changeset to examine and update.
   */
  private function detectGeneratedCode(DifferentialChangeset $changeset) {
    $attribute = DifferentialChangeset::ATTRIBUTE_GENERATED;

    if ($this->isTrustedGeneratedCode($changeset)) {
      $changeset->setTrustedChangesetAttribute($attribute, true);
    }

    if ($this->isUntrustedGeneratedCode($changeset)) {
      $changeset->setUntrustedChangesetAttribute($attribute, true);
    }
  }

  /**
   * Test whether the changeset filename matches any regular expression in
   * the "differential.generated-paths" configuration.
   *
   * @param DifferentialChangeset $changeset Changeset to examine.
   * @return bool True if some configured pattern matches the filename.
   */
  private function isTrustedGeneratedCode(DifferentialChangeset $changeset) {
    $path = $changeset->getFilename();
    $patterns = PhabricatorEnv::getEnvConfig('differential.generated-paths');

    foreach ($patterns as $pattern) {
      if (preg_match($pattern, $path)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Test whether the new file content carries a "generated" marker.
   *
   * @param DifferentialChangeset $changeset Changeset to examine.
   * @return bool True if the changeset has hunks and the new file content
   *   contains one of the recognized markers.
   */
  private function isUntrustedGeneratedCode(DifferentialChangeset $changeset) {
    if (!$changeset->getHunks()) {
      return false;
    }

    $content = $changeset->makeNewFile();

    // The marker is assembled from two pieces, presumably so that this
    // source file does not itself contain the contiguous marker text.
    $marker = '@'.'generated';
    if (strpos($content, $marker) !== false) {
      return true;
    }

    // See PHI1112. This is the official pattern for marking Go code as
    // generated.
    $go_pattern = '(^// Code generated .* DO NOT EDIT\.$)m';

    return (bool)preg_match($go_pattern, $content);
  }


/* -( Content Hashes )----------------------------------------------------- */


  /**
   * Store the effect hash in the changeset metadata, if one can be computed.
   *
   * @param DifferentialChangeset $changeset Changeset to update.
   */
  private function computeHashes(DifferentialChangeset $changeset) {
    $hash = $this->newEffectHash($changeset);
    if ($hash === null) {
      return;
    }

    $changeset->setChangesetMetadata(
      DifferentialChangeset::METADATA_EFFECT_HASH,
      $hash);
  }

  /**
   * Compute the effect hash for a changeset.
   *
   * Changesets with hunks are hashed by new file content. Otherwise, if the
   * changeset has a new file object, the hash is derived from the content
   * hash of that file.
   *
   * @param DifferentialChangeset $changeset Changeset to hash.
   * @return string|null Digest, or null if no hash can be computed.
   */
  private function newEffectHash(DifferentialChangeset $changeset) {
    if ($changeset->getHunks()) {
      return PhabricatorHash::digestForIndex($changeset->makeNewFile());
    }

    if (!$changeset->getNewFileObjectPHID()) {
      return null;
    }

    // See T13522. For now, the "contentHash" is not really a content hash
    // for files >4MB. This is okay: we will just always detect them as
    // changed, which is the safer behavior.
    $content_hash = $changeset->getNewFileObject()->getContentHash();
    if ($content_hash === null) {
      return null;
    }

    return PhabricatorHash::digestForIndex(
      sprintf('file-hash:%s', $content_hash));
  }


/* -( Copied Code )-------------------------------------------------------- */


  /**
   * Detect blocks of added lines which were copied or moved from old lines
   * in any of the changesets, and record them as "copy:lines" metadata.
   *
   * Lines are compared after trimming whitespace. Only old lines at least
   * 30 bytes long can start a match, and matched blocks shorter than 3 lines
   * are discarded.
   *
   * @param array<DifferentialChangeset> $changesets Changesets to examine
   *   and update.
   */
  private function detectCopiedCode(array $changesets) {
    $max_size = 65535;
    $min_width = 30;
    $min_lines = 3;
    $max_candidates = 16;

    // See PHI944. If the total number of changed lines is excessively large,
    // don't bother with copied code detection. This can take a lot of time and
    // memory and it's not generally of any use for very large changes.
    $changed_lines = 0;
    foreach ($changesets as $changeset) {
      $changed_lines += $changeset->getAddLines();
      $changed_lines += $changeset->getDelLines();
    }

    if ($changed_lines > $max_size) {
      return;
    }

    // Index the old side of every hunk: trimmed text and line type by file
    // and line number, plus a reverse map from (sufficiently long) trimmed
    // text to every place it occurs.
    $sources = array();
    $old_text = array();
    $old_type = array();
    foreach ($changesets as $changeset) {
      $path = $changeset->getFilename();
      foreach ($changeset->getHunks() as $hunk) {
        foreach ($hunk->getStructuredOldFile() as $number => $info) {
          $kind = $info['type'];
          if ($kind == '\\') {
            continue;
          }
          $old_type[$path][$number] = $kind;

          $content = trim($info['text']);
          $old_text[$path][$number] = $content;

          if (strlen($content) < $min_width) {
            continue;
          }
          $sources[$content][] = array($path, $number);
        }
      }
    }

    foreach ($changesets as $changeset) {
      $copies = array();

      foreach ($changeset->getHunks() as $hunk) {
        $new_type = array();
        $new_text = array();
        foreach ($hunk->getStructuredNewFile() as $number => $info) {
          $new_type[$number] = $info['type'];
          $new_text[$number] = trim($info['text']);
        }

        $skip = 0;
        foreach ($new_text as $number => $content) {
          if ($skip) {
            // We're skipping lines that we already processed because we
            // extended a block above them downward to include them.
            $skip--;
            continue;
          }

          if ($new_type[$number] !== '+') {
            // This line hasn't been changed in the new file, so don't try
            // to figure out where it came from.
            continue;
          }

          if (empty($sources[$content])) {
            // This line was too short to trigger copy/move detection.
            continue;
          }

          $candidates = $sources[$content];
          if (count($candidates) > $max_candidates) {
            // If there are a large number of identical lines in this diff,
            // don't try to figure out where this block came from: the
            // analysis is O(N^2), since we need to compare every line against
            // every other line. Even if we arrive at a result, it is unlikely
            // to be meaningful. See T5041.
            continue;
          }

          $best_length = 0;

          // Explore all candidates.
          foreach ($candidates as $candidate) {
            list($path, $origin) = $candidate;

            // Count the adjacent matching lines above (-1) and below (+1)
            // the line which triggered this candidate.
            $reach = array();
            foreach (array(-1, 1) as $step) {
              $matched = 0;
              for ($delta = $step; ; $delta += $step) {
                $new_at = $number + $delta;
                $old_at = $origin + $delta;

                if (isset($copies[$new_at])) {
                  // If we run into a block which we've already attributed
                  // to a move or copy from elsewhere, stop looking.
                  break;
                }

                if (!isset($new_text[$new_at])) {
                  // If we've run off the beginning or end of the new file,
                  // stop looking.
                  break;
                }

                if (!isset($old_text[$path][$old_at])) {
                  // If we've run off the beginning or end of the original
                  // file, we also stop looking.
                  break;
                }

                if ($old_text[$path][$old_at] !== $new_text[$new_at]) {
                  // If the old line doesn't match the new line, stop
                  // looking.
                  break;
                }

                $matched++;
              }
              $reach[$step] = $matched;
            }

            $length = 1 + $reach[-1] + $reach[1];

            // We prefer long copies/moves which match a lot of context over
            // short ones, and among equally long sources we prefer moves
            // (old line was removed) over copies.
            $is_longer = ($length > $best_length);
            $is_tied_move =
              ($length == $best_length) &&
              (idx($old_type[$path], $origin) == '-');

            if (!$is_longer && !$is_tied_move) {
              continue;
            }

            $best_length = $length;
            // Number of matching lines below the triggering line.
            $best_forward = $reach[1];
            $best_file = $path;
            $best_origin = $origin;
          }

          if ($best_file == $changeset->getFilename()) {
            $source_file = '';
          } else {
            $source_file = $best_file;
          }

          $is_short = ($best_length < $min_lines);

          // Record every line of the block, from its first line to its last.
          $first = $best_forward - $best_length + 1;
          for ($delta = $first; $delta <= $best_forward; $delta++) {
            $kind = idx($old_type[$best_file], $best_origin + $delta);

            if ($is_short) {
              // Ignore short blocks.
              $copies[$number + $delta] = array();
            } else {
              $copies[$number + $delta] = array(
                $source_file,
                $best_origin + $delta,
                $kind,
              );
            }
          }

          $skip = $best_forward;
        }
      }

      // Drop the empty placeholders which were recorded for short blocks.
      $copies = array_filter($copies);
      if (!$copies) {
        continue;
      }

      $metadata = $changeset->getMetadata();
      $metadata['copy:lines'] = $copies;
      $changeset->setMetadata($metadata);
    }

  }

}