@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
at upstream/main 337 lines 10 kB view raw
<?php

/**
 * Rebuilds derived data for a list of Differential changesets.
 *
 * For each changeset this engine attaches the new file object (when the
 * changeset references one), flags generated code, stores an "effect hash"
 * in the changeset metadata, and finally records which added lines appear to
 * have been copied or moved from elsewhere in the same set of changesets.
 */
final class DifferentialChangesetEngine extends Phobject {

  private $viewer;

  /**
   * Set the viewer used when loading file objects.
   *
   * @param PhabricatorUser $viewer Viewer to query files as.
   * @return this
   */
  public function setViewer(PhabricatorUser $viewer) {
    $this->viewer = $viewer;
    return $this;
  }

  /**
   * Get the viewer previously provided to @{method:setViewer}.
   *
   * @return PhabricatorUser|null Viewer, or null if none has been set.
   */
  public function getViewer() {
    return $this->viewer;
  }

  /**
   * Recompute generated-code attributes, effect hashes and copied-line
   * metadata on the given changesets.
   *
   * Changesets which reference a new file object that can not be loaded by
   * the viewer are dropped before any processing and are left unmodified.
   *
   * @param array<DifferentialChangeset> $changesets
   * @return void
   */
  public function rebuildChangesets(array $changesets) {
    assert_instances_of($changesets, DifferentialChangeset::class);

    $changesets = $this->loadChangesetFiles($changesets);

    foreach ($changesets as $changeset) {
      $this->detectGeneratedCode($changeset);
      $this->computeHashes($changeset);
    }

    $this->detectCopiedCode($changesets);
  }

  /**
   * Load and attach the new file object for every changeset which has a new
   * file object PHID.
   *
   * @param array<DifferentialChangeset> $changesets
   * @return array<DifferentialChangeset> The input list, with original keys
   *   preserved, minus any changeset whose file could not be loaded.
   */
  private function loadChangesetFiles(array $changesets) {
    $viewer = $this->getViewer();

    $file_phids = array();
    foreach ($changesets as $changeset) {
      $file_phid = $changeset->getNewFileObjectPHID();
      if ($file_phid !== null) {
        $file_phids[] = $file_phid;
      }
    }

    if ($file_phids) {
      $files = id(new PhabricatorFileQuery())
        ->setViewer($viewer)
        ->withPHIDs($file_phids)
        ->execute();
      $files = mpull($files, null, 'getPHID');
    } else {
      $files = array();
    }

    foreach ($changesets as $changeset_key => $changeset) {
      $file_phid = $changeset->getNewFileObjectPHID();
      if ($file_phid === null) {
        continue;
      }

      $file = idx($files, $file_phid);
      if (!$file) {
        // The query did not return this file, so we can not attach it. Drop
        // the changeset so later steps never ask it for a missing file.
        unset($changesets[$changeset_key]);
        continue;
      }

      $changeset->attachNewFileObject($file);
    }

    return $changesets;
  }


/* -( Generated Code )----------------------------------------------------- */


  /**
   * Flag a changeset as generated code, recording trusted (path-based) and
   * untrusted (content-based) detection as separate attributes.
   *
   * @param DifferentialChangeset $changeset Changeset to inspect and update.
   * @return void
   */
  private function detectGeneratedCode(DifferentialChangeset $changeset) {
    $is_generated_trusted = $this->isTrustedGeneratedCode($changeset);
    if ($is_generated_trusted) {
      $changeset->setTrustedChangesetAttribute(
        DifferentialChangeset::ATTRIBUTE_GENERATED,
        $is_generated_trusted);
    }

    $is_generated_untrusted = $this->isUntrustedGeneratedCode($changeset);
    if ($is_generated_untrusted) {
      $changeset->setUntrustedChangesetAttribute(
        DifferentialChangeset::ATTRIBUTE_GENERATED,
        $is_generated_untrusted);
    }
  }

  /**
   * Test whether the changeset filename matches any of the regular
   * expressions in the `differential.generated-paths` configuration.
   *
   * @param DifferentialChangeset $changeset
   * @return bool True if any configured pattern matches the filename.
   */
  private function isTrustedGeneratedCode(DifferentialChangeset $changeset) {

    $filename = $changeset->getFilename();

    $paths = PhabricatorEnv::getEnvConfig('differential.generated-paths');
    foreach ($paths as $regexp) {
      if (preg_match($regexp, $filename)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Test whether the new file content carries a generated-code marker.
   *
   * Only changesets which have hunks are examined.
   *
   * @param DifferentialChangeset $changeset
   * @return bool True if a marker is present in the new file content.
   */
  private function isUntrustedGeneratedCode(DifferentialChangeset $changeset) {

    if ($changeset->getHunks()) {
      $new_data = $changeset->makeNewFile();

      // NOTE: The marker is assembled from two pieces, presumably so that
      // this source file does not itself contain the literal marker text.
      if (strpos($new_data, '@'.'generated') !== false) {
        return true;
      }

      // See PHI1112. This is the official pattern for marking Go code as
      // generated.
      if (preg_match('(^// Code generated .* DO NOT EDIT\.$)m', $new_data)) {
        return true;
      }
    }

    return false;
  }


/* -( Content Hashes )----------------------------------------------------- */


  /**
   * Compute the effect hash for a changeset and, if one is available, store
   * it in the changeset metadata.
   *
   * @param DifferentialChangeset $changeset Changeset to update.
   * @return void
   */
  private function computeHashes(DifferentialChangeset $changeset) {

    $effect_key = DifferentialChangeset::METADATA_EFFECT_HASH;

    $effect_hash = $this->newEffectHash($changeset);
    if ($effect_hash !== null) {
      $changeset->setChangesetMetadata($effect_key, $effect_hash);
    }
  }

  /**
   * Build a digest describing the state of the file after the change.
   *
   * Changesets with hunks are hashed from the new file text. Otherwise, if
   * the changeset has a new file object, the digest is derived from that
   * file's content hash.
   *
   * @param DifferentialChangeset $changeset
   * @return string|null Digest, or null if no hash can be computed.
   */
  private function newEffectHash(DifferentialChangeset $changeset) {

    if ($changeset->getHunks()) {
      $new_data = $changeset->makeNewFile();
      return PhabricatorHash::digestForIndex($new_data);
    }

    if ($changeset->getNewFileObjectPHID()) {
      $file = $changeset->getNewFileObject();

      // See T13522. For now, the "contentHash" is not really a content hash
      // for files >4MB. This is okay: we will just always detect them as
      // changed, which is the safer behavior.

      $hash = $file->getContentHash();
      if ($hash !== null) {
        $hash = sprintf('file-hash:%s', $hash);
        return PhabricatorHash::digestForIndex($hash);
      }
    }

    return null;
  }


/* -( Copied Code )-------------------------------------------------------- */


  /**
   * Detect added lines which were copied or moved from the old side of any
   * changeset in the list, and record them as "copy:lines" metadata.
   *
   * Each entry in "copy:lines" is keyed by new-file line number and holds
   * the source filename (an empty string when the source is the same file),
   * the source line number, and the source line's type in the old file.
   * Blocks shorter than three lines are ignored, and only lines at least 30
   * bytes wide (after trimming) can start a match.
   *
   * @param array<DifferentialChangeset> $changesets
   * @return void
   */
  private function detectCopiedCode(array $changesets) {
    // See PHI944. If the total number of changed lines is excessively large,
    // don't bother with copied code detection. This can take a lot of time and
    // memory and it's not generally of any use for very large changes.
    $max_size = 65535;

    $total_size = 0;
    foreach ($changesets as $changeset) {
      $total_size += ($changeset->getAddLines() + $changeset->getDelLines());
    }

    if ($total_size > $max_size) {
      return;
    }

    $min_width = 30;
    $min_lines = 3;

    // Index the old side of every hunk:
    //   $map:   trimmed line text => list of (filename, line) occurrences,
    //           only for lines which are wide enough to be interesting.
    //   $files: filename => line => trimmed text.
    //   $types: filename => line => line type.
    $map = array();
    $files = array();
    $types = array();
    foreach ($changesets as $changeset) {
      $file = $changeset->getFilename();
      foreach ($changeset->getHunks() as $hunk) {
        $lines = $hunk->getStructuredOldFile();
        foreach ($lines as $line => $info) {
          $type = $info['type'];
          if ($type === '\\') {
            continue;
          }
          $types[$file][$line] = $type;

          $text = $info['text'];
          $text = trim($text);
          $files[$file][$line] = $text;

          if (strlen($text) >= $min_width) {
            $map[$text][] = array($file, $line);
          }
        }
      }
    }

    foreach ($changesets as $changeset) {
      $copies = array();
      foreach ($changeset->getHunks() as $hunk) {
        $added = $hunk->getStructuredNewFile();
        $atype = array();

        foreach ($added as $line => $info) {
          $atype[$line] = $info['type'];
          $added[$line] = trim($info['text']);
        }

        $skip_lines = 0;
        foreach ($added as $line => $code) {
          if ($skip_lines) {
            // We're skipping lines that we already processed because we
            // extended a block above them downward to include them.
            $skip_lines--;
            continue;
          }

          if ($atype[$line] !== '+') {
            // This line hasn't been changed in the new file, so don't try
            // to figure out where it came from.
            continue;
          }

          if (empty($map[$code])) {
            // This line was too short to trigger copy/move detection.
            continue;
          }

          if (count($map[$code]) > 16) {
            // If there are a large number of identical lines in this diff,
            // don't try to figure out where this block came from: the analysis
            // is O(N^2), since we need to compare every line against every
            // other line. Even if we arrive at a result, it is unlikely to be
            // meaningful. See T5041.
            continue;
          }

          // The first candidate always replaces these, because every
          // candidate has a length of at least 1.
          $best_length = 0;
          $best_offset = 0;
          $best_file = null;
          $best_line = null;

          // Explore all candidates.
          foreach ($map[$code] as $val) {
            list($file, $orig_line) = $val;
            $length = 1;

            // Search backward and forward to find all of the adjacent lines
            // which match.
            foreach (array(-1, 1) as $direction) {
              $offset = $direction;
              while (true) {
                if (isset($copies[$line + $offset])) {
                  // If we run into a block above us which we've already
                  // attributed to a move or copy from elsewhere, stop
                  // looking.
                  break;
                }

                if (!isset($added[$line + $offset])) {
                  // If we've run off the beginning or end of the new file,
                  // stop looking.
                  break;
                }

                if (!isset($files[$file][$orig_line + $offset])) {
                  // If we've run off the beginning or end of the original
                  // file, we also stop looking.
                  break;
                }

                $old = $files[$file][$orig_line + $offset];
                $new = $added[$line + $offset];
                if ($old !== $new) {
                  // If the old line doesn't match the new line, stop
                  // looking.
                  break;
                }

                $length++;
                $offset += $direction;
              }
            }

            if ($length < $best_length) {
              // If we already know of a better source (more matching lines)
              // for this move/copy, stick with that one. We prefer long
              // copies/moves which match a lot of context over short ones.
              continue;
            }

            if ($length === $best_length) {
              if (idx($types[$file], $orig_line) !== '-') {
                // If we already know of an equally good source (same number
                // of matching lines) and this isn't a move, stick with the
                // other one. We prefer moves over copies.
                continue;
              }
            }

            $best_length = $length;
            // ($offset - 1) contains number of forward matching lines.
            $best_offset = $offset - 1;
            $best_file = $file;
            $best_line = $orig_line;
          }

          // Compare filenames strictly: with "==", distinct numeric-looking
          // paths (for example "1" and "01") compare equal, which would
          // record a cross-file copy as a move within the same file.
          $file = ($best_file === $changeset->getFilename() ? '' : $best_file);
          for ($i = $best_length; $i--; ) {
            $type = idx($types[$best_file], $best_line + $best_offset - $i);
            $copies[$line + $best_offset - $i] = ($best_length < $min_lines
              ? array() // Ignore short blocks.
              : array($file, $best_line + $best_offset - $i, $type));
          }

          $skip_lines = $best_offset;
        }
      }

      // Short blocks were recorded as empty arrays so later lines would not
      // extend into them; discard those placeholders before saving.
      $copies = array_filter($copies);
      if ($copies) {
        $metadata = $changeset->getMetadata();
        $metadata['copy:lines'] = $copies;
        $changeset->setMetadata($metadata);
      }
    }

  }

}