@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator

Extract scope line selection logic from the diff rendering engine so it can reasonably be iterated on

Summary:
Ref T13249. Ref T11738. See PHI985. Currently, we have a crude heuristic for guessing what line in a source file provides the best context.

We get it wrong in a lot of cases, sometimes selecting very silly lines like "{". Although we can't always pick the same line a human would pick, we //can// pile on heuristics until this is less frequently completely wrong and perhaps eventually get it to work fairly well most of the time.

Pull the logic for this into a separate standalone class and make it testable to prepare for adding heuristics.

Test Plan: Ran unit tests, browsed various files in the web UI and saw as-good-or-better context selection.

Reviewers: amckinley

Reviewed By: amckinley

Maniphest Tasks: T13249, T11738

Differential Revision: https://secure.phabricator.com/D20171

+286 -63
+4
src/__phutil_library_map__.php
··· 2971 2971 'PhabricatorDeveloperPreferencesSettingsPanel' => 'applications/settings/panel/PhabricatorDeveloperPreferencesSettingsPanel.php', 2972 2972 'PhabricatorDiffInlineCommentQuery' => 'infrastructure/diff/query/PhabricatorDiffInlineCommentQuery.php', 2973 2973 'PhabricatorDiffPreferencesSettingsPanel' => 'applications/settings/panel/PhabricatorDiffPreferencesSettingsPanel.php', 2974 + 'PhabricatorDiffScopeEngine' => 'infrastructure/diff/PhabricatorDiffScopeEngine.php', 2975 + 'PhabricatorDiffScopeEngineTestCase' => 'infrastructure/diff/__tests__/PhabricatorDiffScopeEngineTestCase.php', 2974 2976 'PhabricatorDifferenceEngine' => 'infrastructure/diff/PhabricatorDifferenceEngine.php', 2975 2977 'PhabricatorDifferentialApplication' => 'applications/differential/application/PhabricatorDifferentialApplication.php', 2976 2978 'PhabricatorDifferentialAttachCommitWorkflow' => 'applications/differential/management/PhabricatorDifferentialAttachCommitWorkflow.php', ··· 8850 8852 'PhabricatorDeveloperPreferencesSettingsPanel' => 'PhabricatorEditEngineSettingsPanel', 8851 8853 'PhabricatorDiffInlineCommentQuery' => 'PhabricatorApplicationTransactionCommentQuery', 8852 8854 'PhabricatorDiffPreferencesSettingsPanel' => 'PhabricatorEditEngineSettingsPanel', 8855 + 'PhabricatorDiffScopeEngine' => 'Phobject', 8856 + 'PhabricatorDiffScopeEngineTestCase' => 'PhabricatorTestCase', 8853 8857 'PhabricatorDifferenceEngine' => 'Phobject', 8854 8858 'PhabricatorDifferentialApplication' => 'PhabricatorApplication', 8855 8859 'PhabricatorDifferentialAttachCommitWorkflow' => 'PhabricatorDifferentialManagementWorkflow',
+5 -44
src/applications/differential/parser/DifferentialChangesetParser.php
··· 1173 1173 } 1174 1174 $range_len = min($range_len, $rows - $range_start); 1175 1175 1176 - list($gaps, $mask, $depths) = $this->calculateGapsMaskAndDepths( 1176 + list($gaps, $mask) = $this->calculateGapsAndMask( 1177 1177 $mask_force, 1178 1178 $feedback_mask, 1179 1179 $range_start, ··· 1181 1181 1182 1182 $renderer 1183 1183 ->setGaps($gaps) 1184 - ->setMask($mask) 1185 - ->setDepths($depths); 1184 + ->setMask($mask); 1186 1185 1187 1186 $html = $renderer->renderTextChange( 1188 1187 $range_start, ··· 1208 1207 * "show more"). The $mask returned is a sparsely populated dictionary 1209 1208 * of $visible_line_number => true. 1210 1209 * 1211 - * Depths - compute how indented any given line is. The $depths returned 1212 - * is a sparsely populated dictionary of $visible_line_number => $depth. 1213 - * 1214 - * This function also has the side effect of modifying member variable 1215 - * new such that tabs are normalized to spaces for each line of the diff. 1216 - * 1217 - * @return array($gaps, $mask, $depths) 1210 + * @return array($gaps, $mask) 1218 1211 */ 1219 - private function calculateGapsMaskAndDepths( 1212 + private function calculateGapsAndMask( 1220 1213 $mask_force, 1221 1214 $feedback_mask, 1222 1215 $range_start, ··· 1224 1217 1225 1218 $lines_context = $this->getLinesOfContext(); 1226 1219 1227 - // Calculate gaps and mask first 1228 1220 $gaps = array(); 1229 1221 $gap_start = 0; 1230 1222 $in_gap = false; ··· 1253 1245 $gaps = array_reverse($gaps); 1254 1246 $mask = $base_mask; 1255 1247 1256 - // Time to calculate depth. 1257 - // We need to go backwards to properly indent whitespace in this code: 1258 - // 1259 - // 0: class C { 1260 - // 1: 1261 - // 1: function f() { 1262 - // 2: 1263 - // 2: return; 1264 - // 1: 1265 - // 1: } 1266 - // 0: 1267 - // 0: } 1268 - // 1269 - $depths = array(); 1270 - $last_depth = 0; 1271 - $range_end = $range_start + $range_len; 1272 - if (!isset($this->new[$range_end])) { 1273 - $range_end--; 1274 - } 1275 - for ($ii = $range_end; $ii >= $range_start; $ii--) { 1276 - // We need to expand tabs to process mixed indenting and to round 1277 - // correctly later. 1278 - $line = str_replace("\t", ' ', $this->new[$ii]['text']); 1279 - $trimmed = ltrim($line); 1280 - if ($trimmed != '') { 1281 - // We round down to flatten "/**" and " *". 1282 - $last_depth = floor((strlen($line) - strlen($trimmed)) / 2); 1283 - } 1284 - $depths[$ii] = $last_depth; 1285 - } 1286 - 1287 - return array($gaps, $mask, $depths); 1248 + return array($gaps, $mask); 1288 1249 } 1289 1250 1290 1251 /**
+29 -9
src/applications/differential/render/DifferentialChangesetRenderer.php
··· 28 28 private $originalNew; 29 29 private $gaps; 30 30 private $mask; 31 - private $depths; 32 31 private $originalCharacterEncoding; 33 32 private $showEditAndReplyLinks; 34 33 private $canMarkDone; 35 34 private $objectOwnerPHID; 36 35 private $highlightingDisabled; 36 + private $scopeEngine; 37 37 38 38 private $oldFile = false; 39 39 private $newFile = false; ··· 74 74 75 75 public function getIsUndershield() { 76 76 return $this->isUndershield; 77 - } 78 - 79 - public function setDepths($depths) { 80 - $this->depths = $depths; 81 - return $this; 82 - } 83 - protected function getDepths() { 84 - return $this->depths; 85 77 } 86 78 87 79 public function setMask($mask) { ··· 676 668 } 677 669 678 670 return $views; 671 + } 672 + 673 + 674 + final protected function getScopeEngine() { 675 + if (!$this->scopeEngine) { 676 + $line_map = $this->getNewLineTextMap(); 677 + 678 + $scope_engine = id(new PhabricatorDiffScopeEngine()) 679 + ->setLineTextMap($line_map); 680 + 681 + $this->scopeEngine = $scope_engine; 682 + } 683 + 684 + return $this->scopeEngine; 685 + } 686 + 687 + private function getNewLineTextMap() { 688 + $new = $this->getNewLines(); 689 + 690 + $text_map = array(); 691 + foreach ($new as $new_line) { 692 + if (!isset($new_line['line'])) { 693 + continue; 694 + } 695 + $text_map[$new_line['line']] = $new_line['text']; 696 + } 697 + 698 + return $text_map; 679 699 } 680 700 681 701 }
+36 -10
src/applications/differential/render/DifferentialChangesetTwoUpRenderer.php
··· 3 3 final class DifferentialChangesetTwoUpRenderer 4 4 extends DifferentialChangesetHTMLRenderer { 5 5 6 + private $newOffsetMap; 7 + 6 8 public function isOneUpRenderer() { 7 9 return false; 8 10 } ··· 66 68 $new_render = $this->getNewRender(); 67 69 $original_left = $this->getOriginalOld(); 68 70 $original_right = $this->getOriginalNew(); 69 - $depths = $this->getDepths(); 70 71 $mask = $this->getMask(); 72 + 73 + $scope_engine = $this->getScopeEngine(); 74 + 75 + $offset_map = null; 71 76 72 77 for ($ii = $range_start; $ii < $range_start + $range_len; $ii++) { 73 78 if (empty($mask[$ii])) { ··· 87 92 $is_last_block = true; 88 93 } 89 94 90 - $context = null; 95 + $context_text = null; 91 96 $context_line = null; 92 - if (!$is_last_block && $depths[$ii + $len]) { 93 - for ($l = $ii + $len - 1; $l >= $ii; $l--) { 94 - $line = $new_lines[$l]['text']; 95 - if ($depths[$l] < $depths[$ii + $len] && trim($line) != '') { 96 - $context = $new_render[$l]; 97 - $context_line = $new_lines[$l]['line']; 98 - break; 97 + if (!$is_last_block) { 98 + $target_line = $new_lines[$ii + $len]['line']; 99 + $context_line = $scope_engine->getScopeStart($target_line); 100 + if ($context_line !== null) { 101 + // The scope engine returns a line number in the file. We need 102 + // to map that back to a display offset in the diff. 103 + if (!$offset_map) { 104 + $offset_map = $this->getNewLineToOffsetMap(); 99 105 } 106 + $offset = $offset_map[$context_line]; 107 + $context_text = $new_render[$offset]; 100 108 } 101 109 } 102 110 ··· 126 134 'class' => 'show-context', 127 135 ), 128 136 // TODO: [HTML] Escaping model here isn't ideal. 129 - phutil_safe_html($context)), 137 + phutil_safe_html($context_text)), 130 138 )); 131 139 132 140 $html[] = $container; ··· 384 392 public function getRowScaffoldForInline(PHUIDiffInlineCommentView $view) { 385 393 return id(new PHUIDiffTwoUpInlineCommentRowScaffold()) 386 394 ->addInlineView($view); 395 + } 396 + 397 + private function getNewLineToOffsetMap() { 398 + if ($this->newOffsetMap === null) { 399 + $new = $this->getNewLines(); 400 + 401 + $map = array(); 402 + foreach ($new as $offset => $new_line) { 403 + if ($new_line['line'] === null) { 404 + continue; 405 + } 406 + $map[$new_line['line']] = $offset; 407 + } 408 + 409 + $this->newOffsetMap = $map; 410 + } 411 + 412 + return $this->newOffsetMap; 387 413 } 388 414 389 415 }
+156
src/infrastructure/diff/PhabricatorDiffScopeEngine.php
··· 1 + <?php 2 + 3 + final class PhabricatorDiffScopeEngine 4 + extends Phobject { 5 + 6 + private $lineTextMap; 7 + private $lineDepthMap; 8 + 9 + public function setLineTextMap(array $map) { 10 + if (array_key_exists(0, $map)) { 11 + throw new Exception( 12 + pht('ScopeEngine text map must be a 1-based map of lines.')); 13 + } 14 + 15 + $expect = 1; 16 + foreach ($map as $key => $value) { 17 + if ($key === $expect) { 18 + $expect++; 19 + continue; 20 + } 21 + 22 + throw new Exception( 23 + pht( 24 + 'ScopeEngine text map must be a contiguous map of '. 25 + 'lines, but is not: found key "%s" where key "%s" was expected.', 26 + $key, 27 + $expect)); 28 + } 29 + 30 + $this->lineTextMap = $map; 31 + 32 + return $this; 33 + } 34 + 35 + public function getLineTextMap() { 36 + if ($this->lineTextMap === null) { 37 + throw new PhutilInvalidStateException('setLineTextMap'); 38 + } 39 + return $this->lineTextMap; 40 + } 41 + 42 + public function getScopeStart($line) { 43 + $text_map = $this->getLineTextMap(); 44 + $depth_map = $this->getLineDepthMap(); 45 + $length = count($text_map); 46 + 47 + // Figure out the effective depth of the line we're getting scope for. 48 + // If the line is just whitespace, it may have no depth on its own. In 49 + // this case, we look for the next line. 50 + $line_depth = null; 51 + for ($ii = $line; $ii <= $length; $ii++) { 52 + if ($depth_map[$ii] !== null) { 53 + $line_depth = $depth_map[$ii]; 54 + break; 55 + } 56 + } 57 + 58 + // If we can't find a line depth for the target line, just bail. 59 + if ($line_depth === null) { 60 + return null; 61 + } 62 + 63 + // Limit the maximum number of lines we'll examine. If a user has a 64 + // million-line diff of nonsense, scanning the whole thing is a waste 65 + // of time. 66 + $search_range = 1000; 67 + $search_until = max(0, $ii - $search_range); 68 + 69 + for ($ii = $line - 1; $ii > $search_until; $ii--) { 70 + $line_text = $text_map[$ii]; 71 + 72 + // This line is in missing context: the diff was diffed with partial 73 + // context, and we ran out of context before finding a good scope line. 74 + // Bail out, we don't want to jump across missing context blocks. 75 + if ($line_text === null) { 76 + return null; 77 + } 78 + 79 + $depth = $depth_map[$ii]; 80 + 81 + // This line is all whitespace. This isn't a possible match. 82 + if ($depth === null) { 83 + continue; 84 + } 85 + 86 + // The depth is the same as (or greater than) the depth we started with, 87 + // so this isn't a possible match. 88 + if ($depth >= $line_depth) { 89 + continue; 90 + } 91 + 92 + // Reject lines which begin with "}" or "{". These lines are probably 93 + // never good matches. 94 + if (preg_match('/^\s*[{}]/i', $line_text)) { 95 + continue; 96 + } 97 + 98 + return $ii; 99 + } 100 + 101 + return null; 102 + } 103 + 104 + private function getLineDepthMap() { 105 + if (!$this->lineDepthMap) { 106 + $this->lineDepthMap = $this->newLineDepthMap(); 107 + } 108 + 109 + return $this->lineDepthMap; 110 + } 111 + 112 + private function newLineDepthMap() { 113 + $text_map = $this->getLineTextMap(); 114 + 115 + // TODO: This should be configurable once we handle tab widths better. 116 + $tab_width = 2; 117 + 118 + $depth_map = array(); 119 + foreach ($text_map as $line_number => $line_text) { 120 + if ($line_text === null) { 121 + $depth_map[$line_number] = null; 122 + continue; 123 + } 124 + 125 + $len = strlen($line_text); 126 + 127 + // If the line has no actual text, don't assign it a depth. 128 + if (!$len || !strlen(trim($line_text))) { 129 + $depth_map[$line_number] = null; 130 + continue; 131 + } 132 + 133 + $count = 0; 134 + for ($ii = 0; $ii < $len; $ii++) { 135 + $c = $line_text[$ii]; 136 + if ($c == ' ') { 137 + $count++; 138 + } else if ($c == "\t") { 139 + $count += $tab_width; 140 + } else { 141 + break; 142 + } 143 + } 144 + 145 + // Round down to cheat our way through the " *" parts of docblock 146 + // comments. This is generally a reasonble heuristic because odd tab 147 + // widths are exceptionally rare. 148 + $depth = ($count >> 1); 149 + 150 + $depth_map[$line_number] = $depth; 151 + } 152 + 153 + return $depth_map; 154 + } 155 + 156 + }
+51
src/infrastructure/diff/__tests__/PhabricatorDiffScopeEngineTestCase.php
··· 1 + <?php 2 + 3 + final class PhabricatorDiffScopeEngineTestCase 4 + extends PhabricatorTestCase { 5 + 6 + private $engines = array(); 7 + 8 + public function testScopeEngine() { 9 + $this->assertScopeStart('zebra.c', 4, 2); 10 + } 11 + 12 + private function assertScopeStart($file, $line, $expect) { 13 + $engine = $this->getScopeTestEngine($file); 14 + 15 + $actual = $engine->getScopeStart($line); 16 + $this->assertEqual( 17 + $expect, 18 + $actual, 19 + pht( 20 + 'Expect scope for line %s to start on line %s (actual: %s) in "%s".', 21 + $line, 22 + $expect, 23 + $actual, 24 + $file)); 25 + } 26 + 27 + private function getScopeTestEngine($file) { 28 + if (!isset($this->engines[$file])) { 29 + $this->engines[$file] = $this->newScopeTestEngine($file); 30 + } 31 + 32 + return $this->engines[$file]; 33 + } 34 + 35 + private function newScopeTestEngine($file) { 36 + $path = dirname(__FILE__).'/data/'.$file; 37 + $data = Filesystem::readFile($path); 38 + 39 + $lines = phutil_split_lines($data); 40 + $map = array(); 41 + foreach ($lines as $key => $line) { 42 + $map[$key + 1] = $line; 43 + } 44 + 45 + $engine = id(new PhabricatorDiffScopeEngine()) 46 + ->setLineTextMap($map); 47 + 48 + return $engine; 49 + } 50 + 51 + }
+5
src/infrastructure/diff/__tests__/data/zebra.c
··· 1 + void 2 + ZebraTamer::TameAZebra(nsPoint where, const nsRect& zone, nsAtom* material) 3 + { 4 + zebra.tame = true; 5 + }