@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.) hq.recaptime.dev/wiki/Phorge
phorge phabricator
at upstream/main 294 lines 7.5 kB view raw
1<?php 2 3final class PhutilProseDifferenceEngine extends Phobject { 4 5 public function getDiff($u, $v) { 6 return $this->buildDiff($u, $v, 0); 7 } 8 9 private function buildDiff($u, $v, $level) { 10 $u_parts = $this->splitCorpus($u, $level); 11 $v_parts = $this->splitCorpus($v, $level); 12 13 if ($level === 0) { 14 $diff = $this->newHashDiff($u_parts, $v_parts); 15 $too_large = false; 16 } else { 17 list($diff, $too_large) = $this->newEditDistanceMatrixDiff( 18 $u_parts, 19 $v_parts, 20 $level); 21 } 22 23 $diff->reorderParts(); 24 25 // If we just built a character-level diff, we're all done and do not 26 // need to go any deeper. 27 if ($level == 3) { 28 return $diff; 29 } 30 31 $blocks = array(); 32 $block = null; 33 foreach ($diff->getParts() as $part) { 34 $type = $part['type']; 35 $text = $part['text']; 36 switch ($type) { 37 case '=': 38 if ($block) { 39 $blocks[] = $block; 40 $block = null; 41 } 42 $blocks[] = array( 43 'type' => $type, 44 'text' => $text, 45 ); 46 break; 47 case '-': 48 if (!$block) { 49 $block = array( 50 'type' => '!', 51 'old' => '', 52 'new' => '', 53 ); 54 } 55 $block['old'] .= $text; 56 break; 57 case '+': 58 if (!$block) { 59 $block = array( 60 'type' => '!', 61 'old' => '', 62 'new' => '', 63 ); 64 } 65 $block['new'] .= $text; 66 break; 67 } 68 } 69 70 if ($block) { 71 $blocks[] = $block; 72 } 73 74 $result = new PhutilProseDiff(); 75 foreach ($blocks as $block) { 76 $type = $block['type']; 77 if ($type == '=') { 78 $result->addPart('=', $block['text']); 79 } else { 80 $old = $block['old']; 81 $new = $block['new']; 82 if (!strlen($old) && !strlen($new)) { 83 // Nothing to do. 84 } else if (!strlen($old)) { 85 $result->addPart('+', $new); 86 } else if (!strlen($new)) { 87 $result->addPart('-', $old); 88 } else { 89 if ($too_large) { 90 // If this text was too big to diff, don't try to subdivide it. 91 $result->addPart('-', $old); 92 $result->addPart('+', $new); 93 } else { 94 $subdiff = $this->buildDiff( 95 $old, 96 $new, 97 $level + 1); 98 99 foreach ($subdiff->getParts() as $part) { 100 $result->addPart($part['type'], $part['text']); 101 } 102 } 103 } 104 } 105 } 106 107 $result->reorderParts(); 108 109 return $result; 110 } 111 112 private function splitCorpus($corpus, $level) { 113 switch ($level) { 114 case 0: 115 // Level 0: Split into paragraphs. 116 $expr = '/([\n]+)/'; 117 break; 118 case 1: 119 // Level 1: Split into sentences. 120 $expr = '/([\n,!;?\.]+)/'; 121 break; 122 case 2: 123 // Level 2: Split into words. 124 $expr = '/(\s+)/'; 125 break; 126 case 3: 127 // Level 3: Split into characters. 128 return phutil_utf8v_combined($corpus); 129 } 130 131 $pieces = preg_split($expr, $corpus, -1, PREG_SPLIT_DELIM_CAPTURE); 132 return $this->stitchPieces($pieces, $level); 133 } 134 135 private function stitchPieces(array $pieces, $level) { 136 $results = array(); 137 $count = count($pieces); 138 for ($ii = 0; $ii < $count; $ii += 2) { 139 $result = $pieces[$ii]; 140 if ($ii + 1 < $count) { 141 $result .= $pieces[$ii + 1]; 142 } 143 144 if ($level < 2) { 145 $trimmed_pieces = $this->trimApart($result); 146 foreach ($trimmed_pieces as $trimmed_piece) { 147 $results[] = $trimmed_piece; 148 } 149 } else { 150 $results[] = $result; 151 } 152 } 153 154 // If the input ended with a delimiter, we can get an empty final piece. 155 // Just discard it. 156 if (last($results) == '') { 157 array_pop($results); 158 } 159 160 return $results; 161 } 162 163 private function newEditDistanceMatrixDiff( 164 array $u_parts, 165 array $v_parts, 166 $level) { 167 168 $matrix = id(new PhutilEditDistanceMatrix()) 169 ->setMaximumLength(128) 170 ->setSequences($u_parts, $v_parts) 171 ->setComputeString(true); 172 173 // For word-level and character-level changes, smooth the output string 174 // to reduce the choppiness of the diff. 175 if ($level > 1) { 176 $matrix->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_FULL); 177 } 178 179 $u_pos = 0; 180 $v_pos = 0; 181 182 $edits = $matrix->getEditString(); 183 $edits_length = strlen($edits); 184 185 $diff = new PhutilProseDiff(); 186 for ($ii = 0; $ii < $edits_length; $ii++) { 187 $c = $edits[$ii]; 188 if ($c == 's') { 189 $diff->addPart('=', $u_parts[$u_pos]); 190 $u_pos++; 191 $v_pos++; 192 } else if ($c == 'd') { 193 $diff->addPart('-', $u_parts[$u_pos]); 194 $u_pos++; 195 } else if ($c == 'i') { 196 $diff->addPart('+', $v_parts[$v_pos]); 197 $v_pos++; 198 } else if ($c == 'x') { 199 $diff->addPart('-', $u_parts[$u_pos]); 200 $diff->addPart('+', $v_parts[$v_pos]); 201 $u_pos++; 202 $v_pos++; 203 } else { 204 throw new Exception( 205 pht( 206 'Unexpected character ("%s") in edit string.', 207 $c)); 208 } 209 } 210 211 return array($diff, $matrix->didReachMaximumLength()); 212 } 213 214 private function newHashDiff(array $u_parts, array $v_parts) { 215 216 $u_ref = new PhabricatorDocumentRef(); 217 $v_ref = new PhabricatorDocumentRef(); 218 219 $u_blocks = $this->newDocumentEngineBlocks($u_parts); 220 $v_blocks = $this->newDocumentEngineBlocks($v_parts); 221 222 $rows = id(new PhabricatorDocumentEngineBlocks()) 223 ->addBlockList($u_ref, $u_blocks) 224 ->addBlockList($v_ref, $v_blocks) 225 ->newTwoUpLayout(); 226 227 $diff = new PhutilProseDiff(); 228 foreach ($rows as $row) { 229 list($u_block, $v_block) = $row; 230 231 if ($u_block && $v_block) { 232 if ($u_block->getDifferenceType() === '-') { 233 $diff->addPart('-', $u_block->getContent()); 234 $diff->addPart('+', $v_block->getContent()); 235 } else { 236 $diff->addPart('=', $u_block->getContent()); 237 } 238 } else if ($u_block) { 239 $diff->addPart('-', $u_block->getContent()); 240 } else { 241 $diff->addPart('+', $v_block->getContent()); 242 } 243 } 244 245 return $diff; 246 } 247 248 private function newDocumentEngineBlocks(array $parts) { 249 $blocks = array(); 250 251 foreach ($parts as $part) { 252 $hash = PhabricatorHash::digestForIndex($part); 253 254 $blocks[] = id(new PhabricatorDocumentEngineBlock()) 255 ->setContent($part) 256 ->setDifferenceHash($hash); 257 } 258 259 return $blocks; 260 } 261 262 public static function trimApart($input) { 263 // Split pieces into separate text and whitespace sections: make one 264 // piece out of all the whitespace at the beginning, one piece out of 265 // all the actual text in the middle, and one piece out of all the 266 // whitespace at the end. 267 268 $parts = array(); 269 270 $length = strlen($input); 271 272 $corpus = ltrim($input); 273 $l_length = strlen($corpus); 274 if ($l_length !== $length) { 275 $parts[] = substr($input, 0, $length - $l_length); 276 } 277 278 $corpus = rtrim($corpus); 279 $lr_length = strlen($corpus); 280 281 if ($lr_length) { 282 $parts[] = $corpus; 283 } 284 285 if ($lr_length !== $l_length) { 286 // NOTE: This will be a negative value; we're slicing from the end of 287 // the input string. 288 $parts[] = substr($input, $lr_length - $l_length); 289 } 290 291 return $parts; 292 } 293 294}