@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.)
hq.recaptime.dev/wiki/Phorge
phorge
phabricator
1<?php
2
/**
 * Ordered list of typed text parts describing a prose diff.
 *
 * Each part is a dictionary with a `type` key and a `text` key. The part
 * types this class inspects or produces are:
 *
 *   - `=`: unchanged text;
 *   - `-`: removed text;
 *   - `+`: added text;
 *   - `.`: omitted context. These parts are only produced by
 *     getSummaryParts() and have `null` text.
 */
final class PhutilProseDiff extends Phobject {

  private $parts = array();

  /**
   * Append a part to the end of the diff.
   *
   * @param string $type Part type, like `=`, `-` or `+`.
   * @param string $text Text of the part.
   * @return $this
   */
  public function addPart($type, $text) {
    $this->parts[] = array(
      'type' => $type,
      'text' => $text,
    );
    return $this;
  }

  /**
   * Get all parts of the diff, in order.
   *
   * @return list<map<string, wild>> List of `type`/`text` dictionaries.
   */
  public function getParts() {
    return $this->parts;
  }

  /**
   * Get diff parts, but replace large blocks of unchanged text with "."
   * parts representing missing context.
   *
   * Only `=` parts are summarized; every other part is passed through
   * untouched. The stored parts are not modified.
   *
   * @return list<map<string, wild>> List of `type`/`text` dictionaries.
   */
  public function getSummaryParts() {
    $parts = $this->getParts();

    $head_key = head_key($parts);
    $last_key = last_key($parts);

    $results = array();
    foreach ($parts as $key => $part) {
      if ($part['type'] !== '=') {
        $results[] = $part;
        continue;
      }

      $is_head = ($key === $head_key);
      $is_last = ($key === $last_key);

      $pieces = $this->splitTextForSummary($part['text']);

      // The first and last parts keep context on one side only, so two
      // pieces are enough to omit something. Interior parts keep context on
      // both sides and need a third piece in the middle to omit.
      if ($is_head || $is_last) {
        $need = 2;
      } else {
        $need = 3;
      }

      // We don't have enough pieces to omit anything, so keep the part
      // exactly as it is.
      if (count($pieces) < $need) {
        $results[] = $part;
        continue;
      }

      // Leading context is only useful if something precedes this part.
      if (!$is_head) {
        $results[] = array(
          'type' => '=',
          'text' => head($pieces),
        );
      }

      $results[] = array(
        'type' => '.',
        'text' => null,
      );

      // Trailing context is only useful if something follows this part.
      if (!$is_last) {
        $results[] = array(
          'type' => '=',
          'text' => last($pieces),
        );
      }
    }

    return $results;
  }


  /**
   * Reorder and coalesce the parts of this diff, in place.
   *
   * Within each uninterrupted sequence of removed and added parts, all the
   * "-" text is put first and all the "+" text second. Adjacent parts of the
   * same type are then merged into a single part.
   *
   * @return $this
   */
  public function reorderParts() {
    // Reorder sequences of removed and added sections to put all the "-"
    // parts together first, then all the "+" parts together. This produces
    // a more human-readable result than intermingling them.

    $o_run = array();
    $n_run = array();
    $result = array();
    foreach ($this->parts as $part) {
      $type = $part['type'];
      switch ($type) {
        case '-':
          $o_run[] = $part;
          break;
        case '+':
          $n_run[] = $part;
          break;
        default:
          // Any other part type ends the current run of changes: flush the
          // run before emitting the part itself.
          if ($o_run || $n_run) {
            foreach ($this->combineRuns($o_run, $n_run) as $merged_part) {
              $result[] = $merged_part;
            }
            $o_run = array();
            $n_run = array();
          }
          $result[] = $part;
          break;
      }
    }

    // Flush a run of changes which extends to the end of the diff.
    if ($o_run || $n_run) {
      foreach ($this->combineRuns($o_run, $n_run) as $part) {
        $result[] = $part;
      }
    }

    // Now, combine consecutive runs of the same type of change (like a
    // series of "-" parts) into a single run.
    $combined = array();

    $last = null;
    $last_text = null;
    foreach ($result as $part) {
      $type = $part['type'];

      if ($last !== $type) {
        if ($last !== null) {
          $combined[] = array(
            'type' => $last,
            'text' => $last_text,
          );
        }
        $last_text = null;
        $last = $type;
      }

      $last_text .= $part['text'];
    }

    // Emit the final pending run, if the loop saw any parts at all.
    if ($last_text !== null) {
      $combined[] = array(
        'type' => $last,
        'text' => $last_text,
      );
    }

    $this->parts = $combined;

    return $this;
  }

  /**
   * Merge a run of removed parts and a run of added parts into at most four
   * parts: a shared "=" prefix, the "-" text, the "+" text, and a shared "="
   * suffix.
   *
   * Parts which would have empty text are not emitted.
   *
   * @param list<map<string, wild>> $o_run Removed ("-") parts.
   * @param list<map<string, wild>> $n_run Added ("+") parts.
   * @return list<map<string, wild>> List of `type`/`text` dictionaries.
   */
  private function combineRuns($o_run, $n_run) {
    $o_merge = $this->mergeParts($o_run);
    $n_merge = $this->mergeParts($n_run);

    // When removed and added blocks share a prefix or suffix, we sometimes
    // want to count it as unchanged (for example, if it is whitespace) but
    // sometimes want to count it as changed (for example, if it is a word
    // suffix like "ing"). Find common prefixes and suffixes of these layout
    // characters and emit them as "=" (unchanged) blocks.

    $layout_characters = array(
      ' ' => true,
      "\n" => true,
      '.' => true,
      '!' => true,
      ',' => true,
      '?' => true,
      ']' => true,
      '[' => true,
      '(' => true,
      ')' => true,
      '<' => true,
      '>' => true,
    );

    $o_text = $o_merge['text'];
    $n_text = $n_merge['text'];
    $o_len = strlen($o_text);
    $n_len = strlen($n_text);
    $min_len = min($o_len, $n_len);

    // The comparison is byte-wise; every layout character above is a single
    // byte.
    $prefix_len = 0;
    for ($pos = 0; $pos < $min_len; $pos++) {
      $o = $o_text[$pos];
      $n = $n_text[$pos];
      if ($o !== $n) {
        break;
      }
      if (empty($layout_characters[$o])) {
        break;
      }
      $prefix_len++;
    }

    // The suffix may not overlap the prefix, so only look at the bytes of
    // the shorter text which the prefix did not already consume.
    $suffix_len = 0;
    for ($pos = 0; $pos < ($min_len - $prefix_len); $pos++) {
      $o = $o_text[$o_len - ($pos + 1)];
      $n = $n_text[$n_len - ($pos + 1)];
      if ($o !== $n) {
        break;
      }
      if (empty($layout_characters[$o])) {
        break;
      }
      $suffix_len++;
    }

    $shared_len = $prefix_len + $suffix_len;

    $results = array();

    if ($prefix_len) {
      $results[] = array(
        'type' => '=',
        'text' => substr($o_text, 0, $prefix_len),
      );
    }

    // Only emit a "-" or "+" part if some text remains once the shared
    // prefix AND suffix are stripped. Checking the prefix alone would emit
    // an empty part when the two together cover the whole text.
    if ($shared_len < $o_len) {
      $results[] = array(
        'type' => '-',
        'text' => substr($o_text, $prefix_len, $o_len - $shared_len),
      );
    }

    if ($shared_len < $n_len) {
      $results[] = array(
        'type' => '+',
        'text' => substr($n_text, $prefix_len, $n_len - $shared_len),
      );
    }

    if ($suffix_len) {
      $results[] = array(
        'type' => '=',
        'text' => substr($o_text, -$suffix_len),
      );
    }

    return $results;
  }

  /**
   * Concatenate a list of parts which all have the same type into one part.
   *
   * For an empty list, the result has a `null` type and empty text.
   *
   * @param list<map<string, wild>> $parts Parts to merge.
   * @return map<string, wild> Single `type`/`text` dictionary.
   * @throws Exception If the parts do not all share one type.
   */
  private function mergeParts(array $parts) {
    $text = '';
    $type = null;
    foreach ($parts as $part) {
      $part_type = $part['type'];
      if ($type === null) {
        $type = $part_type;
      }
      if ($type !== $part_type) {
        throw new Exception(pht('Can not merge parts of dissimilar types!'));
      }
      $text .= $part['text'];
    }

    return array(
      'type' => $type,
      'text' => $text,
    );
  }

  /**
   * Split a block of text into leading context, a middle section, and
   * trailing context.
   *
   * The leading piece is the first non-empty line (with any newlines before
   * it); the trailing piece is the last non-empty line (with any newlines
   * after it). Returns one of:
   *
   *   - `array($text)` if the text can not be split, always with the
   *     original, unmodified text;
   *   - `array($head, $last)` if only whitespace separates the two;
   *   - `array($head, $middle, $last)` otherwise.
   *
   * @param string $text Text to split.
   * @return list<string> One, two or three pieces.
   */
  private function splitTextForSummary($text) {
    $matches = null;

    $ok = preg_match('/^(\n*[^\n]+)\n/', $text, $matches);
    if (!$ok) {
      return array($text);
    }

    $head = $matches[1];
    $rest = substr($text, strlen($head));

    $ok = preg_match('/\n([^\n]+\n*)\z/', $rest, $matches);
    if (!$ok) {
      // Return the original text, not the remainder with the head stripped.
      return array($text);
    }

    $last = $matches[1];
    $middle = substr($rest, 0, -strlen($last));

    if (!strlen(trim($middle))) {
      return array($head, $last);
    }

    return array($head, $middle, $last);
  }

}