@recaptime-dev's working patches + fork for Phorge, a community fork of Phabricator. (Upstream dev and stable branches are at upstream/main and upstream/stable respectively.)
hq.recaptime.dev/wiki/Phorge
phorge
phabricator
1<?php
2
3final class DifferentialHunkParser extends Phobject {
4
5 private $oldLines;
6 private $newLines;
7 private $intraLineDiffs;
8 private $depthOnlyLines;
9 private $visibleLinesMask;
10 private $normalized;
11
/**
 * Build a lookup of the old-file line numbers at which hunks begin.
 *
 * Hunks whose old offset is 1 or less are left out. The resulting map is
 * used to decide where to render "Context not available." in diffs that are
 * made of multiple hunks.
 *
 * @param array<DifferentialHunk> $hunks Hunks to inspect.
 * @return array<int|string, bool> Map from starting line to `true`, with one
 *   entry per distinct old offset greater than 1.
 */
public function getHunkStartLines(array $hunks) {
  assert_instances_of($hunks, DifferentialHunk::class);

  $start_lines = array();
  foreach ($hunks as $hunk) {
    $offset = $hunk->getOldOffset();
    if (!($offset > 1)) {
      // Hunks at the very top of the file never need a context marker.
      continue;
    }
    $start_lines[$offset] = true;
  }

  return $start_lines;
}
34
/**
 * Remember the visibility mask computed by one of the mask generators.
 *
 * @param array $mask Map from line index to a visibility flag.
 * @return $this
 */
private function setVisibleLinesMask($mask) {
  $this->visibleLinesMask = $mask;

  return $this;
}
/**
 * Return the previously generated visibility mask.
 *
 * @return array The stored mask.
 * @throws PhutilInvalidStateException If no mask has been generated yet.
 */
public function getVisibleLinesMask() {
  $mask = $this->visibleLinesMask;
  if ($mask !== null) {
    return $mask;
  }

  throw new PhutilInvalidStateException('generateVisibleLinesMask');
}
45
/**
 * Remember the intraline diff segments computed for changed lines.
 *
 * @param array $intra_line_diffs Map from line index to a pair of
 *   (old segments, new segments).
 * @return $this
 */
private function setIntraLineDiffs($intra_line_diffs) {
  $this->intraLineDiffs = $intra_line_diffs;

  return $this;
}
/**
 * Return the previously generated intraline diff segments.
 *
 * @return array The stored intraline diffs.
 * @throws PhutilInvalidStateException If the diffs have not been generated.
 */
public function getIntraLineDiffs() {
  $diffs = $this->intraLineDiffs;
  if ($diffs !== null) {
    return $diffs;
  }

  throw new PhutilInvalidStateException('generateIntraLineDiffs');
}
56
/**
 * Replace the list of parsed lines for the new side of the diff.
 *
 * @param array $new_lines List of line records (or `null` padding rows).
 * @return $this
 */
private function setNewLines($new_lines) {
  $this->newLines = $new_lines;

  return $this;
}
/**
 * Return the parsed lines for the new side of the diff.
 *
 * @return array The stored new-side lines.
 * @throws PhutilInvalidStateException If no hunks have been parsed yet.
 */
public function getNewLines() {
  $lines = $this->newLines;
  if ($lines !== null) {
    return $lines;
  }

  throw new PhutilInvalidStateException('parseHunksForLineData');
}
67
/**
 * Replace the list of parsed lines for the old side of the diff.
 *
 * @param array $old_lines List of line records (or `null` padding rows).
 * @return $this
 */
private function setOldLines($old_lines) {
  $this->oldLines = $old_lines;

  return $this;
}
/**
 * Return the parsed lines for the old side of the diff.
 *
 * @return array The stored old-side lines.
 * @throws PhutilInvalidStateException If no hunks have been parsed yet.
 */
public function getOldLines() {
  $lines = $this->oldLines;
  if ($lines !== null) {
    return $lines;
  }

  throw new PhutilInvalidStateException('parseHunksForLineData');
}
78
/**
 * Build a map from old-side line number to that line's change type.
 *
 * Empty (padding) rows are ignored.
 *
 * @return array Map from line number to change type.
 */
public function getOldLineTypeMap() {
  $types = array();
  foreach ($this->getOldLines() as $row) {
    if ($row) {
      $types[$row['line']] = $row['type'];
    }
  }

  return $types;
}
90
/**
 * Overwrite the change type of every old-side line from a lookup map.
 *
 * Lines whose number is missing from the map get a `null` type. Empty
 * (padding) rows are left untouched, matching the rows that
 * @{method:getOldLineTypeMap} skips; previously they were silently turned
 * into `array('type' => null)` records.
 *
 * @param array $map Map from old-side line number to change type.
 * @return $this
 */
public function setOldLineTypeMap(array $map) {
  $lines = $this->getOldLines();
  foreach ($lines as $key => $data) {
    if (!$data) {
      // Padding row: there is no line number to look up, and writing a
      // type here would make the row look like a real line.
      continue;
    }
    $lines[$key]['type'] = idx($map, $data['line']);
  }
  $this->oldLines = $lines;

  return $this;
}
99
/**
 * Build a map from new-side line number to that line's change type.
 *
 * Empty (padding) rows are ignored.
 *
 * @return array Map from line number to change type.
 */
public function getNewLineTypeMap() {
  $types = array();
  foreach ($this->getNewLines() as $row) {
    if ($row) {
      $types[$row['line']] = $row['type'];
    }
  }

  return $types;
}
111
/**
 * Overwrite the change type of every new-side line from a lookup map.
 *
 * Lines whose number is missing from the map get a `null` type. Empty
 * (padding) rows are left untouched, matching the rows that
 * @{method:getNewLineTypeMap} skips; previously they were silently turned
 * into `array('type' => null)` records.
 *
 * @param array $map Map from new-side line number to change type.
 * @return $this
 */
public function setNewLineTypeMap(array $map) {
  $lines = $this->getNewLines();
  foreach ($lines as $key => $data) {
    if (!$data) {
      // Padding row: there is no line number to look up, and writing a
      // type here would make the row look like a real line.
      continue;
    }
    $lines[$key]['type'] = idx($map, $data['line']);
  }
  $this->newLines = $lines;

  return $this;
}
120
/**
 * Record which lines were changed only by adjusting their indent depth.
 *
 * @param array $map Map from line index to the indent marker for that line.
 * @return $this
 */
public function setDepthOnlyLines(array $map) {
  $this->depthOnlyLines = $map;

  return $this;
}
125
/**
 * Return the map of lines changed only by indent depth.
 *
 * @return array|null The stored map, or `null` if none has been set.
 */
public function getDepthOnlyLines() {
  $depth_only = $this->depthOnlyLines;

  return $depth_only;
}
129
/**
 * Flag whether the parsed hunks come from a normalized diff alignment.
 *
 * @param mixed $normalized Truthy when the alignment was normalized.
 * @return $this
 */
public function setNormalized($normalized) {
  $this->normalized = $normalized;

  return $this;
}
134
/**
 * Return the normalization flag.
 *
 * @return mixed The stored flag, or `null` if it was never set.
 */
public function getNormalized() {
  $normalized = $this->normalized;

  return $normalized;
}
138
/**
 * Determine whether the parsed hunks describe a wholly deleted file.
 *
 * @return bool True when there is at least one old line and no new lines.
 *   False when any new line exists, or when both sides are empty.
 */
public function getIsDeleted() {
  // Any surviving new line means the file still exists.
  if (array_filter($this->getNewLines())) {
    return false;
  }

  // Nothing on the new side: the file was deleted if it had content before,
  // and is merely an empty file otherwise.
  return (bool)array_filter($this->getOldLines());
}
157
/**
 * Report whether the parsed hunks change anything at all, counting
 * whitespace-only differences as changes.
 *
 * @return bool True if any change is present.
 */
public function getHasAnyChanges() {
  $has_changes = $this->getHasChanges('any');

  return $has_changes;
}
164
/**
 * Determine whether the old and new sides differ under a given filter.
 *
 * Rows are compared pairwise by key. A row that exists on only one side
 * always counts as a change, including rows which are present only on the
 * new side (these were previously never examined) and rows missing from the
 * new side (these previously triggered an undefined-offset read).
 *
 * @param string $filter Either `'any'` (every textual difference counts) or
 *   `'text'` (differences that vanish after `trim()` are ignored).
 * @return bool True if a change is found, false otherwise.
 * @throws Exception If the filter is not recognized.
 */
private function getHasChanges($filter) {
  if ($filter !== 'any' && $filter !== 'text') {
    throw new Exception(pht("Unknown change filter '%s'.", $filter));
  }

  $old = $this->getOldLines();
  $new = $this->getNewLines();

  $is_any = ($filter === 'any');

  // Visit every row key that appears on either side, so a side with extra
  // rows cannot go unnoticed.
  $keys = array_keys($old + $new);
  foreach ($keys as $key) {
    $o = idx($old, $key);
    $n = idx($new, $key);

    if ($o === null || $n === null) {
      // One side has nothing for this row, so the two sides differ and the
      // file has been changed under any type of filter.
      return true;
    }

    if ($o['type'] !== $n['type']) {
      return true;
    }

    if ($o['text'] === $n['text']) {
      continue;
    }

    if ($is_any) {
      // The text is different, so there's a change.
      return true;
    }

    if (trim($o['text']) !== trim($n['text'])) {
      return true;
    }
  }

  // No changes anywhere in the file.
  return false;
}
201
202
/**
 * Align the old and new line lists row by row so they can be rendered side
 * by side.
 *
 * This builds on the work done by @{method:parseHunksForLineData}: lines
 * which exist on only one side are paired with a `null` placeholder on the
 * other side, so both lists end up with the same length. Afterwards, change
 * types are adjusted for normalized diffs.
 *
 * NOTE: this method must be called after @{method:parseHunksForLineData}.
 *
 * @return $this
 */
public function reparseHunksForSpecialAttributes() {
  // Reverse both lists so the next unprocessed line can be taken from (and
  // handed back to) the end of each stack.
  $old_stack = array_reverse($this->getOldLines());
  $new_stack = array_reverse($this->getNewLines());

  $padded_old = array();
  $padded_new = array();

  while ($old_stack || $new_stack) {
    $old_row = array_pop($old_stack);
    $new_row = array_pop($new_stack);

    $old_type = $old_row ? $old_row['type'] : null;
    $new_type = $new_row ? $new_row['type'] : null;

    $old_changed = ($old_type != null);
    $new_changed = ($new_type != null);

    if ($old_changed && !$new_changed) {
      // This line does not exist in the new file. Emit it against a
      // placeholder and keep the new line for the next row.
      $padded_old[] = $old_row;
      $padded_new[] = null;
      if ($new_row) {
        $new_stack[] = $new_row;
      }
    } else if ($new_changed && !$old_changed) {
      // This line does not exist in the old file. Emit it against a
      // placeholder and keep the old line for the next row.
      $padded_old[] = null;
      $padded_new[] = $new_row;
      if ($old_row) {
        $old_stack[] = $old_row;
      }
    } else {
      // Both sides are changed, or both are unchanged: they share a row.
      $padded_old[] = $old_row;
      $padded_new[] = $new_row;
    }
  }

  $this->setOldLines($padded_old);
  $this->setNewLines($padded_new);

  $this->updateChangeTypesForNormalization();

  return $this;
}
271
/**
 * Compute intraline diff segments for every row whose old and new lines
 * have different change types.
 *
 * When two lines differ only in leading whitespace, the indent change is
 * peeled off first and recorded as a leading `'>'` (indented further) or
 * `'<'` (indented less) segment. Rows with no remaining difference after
 * that are recorded as depth-only changes. The results are stored via
 * @{method:setIntraLineDiffs} and @{method:setDepthOnlyLines}.
 *
 * @return $this
 */
public function generateIntraLineDiffs() {
  $old_lines = $this->getOldLines();
  $new_lines = $this->getNewLines();

  // Visual width of a tab used when measuring indentation.
  $tab_width = 2;

  $diffs = array();
  $depth_only = array();
  foreach ($old_lines as $key => $old_line) {
    $new_line = $new_lines[$key];

    if (!$old_line || !$new_line) {
      continue;
    }

    if ($old_line['type'] == $new_line['type']) {
      continue;
    }

    $old_segments = array();
    $new_segments = array();

    $old_text = $old_line['text'];
    $new_text = $new_line['text'];

    $only_indent_differs =
      ($old_text !== $new_text) &&
      (ltrim($old_text) === ltrim($new_text));

    if ($only_indent_differs) {
      $old_depth = $this->getIndentDepth($old_text, $tab_width);
      $new_depth = $this->getIndentDepth($new_text, $tab_width);

      if ($old_depth < $new_depth) {
        $segment_type = '>';
        $segment_width = $this->getCharacterCountForVisualWhitespace(
          $new_text,
          ($new_depth - $old_depth),
          $tab_width);
        if ($segment_width) {
          $new_text = substr($new_text, $segment_width);
          $new_segments[] = array(
            $segment_type,
            $segment_width,
          );
        }
      } else if ($old_depth > $new_depth) {
        $segment_type = '<';
        $segment_width = $this->getCharacterCountForVisualWhitespace(
          $old_text,
          ($old_depth - $new_depth),
          $tab_width);
        if ($segment_width) {
          $old_text = substr($old_text, $segment_width);
          $old_segments[] = array(
            $segment_type,
            $segment_width,
          );
        }
      }

      // Once the indent change has been marked off, a line with nothing
      // else left to diff was modified purely by changing its depth.
      // Remember it so it can be displayed differently later.
      if ($old_text === $new_text) {
        $depth_only[$key] = $segment_type;
      }
    }

    $intraline = ArcanistDiffUtils::generateIntralineDiff(
      $old_text,
      $new_text);

    foreach ($intraline[0] as $segment) {
      $old_segments[] = $segment;
    }

    foreach ($intraline[1] as $segment) {
      $new_segments[] = $segment;
    }

    $diffs[$key] = array(
      $old_segments,
      $new_segments,
    );
  }

  $this->setIntraLineDiffs($diffs);
  $this->setDepthOnlyLines($depth_only);

  return $this;
}
358
/**
 * Compute which rows should be shown: every changed row plus a number of
 * surrounding context rows.
 *
 * See T13468. This is similar to @{method:generateVisibleLinesMask}, but
 * attempts to work around a series of bugs which cancel each other out but
 * make a mess of the intermediate steps.
 *
 * The mask is also stored via @{method:setVisibleLinesMask}.
 *
 * @param int $lines_context Number of rows to reveal on each side of a
 *   changed row.
 * @return array<int, bool> Map from row index to whether it is visible.
 */
public function generateVisibleBlocksMask($lines_context) {
  $old = $this->getOldLines();
  $new = $this->getNewLines();

  $length = max(count($old), count($new));

  $mask = array();

  // Tracks the first row which has not yet been revealed, so overlapping
  // context windows of adjacent changed rows are not marked repeatedly.
  $reveal_cursor = -1;

  for ($ii = 0; $ii < $length; $ii++) {
    $is_changed =
      (isset($old[$ii]) && $old[$ii]['type']) ||
      (isset($new[$ii]) && $new[$ii]['type']);

    if (!$is_changed) {
      // An unchanged row reveals nothing. Mask it, unless a nearby changed
      // row has already revealed it.
      if (!isset($mask[$ii])) {
        $mask[$ii] = false;
      }
      continue;
    }

    // A changed row reveals itself and the rows around it, clamped to the
    // bounds of the file and to the part not yet revealed.
    $first = max(max($ii - $lines_context, 0), $reveal_cursor);
    $final = min($ii + $lines_context, $length - 1);

    for ($jj = $first; $jj <= $final; $jj++) {
      $mask[$jj] = true;
    }

    $reveal_cursor = $final + 1;
  }

  $this->setVisibleLinesMask($mask);

  return $mask;
}
421
/**
 * Compute a sparse mask of rows to show around changed rows and store it
 * via @{method:setVisibleLinesMask}.
 *
 * The scan looks `$lines_context` rows ahead of the cursor, so a row
 * becomes visible once a changed row enters that lookahead window and stays
 * visible until the cursor is more than `$lines_context` rows past the last
 * changed row. Only cursor positions greater than 0 are ever written to the
 * mask.
 *
 * @param int $lines_context Number of context rows around a changed row.
 * @return $this
 */
public function generateVisibleLinesMask($lines_context) {
  $old = $this->getOldLines();
  $new = $this->getNewLines();
  $max_length = max(count($old), count($new));

  $mask = array();
  $is_visible = false;
  $last_changed = 0;

  $cursor = -$lines_context;
  while ($cursor < $max_length) {
    $lookahead = $cursor + $lines_context;

    $old_changed = isset($old[$lookahead]) && $old[$lookahead]['type'];
    $new_changed = isset($new[$lookahead]) && $new[$lookahead]['type'];

    if ($old_changed || $new_changed) {
      $is_visible = true;
      $last_changed = $lookahead;
    } else if ($cursor > $last_changed + $lines_context) {
      $is_visible = false;
    }

    if ($is_visible && $cursor > 0) {
      $mask[$cursor] = 1;
    }

    $cursor++;
  }

  $this->setVisibleLinesMask($mask);

  return $this;
}
448
/**
 * Return the text of the old side as a list of line strings.
 *
 * @return array<string> One entry per row, excluding `\` marker rows.
 */
public function getOldCorpus() {
  $old_lines = $this->getOldLines();

  return $this->getCorpus($old_lines);
}
452
/**
 * Return the text of the new side as a list of line strings.
 *
 * @return array<string> One entry per row, excluding `\` marker rows.
 */
public function getNewCorpus() {
  $new_lines = $this->getNewLines();

  return $this->getCorpus($new_lines);
}
456
/**
 * Flatten a list of line records into a list of text strings.
 *
 * Placeholder rows and rows without text contribute a bare newline, so the
 * highlighted line numbers keep matching up. Rows of type `\` are dropped.
 *
 * @param array $lines List of line records (or `null` padding rows).
 * @return array<string> The text of each retained row.
 */
private function getCorpus(array $lines) {
  $corpus = array();

  foreach ($lines as $row) {
    if ($row === null) {
      $corpus[] = "\n";
      continue;
    }

    if ($row['type'] == '\\') {
      continue;
    }

    $text = $row['text'];
    $corpus[] = ($text === null) ? "\n" : $text;
  }

  return $corpus;
}
478
/**
 * Parse raw hunks into per-side lists of line records.
 *
 * Each record is an array with the keys `type` (`'+'`, `'-'`, `'\\'` or
 * `null` for unchanged lines), `text` (the line without its leading marker)
 * and `line` (the line number on that side). The results are stored via
 * @{method:setOldLines} and @{method:setNewLines}.
 *
 * @param array<DifferentialHunk> $hunks Hunks to parse.
 * @return $this
 * @throws Exception If a line starts with an unexpected character.
 */
public function parseHunksForLineData(array $hunks) {
  assert_instances_of($hunks, DifferentialHunk::class);

  $old_lines = array();
  $new_lines = array();
  foreach ($hunks as $hunk) {
    $lines = $hunk->getSplitLines();

    // First pass: classify every line and strip its leading marker.
    $line_type_map = array();
    $line_text = array();
    foreach ($lines as $line_index => $line) {
      if (isset($line[0])) {
        $char = $line[0];
        switch ($char) {
          case ' ':
            $line_type_map[$line_index] = null;
            $line_text[$line_index] = substr($line, 1);
            break;
          case "\r":
          case "\n":
            // NOTE: Normally, the first character is a space, plus, minus or
            // backslash, but it may be a newline if it used to be a space and
            // trailing whitespace has been stripped via email transmission or
            // some similar mechanism. In these cases, we essentially pretend
            // the missing space is still there.
            $line_type_map[$line_index] = null;
            $line_text[$line_index] = $line;
            break;
          case '+':
          case '-':
          case '\\':
            $line_type_map[$line_index] = $char;
            $line_text[$line_index] = substr($line, 1);
            break;
          default:
            throw new Exception(
              pht(
                'Unexpected leading character "%s" at line index %s!',
                $char,
                $line_index));
        }
      } else {
        $line_type_map[$line_index] = null;
        $line_text[$line_index] = '';
      }
    }

    $old_line = $hunk->getOldOffset();
    $new_line = $hunk->getNewOffset();

    // Second pass: route every line to the side(s) it belongs to and assign
    // line numbers.
    $num_lines = count($lines);
    for ($cursor = 0; $cursor < $num_lines; $cursor++) {
      $type = $line_type_map[$cursor];
      $data = array(
        'type' => $type,
        'text' => $line_text[$cursor],
        'line' => $new_line,
      );
      if ($type == '\\') {
        // A "\" marker is filed on the same side(s) as the line before it.
        // When the marker is the first line of the hunk there is no
        // previous line; it is then treated like an unchanged line instead
        // of reading a nonexistent offset.
        $type = idx($line_type_map, $cursor - 1);
        $data['text'] = ltrim($data['text']);
      }
      switch ($type) {
        case '+':
          $new_lines[] = $data;
          ++$new_line;
          break;
        case '-':
          $data['line'] = $old_line;
          $old_lines[] = $data;
          ++$old_line;
          break;
        default:
          $new_lines[] = $data;
          $data['line'] = $old_line;
          $old_lines[] = $data;
          ++$new_line;
          ++$old_line;
          break;
      }
    }
  }

  $this->setOldLines($old_lines);
  $this->setNewLines($new_lines);

  return $this;
}
570
/**
 * Compute which lines to highlight on each side when rendering a diff of
 * two diffs.
 *
 * Each line of `$changeset_hunks` carries two markers: the first character
 * is the change type in the diff of diffs, the second is the change type in
 * the original diff. Lines shorter than that are read as having an empty
 * marker rather than triggering an uninitialized string offset.
 *
 * @param array<DifferentialHunk> $changeset_hunks Hunks of the diff of
 *   diffs.
 * @param array<DifferentialHunk> $old_hunks Hunks used to translate line
 *   numbers for the old side.
 * @param array<DifferentialHunk> $new_hunks Hunks used to translate line
 *   numbers for the new side.
 * @return array Pair of lists: lines to highlight on the old side, and
 *   lines to highlight on the new side.
 * @throws Exception If a line does not begin with "+", "-" or a space.
 */
public function parseHunksForHighlightMasks(
  array $changeset_hunks,
  array $old_hunks,
  array $new_hunks) {
  assert_instances_of($changeset_hunks, DifferentialHunk::class);
  assert_instances_of($old_hunks, DifferentialHunk::class);
  assert_instances_of($new_hunks, DifferentialHunk::class);

  // Put changes side by side.
  $olds = array();
  $news = array();
  $olds_cursor = -1;
  $news_cursor = -1;
  foreach ($changeset_hunks as $hunk) {
    $n_old = $hunk->getOldOffset();
    $n_new = $hunk->getNewOffset();
    $changes = $hunk->getSplitLines();
    foreach ($changes as $line) {
      // Change type in diff of diffs.
      $diff_type = isset($line[0]) ? $line[0] : '';
      $is_same = ($diff_type === ' ');
      $is_add = ($diff_type === '+');
      $is_rem = ($diff_type === '-');

      // Change type in the original diff.
      $orig_type = isset($line[1]) ? $line[1] : '';

      if ($is_same) {
        // Use the same key for lines that are next to each other.
        if ($olds_cursor > $news_cursor) {
          $key = $olds_cursor + 1;
        } else {
          $key = $news_cursor + 1;
        }
        $olds[$key] = null;
        $news[$key] = null;
        $olds_cursor = $key;
        $news_cursor = $key;
      } else if ($is_rem) {
        $olds[] = array($n_old, $orig_type);
        $olds_cursor++;
      } else if ($is_add) {
        $news[] = array($n_new, $orig_type);
        $news_cursor++;
      } else {
        throw new Exception(
          pht(
            'Found unknown intradiff source line, expected a line '.
            'beginning with "+", "-", or " " (space): %s.',
            $line));
      }

      // See T13539. Don't increment the line count if this line was removed,
      // or if the line is a "No newline at end of file" marker.
      $not_a_line = ($orig_type === '-' || $orig_type === '\\');
      if ($not_a_line) {
        continue;
      }

      if ($is_same || $is_rem) {
        $n_old++;
      }

      if ($is_same || $is_add) {
        $n_new++;
      }
    }
  }

  $offsets_old = $this->computeOffsets($old_hunks);
  $offsets_new = $this->computeOffsets($new_hunks);

  // Highlight lines that were added on each side or removed on the other
  // side.
  $highlight_old = array();
  $highlight_new = array();
  $last = max(last_key($olds), last_key($news));
  for ($i = 0; $i <= $last; $i++) {
    if (isset($olds[$i])) {
      list($n, $type) = $olds[$i];
      if ($type == '+' ||
        ($type == ' ' && isset($news[$i]) && $news[$i][1] != ' ')) {
        if (isset($offsets_old[$n])) {
          $highlight_old[] = $offsets_old[$n];
        }
      }
    }
    if (isset($news[$i])) {
      list($n, $type) = $news[$i];
      if ($type == '+' ||
        ($type == ' ' && isset($olds[$i]) && $olds[$i][1] != ' ')) {
        if (isset($offsets_new[$n])) {
          $highlight_new[] = $offsets_new[$n];
        }
      }
    }
  }

  return array($highlight_old, $highlight_new);
}
674
/**
 * Render a small unified-diff excerpt around a range of lines.
 *
 * For every hunk which overlaps the requested range (widened by
 * `$add_context`), a `@@ ... @@` header is emitted followed by the matching
 * raw hunk lines.
 *
 * @param array<DifferentialHunk> $hunks Hunks to search.
 * @param bool $is_new True to address lines on the new side, false for the
 *   old side.
 * @param int $line_number First line of the range.
 * @param int $line_length Length of the range.
 * @param int $add_context Number of extra lines to include on each side.
 * @return string The excerpt, with pieces joined by newlines.
 */
public function makeContextDiff(
  array $hunks,
  $is_new,
  $line_number,
  $line_length,
  $add_context) {

  assert_instances_of($hunks, DifferentialHunk::class);

  $prefix = $is_new ? '+' : '-';

  $context = array();
  foreach ($hunks as $hunk) {
    if ($is_new) {
      $offset = $hunk->getNewOffset();
      $length = $hunk->getNewLen();
    } else {
      $offset = $hunk->getOldOffset();
      $length = $hunk->getOldLen();
    }

    $start = $line_number - $offset;
    $end = $start + $line_length;

    // We need to go in if $start == $length, because the last line might be
    // a "\No newline at end of file" marker, which we want to show if the
    // additional context is > 0.
    if (!($start <= $length && $end >= 0)) {
      continue;
    }

    $start = $start - $add_context;
    $end = $end + $add_context;

    $hunk_content = array();
    $hunk_pos = array('-' => 0, '+' => 0);
    $hunk_offset = array('-' => null, '+' => null);
    $hunk_last = array('-' => null, '+' => null);

    foreach (explode("\n", $hunk->getChanges()) as $line) {
      $marker = substr($line, 0, 1);

      $in_common = ($marker === ' ');
      $in_old = ($marker === '-') || $in_common;
      $in_new = ($marker === '+') || $in_common;
      $in_selected = ($marker === $prefix);
      $skip = !$in_selected && !$in_common;

      $pos = $hunk_pos[$prefix];
      if ($pos > $end) {
        // Past the end of the window; positions no longer advance.
        continue;
      }

      if ($start <= $pos) {
        // Lines from the other side are kept, except at the two edges of
        // the window.
        $at_edge = ($pos == $start || $pos == $end);
        if (!$skip || !$at_edge) {
          if ($in_old) {
            if ($hunk_offset['-'] === null) {
              $hunk_offset['-'] = $hunk_pos['-'];
            }
            $hunk_last['-'] = $hunk_pos['-'];
          }
          if ($in_new) {
            if ($hunk_offset['+'] === null) {
              $hunk_offset['+'] = $hunk_pos['+'];
            }
            $hunk_last['+'] = $hunk_pos['+'];
          }

          $hunk_content[] = $line;
        }
      }

      if ($in_old) {
        ++$hunk_pos['-'];
      }
      if ($in_new) {
        ++$hunk_pos['+'];
      }
    }

    if ($hunk_offset['-'] === null && $hunk_offset['+'] === null) {
      // Nothing from this hunk fell inside the window.
      continue;
    }

    $header = '@@';
    if ($hunk_offset['-'] !== null) {
      $header .= sprintf(
        ' -%s,%s',
        $hunk->getOldOffset() + $hunk_offset['-'],
        $hunk_last['-'] - $hunk_offset['-'] + 1);
    }
    if ($hunk_offset['+'] !== null) {
      $header .= sprintf(
        ' +%s,%s',
        $hunk->getNewOffset() + $hunk_offset['+'],
        $hunk_last['+'] - $hunk_offset['+'] + 1);
    }
    $header .= ' @@';

    $context[] = $header;
    $context[] = implode("\n", $hunk_content);
  }

  return implode("\n", $context);
}
767
/**
 * Number the new-side lines covered by a list of hunks consecutively.
 *
 * @param array<DifferentialHunk> $hunks Hunks to walk, in order.
 * @return array<int, int> Map from a running 1-based counter to the
 *   new-side line number it corresponds to.
 */
private function computeOffsets(array $hunks) {
  assert_instances_of($hunks, DifferentialHunk::class);

  $offsets = array();
  $counter = 1;
  foreach ($hunks as $hunk) {
    $span = $hunk->getNewLen();
    $first = $hunk->getNewOffset();

    for ($delta = 0; $delta < $span; $delta++) {
      $offsets[$counter++] = $first + $delta;
    }
  }

  return $offsets;
}
788
/**
 * Measure the visual width of the leading whitespace of a line.
 *
 * A space widens the indent by one column. A tab advances to the next
 * tabstop. For example, with a tab width of 4 these sequences both end at a
 * visual width of 8:
 *
 *   <tab><tab>
 *   <space><tab><space><space><space><tab>
 *
 * @param string $text Line to measure.
 * @param int $tab_width Distance between tabstops, in columns.
 * @return int Visual indent depth, in columns.
 */
private function getIndentDepth($text, $tab_width) {
  $depth = 0;

  $length = strlen($text);
  for ($pos = 0; $pos < $length; $pos++) {
    switch ($text[$pos]) {
      case ' ':
        $depth++;
        break;
      case "\t":
        // Jump forward, then snap back onto the tabstop grid.
        $advanced = $depth + $tab_width;
        $depth = $advanced - ($advanced % $tab_width);
        break;
      default:
        // First non-indent character: the indent ends here.
        return $depth;
    }
  }

  return $depth;
}
821
/**
 * Count how many leading whitespace characters fit into a visual width.
 *
 * The visual indent depth of a line is known to have changed by `$depth`
 * columns. This finds the longest whitespace prefix of `$text` that still
 * fits into that many columns.
 *
 * Usually this is straightforward: a line indented by two columns that
 * begins with two spaces is a perfect match. Tabs can make it inexact: if
 * the line was indented by 7 columns, the tab width is 8 and the line
 * begins with "<space><space><tab>", only the two spaces can be attributed
 * to the indent change.
 *
 * @param string $text Line to examine.
 * @param int $depth Visual width to fill, in columns.
 * @param int $tab_width Distance between tabstops, in columns.
 * @return int Number of leading characters covered.
 */
private function getCharacterCountForVisualWhitespace(
  $text,
  $depth,
  $tab_width) {

  $consumed = 0;
  $columns = 0;

  $length = strlen($text);
  for ($pos = 0; $pos < $length && $columns < $depth; $pos++) {
    $char = $text[$pos];

    if ($char == ' ') {
      $consumed++;
      $columns++;
      continue;
    }

    if ($char != "\t") {
      // Not whitespace: the indent ends here.
      break;
    }

    // Work out how many columns this tab spans up to the next tabstop.
    $tabstop = $columns + $tab_width;
    $tabstop = $tabstop - ($tabstop % $tab_width);
    $tab_span = $tabstop - $columns;

    // A tab which would overshoot the requested width is not counted.
    if (($depth - $columns) < $tab_span) {
      break;
    }

    $consumed++;
    $columns += $tab_span;
  }

  return $consumed;
}
880
/**
 * Mark aligned rows as changed when a normalized alignment hid a real
 * difference between them.
 *
 * Does nothing unless the parser has been flagged as normalized.
 *
 * @return void
 */
private function updateChangeTypesForNormalization() {
  if (!$this->getNormalized()) {
    return;
  }

  // If we've parsed based on a normalized diff alignment, we may currently
  // believe some lines are unchanged when they have actually changed. This
  // happens when:
  //
  //   - a line changes;
  //   - the change is a kind of change we normalize away when aligning the
  //     diff, like an indentation change;
  //   - we normalize the change away to align the diff; and so
  //   - the old and new copies of the line are now aligned in the new
  //     normalized diff.
  //
  // Then we end up with an alignment where the two lines that differ only
  // in some trivial way are aligned. This is great, and exactly what we're
  // trying to accomplish by doing all this alignment stuff in the first
  // place.
  //
  // However, in this case the correctly-aligned lines will be incorrectly
  // marked as unchanged because the diff algorithm was fed normalized copies
  // of the lines, and these copies truly weren't any different.
  //
  // When lines are aligned and marked identical, but they're not actually
  // identical, we now mark them as changed. The rest of the processing will
  // figure out how to render them appropriately.

  $new_rows = $this->getNewLines();
  $old_rows = $this->getOldLines();

  foreach ($old_rows as $key => $old_row) {
    $new_row = $new_rows[$key];

    if (!$old_row || !$new_row) {
      continue;
    }

    $marked_unchanged =
      ($old_row['type'] === null) &&
      ($new_row['type'] === null);
    if (!$marked_unchanged) {
      continue;
    }

    if ($old_row['text'] === $new_row['text']) {
      continue;
    }

    $old_rows[$key]['type'] = '-';
    $new_rows[$key]['type'] = '+';
  }

  $this->setOldLines($old_rows);
  $this->setNewLines($new_rows);
}
930
931
932}