just playing with tangled
1// Copyright 2020-2022 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::borrow::Borrow;
16use std::cmp::max;
17use std::io;
18use std::iter;
19use std::mem;
20use std::ops::Range;
21use std::path::Path;
22use std::path::PathBuf;
23
24use bstr::BStr;
25use bstr::BString;
26use futures::executor::block_on_stream;
27use futures::stream::BoxStream;
28use futures::StreamExt as _;
29use futures::TryStreamExt as _;
30use itertools::Itertools as _;
31use jj_lib::backend::BackendError;
32use jj_lib::backend::BackendResult;
33use jj_lib::backend::CommitId;
34use jj_lib::backend::CopyRecord;
35use jj_lib::backend::TreeValue;
36use jj_lib::commit::Commit;
37use jj_lib::config::ConfigGetError;
38use jj_lib::config::ConfigGetResultExt as _;
39use jj_lib::conflicts::materialize_merge_result_to_bytes;
40use jj_lib::conflicts::materialized_diff_stream;
41use jj_lib::conflicts::ConflictMarkerStyle;
42use jj_lib::conflicts::MaterializedFileValue;
43use jj_lib::conflicts::MaterializedTreeDiffEntry;
44use jj_lib::conflicts::MaterializedTreeValue;
45use jj_lib::copies::CopiesTreeDiffEntry;
46use jj_lib::copies::CopiesTreeDiffEntryPath;
47use jj_lib::copies::CopyOperation;
48use jj_lib::copies::CopyRecords;
49use jj_lib::diff::find_line_ranges;
50use jj_lib::diff::CompareBytesExactly;
51use jj_lib::diff::CompareBytesIgnoreAllWhitespace;
52use jj_lib::diff::CompareBytesIgnoreWhitespaceAmount;
53use jj_lib::diff::Diff;
54use jj_lib::diff::DiffHunk;
55use jj_lib::diff::DiffHunkKind;
56use jj_lib::files::DiffLineHunkSide;
57use jj_lib::files::DiffLineIterator;
58use jj_lib::files::DiffLineNumber;
59use jj_lib::matchers::Matcher;
60use jj_lib::merge::MergedTreeValue;
61use jj_lib::merged_tree::MergedTree;
62use jj_lib::object_id::ObjectId as _;
63use jj_lib::repo::Repo;
64use jj_lib::repo_path::InvalidRepoPathError;
65use jj_lib::repo_path::RepoPath;
66use jj_lib::repo_path::RepoPathUiConverter;
67use jj_lib::rewrite::rebase_to_dest_parent;
68use jj_lib::settings::UserSettings;
69use jj_lib::store::Store;
70use pollster::FutureExt as _;
71use thiserror::Error;
72use tracing::instrument;
73use unicode_width::UnicodeWidthStr as _;
74
75use crate::config::CommandNameAndArgs;
76use crate::formatter::Formatter;
77use crate::merge_tools;
78use crate::merge_tools::generate_diff;
79use crate::merge_tools::invoke_external_diff;
80use crate::merge_tools::new_utf8_temp_dir;
81use crate::merge_tools::DiffGenerateError;
82use crate::merge_tools::DiffToolMode;
83use crate::merge_tools::ExternalMergeTool;
84use crate::text_util;
85use crate::ui::Ui;
86
87#[derive(clap::Args, Clone, Debug)]
88#[command(next_help_heading = "Diff Formatting Options")]
89#[command(group(clap::ArgGroup::new("short-format").args(&["summary", "stat", "types", "name_only"])))]
90#[command(group(clap::ArgGroup::new("long-format").args(&["git", "color_words", "tool"])))]
91pub struct DiffFormatArgs {
92 /// For each path, show only whether it was modified, added, or deleted
93 #[arg(long, short)]
94 pub summary: bool,
95 /// Show a histogram of the changes
96 #[arg(long)]
97 pub stat: bool,
98 /// For each path, show only its type before and after
99 ///
100 /// The diff is shown as two letters. The first letter indicates the type
101 /// before and the second letter indicates the type after. '-' indicates
102 /// that the path was not present, 'F' represents a regular file, `L'
103 /// represents a symlink, 'C' represents a conflict, and 'G' represents a
104 /// Git submodule.
105 #[arg(long)]
106 pub types: bool,
107 /// For each path, show only its path
108 ///
109 /// Typically useful for shell commands like:
110 /// `jj diff -r @- --name-only | xargs perl -pi -e's/OLD/NEW/g`
111 #[arg(long)]
112 pub name_only: bool,
113 /// Show a Git-format diff
114 #[arg(long)]
115 pub git: bool,
116 /// Show a word-level diff with changes indicated only by color
117 #[arg(long)]
118 pub color_words: bool,
119 /// Generate diff by external command
120 #[arg(long)]
121 pub tool: Option<String>,
122 /// Number of lines of context to show
123 #[arg(long)]
124 context: Option<usize>,
125
126 // Short flags are set by command to avoid future conflicts.
127 /// Ignore whitespace when comparing lines.
128 #[arg(long)] // short = 'w'
129 ignore_all_space: bool,
130 /// Ignore changes in amount of whitespace when comparing lines.
131 #[arg(long, conflicts_with = "ignore_all_space")] // short = 'b'
132 ignore_space_change: bool,
133}
134
135#[derive(Clone, Debug, Eq, PartialEq)]
136pub enum DiffFormat {
137 // Non-trivial parameters are boxed in order to keep the variants small
138 Summary,
139 Stat(Box<DiffStatOptions>),
140 Types,
141 NameOnly,
142 Git(Box<UnifiedDiffOptions>),
143 ColorWords(Box<ColorWordsDiffOptions>),
144 Tool(Box<ExternalMergeTool>),
145}
146
147impl DiffFormat {
148 fn is_short(&self) -> bool {
149 match self {
150 DiffFormat::Summary
151 | DiffFormat::Stat(_)
152 | DiffFormat::Types
153 | DiffFormat::NameOnly => true,
154 DiffFormat::Git(_) | DiffFormat::ColorWords(_) | DiffFormat::Tool(_) => false,
155 }
156 }
157}
158
159/// Returns a list of requested diff formats, which will never be empty.
160pub fn diff_formats_for(
161 settings: &UserSettings,
162 args: &DiffFormatArgs,
163) -> Result<Vec<DiffFormat>, ConfigGetError> {
164 let formats = diff_formats_from_args(settings, args)?;
165 if formats.is_empty() {
166 Ok(vec![default_diff_format(settings, args)?])
167 } else {
168 Ok(formats)
169 }
170}
171
172/// Returns a list of requested diff formats for log-like commands, which may be
173/// empty.
174pub fn diff_formats_for_log(
175 settings: &UserSettings,
176 args: &DiffFormatArgs,
177 patch: bool,
178) -> Result<Vec<DiffFormat>, ConfigGetError> {
179 let mut formats = diff_formats_from_args(settings, args)?;
180 // --patch implies default if no "long" format is specified
181 if patch && formats.iter().all(DiffFormat::is_short) {
182 // TODO: maybe better to error out if the configured default isn't a
183 // "long" format?
184 formats.push(default_diff_format(settings, args)?);
185 formats.dedup();
186 }
187 Ok(formats)
188}
189
190fn diff_formats_from_args(
191 settings: &UserSettings,
192 args: &DiffFormatArgs,
193) -> Result<Vec<DiffFormat>, ConfigGetError> {
194 let mut formats = Vec::new();
195 // "short" format first:
196 if args.summary {
197 formats.push(DiffFormat::Summary);
198 }
199 if args.stat {
200 let mut options = DiffStatOptions::default();
201 options.merge_args(args);
202 formats.push(DiffFormat::Stat(Box::new(options)));
203 }
204 if args.types {
205 formats.push(DiffFormat::Types);
206 }
207 if args.name_only {
208 formats.push(DiffFormat::NameOnly);
209 }
210 // "long" format follows:
211 if args.git {
212 let mut options = UnifiedDiffOptions::from_settings(settings)?;
213 options.merge_args(args);
214 formats.push(DiffFormat::Git(Box::new(options)));
215 }
216 if args.color_words {
217 let mut options = ColorWordsDiffOptions::from_settings(settings)?;
218 options.merge_args(args);
219 formats.push(DiffFormat::ColorWords(Box::new(options)));
220 }
221 if let Some(name) = &args.tool {
222 let tool = merge_tools::get_external_tool_config(settings, name)?
223 .unwrap_or_else(|| ExternalMergeTool::with_program(name));
224 formats.push(DiffFormat::Tool(Box::new(tool)));
225 }
226 Ok(formats)
227}
228
229fn default_diff_format(
230 settings: &UserSettings,
231 args: &DiffFormatArgs,
232) -> Result<DiffFormat, ConfigGetError> {
233 if let Some(args) = settings.get("ui.diff.tool").optional()? {
234 // External "tool" overrides the internal "format" option.
235 let tool = if let CommandNameAndArgs::String(name) = &args {
236 merge_tools::get_external_tool_config(settings, name)?
237 } else {
238 None
239 }
240 .unwrap_or_else(|| ExternalMergeTool::with_diff_args(&args));
241 return Ok(DiffFormat::Tool(Box::new(tool)));
242 }
243 match settings.get_string("ui.diff.format")?.as_ref() {
244 "summary" => Ok(DiffFormat::Summary),
245 "stat" => {
246 let mut options = DiffStatOptions::default();
247 options.merge_args(args);
248 Ok(DiffFormat::Stat(Box::new(options)))
249 }
250 "types" => Ok(DiffFormat::Types),
251 "name-only" => Ok(DiffFormat::NameOnly),
252 "git" => {
253 let mut options = UnifiedDiffOptions::from_settings(settings)?;
254 options.merge_args(args);
255 Ok(DiffFormat::Git(Box::new(options)))
256 }
257 "color-words" => {
258 let mut options = ColorWordsDiffOptions::from_settings(settings)?;
259 options.merge_args(args);
260 Ok(DiffFormat::ColorWords(Box::new(options)))
261 }
262 name => Err(ConfigGetError::Type {
263 name: "ui.diff.format".to_owned(),
264 error: format!("Invalid diff format: {name}").into(),
265 source_path: None,
266 }),
267 }
268}
269
270#[derive(Debug, Error)]
271pub enum DiffRenderError {
272 #[error("Failed to generate diff")]
273 DiffGenerate(#[source] DiffGenerateError),
274 #[error(transparent)]
275 Backend(#[from] BackendError),
276 #[error("Access denied to {path}")]
277 AccessDenied {
278 path: String,
279 source: Box<dyn std::error::Error + Send + Sync>,
280 },
281 #[error(transparent)]
282 InvalidRepoPath(#[from] InvalidRepoPathError),
283 #[error(transparent)]
284 Io(#[from] io::Error),
285}
286
287/// Configuration and environment to render textual diff.
288pub struct DiffRenderer<'a> {
289 repo: &'a dyn Repo,
290 path_converter: &'a RepoPathUiConverter,
291 conflict_marker_style: ConflictMarkerStyle,
292 formats: Vec<DiffFormat>,
293}
294
295impl<'a> DiffRenderer<'a> {
296 pub fn new(
297 repo: &'a dyn Repo,
298 path_converter: &'a RepoPathUiConverter,
299 conflict_marker_style: ConflictMarkerStyle,
300 formats: Vec<DiffFormat>,
301 ) -> Self {
302 DiffRenderer {
303 repo,
304 path_converter,
305 conflict_marker_style,
306 formats,
307 }
308 }
309
310 /// Generates diff between `from_tree` and `to_tree`.
311 #[expect(clippy::too_many_arguments)]
312 pub fn show_diff(
313 &self,
314 ui: &Ui, // TODO: remove Ui dependency if possible
315 formatter: &mut dyn Formatter,
316 from_tree: &MergedTree,
317 to_tree: &MergedTree,
318 matcher: &dyn Matcher,
319 copy_records: &CopyRecords,
320 width: usize,
321 ) -> Result<(), DiffRenderError> {
322 formatter.with_label("diff", |formatter| {
323 self.show_diff_inner(
324 ui,
325 formatter,
326 from_tree,
327 to_tree,
328 matcher,
329 copy_records,
330 width,
331 )
332 })
333 }
334
335 #[expect(clippy::too_many_arguments)]
336 fn show_diff_inner(
337 &self,
338 ui: &Ui,
339 formatter: &mut dyn Formatter,
340 from_tree: &MergedTree,
341 to_tree: &MergedTree,
342 matcher: &dyn Matcher,
343 copy_records: &CopyRecords,
344 width: usize,
345 ) -> Result<(), DiffRenderError> {
346 let store = self.repo.store();
347 let path_converter = self.path_converter;
348 for format in &self.formats {
349 match format {
350 DiffFormat::Summary => {
351 let tree_diff =
352 from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
353 show_diff_summary(formatter, tree_diff, path_converter)?;
354 }
355 DiffFormat::Stat(options) => {
356 let tree_diff =
357 from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
358 let stats =
359 DiffStats::calculate(store, tree_diff, options, self.conflict_marker_style)
360 .block_on()?;
361 show_diff_stats(formatter, &stats, path_converter, width)?;
362 }
363 DiffFormat::Types => {
364 let tree_diff =
365 from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
366 show_types(formatter, tree_diff, path_converter)?;
367 }
368 DiffFormat::NameOnly => {
369 let tree_diff =
370 from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
371 show_names(formatter, tree_diff, path_converter)?;
372 }
373 DiffFormat::Git(options) => {
374 let tree_diff =
375 from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
376 show_git_diff(
377 formatter,
378 store,
379 tree_diff,
380 options,
381 self.conflict_marker_style,
382 )?;
383 }
384 DiffFormat::ColorWords(options) => {
385 let tree_diff =
386 from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
387 show_color_words_diff(
388 formatter,
389 store,
390 tree_diff,
391 path_converter,
392 options,
393 self.conflict_marker_style,
394 )?;
395 }
396 DiffFormat::Tool(tool) => {
397 match tool.diff_invocation_mode {
398 DiffToolMode::FileByFile => {
399 let tree_diff =
400 from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
401 show_file_by_file_diff(
402 ui,
403 formatter,
404 store,
405 tree_diff,
406 path_converter,
407 tool,
408 self.conflict_marker_style,
409 )
410 }
411 DiffToolMode::Dir => {
412 let mut writer = formatter.raw()?;
413 generate_diff(
414 ui,
415 writer.as_mut(),
416 from_tree,
417 to_tree,
418 matcher,
419 tool,
420 self.conflict_marker_style,
421 )
422 .map_err(DiffRenderError::DiffGenerate)
423 }
424 }?;
425 }
426 }
427 }
428 Ok(())
429 }
430
431 /// Generates diff between `from_commits` and `to_commit` based off their
432 /// parents. The `from_commits` will temporarily be rebased onto the
433 /// `to_commit` parents to exclude unrelated changes.
434 pub fn show_inter_diff(
435 &self,
436 ui: &Ui,
437 formatter: &mut dyn Formatter,
438 from_commits: &[Commit],
439 to_commit: &Commit,
440 matcher: &dyn Matcher,
441 width: usize,
442 ) -> Result<(), DiffRenderError> {
443 let from_tree = rebase_to_dest_parent(self.repo, from_commits, to_commit)?;
444 let to_tree = to_commit.tree()?;
445 let copy_records = CopyRecords::default(); // TODO
446 self.show_diff(
447 ui,
448 formatter,
449 &from_tree,
450 &to_tree,
451 matcher,
452 ©_records,
453 width,
454 )
455 }
456
457 /// Generates diff of the given `commit` compared to its parents.
458 pub fn show_patch(
459 &self,
460 ui: &Ui,
461 formatter: &mut dyn Formatter,
462 commit: &Commit,
463 matcher: &dyn Matcher,
464 width: usize,
465 ) -> Result<(), DiffRenderError> {
466 let from_tree = commit.parent_tree(self.repo)?;
467 let to_tree = commit.tree()?;
468 let mut copy_records = CopyRecords::default();
469 for parent_id in commit.parent_ids() {
470 let records = get_copy_records(self.repo.store(), parent_id, commit.id(), matcher)?;
471 copy_records.add_records(records)?;
472 }
473 self.show_diff(
474 ui,
475 formatter,
476 &from_tree,
477 &to_tree,
478 matcher,
479 ©_records,
480 width,
481 )
482 }
483}
484
485pub fn get_copy_records<'a>(
486 store: &'a Store,
487 root: &CommitId,
488 head: &CommitId,
489 matcher: &'a dyn Matcher,
490) -> BackendResult<impl Iterator<Item = BackendResult<CopyRecord>> + use<'a>> {
491 // TODO: teach backend about matching path prefixes?
492 let stream = store.get_copy_records(None, root, head)?;
493 // TODO: test record.source as well? should be AND-ed or OR-ed?
494 Ok(block_on_stream(stream).filter_ok(|record| matcher.matches(&record.target)))
495}
496
497#[derive(Clone, Debug, Default, Eq, PartialEq)]
498pub struct LineDiffOptions {
499 /// How equivalence of lines is tested.
500 pub compare_mode: LineCompareMode,
501 // TODO: add --ignore-blank-lines, etc. which aren't mutually exclusive.
502}
503
504impl LineDiffOptions {
505 fn merge_args(&mut self, args: &DiffFormatArgs) {
506 self.compare_mode = if args.ignore_all_space {
507 LineCompareMode::IgnoreAllSpace
508 } else if args.ignore_space_change {
509 LineCompareMode::IgnoreSpaceChange
510 } else {
511 LineCompareMode::Exact
512 };
513 }
514}
515
/// Rule for deciding whether two lines are equal when diffing.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum LineCompareMode {
    /// Lines must match literally. This is the default.
    #[default]
    Exact,
    /// Any whitespace occurrences are ignored.
    IgnoreAllSpace,
    /// Changes in the amount of whitespace are ignored.
    IgnoreSpaceChange,
}
526
527fn diff_by_line<'input, T: AsRef<[u8]> + ?Sized + 'input>(
528 inputs: impl IntoIterator<Item = &'input T>,
529 options: &LineDiffOptions,
530) -> Diff<'input> {
531 // TODO: If we add --ignore-blank-lines, its tokenizer will have to attach
532 // blank lines to the preceding range. Maybe it can also be implemented as a
533 // post-process (similar to refine_changed_regions()) that expands unchanged
534 // regions across blank lines.
535 match options.compare_mode {
536 LineCompareMode::Exact => {
537 Diff::for_tokenizer(inputs, find_line_ranges, CompareBytesExactly)
538 }
539 LineCompareMode::IgnoreAllSpace => {
540 Diff::for_tokenizer(inputs, find_line_ranges, CompareBytesIgnoreAllWhitespace)
541 }
542 LineCompareMode::IgnoreSpaceChange => {
543 Diff::for_tokenizer(inputs, find_line_ranges, CompareBytesIgnoreWhitespaceAmount)
544 }
545 }
546}
547
548#[derive(Clone, Debug, Eq, PartialEq)]
549pub struct ColorWordsDiffOptions {
550 /// Number of context lines to show.
551 pub context: usize,
552 /// How lines are tokenized and compared.
553 pub line_diff: LineDiffOptions,
554 /// Maximum number of removed/added word alternation to inline.
555 pub max_inline_alternation: Option<usize>,
556}
557
558impl ColorWordsDiffOptions {
559 pub fn from_settings(settings: &UserSettings) -> Result<Self, ConfigGetError> {
560 let max_inline_alternation = {
561 let name = "diff.color-words.max-inline-alternation";
562 match settings.get_int(name)? {
563 -1 => None, // unlimited
564 n => Some(usize::try_from(n).map_err(|err| ConfigGetError::Type {
565 name: name.to_owned(),
566 error: err.into(),
567 source_path: None,
568 })?),
569 }
570 };
571 Ok(ColorWordsDiffOptions {
572 context: settings.get("diff.color-words.context")?,
573 line_diff: LineDiffOptions::default(),
574 max_inline_alternation,
575 })
576 }
577
578 fn merge_args(&mut self, args: &DiffFormatArgs) {
579 if let Some(context) = args.context {
580 self.context = context;
581 }
582 self.line_diff.merge_args(args);
583 }
584}
585
586fn show_color_words_diff_hunks(
587 formatter: &mut dyn Formatter,
588 contents: [&BStr; 2],
589 options: &ColorWordsDiffOptions,
590) -> io::Result<()> {
591 let line_diff = diff_by_line(contents, &options.line_diff);
592 let mut line_number = DiffLineNumber { left: 1, right: 1 };
593 // Matching entries shouldn't appear consecutively in diff of two inputs.
594 // However, if the inputs have conflicts, there may be a hunk that can be
595 // resolved, resulting [matching, resolved, matching] sequence.
596 let mut contexts = Vec::new();
597 let mut emitted = false;
598
599 for hunk in line_diff.hunks() {
600 let hunk_contents: [&BStr; 2] = hunk.contents[..].try_into().unwrap();
601 match hunk.kind {
602 DiffHunkKind::Matching => contexts.push(hunk_contents),
603 DiffHunkKind::Different => {
604 let num_after = if emitted { options.context } else { 0 };
605 line_number = show_color_words_context_lines(
606 formatter,
607 &contexts,
608 line_number,
609 options,
610 num_after,
611 options.context,
612 )?;
613 contexts.clear();
614 emitted = true;
615 line_number =
616 show_color_words_diff_lines(formatter, hunk_contents, line_number, options)?;
617 }
618 }
619 }
620
621 if emitted {
622 show_color_words_context_lines(
623 formatter,
624 &contexts,
625 line_number,
626 options,
627 options.context,
628 0,
629 )?;
630 }
631 Ok(())
632}
633
634/// Prints `num_after` lines, ellipsis, and `num_before` lines.
635fn show_color_words_context_lines(
636 formatter: &mut dyn Formatter,
637 contexts: &[[&BStr; 2]],
638 mut line_number: DiffLineNumber,
639 options: &ColorWordsDiffOptions,
640 num_after: usize,
641 num_before: usize,
642) -> io::Result<DiffLineNumber> {
643 const SKIPPED_CONTEXT_LINE: &str = " ...\n";
644 let extract = |side: usize| -> (Vec<&[u8]>, Vec<&[u8]>, u32) {
645 let mut lines = contexts
646 .iter()
647 .flat_map(|contents| contents[side].split_inclusive(|b| *b == b'\n'))
648 .fuse();
649 let after_lines = lines.by_ref().take(num_after).collect();
650 let before_lines = lines.by_ref().rev().take(num_before + 1).collect();
651 let num_skipped: u32 = lines.count().try_into().unwrap();
652 (after_lines, before_lines, num_skipped)
653 };
654 let show = |formatter: &mut dyn Formatter,
655 [left_lines, right_lines]: [&[&[u8]]; 2],
656 mut line_number: DiffLineNumber| {
657 if left_lines == right_lines {
658 for line in left_lines {
659 show_color_words_line_number(
660 formatter,
661 [Some(line_number.left), Some(line_number.right)],
662 )?;
663 show_color_words_inline_hunks(
664 formatter,
665 &[(DiffLineHunkSide::Both, line.as_ref())],
666 )?;
667 line_number.left += 1;
668 line_number.right += 1;
669 }
670 Ok(line_number)
671 } else {
672 let left = left_lines.concat();
673 let right = right_lines.concat();
674 show_color_words_diff_lines(
675 formatter,
676 [&left, &right].map(BStr::new),
677 line_number,
678 options,
679 )
680 }
681 };
682
683 let (left_after, mut left_before, num_left_skipped) = extract(0);
684 let (right_after, mut right_before, num_right_skipped) = extract(1);
685 line_number = show(formatter, [&left_after, &right_after], line_number)?;
686 if num_left_skipped > 0 || num_right_skipped > 0 {
687 write!(formatter, "{SKIPPED_CONTEXT_LINE}")?;
688 line_number.left += num_left_skipped;
689 line_number.right += num_right_skipped;
690 if left_before.len() > num_before {
691 left_before.pop();
692 line_number.left += 1;
693 }
694 if right_before.len() > num_before {
695 right_before.pop();
696 line_number.right += 1;
697 }
698 }
699 left_before.reverse();
700 right_before.reverse();
701 line_number = show(formatter, [&left_before, &right_before], line_number)?;
702 Ok(line_number)
703}
704
705fn show_color_words_diff_lines(
706 formatter: &mut dyn Formatter,
707 contents: [&BStr; 2],
708 mut line_number: DiffLineNumber,
709 options: &ColorWordsDiffOptions,
710) -> io::Result<DiffLineNumber> {
711 let word_diff_hunks = Diff::by_word(contents).hunks().collect_vec();
712 let can_inline = match options.max_inline_alternation {
713 None => true, // unlimited
714 Some(0) => false, // no need to count alternation
715 Some(max_num) => {
716 let groups = split_diff_hunks_by_matching_newline(&word_diff_hunks);
717 groups.map(count_diff_alternation).max().unwrap_or(0) <= max_num
718 }
719 };
720 if can_inline {
721 let mut diff_line_iter =
722 DiffLineIterator::with_line_number(word_diff_hunks.iter(), line_number);
723 for diff_line in diff_line_iter.by_ref() {
724 show_color_words_line_number(
725 formatter,
726 [
727 diff_line
728 .has_left_content()
729 .then_some(diff_line.line_number.left),
730 diff_line
731 .has_right_content()
732 .then_some(diff_line.line_number.right),
733 ],
734 )?;
735 show_color_words_inline_hunks(formatter, &diff_line.hunks)?;
736 }
737 line_number = diff_line_iter.next_line_number();
738 } else {
739 let [left_lines, right_lines] = unzip_diff_hunks_to_lines(&word_diff_hunks);
740 for tokens in &left_lines {
741 show_color_words_line_number(formatter, [Some(line_number.left), None])?;
742 show_color_words_single_sided_line(formatter, tokens, "removed")?;
743 line_number.left += 1;
744 }
745 for tokens in &right_lines {
746 show_color_words_line_number(formatter, [None, Some(line_number.right)])?;
747 show_color_words_single_sided_line(formatter, tokens, "added")?;
748 line_number.right += 1;
749 }
750 }
751 Ok(line_number)
752}
753
754fn show_color_words_line_number(
755 formatter: &mut dyn Formatter,
756 [left_line_number, right_line_number]: [Option<u32>; 2],
757) -> io::Result<()> {
758 if let Some(line_number) = left_line_number {
759 formatter.with_label("removed", |formatter| {
760 write!(formatter.labeled("line_number"), "{line_number:>4}")
761 })?;
762 write!(formatter, " ")?;
763 } else {
764 write!(formatter, " ")?;
765 }
766 if let Some(line_number) = right_line_number {
767 formatter.with_label("added", |formatter| {
768 write!(formatter.labeled("line_number"), "{line_number:>4}",)
769 })?;
770 write!(formatter, ": ")?;
771 } else {
772 write!(formatter, " : ")?;
773 }
774 Ok(())
775}
776
777/// Prints line hunks which may contain tokens originating from both sides.
778fn show_color_words_inline_hunks(
779 formatter: &mut dyn Formatter,
780 line_hunks: &[(DiffLineHunkSide, &BStr)],
781) -> io::Result<()> {
782 for (side, data) in line_hunks {
783 let label = match side {
784 DiffLineHunkSide::Both => None,
785 DiffLineHunkSide::Left => Some("removed"),
786 DiffLineHunkSide::Right => Some("added"),
787 };
788 if let Some(label) = label {
789 formatter.with_label(label, |formatter| {
790 formatter.with_label("token", |formatter| formatter.write_all(data))
791 })?;
792 } else {
793 formatter.write_all(data)?;
794 }
795 }
796 let (_, data) = line_hunks.last().expect("diff line must not be empty");
797 if !data.ends_with(b"\n") {
798 writeln!(formatter)?;
799 };
800 Ok(())
801}
802
803/// Prints left/right-only line tokens with the given label.
804fn show_color_words_single_sided_line(
805 formatter: &mut dyn Formatter,
806 tokens: &[(DiffTokenType, &[u8])],
807 label: &str,
808) -> io::Result<()> {
809 formatter.with_label(label, |formatter| show_diff_line_tokens(formatter, tokens))?;
810 let (_, data) = tokens.last().expect("diff line must not be empty");
811 if !data.ends_with(b"\n") {
812 writeln!(formatter)?;
813 };
814 Ok(())
815}
816
817/// Counts number of diff-side alternation, ignoring matching hunks.
818///
819/// This function is meant to measure visual complexity of diff hunks. It's easy
820/// to read hunks containing some removed or added words, but is getting harder
821/// as more removes and adds interleaved.
822///
823/// For example,
824/// - `[matching]` -> 0
825/// - `[left]` -> 1
826/// - `[left, matching, left]` -> 1
827/// - `[matching, left, right, matching, right]` -> 2
828/// - `[left, right, matching, right, left]` -> 3
829fn count_diff_alternation(diff_hunks: &[DiffHunk]) -> usize {
830 diff_hunks
831 .iter()
832 .filter_map(|hunk| match hunk.kind {
833 DiffHunkKind::Matching => None,
834 DiffHunkKind::Different => Some(&hunk.contents),
835 })
836 // Map non-empty diff side to index (0: left, 1: right)
837 .flat_map(|contents| contents.iter().positions(|content| !content.is_empty()))
838 // Omit e.g. left->(matching->)*left
839 .dedup()
840 .count()
841}
842
843/// Splits hunks into slices of contiguous changed lines.
844fn split_diff_hunks_by_matching_newline<'a, 'b>(
845 diff_hunks: &'a [DiffHunk<'b>],
846) -> impl Iterator<Item = &'a [DiffHunk<'b>]> {
847 diff_hunks.split_inclusive(|hunk| match hunk.kind {
848 DiffHunkKind::Matching => hunk.contents.iter().all(|content| content.contains(&b'\n')),
849 DiffHunkKind::Different => false,
850 })
851}
852
853struct FileContent {
854 /// false if this file is likely text; true if it is likely binary.
855 is_binary: bool,
856 contents: BString,
857}
858
859impl FileContent {
860 fn empty() -> Self {
861 Self {
862 is_binary: false,
863 contents: BString::default(),
864 }
865 }
866
867 pub(crate) fn is_empty(&self) -> bool {
868 self.contents.is_empty()
869 }
870}
871
872fn file_content_for_diff(
873 path: &RepoPath,
874 file: &mut MaterializedFileValue,
875) -> BackendResult<FileContent> {
876 // If this is a binary file, don't show the full contents.
877 // Determine whether it's binary by whether the first 8k bytes contain a null
878 // character; this is the same heuristic used by git as of writing: https://github.com/git/git/blob/eea0e59ffbed6e33d171ace5be13cde9faa41639/xdiff-interface.c#L192-L198
879 const PEEK_SIZE: usize = 8000;
880 // TODO: currently we look at the whole file, even though for binary files we
881 // only need to know the file size. To change that we'd have to extend all
882 // the data backends to support getting the length.
883 let contents = BString::new(file.read_all(path)?);
884 let start = &contents[..PEEK_SIZE.min(contents.len())];
885 Ok(FileContent {
886 is_binary: start.contains(&b'\0'),
887 contents,
888 })
889}
890
891fn diff_content(
892 path: &RepoPath,
893 value: MaterializedTreeValue,
894 conflict_marker_style: ConflictMarkerStyle,
895) -> BackendResult<FileContent> {
896 match value {
897 MaterializedTreeValue::Absent => Ok(FileContent::empty()),
898 MaterializedTreeValue::AccessDenied(err) => Ok(FileContent {
899 is_binary: false,
900 contents: format!("Access denied: {err}").into(),
901 }),
902 MaterializedTreeValue::File(mut file) => file_content_for_diff(path, &mut file),
903 MaterializedTreeValue::Symlink { id: _, target } => Ok(FileContent {
904 // Unix file paths can't contain null bytes.
905 is_binary: false,
906 contents: target.into(),
907 }),
908 MaterializedTreeValue::GitSubmodule(id) => Ok(FileContent {
909 is_binary: false,
910 contents: format!("Git submodule checked out at {id}").into(),
911 }),
912 // TODO: are we sure this is never binary?
913 MaterializedTreeValue::FileConflict {
914 id: _,
915 contents,
916 executable: _,
917 } => Ok(FileContent {
918 is_binary: false,
919 contents: materialize_merge_result_to_bytes(&contents, conflict_marker_style),
920 }),
921 MaterializedTreeValue::OtherConflict { id } => Ok(FileContent {
922 is_binary: false,
923 contents: id.describe().into(),
924 }),
925 MaterializedTreeValue::Tree(id) => {
926 panic!("Unexpected tree with id {id:?} in diff at path {path:?}");
927 }
928 }
929}
930
931fn basic_diff_file_type(value: &MaterializedTreeValue) -> &'static str {
932 match value {
933 MaterializedTreeValue::Absent => {
934 panic!("absent path in diff");
935 }
936 MaterializedTreeValue::AccessDenied(_) => "access denied",
937 MaterializedTreeValue::File(file) => {
938 if file.executable {
939 "executable file"
940 } else {
941 "regular file"
942 }
943 }
944 MaterializedTreeValue::Symlink { .. } => "symlink",
945 MaterializedTreeValue::Tree(_) => "tree",
946 MaterializedTreeValue::GitSubmodule(_) => "Git submodule",
947 MaterializedTreeValue::FileConflict { .. }
948 | MaterializedTreeValue::OtherConflict { .. } => "conflict",
949 }
950}
951
952pub fn show_color_words_diff(
953 formatter: &mut dyn Formatter,
954 store: &Store,
955 tree_diff: BoxStream<CopiesTreeDiffEntry>,
956 path_converter: &RepoPathUiConverter,
957 options: &ColorWordsDiffOptions,
958 conflict_marker_style: ConflictMarkerStyle,
959) -> Result<(), DiffRenderError> {
960 let mut diff_stream = materialized_diff_stream(store, tree_diff);
961 async {
962 while let Some(MaterializedTreeDiffEntry { path, values }) = diff_stream.next().await {
963 let left_path = path.source();
964 let right_path = path.target();
965 let left_ui_path = path_converter.format_file_path(left_path);
966 let right_ui_path = path_converter.format_file_path(right_path);
967 let (left_value, right_value) = values?;
968
969 match (&left_value, &right_value) {
970 (MaterializedTreeValue::AccessDenied(source), _) => {
971 write!(
972 formatter.labeled("access-denied"),
973 "Access denied to {left_ui_path}:"
974 )?;
975 writeln!(formatter, " {source}")?;
976 continue;
977 }
978 (_, MaterializedTreeValue::AccessDenied(source)) => {
979 write!(
980 formatter.labeled("access-denied"),
981 "Access denied to {right_ui_path}:"
982 )?;
983 writeln!(formatter, " {source}")?;
984 continue;
985 }
986 _ => {}
987 }
988 if left_value.is_absent() {
989 let description = basic_diff_file_type(&right_value);
990 writeln!(
991 formatter.labeled("header"),
992 "Added {description} {right_ui_path}:"
993 )?;
994 let right_content = diff_content(right_path, right_value, conflict_marker_style)?;
995 if right_content.is_empty() {
996 writeln!(formatter.labeled("empty"), " (empty)")?;
997 } else if right_content.is_binary {
998 writeln!(formatter.labeled("binary"), " (binary)")?;
999 } else {
1000 show_color_words_diff_hunks(
1001 formatter,
1002 [BStr::new(""), right_content.contents.as_ref()],
1003 options,
1004 )?;
1005 }
1006 } else if right_value.is_present() {
1007 let description = match (&left_value, &right_value) {
1008 (MaterializedTreeValue::File(left), MaterializedTreeValue::File(right)) => {
1009 if left.executable && right.executable {
1010 "Modified executable file".to_string()
1011 } else if left.executable {
1012 "Executable file became non-executable at".to_string()
1013 } else if right.executable {
1014 "Non-executable file became executable at".to_string()
1015 } else {
1016 "Modified regular file".to_string()
1017 }
1018 }
1019 (
1020 MaterializedTreeValue::FileConflict { .. }
1021 | MaterializedTreeValue::OtherConflict { .. },
1022 MaterializedTreeValue::FileConflict { .. }
1023 | MaterializedTreeValue::OtherConflict { .. },
1024 ) => "Modified conflict in".to_string(),
1025 (
1026 MaterializedTreeValue::FileConflict { .. }
1027 | MaterializedTreeValue::OtherConflict { .. },
1028 _,
1029 ) => "Resolved conflict in".to_string(),
1030 (
1031 _,
1032 MaterializedTreeValue::FileConflict { .. }
1033 | MaterializedTreeValue::OtherConflict { .. },
1034 ) => "Created conflict in".to_string(),
1035 (
1036 MaterializedTreeValue::Symlink { .. },
1037 MaterializedTreeValue::Symlink { .. },
1038 ) => "Symlink target changed at".to_string(),
1039 (_, _) => {
1040 let left_type = basic_diff_file_type(&left_value);
1041 let right_type = basic_diff_file_type(&right_value);
1042 let (first, rest) = left_type.split_at(1);
1043 format!(
1044 "{}{} became {} at",
1045 first.to_ascii_uppercase(),
1046 rest,
1047 right_type
1048 )
1049 }
1050 };
1051 let left_content = diff_content(left_path, left_value, conflict_marker_style)?;
1052 let right_content = diff_content(right_path, right_value, conflict_marker_style)?;
1053 if left_path == right_path {
1054 writeln!(
1055 formatter.labeled("header"),
1056 "{description} {right_ui_path}:"
1057 )?;
1058 } else {
1059 writeln!(
1060 formatter.labeled("header"),
1061 "{description} {right_ui_path} ({left_ui_path} => {right_ui_path}):"
1062 )?;
1063 }
1064 if left_content.is_binary || right_content.is_binary {
1065 writeln!(formatter.labeled("binary"), " (binary)")?;
1066 } else {
1067 show_color_words_diff_hunks(
1068 formatter,
1069 [&left_content.contents, &right_content.contents].map(BStr::new),
1070 options,
1071 )?;
1072 }
1073 } else {
1074 let description = basic_diff_file_type(&left_value);
1075 writeln!(
1076 formatter.labeled("header"),
1077 "Removed {description} {right_ui_path}:"
1078 )?;
1079 let left_content = diff_content(left_path, left_value, conflict_marker_style)?;
1080 if left_content.is_empty() {
1081 writeln!(formatter.labeled("empty"), " (empty)")?;
1082 } else if left_content.is_binary {
1083 writeln!(formatter.labeled("binary"), " (binary)")?;
1084 } else {
1085 show_color_words_diff_hunks(
1086 formatter,
1087 [left_content.contents.as_ref(), BStr::new("")],
1088 options,
1089 )?;
1090 }
1091 }
1092 }
1093 Ok(())
1094 }
1095 .block_on()
1096}
1097
1098pub fn show_file_by_file_diff(
1099 ui: &Ui,
1100 formatter: &mut dyn Formatter,
1101 store: &Store,
1102 tree_diff: BoxStream<CopiesTreeDiffEntry>,
1103 path_converter: &RepoPathUiConverter,
1104 tool: &ExternalMergeTool,
1105 conflict_marker_style: ConflictMarkerStyle,
1106) -> Result<(), DiffRenderError> {
1107 let create_file = |path: &RepoPath,
1108 wc_dir: &Path,
1109 value: MaterializedTreeValue|
1110 -> Result<PathBuf, DiffRenderError> {
1111 let fs_path = path.to_fs_path(wc_dir)?;
1112 std::fs::create_dir_all(fs_path.parent().unwrap())?;
1113 let content = diff_content(path, value, conflict_marker_style)?;
1114 std::fs::write(&fs_path, content.contents)?;
1115 Ok(fs_path)
1116 };
1117
1118 let temp_dir = new_utf8_temp_dir("jj-diff-")?;
1119 let left_wc_dir = temp_dir.path().join("left");
1120 let right_wc_dir = temp_dir.path().join("right");
1121 let mut diff_stream = materialized_diff_stream(store, tree_diff);
1122 async {
1123 while let Some(MaterializedTreeDiffEntry { path, values }) = diff_stream.next().await {
1124 let (left_value, right_value) = values?;
1125 let left_path = path.source();
1126 let right_path = path.target();
1127 let left_ui_path = path_converter.format_file_path(left_path);
1128 let right_ui_path = path_converter.format_file_path(right_path);
1129
1130 match (&left_value, &right_value) {
1131 (_, MaterializedTreeValue::AccessDenied(source)) => {
1132 write!(
1133 formatter.labeled("access-denied"),
1134 "Access denied to {right_ui_path}:"
1135 )?;
1136 writeln!(formatter, " {source}")?;
1137 continue;
1138 }
1139 (MaterializedTreeValue::AccessDenied(source), _) => {
1140 write!(
1141 formatter.labeled("access-denied"),
1142 "Access denied to {left_ui_path}:"
1143 )?;
1144 writeln!(formatter, " {source}")?;
1145 continue;
1146 }
1147 _ => {}
1148 }
1149 let left_path = create_file(left_path, &left_wc_dir, left_value)?;
1150 let right_path = create_file(right_path, &right_wc_dir, right_value)?;
1151
1152 let mut writer = formatter.raw()?;
1153 invoke_external_diff(
1154 ui,
1155 writer.as_mut(),
1156 tool,
1157 &maplit::hashmap! {
1158 "left" => left_path.to_str().expect("temp_dir should be valid utf-8"),
1159 "right" => right_path.to_str().expect("temp_dir should be valid utf-8"),
1160 },
1161 )
1162 .map_err(DiffRenderError::DiffGenerate)?;
1163 }
1164 Ok::<(), DiffRenderError>(())
1165 }
1166 .block_on()
1167}
1168
1169struct GitDiffPart {
1170 /// Octal mode string or `None` if the file is absent.
1171 mode: Option<&'static str>,
1172 hash: String,
1173 content: FileContent,
1174}
1175
1176fn git_diff_part(
1177 path: &RepoPath,
1178 value: MaterializedTreeValue,
1179 conflict_marker_style: ConflictMarkerStyle,
1180) -> Result<GitDiffPart, DiffRenderError> {
1181 const DUMMY_HASH: &str = "0000000000";
1182 let mode;
1183 let mut hash;
1184 let content;
1185 match value {
1186 MaterializedTreeValue::Absent => {
1187 return Ok(GitDiffPart {
1188 mode: None,
1189 hash: DUMMY_HASH.to_owned(),
1190 content: FileContent::empty(),
1191 });
1192 }
1193 MaterializedTreeValue::AccessDenied(err) => {
1194 return Err(DiffRenderError::AccessDenied {
1195 path: path.as_internal_file_string().to_owned(),
1196 source: err,
1197 });
1198 }
1199 MaterializedTreeValue::File(mut file) => {
1200 mode = if file.executable { "100755" } else { "100644" };
1201 hash = file.id.hex();
1202 content = file_content_for_diff(path, &mut file)?;
1203 }
1204 MaterializedTreeValue::Symlink { id, target } => {
1205 mode = "120000";
1206 hash = id.hex();
1207 content = FileContent {
1208 // Unix file paths can't contain null bytes.
1209 is_binary: false,
1210 contents: target.into(),
1211 };
1212 }
1213 MaterializedTreeValue::GitSubmodule(id) => {
1214 // TODO: What should we actually do here?
1215 mode = "040000";
1216 hash = id.hex();
1217 content = FileContent::empty();
1218 }
1219 MaterializedTreeValue::FileConflict {
1220 id: _,
1221 contents,
1222 executable,
1223 } => {
1224 mode = if executable { "100755" } else { "100644" };
1225 hash = DUMMY_HASH.to_owned();
1226 content = FileContent {
1227 is_binary: false, // TODO: are we sure this is never binary?
1228 contents: materialize_merge_result_to_bytes(&contents, conflict_marker_style),
1229 };
1230 }
1231 MaterializedTreeValue::OtherConflict { id } => {
1232 mode = "100644";
1233 hash = DUMMY_HASH.to_owned();
1234 content = FileContent {
1235 is_binary: false,
1236 contents: id.describe().into(),
1237 };
1238 }
1239 MaterializedTreeValue::Tree(_) => {
1240 panic!("Unexpected tree in diff at path {path:?}");
1241 }
1242 }
1243 hash.truncate(10);
1244 Ok(GitDiffPart {
1245 mode: Some(mode),
1246 hash,
1247 content,
1248 })
1249}
1250
1251#[derive(Clone, Debug, Eq, PartialEq)]
1252pub struct UnifiedDiffOptions {
1253 /// Number of context lines to show.
1254 pub context: usize,
1255 /// How lines are tokenized and compared.
1256 pub line_diff: LineDiffOptions,
1257}
1258
1259impl UnifiedDiffOptions {
1260 pub fn from_settings(settings: &UserSettings) -> Result<Self, ConfigGetError> {
1261 Ok(UnifiedDiffOptions {
1262 context: settings.get("diff.git.context")?,
1263 line_diff: LineDiffOptions::default(),
1264 })
1265 }
1266
1267 fn merge_args(&mut self, args: &DiffFormatArgs) {
1268 if let Some(context) = args.context {
1269 self.context = context;
1270 }
1271 self.line_diff.merge_args(args);
1272 }
1273}
1274
/// Role of a single line within a unified diff hunk.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum DiffLineType {
    /// Line present on both sides.
    Context,
    /// Line present only on the left side.
    Removed,
    /// Line present only on the right side.
    Added,
}
1281
/// Whether a token within a diff line is shared by both sides or not.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum DiffTokenType {
    /// Token that is the same on both sides.
    Matching,
    /// Token that differs between the sides.
    Different,
}
1287
1288type DiffTokenVec<'content> = Vec<(DiffTokenType, &'content [u8])>;
1289
1290struct UnifiedDiffHunk<'content> {
1291 left_line_range: Range<usize>,
1292 right_line_range: Range<usize>,
1293 lines: Vec<(DiffLineType, DiffTokenVec<'content>)>,
1294}
1295
1296impl<'content> UnifiedDiffHunk<'content> {
1297 fn extend_context_lines(&mut self, lines: impl IntoIterator<Item = &'content [u8]>) {
1298 let old_len = self.lines.len();
1299 self.lines.extend(lines.into_iter().map(|line| {
1300 let tokens = vec![(DiffTokenType::Matching, line)];
1301 (DiffLineType::Context, tokens)
1302 }));
1303 self.left_line_range.end += self.lines.len() - old_len;
1304 self.right_line_range.end += self.lines.len() - old_len;
1305 }
1306
1307 fn extend_removed_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) {
1308 let old_len = self.lines.len();
1309 self.lines
1310 .extend(lines.into_iter().map(|line| (DiffLineType::Removed, line)));
1311 self.left_line_range.end += self.lines.len() - old_len;
1312 }
1313
1314 fn extend_added_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) {
1315 let old_len = self.lines.len();
1316 self.lines
1317 .extend(lines.into_iter().map(|line| (DiffLineType::Added, line)));
1318 self.right_line_range.end += self.lines.len() - old_len;
1319 }
1320}
1321
1322fn unified_diff_hunks<'content>(
1323 contents: [&'content BStr; 2],
1324 options: &UnifiedDiffOptions,
1325) -> Vec<UnifiedDiffHunk<'content>> {
1326 let mut hunks = vec![];
1327 let mut current_hunk = UnifiedDiffHunk {
1328 left_line_range: 0..0,
1329 right_line_range: 0..0,
1330 lines: vec![],
1331 };
1332 let diff = diff_by_line(contents, &options.line_diff);
1333 let mut diff_hunks = diff.hunks().peekable();
1334 while let Some(hunk) = diff_hunks.next() {
1335 match hunk.kind {
1336 DiffHunkKind::Matching => {
1337 // Just use the right (i.e. new) content. We could count the
1338 // number of skipped lines separately, but the number of the
1339 // context lines should match the displayed content.
1340 let [_, right] = hunk.contents[..].try_into().unwrap();
1341 let mut lines = right.split_inclusive(|b| *b == b'\n').fuse();
1342 if !current_hunk.lines.is_empty() {
1343 // The previous hunk line should be either removed/added.
1344 current_hunk.extend_context_lines(lines.by_ref().take(options.context));
1345 }
1346 let before_lines = if diff_hunks.peek().is_some() {
1347 lines.by_ref().rev().take(options.context).collect()
1348 } else {
1349 vec![] // No more hunks
1350 };
1351 let num_skip_lines = lines.count();
1352 if num_skip_lines > 0 {
1353 let left_start = current_hunk.left_line_range.end + num_skip_lines;
1354 let right_start = current_hunk.right_line_range.end + num_skip_lines;
1355 if !current_hunk.lines.is_empty() {
1356 hunks.push(current_hunk);
1357 }
1358 current_hunk = UnifiedDiffHunk {
1359 left_line_range: left_start..left_start,
1360 right_line_range: right_start..right_start,
1361 lines: vec![],
1362 };
1363 }
1364 // The next hunk should be of DiffHunk::Different type if any.
1365 current_hunk.extend_context_lines(before_lines.into_iter().rev());
1366 }
1367 DiffHunkKind::Different => {
1368 let [left_lines, right_lines] =
1369 unzip_diff_hunks_to_lines(Diff::by_word(hunk.contents).hunks());
1370 current_hunk.extend_removed_lines(left_lines);
1371 current_hunk.extend_added_lines(right_lines);
1372 }
1373 }
1374 }
1375 if !current_hunk.lines.is_empty() {
1376 hunks.push(current_hunk);
1377 }
1378 hunks
1379}
1380
1381/// Splits `[left, right]` hunk pairs into `[left_lines, right_lines]`.
1382fn unzip_diff_hunks_to_lines<'content, I>(diff_hunks: I) -> [Vec<DiffTokenVec<'content>>; 2]
1383where
1384 I: IntoIterator,
1385 I::Item: Borrow<DiffHunk<'content>>,
1386{
1387 let mut left_lines: Vec<DiffTokenVec<'content>> = vec![];
1388 let mut right_lines: Vec<DiffTokenVec<'content>> = vec![];
1389 let mut left_tokens: DiffTokenVec<'content> = vec![];
1390 let mut right_tokens: DiffTokenVec<'content> = vec![];
1391
1392 for hunk in diff_hunks {
1393 let hunk = hunk.borrow();
1394 match hunk.kind {
1395 DiffHunkKind::Matching => {
1396 // TODO: add support for unmatched contexts
1397 debug_assert!(hunk.contents.iter().all_equal());
1398 for token in hunk.contents[0].split_inclusive(|b| *b == b'\n') {
1399 left_tokens.push((DiffTokenType::Matching, token));
1400 right_tokens.push((DiffTokenType::Matching, token));
1401 if token.ends_with(b"\n") {
1402 left_lines.push(mem::take(&mut left_tokens));
1403 right_lines.push(mem::take(&mut right_tokens));
1404 }
1405 }
1406 }
1407 DiffHunkKind::Different => {
1408 let [left, right] = hunk.contents[..]
1409 .try_into()
1410 .expect("hunk should have exactly two inputs");
1411 for token in left.split_inclusive(|b| *b == b'\n') {
1412 left_tokens.push((DiffTokenType::Different, token));
1413 if token.ends_with(b"\n") {
1414 left_lines.push(mem::take(&mut left_tokens));
1415 }
1416 }
1417 for token in right.split_inclusive(|b| *b == b'\n') {
1418 right_tokens.push((DiffTokenType::Different, token));
1419 if token.ends_with(b"\n") {
1420 right_lines.push(mem::take(&mut right_tokens));
1421 }
1422 }
1423 }
1424 }
1425 }
1426
1427 if !left_tokens.is_empty() {
1428 left_lines.push(left_tokens);
1429 }
1430 if !right_tokens.is_empty() {
1431 right_lines.push(right_tokens);
1432 }
1433 [left_lines, right_lines]
1434}
1435
1436fn show_unified_diff_hunks(
1437 formatter: &mut dyn Formatter,
1438 contents: [&BStr; 2],
1439 options: &UnifiedDiffOptions,
1440) -> io::Result<()> {
1441 // "If the chunk size is 0, the first number is one lower than one would
1442 // expect." - https://www.artima.com/weblogs/viewpost.jsp?thread=164293
1443 //
1444 // The POSIX spec also states that "the ending line number of an empty range
1445 // shall be the number of the preceding line, or 0 if the range is at the
1446 // start of the file."
1447 // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/diff.html
1448 fn to_line_number(range: Range<usize>) -> usize {
1449 if range.is_empty() {
1450 range.start
1451 } else {
1452 range.start + 1
1453 }
1454 }
1455
1456 for hunk in unified_diff_hunks(contents, options) {
1457 writeln!(
1458 formatter.labeled("hunk_header"),
1459 "@@ -{},{} +{},{} @@",
1460 to_line_number(hunk.left_line_range.clone()),
1461 hunk.left_line_range.len(),
1462 to_line_number(hunk.right_line_range.clone()),
1463 hunk.right_line_range.len()
1464 )?;
1465 for (line_type, tokens) in &hunk.lines {
1466 let (label, sigil) = match line_type {
1467 DiffLineType::Context => ("context", " "),
1468 DiffLineType::Removed => ("removed", "-"),
1469 DiffLineType::Added => ("added", "+"),
1470 };
1471 formatter.with_label(label, |formatter| {
1472 write!(formatter, "{sigil}")?;
1473 show_diff_line_tokens(formatter, tokens)
1474 })?;
1475 let (_, content) = tokens.last().expect("hunk line must not be empty");
1476 if !content.ends_with(b"\n") {
1477 write!(formatter, "\n\\ No newline at end of file\n")?;
1478 }
1479 }
1480 }
1481 Ok(())
1482}
1483
1484fn show_diff_line_tokens(
1485 formatter: &mut dyn Formatter,
1486 tokens: &[(DiffTokenType, &[u8])],
1487) -> io::Result<()> {
1488 for (token_type, content) in tokens {
1489 match token_type {
1490 DiffTokenType::Matching => formatter.write_all(content)?,
1491 DiffTokenType::Different => {
1492 formatter.with_label("token", |formatter| formatter.write_all(content))?;
1493 }
1494 }
1495 }
1496 Ok(())
1497}
1498
1499pub fn show_git_diff(
1500 formatter: &mut dyn Formatter,
1501 store: &Store,
1502 tree_diff: BoxStream<CopiesTreeDiffEntry>,
1503 options: &UnifiedDiffOptions,
1504 conflict_marker_style: ConflictMarkerStyle,
1505) -> Result<(), DiffRenderError> {
1506 let mut diff_stream = materialized_diff_stream(store, tree_diff);
1507 async {
1508 while let Some(MaterializedTreeDiffEntry { path, values }) = diff_stream.next().await {
1509 let left_path = path.source();
1510 let right_path = path.target();
1511 let left_path_string = left_path.as_internal_file_string();
1512 let right_path_string = right_path.as_internal_file_string();
1513 let (left_value, right_value) = values?;
1514
1515 let left_part = git_diff_part(left_path, left_value, conflict_marker_style)?;
1516 let right_part = git_diff_part(right_path, right_value, conflict_marker_style)?;
1517
1518 formatter.with_label("file_header", |formatter| {
1519 writeln!(
1520 formatter,
1521 "diff --git a/{left_path_string} b/{right_path_string}"
1522 )?;
1523 let left_hash = &left_part.hash;
1524 let right_hash = &right_part.hash;
1525 match (left_part.mode, right_part.mode) {
1526 (None, Some(right_mode)) => {
1527 writeln!(formatter, "new file mode {right_mode}")?;
1528 writeln!(formatter, "index {left_hash}..{right_hash}")?;
1529 }
1530 (Some(left_mode), None) => {
1531 writeln!(formatter, "deleted file mode {left_mode}")?;
1532 writeln!(formatter, "index {left_hash}..{right_hash}")?;
1533 }
1534 (Some(left_mode), Some(right_mode)) => {
1535 if let Some(op) = path.copy_operation() {
1536 let operation = match op {
1537 CopyOperation::Copy => "copy",
1538 CopyOperation::Rename => "rename",
1539 };
1540 // TODO: include similarity index?
1541 writeln!(formatter, "{operation} from {left_path_string}")?;
1542 writeln!(formatter, "{operation} to {right_path_string}")?;
1543 }
1544 if left_mode != right_mode {
1545 writeln!(formatter, "old mode {left_mode}")?;
1546 writeln!(formatter, "new mode {right_mode}")?;
1547 if left_hash != right_hash {
1548 writeln!(formatter, "index {left_hash}..{right_hash}")?;
1549 }
1550 } else if left_hash != right_hash {
1551 writeln!(formatter, "index {left_hash}..{right_hash} {left_mode}")?;
1552 }
1553 }
1554 (None, None) => panic!("either left or right part should be present"),
1555 }
1556 Ok::<(), DiffRenderError>(())
1557 })?;
1558
1559 if left_part.content.contents == right_part.content.contents {
1560 continue; // no content hunks
1561 }
1562
1563 let left_path = match left_part.mode {
1564 Some(_) => format!("a/{left_path_string}"),
1565 None => "/dev/null".to_owned(),
1566 };
1567 let right_path = match right_part.mode {
1568 Some(_) => format!("b/{right_path_string}"),
1569 None => "/dev/null".to_owned(),
1570 };
1571 if left_part.content.is_binary || right_part.content.is_binary {
1572 // TODO: add option to emit Git binary diff
1573 writeln!(
1574 formatter,
1575 "Binary files {left_path} and {right_path} differ"
1576 )?;
1577 } else {
1578 formatter.with_label("file_header", |formatter| {
1579 writeln!(formatter, "--- {left_path}")?;
1580 writeln!(formatter, "+++ {right_path}")?;
1581 io::Result::Ok(())
1582 })?;
1583 show_unified_diff_hunks(
1584 formatter,
1585 [&left_part.content.contents, &right_part.content.contents].map(BStr::new),
1586 options,
1587 )?;
1588 }
1589 }
1590 Ok(())
1591 }
1592 .block_on()
1593}
1594
1595#[instrument(skip_all)]
1596pub fn show_diff_summary(
1597 formatter: &mut dyn Formatter,
1598 mut tree_diff: BoxStream<CopiesTreeDiffEntry>,
1599 path_converter: &RepoPathUiConverter,
1600) -> Result<(), DiffRenderError> {
1601 async {
1602 while let Some(CopiesTreeDiffEntry { path, values }) = tree_diff.next().await {
1603 let (before, after) = values?;
1604 let (label, sigil) = diff_status_label_and_char(&path, &before, &after);
1605 let path = if path.copy_operation().is_some() {
1606 path_converter.format_copied_path(path.source(), path.target())
1607 } else {
1608 path_converter.format_file_path(path.target())
1609 };
1610 writeln!(formatter.labeled(label), "{sigil} {path}")?;
1611 }
1612 Ok(())
1613 }
1614 .block_on()
1615}
1616
1617pub fn diff_status_label_and_char(
1618 path: &CopiesTreeDiffEntryPath,
1619 before: &MergedTreeValue,
1620 after: &MergedTreeValue,
1621) -> (&'static str, char) {
1622 if let Some(op) = path.copy_operation() {
1623 match op {
1624 CopyOperation::Copy => ("copied", 'C'),
1625 CopyOperation::Rename => ("renamed", 'R'),
1626 }
1627 } else {
1628 match (before.is_present(), after.is_present()) {
1629 (true, true) => ("modified", 'M'),
1630 (false, true) => ("added", 'A'),
1631 (true, false) => ("removed", 'D'),
1632 (false, false) => panic!("values pair must differ"),
1633 }
1634 }
1635}
1636
1637#[derive(Clone, Debug, Default, Eq, PartialEq)]
1638pub struct DiffStatOptions {
1639 /// How lines are tokenized and compared.
1640 pub line_diff: LineDiffOptions,
1641}
1642
1643impl DiffStatOptions {
1644 fn merge_args(&mut self, args: &DiffFormatArgs) {
1645 self.line_diff.merge_args(args);
1646 }
1647}
1648
1649#[derive(Clone, Debug)]
1650pub struct DiffStats {
1651 entries: Vec<DiffStatEntry>,
1652}
1653
1654impl DiffStats {
1655 /// Calculates stats of changed lines per file.
1656 pub async fn calculate(
1657 store: &Store,
1658 tree_diff: BoxStream<'_, CopiesTreeDiffEntry>,
1659 options: &DiffStatOptions,
1660 conflict_marker_style: ConflictMarkerStyle,
1661 ) -> BackendResult<Self> {
1662 let entries = materialized_diff_stream(store, tree_diff)
1663 .map(|MaterializedTreeDiffEntry { path, values }| {
1664 let (left, right) = values?;
1665 let left_content = diff_content(path.source(), left, conflict_marker_style)?;
1666 let right_content = diff_content(path.target(), right, conflict_marker_style)?;
1667 let stat = get_diff_stat_entry(
1668 path,
1669 [&left_content.contents, &right_content.contents].map(BStr::new),
1670 options,
1671 );
1672 BackendResult::Ok(stat)
1673 })
1674 .try_collect()
1675 .await?;
1676 Ok(DiffStats { entries })
1677 }
1678
1679 /// List of stats per file.
1680 pub fn entries(&self) -> &[DiffStatEntry] {
1681 &self.entries
1682 }
1683
1684 /// Total number of insertions.
1685 pub fn count_total_added(&self) -> usize {
1686 self.entries.iter().map(|stat| stat.added).sum()
1687 }
1688
1689 /// Total number of deletions.
1690 pub fn count_total_removed(&self) -> usize {
1691 self.entries.iter().map(|stat| stat.removed).sum()
1692 }
1693}
1694
1695#[derive(Clone, Debug)]
1696pub struct DiffStatEntry {
1697 pub path: CopiesTreeDiffEntryPath,
1698 pub added: usize,
1699 pub removed: usize,
1700}
1701
1702fn get_diff_stat_entry(
1703 path: CopiesTreeDiffEntryPath,
1704 contents: [&BStr; 2],
1705 options: &DiffStatOptions,
1706) -> DiffStatEntry {
1707 // TODO: this matches git's behavior, which is to count the number of newlines
1708 // in the file. but that behavior seems unhelpful; no one really cares how
1709 // many `0x0a` characters are in an image.
1710 let diff = diff_by_line(contents, &options.line_diff);
1711 let mut added = 0;
1712 let mut removed = 0;
1713 for hunk in diff.hunks() {
1714 match hunk.kind {
1715 DiffHunkKind::Matching => {}
1716 DiffHunkKind::Different => {
1717 let [left, right] = hunk.contents[..].try_into().unwrap();
1718 removed += left.split_inclusive(|b| *b == b'\n').count();
1719 added += right.split_inclusive(|b| *b == b'\n').count();
1720 }
1721 }
1722 }
1723 DiffStatEntry {
1724 path,
1725 added,
1726 removed,
1727 }
1728}
1729
1730pub fn show_diff_stats(
1731 formatter: &mut dyn Formatter,
1732 stats: &DiffStats,
1733 path_converter: &RepoPathUiConverter,
1734 display_width: usize,
1735) -> io::Result<()> {
1736 let ui_paths = stats
1737 .entries()
1738 .iter()
1739 .map(|stat| {
1740 if stat.path.copy_operation().is_some() {
1741 path_converter.format_copied_path(stat.path.source(), stat.path.target())
1742 } else {
1743 path_converter.format_file_path(stat.path.target())
1744 }
1745 })
1746 .collect_vec();
1747 let max_path_width = ui_paths.iter().map(|s| s.width()).max().unwrap_or(0);
1748 let max_diffs = stats
1749 .entries()
1750 .iter()
1751 .map(|stat| stat.added + stat.removed)
1752 .max()
1753 .unwrap_or(0);
1754
1755 let number_padding = max_diffs.to_string().len();
1756 // 4 characters padding for the graph
1757 let available_width = display_width.saturating_sub(4 + " | ".len() + number_padding);
1758 // Always give at least a tiny bit of room
1759 let available_width = max(available_width, 5);
1760 let max_path_width = max_path_width.clamp(3, (0.7 * available_width as f64) as usize);
1761 let max_bar_length = available_width.saturating_sub(max_path_width);
1762 let factor = if max_diffs < max_bar_length {
1763 1.0
1764 } else {
1765 max_bar_length as f64 / max_diffs as f64
1766 };
1767
1768 for (stat, ui_path) in iter::zip(stats.entries(), &ui_paths) {
1769 let bar_added = (stat.added as f64 * factor).ceil() as usize;
1770 let bar_removed = (stat.removed as f64 * factor).ceil() as usize;
1771 // replace start of path with ellipsis if the path is too long
1772 let (path, path_width) = text_util::elide_start(ui_path, "...", max_path_width);
1773 let path_pad_width = max_path_width - path_width;
1774 write!(
1775 formatter,
1776 "{path}{:path_pad_width$} | {:>number_padding$}{}",
1777 "", // pad to max_path_width
1778 stat.added + stat.removed,
1779 if bar_added + bar_removed > 0 { " " } else { "" },
1780 )?;
1781 write!(formatter.labeled("added"), "{}", "+".repeat(bar_added))?;
1782 writeln!(formatter.labeled("removed"), "{}", "-".repeat(bar_removed))?;
1783 }
1784
1785 let total_added = stats.count_total_added();
1786 let total_removed = stats.count_total_removed();
1787 let total_files = stats.entries().len();
1788 writeln!(
1789 formatter.labeled("stat-summary"),
1790 "{} file{} changed, {} insertion{}(+), {} deletion{}(-)",
1791 total_files,
1792 if total_files == 1 { "" } else { "s" },
1793 total_added,
1794 if total_added == 1 { "" } else { "s" },
1795 total_removed,
1796 if total_removed == 1 { "" } else { "s" },
1797 )?;
1798 Ok(())
1799}
1800
1801pub fn show_types(
1802 formatter: &mut dyn Formatter,
1803 mut tree_diff: BoxStream<CopiesTreeDiffEntry>,
1804 path_converter: &RepoPathUiConverter,
1805) -> Result<(), DiffRenderError> {
1806 async {
1807 while let Some(CopiesTreeDiffEntry { path, values }) = tree_diff.next().await {
1808 let (before, after) = values?;
1809 writeln!(
1810 formatter.labeled("modified"),
1811 "{}{} {}",
1812 diff_summary_char(&before),
1813 diff_summary_char(&after),
1814 path_converter.format_copied_path(path.source(), path.target())
1815 )?;
1816 }
1817 Ok(())
1818 }
1819 .block_on()
1820}
1821
1822fn diff_summary_char(value: &MergedTreeValue) -> char {
1823 match value.as_resolved() {
1824 Some(None) => '-',
1825 Some(Some(TreeValue::File { .. })) => 'F',
1826 Some(Some(TreeValue::Symlink(_))) => 'L',
1827 Some(Some(TreeValue::GitSubmodule(_))) => 'G',
1828 None => 'C',
1829 Some(Some(TreeValue::Tree(_))) | Some(Some(TreeValue::Conflict(_))) => {
1830 panic!("Unexpected {value:?} in diff")
1831 }
1832 }
1833}
1834
1835pub fn show_names(
1836 formatter: &mut dyn Formatter,
1837 mut tree_diff: BoxStream<CopiesTreeDiffEntry>,
1838 path_converter: &RepoPathUiConverter,
1839) -> io::Result<()> {
1840 async {
1841 while let Some(CopiesTreeDiffEntry { path, .. }) = tree_diff.next().await {
1842 writeln!(
1843 formatter,
1844 "{}",
1845 path_converter.format_file_path(path.target())
1846 )?;
1847 }
1848 Ok(())
1849 }
1850 .block_on()
1851}