just playing with tangled
at gvimdiff 68 kB view raw
1// Copyright 2020-2022 The Jujutsu Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15use std::borrow::Borrow; 16use std::cmp::max; 17use std::io; 18use std::iter; 19use std::mem; 20use std::ops::Range; 21use std::path::Path; 22use std::path::PathBuf; 23 24use bstr::BStr; 25use bstr::BString; 26use futures::executor::block_on_stream; 27use futures::stream::BoxStream; 28use futures::StreamExt as _; 29use futures::TryStreamExt as _; 30use itertools::Itertools as _; 31use jj_lib::backend::BackendError; 32use jj_lib::backend::BackendResult; 33use jj_lib::backend::CommitId; 34use jj_lib::backend::CopyRecord; 35use jj_lib::backend::TreeValue; 36use jj_lib::commit::Commit; 37use jj_lib::config::ConfigGetError; 38use jj_lib::config::ConfigGetResultExt as _; 39use jj_lib::conflicts::materialize_merge_result_to_bytes; 40use jj_lib::conflicts::materialized_diff_stream; 41use jj_lib::conflicts::ConflictMarkerStyle; 42use jj_lib::conflicts::MaterializedFileValue; 43use jj_lib::conflicts::MaterializedTreeDiffEntry; 44use jj_lib::conflicts::MaterializedTreeValue; 45use jj_lib::copies::CopiesTreeDiffEntry; 46use jj_lib::copies::CopiesTreeDiffEntryPath; 47use jj_lib::copies::CopyOperation; 48use jj_lib::copies::CopyRecords; 49use jj_lib::diff::find_line_ranges; 50use jj_lib::diff::CompareBytesExactly; 51use jj_lib::diff::CompareBytesIgnoreAllWhitespace; 52use jj_lib::diff::CompareBytesIgnoreWhitespaceAmount; 53use jj_lib::diff::Diff; 54use 
jj_lib::diff::DiffHunk; 55use jj_lib::diff::DiffHunkKind; 56use jj_lib::files::DiffLineHunkSide; 57use jj_lib::files::DiffLineIterator; 58use jj_lib::files::DiffLineNumber; 59use jj_lib::matchers::Matcher; 60use jj_lib::merge::MergedTreeValue; 61use jj_lib::merged_tree::MergedTree; 62use jj_lib::object_id::ObjectId as _; 63use jj_lib::repo::Repo; 64use jj_lib::repo_path::InvalidRepoPathError; 65use jj_lib::repo_path::RepoPath; 66use jj_lib::repo_path::RepoPathUiConverter; 67use jj_lib::rewrite::rebase_to_dest_parent; 68use jj_lib::settings::UserSettings; 69use jj_lib::store::Store; 70use pollster::FutureExt as _; 71use thiserror::Error; 72use tracing::instrument; 73use unicode_width::UnicodeWidthStr as _; 74 75use crate::config::CommandNameAndArgs; 76use crate::formatter::Formatter; 77use crate::merge_tools; 78use crate::merge_tools::generate_diff; 79use crate::merge_tools::invoke_external_diff; 80use crate::merge_tools::new_utf8_temp_dir; 81use crate::merge_tools::DiffGenerateError; 82use crate::merge_tools::DiffToolMode; 83use crate::merge_tools::ExternalMergeTool; 84use crate::text_util; 85use crate::ui::Ui; 86 87#[derive(clap::Args, Clone, Debug)] 88#[command(next_help_heading = "Diff Formatting Options")] 89#[command(group(clap::ArgGroup::new("short-format").args(&["summary", "stat", "types", "name_only"])))] 90#[command(group(clap::ArgGroup::new("long-format").args(&["git", "color_words", "tool"])))] 91pub struct DiffFormatArgs { 92 /// For each path, show only whether it was modified, added, or deleted 93 #[arg(long, short)] 94 pub summary: bool, 95 /// Show a histogram of the changes 96 #[arg(long)] 97 pub stat: bool, 98 /// For each path, show only its type before and after 99 /// 100 /// The diff is shown as two letters. The first letter indicates the type 101 /// before and the second letter indicates the type after. 
'-' indicates 102 /// that the path was not present, 'F' represents a regular file, `L' 103 /// represents a symlink, 'C' represents a conflict, and 'G' represents a 104 /// Git submodule. 105 #[arg(long)] 106 pub types: bool, 107 /// For each path, show only its path 108 /// 109 /// Typically useful for shell commands like: 110 /// `jj diff -r @- --name-only | xargs perl -pi -e's/OLD/NEW/g` 111 #[arg(long)] 112 pub name_only: bool, 113 /// Show a Git-format diff 114 #[arg(long)] 115 pub git: bool, 116 /// Show a word-level diff with changes indicated only by color 117 #[arg(long)] 118 pub color_words: bool, 119 /// Generate diff by external command 120 #[arg(long)] 121 pub tool: Option<String>, 122 /// Number of lines of context to show 123 #[arg(long)] 124 context: Option<usize>, 125 126 // Short flags are set by command to avoid future conflicts. 127 /// Ignore whitespace when comparing lines. 128 #[arg(long)] // short = 'w' 129 ignore_all_space: bool, 130 /// Ignore changes in amount of whitespace when comparing lines. 131 #[arg(long, conflicts_with = "ignore_all_space")] // short = 'b' 132 ignore_space_change: bool, 133} 134 135#[derive(Clone, Debug, Eq, PartialEq)] 136pub enum DiffFormat { 137 // Non-trivial parameters are boxed in order to keep the variants small 138 Summary, 139 Stat(Box<DiffStatOptions>), 140 Types, 141 NameOnly, 142 Git(Box<UnifiedDiffOptions>), 143 ColorWords(Box<ColorWordsDiffOptions>), 144 Tool(Box<ExternalMergeTool>), 145} 146 147impl DiffFormat { 148 fn is_short(&self) -> bool { 149 match self { 150 DiffFormat::Summary 151 | DiffFormat::Stat(_) 152 | DiffFormat::Types 153 | DiffFormat::NameOnly => true, 154 DiffFormat::Git(_) | DiffFormat::ColorWords(_) | DiffFormat::Tool(_) => false, 155 } 156 } 157} 158 159/// Returns a list of requested diff formats, which will never be empty. 
///
/// When `args` does not select any format, the single default format from
/// `default_diff_format()` is returned instead.
pub fn diff_formats_for(
    settings: &UserSettings,
    args: &DiffFormatArgs,
) -> Result<Vec<DiffFormat>, ConfigGetError> {
    let formats = diff_formats_from_args(settings, args)?;
    if formats.is_empty() {
        Ok(vec![default_diff_format(settings, args)?])
    } else {
        Ok(formats)
    }
}

/// Returns a list of requested diff formats for log-like commands, which may be
/// empty.
///
/// If `patch` is set and every requested format is a "short" one (or none was
/// requested), the configured default format is appended.
pub fn diff_formats_for_log(
    settings: &UserSettings,
    args: &DiffFormatArgs,
    patch: bool,
) -> Result<Vec<DiffFormat>, ConfigGetError> {
    let mut formats = diff_formats_from_args(settings, args)?;
    // --patch implies default if no "long" format is specified
    if patch && formats.iter().all(DiffFormat::is_short) {
        // TODO: maybe better to error out if the configured default isn't a
        // "long" format?
        formats.push(default_diff_format(settings, args)?);
        // The default may be a short format that was already requested; drop
        // the adjacent duplicate in that case.
        formats.dedup();
    }
    Ok(formats)
}

/// Builds the list of formats explicitly selected by `args`.
///
/// Short formats are pushed first, followed by long formats. The result is
/// empty when no format flag is set.
fn diff_formats_from_args(
    settings: &UserSettings,
    args: &DiffFormatArgs,
) -> Result<Vec<DiffFormat>, ConfigGetError> {
    let mut formats = Vec::new();
    // "short" format first:
    if args.summary {
        formats.push(DiffFormat::Summary);
    }
    if args.stat {
        let mut options = DiffStatOptions::default();
        options.merge_args(args);
        formats.push(DiffFormat::Stat(Box::new(options)));
    }
    if args.types {
        formats.push(DiffFormat::Types);
    }
    if args.name_only {
        formats.push(DiffFormat::NameOnly);
    }
    // "long" format follows:
    if args.git {
        let mut options = UnifiedDiffOptions::from_settings(settings)?;
        options.merge_args(args);
        formats.push(DiffFormat::Git(Box::new(options)));
    }
    if args.color_words {
        let mut options = ColorWordsDiffOptions::from_settings(settings)?;
        options.merge_args(args);
        formats.push(DiffFormat::ColorWords(Box::new(options)));
    }
    if let Some(name) = &args.tool {
        // Use the configured tool of that name if there is one; otherwise
        // treat the name as the program to run.
        let tool = merge_tools::get_external_tool_config(settings, name)?
            .unwrap_or_else(|| ExternalMergeTool::with_program(name));
        formats.push(DiffFormat::Tool(Box::new(tool)));
    }
    Ok(formats)
}

/// Returns the diff format selected by configuration.
///
/// `ui.diff.tool`, when set, takes precedence over `ui.diff.format`. An
/// unrecognized `ui.diff.format` value is reported as a
/// `ConfigGetError::Type`.
fn default_diff_format(
    settings: &UserSettings,
    args: &DiffFormatArgs,
) -> Result<DiffFormat, ConfigGetError> {
    // Note: the inner `args` below shadows the `args` parameter within this
    // `if let` block only.
    if let Some(args) = settings.get("ui.diff.tool").optional()? {
        // External "tool" overrides the internal "format" option.
        let tool = if let CommandNameAndArgs::String(name) = &args {
            merge_tools::get_external_tool_config(settings, name)?
        } else {
            None
        }
        .unwrap_or_else(|| ExternalMergeTool::with_diff_args(&args));
        return Ok(DiffFormat::Tool(Box::new(tool)));
    }
    match settings.get_string("ui.diff.format")?.as_ref() {
        "summary" => Ok(DiffFormat::Summary),
        "stat" => {
            let mut options = DiffStatOptions::default();
            options.merge_args(args);
            Ok(DiffFormat::Stat(Box::new(options)))
        }
        "types" => Ok(DiffFormat::Types),
        "name-only" => Ok(DiffFormat::NameOnly),
        "git" => {
            let mut options = UnifiedDiffOptions::from_settings(settings)?;
            options.merge_args(args);
            Ok(DiffFormat::Git(Box::new(options)))
        }
        "color-words" => {
            let mut options = ColorWordsDiffOptions::from_settings(settings)?;
            options.merge_args(args);
            Ok(DiffFormat::ColorWords(Box::new(options)))
        }
        name => Err(ConfigGetError::Type {
            name: "ui.diff.format".to_owned(),
            error: format!("Invalid diff format: {name}").into(),
            source_path: None,
        }),
    }
}

/// Errors that can be returned while rendering a diff.
#[derive(Debug, Error)]
pub enum DiffRenderError {
    #[error("Failed to generate diff")]
    DiffGenerate(#[source] DiffGenerateError),
    #[error(transparent)]
    Backend(#[from] BackendError),
    #[error("Access denied to {path}")]
    AccessDenied {
        path: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    #[error(transparent)]
    InvalidRepoPath(#[from] InvalidRepoPathError),
    #[error(transparent)]
    Io(#[from] io::Error),
}

/// Configuration and environment to render textual diff.
pub struct DiffRenderer<'a> {
    repo: &'a dyn Repo,
    path_converter: &'a RepoPathUiConverter,
    conflict_marker_style: ConflictMarkerStyle,
    formats: Vec<DiffFormat>,
}

impl<'a> DiffRenderer<'a> {
    /// Creates a renderer that emits each of `formats`, in order, for every
    /// diff it is asked to show.
    pub fn new(
        repo: &'a dyn Repo,
        path_converter: &'a RepoPathUiConverter,
        conflict_marker_style: ConflictMarkerStyle,
        formats: Vec<DiffFormat>,
    ) -> Self {
        DiffRenderer {
            repo,
            path_converter,
            conflict_marker_style,
            formats,
        }
    }

    /// Generates diff between `from_tree` and `to_tree`.
    ///
    /// All output is written under the "diff" formatter label.
    #[expect(clippy::too_many_arguments)]
    pub fn show_diff(
        &self,
        ui: &Ui, // TODO: remove Ui dependency if possible
        formatter: &mut dyn Formatter,
        from_tree: &MergedTree,
        to_tree: &MergedTree,
        matcher: &dyn Matcher,
        copy_records: &CopyRecords,
        width: usize,
    ) -> Result<(), DiffRenderError> {
        formatter.with_label("diff", |formatter| {
            self.show_diff_inner(
                ui,
                formatter,
                from_tree,
                to_tree,
                matcher,
                copy_records,
                width,
            )
        })
    }

    /// Renders the diff once per configured format, in the order the formats
    /// were given. A fresh tree diff stream is created for each format.
    #[expect(clippy::too_many_arguments)]
    fn show_diff_inner(
        &self,
        ui: &Ui,
        formatter: &mut dyn Formatter,
        from_tree: &MergedTree,
        to_tree: &MergedTree,
        matcher: &dyn Matcher,
        copy_records: &CopyRecords,
        width: usize,
    ) -> Result<(), DiffRenderError> {
        let store = self.repo.store();
        let path_converter = self.path_converter;
        for format in &self.formats {
            match format {
                DiffFormat::Summary => {
                    let tree_diff =
                        from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
                    show_diff_summary(formatter, tree_diff, path_converter)?;
                }
                DiffFormat::Stat(options) => {
                    let tree_diff =
                        from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
                    let stats =
                        DiffStats::calculate(store, tree_diff, options, self.conflict_marker_style)
                            .block_on()?;
                    show_diff_stats(formatter, &stats, path_converter, width)?;
                }
                DiffFormat::Types => {
                    let tree_diff =
                        from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
                    show_types(formatter, tree_diff, path_converter)?;
                }
                DiffFormat::NameOnly => {
                    let tree_diff =
                        from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
                    show_names(formatter, tree_diff, path_converter)?;
                }
                DiffFormat::Git(options) => {
                    let tree_diff =
                        from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
                    show_git_diff(
                        formatter,
                        store,
                        tree_diff,
                        options,
                        self.conflict_marker_style,
                    )?;
                }
                DiffFormat::ColorWords(options) => {
                    let tree_diff =
                        from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
                    show_color_words_diff(
                        formatter,
                        store,
                        tree_diff,
                        path_converter,
                        options,
                        self.conflict_marker_style,
                    )?;
                }
                DiffFormat::Tool(tool) => {
                    // Both arms evaluate to a `Result`; the `?` after the
                    // inner `match` propagates either failure.
                    match tool.diff_invocation_mode {
                        DiffToolMode::FileByFile => {
                            let tree_diff =
                                from_tree.diff_stream_with_copies(to_tree, matcher, copy_records);
                            show_file_by_file_diff(
                                ui,
                                formatter,
                                store,
                                tree_diff,
                                path_converter,
                                tool,
                                self.conflict_marker_style,
                            )
                        }
                        DiffToolMode::Dir => {
                            // The tool's output goes to the formatter's raw
                            // writer; the copy records are not passed along.
                            let mut writer = formatter.raw()?;
                            generate_diff(
                                ui,
                                writer.as_mut(),
                                from_tree,
                                to_tree,
                                matcher,
                                tool,
                                self.conflict_marker_style,
                            )
                            .map_err(DiffRenderError::DiffGenerate)
                        }
                    }?;
                }
            }
        }
        Ok(())
    }

    /// Generates diff between `from_commits` and `to_commit` based off their
    /// parents. The `from_commits` will temporarily be rebased onto the
    /// `to_commit` parents to exclude unrelated changes.
    ///
    /// Copy records are currently left empty for this diff (see the TODO
    /// below).
    pub fn show_inter_diff(
        &self,
        ui: &Ui,
        formatter: &mut dyn Formatter,
        from_commits: &[Commit],
        to_commit: &Commit,
        matcher: &dyn Matcher,
        width: usize,
    ) -> Result<(), DiffRenderError> {
        let from_tree = rebase_to_dest_parent(self.repo, from_commits, to_commit)?;
        let to_tree = to_commit.tree()?;
        let copy_records = CopyRecords::default(); // TODO
        self.show_diff(
            ui,
            formatter,
            &from_tree,
            &to_tree,
            matcher,
            &copy_records,
            width,
        )
    }

    /// Generates diff of the given `commit` compared to its parents.
    ///
    /// Copy records are collected from each parent to `commit` and passed on
    /// to `show_diff()`.
    pub fn show_patch(
        &self,
        ui: &Ui,
        formatter: &mut dyn Formatter,
        commit: &Commit,
        matcher: &dyn Matcher,
        width: usize,
    ) -> Result<(), DiffRenderError> {
        let from_tree = commit.parent_tree(self.repo)?;
        let to_tree = commit.tree()?;
        let mut copy_records = CopyRecords::default();
        for parent_id in commit.parent_ids() {
            let records = get_copy_records(self.repo.store(), parent_id, commit.id(), matcher)?;
            copy_records.add_records(records)?;
        }
        self.show_diff(
            ui,
            formatter,
            &from_tree,
            &to_tree,
            matcher,
            &copy_records,
            width,
        )
    }
}

/// Returns the copy records between `root` and `head` whose target path is
/// matched by `matcher`.
///
/// The backend stream is consumed through a blocking iterator; errors from
/// the stream are passed through as `Err` items.
pub fn get_copy_records<'a>(
    store: &'a Store,
    root: &CommitId,
    head: &CommitId,
    matcher: &'a dyn Matcher,
) -> BackendResult<impl Iterator<Item = BackendResult<CopyRecord>> + use<'a>> {
    // TODO: teach backend about matching path prefixes?
    let stream = store.get_copy_records(None, root, head)?;
    // TODO: test record.source as well? should be AND-ed or OR-ed?
    Ok(block_on_stream(stream).filter_ok(|record| matcher.matches(&record.target)))
}

/// Options controlling how lines are compared when diffing by line.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct LineDiffOptions {
    /// How equivalence of lines is tested.
    pub compare_mode: LineCompareMode,
    // TODO: add --ignore-blank-lines, etc. which aren't mutually exclusive.
}

impl LineDiffOptions {
    /// Sets `compare_mode` from the whitespace flags in `args`.
    ///
    /// The mode is always overwritten: it becomes `Exact` when neither flag
    /// is set.
    fn merge_args(&mut self, args: &DiffFormatArgs) {
        self.compare_mode = if args.ignore_all_space {
            LineCompareMode::IgnoreAllSpace
        } else if args.ignore_space_change {
            LineCompareMode::IgnoreSpaceChange
        } else {
            LineCompareMode::Exact
        };
    }
}

/// How two lines are tested for equivalence.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum LineCompareMode {
    /// Compares lines literally.
    #[default]
    Exact,
    /// Compares lines ignoring any whitespace occurrences.
    IgnoreAllSpace,
    /// Compares lines ignoring changes in whitespace amount.
    IgnoreSpaceChange,
}

/// Builds a line-level `Diff` of `inputs`, using the comparison selected by
/// `options.compare_mode`.
fn diff_by_line<'input, T: AsRef<[u8]> + ?Sized + 'input>(
    inputs: impl IntoIterator<Item = &'input T>,
    options: &LineDiffOptions,
) -> Diff<'input> {
    // TODO: If we add --ignore-blank-lines, its tokenizer will have to attach
    // blank lines to the preceding range. Maybe it can also be implemented as a
    // post-process (similar to refine_changed_regions()) that expands unchanged
    // regions across blank lines.
    match options.compare_mode {
        LineCompareMode::Exact => {
            Diff::for_tokenizer(inputs, find_line_ranges, CompareBytesExactly)
        }
        LineCompareMode::IgnoreAllSpace => {
            Diff::for_tokenizer(inputs, find_line_ranges, CompareBytesIgnoreAllWhitespace)
        }
        LineCompareMode::IgnoreSpaceChange => {
            Diff::for_tokenizer(inputs, find_line_ranges, CompareBytesIgnoreWhitespaceAmount)
        }
    }
}

/// Options for the color-words diff format.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ColorWordsDiffOptions {
    /// Number of context lines to show.
    pub context: usize,
    /// How lines are tokenized and compared.
    pub line_diff: LineDiffOptions,
    /// Maximum number of removed/added word alternation to inline.
    pub max_inline_alternation: Option<usize>,
}

impl ColorWordsDiffOptions {
    /// Loads the options from the `diff.color-words.*` settings.
    ///
    /// A `max-inline-alternation` of `-1` means unlimited (`None`). Any other
    /// value that does not fit in `usize` is reported as a
    /// `ConfigGetError::Type`. `line_diff` starts at its default.
    pub fn from_settings(settings: &UserSettings) -> Result<Self, ConfigGetError> {
        let max_inline_alternation = {
            let name = "diff.color-words.max-inline-alternation";
            match settings.get_int(name)? {
                -1 => None, // unlimited
                n => Some(usize::try_from(n).map_err(|err| ConfigGetError::Type {
                    name: name.to_owned(),
                    error: err.into(),
                    source_path: None,
                })?),
            }
        };
        Ok(ColorWordsDiffOptions {
            context: settings.get("diff.color-words.context")?,
            line_diff: LineDiffOptions::default(),
            max_inline_alternation,
        })
    }

    /// Applies command-line overrides: `--context` replaces `context` when
    /// given, and the whitespace flags set the line comparison mode.
    fn merge_args(&mut self, args: &DiffFormatArgs) {
        if let Some(context) = args.context {
            self.context = context;
        }
        self.line_diff.merge_args(args);
    }
}

/// Prints the color-words diff of the two `contents` (left, right).
///
/// Matching hunks are buffered and printed as context around changed hunks.
/// Nothing is printed if there is no changed hunk.
fn show_color_words_diff_hunks(
    formatter: &mut dyn Formatter,
    contents: [&BStr; 2],
    options: &ColorWordsDiffOptions,
) -> io::Result<()> {
    let line_diff = diff_by_line(contents, &options.line_diff);
    let mut line_number = DiffLineNumber { left: 1, right: 1 };
    // Matching entries shouldn't appear consecutively in diff of two inputs.
    // However, if the inputs have conflicts, there may be a hunk that can be
    // resolved, resulting [matching, resolved, matching] sequence.
    let mut contexts = Vec::new();
    // Whether a changed hunk has been printed yet. Until then, no "after"
    // context is printed ahead of the first change.
    let mut emitted = false;

    for hunk in line_diff.hunks() {
        let hunk_contents: [&BStr; 2] = hunk.contents[..].try_into().unwrap();
        match hunk.kind {
            DiffHunkKind::Matching => contexts.push(hunk_contents),
            DiffHunkKind::Different => {
                let num_after = if emitted { options.context } else { 0 };
                line_number = show_color_words_context_lines(
                    formatter,
                    &contexts,
                    line_number,
                    options,
                    num_after,
                    options.context,
                )?;
                contexts.clear();
                emitted = true;
                line_number =
                    show_color_words_diff_lines(formatter, hunk_contents, line_number, options)?;
            }
        }
    }

    // Trailing context after the last change: only "after" lines, no
    // "before" lines.
    if emitted {
        show_color_words_context_lines(
            formatter,
            &contexts,
            line_number,
            options,
            options.context,
            0,
        )?;
    }
    Ok(())
}

/// Prints `num_after` lines, ellipsis, and `num_before` lines.
///
/// `contexts` holds the buffered matching hunks as (left, right) pairs.
/// Returns the line numbers following the context.
fn show_color_words_context_lines(
    formatter: &mut dyn Formatter,
    contexts: &[[&BStr; 2]],
    mut line_number: DiffLineNumber,
    options: &ColorWordsDiffOptions,
    num_after: usize,
    num_before: usize,
) -> io::Result<DiffLineNumber> {
    // NOTE(review): whitespace inside this literal may have been collapsed in
    // this listing — confirm the intended indentation against the upstream
    // file.
    const SKIPPED_CONTEXT_LINE: &str = " ...\n";
    // Splits one side (0: left, 1: right) of the context into the leading
    // "after" lines, the trailing "before" lines (in reverse order), and the
    // number of lines in between.
    let extract = |side: usize| -> (Vec<&[u8]>, Vec<&[u8]>, u32) {
        let mut lines = contexts
            .iter()
            .flat_map(|contents| contents[side].split_inclusive(|b| *b == b'\n'))
            .fuse();
        let after_lines = lines.by_ref().take(num_after).collect();
        // One extra line is taken so that a gap of exactly one line can be
        // printed as is instead of being replaced by an ellipsis.
        let before_lines = lines.by_ref().rev().take(num_before + 1).collect();
        let num_skipped: u32 = lines.count().try_into().unwrap();
        (after_lines, before_lines, num_skipped)
    };
    // Prints context lines. If the two sides differ (which the matching
    // comparison may allow), they are printed as a diff instead.
    let show = |formatter: &mut dyn Formatter,
                [left_lines, right_lines]: [&[&[u8]]; 2],
                mut line_number: DiffLineNumber| {
        if left_lines == right_lines {
            for line in left_lines {
                show_color_words_line_number(
                    formatter,
                    [Some(line_number.left), Some(line_number.right)],
                )?;
                show_color_words_inline_hunks(
                    formatter,
                    &[(DiffLineHunkSide::Both, line.as_ref())],
                )?;
                line_number.left += 1;
                line_number.right += 1;
            }
            Ok(line_number)
        } else {
            let left = left_lines.concat();
            let right = right_lines.concat();
            show_color_words_diff_lines(
                formatter,
                [&left, &right].map(BStr::new),
                line_number,
                options,
            )
        }
    };

    let (left_after, mut left_before, num_left_skipped) = extract(0);
    let (right_after, mut right_before, num_right_skipped) = extract(1);
    line_number = show(formatter, [&left_after, &right_after], line_number)?;
    if num_left_skipped > 0 || num_right_skipped > 0 {
        write!(formatter, "{SKIPPED_CONTEXT_LINE}")?;
        line_number.left += num_left_skipped;
        line_number.right += num_right_skipped;
        // The extra line taken by `extract` is the last element (the lists
        // are still reversed); it joins the skipped lines.
        if left_before.len() > num_before {
            left_before.pop();
            line_number.left += 1;
        }
        if right_before.len() > num_before {
            right_before.pop();
            line_number.right += 1;
        }
    }
    // Restore source order before printing.
    left_before.reverse();
    right_before.reverse();
    line_number = show(formatter, [&left_before, &right_before], line_number)?;
    Ok(line_number)
}

/// Prints the changed lines of `contents` (left, right) as a word-level diff.
///
/// Removed and added words are shown inline on shared lines unless the
/// alternation count exceeds `options.max_inline_alternation`; in that case
/// all left lines are printed first, followed by all right lines. Returns the
/// line numbers following the printed lines.
fn show_color_words_diff_lines(
    formatter: &mut dyn Formatter,
    contents: [&BStr; 2],
    mut line_number: DiffLineNumber,
    options: &ColorWordsDiffOptions,
) -> io::Result<DiffLineNumber> {
    let word_diff_hunks = Diff::by_word(contents).hunks().collect_vec();
    let can_inline = match options.max_inline_alternation {
        None => true,     // unlimited
        Some(0) => false, // no need to count alternation
        Some(max_num) => {
            let groups = split_diff_hunks_by_matching_newline(&word_diff_hunks);
            groups.map(count_diff_alternation).max().unwrap_or(0) <= max_num
        }
    };
    if can_inline {
        let mut diff_line_iter =
            DiffLineIterator::with_line_number(word_diff_hunks.iter(), line_number);
        for diff_line in diff_line_iter.by_ref() {
            show_color_words_line_number(
                formatter,
                [
                    diff_line
                        .has_left_content()
                        .then_some(diff_line.line_number.left),
                    diff_line
                        .has_right_content()
                        .then_some(diff_line.line_number.right),
                ],
            )?;
            show_color_words_inline_hunks(formatter, &diff_line.hunks)?;
        }
        line_number = diff_line_iter.next_line_number();
    } else {
        let [left_lines, right_lines] = unzip_diff_hunks_to_lines(&word_diff_hunks);
        for tokens in &left_lines {
            show_color_words_line_number(formatter, [Some(line_number.left), None])?;
            show_color_words_single_sided_line(formatter, tokens, "removed")?;
            line_number.left += 1;
        }
        for tokens in &right_lines {
            show_color_words_line_number(formatter, [None, Some(line_number.right)])?;
            show_color_words_single_sided_line(formatter, tokens, "added")?;
            line_number.right += 1;
        }
    }
    Ok(line_number)
}

/// Prints the left and right line-number columns that prefix a diff line.
///
/// A `None` entry means the line has no content on that side; padding is
/// written in place of the number.
fn show_color_words_line_number(
    formatter: &mut dyn Formatter,
    [left_line_number, right_line_number]: [Option<u32>; 2],
) -> io::Result<()> {
    // NOTE(review): the padding literals in the `else` arms look too short to
    // line up with the 4-wide `{line_number:>4}` output — whitespace may have
    // been collapsed in this listing; confirm against the upstream file.
    if let Some(line_number) = left_line_number {
        formatter.with_label("removed", |formatter| {
            write!(formatter.labeled("line_number"), "{line_number:>4}")
        })?;
        write!(formatter, " ")?;
    } else {
        write!(formatter, " ")?;
    }
    if let Some(line_number) = right_line_number {
        formatter.with_label("added", |formatter| {
            write!(formatter.labeled("line_number"), "{line_number:>4}",)
        })?;
        write!(formatter, ": ")?;
    } else {
        write!(formatter, " : ")?;
    }
    Ok(())
}

/// Prints line hunks which may contain tokens originating from both sides.
778fn show_color_words_inline_hunks( 779 formatter: &mut dyn Formatter, 780 line_hunks: &[(DiffLineHunkSide, &BStr)], 781) -> io::Result<()> { 782 for (side, data) in line_hunks { 783 let label = match side { 784 DiffLineHunkSide::Both => None, 785 DiffLineHunkSide::Left => Some("removed"), 786 DiffLineHunkSide::Right => Some("added"), 787 }; 788 if let Some(label) = label { 789 formatter.with_label(label, |formatter| { 790 formatter.with_label("token", |formatter| formatter.write_all(data)) 791 })?; 792 } else { 793 formatter.write_all(data)?; 794 } 795 } 796 let (_, data) = line_hunks.last().expect("diff line must not be empty"); 797 if !data.ends_with(b"\n") { 798 writeln!(formatter)?; 799 }; 800 Ok(()) 801} 802 803/// Prints left/right-only line tokens with the given label. 804fn show_color_words_single_sided_line( 805 formatter: &mut dyn Formatter, 806 tokens: &[(DiffTokenType, &[u8])], 807 label: &str, 808) -> io::Result<()> { 809 formatter.with_label(label, |formatter| show_diff_line_tokens(formatter, tokens))?; 810 let (_, data) = tokens.last().expect("diff line must not be empty"); 811 if !data.ends_with(b"\n") { 812 writeln!(formatter)?; 813 }; 814 Ok(()) 815} 816 817/// Counts number of diff-side alternation, ignoring matching hunks. 818/// 819/// This function is meant to measure visual complexity of diff hunks. It's easy 820/// to read hunks containing some removed or added words, but is getting harder 821/// as more removes and adds interleaved. 
///
/// For example,
/// - `[matching]` -> 0
/// - `[left]` -> 1
/// - `[left, matching, left]` -> 1
/// - `[matching, left, right, matching, right]` -> 2
/// - `[left, right, matching, right, left]` -> 3
fn count_diff_alternation(diff_hunks: &[DiffHunk]) -> usize {
    diff_hunks
        .iter()
        .filter_map(|hunk| match hunk.kind {
            DiffHunkKind::Matching => None,
            DiffHunkKind::Different => Some(&hunk.contents),
        })
        // Map non-empty diff side to index (0: left, 1: right)
        .flat_map(|contents| contents.iter().positions(|content| !content.is_empty()))
        // Omit e.g. left->(matching->)*left
        .dedup()
        .count()
}

/// Splits hunks into slices of contiguous changed lines.
///
/// A slice ends at (and includes) a matching hunk whose content contains a
/// newline on every side.
fn split_diff_hunks_by_matching_newline<'a, 'b>(
    diff_hunks: &'a [DiffHunk<'b>],
) -> impl Iterator<Item = &'a [DiffHunk<'b>]> {
    diff_hunks.split_inclusive(|hunk| match hunk.kind {
        DiffHunkKind::Matching => hunk.contents.iter().all(|content| content.contains(&b'\n')),
        DiffHunkKind::Different => false,
    })
}

/// Content of one side of a diff entry, as it will be shown to the user.
struct FileContent {
    /// false if this file is likely text; true if it is likely binary.
    is_binary: bool,
    /// The bytes to diff. For non-file values this is a short description
    /// built by `diff_content()`.
    contents: BString,
}

impl FileContent {
    /// Returns empty, non-binary content.
    fn empty() -> Self {
        Self {
            is_binary: false,
            contents: BString::default(),
        }
    }

    /// Returns true if there are no content bytes.
    pub(crate) fn is_empty(&self) -> bool {
        self.contents.is_empty()
    }
}

/// Reads the whole `file` and classifies it as binary if a NUL byte occurs
/// within its first `PEEK_SIZE` bytes.
fn file_content_for_diff(
    path: &RepoPath,
    file: &mut MaterializedFileValue,
) -> BackendResult<FileContent> {
    // If this is a binary file, don't show the full contents.
    // Determine whether it's binary by whether the first 8k bytes contain a null
    // character; this is the same heuristic used by git as of writing: https://github.com/git/git/blob/eea0e59ffbed6e33d171ace5be13cde9faa41639/xdiff-interface.c#L192-L198
    const PEEK_SIZE: usize = 8000;
    // TODO: currently we look at the whole file, even though for binary files we
    // only need to know the file size. To change that we'd have to extend all
    // the data backends to support getting the length.
    let contents = BString::new(file.read_all(path)?);
    // Clamp the peek window so files shorter than PEEK_SIZE don't panic.
    let start = &contents[..PEEK_SIZE.min(contents.len())];
    Ok(FileContent {
        is_binary: start.contains(&b'\0'),
        contents,
    })
}

/// Converts a materialized tree value into the content to be diffed.
///
/// Files are read in full; every other kind of value is turned into text
/// (symlink target, materialized conflict, or a one-line description) and is
/// never flagged as binary.
///
/// # Panics
///
/// Panics if `value` is a `Tree`.
fn diff_content(
    path: &RepoPath,
    value: MaterializedTreeValue,
    conflict_marker_style: ConflictMarkerStyle,
) -> BackendResult<FileContent> {
    match value {
        MaterializedTreeValue::Absent => Ok(FileContent::empty()),
        MaterializedTreeValue::AccessDenied(err) => Ok(FileContent {
            is_binary: false,
            contents: format!("Access denied: {err}").into(),
        }),
        MaterializedTreeValue::File(mut file) => file_content_for_diff(path, &mut file),
        MaterializedTreeValue::Symlink { id: _, target } => Ok(FileContent {
            // Unix file paths can't contain null bytes.
            is_binary: false,
            contents: target.into(),
        }),
        MaterializedTreeValue::GitSubmodule(id) => Ok(FileContent {
            is_binary: false,
            contents: format!("Git submodule checked out at {id}").into(),
        }),
        // TODO: are we sure this is never binary?
        MaterializedTreeValue::FileConflict {
            id: _,
            contents,
            executable: _,
        } => Ok(FileContent {
            is_binary: false,
            contents: materialize_merge_result_to_bytes(&contents, conflict_marker_style),
        }),
        MaterializedTreeValue::OtherConflict { id } => Ok(FileContent {
            is_binary: false,
            contents: id.describe().into(),
        }),
        MaterializedTreeValue::Tree(id) => {
            panic!("Unexpected tree with id {id:?} in diff at path {path:?}");
        }
    }
}

/// Returns a short, lowercase description of the kind of `value`, for use in
/// diff headers.
///
/// # Panics
///
/// Panics if `value` is `Absent`.
fn basic_diff_file_type(value: &MaterializedTreeValue) -> &'static str {
    match value {
        MaterializedTreeValue::Absent => {
            panic!("absent path in diff");
        }
        MaterializedTreeValue::AccessDenied(_) => "access denied",
        MaterializedTreeValue::File(file) => {
            if file.executable {
                "executable file"
            } else {
                "regular file"
            }
        }
        MaterializedTreeValue::Symlink { .. } => "symlink",
        MaterializedTreeValue::Tree(_) => "tree",
        MaterializedTreeValue::GitSubmodule(_) => "Git submodule",
        MaterializedTreeValue::FileConflict { .. }
        | MaterializedTreeValue::OtherConflict { .. } => "conflict",
    }
}

/// Prints a color-words diff for every entry of `tree_diff`.
///
/// Each entry gets a header line describing the change (added, removed, or
/// the kind of modification) followed by the word-level hunks. Empty and
/// binary contents are replaced by a short marker, and entries that could not
/// be read due to denied access are reported and skipped.
pub fn show_color_words_diff(
    formatter: &mut dyn Formatter,
    store: &Store,
    tree_diff: BoxStream<CopiesTreeDiffEntry>,
    path_converter: &RepoPathUiConverter,
    options: &ColorWordsDiffOptions,
    conflict_marker_style: ConflictMarkerStyle,
) -> Result<(), DiffRenderError> {
    let mut diff_stream = materialized_diff_stream(store, tree_diff);
    // The stream is consumed inside an async block that is driven to
    // completion by `block_on()` at the end of this function.
    async {
        while let Some(MaterializedTreeDiffEntry { path, values }) = diff_stream.next().await {
            let left_path = path.source();
            let right_path = path.target();
            let left_ui_path = path_converter.format_file_path(left_path);
            let right_ui_path = path_converter.format_file_path(right_path);
            let (left_value, right_value) = values?;

            // Report access-denied entries and move on; no diff is shown for
            // them.
            match (&left_value, &right_value) {
                (MaterializedTreeValue::AccessDenied(source), _) => {
                    write!(
                        formatter.labeled("access-denied"),
                        "Access denied to {left_ui_path}:"
                    )?;
                    writeln!(formatter, " {source}")?;
                    continue;
                }
                (_, MaterializedTreeValue::AccessDenied(source)) => {
                    write!(
                        formatter.labeled("access-denied"),
                        "Access denied to {right_ui_path}:"
                    )?;
                    writeln!(formatter, " {source}")?;
                    continue;
                }
                _ => {}
            }
            // NOTE(review): leading whitespace in the " (empty)" / " (binary)"
            // literals below may have been collapsed in this listing — confirm
            // against the upstream file.
            if left_value.is_absent() {
                // Added path.
                let description = basic_diff_file_type(&right_value);
                writeln!(
                    formatter.labeled("header"),
                    "Added {description} {right_ui_path}:"
                )?;
                let right_content = diff_content(right_path, right_value, conflict_marker_style)?;
                if right_content.is_empty() {
                    writeln!(formatter.labeled("empty"), " (empty)")?;
                } else if right_content.is_binary {
                    writeln!(formatter.labeled("binary"), " (binary)")?;
                } else {
                    show_color_words_diff_hunks(
                        formatter,
                        [BStr::new(""), right_content.contents.as_ref()],
                        options,
                    )?;
                }
            } else if right_value.is_present() {
                // Present on both sides: modified path. The more specific
                // arms come first; the final arm covers any other change of
                // kind.
                let description = match (&left_value, &right_value) {
                    (MaterializedTreeValue::File(left), MaterializedTreeValue::File(right)) => {
                        if left.executable && right.executable {
                            "Modified executable file".to_string()
                        } else if left.executable {
                            "Executable file became non-executable at".to_string()
                        } else if right.executable {
                            "Non-executable file became executable at".to_string()
                        } else {
                            "Modified regular file".to_string()
                        }
                    }
                    (
                        MaterializedTreeValue::FileConflict { .. }
                        | MaterializedTreeValue::OtherConflict { .. },
                        MaterializedTreeValue::FileConflict { .. }
                        | MaterializedTreeValue::OtherConflict { .. },
                    ) => "Modified conflict in".to_string(),
                    (
                        MaterializedTreeValue::FileConflict { .. }
                        | MaterializedTreeValue::OtherConflict { .. },
                        _,
                    ) => "Resolved conflict in".to_string(),
                    (
                        _,
                        MaterializedTreeValue::FileConflict { .. }
                        | MaterializedTreeValue::OtherConflict { .. },
                    ) => "Created conflict in".to_string(),
                    (
                        MaterializedTreeValue::Symlink { .. },
                        MaterializedTreeValue::Symlink { .. },
                    ) => "Symlink target changed at".to_string(),
                    (_, _) => {
                        let left_type = basic_diff_file_type(&left_value);
                        let right_type = basic_diff_file_type(&right_value);
                        // Capitalize the first letter of the left type so it
                        // can start the header sentence.
                        let (first, rest) = left_type.split_at(1);
                        format!(
                            "{}{} became {} at",
                            first.to_ascii_uppercase(),
                            rest,
                            right_type
                        )
                    }
                };
                let left_content = diff_content(left_path, left_value, conflict_marker_style)?;
                let right_content = diff_content(right_path, right_value, conflict_marker_style)?;
                if left_path == right_path {
                    writeln!(
                        formatter.labeled("header"),
                        "{description} {right_ui_path}:"
                    )?;
                } else {
                    // Source and target differ: also show where the content
                    // came from.
                    writeln!(
                        formatter.labeled("header"),
                        "{description} {right_ui_path} ({left_ui_path} => {right_ui_path}):"
                    )?;
                }
                if left_content.is_binary || right_content.is_binary {
                    writeln!(formatter.labeled("binary"), " (binary)")?;
                } else {
                    show_color_words_diff_hunks(
                        formatter,
                        [&left_content.contents, &right_content.contents].map(BStr::new),
                        options,
                    )?;
                }
            } else {
                // Removed path.
                let description = basic_diff_file_type(&left_value);
                writeln!(
                    formatter.labeled("header"),
                    "Removed {description} {right_ui_path}:"
                )?;
                let left_content = diff_content(left_path, left_value, conflict_marker_style)?;
                if left_content.is_empty() {
                    writeln!(formatter.labeled("empty"), " (empty)")?;
                } else if left_content.is_binary {
                    writeln!(formatter.labeled("binary"), " (binary)")?;
                } else {
                    show_color_words_diff_hunks(
                        formatter,
                        [left_content.contents.as_ref(), BStr::new("")],
                        options,
                    )?;
                }
            }
        }
        Ok(())
    }
    .block_on()
}

pub fn show_file_by_file_diff(
    ui: &Ui,
    formatter: &mut dyn Formatter,
    store: &Store,
    tree_diff: BoxStream<CopiesTreeDiffEntry>,
    path_converter: &RepoPathUiConverter,
    tool: &ExternalMergeTool,
conflict_marker_style: ConflictMarkerStyle, 1106) -> Result<(), DiffRenderError> { 1107 let create_file = |path: &RepoPath, 1108 wc_dir: &Path, 1109 value: MaterializedTreeValue| 1110 -> Result<PathBuf, DiffRenderError> { 1111 let fs_path = path.to_fs_path(wc_dir)?; 1112 std::fs::create_dir_all(fs_path.parent().unwrap())?; 1113 let content = diff_content(path, value, conflict_marker_style)?; 1114 std::fs::write(&fs_path, content.contents)?; 1115 Ok(fs_path) 1116 }; 1117 1118 let temp_dir = new_utf8_temp_dir("jj-diff-")?; 1119 let left_wc_dir = temp_dir.path().join("left"); 1120 let right_wc_dir = temp_dir.path().join("right"); 1121 let mut diff_stream = materialized_diff_stream(store, tree_diff); 1122 async { 1123 while let Some(MaterializedTreeDiffEntry { path, values }) = diff_stream.next().await { 1124 let (left_value, right_value) = values?; 1125 let left_path = path.source(); 1126 let right_path = path.target(); 1127 let left_ui_path = path_converter.format_file_path(left_path); 1128 let right_ui_path = path_converter.format_file_path(right_path); 1129 1130 match (&left_value, &right_value) { 1131 (_, MaterializedTreeValue::AccessDenied(source)) => { 1132 write!( 1133 formatter.labeled("access-denied"), 1134 "Access denied to {right_ui_path}:" 1135 )?; 1136 writeln!(formatter, " {source}")?; 1137 continue; 1138 } 1139 (MaterializedTreeValue::AccessDenied(source), _) => { 1140 write!( 1141 formatter.labeled("access-denied"), 1142 "Access denied to {left_ui_path}:" 1143 )?; 1144 writeln!(formatter, " {source}")?; 1145 continue; 1146 } 1147 _ => {} 1148 } 1149 let left_path = create_file(left_path, &left_wc_dir, left_value)?; 1150 let right_path = create_file(right_path, &right_wc_dir, right_value)?; 1151 1152 let mut writer = formatter.raw()?; 1153 invoke_external_diff( 1154 ui, 1155 writer.as_mut(), 1156 tool, 1157 &maplit::hashmap! 
{ 1158 "left" => left_path.to_str().expect("temp_dir should be valid utf-8"), 1159 "right" => right_path.to_str().expect("temp_dir should be valid utf-8"), 1160 }, 1161 ) 1162 .map_err(DiffRenderError::DiffGenerate)?; 1163 } 1164 Ok::<(), DiffRenderError>(()) 1165 } 1166 .block_on() 1167} 1168 1169struct GitDiffPart { 1170 /// Octal mode string or `None` if the file is absent. 1171 mode: Option<&'static str>, 1172 hash: String, 1173 content: FileContent, 1174} 1175 1176fn git_diff_part( 1177 path: &RepoPath, 1178 value: MaterializedTreeValue, 1179 conflict_marker_style: ConflictMarkerStyle, 1180) -> Result<GitDiffPart, DiffRenderError> { 1181 const DUMMY_HASH: &str = "0000000000"; 1182 let mode; 1183 let mut hash; 1184 let content; 1185 match value { 1186 MaterializedTreeValue::Absent => { 1187 return Ok(GitDiffPart { 1188 mode: None, 1189 hash: DUMMY_HASH.to_owned(), 1190 content: FileContent::empty(), 1191 }); 1192 } 1193 MaterializedTreeValue::AccessDenied(err) => { 1194 return Err(DiffRenderError::AccessDenied { 1195 path: path.as_internal_file_string().to_owned(), 1196 source: err, 1197 }); 1198 } 1199 MaterializedTreeValue::File(mut file) => { 1200 mode = if file.executable { "100755" } else { "100644" }; 1201 hash = file.id.hex(); 1202 content = file_content_for_diff(path, &mut file)?; 1203 } 1204 MaterializedTreeValue::Symlink { id, target } => { 1205 mode = "120000"; 1206 hash = id.hex(); 1207 content = FileContent { 1208 // Unix file paths can't contain null bytes. 1209 is_binary: false, 1210 contents: target.into(), 1211 }; 1212 } 1213 MaterializedTreeValue::GitSubmodule(id) => { 1214 // TODO: What should we actually do here? 
1215 mode = "040000"; 1216 hash = id.hex(); 1217 content = FileContent::empty(); 1218 } 1219 MaterializedTreeValue::FileConflict { 1220 id: _, 1221 contents, 1222 executable, 1223 } => { 1224 mode = if executable { "100755" } else { "100644" }; 1225 hash = DUMMY_HASH.to_owned(); 1226 content = FileContent { 1227 is_binary: false, // TODO: are we sure this is never binary? 1228 contents: materialize_merge_result_to_bytes(&contents, conflict_marker_style), 1229 }; 1230 } 1231 MaterializedTreeValue::OtherConflict { id } => { 1232 mode = "100644"; 1233 hash = DUMMY_HASH.to_owned(); 1234 content = FileContent { 1235 is_binary: false, 1236 contents: id.describe().into(), 1237 }; 1238 } 1239 MaterializedTreeValue::Tree(_) => { 1240 panic!("Unexpected tree in diff at path {path:?}"); 1241 } 1242 } 1243 hash.truncate(10); 1244 Ok(GitDiffPart { 1245 mode: Some(mode), 1246 hash, 1247 content, 1248 }) 1249} 1250 1251#[derive(Clone, Debug, Eq, PartialEq)] 1252pub struct UnifiedDiffOptions { 1253 /// Number of context lines to show. 1254 pub context: usize, 1255 /// How lines are tokenized and compared. 
1256 pub line_diff: LineDiffOptions, 1257} 1258 1259impl UnifiedDiffOptions { 1260 pub fn from_settings(settings: &UserSettings) -> Result<Self, ConfigGetError> { 1261 Ok(UnifiedDiffOptions { 1262 context: settings.get("diff.git.context")?, 1263 line_diff: LineDiffOptions::default(), 1264 }) 1265 } 1266 1267 fn merge_args(&mut self, args: &DiffFormatArgs) { 1268 if let Some(context) = args.context { 1269 self.context = context; 1270 } 1271 self.line_diff.merge_args(args); 1272 } 1273} 1274 1275#[derive(Clone, Copy, Debug, Eq, PartialEq)] 1276enum DiffLineType { 1277 Context, 1278 Removed, 1279 Added, 1280} 1281 1282#[derive(Clone, Copy, Debug, Eq, PartialEq)] 1283enum DiffTokenType { 1284 Matching, 1285 Different, 1286} 1287 1288type DiffTokenVec<'content> = Vec<(DiffTokenType, &'content [u8])>; 1289 1290struct UnifiedDiffHunk<'content> { 1291 left_line_range: Range<usize>, 1292 right_line_range: Range<usize>, 1293 lines: Vec<(DiffLineType, DiffTokenVec<'content>)>, 1294} 1295 1296impl<'content> UnifiedDiffHunk<'content> { 1297 fn extend_context_lines(&mut self, lines: impl IntoIterator<Item = &'content [u8]>) { 1298 let old_len = self.lines.len(); 1299 self.lines.extend(lines.into_iter().map(|line| { 1300 let tokens = vec![(DiffTokenType::Matching, line)]; 1301 (DiffLineType::Context, tokens) 1302 })); 1303 self.left_line_range.end += self.lines.len() - old_len; 1304 self.right_line_range.end += self.lines.len() - old_len; 1305 } 1306 1307 fn extend_removed_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) { 1308 let old_len = self.lines.len(); 1309 self.lines 1310 .extend(lines.into_iter().map(|line| (DiffLineType::Removed, line))); 1311 self.left_line_range.end += self.lines.len() - old_len; 1312 } 1313 1314 fn extend_added_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) { 1315 let old_len = self.lines.len(); 1316 self.lines 1317 .extend(lines.into_iter().map(|line| (DiffLineType::Added, line))); 1318 
self.right_line_range.end += self.lines.len() - old_len; 1319 } 1320} 1321 1322fn unified_diff_hunks<'content>( 1323 contents: [&'content BStr; 2], 1324 options: &UnifiedDiffOptions, 1325) -> Vec<UnifiedDiffHunk<'content>> { 1326 let mut hunks = vec![]; 1327 let mut current_hunk = UnifiedDiffHunk { 1328 left_line_range: 0..0, 1329 right_line_range: 0..0, 1330 lines: vec![], 1331 }; 1332 let diff = diff_by_line(contents, &options.line_diff); 1333 let mut diff_hunks = diff.hunks().peekable(); 1334 while let Some(hunk) = diff_hunks.next() { 1335 match hunk.kind { 1336 DiffHunkKind::Matching => { 1337 // Just use the right (i.e. new) content. We could count the 1338 // number of skipped lines separately, but the number of the 1339 // context lines should match the displayed content. 1340 let [_, right] = hunk.contents[..].try_into().unwrap(); 1341 let mut lines = right.split_inclusive(|b| *b == b'\n').fuse(); 1342 if !current_hunk.lines.is_empty() { 1343 // The previous hunk line should be either removed/added. 1344 current_hunk.extend_context_lines(lines.by_ref().take(options.context)); 1345 } 1346 let before_lines = if diff_hunks.peek().is_some() { 1347 lines.by_ref().rev().take(options.context).collect() 1348 } else { 1349 vec![] // No more hunks 1350 }; 1351 let num_skip_lines = lines.count(); 1352 if num_skip_lines > 0 { 1353 let left_start = current_hunk.left_line_range.end + num_skip_lines; 1354 let right_start = current_hunk.right_line_range.end + num_skip_lines; 1355 if !current_hunk.lines.is_empty() { 1356 hunks.push(current_hunk); 1357 } 1358 current_hunk = UnifiedDiffHunk { 1359 left_line_range: left_start..left_start, 1360 right_line_range: right_start..right_start, 1361 lines: vec![], 1362 }; 1363 } 1364 // The next hunk should be of DiffHunk::Different type if any. 
1365 current_hunk.extend_context_lines(before_lines.into_iter().rev()); 1366 } 1367 DiffHunkKind::Different => { 1368 let [left_lines, right_lines] = 1369 unzip_diff_hunks_to_lines(Diff::by_word(hunk.contents).hunks()); 1370 current_hunk.extend_removed_lines(left_lines); 1371 current_hunk.extend_added_lines(right_lines); 1372 } 1373 } 1374 } 1375 if !current_hunk.lines.is_empty() { 1376 hunks.push(current_hunk); 1377 } 1378 hunks 1379} 1380 1381/// Splits `[left, right]` hunk pairs into `[left_lines, right_lines]`. 1382fn unzip_diff_hunks_to_lines<'content, I>(diff_hunks: I) -> [Vec<DiffTokenVec<'content>>; 2] 1383where 1384 I: IntoIterator, 1385 I::Item: Borrow<DiffHunk<'content>>, 1386{ 1387 let mut left_lines: Vec<DiffTokenVec<'content>> = vec![]; 1388 let mut right_lines: Vec<DiffTokenVec<'content>> = vec![]; 1389 let mut left_tokens: DiffTokenVec<'content> = vec![]; 1390 let mut right_tokens: DiffTokenVec<'content> = vec![]; 1391 1392 for hunk in diff_hunks { 1393 let hunk = hunk.borrow(); 1394 match hunk.kind { 1395 DiffHunkKind::Matching => { 1396 // TODO: add support for unmatched contexts 1397 debug_assert!(hunk.contents.iter().all_equal()); 1398 for token in hunk.contents[0].split_inclusive(|b| *b == b'\n') { 1399 left_tokens.push((DiffTokenType::Matching, token)); 1400 right_tokens.push((DiffTokenType::Matching, token)); 1401 if token.ends_with(b"\n") { 1402 left_lines.push(mem::take(&mut left_tokens)); 1403 right_lines.push(mem::take(&mut right_tokens)); 1404 } 1405 } 1406 } 1407 DiffHunkKind::Different => { 1408 let [left, right] = hunk.contents[..] 
1409 .try_into() 1410 .expect("hunk should have exactly two inputs"); 1411 for token in left.split_inclusive(|b| *b == b'\n') { 1412 left_tokens.push((DiffTokenType::Different, token)); 1413 if token.ends_with(b"\n") { 1414 left_lines.push(mem::take(&mut left_tokens)); 1415 } 1416 } 1417 for token in right.split_inclusive(|b| *b == b'\n') { 1418 right_tokens.push((DiffTokenType::Different, token)); 1419 if token.ends_with(b"\n") { 1420 right_lines.push(mem::take(&mut right_tokens)); 1421 } 1422 } 1423 } 1424 } 1425 } 1426 1427 if !left_tokens.is_empty() { 1428 left_lines.push(left_tokens); 1429 } 1430 if !right_tokens.is_empty() { 1431 right_lines.push(right_tokens); 1432 } 1433 [left_lines, right_lines] 1434} 1435 1436fn show_unified_diff_hunks( 1437 formatter: &mut dyn Formatter, 1438 contents: [&BStr; 2], 1439 options: &UnifiedDiffOptions, 1440) -> io::Result<()> { 1441 // "If the chunk size is 0, the first number is one lower than one would 1442 // expect." - https://www.artima.com/weblogs/viewpost.jsp?thread=164293 1443 // 1444 // The POSIX spec also states that "the ending line number of an empty range 1445 // shall be the number of the preceding line, or 0 if the range is at the 1446 // start of the file." 
1447 // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/diff.html 1448 fn to_line_number(range: Range<usize>) -> usize { 1449 if range.is_empty() { 1450 range.start 1451 } else { 1452 range.start + 1 1453 } 1454 } 1455 1456 for hunk in unified_diff_hunks(contents, options) { 1457 writeln!( 1458 formatter.labeled("hunk_header"), 1459 "@@ -{},{} +{},{} @@", 1460 to_line_number(hunk.left_line_range.clone()), 1461 hunk.left_line_range.len(), 1462 to_line_number(hunk.right_line_range.clone()), 1463 hunk.right_line_range.len() 1464 )?; 1465 for (line_type, tokens) in &hunk.lines { 1466 let (label, sigil) = match line_type { 1467 DiffLineType::Context => ("context", " "), 1468 DiffLineType::Removed => ("removed", "-"), 1469 DiffLineType::Added => ("added", "+"), 1470 }; 1471 formatter.with_label(label, |formatter| { 1472 write!(formatter, "{sigil}")?; 1473 show_diff_line_tokens(formatter, tokens) 1474 })?; 1475 let (_, content) = tokens.last().expect("hunk line must not be empty"); 1476 if !content.ends_with(b"\n") { 1477 write!(formatter, "\n\\ No newline at end of file\n")?; 1478 } 1479 } 1480 } 1481 Ok(()) 1482} 1483 1484fn show_diff_line_tokens( 1485 formatter: &mut dyn Formatter, 1486 tokens: &[(DiffTokenType, &[u8])], 1487) -> io::Result<()> { 1488 for (token_type, content) in tokens { 1489 match token_type { 1490 DiffTokenType::Matching => formatter.write_all(content)?, 1491 DiffTokenType::Different => { 1492 formatter.with_label("token", |formatter| formatter.write_all(content))?; 1493 } 1494 } 1495 } 1496 Ok(()) 1497} 1498 1499pub fn show_git_diff( 1500 formatter: &mut dyn Formatter, 1501 store: &Store, 1502 tree_diff: BoxStream<CopiesTreeDiffEntry>, 1503 options: &UnifiedDiffOptions, 1504 conflict_marker_style: ConflictMarkerStyle, 1505) -> Result<(), DiffRenderError> { 1506 let mut diff_stream = materialized_diff_stream(store, tree_diff); 1507 async { 1508 while let Some(MaterializedTreeDiffEntry { path, values }) = diff_stream.next().await { 1509 
let left_path = path.source(); 1510 let right_path = path.target(); 1511 let left_path_string = left_path.as_internal_file_string(); 1512 let right_path_string = right_path.as_internal_file_string(); 1513 let (left_value, right_value) = values?; 1514 1515 let left_part = git_diff_part(left_path, left_value, conflict_marker_style)?; 1516 let right_part = git_diff_part(right_path, right_value, conflict_marker_style)?; 1517 1518 formatter.with_label("file_header", |formatter| { 1519 writeln!( 1520 formatter, 1521 "diff --git a/{left_path_string} b/{right_path_string}" 1522 )?; 1523 let left_hash = &left_part.hash; 1524 let right_hash = &right_part.hash; 1525 match (left_part.mode, right_part.mode) { 1526 (None, Some(right_mode)) => { 1527 writeln!(formatter, "new file mode {right_mode}")?; 1528 writeln!(formatter, "index {left_hash}..{right_hash}")?; 1529 } 1530 (Some(left_mode), None) => { 1531 writeln!(formatter, "deleted file mode {left_mode}")?; 1532 writeln!(formatter, "index {left_hash}..{right_hash}")?; 1533 } 1534 (Some(left_mode), Some(right_mode)) => { 1535 if let Some(op) = path.copy_operation() { 1536 let operation = match op { 1537 CopyOperation::Copy => "copy", 1538 CopyOperation::Rename => "rename", 1539 }; 1540 // TODO: include similarity index? 
1541 writeln!(formatter, "{operation} from {left_path_string}")?; 1542 writeln!(formatter, "{operation} to {right_path_string}")?; 1543 } 1544 if left_mode != right_mode { 1545 writeln!(formatter, "old mode {left_mode}")?; 1546 writeln!(formatter, "new mode {right_mode}")?; 1547 if left_hash != right_hash { 1548 writeln!(formatter, "index {left_hash}..{right_hash}")?; 1549 } 1550 } else if left_hash != right_hash { 1551 writeln!(formatter, "index {left_hash}..{right_hash} {left_mode}")?; 1552 } 1553 } 1554 (None, None) => panic!("either left or right part should be present"), 1555 } 1556 Ok::<(), DiffRenderError>(()) 1557 })?; 1558 1559 if left_part.content.contents == right_part.content.contents { 1560 continue; // no content hunks 1561 } 1562 1563 let left_path = match left_part.mode { 1564 Some(_) => format!("a/{left_path_string}"), 1565 None => "/dev/null".to_owned(), 1566 }; 1567 let right_path = match right_part.mode { 1568 Some(_) => format!("b/{right_path_string}"), 1569 None => "/dev/null".to_owned(), 1570 }; 1571 if left_part.content.is_binary || right_part.content.is_binary { 1572 // TODO: add option to emit Git binary diff 1573 writeln!( 1574 formatter, 1575 "Binary files {left_path} and {right_path} differ" 1576 )?; 1577 } else { 1578 formatter.with_label("file_header", |formatter| { 1579 writeln!(formatter, "--- {left_path}")?; 1580 writeln!(formatter, "+++ {right_path}")?; 1581 io::Result::Ok(()) 1582 })?; 1583 show_unified_diff_hunks( 1584 formatter, 1585 [&left_part.content.contents, &right_part.content.contents].map(BStr::new), 1586 options, 1587 )?; 1588 } 1589 } 1590 Ok(()) 1591 } 1592 .block_on() 1593} 1594 1595#[instrument(skip_all)] 1596pub fn show_diff_summary( 1597 formatter: &mut dyn Formatter, 1598 mut tree_diff: BoxStream<CopiesTreeDiffEntry>, 1599 path_converter: &RepoPathUiConverter, 1600) -> Result<(), DiffRenderError> { 1601 async { 1602 while let Some(CopiesTreeDiffEntry { path, values }) = tree_diff.next().await { 1603 let (before, 
after) = values?; 1604 let (label, sigil) = diff_status_label_and_char(&path, &before, &after); 1605 let path = if path.copy_operation().is_some() { 1606 path_converter.format_copied_path(path.source(), path.target()) 1607 } else { 1608 path_converter.format_file_path(path.target()) 1609 }; 1610 writeln!(formatter.labeled(label), "{sigil} {path}")?; 1611 } 1612 Ok(()) 1613 } 1614 .block_on() 1615} 1616 1617pub fn diff_status_label_and_char( 1618 path: &CopiesTreeDiffEntryPath, 1619 before: &MergedTreeValue, 1620 after: &MergedTreeValue, 1621) -> (&'static str, char) { 1622 if let Some(op) = path.copy_operation() { 1623 match op { 1624 CopyOperation::Copy => ("copied", 'C'), 1625 CopyOperation::Rename => ("renamed", 'R'), 1626 } 1627 } else { 1628 match (before.is_present(), after.is_present()) { 1629 (true, true) => ("modified", 'M'), 1630 (false, true) => ("added", 'A'), 1631 (true, false) => ("removed", 'D'), 1632 (false, false) => panic!("values pair must differ"), 1633 } 1634 } 1635} 1636 1637#[derive(Clone, Debug, Default, Eq, PartialEq)] 1638pub struct DiffStatOptions { 1639 /// How lines are tokenized and compared. 1640 pub line_diff: LineDiffOptions, 1641} 1642 1643impl DiffStatOptions { 1644 fn merge_args(&mut self, args: &DiffFormatArgs) { 1645 self.line_diff.merge_args(args); 1646 } 1647} 1648 1649#[derive(Clone, Debug)] 1650pub struct DiffStats { 1651 entries: Vec<DiffStatEntry>, 1652} 1653 1654impl DiffStats { 1655 /// Calculates stats of changed lines per file. 
1656 pub async fn calculate( 1657 store: &Store, 1658 tree_diff: BoxStream<'_, CopiesTreeDiffEntry>, 1659 options: &DiffStatOptions, 1660 conflict_marker_style: ConflictMarkerStyle, 1661 ) -> BackendResult<Self> { 1662 let entries = materialized_diff_stream(store, tree_diff) 1663 .map(|MaterializedTreeDiffEntry { path, values }| { 1664 let (left, right) = values?; 1665 let left_content = diff_content(path.source(), left, conflict_marker_style)?; 1666 let right_content = diff_content(path.target(), right, conflict_marker_style)?; 1667 let stat = get_diff_stat_entry( 1668 path, 1669 [&left_content.contents, &right_content.contents].map(BStr::new), 1670 options, 1671 ); 1672 BackendResult::Ok(stat) 1673 }) 1674 .try_collect() 1675 .await?; 1676 Ok(DiffStats { entries }) 1677 } 1678 1679 /// List of stats per file. 1680 pub fn entries(&self) -> &[DiffStatEntry] { 1681 &self.entries 1682 } 1683 1684 /// Total number of insertions. 1685 pub fn count_total_added(&self) -> usize { 1686 self.entries.iter().map(|stat| stat.added).sum() 1687 } 1688 1689 /// Total number of deletions. 1690 pub fn count_total_removed(&self) -> usize { 1691 self.entries.iter().map(|stat| stat.removed).sum() 1692 } 1693} 1694 1695#[derive(Clone, Debug)] 1696pub struct DiffStatEntry { 1697 pub path: CopiesTreeDiffEntryPath, 1698 pub added: usize, 1699 pub removed: usize, 1700} 1701 1702fn get_diff_stat_entry( 1703 path: CopiesTreeDiffEntryPath, 1704 contents: [&BStr; 2], 1705 options: &DiffStatOptions, 1706) -> DiffStatEntry { 1707 // TODO: this matches git's behavior, which is to count the number of newlines 1708 // in the file. but that behavior seems unhelpful; no one really cares how 1709 // many `0x0a` characters are in an image. 
1710 let diff = diff_by_line(contents, &options.line_diff); 1711 let mut added = 0; 1712 let mut removed = 0; 1713 for hunk in diff.hunks() { 1714 match hunk.kind { 1715 DiffHunkKind::Matching => {} 1716 DiffHunkKind::Different => { 1717 let [left, right] = hunk.contents[..].try_into().unwrap(); 1718 removed += left.split_inclusive(|b| *b == b'\n').count(); 1719 added += right.split_inclusive(|b| *b == b'\n').count(); 1720 } 1721 } 1722 } 1723 DiffStatEntry { 1724 path, 1725 added, 1726 removed, 1727 } 1728} 1729 1730pub fn show_diff_stats( 1731 formatter: &mut dyn Formatter, 1732 stats: &DiffStats, 1733 path_converter: &RepoPathUiConverter, 1734 display_width: usize, 1735) -> io::Result<()> { 1736 let ui_paths = stats 1737 .entries() 1738 .iter() 1739 .map(|stat| { 1740 if stat.path.copy_operation().is_some() { 1741 path_converter.format_copied_path(stat.path.source(), stat.path.target()) 1742 } else { 1743 path_converter.format_file_path(stat.path.target()) 1744 } 1745 }) 1746 .collect_vec(); 1747 let max_path_width = ui_paths.iter().map(|s| s.width()).max().unwrap_or(0); 1748 let max_diffs = stats 1749 .entries() 1750 .iter() 1751 .map(|stat| stat.added + stat.removed) 1752 .max() 1753 .unwrap_or(0); 1754 1755 let number_padding = max_diffs.to_string().len(); 1756 // 4 characters padding for the graph 1757 let available_width = display_width.saturating_sub(4 + " | ".len() + number_padding); 1758 // Always give at least a tiny bit of room 1759 let available_width = max(available_width, 5); 1760 let max_path_width = max_path_width.clamp(3, (0.7 * available_width as f64) as usize); 1761 let max_bar_length = available_width.saturating_sub(max_path_width); 1762 let factor = if max_diffs < max_bar_length { 1763 1.0 1764 } else { 1765 max_bar_length as f64 / max_diffs as f64 1766 }; 1767 1768 for (stat, ui_path) in iter::zip(stats.entries(), &ui_paths) { 1769 let bar_added = (stat.added as f64 * factor).ceil() as usize; 1770 let bar_removed = (stat.removed as f64 * 
factor).ceil() as usize; 1771 // replace start of path with ellipsis if the path is too long 1772 let (path, path_width) = text_util::elide_start(ui_path, "...", max_path_width); 1773 let path_pad_width = max_path_width - path_width; 1774 write!( 1775 formatter, 1776 "{path}{:path_pad_width$} | {:>number_padding$}{}", 1777 "", // pad to max_path_width 1778 stat.added + stat.removed, 1779 if bar_added + bar_removed > 0 { " " } else { "" }, 1780 )?; 1781 write!(formatter.labeled("added"), "{}", "+".repeat(bar_added))?; 1782 writeln!(formatter.labeled("removed"), "{}", "-".repeat(bar_removed))?; 1783 } 1784 1785 let total_added = stats.count_total_added(); 1786 let total_removed = stats.count_total_removed(); 1787 let total_files = stats.entries().len(); 1788 writeln!( 1789 formatter.labeled("stat-summary"), 1790 "{} file{} changed, {} insertion{}(+), {} deletion{}(-)", 1791 total_files, 1792 if total_files == 1 { "" } else { "s" }, 1793 total_added, 1794 if total_added == 1 { "" } else { "s" }, 1795 total_removed, 1796 if total_removed == 1 { "" } else { "s" }, 1797 )?; 1798 Ok(()) 1799} 1800 1801pub fn show_types( 1802 formatter: &mut dyn Formatter, 1803 mut tree_diff: BoxStream<CopiesTreeDiffEntry>, 1804 path_converter: &RepoPathUiConverter, 1805) -> Result<(), DiffRenderError> { 1806 async { 1807 while let Some(CopiesTreeDiffEntry { path, values }) = tree_diff.next().await { 1808 let (before, after) = values?; 1809 writeln!( 1810 formatter.labeled("modified"), 1811 "{}{} {}", 1812 diff_summary_char(&before), 1813 diff_summary_char(&after), 1814 path_converter.format_copied_path(path.source(), path.target()) 1815 )?; 1816 } 1817 Ok(()) 1818 } 1819 .block_on() 1820} 1821 1822fn diff_summary_char(value: &MergedTreeValue) -> char { 1823 match value.as_resolved() { 1824 Some(None) => '-', 1825 Some(Some(TreeValue::File { .. 
})) => 'F', 1826 Some(Some(TreeValue::Symlink(_))) => 'L', 1827 Some(Some(TreeValue::GitSubmodule(_))) => 'G', 1828 None => 'C', 1829 Some(Some(TreeValue::Tree(_))) | Some(Some(TreeValue::Conflict(_))) => { 1830 panic!("Unexpected {value:?} in diff") 1831 } 1832 } 1833} 1834 1835pub fn show_names( 1836 formatter: &mut dyn Formatter, 1837 mut tree_diff: BoxStream<CopiesTreeDiffEntry>, 1838 path_converter: &RepoPathUiConverter, 1839) -> io::Result<()> { 1840 async { 1841 while let Some(CopiesTreeDiffEntry { path, .. }) = tree_diff.next().await { 1842 writeln!( 1843 formatter, 1844 "{}", 1845 path_converter.format_file_path(path.target()) 1846 )?; 1847 } 1848 Ok(()) 1849 } 1850 .block_on() 1851}