// just playing with tangled
// at diffedit3 · 632 lines · 23 kB · view raw
1// Copyright 2022-2023 The Jujutsu Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15use std::borrow::Cow; 16use std::{cmp, io}; 17 18use unicode_width::UnicodeWidthChar as _; 19 20use crate::formatter::{FormatRecorder, Formatter}; 21 22pub fn complete_newline(s: impl Into<String>) -> String { 23 let mut s = s.into(); 24 if !s.is_empty() && !s.ends_with('\n') { 25 s.push('\n'); 26 } 27 s 28} 29 30pub fn split_email(email: &str) -> (&str, Option<&str>) { 31 if let Some((username, rest)) = email.split_once('@') { 32 (username, Some(rest)) 33 } else { 34 (email, None) 35 } 36} 37 38/// Shortens `text` to `max_width` by removing leading characters. `ellipsis` is 39/// added if the `text` gets truncated. 40/// 41/// The returned string (including `ellipsis`) never exceeds the `max_width`. 
42pub fn elide_start<'a>( 43 text: &'a str, 44 ellipsis: &'a str, 45 max_width: usize, 46) -> (Cow<'a, str>, usize) { 47 let (text_start, text_width) = truncate_start_pos(text, max_width); 48 if text_start == 0 { 49 return (Cow::Borrowed(text), text_width); 50 } 51 52 let (ellipsis_start, ellipsis_width) = truncate_start_pos(ellipsis, max_width); 53 if ellipsis_start != 0 { 54 let ellipsis = trim_start_zero_width_chars(&ellipsis[ellipsis_start..]); 55 return (Cow::Borrowed(ellipsis), ellipsis_width); 56 } 57 58 let text = &text[text_start..]; 59 let max_text_width = max_width - ellipsis_width; 60 let (skip, skipped_width) = skip_start_pos(text, text_width.saturating_sub(max_text_width)); 61 let text = trim_start_zero_width_chars(&text[skip..]); 62 let concat_width = ellipsis_width + (text_width - skipped_width); 63 assert!(concat_width <= max_width); 64 (Cow::Owned([ellipsis, text].concat()), concat_width) 65} 66 67/// Shortens `text` to `max_width` by removing leading characters, returning 68/// `(start_index, width)`. 69/// 70/// The truncated string may have 0-width decomposed characters at start. 71fn truncate_start_pos(text: &str, max_width: usize) -> (usize, usize) { 72 let mut acc_width = 0; 73 for (i, c) in text.char_indices().rev() { 74 let new_width = acc_width + c.width().unwrap_or(0); 75 if new_width > max_width { 76 let prev_index = i + c.len_utf8(); 77 return (prev_index, acc_width); 78 } 79 acc_width = new_width; 80 } 81 (0, acc_width) 82} 83 84/// Skips `width` leading characters, returning `(start_index, skipped_width)`. 85/// 86/// The `skipped_width` may exceed the given `width` if `width` is not at 87/// character boundary. 88/// 89/// The truncated string may have 0-width decomposed characters at start. 
90fn skip_start_pos(text: &str, width: usize) -> (usize, usize) { 91 let mut acc_width = 0; 92 for (i, c) in text.char_indices() { 93 if acc_width >= width { 94 return (i, acc_width); 95 } 96 acc_width += c.width().unwrap_or(0); 97 } 98 (text.len(), acc_width) 99} 100 101/// Removes leading 0-width characters. 102fn trim_start_zero_width_chars(text: &str) -> &str { 103 text.trim_start_matches(|c: char| c.width().unwrap_or(0) == 0) 104} 105 106/// Indents each line by the given prefix preserving labels. 107pub fn write_indented( 108 formatter: &mut dyn Formatter, 109 recorded_content: &FormatRecorder, 110 mut write_prefix: impl FnMut(&mut dyn Formatter) -> io::Result<()>, 111) -> io::Result<()> { 112 let data = recorded_content.data(); 113 let mut new_line = true; 114 recorded_content.replay_with(formatter, |formatter, range| { 115 for line in data[range].split_inclusive(|&c| c == b'\n') { 116 if new_line && line != b"\n" { 117 // Prefix inherits the current labels. This is implementation detail 118 // and may be fixed later. 119 write_prefix(formatter)?; 120 } 121 formatter.write_all(line)?; 122 new_line = line.ends_with(b"\n"); 123 } 124 Ok(()) 125 }) 126} 127 128/// Word with trailing whitespace. 129#[derive(Clone, Copy, Debug, Eq, PartialEq)] 130struct ByteFragment<'a> { 131 word: &'a [u8], 132 whitespace_len: usize, 133 word_width: usize, 134} 135 136impl<'a> ByteFragment<'a> { 137 fn new(word: &'a [u8], whitespace_len: usize) -> Self { 138 // We don't care about the width of non-UTF-8 bytes, but should not panic. 
139 let word_width = textwrap::core::display_width(&String::from_utf8_lossy(word)); 140 ByteFragment { 141 word, 142 whitespace_len, 143 word_width, 144 } 145 } 146 147 fn offset_in(&self, text: &[u8]) -> usize { 148 byte_offset_from(text, self.word) 149 } 150} 151 152impl textwrap::core::Fragment for ByteFragment<'_> { 153 fn width(&self) -> f64 { 154 self.word_width as f64 155 } 156 157 fn whitespace_width(&self) -> f64 { 158 self.whitespace_len as f64 159 } 160 161 fn penalty_width(&self) -> f64 { 162 0.0 163 } 164} 165 166fn byte_offset_from(outer: &[u8], inner: &[u8]) -> usize { 167 let outer_start = outer.as_ptr() as usize; 168 let inner_start = inner.as_ptr() as usize; 169 assert!(outer_start <= inner_start); 170 assert!(inner_start + inner.len() <= outer_start + outer.len()); 171 inner_start - outer_start 172} 173 174fn split_byte_line_to_words(line: &[u8]) -> Vec<ByteFragment<'_>> { 175 let mut words = Vec::new(); 176 let mut tail = line; 177 while let Some(word_end) = tail.iter().position(|&c| c == b' ') { 178 let word = &tail[..word_end]; 179 let ws_end = tail[word_end + 1..] 180 .iter() 181 .position(|&c| c != b' ') 182 .map(|p| p + word_end + 1) 183 .unwrap_or(tail.len()); 184 words.push(ByteFragment::new(word, ws_end - word_end)); 185 tail = &tail[ws_end..]; 186 } 187 if !tail.is_empty() { 188 words.push(ByteFragment::new(tail, 0)); 189 } 190 words 191} 192 193/// Wraps lines at the given width, returns a vector of lines (excluding "\n".) 194/// 195/// Existing newline characters will never be removed. For `str` content, you 196/// can use `textwrap::refill()` to refill a pre-formatted text. 197/// 198/// Each line is a sub-slice of the given text, even if the line is empty. 199/// 200/// The wrapping logic is more restricted than the default of the `textwrap`. 201/// Notably, this doesn't support hyphenation nor unicode line break. The 202/// display width is calculated based on unicode property in the same manner 203/// as `textwrap::wrap()`. 
204pub fn wrap_bytes(text: &[u8], width: usize) -> Vec<&[u8]> { 205 let mut split_lines = Vec::new(); 206 for line in text.split(|&c| c == b'\n') { 207 let words = split_byte_line_to_words(line); 208 let split = textwrap::wrap_algorithms::wrap_first_fit(&words, &[width as f64]); 209 split_lines.extend(split.iter().map(|words| match words { 210 [] => &line[..0], // Empty line 211 [a] => a.word, 212 [a, .., b] => { 213 let start = a.offset_in(line); 214 let end = b.offset_in(line) + b.word.len(); 215 &line[start..end] 216 } 217 })); 218 } 219 split_lines 220} 221 222/// Wraps lines at the given width preserving labels. 223/// 224/// `textwrap::wrap()` can also process text containing ANSI escape sequences. 225/// The main difference is that this function will reset the style for each line 226/// and recreate it on the following line if the output `formatter` is 227/// a `ColorFormatter`. 228pub fn write_wrapped( 229 formatter: &mut dyn Formatter, 230 recorded_content: &FormatRecorder, 231 width: usize, 232) -> io::Result<()> { 233 let data = recorded_content.data(); 234 let mut line_ranges = wrap_bytes(data, width) 235 .into_iter() 236 .map(|line| { 237 let start = byte_offset_from(data, line); 238 start..start + line.len() 239 }) 240 .peekable(); 241 // The recorded data ranges are contiguous, and the line ranges are increasing 242 // sequence (with some holes.) Both ranges should start from data[0]. 
243 recorded_content.replay_with(formatter, |formatter, data_range| { 244 while let Some(line_range) = line_ranges.peek() { 245 let start = cmp::max(data_range.start, line_range.start); 246 let end = cmp::min(data_range.end, line_range.end); 247 if start < end { 248 formatter.write_all(&data[start..end])?; 249 } 250 if data_range.end <= line_range.end { 251 break; // No more lines in this data range 252 } 253 line_ranges.next().unwrap(); 254 if line_ranges.peek().is_some() { 255 writeln!(formatter)?; // Not the last line 256 } 257 } 258 Ok(()) 259 }) 260} 261 262#[cfg(test)] 263mod tests { 264 use std::io::Write as _; 265 266 use super::*; 267 use crate::formatter::{ColorFormatter, PlainTextFormatter}; 268 269 fn format_colored(write: impl FnOnce(&mut dyn Formatter) -> io::Result<()>) -> String { 270 let config = config::Config::builder() 271 .set_override("colors.cyan", "cyan") 272 .unwrap() 273 .set_override("colors.red", "red") 274 .unwrap() 275 .build() 276 .unwrap(); 277 let mut output = Vec::new(); 278 let mut formatter = ColorFormatter::for_config(&mut output, &config).unwrap(); 279 write(&mut formatter).unwrap(); 280 drop(formatter); 281 String::from_utf8(output).unwrap() 282 } 283 284 fn format_plain_text(write: impl FnOnce(&mut dyn Formatter) -> io::Result<()>) -> String { 285 let mut output = Vec::new(); 286 let mut formatter = PlainTextFormatter::new(&mut output); 287 write(&mut formatter).unwrap(); 288 String::from_utf8(output).unwrap() 289 } 290 291 #[test] 292 fn test_elide_start() { 293 // Empty string 294 assert_eq!(elide_start("", "", 1), ("".into(), 0)); 295 296 // Basic truncation 297 assert_eq!(elide_start("abcdef", "", 6), ("abcdef".into(), 6)); 298 assert_eq!(elide_start("abcdef", "", 5), ("bcdef".into(), 5)); 299 assert_eq!(elide_start("abcdef", "", 1), ("f".into(), 1)); 300 assert_eq!(elide_start("abcdef", "", 0), ("".into(), 0)); 301 assert_eq!(elide_start("abcdef", "-=~", 6), ("abcdef".into(), 6)); 302 assert_eq!(elide_start("abcdef", 
"-=~", 5), ("-=~ef".into(), 5)); 303 assert_eq!(elide_start("abcdef", "-=~", 4), ("-=~f".into(), 4)); 304 assert_eq!(elide_start("abcdef", "-=~", 3), ("-=~".into(), 3)); 305 assert_eq!(elide_start("abcdef", "-=~", 2), ("=~".into(), 2)); 306 assert_eq!(elide_start("abcdef", "-=~", 1), ("~".into(), 1)); 307 assert_eq!(elide_start("abcdef", "-=~", 0), ("".into(), 0)); 308 309 // East Asian characters (char.width() == 2) 310 assert_eq!(elide_start("一二三", "", 6), ("一二三".into(), 6)); 311 assert_eq!(elide_start("一二三", "", 5), ("二三".into(), 4)); 312 assert_eq!(elide_start("一二三", "", 4), ("二三".into(), 4)); 313 assert_eq!(elide_start("一二三", "", 1), ("".into(), 0)); 314 assert_eq!(elide_start("一二三", "-=~", 6), ("一二三".into(), 6)); 315 assert_eq!(elide_start("一二三", "-=~", 5), ("-=~三".into(), 5)); 316 assert_eq!(elide_start("一二三", "-=~", 4), ("-=~".into(), 3)); 317 assert_eq!(elide_start("一二三", "", 6), ("一二三".into(), 6)); 318 assert_eq!(elide_start("一二三", "", 5), ("略三".into(), 4)); 319 assert_eq!(elide_start("一二三", "", 4), ("略三".into(), 4)); 320 assert_eq!(elide_start("一二三", "", 2), ("".into(), 2)); 321 assert_eq!(elide_start("一二三", "", 1), ("".into(), 0)); 322 assert_eq!(elide_start("一二三", ".", 5), (".二三".into(), 5)); 323 assert_eq!(elide_start("一二三", ".", 4), (".三".into(), 3)); 324 assert_eq!(elide_start("一二三", "略.", 5), ("略.三".into(), 5)); 325 assert_eq!(elide_start("一二三", "略.", 4), ("略.".into(), 3)); 326 327 // Multi-byte character at boundary 328 assert_eq!(elide_start("àbcdè", "", 5), ("àbcdè".into(), 5)); 329 assert_eq!(elide_start("àbcdè", "", 4), ("bcdè".into(), 4)); 330 assert_eq!(elide_start("àbcdè", "", 1), ("è".into(), 1)); 331 assert_eq!(elide_start("àbcdè", "", 0), ("".into(), 0)); 332 assert_eq!(elide_start("àbcdè", "ÀÇÈ", 4), ("ÀÇÈè".into(), 4)); 333 assert_eq!(elide_start("àbcdè", "ÀÇÈ", 3), ("ÀÇÈ".into(), 3)); 334 assert_eq!(elide_start("àbcdè", "ÀÇÈ", 2), ("ÇÈ".into(), 2)); 335 336 // Decomposed character at boundary 337 assert_eq!( 338 
elide_start("a\u{300}bcde\u{300}", "", 5), 339 ("a\u{300}bcde\u{300}".into(), 5) 340 ); 341 assert_eq!( 342 elide_start("a\u{300}bcde\u{300}", "", 4), 343 ("bcde\u{300}".into(), 4) 344 ); 345 assert_eq!( 346 elide_start("a\u{300}bcde\u{300}", "", 1), 347 ("e\u{300}".into(), 1) 348 ); 349 assert_eq!(elide_start("a\u{300}bcde\u{300}", "", 0), ("".into(), 0)); 350 assert_eq!( 351 elide_start("a\u{300}bcde\u{300}", "A\u{300}CE\u{300}", 4), 352 ("A\u{300}CE\u{300}e\u{300}".into(), 4) 353 ); 354 assert_eq!( 355 elide_start("a\u{300}bcde\u{300}", "A\u{300}CE\u{300}", 3), 356 ("A\u{300}CE\u{300}".into(), 3) 357 ); 358 assert_eq!( 359 elide_start("a\u{300}bcde\u{300}", "A\u{300}CE\u{300}", 2), 360 ("CE\u{300}".into(), 2) 361 ); 362 } 363 364 #[test] 365 fn test_split_byte_line_to_words() { 366 assert_eq!(split_byte_line_to_words(b""), vec![]); 367 assert_eq!( 368 split_byte_line_to_words(b"foo"), 369 vec![ByteFragment { 370 word: b"foo", 371 whitespace_len: 0, 372 word_width: 3 373 }], 374 ); 375 assert_eq!( 376 split_byte_line_to_words(b" foo"), 377 vec![ 378 ByteFragment { 379 word: b"", 380 whitespace_len: 2, 381 word_width: 0 382 }, 383 ByteFragment { 384 word: b"foo", 385 whitespace_len: 0, 386 word_width: 3 387 }, 388 ], 389 ); 390 assert_eq!( 391 split_byte_line_to_words(b"foo "), 392 vec![ByteFragment { 393 word: b"foo", 394 whitespace_len: 2, 395 word_width: 3 396 }], 397 ); 398 assert_eq!( 399 split_byte_line_to_words(b"a b foo bar "), 400 vec![ 401 ByteFragment { 402 word: b"a", 403 whitespace_len: 1, 404 word_width: 1 405 }, 406 ByteFragment { 407 word: b"b", 408 whitespace_len: 2, 409 word_width: 1 410 }, 411 ByteFragment { 412 word: b"foo", 413 whitespace_len: 1, 414 word_width: 3, 415 }, 416 ByteFragment { 417 word: b"bar", 418 whitespace_len: 1, 419 word_width: 3, 420 }, 421 ], 422 ); 423 } 424 425 #[test] 426 fn test_wrap_bytes() { 427 assert_eq!(wrap_bytes(b"foo", 10), [b"foo".as_ref()]); 428 assert_eq!(wrap_bytes(b"foo bar", 10), [b"foo bar".as_ref()]); 
429 assert_eq!( 430 wrap_bytes(b"foo bar baz", 10), 431 [b"foo bar".as_ref(), b"baz".as_ref()], 432 ); 433 434 // Empty text is represented as [""] 435 assert_eq!(wrap_bytes(b"", 10), [b"".as_ref()]); 436 assert_eq!(wrap_bytes(b" ", 10), [b"".as_ref()]); 437 438 // Whitespace in the middle should be preserved 439 assert_eq!( 440 wrap_bytes(b"foo bar baz", 8), 441 [b"foo bar".as_ref(), b"baz".as_ref()], 442 ); 443 assert_eq!( 444 wrap_bytes(b"foo bar x", 7), 445 [b"foo".as_ref(), b"bar x".as_ref()], 446 ); 447 assert_eq!( 448 wrap_bytes(b"foo bar \nx", 7), 449 [b"foo bar".as_ref(), b"x".as_ref()], 450 ); 451 assert_eq!( 452 wrap_bytes(b"foo bar\n x", 7), 453 [b"foo bar".as_ref(), b" x".as_ref()], 454 ); 455 assert_eq!( 456 wrap_bytes(b"foo bar x", 4), 457 [b"foo".as_ref(), b"bar".as_ref(), b"x".as_ref()], 458 ); 459 460 // Ends with "\n" 461 assert_eq!(wrap_bytes(b"foo\n", 10), [b"foo".as_ref(), b"".as_ref()]); 462 assert_eq!(wrap_bytes(b"foo\n", 3), [b"foo".as_ref(), b"".as_ref()]); 463 assert_eq!(wrap_bytes(b"\n", 10), [b"".as_ref(), b"".as_ref()]); 464 465 // Overflow 466 assert_eq!(wrap_bytes(b"foo x", 2), [b"foo".as_ref(), b"x".as_ref()]); 467 assert_eq!(wrap_bytes(b"x y", 0), [b"x".as_ref(), b"y".as_ref()]); 468 469 // Invalid UTF-8 bytes should not cause panic 470 assert_eq!(wrap_bytes(b"foo\x80", 10), [b"foo\x80".as_ref()]); 471 } 472 473 #[test] 474 fn test_wrap_bytes_slice_ptr() { 475 let text = b"\nfoo\n\nbar baz\n"; 476 let lines = wrap_bytes(text, 10); 477 assert_eq!( 478 lines, 479 [ 480 b"".as_ref(), 481 b"foo".as_ref(), 482 b"".as_ref(), 483 b"bar baz".as_ref(), 484 b"".as_ref() 485 ], 486 ); 487 // Each line should be a sub-slice of the source text 488 assert_eq!(lines[0].as_ptr(), text[0..].as_ptr()); 489 assert_eq!(lines[1].as_ptr(), text[1..].as_ptr()); 490 assert_eq!(lines[2].as_ptr(), text[5..].as_ptr()); 491 assert_eq!(lines[3].as_ptr(), text[6..].as_ptr()); 492 assert_eq!(lines[4].as_ptr(), text[14..].as_ptr()); 493 } 494 495 #[test] 496 fn 
test_write_wrapped() { 497 // Split single label chunk 498 let mut recorder = FormatRecorder::new(); 499 recorder.push_label("red").unwrap(); 500 write!(recorder, "foo bar baz\nqux quux\n").unwrap(); 501 recorder.pop_label().unwrap(); 502 insta::assert_snapshot!( 503 format_colored(|formatter| write_wrapped(formatter, &recorder, 7)), 504 @r###" 505 foo bar 506 baz 507 qux 508 quux 509 "### 510 ); 511 512 // Multiple label chunks in a line 513 let mut recorder = FormatRecorder::new(); 514 for (i, word) in ["foo ", "bar ", "baz\n", "qux ", "quux"].iter().enumerate() { 515 recorder.push_label(["red", "cyan"][i & 1]).unwrap(); 516 write!(recorder, "{word}").unwrap(); 517 recorder.pop_label().unwrap(); 518 } 519 insta::assert_snapshot!( 520 format_colored(|formatter| write_wrapped(formatter, &recorder, 7)), 521 @r###" 522 foo bar 523 baz 524 qux 525 quux 526 "### 527 ); 528 529 // Empty lines should not cause panic 530 let mut recorder = FormatRecorder::new(); 531 for (i, word) in ["", "foo", "", "bar baz", ""].iter().enumerate() { 532 recorder.push_label(["red", "cyan"][i & 1]).unwrap(); 533 writeln!(recorder, "{word}").unwrap(); 534 recorder.pop_label().unwrap(); 535 } 536 insta::assert_snapshot!( 537 format_colored(|formatter| write_wrapped(formatter, &recorder, 10)), 538 @r###" 539  540 foo 541  542 bar baz 543  544 "### 545 ); 546 547 // Split at label boundary 548 let mut recorder = FormatRecorder::new(); 549 recorder.push_label("red").unwrap(); 550 write!(recorder, "foo bar").unwrap(); 551 recorder.pop_label().unwrap(); 552 write!(recorder, " ").unwrap(); 553 recorder.push_label("cyan").unwrap(); 554 writeln!(recorder, "baz").unwrap(); 555 recorder.pop_label().unwrap(); 556 insta::assert_snapshot!( 557 format_colored(|formatter| write_wrapped(formatter, &recorder, 10)), 558 @r###" 559 foo bar 560 baz 561 "### 562 ); 563 564 // Do not split at label boundary "ba|z" (since it's a single word) 565 let mut recorder = FormatRecorder::new(); 566 
recorder.push_label("red").unwrap(); 567 write!(recorder, "foo bar ba").unwrap(); 568 recorder.pop_label().unwrap(); 569 recorder.push_label("cyan").unwrap(); 570 writeln!(recorder, "z").unwrap(); 571 recorder.pop_label().unwrap(); 572 insta::assert_snapshot!( 573 format_colored(|formatter| write_wrapped(formatter, &recorder, 10)), 574 @r###" 575 foo bar 576 baz 577 "### 578 ); 579 } 580 581 #[test] 582 fn test_write_wrapped_leading_labeled_whitespace() { 583 let mut recorder = FormatRecorder::new(); 584 recorder.push_label("red").unwrap(); 585 write!(recorder, " ").unwrap(); 586 recorder.pop_label().unwrap(); 587 write!(recorder, "foo").unwrap(); 588 insta::assert_snapshot!( 589 format_colored(|formatter| write_wrapped(formatter, &recorder, 10)), 590 @" foo" 591 ); 592 } 593 594 #[test] 595 fn test_write_wrapped_trailing_labeled_whitespace() { 596 // data: "foo" " " 597 // line: --- 598 let mut recorder = FormatRecorder::new(); 599 write!(recorder, "foo").unwrap(); 600 recorder.push_label("red").unwrap(); 601 write!(recorder, " ").unwrap(); 602 recorder.pop_label().unwrap(); 603 assert_eq!( 604 format_plain_text(|formatter| write_wrapped(formatter, &recorder, 10)), 605 "foo", 606 ); 607 608 // data: "foo" "\n" 609 // line: --- - 610 let mut recorder = FormatRecorder::new(); 611 write!(recorder, "foo").unwrap(); 612 recorder.push_label("red").unwrap(); 613 writeln!(recorder).unwrap(); 614 recorder.pop_label().unwrap(); 615 assert_eq!( 616 format_plain_text(|formatter| write_wrapped(formatter, &recorder, 10)), 617 "foo\n", 618 ); 619 620 // data: "foo\n" " " 621 // line: --- - 622 let mut recorder = FormatRecorder::new(); 623 writeln!(recorder, "foo").unwrap(); 624 recorder.push_label("red").unwrap(); 625 write!(recorder, " ").unwrap(); 626 recorder.pop_label().unwrap(); 627 assert_eq!( 628 format_plain_text(|formatter| write_wrapped(formatter, &recorder, 10)), 629 "foo\n", 630 ); 631 } 632}