// just playing with tangled
// Copyright 2022-2023 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15use std::borrow::Cow;
16use std::{cmp, io};
17
18use unicode_width::UnicodeWidthChar as _;
19
20use crate::formatter::{FormatRecorder, Formatter};
21
/// Converts `s` into an owned `String` and makes sure non-empty text ends with
/// a newline.
///
/// A `'\n'` is appended only when the text is non-empty and does not already
/// end with one; an empty input is returned unchanged.
pub fn complete_newline(s: impl Into<String>) -> String {
    let mut text: String = s.into();
    let needs_newline = match text.chars().next_back() {
        Some(last) => last != '\n',
        None => false,
    };
    if needs_newline {
        text.push('\n');
    }
    text
}
29
/// Splits `email` at the first `'@'` into `(username, domain)`.
///
/// The second element is `None` when `email` contains no `'@'`; in that case
/// the whole input is returned as the username.
pub fn split_email(email: &str) -> (&str, Option<&str>) {
    match email.find('@') {
        // '@' is a single byte, so `at + 1` is always a char boundary.
        Some(at) => (&email[..at], Some(&email[at + 1..])),
        None => (email, None),
    }
}
37
38/// Shortens `text` to `max_width` by removing leading characters. `ellipsis` is
39/// added if the `text` gets truncated.
40///
41/// The returned string (including `ellipsis`) never exceeds the `max_width`.
42pub fn elide_start<'a>(
43 text: &'a str,
44 ellipsis: &'a str,
45 max_width: usize,
46) -> (Cow<'a, str>, usize) {
47 let (text_start, text_width) = truncate_start_pos(text, max_width);
48 if text_start == 0 {
49 return (Cow::Borrowed(text), text_width);
50 }
51
52 let (ellipsis_start, ellipsis_width) = truncate_start_pos(ellipsis, max_width);
53 if ellipsis_start != 0 {
54 let ellipsis = trim_start_zero_width_chars(&ellipsis[ellipsis_start..]);
55 return (Cow::Borrowed(ellipsis), ellipsis_width);
56 }
57
58 let text = &text[text_start..];
59 let max_text_width = max_width - ellipsis_width;
60 let (skip, skipped_width) = skip_start_pos(text, text_width.saturating_sub(max_text_width));
61 let text = trim_start_zero_width_chars(&text[skip..]);
62 let concat_width = ellipsis_width + (text_width - skipped_width);
63 assert!(concat_width <= max_width);
64 (Cow::Owned([ellipsis, text].concat()), concat_width)
65}
66
67/// Shortens `text` to `max_width` by removing leading characters, returning
68/// `(start_index, width)`.
69///
70/// The truncated string may have 0-width decomposed characters at start.
71fn truncate_start_pos(text: &str, max_width: usize) -> (usize, usize) {
72 let mut acc_width = 0;
73 for (i, c) in text.char_indices().rev() {
74 let new_width = acc_width + c.width().unwrap_or(0);
75 if new_width > max_width {
76 let prev_index = i + c.len_utf8();
77 return (prev_index, acc_width);
78 }
79 acc_width = new_width;
80 }
81 (0, acc_width)
82}
83
84/// Skips `width` leading characters, returning `(start_index, skipped_width)`.
85///
86/// The `skipped_width` may exceed the given `width` if `width` is not at
87/// character boundary.
88///
89/// The truncated string may have 0-width decomposed characters at start.
90fn skip_start_pos(text: &str, width: usize) -> (usize, usize) {
91 let mut acc_width = 0;
92 for (i, c) in text.char_indices() {
93 if acc_width >= width {
94 return (i, acc_width);
95 }
96 acc_width += c.width().unwrap_or(0);
97 }
98 (text.len(), acc_width)
99}
100
101/// Removes leading 0-width characters.
102fn trim_start_zero_width_chars(text: &str) -> &str {
103 text.trim_start_matches(|c: char| c.width().unwrap_or(0) == 0)
104}
105
106/// Indents each line by the given prefix preserving labels.
107pub fn write_indented(
108 formatter: &mut dyn Formatter,
109 recorded_content: &FormatRecorder,
110 mut write_prefix: impl FnMut(&mut dyn Formatter) -> io::Result<()>,
111) -> io::Result<()> {
112 let data = recorded_content.data();
113 let mut new_line = true;
114 recorded_content.replay_with(formatter, |formatter, range| {
115 for line in data[range].split_inclusive(|&c| c == b'\n') {
116 if new_line && line != b"\n" {
117 // Prefix inherits the current labels. This is implementation detail
118 // and may be fixed later.
119 write_prefix(formatter)?;
120 }
121 formatter.write_all(line)?;
122 new_line = line.ends_with(b"\n");
123 }
124 Ok(())
125 })
126}
127
/// Word with trailing whitespace.
///
/// This is the unit handed to the `textwrap` wrapping algorithm; see the
/// `textwrap::core::Fragment` implementation for this type.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct ByteFragment<'a> {
    /// Bytes of the word itself, without the trailing whitespace.
    word: &'a [u8],
    /// Length in bytes of the whitespace that follows `word`.
    whitespace_len: usize,
    /// Display width of `word`, as computed by `ByteFragment::new()`.
    word_width: usize,
}
135
136impl<'a> ByteFragment<'a> {
137 fn new(word: &'a [u8], whitespace_len: usize) -> Self {
138 // We don't care about the width of non-UTF-8 bytes, but should not panic.
139 let word_width = textwrap::core::display_width(&String::from_utf8_lossy(word));
140 ByteFragment {
141 word,
142 whitespace_len,
143 word_width,
144 }
145 }
146
147 fn offset_in(&self, text: &[u8]) -> usize {
148 byte_offset_from(text, self.word)
149 }
150}
151
152impl textwrap::core::Fragment for ByteFragment<'_> {
153 fn width(&self) -> f64 {
154 self.word_width as f64
155 }
156
157 fn whitespace_width(&self) -> f64 {
158 self.whitespace_len as f64
159 }
160
161 fn penalty_width(&self) -> f64 {
162 0.0
163 }
164}
165
/// Returns the offset in bytes of `inner` from the start of `outer`.
///
/// The offset is derived from the slices' addresses, so `inner` must be a
/// sub-slice of `outer` (an empty slice at either end is allowed).
///
/// # Panics
///
/// Panics if `inner` does not lie entirely within `outer`.
fn byte_offset_from(outer: &[u8], inner: &[u8]) -> usize {
    let outer_addr = outer.as_ptr() as usize;
    let inner_addr = inner.as_ptr() as usize;
    let outer_end = outer_addr + outer.len();
    let inner_end = inner_addr + inner.len();
    assert!(inner_addr >= outer_addr);
    assert!(inner_end <= outer_end);
    inner_addr - outer_addr
}
173
174fn split_byte_line_to_words(line: &[u8]) -> Vec<ByteFragment<'_>> {
175 let mut words = Vec::new();
176 let mut tail = line;
177 while let Some(word_end) = tail.iter().position(|&c| c == b' ') {
178 let word = &tail[..word_end];
179 let ws_end = tail[word_end + 1..]
180 .iter()
181 .position(|&c| c != b' ')
182 .map(|p| p + word_end + 1)
183 .unwrap_or(tail.len());
184 words.push(ByteFragment::new(word, ws_end - word_end));
185 tail = &tail[ws_end..];
186 }
187 if !tail.is_empty() {
188 words.push(ByteFragment::new(tail, 0));
189 }
190 words
191}
192
193/// Wraps lines at the given width, returns a vector of lines (excluding "\n".)
194///
195/// Existing newline characters will never be removed. For `str` content, you
196/// can use `textwrap::refill()` to refill a pre-formatted text.
197///
198/// Each line is a sub-slice of the given text, even if the line is empty.
199///
200/// The wrapping logic is more restricted than the default of the `textwrap`.
201/// Notably, this doesn't support hyphenation nor unicode line break. The
202/// display width is calculated based on unicode property in the same manner
203/// as `textwrap::wrap()`.
204pub fn wrap_bytes(text: &[u8], width: usize) -> Vec<&[u8]> {
205 let mut split_lines = Vec::new();
206 for line in text.split(|&c| c == b'\n') {
207 let words = split_byte_line_to_words(line);
208 let split = textwrap::wrap_algorithms::wrap_first_fit(&words, &[width as f64]);
209 split_lines.extend(split.iter().map(|words| match words {
210 [] => &line[..0], // Empty line
211 [a] => a.word,
212 [a, .., b] => {
213 let start = a.offset_in(line);
214 let end = b.offset_in(line) + b.word.len();
215 &line[start..end]
216 }
217 }));
218 }
219 split_lines
220}
221
222/// Wraps lines at the given width preserving labels.
223///
224/// `textwrap::wrap()` can also process text containing ANSI escape sequences.
225/// The main difference is that this function will reset the style for each line
226/// and recreate it on the following line if the output `formatter` is
227/// a `ColorFormatter`.
228pub fn write_wrapped(
229 formatter: &mut dyn Formatter,
230 recorded_content: &FormatRecorder,
231 width: usize,
232) -> io::Result<()> {
233 let data = recorded_content.data();
234 let mut line_ranges = wrap_bytes(data, width)
235 .into_iter()
236 .map(|line| {
237 let start = byte_offset_from(data, line);
238 start..start + line.len()
239 })
240 .peekable();
241 // The recorded data ranges are contiguous, and the line ranges are increasing
242 // sequence (with some holes.) Both ranges should start from data[0].
243 recorded_content.replay_with(formatter, |formatter, data_range| {
244 while let Some(line_range) = line_ranges.peek() {
245 let start = cmp::max(data_range.start, line_range.start);
246 let end = cmp::min(data_range.end, line_range.end);
247 if start < end {
248 formatter.write_all(&data[start..end])?;
249 }
250 if data_range.end <= line_range.end {
251 break; // No more lines in this data range
252 }
253 line_ranges.next().unwrap();
254 if line_ranges.peek().is_some() {
255 writeln!(formatter)?; // Not the last line
256 }
257 }
258 Ok(())
259 })
260}
261
#[cfg(test)]
mod tests {
    use std::io::Write as _;

    use super::*;
    use crate::formatter::{ColorFormatter, PlainTextFormatter};

    // NOTE(review): the inline snapshots below are expected to match colored
    // output; confirm that the literal escape bytes in front of each `[`
    // survived copy/paste before trusting a snapshot failure.

    /// Runs `write` against a `ColorFormatter` configured with the "red" and
    /// "cyan" labels, and returns everything it wrote as a `String`.
    fn format_colored(write: impl FnOnce(&mut dyn Formatter) -> io::Result<()>) -> String {
        let config = config::Config::builder()
            .set_override("colors.cyan", "cyan")
            .unwrap()
            .set_override("colors.red", "red")
            .unwrap()
            .build()
            .unwrap();
        let mut output = Vec::new();
        let mut formatter = ColorFormatter::for_config(&mut output, &config).unwrap();
        write(&mut formatter).unwrap();
        // Release the borrow of `output` before reading it back.
        drop(formatter);
        String::from_utf8(output).unwrap()
    }

    /// Runs `write` against a `PlainTextFormatter` and returns everything it
    /// wrote as a `String`.
    fn format_plain_text(write: impl FnOnce(&mut dyn Formatter) -> io::Result<()>) -> String {
        let mut output = Vec::new();
        let mut formatter = PlainTextFormatter::new(&mut output);
        write(&mut formatter).unwrap();
        String::from_utf8(output).unwrap()
    }

    #[test]
    fn test_elide_start() {
        // Empty string
        assert_eq!(elide_start("", "", 1), ("".into(), 0));

        // Basic truncation
        assert_eq!(elide_start("abcdef", "", 6), ("abcdef".into(), 6));
        assert_eq!(elide_start("abcdef", "", 5), ("bcdef".into(), 5));
        assert_eq!(elide_start("abcdef", "", 1), ("f".into(), 1));
        assert_eq!(elide_start("abcdef", "", 0), ("".into(), 0));
        assert_eq!(elide_start("abcdef", "-=~", 6), ("abcdef".into(), 6));
        assert_eq!(elide_start("abcdef", "-=~", 5), ("-=~ef".into(), 5));
        assert_eq!(elide_start("abcdef", "-=~", 4), ("-=~f".into(), 4));
        assert_eq!(elide_start("abcdef", "-=~", 3), ("-=~".into(), 3));
        assert_eq!(elide_start("abcdef", "-=~", 2), ("=~".into(), 2));
        assert_eq!(elide_start("abcdef", "-=~", 1), ("~".into(), 1));
        assert_eq!(elide_start("abcdef", "-=~", 0), ("".into(), 0));

        // East Asian characters (char.width() == 2)
        assert_eq!(elide_start("一二三", "", 6), ("一二三".into(), 6));
        assert_eq!(elide_start("一二三", "", 5), ("二三".into(), 4));
        assert_eq!(elide_start("一二三", "", 4), ("二三".into(), 4));
        assert_eq!(elide_start("一二三", "", 1), ("".into(), 0));
        assert_eq!(elide_start("一二三", "-=~", 6), ("一二三".into(), 6));
        assert_eq!(elide_start("一二三", "-=~", 5), ("-=~三".into(), 5));
        assert_eq!(elide_start("一二三", "-=~", 4), ("-=~".into(), 3));
        assert_eq!(elide_start("一二三", "略", 6), ("一二三".into(), 6));
        assert_eq!(elide_start("一二三", "略", 5), ("略三".into(), 4));
        assert_eq!(elide_start("一二三", "略", 4), ("略三".into(), 4));
        assert_eq!(elide_start("一二三", "略", 2), ("略".into(), 2));
        assert_eq!(elide_start("一二三", "略", 1), ("".into(), 0));
        assert_eq!(elide_start("一二三", ".", 5), (".二三".into(), 5));
        assert_eq!(elide_start("一二三", ".", 4), (".三".into(), 3));
        assert_eq!(elide_start("一二三", "略.", 5), ("略.三".into(), 5));
        assert_eq!(elide_start("一二三", "略.", 4), ("略.".into(), 3));

        // Multi-byte character at boundary
        assert_eq!(elide_start("àbcdè", "", 5), ("àbcdè".into(), 5));
        assert_eq!(elide_start("àbcdè", "", 4), ("bcdè".into(), 4));
        assert_eq!(elide_start("àbcdè", "", 1), ("è".into(), 1));
        assert_eq!(elide_start("àbcdè", "", 0), ("".into(), 0));
        assert_eq!(elide_start("àbcdè", "ÀÇÈ", 4), ("ÀÇÈè".into(), 4));
        assert_eq!(elide_start("àbcdè", "ÀÇÈ", 3), ("ÀÇÈ".into(), 3));
        assert_eq!(elide_start("àbcdè", "ÀÇÈ", 2), ("ÇÈ".into(), 2));

        // Decomposed character at boundary
        assert_eq!(
            elide_start("a\u{300}bcde\u{300}", "", 5),
            ("a\u{300}bcde\u{300}".into(), 5)
        );
        assert_eq!(
            elide_start("a\u{300}bcde\u{300}", "", 4),
            ("bcde\u{300}".into(), 4)
        );
        assert_eq!(
            elide_start("a\u{300}bcde\u{300}", "", 1),
            ("e\u{300}".into(), 1)
        );
        assert_eq!(elide_start("a\u{300}bcde\u{300}", "", 0), ("".into(), 0));
        assert_eq!(
            elide_start("a\u{300}bcde\u{300}", "A\u{300}CE\u{300}", 4),
            ("A\u{300}CE\u{300}e\u{300}".into(), 4)
        );
        assert_eq!(
            elide_start("a\u{300}bcde\u{300}", "A\u{300}CE\u{300}", 3),
            ("A\u{300}CE\u{300}".into(), 3)
        );
        assert_eq!(
            elide_start("a\u{300}bcde\u{300}", "A\u{300}CE\u{300}", 2),
            ("CE\u{300}".into(), 2)
        );
    }

    #[test]
    fn test_split_byte_line_to_words() {
        assert_eq!(split_byte_line_to_words(b""), vec![]);
        assert_eq!(
            split_byte_line_to_words(b"foo"),
            vec![ByteFragment {
                word: b"foo",
                whitespace_len: 0,
                word_width: 3
            }],
        );
        // Two leading spaces: an empty word followed by 2 bytes of whitespace.
        assert_eq!(
            split_byte_line_to_words(b"  foo"),
            vec![
                ByteFragment {
                    word: b"",
                    whitespace_len: 2,
                    word_width: 0
                },
                ByteFragment {
                    word: b"foo",
                    whitespace_len: 0,
                    word_width: 3
                },
            ],
        );
        // Two trailing spaces.
        assert_eq!(
            split_byte_line_to_words(b"foo  "),
            vec![ByteFragment {
                word: b"foo",
                whitespace_len: 2,
                word_width: 3
            }],
        );
        // Mixed runs: "b" is followed by two spaces, the others by one.
        assert_eq!(
            split_byte_line_to_words(b"a b  foo bar "),
            vec![
                ByteFragment {
                    word: b"a",
                    whitespace_len: 1,
                    word_width: 1
                },
                ByteFragment {
                    word: b"b",
                    whitespace_len: 2,
                    word_width: 1
                },
                ByteFragment {
                    word: b"foo",
                    whitespace_len: 1,
                    word_width: 3,
                },
                ByteFragment {
                    word: b"bar",
                    whitespace_len: 1,
                    word_width: 3,
                },
            ],
        );
    }

    #[test]
    fn test_wrap_bytes() {
        assert_eq!(wrap_bytes(b"foo", 10), [b"foo".as_ref()]);
        assert_eq!(wrap_bytes(b"foo bar", 10), [b"foo bar".as_ref()]);
        assert_eq!(
            wrap_bytes(b"foo bar baz", 10),
            [b"foo bar".as_ref(), b"baz".as_ref()],
        );

        // Empty text is represented as [""]
        assert_eq!(wrap_bytes(b"", 10), [b"".as_ref()]);
        assert_eq!(wrap_bytes(b" ", 10), [b"".as_ref()]);

        // Whitespace in the middle should be preserved
        // (runs of 2 and 3 spaces; the run at the break point is dropped)
        assert_eq!(
            wrap_bytes(b"foo  bar   baz", 8),
            [b"foo  bar".as_ref(), b"baz".as_ref()],
        );
        assert_eq!(
            wrap_bytes(b"foo  bar   x", 7),
            [b"foo".as_ref(), b"bar   x".as_ref()],
        );
        assert_eq!(
            wrap_bytes(b"foo bar \nx", 7),
            [b"foo bar".as_ref(), b"x".as_ref()],
        );
        assert_eq!(
            wrap_bytes(b"foo bar\n x", 7),
            [b"foo bar".as_ref(), b" x".as_ref()],
        );
        assert_eq!(
            wrap_bytes(b"foo bar x", 4),
            [b"foo".as_ref(), b"bar".as_ref(), b"x".as_ref()],
        );

        // Ends with "\n"
        assert_eq!(wrap_bytes(b"foo\n", 10), [b"foo".as_ref(), b"".as_ref()]);
        assert_eq!(wrap_bytes(b"foo\n", 3), [b"foo".as_ref(), b"".as_ref()]);
        assert_eq!(wrap_bytes(b"\n", 10), [b"".as_ref(), b"".as_ref()]);

        // Overflow
        assert_eq!(wrap_bytes(b"foo x", 2), [b"foo".as_ref(), b"x".as_ref()]);
        assert_eq!(wrap_bytes(b"x y", 0), [b"x".as_ref(), b"y".as_ref()]);

        // Invalid UTF-8 bytes should not cause panic
        assert_eq!(wrap_bytes(b"foo\x80", 10), [b"foo\x80".as_ref()]);
    }

    #[test]
    fn test_wrap_bytes_slice_ptr() {
        let text = b"\nfoo\n\nbar baz\n";
        let lines = wrap_bytes(text, 10);
        assert_eq!(
            lines,
            [
                b"".as_ref(),
                b"foo".as_ref(),
                b"".as_ref(),
                b"bar baz".as_ref(),
                b"".as_ref()
            ],
        );
        // Each line should be a sub-slice of the source text
        assert_eq!(lines[0].as_ptr(), text[0..].as_ptr());
        assert_eq!(lines[1].as_ptr(), text[1..].as_ptr());
        assert_eq!(lines[2].as_ptr(), text[5..].as_ptr());
        assert_eq!(lines[3].as_ptr(), text[6..].as_ptr());
        assert_eq!(lines[4].as_ptr(), text[14..].as_ptr());
    }

    #[test]
    fn test_write_wrapped() {
        // Split single label chunk
        let mut recorder = FormatRecorder::new();
        recorder.push_label("red").unwrap();
        write!(recorder, "foo bar baz\nqux quux\n").unwrap();
        recorder.pop_label().unwrap();
        insta::assert_snapshot!(
            format_colored(|formatter| write_wrapped(formatter, &recorder, 7)),
            @r###"
        [38;5;1mfoo bar[39m
        [38;5;1mbaz[39m
        [38;5;1mqux[39m
        [38;5;1mquux[39m
        "###
        );

        // Multiple label chunks in a line
        let mut recorder = FormatRecorder::new();
        for (i, word) in ["foo ", "bar ", "baz\n", "qux ", "quux"].iter().enumerate() {
            recorder.push_label(["red", "cyan"][i & 1]).unwrap();
            write!(recorder, "{word}").unwrap();
            recorder.pop_label().unwrap();
        }
        insta::assert_snapshot!(
            format_colored(|formatter| write_wrapped(formatter, &recorder, 7)),
            @r###"
        [38;5;1mfoo [39m[38;5;6mbar[39m
        [38;5;1mbaz[39m
        [38;5;6mqux[39m
        [38;5;1mquux[39m
        "###
        );

        // Empty lines should not cause panic
        let mut recorder = FormatRecorder::new();
        for (i, word) in ["", "foo", "", "bar baz", ""].iter().enumerate() {
            recorder.push_label(["red", "cyan"][i & 1]).unwrap();
            writeln!(recorder, "{word}").unwrap();
            recorder.pop_label().unwrap();
        }
        insta::assert_snapshot!(
            format_colored(|formatter| write_wrapped(formatter, &recorder, 10)),
            @r###"
        [38;5;1m[39m
        [38;5;6mfoo[39m
        [38;5;1m[39m
        [38;5;6mbar baz[39m
        [38;5;1m[39m
        "###
        );

        // Split at label boundary
        let mut recorder = FormatRecorder::new();
        recorder.push_label("red").unwrap();
        write!(recorder, "foo bar").unwrap();
        recorder.pop_label().unwrap();
        write!(recorder, " ").unwrap();
        recorder.push_label("cyan").unwrap();
        writeln!(recorder, "baz").unwrap();
        recorder.pop_label().unwrap();
        insta::assert_snapshot!(
            format_colored(|formatter| write_wrapped(formatter, &recorder, 10)),
            @r###"
        [38;5;1mfoo bar[39m
        [38;5;6mbaz[39m
        "###
        );

        // Do not split at label boundary "ba|z" (since it's a single word)
        let mut recorder = FormatRecorder::new();
        recorder.push_label("red").unwrap();
        write!(recorder, "foo bar ba").unwrap();
        recorder.pop_label().unwrap();
        recorder.push_label("cyan").unwrap();
        writeln!(recorder, "z").unwrap();
        recorder.pop_label().unwrap();
        insta::assert_snapshot!(
            format_colored(|formatter| write_wrapped(formatter, &recorder, 10)),
            @r###"
        [38;5;1mfoo bar[39m
        [38;5;1mba[39m[38;5;6mz[39m
        "###
        );
    }

    #[test]
    fn test_write_wrapped_leading_labeled_whitespace() {
        let mut recorder = FormatRecorder::new();
        recorder.push_label("red").unwrap();
        write!(recorder, " ").unwrap();
        recorder.pop_label().unwrap();
        write!(recorder, "foo").unwrap();
        insta::assert_snapshot!(
            format_colored(|formatter| write_wrapped(formatter, &recorder, 10)),
            @"[38;5;1m [39mfoo"
        );
    }

    #[test]
    fn test_write_wrapped_trailing_labeled_whitespace() {
        // data: "foo" " "
        // line: ---
        let mut recorder = FormatRecorder::new();
        write!(recorder, "foo").unwrap();
        recorder.push_label("red").unwrap();
        write!(recorder, " ").unwrap();
        recorder.pop_label().unwrap();
        assert_eq!(
            format_plain_text(|formatter| write_wrapped(formatter, &recorder, 10)),
            "foo",
        );

        // data: "foo" "\n"
        // line: --- -
        let mut recorder = FormatRecorder::new();
        write!(recorder, "foo").unwrap();
        recorder.push_label("red").unwrap();
        writeln!(recorder).unwrap();
        recorder.pop_label().unwrap();
        assert_eq!(
            format_plain_text(|formatter| write_wrapped(formatter, &recorder, 10)),
            "foo\n",
        );

        // data: "foo\n" " "
        // line: --- -
        let mut recorder = FormatRecorder::new();
        writeln!(recorder, "foo").unwrap();
        recorder.push_label("red").unwrap();
        write!(recorder, " ").unwrap();
        recorder.pop_label().unwrap();
        assert_eq!(
            format_plain_text(|formatter| write_wrapped(formatter, &recorder, 10)),
            "foo\n",
        );
    }
}
632}