utils/sliceAnsi.ts at main · oppi.li/claude-code

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / utils / sliceAnsi.ts
at main 91 lines 3.3 kB view raw
wrap content
oppi.li dump from zip 3d ago
63aada3f
 1import {
 2  type AnsiCode,
 3  ansiCodesToString,
 4  reduceAnsiCodes,
 5  tokenize,
 6  undoAnsiCodes,
 7} from '@alcalzone/ansi-tokenize'
 8import { stringWidth } from '../ink/stringWidth.js'
 9
/**
 * Tell whether a token closes a style rather than opening one.
 *
 * A code is an "end code" if its code equals its endCode (e.g., hyperlink
 * close).
 *
 * @param code - ANSI token to classify.
 * @returns `true` when `code.code` and `code.endCode` are the same string.
 */
function isEndCode(code: AnsiCode): boolean {
  return code.code === code.endCode
}
14
/**
 * Filter to only include "start codes" (not end codes).
 *
 * @param codes - ANSI tokens to filter; the input array is not modified.
 * @returns A new array holding the entries for which `isEndCode` is false,
 *   in their original order.
 */
function filterStartCodes(codes: AnsiCode[]): AnsiCode[] {
  return codes.filter(c => !isEndCode(c))
}
19
/**
 * Slice a string containing ANSI escape codes.
 *
 * Unlike the slice-ansi package, this properly handles OSC 8 hyperlink
 * sequences because @alcalzone/ansi-tokenize tokenizes them correctly.
 *
 * @param str - Text that may contain ANSI escape sequences.
 * @param start - Inclusive start offset, measured in display cells.
 * @param end - Exclusive end offset in display cells; omit to slice through
 *   the end of the string.
 * @returns The selected text, prefixed with the start codes that were active
 *   when the slice began and suffixed with the codes that undo whatever is
 *   still active. Returns '' when no character falls inside the range.
 */
export default function sliceAnsi(
  str: string,
  start: number,
  end?: number,
): string {
  // Don't pass `end` to tokenize — it counts code units, not display cells,
  // so it drops tokens early for text with zero-width combining marks.
  const tokens = tokenize(str)
  let activeCodes: AnsiCode[] = []
  let position = 0
  let result = ''
  let include = false

  for (const token of tokens) {
    // Advance by display width, not code units. Combining marks (Devanagari
    // matras, virama, diacritics) are width 0 — counting them via .length
    // advanced position past `end` early and truncated the slice. Callers
    // pass start/end in display cells (via stringWidth), so position must
    // track the same units.
    const width =
      token.type === 'ansi' ? 0 : token.fullWidth ? 2 : stringWidth(token.value)

    // Break AFTER trailing zero-width marks — a combining mark attaches to
    // the preceding base char, so "भा" (भ + ा, 1 display cell) sliced at
    // end=1 must include the ा. Breaking on position >= end BEFORE the
    // zero-width check would drop it and render भ bare. ANSI codes are
    // width 0 but must NOT be included past end (they open new style runs
    // that leak into the undo sequence), so gate on char type too. The
    // !include guard ensures empty slices (start===end) stay empty even
    // when the string starts with a zero-width char (BOM, ZWJ).
    if (end !== undefined && position >= end) {
      if (token.type === 'ansi' || width > 0 || !include) break
    }

    if (token.type === 'ansi') {
      activeCodes.push(token)
      if (include) {
        // Emit all ANSI codes during the slice
        result += token.code
      }
    } else {
      if (!include && position >= start) {
        // Skip leading zero-width marks at the start boundary — they belong
        // to the preceding base char in the left half. Without this, the
        // mark appears in BOTH halves: left+right ≠ original. Only applies
        // when start > 0 (otherwise there's no preceding char to own it).
        if (start > 0 && width === 0) continue
        include = true
        // Reduce and filter to only active start codes
        activeCodes = filterStartCodes(reduceAnsiCodes(activeCodes))
        result = ansiCodesToString(activeCodes)
      }

      if (include) {
        result += token.value
      }

      position += width
    }
  }

  // Nothing was selected (start is past the end of the text, or the range is
  // empty), so no start code was ever written to `result`. Appending undo
  // codes here would turn an empty slice into stray close sequences such as
  // "\x1b[39m" for styles that were opened before the range.
  if (!include) return ''

  // Only undo start codes that are still active
  const activeStartCodes = filterStartCodes(reduceAnsiCodes(activeCodes))
  result += ansiCodesToString(undoAnsiCodes(activeStartCodes))
  return result
}