source dump of claude code
at main 91 lines 3.3 kB view raw
import {
  type AnsiCode,
  ansiCodesToString,
  reduceAnsiCodes,
  tokenize,
  undoAnsiCodes,
} from '@alcalzone/ansi-tokenize'
import { stringWidth } from '../ink/stringWidth.js'

/**
 * A code is an "end code" if its code equals its endCode
 * (e.g., hyperlink close).
 */
function isEndCode(code: AnsiCode): boolean {
  return code.code === code.endCode
}

/**
 * Keep only "start codes" (drop end codes), preserving order.
 */
function filterStartCodes(codes: AnsiCode[]): AnsiCode[] {
  return codes.filter(c => !isEndCode(c))
}

/**
 * Slice a string containing ANSI escape codes.
 *
 * Unlike the slice-ansi package, this properly handles OSC 8 hyperlink
 * sequences because @alcalzone/ansi-tokenize tokenizes them correctly.
 *
 * @param str - Text that may contain ANSI escape sequences.
 * @param start - Inclusive start offset, measured in display cells (the
 *   units produced by `stringWidth`), not UTF-16 code units.
 * @param end - Exclusive end offset in display cells. When omitted, the
 *   slice runs to the end of `str`.
 * @returns The selected text, prefixed with the start codes active at
 *   `start` and followed by the undo sequence for the start codes still
 *   active at the cut. Returns `''` when no character falls in the range.
 */
export default function sliceAnsi(
  str: string,
  start: number,
  end?: number,
): string {
  // Don't pass `end` to tokenize — it counts code units, not display cells,
  // so it drops tokens early for text with zero-width combining marks.
  const tokens = tokenize(str)
  let activeCodes: AnsiCode[] = []
  let position = 0
  let result = ''
  let include = false

  for (const token of tokens) {
    // Advance by display width, not code units. Combining marks (Devanagari
    // matras, virama, diacritics) are width 0 — counting them via .length
    // advanced position past `end` early and truncated the slice. Callers
    // pass start/end in display cells (via stringWidth), so position must
    // track the same units.
    const width =
      token.type === 'ansi' ? 0 : token.fullWidth ? 2 : stringWidth(token.value)

    // Break AFTER trailing zero-width marks — a combining mark attaches to
    // the preceding base char, so "भा" (भ + ा, 1 display cell) sliced at
    // end=1 must include the ा. Breaking on position >= end BEFORE the
    // zero-width check would drop it and render भ bare. ANSI codes are
    // width 0 but must NOT be included past end (they open new style runs
    // that leak into the undo sequence), so gate on char type too. The
    // !include guard ensures empty slices (start===end) stay empty even
    // when the string starts with a zero-width char (BOM, ZWJ).
    if (end !== undefined && position >= end) {
      if (token.type === 'ansi' || width > 0 || !include) break
    }

    if (token.type === 'ansi') {
      // Track every code so the style state at `start` can be reconstructed.
      activeCodes.push(token)
      if (include) {
        // Emit all ANSI codes during the slice
        result += token.code
      }
    } else {
      if (!include && position >= start) {
        // Skip leading zero-width marks at the start boundary — they belong
        // to the preceding base char in the left half. Without this, the
        // mark appears in BOTH halves: left+right ≠ original. Only applies
        // when start > 0 (otherwise there's no preceding char to own it).
        // `position` needs no update here because width is 0.
        if (start > 0 && width === 0) continue
        include = true
        // Reduce and filter to only active start codes, then open the slice
        // with them so it renders with the styles in effect at `start`.
        activeCodes = filterStartCodes(reduceAnsiCodes(activeCodes))
        result = ansiCodesToString(activeCodes)
      }

      if (include) {
        result += token.value
      }

      position += width
    }
  }

  // Nothing was ever included (start lies at/after the end of the text, or
  // start === end past the first cell). `result` is still empty, so there
  // is nothing to undo: appending the undo sequence for codes that were
  // only tracked — never emitted — would turn an empty slice into a stray
  // escape sequence.
  if (!include) return ''

  // Only undo start codes that are still active
  const activeStartCodes = filterStartCodes(reduceAnsiCodes(activeCodes))
  result += ansiCodesToString(undoAnsiCodes(activeStartCodes))
  return result
}