source dump of claude code
at main 222 lines 7.2 kB view raw
import emojiRegex from 'emoji-regex'
import { eastAsianWidth } from 'get-east-asian-width'
import stripAnsi from 'strip-ansi'
import { getGraphemeSegmenter } from '../utils/intl.js'

// Shared matcher instance. `lastIndex` is reset before every `test()` call
// below so that a stateful (global/sticky) regex cannot carry a match position
// over from one grapheme to the next.
const EMOJI_REGEX = emojiRegex()

/**
 * Fallback JavaScript implementation of stringWidth when Bun.stringWidth is not available.
 *
 * Get the display width of a string as it would appear in a terminal.
 *
 * This is a more accurate alternative to the string-width package that correctly handles
 * characters like ⚠ (U+26A0) which string-width incorrectly reports as width 2.
 *
 * The implementation uses eastAsianWidth directly with ambiguousAsWide: false,
 * which correctly treats ambiguous-width characters as narrow (width 1) as
 * recommended by the Unicode standard for Western contexts.
 *
 * The work is done in tiers, cheapest first:
 *   1. pure ASCII without ESC: count code units above 0x1f;
 *   2. strip ANSI sequences if an ESC is present;
 *   3. no emoji / variation selectors / ZWJ: sum per-code-point widths;
 *   4. otherwise: walk grapheme clusters and measure each one.
 *
 * @param str - Text to measure. May contain ANSI escape sequences.
 * @returns The width in terminal cells; 0 for an empty or non-string input.
 */
function stringWidthJavaScript(str: string): number {
  if (typeof str !== 'string' || str.length === 0) {
    return 0
  }

  // Fast path: pure ASCII string (no ANSI codes, no wide chars)
  let isPureAscii = true
  for (let i = 0; i < str.length; i++) {
    const code = str.charCodeAt(i)
    // Check for non-ASCII or ANSI escape (0x1b)
    if (code >= 127 || code === 0x1b) {
      isPureAscii = false
      break
    }
  }
  if (isPureAscii) {
    // Count printable characters (exclude control chars)
    let width = 0
    for (let i = 0; i < str.length; i++) {
      const code = str.charCodeAt(i)
      if (code > 0x1f) {
        width++
      }
    }
    return width
  }

  // Strip ANSI if escape character is present
  if (str.includes('\x1b')) {
    str = stripAnsi(str)
    if (str.length === 0) {
      return 0
    }
  }

  // Fast path: simple Unicode (no emoji, variation selectors, or joiners)
  if (!needsSegmentation(str)) {
    let width = 0
    for (const char of str) {
      const codePoint = char.codePointAt(0)!
      if (!isZeroWidth(codePoint)) {
        width += eastAsianWidth(codePoint, { ambiguousAsWide: false })
      }
    }
    return width
  }

  let width = 0

  for (const { segment: grapheme } of getGraphemeSegmenter().segment(str)) {
    // Check for emoji first (most emoji sequences are width 2)
    EMOJI_REGEX.lastIndex = 0
    if (EMOJI_REGEX.test(grapheme)) {
      width += getEmojiWidth(grapheme)
      continue
    }

    // Calculate width for non-emoji graphemes
    // For grapheme clusters (like Devanagari conjuncts with virama+ZWJ), only count
    // the first non-zero-width character's width since the cluster renders as one glyph
    for (const char of grapheme) {
      const codePoint = char.codePointAt(0)!
      if (!isZeroWidth(codePoint)) {
        width += eastAsianWidth(codePoint, { ambiguousAsWide: false })
        break
      }
    }
  }

  return width
}

/**
 * Decide whether a string has to be measured grapheme by grapheme.
 *
 * @param str - ANSI-free text.
 * @returns true as soon as any code point lies in one of the emoji ranges
 *   checked below, is a variation selector (U+FE00–U+FE0F) or is ZWJ (U+200D);
 *   false if no such code point is found.
 */
function needsSegmentation(str: string): boolean {
  for (const char of str) {
    const cp = char.codePointAt(0)!
    // Emoji ranges
    if (cp >= 0x1f300 && cp <= 0x1faff) return true
    if (cp >= 0x2600 && cp <= 0x27bf) return true
    if (cp >= 0x1f1e6 && cp <= 0x1f1ff) return true
    // Variation selectors, ZWJ
    if (cp >= 0xfe00 && cp <= 0xfe0f) return true
    if (cp === 0x200d) return true
  }
  return false
}

/**
 * Width of a single grapheme that EMOJI_REGEX matched.
 *
 * @param grapheme - One grapheme cluster.
 * @returns 1 for a lone regional indicator and for an incomplete keycap
 *   (digit, `#` or `*` followed only by VS16); 2 for everything else.
 */
function getEmojiWidth(grapheme: string): number {
  // Regional indicators: single = 1, pair = 2
  const first = grapheme.codePointAt(0)!
  if (first >= 0x1f1e6 && first <= 0x1f1ff) {
    // Count code points, not UTF-16 units: each indicator is a surrogate pair.
    let count = 0
    for (const _ of grapheme) count++
    return count === 1 ? 1 : 2
  }

  // Incomplete keycap: digit/symbol + VS16 without U+20E3
  if (grapheme.length === 2) {
    const second = grapheme.codePointAt(1)
    if (
      second === 0xfe0f &&
      ((first >= 0x30 && first <= 0x39) || first === 0x23 || first === 0x2a)
    ) {
      return 1
    }
  }

  return 2
}

/**
 * Report whether a code point occupies no terminal cell of its own.
 *
 * Covers control characters, invisible format characters, variation
 * selectors, combining marks for several scripts, lone surrogates and tag
 * characters. Anything not listed is treated as visible and is measured by
 * the caller with eastAsianWidth.
 *
 * @param codePoint - Unicode code point to classify.
 * @returns true if the code point should contribute 0 to the width.
 */
function isZeroWidth(codePoint: number): boolean {
  // Fast path for common printable range
  if (codePoint >= 0x20 && codePoint < 0x7f) return false
  // Latin-1 supplement through U+02FF: only SOFT HYPHEN (U+00AD) is zero-width
  if (codePoint >= 0xa0 && codePoint < 0x0300) return codePoint === 0x00ad

  // Control characters
  if (codePoint <= 0x1f || (codePoint >= 0x7f && codePoint <= 0x9f)) return true

  // Zero-width and invisible format characters. The ranges follow Unicode's
  // Default_Ignorable_Code_Point set, so directional marks and bidi controls
  // are not counted as a visible cell.
  if (
    (codePoint >= 0x200b && codePoint <= 0x200f) || // ZW space/joiners, LRM, RLM
    (codePoint >= 0x202a && codePoint <= 0x202e) || // Bidi embedding/override
    (codePoint >= 0x2060 && codePoint <= 0x206f) || // Word joiner, invisible operators, bidi isolates
    codePoint === 0xfeff // BOM
  ) {
    return true
  }

  // Variation selectors
  if (
    (codePoint >= 0xfe00 && codePoint <= 0xfe0f) ||
    (codePoint >= 0xe0100 && codePoint <= 0xe01ef)
  ) {
    return true
  }

  // Combining diacritical marks
  if (
    (codePoint >= 0x0300 && codePoint <= 0x036f) ||
    (codePoint >= 0x1ab0 && codePoint <= 0x1aff) ||
    (codePoint >= 0x1dc0 && codePoint <= 0x1dff) ||
    (codePoint >= 0x20d0 && codePoint <= 0x20ff) ||
    (codePoint >= 0xfe20 && codePoint <= 0xfe2f)
  ) {
    return true
  }

  // Indic script combining marks (covers Devanagari through Malayalam)
  if (codePoint >= 0x0900 && codePoint <= 0x0d4f) {
    // Signs and vowel marks at start of each script block.
    // Each script block is 0x80 code points wide, so the low 7 bits give the
    // position inside the block.
    const offset = codePoint & 0x7f
    if (offset <= 0x03) return true // Signs at block start
    if (offset >= 0x3a && offset <= 0x4f) return true // Vowel signs, virama
    if (offset >= 0x51 && offset <= 0x57) return true // Stress signs
    if (offset >= 0x62 && offset <= 0x63) return true // Vowel signs
  }

  // Thai/Lao combining marks
  // Note: U+0E32 (SARA AA), U+0E33 (SARA AM), U+0EB2, U+0EB3 are spacing vowels (width 1), not combining marks
  if (
    codePoint === 0x0e31 || // Thai MAI HAN-AKAT
    (codePoint >= 0x0e34 && codePoint <= 0x0e3a) || // Thai vowel signs (skip U+0E32, U+0E33)
    (codePoint >= 0x0e47 && codePoint <= 0x0e4e) || // Thai vowel signs and marks
    codePoint === 0x0eb1 || // Lao MAI KAN
    (codePoint >= 0x0eb4 && codePoint <= 0x0ebc) || // Lao vowel signs (skip U+0EB2, U+0EB3)
    (codePoint >= 0x0ec8 && codePoint <= 0x0ecd) // Lao tone marks
  ) {
    return true
  }

  // Arabic formatting
  if (
    (codePoint >= 0x0600 && codePoint <= 0x0605) ||
    codePoint === 0x06dd ||
    codePoint === 0x070f ||
    codePoint === 0x08e2
  ) {
    return true
  }

  // Surrogates, tag characters
  if (codePoint >= 0xd800 && codePoint <= 0xdfff) return true
  if (codePoint >= 0xe0000 && codePoint <= 0xe007f) return true

  return false
}

// Note: complex-script graphemes like Devanagari क्ष (ka+virama+ZWJ+ssa) render
// as a single ligature glyph but occupy 2 terminal cells (wcwidth sums the base
// consonants). Bun.stringWidth=2 matches terminal cell allocation, which is what
// we need for cursor positioning — the JS fallback's grapheme-cluster width of 1
// would desync Ink's layout from the terminal.
//
// Bun.stringWidth is resolved once at module scope rather than checked on every
// call — typeof guards deopt property access and this is a hot path (~100k calls/frame).
const bunStringWidth =
  typeof Bun !== 'undefined' && typeof Bun.stringWidth === 'function'
    ? Bun.stringWidth
    : null

// Allocated once so the hot path does not build a new options object per call.
const BUN_STRING_WIDTH_OPTS = { ambiguousIsNarrow: true } as const

/**
 * Display width of a string in terminal cells.
 *
 * Bound once at module load: delegates to Bun.stringWidth (with ambiguous-width
 * characters treated as narrow) when that function exists, otherwise uses the
 * JavaScript fallback defined in this module.
 *
 * @param str - Text to measure.
 * @returns The width in terminal cells.
 */
export const stringWidth: (str: string) => number = bunStringWidth
  ? str => bunStringWidth(str, BUN_STRING_WIDTH_OPTS)
  : stringWidthJavaScript