ink/stringWidth.ts at main · oppi.li/claude-code

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / ink / stringWidth.ts
at main 222 lines 7.2 kB view raw
wrap content
oppi.li dump from zip 6d ago
63aada3f
  1import emojiRegex from 'emoji-regex'
  2import { eastAsianWidth } from 'get-east-asian-width'
  3import stripAnsi from 'strip-ansi'
  4import { getGraphemeSegmenter } from '../utils/intl.js'
  5
  6const EMOJI_REGEX = emojiRegex()
  7
  8/**
  9 * Fallback JavaScript implementation of stringWidth when Bun.stringWidth is not available.
 10 *
 11 * Get the display width of a string as it would appear in a terminal.
 12 *
 13 * This is a more accurate alternative to the string-width package that correctly handles
 14 * characters like ⚠ (U+26A0) which string-width incorrectly reports as width 2.
 15 *
 16 * The implementation uses eastAsianWidth directly with ambiguousAsWide: false,
 17 * which correctly treats ambiguous-width characters as narrow (width 1) as
 18 * recommended by the Unicode standard for Western contexts.
 19 */
 20function stringWidthJavaScript(str: string): number {
 21  if (typeof str !== 'string' || str.length === 0) {
 22    return 0
 23  }
 24
 25  // Fast path: pure ASCII string (no ANSI codes, no wide chars)
 26  let isPureAscii = true
 27  for (let i = 0; i < str.length; i++) {
 28    const code = str.charCodeAt(i)
 29    // Check for non-ASCII or ANSI escape (0x1b)
 30    if (code >= 127 || code === 0x1b) {
 31      isPureAscii = false
 32      break
 33    }
 34  }
 35  if (isPureAscii) {
 36    // Count printable characters (exclude control chars)
 37    let width = 0
 38    for (let i = 0; i < str.length; i++) {
 39      const code = str.charCodeAt(i)
 40      if (code > 0x1f) {
 41        width++
 42      }
 43    }
 44    return width
 45  }
 46
 47  // Strip ANSI if escape character is present
 48  if (str.includes('\x1b')) {
 49    str = stripAnsi(str)
 50    if (str.length === 0) {
 51      return 0
 52    }
 53  }
 54
 55  // Fast path: simple Unicode (no emoji, variation selectors, or joiners)
 56  if (!needsSegmentation(str)) {
 57    let width = 0
 58    for (const char of str) {
 59      const codePoint = char.codePointAt(0)!
 60      if (!isZeroWidth(codePoint)) {
 61        width += eastAsianWidth(codePoint, { ambiguousAsWide: false })
 62      }
 63    }
 64    return width
 65  }
 66
 67  let width = 0
 68
 69  for (const { segment: grapheme } of getGraphemeSegmenter().segment(str)) {
 70    // Check for emoji first (most emoji sequences are width 2)
 71    EMOJI_REGEX.lastIndex = 0
 72    if (EMOJI_REGEX.test(grapheme)) {
 73      width += getEmojiWidth(grapheme)
 74      continue
 75    }
 76
 77    // Calculate width for non-emoji graphemes
 78    // For grapheme clusters (like Devanagari conjuncts with virama+ZWJ), only count
 79    // the first non-zero-width character's width since the cluster renders as one glyph
 80    for (const char of grapheme) {
 81      const codePoint = char.codePointAt(0)!
 82      if (!isZeroWidth(codePoint)) {
 83        width += eastAsianWidth(codePoint, { ambiguousAsWide: false })
 84        break
 85      }
 86    }
 87  }
 88
 89  return width
 90}
 91
 92function needsSegmentation(str: string): boolean {
 93  for (const char of str) {
 94    const cp = char.codePointAt(0)!
 95    // Emoji ranges
 96    if (cp >= 0x1f300 && cp <= 0x1faff) return true
 97    if (cp >= 0x2600 && cp <= 0x27bf) return true
 98    if (cp >= 0x1f1e6 && cp <= 0x1f1ff) return true
 99    // Variation selectors, ZWJ
100    if (cp >= 0xfe00 && cp <= 0xfe0f) return true
101    if (cp === 0x200d) return true
102  }
103  return false
104}
105
/**
 * Width in terminal cells of a grapheme already identified as an emoji.
 *
 * @param grapheme - A single grapheme cluster.
 * @returns 1 for a lone regional indicator or an incomplete keycap
 *   (digit, `#` or `*` followed only by VS16); 2 for everything else.
 */
function getEmojiWidth(grapheme: string): number {
  const lead = grapheme.codePointAt(0)!

  // Regional indicators: a single one is width 1, a pair (flag) is width 2.
  // Each indicator is an astral code point (two UTF-16 units), so a total
  // length of 2 means the grapheme holds exactly one code point.
  const startsWithRegionalIndicator = lead >= 0x1f1e6 && lead <= 0x1f1ff
  if (startsWithRegionalIndicator) {
    return grapheme.length === 2 ? 1 : 2
  }

  // Incomplete keycap: digit/symbol + VS16 without the enclosing U+20E3
  const isKeycapBase =
    lead === 0x23 || lead === 0x2a || (lead >= 0x30 && lead <= 0x39)
  if (
    isKeycapBase &&
    grapheme.length === 2 &&
    grapheme.charCodeAt(1) === 0xfe0f
  ) {
    return 1
  }

  return 2
}
128
/**
 * Reports whether a code point occupies no terminal cell: control
 * characters, invisible formatting characters, variation selectors,
 * combining marks, surrogates, and tag characters.
 *
 * @param codePoint - Unicode code point to classify.
 * @returns true when the code point contributes no width.
 */
function isZeroWidth(codePoint: number): boolean {
  // Everything below U+0300 is decided here: printable ASCII is visible,
  // U+00A0..U+02FF is visible except the soft hyphen, and what remains is
  // only zero-width when it is a C0/C1 control character.
  if (codePoint < 0x0300) {
    if (codePoint >= 0x20 && codePoint < 0x7f) return false
    if (codePoint >= 0xa0) return codePoint === 0x00ad
    return codePoint <= 0x1f || (codePoint >= 0x7f && codePoint <= 0x9f)
  }

  // Combining diacritical mark blocks
  if (
    codePoint <= 0x036f ||
    (codePoint >= 0x1ab0 && codePoint <= 0x1aff) ||
    (codePoint >= 0x1dc0 && codePoint <= 0x1dff) ||
    (codePoint >= 0x20d0 && codePoint <= 0x20ff) ||
    (codePoint >= 0xfe20 && codePoint <= 0xfe2f)
  ) {
    return true
  }

  // Arabic formatting characters
  if (
    codePoint === 0x06dd ||
    codePoint === 0x070f ||
    codePoint === 0x08e2 ||
    (codePoint >= 0x0600 && codePoint <= 0x0605)
  ) {
    return true
  }

  // Indic script combining marks (U+0900..U+0D4F). The low 7 bits of the
  // code point are tested against the same positions in every 128-code-point
  // block of this range.
  if (codePoint >= 0x0900 && codePoint <= 0x0d4f) {
    const slot = codePoint & 0x7f
    if (
      slot <= 0x03 || // Signs at block start
      (slot >= 0x3a && slot <= 0x4f) || // Vowel signs, virama
      (slot >= 0x51 && slot <= 0x57) || // Stress signs
      slot === 0x62 || // Vowel signs
      slot === 0x63
    ) {
      return true
    }
  }

  // Thai/Lao combining marks.
  // U+0E32 (SARA AA), U+0E33 (SARA AM), U+0EB2 and U+0EB3 are spacing vowels
  // (width 1), so they are deliberately left out.
  if (
    codePoint === 0x0e31 || // Thai MAI HAN-AKAT
    codePoint === 0x0eb1 || // Lao MAI KAN
    (codePoint >= 0x0e34 && codePoint <= 0x0e3a) || // Thai vowel signs
    (codePoint >= 0x0e47 && codePoint <= 0x0e4e) || // Thai vowel signs and marks
    (codePoint >= 0x0eb4 && codePoint <= 0x0ebc) || // Lao vowel signs
    (codePoint >= 0x0ec8 && codePoint <= 0x0ecd) // Lao tone marks
  ) {
    return true
  }

  // Zero-width and invisible characters
  if (
    codePoint === 0xfeff || // BOM
    (codePoint >= 0x200b && codePoint <= 0x200d) || // ZW space/joiner
    (codePoint >= 0x2060 && codePoint <= 0x2064) // Word joiner etc.
  ) {
    return true
  }

  // Variation selectors (BMP and supplementary)
  if (
    (codePoint >= 0xfe00 && codePoint <= 0xfe0f) ||
    (codePoint >= 0xe0100 && codePoint <= 0xe01ef)
  ) {
    return true
  }

  // Surrogates and tag characters are zero-width; anything else is visible
  return (
    (codePoint >= 0xd800 && codePoint <= 0xdfff) ||
    (codePoint >= 0xe0000 && codePoint <= 0xe007f)
  )
}
204
// Why the native implementation is preferred: a complex-script grapheme such
// as Devanagari क्ष (ka+virama+ZWJ+ssa) renders as a single ligature glyph yet
// occupies 2 terminal cells (wcwidth sums the base consonants).
// Bun.stringWidth reports 2, matching terminal cell allocation, which is what
// cursor positioning needs. The JS fallback's grapheme-cluster width of 1
// would desync Ink's layout from the terminal.
//
// The lookup of Bun.stringWidth happens once at module scope instead of on
// every call: typeof guards deopt property access, and this is a hot path
// (~100k calls/frame).
const BUN_STRING_WIDTH_OPTS = { ambiguousIsNarrow: true } as const

const bunStringWidth =
  typeof Bun === 'undefined' || typeof Bun.stringWidth !== 'function'
    ? null
    : Bun.stringWidth

/**
 * Display width of `str` in terminal cells. Delegates to Bun.stringWidth
 * (with ambiguous-width characters treated as narrow) when it exists, and to
 * the JavaScript fallback otherwise.
 */
export const stringWidth: (str: string) => number =
  bunStringWidth === null
    ? stringWidthJavaScript
    : (str: string): number => bunStringWidth(str, BUN_STRING_WIDTH_OPTS)