source dump of claude code
at main 394 lines 12 kB view raw
/**
 * ANSI Parser - Semantic Action Generator
 *
 * A streaming parser for ANSI escape sequences that produces semantic actions.
 * Uses the tokenizer for escape sequence boundary detection, then interprets
 * each sequence to produce structured actions.
 *
 * Key design decisions:
 * - Streaming: can process input incrementally
 * - Semantic output: produces structured actions, not string tokens
 * - Style tracking: maintains current text style state
 */

import { getGraphemeSegmenter } from '../../utils/intl.js'
import { C0 } from './ansi.js'
import { CSI, CURSOR_STYLES, ERASE_DISPLAY, ERASE_LINE_REGION } from './csi.js'
import { DEC } from './dec.js'
import { parseEsc } from './esc.js'
import { parseOSC } from './osc.js'
import { applySGR } from './sgr.js'
import { createTokenizer, type Token, type Tokenizer } from './tokenize.js'
import type { Action, Grapheme, TextStyle } from './types.js'
import { defaultStyle } from './types.js'

// =============================================================================
// Grapheme Utilities
// =============================================================================

/** U+FE0F VARIATION SELECTOR-16, which requests emoji presentation. */
const VARIATION_SELECTOR_16 = 0xfe0f

/**
 * Reports whether `codePoint` falls in one of the ranges this parser treats as
 * emoji (and therefore as two cells wide).
 */
function isEmoji(codePoint: number): boolean {
  return (
    (codePoint >= 0x2600 && codePoint <= 0x26ff) ||
    (codePoint >= 0x2700 && codePoint <= 0x27bf) ||
    (codePoint >= 0x1f300 && codePoint <= 0x1f9ff) ||
    (codePoint >= 0x1fa00 && codePoint <= 0x1faff) ||
    (codePoint >= 0x1f1e0 && codePoint <= 0x1f1ff)
  )
}

/**
 * Reports whether `codePoint` falls in one of the East Asian wide/fullwidth
 * ranges this parser treats as two cells wide (Hangul, CJK, fullwidth forms,
 * and the supplementary ideographic planes).
 */
function isEastAsianWide(codePoint: number): boolean {
  return (
    (codePoint >= 0x1100 && codePoint <= 0x115f) ||
    (codePoint >= 0x2e80 && codePoint <= 0x9fff) ||
    (codePoint >= 0xac00 && codePoint <= 0xd7a3) ||
    (codePoint >= 0xf900 && codePoint <= 0xfaff) ||
    (codePoint >= 0xfe10 && codePoint <= 0xfe1f) ||
    (codePoint >= 0xfe30 && codePoint <= 0xfe6f) ||
    (codePoint >= 0xff00 && codePoint <= 0xff60) ||
    (codePoint >= 0xffe0 && codePoint <= 0xffe6) ||
    (codePoint >= 0x20000 && codePoint <= 0x2fffd) ||
    (codePoint >= 0x30000 && codePoint <= 0x3fffd)
  )
}

/**
 * Estimates how many terminal cells a single grapheme cluster occupies.
 *
 * A cluster is two cells wide when any of its code points is emoji or East
 * Asian wide, or when a non-leading VARIATION SELECTOR-16 asks for emoji
 * presentation (e.g. keycap sequences). Everything else is one cell. In
 * particular a base letter followed by combining marks ("e" + U+0301) is one
 * cell: being made of several code points does not by itself make a cluster
 * wide.
 *
 * @param grapheme - One grapheme cluster, as produced by the segmenter.
 * @returns 1 or 2; an empty string yields 1.
 */
function graphemeWidth(grapheme: string): 1 | 2 {
  let isFirst = true
  // for..of walks code points, so surrogate pairs are never split.
  for (const char of grapheme) {
    const codePoint = char.codePointAt(0)
    if (codePoint !== undefined) {
      if (isEmoji(codePoint) || isEastAsianWide(codePoint)) return 2
      // A lone VS16 has nothing to modify; only count it after a base.
      if (!isFirst && codePoint === VARIATION_SELECTOR_16) return 2
    }
    isFirst = false
  }
  return 1
}

/**
 * Splits `str` into grapheme clusters using the shared segmenter and pairs
 * each cluster with its estimated cell width.
 */
function* segmentGraphemes(str: string): Generator<Grapheme> {
  for (const { segment } of getGraphemeSegmenter().segment(str)) {
    yield { value: segment, width: graphemeWidth(segment) }
  }
}

// =============================================================================
// Sequence Parsing
// =============================================================================

/**
 * Splits a CSI parameter string on `;` and `:` into numbers.
 *
 * @param paramStr - Parameter bytes only (no private marker, intermediates or
 *   final byte).
 * @returns One number per field; an empty field becomes 0, and an empty
 *   string yields an empty array.
 */
function parseCSIParams(paramStr: string): number[] {
  if (paramStr === '') return []
  return paramStr.split(/[;:]/).map(s => (s === '' ? 0 : parseInt(s, 10)))
}

/**
 * Parse a raw CSI sequence (e.g., "\x1b[31m") into an action.
 *
 * @param rawSequence - The full sequence including the two-character
 *   introducer, which is stripped before interpretation.
 * @returns `null` when nothing follows the introducer, a structured action
 *   for recognised sequences, or an `unknown` action carrying the raw
 *   sequence otherwise.
 */
function parseCSI(rawSequence: string): Action | null {
  const inner = rawSequence.slice(2)
  if (inner.length === 0) return null

  const finalByte = inner.charCodeAt(inner.length - 1)
  const beforeFinal = inner.slice(0, -1)

  let privateMode = ''
  let paramStr = beforeFinal
  let intermediate = ''

  // A leading ?, > or = marks a private sequence and is not a parameter.
  if (beforeFinal.length > 0 && '?>='.includes(beforeFinal[0]!)) {
    privateMode = beforeFinal[0]!
    paramStr = beforeFinal.slice(1)
  }

  // Anything trailing the parameters that is not a digit or separator is
  // treated as intermediate bytes (e.g. the space in "2 q").
  const intermediateMatch = paramStr.match(/([^0-9;:]+)$/)
  if (intermediateMatch) {
    intermediate = intermediateMatch[1]!
    paramStr = paramStr.slice(0, -intermediate.length)
  }

  const params = parseCSIParams(paramStr)
  const p0 = params[0] ?? 1
  const p1 = params[1] ?? 1

  // Repeat counts and 1-based coordinates: an omitted OR zero parameter means
  // 1. parseCSIParams turns an empty field into 0, so `?? 1` alone would let
  // "\x1b[;5H" address row 0 and "\x1b[0A" move zero rows. The `> 0` test
  // also absorbs NaN from a malformed field.
  const n0 = p0 > 0 ? p0 : 1
  const n1 = p1 > 0 ? p1 : 1

  // SGR (Select Graphic Rendition) - parameters are passed on unparsed
  if (finalByte === CSI.SGR && privateMode === '') {
    return { type: 'sgr', params: paramStr }
  }

  // Cursor movement
  if (finalByte === CSI.CUU) {
    return {
      type: 'cursor',
      action: { type: 'move', direction: 'up', count: n0 },
    }
  }
  if (finalByte === CSI.CUD) {
    return {
      type: 'cursor',
      action: { type: 'move', direction: 'down', count: n0 },
    }
  }
  if (finalByte === CSI.CUF) {
    return {
      type: 'cursor',
      action: { type: 'move', direction: 'forward', count: n0 },
    }
  }
  if (finalByte === CSI.CUB) {
    return {
      type: 'cursor',
      action: { type: 'move', direction: 'back', count: n0 },
    }
  }
  if (finalByte === CSI.CNL) {
    return { type: 'cursor', action: { type: 'nextLine', count: n0 } }
  }
  if (finalByte === CSI.CPL) {
    return { type: 'cursor', action: { type: 'prevLine', count: n0 } }
  }
  if (finalByte === CSI.CHA) {
    return { type: 'cursor', action: { type: 'column', col: n0 } }
  }
  if (finalByte === CSI.CUP || finalByte === CSI.HVP) {
    return { type: 'cursor', action: { type: 'position', row: n0, col: n1 } }
  }
  if (finalByte === CSI.VPA) {
    return { type: 'cursor', action: { type: 'row', row: n0 } }
  }

  // Erase - these default to 0, and unrecognised values fall back to 'toEnd'
  if (finalByte === CSI.ED) {
    const region = ERASE_DISPLAY[params[0] ?? 0] ?? 'toEnd'
    return { type: 'erase', action: { type: 'display', region } }
  }
  if (finalByte === CSI.EL) {
    const region = ERASE_LINE_REGION[params[0] ?? 0] ?? 'toEnd'
    return { type: 'erase', action: { type: 'line', region } }
  }
  if (finalByte === CSI.ECH) {
    return { type: 'erase', action: { type: 'chars', count: n0 } }
  }

  // Scroll
  if (finalByte === CSI.SU) {
    return { type: 'scroll', action: { type: 'up', count: n0 } }
  }
  if (finalByte === CSI.SD) {
    return { type: 'scroll', action: { type: 'down', count: n0 } }
  }
  if (finalByte === CSI.DECSTBM) {
    // NOTE(review): an omitted bottom margin should mean "last row", which the
    // parser cannot know; the existing fallback value is kept unchanged.
    return {
      type: 'scroll',
      action: { type: 'setRegion', top: n0, bottom: p1 },
    }
  }

  // Cursor save/restore
  if (finalByte === CSI.SCOSC) {
    return { type: 'cursor', action: { type: 'save' } }
  }
  if (finalByte === CSI.SCORC) {
    return { type: 'cursor', action: { type: 'restore' } }
  }

  // Cursor style - 0 is a meaningful style index here, so use the raw value
  if (finalByte === CSI.DECSCUSR && intermediate === ' ') {
    const styleInfo = CURSOR_STYLES[p0] ?? CURSOR_STYLES[0]!
    return { type: 'cursor', action: { type: 'style', ...styleInfo } }
  }

  // Private modes - only the first parameter is inspected
  if (privateMode === '?' && (finalByte === CSI.SM || finalByte === CSI.RM)) {
    const enabled = finalByte === CSI.SM

    if (p0 === DEC.CURSOR_VISIBLE) {
      return {
        type: 'cursor',
        action: enabled ? { type: 'show' } : { type: 'hide' },
      }
    }
    if (p0 === DEC.ALT_SCREEN_CLEAR || p0 === DEC.ALT_SCREEN) {
      return { type: 'mode', action: { type: 'alternateScreen', enabled } }
    }
    if (p0 === DEC.BRACKETED_PASTE) {
      return { type: 'mode', action: { type: 'bracketedPaste', enabled } }
    }
    if (p0 === DEC.MOUSE_NORMAL) {
      return {
        type: 'mode',
        action: { type: 'mouseTracking', mode: enabled ? 'normal' : 'off' },
      }
    }
    if (p0 === DEC.MOUSE_BUTTON) {
      return {
        type: 'mode',
        action: { type: 'mouseTracking', mode: enabled ? 'button' : 'off' },
      }
    }
    if (p0 === DEC.MOUSE_ANY) {
      return {
        type: 'mode',
        action: { type: 'mouseTracking', mode: enabled ? 'any' : 'off' },
      }
    }
    if (p0 === DEC.FOCUS_EVENTS) {
      return { type: 'mode', action: { type: 'focusEvents', enabled } }
    }
  }

  return { type: 'unknown', sequence: rawSequence }
}

/**
 * Identify the type of escape sequence from its raw form.
 *
 * Only the first two characters are examined: the sequence must start with
 * ESC, and the second character selects the family. Anything shorter than two
 * characters or not starting with ESC is 'unknown'.
 */
function identifySequence(
  seq: string,
): 'csi' | 'osc' | 'esc' | 'ss3' | 'unknown' {
  if (seq.length < 2) return 'unknown'
  if (seq.charCodeAt(0) !== C0.ESC) return 'unknown'

  const second = seq.charCodeAt(1)
  if (second === 0x5b) return 'csi' // [
  if (second === 0x5d) return 'osc' // ]
  if (second === 0x4f) return 'ss3' // O
  return 'esc'
}

// =============================================================================
// Main Parser
// =============================================================================

/**
 * Parser class - maintains state for streaming/incremental parsing
 *
 * Besides the tokenizer state, the parser tracks the current text style
 * (updated by SGR sequences, which are consumed rather than emitted) and
 * whether output is currently inside a hyperlink.
 *
 * Usage:
 * ```typescript
 * const parser = new Parser()
 * const actions1 = parser.feed('partial\x1b[')
 * const actions2 = parser.feed('31mred') // state maintained internally
 * ```
 */
export class Parser {
  private tokenizer: Tokenizer = createTokenizer()

  /** Style applied to text actions; replaced whenever an SGR is processed. */
  style: TextStyle = defaultStyle()
  /** True between a link 'start' action and the next non-start link action. */
  inLink = false
  /** URL of the currently open link, if any. */
  linkUrl: string | undefined

  /** Restore the tokenizer, style and link tracking to their initial state. */
  reset(): void {
    this.tokenizer.reset()
    this.style = defaultStyle()
    this.inLink = false
    this.linkUrl = undefined
  }

  /** Feed input and get resulting actions */
  feed(input: string): Action[] {
    const actions: Action[] = []

    for (const token of this.tokenizer.feed(input)) {
      // Push one by one rather than spreading, so a very large batch cannot
      // exceed the engine's argument-count limit.
      for (const action of this.processToken(token)) {
        actions.push(action)
      }
    }

    return actions
  }

  /** Dispatch a token to the text or sequence handler. */
  private processToken(token: Token): Action[] {
    switch (token.type) {
      case 'text':
        return this.processText(token.value)

      case 'sequence':
        return this.processSequence(token.value)
    }
  }

  /**
   * Turn a run of plain text into text actions, splitting around embedded BEL
   * characters, each of which becomes its own 'bell' action. Every text
   * action carries a snapshot copy of the current style.
   */
  private processText(text: string): Action[] {
    const actions: Action[] = []
    let pending = ''

    // Emit the accumulated run (if any) as one text action and clear it.
    const flush = (): void => {
      if (!pending) return
      const graphemes = [...segmentGraphemes(pending)]
      if (graphemes.length > 0) {
        actions.push({ type: 'text', graphemes, style: { ...this.style } })
      }
      pending = ''
    }

    for (const char of text) {
      if (char.charCodeAt(0) === C0.BEL) {
        flush()
        actions.push({ type: 'bell' })
      } else {
        pending += char
      }
    }
    flush()

    return actions
  }

  /**
   * Interpret one complete escape sequence. SGR sequences update `style` and
   * produce no action; link OSCs update `inLink`/`linkUrl` and are also
   * returned; unrecognised sequences are returned as 'unknown'.
   */
  private processSequence(seq: string): Action[] {
    const seqType = identifySequence(seq)

    switch (seqType) {
      case 'csi': {
        const action = parseCSI(seq)
        if (!action) return []
        if (action.type === 'sgr') {
          this.style = applySGR(action.params, this.style)
          return []
        }
        return [action]
      }

      case 'osc': {
        // Extract OSC content (between ESC ] and terminator)
        let content = seq.slice(2)
        // Remove terminator (BEL or ESC \)
        if (content.endsWith('\x07')) {
          content = content.slice(0, -1)
        } else if (content.endsWith('\x1b\\')) {
          content = content.slice(0, -2)
        }

        const action = parseOSC(content)
        if (action) {
          if (action.type === 'link') {
            if (action.action.type === 'start') {
              this.inLink = true
              this.linkUrl = action.action.url
            } else {
              this.inLink = false
              this.linkUrl = undefined
            }
          }
          return [action]
        }
        return []
      }

      case 'esc': {
        // Drop the leading ESC; parseEsc receives the remainder.
        const escContent = seq.slice(1)
        const action = parseEsc(escContent)
        return action ? [action] : []
      }

      case 'ss3':
        // SS3 sequences are typically cursor keys in application mode
        // For output parsing, treat as unknown
        return [{ type: 'unknown', sequence: seq }]

      default:
        return [{ type: 'unknown', sequence: seq }]
    }
  }
}