source dump of claude code
at main 301 lines 8.9 kB view raw
1/** 2 * Query profiling utility for measuring and reporting time spent in the query 3 * pipeline from user input to first token arrival. Enable by setting CLAUDE_CODE_PROFILE_QUERY=1 4 * 5 * Uses Node.js built-in performance hooks API for standard timing measurement. 6 * Tracks each query session with detailed checkpoints for identifying bottlenecks. 7 * 8 * Checkpoints tracked (in order): 9 * - query_user_input_received: Start of profiling 10 * - query_context_loading_start/end: Loading system prompts and contexts 11 * - query_query_start: Entry to query call from REPL 12 * - query_fn_entry: Entry to query() function 13 * - query_microcompact_start/end: Microcompaction of messages 14 * - query_autocompact_start/end: Autocompaction check 15 * - query_setup_start/end: StreamingToolExecutor and model setup 16 * - query_api_loop_start: Start of API retry loop 17 * - query_api_streaming_start: Start of streaming API call 18 * - query_tool_schema_build_start/end: Building tool schemas 19 * - query_message_normalization_start/end: Normalizing messages 20 * - query_client_creation_start/end: Creating Anthropic client 21 * - query_api_request_sent: HTTP request dispatched (before await, inside retry body) 22 * - query_response_headers_received: .withResponse() resolved (headers arrived) 23 * - query_first_chunk_received: First streaming chunk received (TTFT) 24 * - query_api_streaming_end: Streaming complete 25 * - query_tool_execution_start/end: Tool execution 26 * - query_recursive_call: Before recursive query call 27 * - query_end: End of query 28 */ 29 30import { logForDebugging } from './debug.js' 31import { isEnvTruthy } from './envUtils.js' 32import { formatMs, formatTimelineLine, getPerformance } from './profilerBase.js' 33 34// Module-level state - initialized once when the module loads 35// eslint-disable-next-line custom-rules/no-process-env-top-level 36const ENABLED = isEnvTruthy(process.env.CLAUDE_CODE_PROFILE_QUERY) 37 38// Track memory snapshots separately (perf_hooks doesn't track memory) 39const memorySnapshots = new Map<string, NodeJS.MemoryUsage>() 40 41// Track query count for reporting 42let queryCount = 0 43 44// Track first token received time separately for summary 45let firstTokenTime: number | null = null 46 47/** 48 * Start profiling a new query session 49 */ 50export function startQueryProfile(): void { 51 if (!ENABLED) return 52 53 const perf = getPerformance() 54 55 // Clear previous marks and memory snapshots 56 perf.clearMarks() 57 memorySnapshots.clear() 58 firstTokenTime = null 59 60 queryCount++ 61 62 // Record the start checkpoint 63 queryCheckpoint('query_user_input_received') 64} 65 66/** 67 * Record a checkpoint with the given name 68 */ 69export function queryCheckpoint(name: string): void { 70 if (!ENABLED) return 71 72 const perf = getPerformance() 73 perf.mark(name) 74 memorySnapshots.set(name, process.memoryUsage()) 75 76 // Track first token specially 77 if (name === 'query_first_chunk_received' && firstTokenTime === null) { 78 const marks = perf.getEntriesByType('mark') 79 if (marks.length > 0) { 80 const lastMark = marks[marks.length - 1] 81 firstTokenTime = lastMark?.startTime ?? 0 82 } 83 } 84} 85 86/** 87 * End the current query profiling session 88 */ 89export function endQueryProfile(): void { 90 if (!ENABLED) return 91 92 queryCheckpoint('query_profile_end') 93} 94 95/** 96 * Identify slow operations (> 100ms delta) 97 */ 98function getSlowWarning(deltaMs: number, name: string): string { 99 // Don't flag the first checkpoint as slow - it measures time from process start, 100 // not actual processing overhead 101 if (name === 'query_user_input_received') { 102 return '' 103 } 104 105 if (deltaMs > 1000) { 106 return ` ⚠️ VERY SLOW` 107 } 108 if (deltaMs > 100) { 109 return ` ⚠️ SLOW` 110 } 111 112 // Specific warnings for known bottlenecks 113 if (name.includes('git_status') && deltaMs > 50) { 114 return ' ⚠️ git status' 115 } 116 if (name.includes('tool_schema') && deltaMs > 50) { 117 return ' ⚠️ tool schemas' 118 } 119 if (name.includes('client_creation') && deltaMs > 50) { 120 return ' ⚠️ client creation' 121 } 122 123 return '' 124} 125 126/** 127 * Get a formatted report of all checkpoints for the current/last query 128 */ 129function getQueryProfileReport(): string { 130 if (!ENABLED) { 131 return 'Query profiling not enabled (set CLAUDE_CODE_PROFILE_QUERY=1)' 132 } 133 134 const perf = getPerformance() 135 const marks = perf.getEntriesByType('mark') 136 if (marks.length === 0) { 137 return 'No query profiling checkpoints recorded' 138 } 139 140 const lines: string[] = [] 141 lines.push('='.repeat(80)) 142 lines.push(`QUERY PROFILING REPORT - Query #${queryCount}`) 143 lines.push('='.repeat(80)) 144 lines.push('') 145 146 // Use first mark as baseline (query start time) to show relative times 147 const baselineTime = marks[0]?.startTime ?? 0 148 let prevTime = baselineTime 149 let apiRequestSentTime = 0 150 let firstChunkTime = 0 151 152 for (const mark of marks) { 153 const relativeTime = mark.startTime - baselineTime 154 const deltaMs = mark.startTime - prevTime 155 lines.push( 156 formatTimelineLine( 157 relativeTime, 158 deltaMs, 159 mark.name, 160 memorySnapshots.get(mark.name), 161 10, 162 9, 163 getSlowWarning(deltaMs, mark.name), 164 ), 165 ) 166 167 // Track key milestones for summary (use relative times) 168 if (mark.name === 'query_api_request_sent') { 169 apiRequestSentTime = relativeTime 170 } 171 if (mark.name === 'query_first_chunk_received') { 172 firstChunkTime = relativeTime 173 } 174 175 prevTime = mark.startTime 176 } 177 178 // Calculate summary statistics (relative to baseline) 179 const lastMark = marks[marks.length - 1] 180 const totalTime = lastMark ? lastMark.startTime - baselineTime : 0 181 182 lines.push('') 183 lines.push('-'.repeat(80)) 184 185 if (firstChunkTime > 0) { 186 const preRequestOverhead = apiRequestSentTime 187 const networkLatency = firstChunkTime - apiRequestSentTime 188 const preRequestPercent = ( 189 (preRequestOverhead / firstChunkTime) * 190 100 191 ).toFixed(1) 192 const networkPercent = ((networkLatency / firstChunkTime) * 100).toFixed(1) 193 194 lines.push(`Total TTFT: ${formatMs(firstChunkTime)}ms`) 195 lines.push( 196 ` - Pre-request overhead: ${formatMs(preRequestOverhead)}ms (${preRequestPercent}%)`, 197 ) 198 lines.push( 199 ` - Network latency: ${formatMs(networkLatency)}ms (${networkPercent}%)`, 200 ) 201 } else { 202 lines.push(`Total time: ${formatMs(totalTime)}ms`) 203 } 204 205 // Add phase summary 206 lines.push(getPhaseSummary(marks, baselineTime)) 207 208 lines.push('='.repeat(80)) 209 210 return lines.join('\n') 211} 212 213/** 214 * Get phase-based summary showing time spent in each major phase 215 */ 216function getPhaseSummary( 217 marks: Array<{ name: string; startTime: number }>, 218 baselineTime: number, 219): string { 220 const phases: Array<{ name: string; start: string; end: string }> = [ 221 { 222 name: 'Context loading', 223 start: 'query_context_loading_start', 224 end: 'query_context_loading_end', 225 }, 226 { 227 name: 'Microcompact', 228 start: 'query_microcompact_start', 229 end: 'query_microcompact_end', 230 }, 231 { 232 name: 'Autocompact', 233 start: 'query_autocompact_start', 234 end: 'query_autocompact_end', 235 }, 236 { name: 'Query setup', start: 'query_setup_start', end: 'query_setup_end' }, 237 { 238 name: 'Tool schemas', 239 start: 'query_tool_schema_build_start', 240 end: 'query_tool_schema_build_end', 241 }, 242 { 243 name: 'Message normalization', 244 start: 'query_message_normalization_start', 245 end: 'query_message_normalization_end', 246 }, 247 { 248 name: 'Client creation', 249 start: 'query_client_creation_start', 250 end: 'query_client_creation_end', 251 }, 252 { 253 name: 'Network TTFB', 254 start: 'query_api_request_sent', 255 end: 'query_first_chunk_received', 256 }, 257 { 258 name: 'Tool execution', 259 start: 'query_tool_execution_start', 260 end: 'query_tool_execution_end', 261 }, 262 ] 263 264 const markMap = new Map(marks.map(m => [m.name, m.startTime - baselineTime])) 265 266 const lines: string[] = [] 267 lines.push('') 268 lines.push('PHASE BREAKDOWN:') 269 270 for (const phase of phases) { 271 const startTime = markMap.get(phase.start) 272 const endTime = markMap.get(phase.end) 273 274 if (startTime !== undefined && endTime !== undefined) { 275 const duration = endTime - startTime 276 const bar = '█'.repeat(Math.min(Math.ceil(duration / 10), 50)) // 1 block per 10ms, max 50 277 lines.push( 278 ` ${phase.name.padEnd(22)} ${formatMs(duration).padStart(10)}ms ${bar}`, 279 ) 280 } 281 } 282 283 // Calculate pre-API overhead (everything before api_request_sent) 284 const apiRequestSent = markMap.get('query_api_request_sent') 285 if (apiRequestSent !== undefined) { 286 lines.push('') 287 lines.push( 288 ` ${'Total pre-API overhead'.padEnd(22)} ${formatMs(apiRequestSent).padStart(10)}ms`, 289 ) 290 } 291 292 return lines.join('\n') 293} 294 295/** 296 * Log the query profile report to debug output 297 */ 298export function logQueryProfileReport(): void { 299 if (!ENABLED) return 300 logForDebugging(getQueryProfileReport()) 301}