utils/queryProfiler.ts at main · oppi.li/claude-code

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / utils / queryProfiler.ts
at main 301 lines 8.9 kB view raw
wrap content
oppi.li dump from zip 2d ago
63aada3f
  1/**
  2 * Query profiling utility for measuring and reporting time spent in the query
  3 * pipeline from user input to first token arrival. Enable by setting CLAUDE_CODE_PROFILE_QUERY=1
  4 *
  5 * Uses Node.js built-in performance hooks API for standard timing measurement.
  6 * Tracks each query session with detailed checkpoints for identifying bottlenecks.
  7 *
  8 * Checkpoints tracked (in order):
  9 * - query_user_input_received: Start of profiling
 10 * - query_context_loading_start/end: Loading system prompts and contexts
 11 * - query_query_start: Entry to query call from REPL
 12 * - query_fn_entry: Entry to query() function
 13 * - query_microcompact_start/end: Microcompaction of messages
 14 * - query_autocompact_start/end: Autocompaction check
 15 * - query_setup_start/end: StreamingToolExecutor and model setup
 16 * - query_api_loop_start: Start of API retry loop
 17 * - query_api_streaming_start: Start of streaming API call
 18 * - query_tool_schema_build_start/end: Building tool schemas
 19 * - query_message_normalization_start/end: Normalizing messages
 20 * - query_client_creation_start/end: Creating Anthropic client
 21 * - query_api_request_sent: HTTP request dispatched (before await, inside retry body)
 22 * - query_response_headers_received: .withResponse() resolved (headers arrived)
 23 * - query_first_chunk_received: First streaming chunk received (TTFT)
 24 * - query_api_streaming_end: Streaming complete
 25 * - query_tool_execution_start/end: Tool execution
 26 * - query_recursive_call: Before recursive query call
 27 * - query_end: End of query
 28 */
 29
 30import { logForDebugging } from './debug.js'
 31import { isEnvTruthy } from './envUtils.js'
 32import { formatMs, formatTimelineLine, getPerformance } from './profilerBase.js'
 33
 34// Module-level state - initialized once when the module loads
 35// eslint-disable-next-line custom-rules/no-process-env-top-level
 36const ENABLED = isEnvTruthy(process.env.CLAUDE_CODE_PROFILE_QUERY)
 37
 38// Track memory snapshots separately (perf_hooks doesn't track memory)
 39const memorySnapshots = new Map<string, NodeJS.MemoryUsage>()
 40
 41// Track query count for reporting
 42let queryCount = 0
 43
 44// Track first token received time separately for summary
 45let firstTokenTime: number | null = null
 46
 47/**
 48 * Start profiling a new query session
 49 */
 50export function startQueryProfile(): void {
 51  if (!ENABLED) return
 52
 53  const perf = getPerformance()
 54
 55  // Clear previous marks and memory snapshots
 56  perf.clearMarks()
 57  memorySnapshots.clear()
 58  firstTokenTime = null
 59
 60  queryCount++
 61
 62  // Record the start checkpoint
 63  queryCheckpoint('query_user_input_received')
 64}
 65
 66/**
 67 * Record a checkpoint with the given name
 68 */
 69export function queryCheckpoint(name: string): void {
 70  if (!ENABLED) return
 71
 72  const perf = getPerformance()
 73  perf.mark(name)
 74  memorySnapshots.set(name, process.memoryUsage())
 75
 76  // Track first token specially
 77  if (name === 'query_first_chunk_received' && firstTokenTime === null) {
 78    const marks = perf.getEntriesByType('mark')
 79    if (marks.length > 0) {
 80      const lastMark = marks[marks.length - 1]
 81      firstTokenTime = lastMark?.startTime ?? 0
 82    }
 83  }
 84}
 85
 86/**
 87 * End the current query profiling session
 88 */
 89export function endQueryProfile(): void {
 90  if (!ENABLED) return
 91
 92  queryCheckpoint('query_profile_end')
 93}
 94
 95/**
 96 * Identify slow operations (> 100ms delta)
 97 */
 98function getSlowWarning(deltaMs: number, name: string): string {
 99  // Don't flag the first checkpoint as slow - it measures time from process start,
100  // not actual processing overhead
101  if (name === 'query_user_input_received') {
102    return ''
103  }
104
105  if (deltaMs > 1000) {
106    return ` ⚠️  VERY SLOW`
107  }
108  if (deltaMs > 100) {
109    return ` ⚠️  SLOW`
110  }
111
112  // Specific warnings for known bottlenecks
113  if (name.includes('git_status') && deltaMs > 50) {
114    return ' ⚠️  git status'
115  }
116  if (name.includes('tool_schema') && deltaMs > 50) {
117    return ' ⚠️  tool schemas'
118  }
119  if (name.includes('client_creation') && deltaMs > 50) {
120    return ' ⚠️  client creation'
121  }
122
123  return ''
124}
125
126/**
127 * Get a formatted report of all checkpoints for the current/last query
128 */
129function getQueryProfileReport(): string {
130  if (!ENABLED) {
131    return 'Query profiling not enabled (set CLAUDE_CODE_PROFILE_QUERY=1)'
132  }
133
134  const perf = getPerformance()
135  const marks = perf.getEntriesByType('mark')
136  if (marks.length === 0) {
137    return 'No query profiling checkpoints recorded'
138  }
139
140  const lines: string[] = []
141  lines.push('='.repeat(80))
142  lines.push(`QUERY PROFILING REPORT - Query #${queryCount}`)
143  lines.push('='.repeat(80))
144  lines.push('')
145
146  // Use first mark as baseline (query start time) to show relative times
147  const baselineTime = marks[0]?.startTime ?? 0
148  let prevTime = baselineTime
149  let apiRequestSentTime = 0
150  let firstChunkTime = 0
151
152  for (const mark of marks) {
153    const relativeTime = mark.startTime - baselineTime
154    const deltaMs = mark.startTime - prevTime
155    lines.push(
156      formatTimelineLine(
157        relativeTime,
158        deltaMs,
159        mark.name,
160        memorySnapshots.get(mark.name),
161        10,
162        9,
163        getSlowWarning(deltaMs, mark.name),
164      ),
165    )
166
167    // Track key milestones for summary (use relative times)
168    if (mark.name === 'query_api_request_sent') {
169      apiRequestSentTime = relativeTime
170    }
171    if (mark.name === 'query_first_chunk_received') {
172      firstChunkTime = relativeTime
173    }
174
175    prevTime = mark.startTime
176  }
177
178  // Calculate summary statistics (relative to baseline)
179  const lastMark = marks[marks.length - 1]
180  const totalTime = lastMark ? lastMark.startTime - baselineTime : 0
181
182  lines.push('')
183  lines.push('-'.repeat(80))
184
185  if (firstChunkTime > 0) {
186    const preRequestOverhead = apiRequestSentTime
187    const networkLatency = firstChunkTime - apiRequestSentTime
188    const preRequestPercent = (
189      (preRequestOverhead / firstChunkTime) *
190      100
191    ).toFixed(1)
192    const networkPercent = ((networkLatency / firstChunkTime) * 100).toFixed(1)
193
194    lines.push(`Total TTFT: ${formatMs(firstChunkTime)}ms`)
195    lines.push(
196      `  - Pre-request overhead: ${formatMs(preRequestOverhead)}ms (${preRequestPercent}%)`,
197    )
198    lines.push(
199      `  - Network latency: ${formatMs(networkLatency)}ms (${networkPercent}%)`,
200    )
201  } else {
202    lines.push(`Total time: ${formatMs(totalTime)}ms`)
203  }
204
205  // Add phase summary
206  lines.push(getPhaseSummary(marks, baselineTime))
207
208  lines.push('='.repeat(80))
209
210  return lines.join('\n')
211}
212
213/**
214 * Get phase-based summary showing time spent in each major phase
215 */
216function getPhaseSummary(
217  marks: Array<{ name: string; startTime: number }>,
218  baselineTime: number,
219): string {
220  const phases: Array<{ name: string; start: string; end: string }> = [
221    {
222      name: 'Context loading',
223      start: 'query_context_loading_start',
224      end: 'query_context_loading_end',
225    },
226    {
227      name: 'Microcompact',
228      start: 'query_microcompact_start',
229      end: 'query_microcompact_end',
230    },
231    {
232      name: 'Autocompact',
233      start: 'query_autocompact_start',
234      end: 'query_autocompact_end',
235    },
236    { name: 'Query setup', start: 'query_setup_start', end: 'query_setup_end' },
237    {
238      name: 'Tool schemas',
239      start: 'query_tool_schema_build_start',
240      end: 'query_tool_schema_build_end',
241    },
242    {
243      name: 'Message normalization',
244      start: 'query_message_normalization_start',
245      end: 'query_message_normalization_end',
246    },
247    {
248      name: 'Client creation',
249      start: 'query_client_creation_start',
250      end: 'query_client_creation_end',
251    },
252    {
253      name: 'Network TTFB',
254      start: 'query_api_request_sent',
255      end: 'query_first_chunk_received',
256    },
257    {
258      name: 'Tool execution',
259      start: 'query_tool_execution_start',
260      end: 'query_tool_execution_end',
261    },
262  ]
263
264  const markMap = new Map(marks.map(m => [m.name, m.startTime - baselineTime]))
265
266  const lines: string[] = []
267  lines.push('')
268  lines.push('PHASE BREAKDOWN:')
269
270  for (const phase of phases) {
271    const startTime = markMap.get(phase.start)
272    const endTime = markMap.get(phase.end)
273
274    if (startTime !== undefined && endTime !== undefined) {
275      const duration = endTime - startTime
276      const bar = '█'.repeat(Math.min(Math.ceil(duration / 10), 50)) // 1 block per 10ms, max 50
277      lines.push(
278        `  ${phase.name.padEnd(22)} ${formatMs(duration).padStart(10)}ms ${bar}`,
279      )
280    }
281  }
282
283  // Calculate pre-API overhead (everything before api_request_sent)
284  const apiRequestSent = markMap.get('query_api_request_sent')
285  if (apiRequestSent !== undefined) {
286    lines.push('')
287    lines.push(
288      `  ${'Total pre-API overhead'.padEnd(22)} ${formatMs(apiRequestSent).padStart(10)}ms`,
289    )
290  }
291
292  return lines.join('\n')
293}
294
295/**
296 * Log the query profile report to debug output
297 */
298export function logQueryProfileReport(): void {
299  if (!ENABLED) return
300  logForDebugging(getQueryProfileReport())
301}