source dump of claude code
at main 1120 lines 30 kB view raw
1/** 2 * Perfetto Tracing for Claude Code (Ant-only) 3 * 4 * This module generates traces in the Chrome Trace Event format that can be 5 * viewed in ui.perfetto.dev or Chrome's chrome://tracing. 6 * 7 * NOTE: This feature is ant-only and eliminated from external builds. 8 * 9 * The trace file includes: 10 * - Agent hierarchy (parent-child relationships in a swarm) 11 * - API requests with TTFT, TTLT, prompt length, cache stats, msg ID, speculative flag 12 * - Tool executions with name, duration, and token usage 13 * - User input waiting time 14 * 15 * Usage: 16 * 1. Enable via CLAUDE_CODE_PERFETTO_TRACE=1 or CLAUDE_CODE_PERFETTO_TRACE=<path> 17 * 2. Optionally set CLAUDE_CODE_PERFETTO_WRITE_INTERVAL_S=<positive integer> to write the 18 * trace file periodically (default: write only on exit). 19 * 3. Run Claude Code normally 20 * 4. Trace file is written to ~/.claude/traces/trace-<session-id>.json 21 * or to the specified path 22 * 5. Open in ui.perfetto.dev to visualize 23 */ 24 25import { feature } from 'bun:bundle' 26import { mkdirSync, writeFileSync } from 'fs' 27import { mkdir, writeFile } from 'fs/promises' 28import { dirname, join } from 'path' 29import { getSessionId } from '../../bootstrap/state.js' 30import { registerCleanup } from '../cleanupRegistry.js' 31import { logForDebugging } from '../debug.js' 32import { 33 getClaudeConfigHomeDir, 34 isEnvDefinedFalsy, 35 isEnvTruthy, 36} from '../envUtils.js' 37import { errorMessage } from '../errors.js' 38import { djb2Hash } from '../hash.js' 39import { jsonStringify } from '../slowOperations.js' 40import { getAgentId, getAgentName, getParentSessionId } from '../teammate.js' 41 42/** 43 * Chrome Trace Event format types 44 * See: https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU 45 */ 46 47export type TraceEventPhase = 48 | 'B' // Begin duration event 49 | 'E' // End duration event 50 | 'X' // Complete event (with duration) 51 | 'i' // Instant event 52 | 'C' // Counter event 53 | 'b' // Async begin 54 | 'n' // Async instant 55 | 'e' // Async end 56 | 'M' // Metadata event 57 58export type TraceEvent = { 59 name: string 60 cat: string 61 ph: TraceEventPhase 62 ts: number // Timestamp in microseconds 63 pid: number // Process ID (we use 1 for main, agent IDs for subagents) 64 tid: number // Thread ID (we use numeric hash of agent name or 1 for main) 65 dur?: number // Duration in microseconds (for 'X' events) 66 args?: Record<string, unknown> 67 id?: string // For async events 68 scope?: string 69} 70 71/** 72 * Agent info for tracking hierarchy 73 */ 74type AgentInfo = { 75 agentId: string 76 agentName: string 77 parentAgentId?: string 78 processId: number 79 threadId: number 80} 81 82/** 83 * Pending span for tracking begin/end pairs 84 */ 85type PendingSpan = { 86 name: string 87 category: string 88 startTime: number 89 agentInfo: AgentInfo 90 args: Record<string, unknown> 91} 92 93// Global state for the Perfetto tracer 94let isEnabled = false 95let tracePath: string | null = null 96// Metadata events (ph: 'M' — process/thread names, parent links) are kept 97// separate so they survive eviction — Perfetto UI needs them to label 98// tracks. Bounded by agent count (~3 events per agent). 99const metadataEvents: TraceEvent[] = [] 100const events: TraceEvent[] = [] 101// events[] cap. Cron-driven sessions run for days; 22 push sites × many 102// turns would otherwise grow unboundedly (periodicWrite flushes to disk but 103// does not truncate — it writes the full snapshot). At ~300B/event this is 104// ~30MB, enough trace history for any debugging session. Eviction drops the 105// oldest half when hit, amortized O(1). 106const MAX_EVENTS = 100_000 107const pendingSpans = new Map<string, PendingSpan>() 108const agentRegistry = new Map<string, AgentInfo>() 109let totalAgentCount = 0 110let startTimeMs = 0 111let spanIdCounter = 0 112let traceWritten = false // Flag to avoid double writes 113 114// Map agent IDs to numeric process IDs (Perfetto requires numeric IDs) 115let processIdCounter = 1 116const agentIdToProcessId = new Map<string, number>() 117 118// Periodic write interval handle 119let writeIntervalId: ReturnType<typeof setInterval> | null = null 120 121const STALE_SPAN_TTL_MS = 30 * 60 * 1000 // 30 minutes 122const STALE_SPAN_CLEANUP_INTERVAL_MS = 60 * 1000 // 1 minute 123let staleSpanCleanupId: ReturnType<typeof setInterval> | null = null 124 125/** 126 * Convert a string to a numeric hash for use as thread ID 127 */ 128function stringToNumericHash(str: string): number { 129 return Math.abs(djb2Hash(str)) || 1 // Ensure non-zero 130} 131 132/** 133 * Get or create a numeric process ID for an agent 134 */ 135function getProcessIdForAgent(agentId: string): number { 136 const existing = agentIdToProcessId.get(agentId) 137 if (existing !== undefined) return existing 138 139 processIdCounter++ 140 agentIdToProcessId.set(agentId, processIdCounter) 141 return processIdCounter 142} 143 144/** 145 * Get current agent info 146 */ 147function getCurrentAgentInfo(): AgentInfo { 148 const agentId = getAgentId() ?? getSessionId() 149 const agentName = getAgentName() ?? 'main' 150 const parentSessionId = getParentSessionId() 151 152 // Check if we've already registered this agent 153 const existing = agentRegistry.get(agentId) 154 if (existing) return existing 155 156 const info: AgentInfo = { 157 agentId, 158 agentName, 159 parentAgentId: parentSessionId, 160 processId: agentId === getSessionId() ? 1 : getProcessIdForAgent(agentId), 161 threadId: stringToNumericHash(agentName), 162 } 163 164 agentRegistry.set(agentId, info) 165 totalAgentCount++ 166 return info 167} 168 169/** 170 * Get timestamp in microseconds relative to trace start 171 */ 172function getTimestamp(): number { 173 return (Date.now() - startTimeMs) * 1000 174} 175 176/** 177 * Generate a unique span ID 178 */ 179function generateSpanId(): string { 180 return `span_${++spanIdCounter}` 181} 182 183/** 184 * Evict pending spans older than STALE_SPAN_TTL_MS. 185 * Mirrors the TTL cleanup pattern in sessionTracing.ts. 186 */ 187function evictStaleSpans(): void { 188 const now = getTimestamp() 189 const ttlUs = STALE_SPAN_TTL_MS * 1000 // Convert ms to microseconds 190 for (const [spanId, span] of pendingSpans) { 191 if (now - span.startTime > ttlUs) { 192 // Emit an end event so the span shows up in the trace as incomplete 193 events.push({ 194 name: span.name, 195 cat: span.category, 196 ph: 'E', 197 ts: now, 198 pid: span.agentInfo.processId, 199 tid: span.agentInfo.threadId, 200 args: { 201 ...span.args, 202 evicted: true, 203 duration_ms: (now - span.startTime) / 1000, 204 }, 205 }) 206 pendingSpans.delete(spanId) 207 } 208 } 209} 210 211/** 212 * Build the full trace document (Chrome Trace JSON format). 213 */ 214function buildTraceDocument(): string { 215 return jsonStringify({ 216 traceEvents: [...metadataEvents, ...events], 217 metadata: { 218 session_id: getSessionId(), 219 trace_start_time: new Date(startTimeMs).toISOString(), 220 agent_count: totalAgentCount, 221 total_event_count: metadataEvents.length + events.length, 222 }, 223 }) 224} 225 226/** 227 * Drop the oldest half of events[] when over MAX_EVENTS. Called from the 228 * stale-span cleanup interval (60s). The half-batch splice keeps this 229 * amortized O(1) — we don't pay splice cost per-push. A synthetic marker 230 * is inserted so the gap is visible in ui.perfetto.dev. 231 */ 232function evictOldestEvents(): void { 233 if (events.length < MAX_EVENTS) return 234 const dropped = events.splice(0, MAX_EVENTS / 2) 235 events.unshift({ 236 name: 'trace_truncated', 237 cat: '__metadata', 238 ph: 'i', 239 ts: dropped[dropped.length - 1]?.ts ?? 0, 240 pid: 1, 241 tid: 0, 242 args: { dropped_events: dropped.length }, 243 }) 244 logForDebugging( 245 `[Perfetto] Evicted ${dropped.length} oldest events (cap ${MAX_EVENTS})`, 246 ) 247} 248 249/** 250 * Initialize Perfetto tracing 251 * Call this early in the application lifecycle 252 */ 253export function initializePerfettoTracing(): void { 254 const envValue = process.env.CLAUDE_CODE_PERFETTO_TRACE 255 logForDebugging( 256 `[Perfetto] initializePerfettoTracing called, env value: ${envValue}`, 257 ) 258 259 // Wrap in feature() for dead code elimination - entire block removed from external builds 260 if (feature('PERFETTO_TRACING')) { 261 if (!envValue || isEnvDefinedFalsy(envValue)) { 262 logForDebugging( 263 '[Perfetto] Tracing disabled (env var not set or disabled)', 264 ) 265 return 266 } 267 268 isEnabled = true 269 startTimeMs = Date.now() 270 271 // Determine trace file path 272 if (isEnvTruthy(envValue)) { 273 const tracesDir = join(getClaudeConfigHomeDir(), 'traces') 274 tracePath = join(tracesDir, `trace-${getSessionId()}.json`) 275 } else { 276 // Use the provided path 277 tracePath = envValue 278 } 279 280 logForDebugging( 281 `[Perfetto] Tracing enabled, will write to: ${tracePath}, isEnabled=${isEnabled}`, 282 ) 283 284 // Start periodic full-trace write if CLAUDE_CODE_PERFETTO_WRITE_INTERVAL_S is a positive integer 285 const intervalSec = parseInt( 286 process.env.CLAUDE_CODE_PERFETTO_WRITE_INTERVAL_S ?? '', 287 10, 288 ) 289 if (intervalSec > 0) { 290 writeIntervalId = setInterval(() => { 291 void periodicWrite() 292 }, intervalSec * 1000) 293 // Don't let the interval keep the process alive on its own 294 if (writeIntervalId.unref) writeIntervalId.unref() 295 logForDebugging( 296 `[Perfetto] Periodic write enabled, interval: ${intervalSec}s`, 297 ) 298 } 299 300 // Start stale span cleanup interval 301 staleSpanCleanupId = setInterval(() => { 302 evictStaleSpans() 303 evictOldestEvents() 304 }, STALE_SPAN_CLEANUP_INTERVAL_MS) 305 if (staleSpanCleanupId.unref) staleSpanCleanupId.unref() 306 307 // Register cleanup to write final trace on exit 308 registerCleanup(async () => { 309 logForDebugging('[Perfetto] Cleanup callback invoked') 310 await writePerfettoTrace() 311 }) 312 313 // Also register a beforeExit handler as a fallback 314 // This ensures the trace is written even if cleanup registry is not called 315 process.on('beforeExit', () => { 316 logForDebugging('[Perfetto] beforeExit handler invoked') 317 void writePerfettoTrace() 318 }) 319 320 // Register a synchronous exit handler as a last resort 321 // This is the final fallback to ensure trace is written before process exits 322 process.on('exit', () => { 323 if (!traceWritten) { 324 logForDebugging( 325 '[Perfetto] exit handler invoked, writing trace synchronously', 326 ) 327 writePerfettoTraceSync() 328 } 329 }) 330 331 // Emit process metadata events for main process 332 const mainAgent = getCurrentAgentInfo() 333 emitProcessMetadata(mainAgent) 334 } 335} 336 337/** 338 * Emit metadata events for a process/agent 339 */ 340function emitProcessMetadata(agentInfo: AgentInfo): void { 341 if (!isEnabled) return 342 343 // Process name 344 metadataEvents.push({ 345 name: 'process_name', 346 cat: '__metadata', 347 ph: 'M', 348 ts: 0, 349 pid: agentInfo.processId, 350 tid: 0, 351 args: { name: agentInfo.agentName }, 352 }) 353 354 // Thread name (same as process for now) 355 metadataEvents.push({ 356 name: 'thread_name', 357 cat: '__metadata', 358 ph: 'M', 359 ts: 0, 360 pid: agentInfo.processId, 361 tid: agentInfo.threadId, 362 args: { name: agentInfo.agentName }, 363 }) 364 365 // Add parent info if available 366 if (agentInfo.parentAgentId) { 367 metadataEvents.push({ 368 name: 'parent_agent', 369 cat: '__metadata', 370 ph: 'M', 371 ts: 0, 372 pid: agentInfo.processId, 373 tid: 0, 374 args: { 375 parent_agent_id: agentInfo.parentAgentId, 376 }, 377 }) 378 } 379} 380 381/** 382 * Check if Perfetto tracing is enabled 383 */ 384export function isPerfettoTracingEnabled(): boolean { 385 return isEnabled 386} 387 388/** 389 * Register a new agent in the trace 390 * Call this when a subagent/teammate is spawned 391 */ 392export function registerAgent( 393 agentId: string, 394 agentName: string, 395 parentAgentId?: string, 396): void { 397 if (!isEnabled) return 398 399 const info: AgentInfo = { 400 agentId, 401 agentName, 402 parentAgentId, 403 processId: getProcessIdForAgent(agentId), 404 threadId: stringToNumericHash(agentName), 405 } 406 407 agentRegistry.set(agentId, info) 408 totalAgentCount++ 409 emitProcessMetadata(info) 410} 411 412/** 413 * Unregister an agent from the trace. 414 * Call this when an agent completes, fails, or is aborted to free memory. 415 */ 416export function unregisterAgent(agentId: string): void { 417 if (!isEnabled) return 418 agentRegistry.delete(agentId) 419 agentIdToProcessId.delete(agentId) 420} 421 422/** 423 * Start an API call span 424 */ 425export function startLLMRequestPerfettoSpan(args: { 426 model: string 427 promptTokens?: number 428 messageId?: string 429 isSpeculative?: boolean 430 querySource?: string 431}): string { 432 if (!isEnabled) return '' 433 434 const spanId = generateSpanId() 435 const agentInfo = getCurrentAgentInfo() 436 437 pendingSpans.set(spanId, { 438 name: 'API Call', 439 category: 'api', 440 startTime: getTimestamp(), 441 agentInfo, 442 args: { 443 model: args.model, 444 prompt_tokens: args.promptTokens, 445 message_id: args.messageId, 446 is_speculative: args.isSpeculative ?? false, 447 query_source: args.querySource, 448 }, 449 }) 450 451 // Emit begin event 452 events.push({ 453 name: 'API Call', 454 cat: 'api', 455 ph: 'B', 456 ts: pendingSpans.get(spanId)!.startTime, 457 pid: agentInfo.processId, 458 tid: agentInfo.threadId, 459 args: pendingSpans.get(spanId)!.args, 460 }) 461 462 return spanId 463} 464 465/** 466 * End an API call span with response metadata 467 */ 468export function endLLMRequestPerfettoSpan( 469 spanId: string, 470 metadata: { 471 ttftMs?: number 472 ttltMs?: number 473 promptTokens?: number 474 outputTokens?: number 475 cacheReadTokens?: number 476 cacheCreationTokens?: number 477 messageId?: string 478 success?: boolean 479 error?: string 480 /** Time spent in pre-request setup (client creation, retries) before the successful attempt */ 481 requestSetupMs?: number 482 /** Timestamps (Date.now()) of each attempt start — used to emit retry sub-spans */ 483 attemptStartTimes?: number[] 484 }, 485): void { 486 if (!isEnabled || !spanId) return 487 488 const pending = pendingSpans.get(spanId) 489 if (!pending) return 490 491 const endTime = getTimestamp() 492 const duration = endTime - pending.startTime 493 494 const promptTokens = 495 metadata.promptTokens ?? (pending.args.prompt_tokens as number | undefined) 496 const ttftMs = metadata.ttftMs 497 const ttltMs = metadata.ttltMs 498 const outputTokens = metadata.outputTokens 499 const cacheReadTokens = metadata.cacheReadTokens 500 501 // Compute derived metrics 502 // ITPS: input tokens per second (prompt processing speed) 503 const itps = 504 ttftMs !== undefined && promptTokens !== undefined && ttftMs > 0 505 ? Math.round((promptTokens / (ttftMs / 1000)) * 100) / 100 506 : undefined 507 508 // OTPS: output tokens per second (sampling speed) 509 const samplingMs = 510 ttltMs !== undefined && ttftMs !== undefined ? ttltMs - ttftMs : undefined 511 const otps = 512 samplingMs !== undefined && outputTokens !== undefined && samplingMs > 0 513 ? Math.round((outputTokens / (samplingMs / 1000)) * 100) / 100 514 : undefined 515 516 // Cache hit rate: percentage of prompt tokens from cache 517 const cacheHitRate = 518 cacheReadTokens !== undefined && 519 promptTokens !== undefined && 520 promptTokens > 0 521 ? Math.round((cacheReadTokens / promptTokens) * 10000) / 100 522 : undefined 523 524 const requestSetupMs = metadata.requestSetupMs 525 const attemptStartTimes = metadata.attemptStartTimes 526 527 // Merge metadata with original args 528 const args = { 529 ...pending.args, 530 ttft_ms: ttftMs, 531 ttlt_ms: ttltMs, 532 prompt_tokens: promptTokens, 533 output_tokens: outputTokens, 534 cache_read_tokens: cacheReadTokens, 535 cache_creation_tokens: metadata.cacheCreationTokens, 536 message_id: metadata.messageId ?? pending.args.message_id, 537 success: metadata.success ?? true, 538 error: metadata.error, 539 duration_ms: duration / 1000, 540 request_setup_ms: requestSetupMs, 541 // Derived metrics 542 itps, 543 otps, 544 cache_hit_rate_pct: cacheHitRate, 545 } 546 547 // Emit Request Setup sub-span when there was measurable setup time 548 // (client creation, param building, retries before the successful attempt) 549 const setupUs = 550 requestSetupMs !== undefined && requestSetupMs > 0 551 ? requestSetupMs * 1000 552 : 0 553 if (setupUs > 0) { 554 const setupEndTs = pending.startTime + setupUs 555 556 events.push({ 557 name: 'Request Setup', 558 cat: 'api,setup', 559 ph: 'B', 560 ts: pending.startTime, 561 pid: pending.agentInfo.processId, 562 tid: pending.agentInfo.threadId, 563 args: { 564 request_setup_ms: requestSetupMs, 565 attempt_count: attemptStartTimes?.length ?? 1, 566 }, 567 }) 568 569 // Emit retry attempt sub-spans within Request Setup. 570 // Each failed attempt runs from its start to the next attempt's start. 571 if (attemptStartTimes && attemptStartTimes.length > 1) { 572 // attemptStartTimes[0] is the reference point (first attempt). 573 // Convert wall-clock deltas into Perfetto-relative microseconds. 574 const baseWallMs = attemptStartTimes[0]! 575 for (let i = 0; i < attemptStartTimes.length - 1; i++) { 576 const attemptStartUs = 577 pending.startTime + (attemptStartTimes[i]! - baseWallMs) * 1000 578 const attemptEndUs = 579 pending.startTime + (attemptStartTimes[i + 1]! - baseWallMs) * 1000 580 581 events.push({ 582 name: `Attempt ${i + 1} (retry)`, 583 cat: 'api,retry', 584 ph: 'B', 585 ts: attemptStartUs, 586 pid: pending.agentInfo.processId, 587 tid: pending.agentInfo.threadId, 588 args: { attempt: i + 1 }, 589 }) 590 events.push({ 591 name: `Attempt ${i + 1} (retry)`, 592 cat: 'api,retry', 593 ph: 'E', 594 ts: attemptEndUs, 595 pid: pending.agentInfo.processId, 596 tid: pending.agentInfo.threadId, 597 }) 598 } 599 } 600 601 events.push({ 602 name: 'Request Setup', 603 cat: 'api,setup', 604 ph: 'E', 605 ts: setupEndTs, 606 pid: pending.agentInfo.processId, 607 tid: pending.agentInfo.threadId, 608 }) 609 } 610 611 // Emit sub-spans for First Token and Sampling phases (before API Call end) 612 // Using B/E pairs in proper nesting order for correct Perfetto visualization 613 if (ttftMs !== undefined) { 614 // First Token starts after request setup (if any) 615 const firstTokenStartTs = pending.startTime + setupUs 616 const firstTokenEndTs = firstTokenStartTs + ttftMs * 1000 617 618 // First Token phase: from successful attempt start to first token 619 events.push({ 620 name: 'First Token', 621 cat: 'api,ttft', 622 ph: 'B', 623 ts: firstTokenStartTs, 624 pid: pending.agentInfo.processId, 625 tid: pending.agentInfo.threadId, 626 args: { 627 ttft_ms: ttftMs, 628 prompt_tokens: promptTokens, 629 itps, 630 cache_hit_rate_pct: cacheHitRate, 631 }, 632 }) 633 events.push({ 634 name: 'First Token', 635 cat: 'api,ttft', 636 ph: 'E', 637 ts: firstTokenEndTs, 638 pid: pending.agentInfo.processId, 639 tid: pending.agentInfo.threadId, 640 }) 641 642 // Sampling phase: from first token to last token 643 // Note: samplingMs = ttltMs - ttftMs still includes setup time in ttltMs, 644 // so we compute the actual sampling duration for the span as the time from 645 // first token to API call end (endTime), not samplingMs directly. 646 const actualSamplingMs = 647 ttltMs !== undefined ? ttltMs - ttftMs - setupUs / 1000 : undefined 648 if (actualSamplingMs !== undefined && actualSamplingMs > 0) { 649 events.push({ 650 name: 'Sampling', 651 cat: 'api,sampling', 652 ph: 'B', 653 ts: firstTokenEndTs, 654 pid: pending.agentInfo.processId, 655 tid: pending.agentInfo.threadId, 656 args: { 657 sampling_ms: actualSamplingMs, 658 output_tokens: outputTokens, 659 otps, 660 }, 661 }) 662 events.push({ 663 name: 'Sampling', 664 cat: 'api,sampling', 665 ph: 'E', 666 ts: firstTokenEndTs + actualSamplingMs * 1000, 667 pid: pending.agentInfo.processId, 668 tid: pending.agentInfo.threadId, 669 }) 670 } 671 } 672 673 // Emit API Call end event (after sub-spans) 674 events.push({ 675 name: pending.name, 676 cat: pending.category, 677 ph: 'E', 678 ts: endTime, 679 pid: pending.agentInfo.processId, 680 tid: pending.agentInfo.threadId, 681 args, 682 }) 683 684 pendingSpans.delete(spanId) 685} 686 687/** 688 * Start a tool execution span 689 */ 690export function startToolPerfettoSpan( 691 toolName: string, 692 args?: Record<string, unknown>, 693): string { 694 if (!isEnabled) return '' 695 696 const spanId = generateSpanId() 697 const agentInfo = getCurrentAgentInfo() 698 699 pendingSpans.set(spanId, { 700 name: `Tool: ${toolName}`, 701 category: 'tool', 702 startTime: getTimestamp(), 703 agentInfo, 704 args: { 705 tool_name: toolName, 706 ...args, 707 }, 708 }) 709 710 // Emit begin event 711 events.push({ 712 name: `Tool: ${toolName}`, 713 cat: 'tool', 714 ph: 'B', 715 ts: pendingSpans.get(spanId)!.startTime, 716 pid: agentInfo.processId, 717 tid: agentInfo.threadId, 718 args: pendingSpans.get(spanId)!.args, 719 }) 720 721 return spanId 722} 723 724/** 725 * End a tool execution span 726 */ 727export function endToolPerfettoSpan( 728 spanId: string, 729 metadata?: { 730 success?: boolean 731 error?: string 732 resultTokens?: number 733 }, 734): void { 735 if (!isEnabled || !spanId) return 736 737 const pending = pendingSpans.get(spanId) 738 if (!pending) return 739 740 const endTime = getTimestamp() 741 const duration = endTime - pending.startTime 742 743 const args = { 744 ...pending.args, 745 success: metadata?.success ?? true, 746 error: metadata?.error, 747 result_tokens: metadata?.resultTokens, 748 duration_ms: duration / 1000, 749 } 750 751 // Emit end event 752 events.push({ 753 name: pending.name, 754 cat: pending.category, 755 ph: 'E', 756 ts: endTime, 757 pid: pending.agentInfo.processId, 758 tid: pending.agentInfo.threadId, 759 args, 760 }) 761 762 pendingSpans.delete(spanId) 763} 764 765/** 766 * Start a user input waiting span 767 */ 768export function startUserInputPerfettoSpan(context?: string): string { 769 if (!isEnabled) return '' 770 771 const spanId = generateSpanId() 772 const agentInfo = getCurrentAgentInfo() 773 774 pendingSpans.set(spanId, { 775 name: 'Waiting for User Input', 776 category: 'user_input', 777 startTime: getTimestamp(), 778 agentInfo, 779 args: { 780 context, 781 }, 782 }) 783 784 // Emit begin event 785 events.push({ 786 name: 'Waiting for User Input', 787 cat: 'user_input', 788 ph: 'B', 789 ts: pendingSpans.get(spanId)!.startTime, 790 pid: agentInfo.processId, 791 tid: agentInfo.threadId, 792 args: pendingSpans.get(spanId)!.args, 793 }) 794 795 return spanId 796} 797 798/** 799 * End a user input waiting span 800 */ 801export function endUserInputPerfettoSpan( 802 spanId: string, 803 metadata?: { 804 decision?: string 805 source?: string 806 }, 807): void { 808 if (!isEnabled || !spanId) return 809 810 const pending = pendingSpans.get(spanId) 811 if (!pending) return 812 813 const endTime = getTimestamp() 814 const duration = endTime - pending.startTime 815 816 const args = { 817 ...pending.args, 818 decision: metadata?.decision, 819 source: metadata?.source, 820 duration_ms: duration / 1000, 821 } 822 823 // Emit end event 824 events.push({ 825 name: pending.name, 826 cat: pending.category, 827 ph: 'E', 828 ts: endTime, 829 pid: pending.agentInfo.processId, 830 tid: pending.agentInfo.threadId, 831 args, 832 }) 833 834 pendingSpans.delete(spanId) 835} 836 837/** 838 * Emit an instant event (marker) 839 */ 840export function emitPerfettoInstant( 841 name: string, 842 category: string, 843 args?: Record<string, unknown>, 844): void { 845 if (!isEnabled) return 846 847 const agentInfo = getCurrentAgentInfo() 848 849 events.push({ 850 name, 851 cat: category, 852 ph: 'i', 853 ts: getTimestamp(), 854 pid: agentInfo.processId, 855 tid: agentInfo.threadId, 856 args, 857 }) 858} 859 860/** 861 * Emit a counter event for tracking metrics over time 862 */ 863export function emitPerfettoCounter( 864 name: string, 865 values: Record<string, number>, 866): void { 867 if (!isEnabled) return 868 869 const agentInfo = getCurrentAgentInfo() 870 871 events.push({ 872 name, 873 cat: 'counter', 874 ph: 'C', 875 ts: getTimestamp(), 876 pid: agentInfo.processId, 877 tid: agentInfo.threadId, 878 args: values, 879 }) 880} 881 882/** 883 * Start an interaction span (wraps a full user request cycle) 884 */ 885export function startInteractionPerfettoSpan(userPrompt?: string): string { 886 if (!isEnabled) return '' 887 888 const spanId = generateSpanId() 889 const agentInfo = getCurrentAgentInfo() 890 891 pendingSpans.set(spanId, { 892 name: 'Interaction', 893 category: 'interaction', 894 startTime: getTimestamp(), 895 agentInfo, 896 args: { 897 user_prompt_length: userPrompt?.length, 898 }, 899 }) 900 901 // Emit begin event 902 events.push({ 903 name: 'Interaction', 904 cat: 'interaction', 905 ph: 'B', 906 ts: pendingSpans.get(spanId)!.startTime, 907 pid: agentInfo.processId, 908 tid: agentInfo.threadId, 909 args: pendingSpans.get(spanId)!.args, 910 }) 911 912 return spanId 913} 914 915/** 916 * End an interaction span 917 */ 918export function endInteractionPerfettoSpan(spanId: string): void { 919 if (!isEnabled || !spanId) return 920 921 const pending = pendingSpans.get(spanId) 922 if (!pending) return 923 924 const endTime = getTimestamp() 925 const duration = endTime - pending.startTime 926 927 // Emit end event 928 events.push({ 929 name: pending.name, 930 cat: pending.category, 931 ph: 'E', 932 ts: endTime, 933 pid: pending.agentInfo.processId, 934 tid: pending.agentInfo.threadId, 935 args: { 936 ...pending.args, 937 duration_ms: duration / 1000, 938 }, 939 }) 940 941 pendingSpans.delete(spanId) 942} 943 944// --------------------------------------------------------------------------- 945// Periodic write helpers 946// --------------------------------------------------------------------------- 947 948/** 949 * Stop the periodic write timer. 950 */ 951function stopWriteInterval(): void { 952 if (staleSpanCleanupId) { 953 clearInterval(staleSpanCleanupId) 954 staleSpanCleanupId = null 955 } 956 if (writeIntervalId) { 957 clearInterval(writeIntervalId) 958 writeIntervalId = null 959 } 960} 961 962/** 963 * Force-close any remaining open spans at session end. 964 */ 965function closeOpenSpans(): void { 966 for (const [spanId, pending] of pendingSpans) { 967 const endTime = getTimestamp() 968 events.push({ 969 name: pending.name, 970 cat: pending.category, 971 ph: 'E', 972 ts: endTime, 973 pid: pending.agentInfo.processId, 974 tid: pending.agentInfo.threadId, 975 args: { 976 ...pending.args, 977 incomplete: true, 978 duration_ms: (endTime - pending.startTime) / 1000, 979 }, 980 }) 981 pendingSpans.delete(spanId) 982 } 983} 984 985/** 986 * Write the full trace to disk. Errors are logged but swallowed so that a 987 * transient I/O problem does not crash the session — the next periodic tick 988 * (or the final exit write) will retry with a complete snapshot. 989 */ 990async function periodicWrite(): Promise<void> { 991 if (!isEnabled || !tracePath || traceWritten) return 992 993 try { 994 await mkdir(dirname(tracePath), { recursive: true }) 995 await writeFile(tracePath, buildTraceDocument()) 996 logForDebugging( 997 `[Perfetto] Periodic write: ${events.length} events to ${tracePath}`, 998 ) 999 } catch (error) { 1000 logForDebugging( 1001 `[Perfetto] Periodic write failed: ${errorMessage(error)}`, 1002 { level: 'error' }, 1003 ) 1004 } 1005} 1006 1007/** 1008 * Final async write: close open spans and write the complete trace. 1009 * Idempotent — sets `traceWritten` on success so subsequent calls are no-ops. 1010 */ 1011async function writePerfettoTrace(): Promise<void> { 1012 if (!isEnabled || !tracePath || traceWritten) { 1013 logForDebugging( 1014 `[Perfetto] Skipping final write: isEnabled=${isEnabled}, tracePath=${tracePath}, traceWritten=${traceWritten}`, 1015 ) 1016 return 1017 } 1018 1019 stopWriteInterval() 1020 closeOpenSpans() 1021 1022 logForDebugging( 1023 `[Perfetto] writePerfettoTrace called: events=${events.length}`, 1024 ) 1025 1026 try { 1027 await mkdir(dirname(tracePath), { recursive: true }) 1028 await writeFile(tracePath, buildTraceDocument()) 1029 traceWritten = true 1030 logForDebugging(`[Perfetto] Trace finalized at: ${tracePath}`) 1031 } catch (error) { 1032 logForDebugging( 1033 `[Perfetto] Failed to write final trace: ${errorMessage(error)}`, 1034 { level: 'error' }, 1035 ) 1036 } 1037} 1038 1039/** 1040 * Final synchronous write (fallback for process 'exit' handler where async is forbidden). 1041 */ 1042function writePerfettoTraceSync(): void { 1043 if (!isEnabled || !tracePath || traceWritten) { 1044 logForDebugging( 1045 `[Perfetto] Skipping final sync write: isEnabled=${isEnabled}, tracePath=${tracePath}, traceWritten=${traceWritten}`, 1046 ) 1047 return 1048 } 1049 1050 stopWriteInterval() 1051 closeOpenSpans() 1052 1053 logForDebugging( 1054 `[Perfetto] writePerfettoTraceSync called: events=${events.length}`, 1055 ) 1056 1057 try { 1058 const dir = dirname(tracePath) 1059 // eslint-disable-next-line custom-rules/no-sync-fs -- Only called from process.on('exit') handler 1060 mkdirSync(dir, { recursive: true }) 1061 // eslint-disable-next-line custom-rules/no-sync-fs, eslint-plugin-n/no-sync -- Required for process 'exit' handler which doesn't support async 1062 writeFileSync(tracePath, buildTraceDocument()) 1063 traceWritten = true 1064 logForDebugging(`[Perfetto] Trace finalized synchronously at: ${tracePath}`) 1065 } catch (error) { 1066 logForDebugging( 1067 `[Perfetto] Failed to write final trace synchronously: ${errorMessage(error)}`, 1068 { level: 'error' }, 1069 ) 1070 } 1071} 1072 1073/** 1074 * Get all recorded events (for testing) 1075 */ 1076export function getPerfettoEvents(): TraceEvent[] { 1077 return [...metadataEvents, ...events] 1078} 1079 1080/** 1081 * Reset the tracer state (for testing) 1082 */ 1083export function resetPerfettoTracer(): void { 1084 if (staleSpanCleanupId) { 1085 clearInterval(staleSpanCleanupId) 1086 staleSpanCleanupId = null 1087 } 1088 stopWriteInterval() 1089 metadataEvents.length = 0 1090 events.length = 0 1091 pendingSpans.clear() 1092 agentRegistry.clear() 1093 agentIdToProcessId.clear() 1094 totalAgentCount = 0 1095 processIdCounter = 1 1096 spanIdCounter = 0 1097 isEnabled = false 1098 tracePath = null 1099 startTimeMs = 0 1100 traceWritten = false 1101} 1102 1103/** 1104 * Trigger a periodic write immediately (for testing) 1105 */ 1106export async function triggerPeriodicWriteForTesting(): Promise<void> { 1107 await periodicWrite() 1108} 1109 1110/** 1111 * Evict stale spans immediately (for testing) 1112 */ 1113export function evictStaleSpansForTesting(): void { 1114 evictStaleSpans() 1115} 1116 1117export const MAX_EVENTS_FOR_TESTING = MAX_EVENTS 1118export function evictOldestEventsForTesting(): void { 1119 evictOldestEvents() 1120}