// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
// Provenance: source dump of Claude Code, branch "main".
/**
 * Shared event metadata enrichment for analytics systems
 *
 * This module provides a single source of truth for collecting and formatting
 * event metadata across all analytics systems (Datadog, 1P).
 */

import { extname } from 'path'
import memoize from 'lodash-es/memoize.js'
import { env, getHostPlatformForAnalytics } from '../../utils/env.js'
import { envDynamic } from '../../utils/envDynamic.js'
import { getModelBetas } from '../../utils/betas.js'
import { getMainLoopModel } from '../../utils/model/model.js'
import {
  getSessionId,
  getIsInteractive,
  getKairosActive,
  getClientType,
  getParentSessionId as getParentSessionIdFromState,
} from '../../bootstrap/state.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { isOfficialMcpUrl } from '../mcp/officialRegistry.js'
import { isClaudeAISubscriber, getSubscriptionType } from '../../utils/auth.js'
import { getRepoRemoteHash } from '../../utils/git.js'
import {
  getWslVersion,
  getLinuxDistroInfo,
  detectVcs,
} from '../../utils/platform.js'
import type { CoreUserData } from 'src/utils/user.js'
import { getAgentContext } from '../../utils/agentContext.js'
import type { EnvironmentMetadata } from '../../types/generated/events_mono/claude_code/v1/claude_code_internal_event.js'
import type { PublicApiAuth } from '../../types/generated/events_mono/common/v1/auth.js'
import { jsonStringify } from '../../utils/slowOperations.js'
import {
  getAgentId,
  getParentSessionId as getTeammateParentSessionId,
  getTeamName,
  isTeammate,
} from '../../utils/teammate.js'
import { feature } from 'bun:bundle'

/**
 * Marker type for verifying analytics metadata doesn't contain sensitive data
 *
 * This type forces explicit verification that string values being logged
 * don't contain code snippets, file paths, or other sensitive information.
 *
 * The metadata is expected to be JSON-serializable.
 *
 * Usage: `myString as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS`
 *
 * The type is `never` which means it can never actually hold a value - this is
 * intentional as it's only used for type-casting to document developer intent.
 */
export type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS = never

/**
 * Sanitizes tool names for analytics logging to avoid PII exposure.
 *
 * MCP tool names follow the format `mcp__<server>__<tool>` and can reveal
 * user-specific server configurations, which is considered PII-medium.
 * This function redacts MCP tool names while preserving built-in tool names
 * (Bash, Read, Write, etc.) which are safe to log.
 *
 * @param toolName - The tool name to sanitize
 * @returns The original name for built-in tools, or 'mcp_tool' for MCP tools
 */
export function sanitizeToolNameForAnalytics(
  toolName: string,
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS {
  if (toolName.startsWith('mcp__')) {
    return 'mcp_tool' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  }
  return toolName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
}

/**
 * Check if detailed tool name logging is enabled for OTLP events.
 * When enabled, MCP server/tool names and Skill names are logged.
 * Disabled by default to protect PII (user-specific server configurations).
 *
 * Enable with OTEL_LOG_TOOL_DETAILS=1
 */
export function isToolDetailsLoggingEnabled(): boolean {
  return isEnvTruthy(process.env.OTEL_LOG_TOOL_DETAILS)
}

/**
 * Check if detailed tool name logging (MCP server/tool names) is enabled
 * for analytics events.
 *
 * Per go/taxonomy, MCP names are medium PII. We log them for:
 * - Cowork (entrypoint=local-agent) — no ZDR concept, log all MCPs
 * - claude.ai-proxied connectors — always official (from claude.ai's list)
 * - Servers whose URL matches the official MCP registry — directory
 *   connectors added via `claude mcp add`, not customer-specific config
 *
 * Custom/user-configured MCPs stay sanitized (toolName='mcp_tool').
 *
 * @param mcpServerType - Transport/type tag of the MCP server, if known
 * @param mcpServerBaseUrl - Base URL of the MCP server, if it has one
 * @returns true when the server/tool names may be logged unsanitized
 */
export function isAnalyticsToolDetailsLoggingEnabled(
  mcpServerType: string | undefined,
  mcpServerBaseUrl: string | undefined,
): boolean {
  if (process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent') {
    return true
  }
  if (mcpServerType === 'claudeai-proxy') {
    return true
  }
  if (mcpServerBaseUrl && isOfficialMcpUrl(mcpServerBaseUrl)) {
    return true
  }
  return false
}

/**
 * Built-in first-party MCP servers whose names are fixed reserved strings,
 * not user-configured — so logging them is not PII. Checked in addition to
 * isAnalyticsToolDetailsLoggingEnabled's transport/URL gates, which a stdio
 * built-in would otherwise fail.
 *
 * Feature-gated so the set is empty when the feature is off: the name
 * reservation (main.tsx, config.ts addMcpServer) is itself feature-gated, so
 * a user-configured 'computer-use' is possible in builds without the feature.
 */
/* eslint-disable @typescript-eslint/no-require-imports */
const BUILTIN_MCP_SERVER_NAMES: ReadonlySet<string> = new Set(
  feature('CHICAGO_MCP')
    ? [
        (
          require('../../utils/computerUse/common.js') as typeof import('../../utils/computerUse/common.js')
        ).COMPUTER_USE_MCP_SERVER_NAME,
      ]
    : [],
)
/* eslint-enable @typescript-eslint/no-require-imports */

/**
 * Spreadable helper for logEvent payloads — returns {mcpServerName, mcpToolName}
 * if the gate passes, empty object otherwise. Consolidates the identical IIFE
 * pattern at each tengu_tool_use_* call site.
 *
 * @param toolName - Full tool name (only `mcp__<server>__<tool>` names yield details)
 * @param mcpServerType - Forwarded to isAnalyticsToolDetailsLoggingEnabled
 * @param mcpServerBaseUrl - Forwarded to isAnalyticsToolDetailsLoggingEnabled
 */
export function mcpToolDetailsForAnalytics(
  toolName: string,
  mcpServerType: string | undefined,
  mcpServerBaseUrl: string | undefined,
): {
  mcpServerName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  mcpToolName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
} {
  const details = extractMcpToolDetails(toolName)
  if (!details) {
    return {}
  }
  // Built-in servers bypass the transport/URL gate; everything else must pass it.
  if (
    !BUILTIN_MCP_SERVER_NAMES.has(details.serverName) &&
    !isAnalyticsToolDetailsLoggingEnabled(mcpServerType, mcpServerBaseUrl)
  ) {
    return {}
  }
  return {
    mcpServerName: details.serverName,
    mcpToolName: details.mcpToolName,
  }
}

/**
 * Extract MCP server and tool names from a full MCP tool name.
 * MCP tool names follow the format: mcp__<server>__<tool>
 *
 * @param toolName - The full tool name (e.g., 'mcp__slack__read_channel')
 * @returns Object with serverName and mcpToolName, or undefined if not an MCP tool
 */
export function extractMcpToolDetails(toolName: string):
  | {
      serverName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
      mcpToolName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
    }
  | undefined {
  if (!toolName.startsWith('mcp__')) {
    return undefined
  }

  // Format: mcp__<server>__<tool>
  const parts = toolName.split('__')
  if (parts.length < 3) {
    return undefined
  }

  const serverName = parts[1]
  // Tool name may contain __ so rejoin remaining parts
  const mcpToolName = parts.slice(2).join('__')

  if (!serverName || !mcpToolName) {
    return undefined
  }

  return {
    serverName:
      serverName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    mcpToolName:
      mcpToolName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  }
}

/**
 * Extract skill name from Skill tool input.
 *
 * @param toolName - The tool name (should be 'Skill')
 * @param input - The tool input containing the skill name
 * @returns The skill name if this is a Skill tool call, undefined otherwise
 */
export function extractSkillName(
  toolName: string,
  input: unknown,
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
  if (toolName !== 'Skill') {
    return undefined
  }

  if (
    typeof input === 'object' &&
    input !== null &&
    'skill' in input &&
    typeof (input as { skill: unknown }).skill === 'string'
  ) {
    return (input as { skill: string })
      .skill as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  }

  return undefined
}

// Strings longer than TRUNCATE_AT are cut down to their first TRUNCATE_TO chars.
const TOOL_INPUT_STRING_TRUNCATE_AT = 512
const TOOL_INPUT_STRING_TRUNCATE_TO = 128
// Upper bound on the serialized JSON emitted by extractToolInputForTelemetry.
const TOOL_INPUT_MAX_JSON_CHARS = 4 * 1024
// Arrays/objects keep at most this many items/keys, plus a summary marker.
const TOOL_INPUT_MAX_COLLECTION_ITEMS = 20
// Collections at this depth or deeper are replaced by '<nested>'.
const TOOL_INPUT_MAX_DEPTH = 2

/**
 * Recursively bound a tool-input value for telemetry: long strings are
 * shortened, large collections are capped, and deep nesting is collapsed.
 *
 * @param value - Arbitrary tool input (or a nested part of it)
 * @param depth - Current nesting depth; callers outside the recursion omit it
 * @returns A bounded copy for collections, the value itself for primitives
 */
function truncateToolInputValue(value: unknown, depth = 0): unknown {
  if (typeof value === 'string') {
    if (value.length > TOOL_INPUT_STRING_TRUNCATE_AT) {
      return `${value.slice(0, TOOL_INPUT_STRING_TRUNCATE_TO)}…[${value.length} chars]`
    }
    return value
  }
  if (
    typeof value === 'number' ||
    typeof value === 'boolean' ||
    value === null ||
    value === undefined
  ) {
    return value
  }
  if (depth >= TOOL_INPUT_MAX_DEPTH) {
    return '<nested>'
  }
  if (Array.isArray(value)) {
    const mapped = value
      .slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS)
      .map(v => truncateToolInputValue(v, depth + 1))
    if (value.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
      mapped.push(`…[${value.length} items]`)
    }
    return mapped
  }
  if (typeof value === 'object') {
    const entries = Object.entries(value as Record<string, unknown>)
      // Skip internal marker keys (e.g. _simulatedSedEdit re-introduced by
      // SedEditPermissionRequest) so they don't leak into telemetry.
      .filter(([k]) => !k.startsWith('_'))
    const mapped = entries
      .slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS)
      .map(([k, v]) => [k, truncateToolInputValue(v, depth + 1)])
    if (entries.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
      mapped.push(['…', `${entries.length} keys`])
    }
    return Object.fromEntries(mapped)
  }
  // Remaining types (bigint, symbol, function) are stringified.
  return String(value)
}

/**
 * Serialize a tool's input arguments for the OTel tool_result event.
 * Truncates long strings and deep nesting to keep the output bounded while
 * preserving forensically useful fields like file paths, URLs, and MCP args.
 * Returns undefined when OTEL_LOG_TOOL_DETAILS is not enabled, or when the
 * input has no JSON representation.
 *
 * @param input - The raw tool input
 * @returns Bounded JSON string, or undefined
 */
export function extractToolInputForTelemetry(
  input: unknown,
): string | undefined {
  if (!isToolDetailsLoggingEnabled()) {
    return undefined
  }
  const truncated = truncateToolInputValue(input)
  // An undefined input passes through truncateToolInputValue unchanged.
  // NOTE(review): assumes jsonStringify mirrors JSON.stringify, which yields
  // undefined for an undefined top-level value — guard so `.length` below
  // cannot throw. Confirm against utils/slowOperations.
  const serialized: string | undefined = jsonStringify(truncated)
  if (serialized === undefined) {
    return undefined
  }
  if (serialized.length > TOOL_INPUT_MAX_JSON_CHARS) {
    return serialized.slice(0, TOOL_INPUT_MAX_JSON_CHARS) + '…[truncated]'
  }
  return serialized
}

/**
 * Maximum length for file extensions to be logged.
 * Extensions longer than this are considered potentially sensitive
 * (e.g., hash-based filenames like "key-hash-abcd-123-456") and
 * will be replaced with 'other'.
 */
const MAX_FILE_EXTENSION_LENGTH = 10

/**
 * Extracts and sanitizes a file extension for analytics logging.
 *
 * Uses Node's path.extname for reliable cross-platform extension extraction.
 * Returns 'other' for extensions exceeding MAX_FILE_EXTENSION_LENGTH to avoid
 * logging potentially sensitive data (like hash-based filenames).
 *
 * @param filePath - The file path to extract the extension from
 * @returns The sanitized extension, 'other' for long extensions, or undefined if no extension
 */
export function getFileExtensionForAnalytics(
  filePath: string,
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
  const ext = extname(filePath).toLowerCase()
  if (!ext || ext === '.') {
    return undefined
  }

  const extension = ext.slice(1) // remove leading dot
  if (extension.length > MAX_FILE_EXTENSION_LENGTH) {
    return 'other' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  }

  return extension as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
}

/** Allow list of commands we extract file extensions from. */
const FILE_COMMANDS = new Set([
  'rm',
  'mv',
  'cp',
  'touch',
  'mkdir',
  'chmod',
  'chown',
  'cat',
  'head',
  'tail',
  'sort',
  'stat',
  'diff',
  'wc',
  'grep',
  'rg',
  'sed',
])

/** Regex to split bash commands on compound operators (&&, ||, ;, |). */
const COMPOUND_OPERATOR_REGEX = /\s*(?:&&|\|\||[;|])\s*/

/** Regex to split on whitespace. */
const WHITESPACE_REGEX = /\s+/

/**
 * Extracts file extensions from a bash command for analytics.
 * Best-effort: splits on operators and whitespace, extracts extensions
 * from non-flag args of allowed commands. No heavy shell parsing needed
 * because grep patterns and sed scripts rarely resemble file extensions.
 *
 * @param command - The raw bash command line
 * @param simulatedSedEditFilePath - Optional file path whose extension is
 *   reported first, ahead of anything found in `command`
 * @returns Comma-joined, de-duplicated extensions in first-seen order, or
 *   undefined when none were found
 */
export function getFileExtensionsFromBashCommand(
  command: string,
  simulatedSedEditFilePath?: string,
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
  // Cheap early-out: with no '.' anywhere there is no extension to find.
  if (!command.includes('.') && !simulatedSedEditFilePath) return undefined

  let result: string | undefined
  const seen = new Set<string>()

  if (simulatedSedEditFilePath) {
    const ext = getFileExtensionForAnalytics(simulatedSedEditFilePath)
    if (ext) {
      seen.add(ext)
      result = ext
    }
  }

  for (const subcmd of command.split(COMPOUND_OPERATOR_REGEX)) {
    if (!subcmd) continue
    // Trim first: leading whitespace would otherwise yield an empty first
    // token, which fails the allow-list and drops the whole sub-command.
    const tokens = subcmd.trim().split(WHITESPACE_REGEX)
    if (tokens.length < 2) continue

    // Accept both `rm` and `/bin/rm`: compare on the basename of the command.
    const firstToken = tokens[0]!
    const slashIdx = firstToken.lastIndexOf('/')
    const baseCmd = slashIdx >= 0 ? firstToken.slice(slashIdx + 1) : firstToken
    if (!FILE_COMMANDS.has(baseCmd)) continue

    for (let i = 1; i < tokens.length; i++) {
      const arg = tokens[i]!
      if (arg.charCodeAt(0) === 45 /* - */) continue
      const ext = getFileExtensionForAnalytics(arg)
      if (ext && !seen.has(ext)) {
        seen.add(ext)
        result = result ? result + ',' + ext : ext
      }
    }
  }

  if (!result) return undefined
  return result as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
}

/**
 * Environment context metadata
 */
export type EnvContext = {
  platform: string
  platformRaw: string
  arch: string
  nodeVersion: string
  terminal: string | null
  packageManagers: string
  runtimes: string
  isRunningWithBun: boolean
  isCi: boolean
  isClaubbit: boolean
  isClaudeCodeRemote: boolean
  isLocalAgentMode: boolean
  isConductor: boolean
  remoteEnvironmentType?: string
  coworkerType?: string
  claudeCodeContainerId?: string
  claudeCodeRemoteSessionId?: string
  tags?: string
  isGithubAction: boolean
  isClaudeCodeAction: boolean
  isClaudeAiAuth: boolean
  version: string
  versionBase?: string
  buildTime: string
  deploymentEnvironment: string
  githubEventName?: string
  githubActionsRunnerEnvironment?: string
  githubActionsRunnerOs?: string
  githubActionRef?: string
  wslVersion?: string
  linuxDistroId?: string
  linuxDistroVersion?: string
  linuxKernel?: string
  vcs?: string
}

/**
 * Process metrics included with all analytics events.
 */
export type ProcessMetrics = {
  uptime: number
  rss: number
  heapTotal: number
  heapUsed: number
  external: number
  arrayBuffers: number
  constrainedMemory: number | undefined
  cpuUsage: NodeJS.CpuUsage
  cpuPercent: number | undefined
}

/**
 * Core event metadata shared across all analytics systems
 */
export type EventMetadata = {
  model: string
  sessionId: string
  userType: string
  betas?: string
  envContext: EnvContext
  entrypoint?: string
  agentSdkVersion?: string
  isInteractive: string
  clientType: string
  processMetrics?: ProcessMetrics
  sweBenchRunId: string
  sweBenchInstanceId: string
  sweBenchTaskId: string
  // Swarm/team agent identification for analytics attribution
  agentId?: string // CLAUDE_CODE_AGENT_ID (format: agentName@teamName) or subagent UUID
  parentSessionId?: string // CLAUDE_CODE_PARENT_SESSION_ID (team lead's session)
  agentType?: 'teammate' | 'subagent' | 'standalone' // Distinguishes swarm teammates, Agent tool subagents, and standalone agents
  teamName?: string // Team name for swarm agents (from env var or AsyncLocalStorage)
  subscriptionType?: string // OAuth subscription tier (max, pro, enterprise, team)
  rh?: string // Hashed repo remote URL (first 16 chars of SHA256), for joining with server-side data
  kairosActive?: true // KAIROS assistant mode active (ant-only; set in main.tsx after gate check)
  skillMode?: 'discovery' | 'coach' | 'discovery_and_coach' // Which skill surfacing mechanism(s) are gated on (ant-only; for BQ session segmentation)
  observerMode?: 'backseat' | 'skillcoach' | 'both' // Which observer classifiers are gated on (ant-only; for BQ cohort splits on tengu_backseat_* events)
}

/**
 * Options for enriching event metadata
 */
export type EnrichMetadataOptions = {
  // Model to use, falls back to getMainLoopModel() if not provided
  model?: unknown
  // Explicit betas string (already joined)
  betas?: unknown
  // Additional metadata to include (optional)
  additionalMetadata?: Record<string, unknown>
}

/**
 * Get agent identification for analytics.
 * Priority: AsyncLocalStorage context (subagents) > env vars (swarm teammates)
 */
function getAgentIdentification(): {
  agentId?: string
  parentSessionId?: string
  agentType?: 'teammate' | 'subagent' | 'standalone'
  teamName?: string
} {
  // Check AsyncLocalStorage first (for subagents running in same process)
  const agentContext = getAgentContext()
  if (agentContext) {
    const result: ReturnType<typeof getAgentIdentification> = {
      agentId: agentContext.agentId,
      parentSessionId: agentContext.parentSessionId,
      agentType: agentContext.agentType,
    }
    if (agentContext.agentType === 'teammate') {
      result.teamName = agentContext.teamName
    }
    return result
  }

  // Fall back to swarm helpers (for swarm agents)
  const agentId = getAgentId()
  const parentSessionId = getTeammateParentSessionId()
  const teamName = getTeamName()
  const isSwarmAgent = isTeammate()
  // For standalone agents (have agent ID but not a teammate), set agentType to 'standalone'
  const agentType = isSwarmAgent
    ? ('teammate' as const)
    : agentId
      ? ('standalone' as const)
      : undefined
  if (agentId || agentType || parentSessionId || teamName) {
    return {
      ...(agentId ? { agentId } : {}),
      ...(agentType ? { agentType } : {}),
      ...(parentSessionId ? { parentSessionId } : {}),
      ...(teamName ? { teamName } : {}),
    }
  }

  // Check bootstrap state for parent session ID (e.g., plan mode -> implementation)
  const stateParentSessionId = getParentSessionIdFromState()
  if (stateParentSessionId) {
    return { parentSessionId: stateParentSessionId }
  }

  return {}
}

/**
 * Extract base version from full version string.
 * "2.0.36-dev.20251107.t174150.sha2709699" → "2.0.36-dev"
 */
const getVersionBase = memoize((): string | undefined => {
  const match = MACRO.VERSION.match(/^\d+\.\d+\.\d+(?:-[a-z]+)?/)
  return match ? match[0] : undefined
})

/**
 * Builds the environment context object
 *
 * Memoized: the result of the first call is reused for later calls.
 */
const buildEnvContext = memoize(async (): Promise<EnvContext> => {
  const [packageManagers, runtimes, linuxDistroInfo, vcs] = await Promise.all([
    env.getPackageManagers(),
    env.getRuntimes(),
    getLinuxDistroInfo(),
    detectVcs(),
  ])
  // Read once instead of once for the check and once for the value.
  const wslVersion = getWslVersion()

  return {
    platform: getHostPlatformForAnalytics(),
    // Raw process.platform so freebsd/openbsd/aix/sunos are visible in BQ.
    // getHostPlatformForAnalytics() buckets those into 'linux'; here we want
    // the truth. CLAUDE_CODE_HOST_PLATFORM still overrides for container/remote.
    platformRaw: process.env.CLAUDE_CODE_HOST_PLATFORM || process.platform,
    arch: env.arch,
    nodeVersion: env.nodeVersion,
    terminal: envDynamic.terminal,
    packageManagers: packageManagers.join(','),
    runtimes: runtimes.join(','),
    isRunningWithBun: env.isRunningWithBun(),
    isCi: isEnvTruthy(process.env.CI),
    isClaubbit: isEnvTruthy(process.env.CLAUBBIT),
    isClaudeCodeRemote: isEnvTruthy(process.env.CLAUDE_CODE_REMOTE),
    isLocalAgentMode: process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent',
    isConductor: env.isConductor(),
    ...(process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE && {
      remoteEnvironmentType: process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE,
    }),
    // Gated by feature flag to prevent leaking "coworkerType" string in external builds
    ...(feature('COWORKER_TYPE_TELEMETRY')
      ? process.env.CLAUDE_CODE_COWORKER_TYPE
        ? { coworkerType: process.env.CLAUDE_CODE_COWORKER_TYPE }
        : {}
      : {}),
    ...(process.env.CLAUDE_CODE_CONTAINER_ID && {
      claudeCodeContainerId: process.env.CLAUDE_CODE_CONTAINER_ID,
    }),
    ...(process.env.CLAUDE_CODE_REMOTE_SESSION_ID && {
      claudeCodeRemoteSessionId: process.env.CLAUDE_CODE_REMOTE_SESSION_ID,
    }),
    ...(process.env.CLAUDE_CODE_TAGS && {
      tags: process.env.CLAUDE_CODE_TAGS,
    }),
    isGithubAction: isEnvTruthy(process.env.GITHUB_ACTIONS),
    isClaudeCodeAction: isEnvTruthy(process.env.CLAUDE_CODE_ACTION),
    isClaudeAiAuth: isClaudeAISubscriber(),
    version: MACRO.VERSION,
    versionBase: getVersionBase(),
    buildTime: MACRO.BUILD_TIME,
    deploymentEnvironment: env.detectDeploymentEnvironment(),
    ...(isEnvTruthy(process.env.GITHUB_ACTIONS) && {
      githubEventName: process.env.GITHUB_EVENT_NAME,
      githubActionsRunnerEnvironment: process.env.RUNNER_ENVIRONMENT,
      githubActionsRunnerOs: process.env.RUNNER_OS,
      githubActionRef: process.env.GITHUB_ACTION_PATH?.includes(
        'claude-code-action/',
      )
        ? process.env.GITHUB_ACTION_PATH.split('claude-code-action/')[1]
        : undefined,
    }),
    ...(wslVersion && { wslVersion }),
    ...(linuxDistroInfo ?? {}),
    ...(vcs.length > 0 ? { vcs: vcs.join(',') } : {}),
  }
})

// --
// CPU% delta tracking — inherently process-global, same pattern as logBatch/flushTimer in datadog.ts
let prevCpuUsage: NodeJS.CpuUsage | null = null
let prevWallTimeMs: number | null = null

/**
 * Builds process metrics object for all users.
 *
 * cpuPercent is computed from the CPU-time delta since the previous call, so
 * it is undefined on the first call. Returns undefined if any of the
 * underlying process APIs throws.
 */
function buildProcessMetrics(): ProcessMetrics | undefined {
  try {
    const mem = process.memoryUsage()
    const cpu = process.cpuUsage()
    const now = Date.now()

    let cpuPercent: number | undefined
    if (prevCpuUsage !== null && prevWallTimeMs !== null) {
      const wallDeltaMs = now - prevWallTimeMs
      if (wallDeltaMs > 0) {
        const userDeltaUs = cpu.user - prevCpuUsage.user
        const systemDeltaUs = cpu.system - prevCpuUsage.system
        // CPU times are in microseconds, wall time in milliseconds.
        cpuPercent =
          ((userDeltaUs + systemDeltaUs) / (wallDeltaMs * 1000)) * 100
      }
    }
    prevCpuUsage = cpu
    prevWallTimeMs = now

    return {
      uptime: process.uptime(),
      rss: mem.rss,
      heapTotal: mem.heapTotal,
      heapUsed: mem.heapUsed,
      external: mem.external,
      arrayBuffers: mem.arrayBuffers,
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      constrainedMemory: process.constrainedMemory(),
      cpuUsage: cpu,
      cpuPercent,
    }
  } catch {
    return undefined
  }
}

/**
 * Get core event metadata shared across all analytics systems.
 *
 * This function collects environment, runtime, and context information
 * that should be included with all analytics events.
 *
 * @param options - Configuration options
 * @returns Promise resolving to enriched metadata object
 */
export async function getEventMetadata(
  options: EnrichMetadataOptions = {},
): Promise<EventMetadata> {
  const model = options.model ? String(options.model) : getMainLoopModel()
  const betas =
    typeof options.betas === 'string'
      ? options.betas
      : getModelBetas(model).join(',')
  const [envContext, repoRemoteHash] = await Promise.all([
    buildEnvContext(),
    getRepoRemoteHash(),
  ])
  const processMetrics = buildProcessMetrics()
  // Read once so the presence check and the logged value cannot disagree.
  const subscriptionType = getSubscriptionType()

  const metadata: EventMetadata = {
    model,
    sessionId: getSessionId(),
    userType: process.env.USER_TYPE || '',
    ...(betas.length > 0 ? { betas } : {}),
    envContext,
    ...(process.env.CLAUDE_CODE_ENTRYPOINT && {
      entrypoint: process.env.CLAUDE_CODE_ENTRYPOINT,
    }),
    ...(process.env.CLAUDE_AGENT_SDK_VERSION && {
      agentSdkVersion: process.env.CLAUDE_AGENT_SDK_VERSION,
    }),
    isInteractive: String(getIsInteractive()),
    clientType: getClientType(),
    ...(processMetrics && { processMetrics }),
    sweBenchRunId: process.env.SWE_BENCH_RUN_ID || '',
    sweBenchInstanceId: process.env.SWE_BENCH_INSTANCE_ID || '',
    sweBenchTaskId: process.env.SWE_BENCH_TASK_ID || '',
    // Swarm/team agent identification
    // Priority: AsyncLocalStorage context (subagents) > env vars (swarm teammates)
    ...getAgentIdentification(),
    // Subscription tier for DAU-by-tier analytics
    ...(subscriptionType && { subscriptionType }),
    // Assistant mode tag — lives outside memoized buildEnvContext() because
    // setKairosActive() runs at main.tsx:~1648, after the first event may
    // have already fired and memoized the env. Read fresh per-event instead.
    ...(feature('KAIROS') && getKairosActive()
      ? { kairosActive: true as const }
      : {}),
    // Repo remote hash for joining with server-side repo bundle data
    ...(repoRemoteHash && { rh: repoRemoteHash }),
  }

  return metadata
}

/**
 * Core event metadata for 1P event logging (snake_case format).
 */
export type FirstPartyEventLoggingCoreMetadata = {
  session_id: string
  model: string
  user_type: string
  betas?: string
  entrypoint?: string
  agent_sdk_version?: string
  is_interactive: boolean
  client_type: string
  swe_bench_run_id?: string
  swe_bench_instance_id?: string
  swe_bench_task_id?: string
  // Swarm/team agent identification
  agent_id?: string
  parent_session_id?: string
  agent_type?: 'teammate' | 'subagent' | 'standalone'
  team_name?: string
}

/**
 * Complete event logging metadata format for 1P events.
 */
export type FirstPartyEventLoggingMetadata = {
  env: EnvironmentMetadata
  process?: string
  // auth is a top-level field on ClaudeCodeInternalEvent (proto PublicApiAuth).
  // account_id is intentionally omitted — only UUID fields are populated client-side.
  auth?: PublicApiAuth
  // core fields correspond to the top level of ClaudeCodeInternalEvent.
  // They get directly exported to their individual columns in the BigQuery tables
  core: FirstPartyEventLoggingCoreMetadata
  // additional fields are populated in the additional_metadata field of the
  // ClaudeCodeInternalEvent proto. Includes but is not limited to information
  // that differs by event type.
  additional: Record<string, unknown>
}

/**
 * Convert metadata to 1P event logging format (snake_case fields).
 *
 * The /api/event_logging/batch endpoint expects snake_case field names
 * for environment and core metadata.
 *
 * @param metadata - Core event metadata
 * @param userMetadata - User data; supplies the GitHub Actions metadata placed
 *   in `env` and the account/organization UUIDs placed in `auth`
 * @param additionalMetadata - Additional metadata to include; its keys
 *   override the built-in `additional` keys on collision
 * @returns Metadata formatted for 1P event logging
 */
export function to1PEventFormat(
  metadata: EventMetadata,
  userMetadata: CoreUserData,
  additionalMetadata: Record<string, unknown> = {},
): FirstPartyEventLoggingMetadata {
  const {
    envContext,
    processMetrics,
    rh,
    kairosActive,
    skillMode,
    observerMode,
    ...coreFields
  } = metadata

  // Convert envContext to snake_case.
  // IMPORTANT: env is typed as the proto-generated EnvironmentMetadata so that
  // adding a field here that the proto doesn't define is a compile error. The
  // generated toJSON() serializer silently drops unknown keys — a hand-written
  // parallel type previously let #11318, #13924, #19448, and coworker_type all
  // ship fields that never reached BQ.
  // Adding a field? Update the monorepo proto first (go/cc-logging):
  //   event_schemas/.../claude_code/v1/claude_code_internal_event.proto
  // then run `bun run generate:proto` here.
  const env: EnvironmentMetadata = {
    platform: envContext.platform,
    platform_raw: envContext.platformRaw,
    arch: envContext.arch,
    node_version: envContext.nodeVersion,
    terminal: envContext.terminal || 'unknown',
    package_managers: envContext.packageManagers,
    runtimes: envContext.runtimes,
    is_running_with_bun: envContext.isRunningWithBun,
    is_ci: envContext.isCi,
    is_claubbit: envContext.isClaubbit,
    is_claude_code_remote: envContext.isClaudeCodeRemote,
    is_local_agent_mode: envContext.isLocalAgentMode,
    is_conductor: envContext.isConductor,
    is_github_action: envContext.isGithubAction,
    is_claude_code_action: envContext.isClaudeCodeAction,
    is_claude_ai_auth: envContext.isClaudeAiAuth,
    version: envContext.version,
    build_time: envContext.buildTime,
    deployment_environment: envContext.deploymentEnvironment,
  }

  // Add optional env fields
  if (envContext.remoteEnvironmentType) {
    env.remote_environment_type = envContext.remoteEnvironmentType
  }
  if (feature('COWORKER_TYPE_TELEMETRY') && envContext.coworkerType) {
    env.coworker_type = envContext.coworkerType
  }
  if (envContext.claudeCodeContainerId) {
    env.claude_code_container_id = envContext.claudeCodeContainerId
  }
  if (envContext.claudeCodeRemoteSessionId) {
    env.claude_code_remote_session_id = envContext.claudeCodeRemoteSessionId
  }
  if (envContext.tags) {
    env.tags = envContext.tags
      .split(',')
      .map(t => t.trim())
      .filter(Boolean)
  }
  if (envContext.githubEventName) {
    env.github_event_name = envContext.githubEventName
  }
  if (envContext.githubActionsRunnerEnvironment) {
    env.github_actions_runner_environment =
      envContext.githubActionsRunnerEnvironment
  }
  if (envContext.githubActionsRunnerOs) {
    env.github_actions_runner_os = envContext.githubActionsRunnerOs
  }
  if (envContext.githubActionRef) {
    env.github_action_ref = envContext.githubActionRef
  }
  if (envContext.wslVersion) {
    env.wsl_version = envContext.wslVersion
  }
  if (envContext.linuxDistroId) {
    env.linux_distro_id = envContext.linuxDistroId
  }
  if (envContext.linuxDistroVersion) {
    env.linux_distro_version = envContext.linuxDistroVersion
  }
  if (envContext.linuxKernel) {
    env.linux_kernel = envContext.linuxKernel
  }
  if (envContext.vcs) {
    env.vcs = envContext.vcs
  }
  if (envContext.versionBase) {
    env.version_base = envContext.versionBase
  }

  // Convert core fields to snake_case
  const core: FirstPartyEventLoggingCoreMetadata = {
    session_id: coreFields.sessionId,
    model: coreFields.model,
    user_type: coreFields.userType,
    is_interactive: coreFields.isInteractive === 'true',
    client_type: coreFields.clientType,
  }

  // Add other core fields
  if (coreFields.betas) {
    core.betas = coreFields.betas
  }
  if (coreFields.entrypoint) {
    core.entrypoint = coreFields.entrypoint
  }
  if (coreFields.agentSdkVersion) {
    core.agent_sdk_version = coreFields.agentSdkVersion
  }
  if (coreFields.sweBenchRunId) {
    core.swe_bench_run_id = coreFields.sweBenchRunId
  }
  if (coreFields.sweBenchInstanceId) {
    core.swe_bench_instance_id = coreFields.sweBenchInstanceId
  }
  if (coreFields.sweBenchTaskId) {
    core.swe_bench_task_id = coreFields.sweBenchTaskId
  }
  // Swarm/team agent identification
  if (coreFields.agentId) {
    core.agent_id = coreFields.agentId
  }
  if (coreFields.parentSessionId) {
    core.parent_session_id = coreFields.parentSessionId
  }
  if (coreFields.agentType) {
    core.agent_type = coreFields.agentType
  }
  if (coreFields.teamName) {
    core.team_name = coreFields.teamName
  }

  // Map userMetadata to output fields.
  // Based on src/utils/user.ts getUser(), but with fields present in other
  // parts of ClaudeCodeInternalEvent deduplicated.
  // Convert camelCase GitHubActionsMetadata to snake_case for 1P API
  // Note: github_actions_metadata is placed inside env (EnvironmentMetadata)
  // rather than at the top level of ClaudeCodeInternalEvent
  if (userMetadata.githubActionsMetadata) {
    const ghMeta = userMetadata.githubActionsMetadata
    env.github_actions_metadata = {
      actor_id: ghMeta.actorId,
      repository_id: ghMeta.repositoryId,
      repository_owner_id: ghMeta.repositoryOwnerId,
    }
  }

  let auth: PublicApiAuth | undefined
  if (userMetadata.accountUuid || userMetadata.organizationUuid) {
    auth = {
      account_uuid: userMetadata.accountUuid,
      organization_uuid: userMetadata.organizationUuid,
    }
  }

  return {
    env,
    // Process metrics travel as base64-encoded JSON in a single string field.
    ...(processMetrics && {
      process: Buffer.from(jsonStringify(processMetrics)).toString('base64'),
    }),
    ...(auth && { auth }),
    core,
    additional: {
      ...(rh && { rh }),
      ...(kairosActive && { is_assistant_mode: true }),
      ...(skillMode && { skill_mode: skillMode }),
      ...(observerMode && { observer_mode: observerMode }),
      ...additionalMetadata,
    },
  }
}