services/analytics/metadata.ts at main

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / services / analytics / metadata.ts
at main 973 lines 33 kB view raw
wrap content
oppi.li dump from zip 12d ago
63aada3f
  1// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
  2/**
  3 * Shared event metadata enrichment for analytics systems
  4 *
  5 * This module provides a single source of truth for collecting and formatting
  6 * event metadata across all analytics systems (Datadog, 1P).
  7 */
  8
  9import { extname } from 'path'
 10import memoize from 'lodash-es/memoize.js'
 11import { env, getHostPlatformForAnalytics } from '../../utils/env.js'
 12import { envDynamic } from '../../utils/envDynamic.js'
 13import { getModelBetas } from '../../utils/betas.js'
 14import { getMainLoopModel } from '../../utils/model/model.js'
 15import {
 16  getSessionId,
 17  getIsInteractive,
 18  getKairosActive,
 19  getClientType,
 20  getParentSessionId as getParentSessionIdFromState,
 21} from '../../bootstrap/state.js'
 22import { isEnvTruthy } from '../../utils/envUtils.js'
 23import { isOfficialMcpUrl } from '../mcp/officialRegistry.js'
 24import { isClaudeAISubscriber, getSubscriptionType } from '../../utils/auth.js'
 25import { getRepoRemoteHash } from '../../utils/git.js'
 26import {
 27  getWslVersion,
 28  getLinuxDistroInfo,
 29  detectVcs,
 30} from '../../utils/platform.js'
 31import type { CoreUserData } from 'src/utils/user.js'
 32import { getAgentContext } from '../../utils/agentContext.js'
 33import type { EnvironmentMetadata } from '../../types/generated/events_mono/claude_code/v1/claude_code_internal_event.js'
 34import type { PublicApiAuth } from '../../types/generated/events_mono/common/v1/auth.js'
 35import { jsonStringify } from '../../utils/slowOperations.js'
 36import {
 37  getAgentId,
 38  getParentSessionId as getTeammateParentSessionId,
 39  getTeamName,
 40  isTeammate,
 41} from '../../utils/teammate.js'
 42import { feature } from 'bun:bundle'
 43
 44/**
 45 * Marker type for verifying analytics metadata doesn't contain sensitive data
 46 *
 47 * This type forces explicit verification that string values being logged
 48 * don't contain code snippets, file paths, or other sensitive information.
 49 *
 50 * The metadata is expected to be JSON-serializable.
 51 *
 52 * Usage: `myString as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS`
 53 *
 54 * The type is `never` which means it can never actually hold a value - this is
 55 * intentional as it's only used for type-casting to document developer intent.
 56 */
 57export type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS = never
 58
 59/**
 60 * Sanitizes tool names for analytics logging to avoid PII exposure.
 61 *
 62 * MCP tool names follow the format `mcp__<server>__<tool>` and can reveal
 63 * user-specific server configurations, which is considered PII-medium.
 64 * This function redacts MCP tool names while preserving built-in tool names
 65 * (Bash, Read, Write, etc.) which are safe to log.
 66 *
 67 * @param toolName - The tool name to sanitize
 68 * @returns The original name for built-in tools, or 'mcp_tool' for MCP tools
 69 */
 70export function sanitizeToolNameForAnalytics(
 71  toolName: string,
 72): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS {
 73  if (toolName.startsWith('mcp__')) {
 74    return 'mcp_tool' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
 75  }
 76  return toolName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
 77}
 78
 79/**
 80 * Check if detailed tool name logging is enabled for OTLP events.
 81 * When enabled, MCP server/tool names and Skill names are logged.
 82 * Disabled by default to protect PII (user-specific server configurations).
 83 *
 84 * Enable with OTEL_LOG_TOOL_DETAILS=1
 85 */
 86export function isToolDetailsLoggingEnabled(): boolean {
 87  return isEnvTruthy(process.env.OTEL_LOG_TOOL_DETAILS)
 88}
 89
 90/**
 91 * Check if detailed tool name logging (MCP server/tool names) is enabled
 92 * for analytics events.
 93 *
 94 * Per go/taxonomy, MCP names are medium PII. We log them for:
 95 * - Cowork (entrypoint=local-agent) — no ZDR concept, log all MCPs
 96 * - claude.ai-proxied connectors — always official (from claude.ai's list)
 97 * - Servers whose URL matches the official MCP registry — directory
 98 *   connectors added via `claude mcp add`, not customer-specific config
 99 *
100 * Custom/user-configured MCPs stay sanitized (toolName='mcp_tool').
101 */
102export function isAnalyticsToolDetailsLoggingEnabled(
103  mcpServerType: string | undefined,
104  mcpServerBaseUrl: string | undefined,
105): boolean {
106  if (process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent') {
107    return true
108  }
109  if (mcpServerType === 'claudeai-proxy') {
110    return true
111  }
112  if (mcpServerBaseUrl && isOfficialMcpUrl(mcpServerBaseUrl)) {
113    return true
114  }
115  return false
116}
117
/**
 * Names of built-in first-party MCP servers.
 *
 * These names are fixed reserved strings rather than user configuration, so
 * logging them is not PII. The set is consulted in addition to
 * isAnalyticsToolDetailsLoggingEnabled's transport/URL gates, which a stdio
 * built-in would otherwise fail.
 *
 * The set is empty when the CHICAGO_MCP feature is off: the name reservation
 * (main.tsx, config.ts addMcpServer) is itself feature-gated, so a
 * user-configured 'computer-use' is possible in builds without the feature.
 *
 * NOTE(review): the feature() call and the inline require() are deliberately
 * left in their original shape — presumably the bundler relies on this form
 * to strip the gated branch; confirm before restructuring.
 */
/* eslint-disable @typescript-eslint/no-require-imports */
const BUILTIN_MCP_SERVER_NAMES: ReadonlySet<string> = new Set(
  feature('CHICAGO_MCP')
    ? [
        (
          require('../../utils/computerUse/common.js') as typeof import('../../utils/computerUse/common.js')
        ).COMPUTER_USE_MCP_SERVER_NAME,
      ]
    : [],
)
/* eslint-enable @typescript-eslint/no-require-imports */
139
/**
 * Spreadable helper for logEvent payloads at the tengu_tool_use_* call sites.
 *
 * Parses `toolName` as an MCP tool name and, when logging of the names is
 * permitted, returns them; otherwise returns an empty object so the result can
 * always be spread into a payload.
 *
 * Logging is permitted when the server is one of the built-in first-party
 * servers, or when isAnalyticsToolDetailsLoggingEnabled() approves the
 * server type / base URL.
 *
 * @param toolName - Full tool name (MCP names look like `mcp__<server>__<tool>`)
 * @param mcpServerType - Transport/type of the MCP server, if known
 * @param mcpServerBaseUrl - Base URL of the MCP server, if known
 * @returns `{ mcpServerName, mcpToolName }` when the gate passes, `{}` otherwise
 */
export function mcpToolDetailsForAnalytics(
  toolName: string,
  mcpServerType: string | undefined,
  mcpServerBaseUrl: string | undefined,
): {
  mcpServerName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  mcpToolName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
} {
  const parsed = extractMcpToolDetails(toolName)
  if (!parsed) {
    return {}
  }

  const { serverName, mcpToolName } = parsed
  // Built-in check first; the analytics gate is only evaluated for
  // non-built-in servers.
  const mayLogNames =
    BUILTIN_MCP_SERVER_NAMES.has(serverName) ||
    isAnalyticsToolDetailsLoggingEnabled(mcpServerType, mcpServerBaseUrl)

  return mayLogNames ? { mcpServerName: serverName, mcpToolName } : {}
}
168
/**
 * Splits a full MCP tool name into its server and tool parts.
 *
 * MCP tool names follow the format `mcp__<server>__<tool>`. The server is the
 * text between the `mcp__` prefix and the next `__`; everything after that
 * separator is the tool name, which may itself contain `__`.
 *
 * @param toolName - The full tool name (e.g., 'mcp__slack__read_channel')
 * @returns `{ serverName, mcpToolName }`, or undefined when the name lacks the
 *   `mcp__` prefix, has no second separator, or either part is empty
 */
export function extractMcpToolDetails(toolName: string):
  | {
      serverName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
      mcpToolName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
    }
  | undefined {
  const prefix = 'mcp__'
  const separator = '__'

  if (!toolName.startsWith(prefix)) {
    return undefined
  }

  // Everything after the prefix is "<server>__<tool>".
  const remainder = toolName.slice(prefix.length)
  const separatorIdx = remainder.indexOf(separator)
  if (separatorIdx === -1) {
    return undefined
  }

  const serverName = remainder.slice(0, separatorIdx)
  // Keep any further "__" occurrences as part of the tool name.
  const mcpToolName = remainder.slice(separatorIdx + separator.length)
  if (serverName === '' || mcpToolName === '') {
    return undefined
  }

  return {
    serverName:
      serverName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    mcpToolName:
      mcpToolName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  }
}
207
/**
 * Pulls the skill name out of a Skill tool invocation.
 *
 * @param toolName - The tool name; anything other than 'Skill' yields undefined
 * @param input - The tool input; must be a non-null object with a string `skill` property
 * @returns The `skill` string for a well-formed Skill call, undefined otherwise
 */
export function extractSkillName(
  toolName: string,
  input: unknown,
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
  if (toolName !== 'Skill' || typeof input !== 'object' || input === null) {
    return undefined
  }
  if (!('skill' in input)) {
    return undefined
  }

  const candidate = (input as { skill: unknown }).skill
  return typeof candidate === 'string'
    ? (candidate as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
    : undefined
}
235
/** Strings longer than this many characters are shortened. */
const TOOL_INPUT_STRING_TRUNCATE_AT = 512
/** Number of leading characters kept when a string is shortened. */
const TOOL_INPUT_STRING_TRUNCATE_TO = 128
/** Upper bound on the serialized JSON length of a tool input. */
const TOOL_INPUT_MAX_JSON_CHARS = 4 * 1024
/** Maximum array items / object keys kept per collection. */
const TOOL_INPUT_MAX_COLLECTION_ITEMS = 20
/** Collections at or below this nesting depth are replaced by '<nested>'. */
const TOOL_INPUT_MAX_DEPTH = 2

/**
 * Recursively produces a size-bounded copy of a tool input value.
 *
 * - Strings longer than TOOL_INPUT_STRING_TRUNCATE_AT are cut to their first
 *   TOOL_INPUT_STRING_TRUNCATE_TO characters followed by `…[<length> chars]`.
 * - number / boolean / null / undefined pass through untouched.
 * - Other non-object scalars (bigint, symbol, function) are converted with
 *   String() and then bounded like any other string. This happens before the
 *   depth check, so a scalar is never reported as '<nested>'.
 * - Arrays and objects at depth >= TOOL_INPUT_MAX_DEPTH become '<nested>'.
 * - Arrays keep their first TOOL_INPUT_MAX_COLLECTION_ITEMS items; when items
 *   were dropped a trailing `…[<length> items]` marker is appended.
 * - Objects drop keys starting with '_', keep the first
 *   TOOL_INPUT_MAX_COLLECTION_ITEMS remaining entries, and gain a
 *   `'…': '<count> keys'` entry when entries were dropped.
 *
 * @param value - Any tool input value
 * @param depth - Current nesting depth (0 for the top-level input)
 * @returns The bounded copy
 */
function truncateToolInputValue(value: unknown, depth = 0): unknown {
  if (typeof value === 'string') {
    if (value.length > TOOL_INPUT_STRING_TRUNCATE_AT) {
      return `${value.slice(0, TOOL_INPUT_STRING_TRUNCATE_TO)}…[${value.length} chars]`
    }
    return value
  }
  if (
    typeof value === 'number' ||
    typeof value === 'boolean' ||
    value === null ||
    value === undefined
  ) {
    return value
  }
  if (typeof value !== 'object') {
    // bigint / symbol / function: stringify, then apply the same length bound
    // as real strings so the fallback cannot emit an unbounded value.
    return truncateToolInputValue(String(value), depth)
  }
  if (depth >= TOOL_INPUT_MAX_DEPTH) {
    return '<nested>'
  }
  if (Array.isArray(value)) {
    const mapped: unknown[] = value
      .slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS)
      .map(v => truncateToolInputValue(v, depth + 1))
    if (value.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
      mapped.push(`…[${value.length} items]`)
    }
    return mapped
  }

  const entries = Object.entries(value as Record<string, unknown>)
    // Skip internal marker keys (e.g. _simulatedSedEdit re-introduced by
    // SedEditPermissionRequest) so they don't leak into telemetry.
    .filter(([k]) => !k.startsWith('_'))
  const mapped = entries
    .slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS)
    .map(([k, v]): [string, unknown] => [
      k,
      truncateToolInputValue(v, depth + 1),
    ])
  if (entries.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
    mapped.push(['…', `${entries.length} keys`])
  }
  return Object.fromEntries(mapped)
}
284
/**
 * Serialize a tool's input arguments for the OTel tool_result event.
 *
 * The input is first bounded with truncateToolInputValue() (long strings and
 * deep nesting are cut) so the output stays small while still preserving
 * forensically useful fields like file paths, URLs, and MCP args. The
 * resulting JSON is additionally capped at TOOL_INPUT_MAX_JSON_CHARS
 * characters, with `…[truncated]` appended when it was cut.
 *
 * @param input - The raw tool input
 * @returns The bounded JSON string, or undefined when OTEL_LOG_TOOL_DETAILS is
 *   not enabled or the input has no JSON representation
 */
export function extractToolInputForTelemetry(
  input: unknown,
): string | undefined {
  if (!isToolDetailsLoggingEnabled()) {
    return undefined
  }

  const serialized: unknown = jsonStringify(truncateToolInputValue(input))
  // NOTE(review): assumes jsonStringify mirrors JSON.stringify, which yields
  // undefined (not a string) for an `undefined` input — confirm. Without this
  // guard the length check below would throw on such input.
  if (typeof serialized !== 'string') {
    return undefined
  }

  if (serialized.length > TOOL_INPUT_MAX_JSON_CHARS) {
    return serialized.slice(0, TOOL_INPUT_MAX_JSON_CHARS) + '…[truncated]'
  }
  return serialized
}
304
/**
 * Longest file extension (in characters, dot excluded) that is logged verbatim.
 *
 * Longer extensions are considered potentially sensitive (e.g., hash-based
 * filenames like "key-hash-abcd-123-456") and are reported as 'other'.
 */
const MAX_FILE_EXTENSION_LENGTH = 10

/**
 * Derives a sanitized, lower-cased file extension for analytics logging.
 *
 * Extraction is delegated to Node's path.extname. Extensions longer than
 * MAX_FILE_EXTENSION_LENGTH are replaced by 'other' so that potentially
 * sensitive data (like hash-based filenames) is not logged.
 *
 * @param filePath - The file path to extract the extension from
 * @returns The extension without its leading dot, 'other' for over-long
 *   extensions, or undefined when the path has no extension (including a bare
 *   trailing dot)
 */
export function getFileExtensionForAnalytics(
  filePath: string,
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
  const dottedExt = extname(filePath).toLowerCase()
  // '' means no extension at all; '.' means the name merely ends in a dot.
  if (dottedExt === '' || dottedExt === '.') {
    return undefined
  }

  const bareExt = dottedExt.slice(1) // drop the leading dot
  const reported =
    bareExt.length > MAX_FILE_EXTENSION_LENGTH ? 'other' : bareExt
  return reported as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
}
338
/** Allow list of commands we extract file extensions from. */
const FILE_COMMANDS = new Set([
  'rm',
  'mv',
  'cp',
  'touch',
  'mkdir',
  'chmod',
  'chown',
  'cat',
  'head',
  'tail',
  'sort',
  'stat',
  'diff',
  'wc',
  'grep',
  'rg',
  'sed',
])

/** Regex to split bash commands on compound operators (&&, ||, ;, |). */
const COMPOUND_OPERATOR_REGEX = /\s*(?:&&|\|\||[;|])\s*/

/** Regex to split on whitespace. */
const WHITESPACE_REGEX = /\s+/

/** Regex matching one quote character at the very start and/or end of a token. */
const SURROUNDING_QUOTE_REGEX = /^["']|["']$/g

/**
 * Extracts file extensions from a bash command for analytics.
 *
 * Best-effort: the command is split on compound operators and whitespace, and
 * extensions are taken from the non-flag arguments of allow-listed commands
 * (matched on the basename of the first token, so `/bin/cat` counts as `cat`).
 * No heavy shell parsing is attempted because grep patterns and sed scripts
 * rarely resemble file extensions.
 *
 * Each sub-command is trimmed before tokenizing, so leading whitespace or a
 * leading newline does not produce an empty first token that would cause the
 * sub-command to be skipped. One surrounding quote character is removed from
 * each argument so that `cat "foo.ts"` reports `ts` rather than `ts"`.
 *
 * @param command - The raw bash command line
 * @param simulatedSedEditFilePath - Optional file path whose extension is
 *   reported first, ahead of anything found in `command`
 * @returns Comma-separated unique extensions in first-seen order, or undefined
 *   when none were found
 */
export function getFileExtensionsFromBashCommand(
  command: string,
  simulatedSedEditFilePath?: string,
): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
  if (!command.includes('.') && !simulatedSedEditFilePath) return undefined

  // A Set iterates in insertion order, giving de-duplication and first-seen
  // ordering in one structure.
  const extensions = new Set<string>()

  if (simulatedSedEditFilePath) {
    const ext = getFileExtensionForAnalytics(simulatedSedEditFilePath)
    if (ext) extensions.add(ext)
  }

  for (const rawSubcmd of command.split(COMPOUND_OPERATOR_REGEX)) {
    const subcmd = rawSubcmd.trim()
    if (!subcmd) continue

    const [firstToken = '', ...args] = subcmd.split(WHITESPACE_REGEX)
    if (args.length === 0) continue

    // Reduce an absolute/relative executable path to its basename.
    const baseCmd = firstToken.slice(firstToken.lastIndexOf('/') + 1)
    if (!FILE_COMMANDS.has(baseCmd)) continue

    for (const rawArg of args) {
      const arg = rawArg.replace(SURROUNDING_QUOTE_REGEX, '')
      // Skip empty tokens (e.g. a bare '' argument) and flags.
      if (!arg || arg.startsWith('-')) continue
      const ext = getFileExtensionForAnalytics(arg)
      if (ext) extensions.add(ext)
    }
  }

  if (extensions.size === 0) return undefined
  return [...extensions].join(
    ',',
  ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
}
413
/**
 * Environment context metadata attached to every analytics event.
 *
 * Populated by buildEnvContext(); optional fields are only present when the
 * corresponding environment variable or probe produced a value.
 */
export type EnvContext = {
  // --- Host / runtime ---
  /** Host platform as reported by getHostPlatformForAnalytics(). */
  platform: string
  /** CLAUDE_CODE_HOST_PLATFORM when set, otherwise the raw process.platform. */
  platformRaw: string
  arch: string
  nodeVersion: string
  terminal: string | null
  /** Comma-joined list of detected package managers. */
  packageManagers: string
  /** Comma-joined list of detected runtimes. */
  runtimes: string
  isRunningWithBun: boolean

  // --- Execution mode flags ---
  /** Truthiness of the CI env var. */
  isCi: boolean
  /** Truthiness of the CLAUBBIT env var. */
  isClaubbit: boolean
  /** Truthiness of the CLAUDE_CODE_REMOTE env var. */
  isClaudeCodeRemote: boolean
  /** True when CLAUDE_CODE_ENTRYPOINT is 'local-agent'. */
  isLocalAgentMode: boolean
  isConductor: boolean

  // --- Remote / container identification (from env vars, when set) ---
  /** CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE. */
  remoteEnvironmentType?: string
  /** CLAUDE_CODE_COWORKER_TYPE; only collected behind the COWORKER_TYPE_TELEMETRY feature. */
  coworkerType?: string
  /** CLAUDE_CODE_CONTAINER_ID. */
  claudeCodeContainerId?: string
  /** CLAUDE_CODE_REMOTE_SESSION_ID. */
  claudeCodeRemoteSessionId?: string
  /** CLAUDE_CODE_TAGS, kept as the raw string. */
  tags?: string

  // --- CI / auth / build ---
  /** Truthiness of the GITHUB_ACTIONS env var. */
  isGithubAction: boolean
  /** Truthiness of the CLAUDE_CODE_ACTION env var. */
  isClaudeCodeAction: boolean
  isClaudeAiAuth: boolean
  /** Full build version (MACRO.VERSION). */
  version: string
  /** Version without the build suffix, e.g. "2.0.36-dev". */
  versionBase?: string
  /** Build timestamp (MACRO.BUILD_TIME). */
  buildTime: string
  deploymentEnvironment: string

  // --- GitHub Actions details (only collected when GITHUB_ACTIONS is truthy) ---
  /** GITHUB_EVENT_NAME. */
  githubEventName?: string
  /** RUNNER_ENVIRONMENT. */
  githubActionsRunnerEnvironment?: string
  /** RUNNER_OS. */
  githubActionsRunnerOs?: string
  /** Portion of GITHUB_ACTION_PATH following 'claude-code-action/'. */
  githubActionRef?: string

  // --- OS / VCS details ---
  wslVersion?: string
  linuxDistroId?: string
  linuxDistroVersion?: string
  linuxKernel?: string
  /** Comma-joined list of detected version control systems. */
  vcs?: string
}
453
/**
 * Process metrics included with all analytics events.
 *
 * Produced by buildProcessMetrics() from Node's process APIs.
 */
export type ProcessMetrics = {
  /** process.uptime() at sampling time. */
  uptime: number

  // Fields copied from process.memoryUsage()
  rss: number
  heapTotal: number
  heapUsed: number
  external: number
  arrayBuffers: number

  /** Result of process.constrainedMemory(). */
  constrainedMemory: number | undefined

  /** process.cpuUsage() reading at sampling time. */
  cpuUsage: NodeJS.CpuUsage
  /**
   * CPU time consumed since the previous sample as a percentage of elapsed
   * wall time; undefined for the first sample.
   */
  cpuPercent: number | undefined
}
468
/**
 * Core event metadata shared across all analytics systems
 */
export type EventMetadata = {
  model: string
  sessionId: string
  userType: string
  /** Comma-joined beta names; omitted when empty. */
  betas?: string
  envContext: EnvContext
  entrypoint?: string
  agentSdkVersion?: string
  /** Stringified boolean ('true' / 'false'). */
  isInteractive: string
  clientType: string
  processMetrics?: ProcessMetrics
  sweBenchRunId: string
  sweBenchInstanceId: string
  sweBenchTaskId: string

  // Swarm/team agent identification for analytics attribution
  /** CLAUDE_CODE_AGENT_ID (format: agentName@teamName) or subagent UUID */
  agentId?: string
  /** CLAUDE_CODE_PARENT_SESSION_ID (team lead's session) */
  parentSessionId?: string
  /** Distinguishes swarm teammates, Agent tool subagents, and standalone agents */
  agentType?: 'teammate' | 'subagent' | 'standalone'
  /** Team name for swarm agents (from env var or AsyncLocalStorage) */
  teamName?: string

  /** OAuth subscription tier (max, pro, enterprise, team) */
  subscriptionType?: string
  /** Hashed repo remote URL (first 16 chars of SHA256), for joining with server-side data */
  rh?: string
  /** KAIROS assistant mode active (ant-only; set in main.tsx after gate check) */
  kairosActive?: true
  /** Which skill surfacing mechanism(s) are gated on (ant-only; for BQ session segmentation) */
  skillMode?: 'discovery' | 'coach' | 'discovery_and_coach'
  /** Which observer classifiers are gated on (ant-only; for BQ cohort splits on tengu_backseat_* events) */
  observerMode?: 'backseat' | 'skillcoach' | 'both'
}
497
/**
 * Options accepted by getEventMetadata() when enriching event metadata.
 */
export type EnrichMetadataOptions = {
  /** Model to report; when falsy, getMainLoopModel() is used instead. */
  model?: unknown
  /** Explicit betas string (already joined); non-string values are ignored. */
  betas?: unknown
  /** Additional metadata to include (optional). */
  additionalMetadata?: Record<string, unknown>
}
509
/**
 * Resolves which agent (if any) the current event should be attributed to.
 *
 * Sources are consulted in priority order, and the first one that yields
 * anything wins:
 *   1. AsyncLocalStorage agent context (subagents running in this process)
 *   2. Swarm helpers (agent id / parent session / team name of a teammate)
 *   3. Parent session id from bootstrap state (e.g., plan mode -> implementation)
 *
 * @returns The identification fields that could be determined; `{}` when none
 */
function getAgentIdentification(): {
  agentId?: string
  parentSessionId?: string
  agentType?: 'teammate' | 'subagent' | 'standalone'
  teamName?: string
} {
  // 1. In-process agent context.
  const ctx = getAgentContext()
  if (ctx) {
    return {
      agentId: ctx.agentId,
      parentSessionId: ctx.parentSessionId,
      agentType: ctx.agentType,
      // Only teammate contexts contribute a team name.
      ...(ctx.agentType === 'teammate' ? { teamName: ctx.teamName } : {}),
    }
  }

  // 2. Swarm helpers.
  const swarmAgentId = getAgentId()
  const swarmParentSessionId = getTeammateParentSessionId()
  const swarmTeamName = getTeamName()
  // An agent id without teammate status marks a standalone agent.
  let swarmAgentType: 'teammate' | 'standalone' | undefined
  if (isTeammate()) {
    swarmAgentType = 'teammate'
  } else if (swarmAgentId) {
    swarmAgentType = 'standalone'
  }

  if (
    swarmAgentId ||
    swarmAgentType ||
    swarmParentSessionId ||
    swarmTeamName
  ) {
    const identification: ReturnType<typeof getAgentIdentification> = {}
    if (swarmAgentId) identification.agentId = swarmAgentId
    if (swarmAgentType) identification.agentType = swarmAgentType
    if (swarmParentSessionId) {
      identification.parentSessionId = swarmParentSessionId
    }
    if (swarmTeamName) identification.teamName = swarmTeamName
    return identification
  }

  // 3. Bootstrap state.
  const stateParentSessionId = getParentSessionIdFromState()
  return stateParentSessionId ? { parentSessionId: stateParentSessionId } : {}
}
562
/**
 * Memoized accessor for the base part of the build version: the leading
 * `major.minor.patch` plus an optional lowercase pre-release word.
 * "2.0.36-dev.20251107.t174150.sha2709699" → "2.0.36-dev"
 *
 * @returns The base version, or undefined when MACRO.VERSION does not start
 *   with a `d.d.d` triple
 */
const getVersionBase = memoize((): string | undefined => {
  const [base] = /^\d+\.\d+\.\d+(?:-[a-z]+)?/.exec(MACRO.VERSION) ?? []
  return base
})
570
/**
 * Builds the environment context object.
 *
 * Memoized: the asynchronous probes (package managers, runtimes, Linux distro
 * info, VCS detection) and the env-var reads happen on the first call and the
 * resulting promise is reused afterwards.
 *
 * Optional fields are spread in conditionally so that absent values do not
 * appear as keys; the property order below is kept stable on purpose.
 */
const buildEnvContext = memoize(async (): Promise<EnvContext> => {
  const [packageManagers, runtimes, linuxDistroInfo, vcs] = await Promise.all([
    env.getPackageManagers(),
    env.getRuntimes(),
    getLinuxDistroInfo(),
    detectVcs(),
  ])

  return {
    platform: getHostPlatformForAnalytics(),
    // Raw process.platform so freebsd/openbsd/aix/sunos are visible in BQ.
    // getHostPlatformForAnalytics() buckets those into 'linux'; here we want
    // the truth. CLAUDE_CODE_HOST_PLATFORM still overrides for container/remote.
    platformRaw: process.env.CLAUDE_CODE_HOST_PLATFORM || process.platform,
    arch: env.arch,
    nodeVersion: env.nodeVersion,
    terminal: envDynamic.terminal,
    packageManagers: packageManagers.join(','),
    runtimes: runtimes.join(','),
    isRunningWithBun: env.isRunningWithBun(),
    isCi: isEnvTruthy(process.env.CI),
    isClaubbit: isEnvTruthy(process.env.CLAUBBIT),
    isClaudeCodeRemote: isEnvTruthy(process.env.CLAUDE_CODE_REMOTE),
    isLocalAgentMode: process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent',
    isConductor: env.isConductor(),
    ...(process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE
      ? {
          remoteEnvironmentType:
            process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE,
        }
      : {}),
    // Gated by feature flag to prevent leaking "coworkerType" string in external builds
    ...(feature('COWORKER_TYPE_TELEMETRY')
      ? process.env.CLAUDE_CODE_COWORKER_TYPE
        ? { coworkerType: process.env.CLAUDE_CODE_COWORKER_TYPE }
        : {}
      : {}),
    ...(process.env.CLAUDE_CODE_CONTAINER_ID
      ? { claudeCodeContainerId: process.env.CLAUDE_CODE_CONTAINER_ID }
      : {}),
    ...(process.env.CLAUDE_CODE_REMOTE_SESSION_ID
      ? {
          claudeCodeRemoteSessionId: process.env.CLAUDE_CODE_REMOTE_SESSION_ID,
        }
      : {}),
    ...(process.env.CLAUDE_CODE_TAGS
      ? { tags: process.env.CLAUDE_CODE_TAGS }
      : {}),
    isGithubAction: isEnvTruthy(process.env.GITHUB_ACTIONS),
    isClaudeCodeAction: isEnvTruthy(process.env.CLAUDE_CODE_ACTION),
    isClaudeAiAuth: isClaudeAISubscriber(),
    version: MACRO.VERSION,
    versionBase: getVersionBase(),
    buildTime: MACRO.BUILD_TIME,
    deploymentEnvironment: env.detectDeploymentEnvironment(),
    // Runner details are only collected inside GitHub Actions. All four keys
    // are set in that case, even when an individual value is undefined.
    ...(isEnvTruthy(process.env.GITHUB_ACTIONS)
      ? {
          githubEventName: process.env.GITHUB_EVENT_NAME,
          githubActionsRunnerEnvironment: process.env.RUNNER_ENVIRONMENT,
          githubActionsRunnerOs: process.env.RUNNER_OS,
          // Whatever follows 'claude-code-action/' in the action path.
          githubActionRef: process.env.GITHUB_ACTION_PATH?.includes(
            'claude-code-action/',
          )
            ? process.env.GITHUB_ACTION_PATH.split('claude-code-action/')[1]
            : undefined,
        }
      : {}),
    ...(getWslVersion() ? { wslVersion: getWslVersion() } : {}),
    ...(linuxDistroInfo ?? {}),
    ...(vcs.length > 0 && { vcs: vcs.join(',') }),
  }
})
639
// --
// CPU% delta tracking — inherently process-global, same pattern as logBatch/flushTimer in datadog.ts
// Both values hold the previous sample taken by buildProcessMetrics().
let prevCpuUsage: NodeJS.CpuUsage | null = null
let prevWallTimeMs: number | null = null

/**
 * Builds process metrics object for all users.
 *
 * Samples memory and CPU usage and derives `cpuPercent` from the CPU time
 * consumed since the previous call relative to the wall time elapsed since
 * then. The first call has no previous sample, so `cpuPercent` is undefined.
 * Each call stores its own sample as the baseline for the next one.
 *
 * @returns The metrics, or undefined if any of the process APIs throws
 */
function buildProcessMetrics(): ProcessMetrics | undefined {
  try {
    const { rss, heapTotal, heapUsed, external, arrayBuffers } =
      process.memoryUsage()
    const cpuUsage = process.cpuUsage()
    const nowMs = Date.now()

    // Wall time since the previous sample; 0 when there is no previous sample.
    const elapsedMs =
      prevCpuUsage && prevWallTimeMs ? nowMs - prevWallTimeMs : 0
    let cpuPercent: number | undefined
    if (prevCpuUsage && elapsedMs > 0) {
      // cpuUsage values are in microseconds, hence the ms -> us conversion.
      const cpuDeltaUs =
        cpuUsage.user - prevCpuUsage.user +
        (cpuUsage.system - prevCpuUsage.system)
      cpuPercent = (cpuDeltaUs / (elapsedMs * 1000)) * 100
    }

    // Remember this sample for the next delta computation.
    prevCpuUsage = cpuUsage
    prevWallTimeMs = nowMs

    return {
      uptime: process.uptime(),
      rss,
      heapTotal,
      heapUsed,
      external,
      arrayBuffers,
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      constrainedMemory: process.constrainedMemory(),
      cpuUsage,
      cpuPercent,
    }
  } catch {
    return undefined
  }
}
683
/**
 * Get core event metadata shared across all analytics systems.
 *
 * This function collects environment, runtime, and context information
 * that should be included with all analytics events.
 *
 * Only `options.model` and `options.betas` are read here;
 * `options.additionalMetadata` is not consumed by this function.
 *
 * @param options - Configuration options. A falsy `model` falls back to
 *   getMainLoopModel(); a non-string `betas` falls back to the betas of the
 *   resolved model, comma-joined.
 * @returns Promise resolving to enriched metadata object
 */
export async function getEventMetadata(
  options: EnrichMetadataOptions = {},
): Promise<EventMetadata> {
  const model = options.model ? String(options.model) : getMainLoopModel()
  const betas =
    typeof options.betas === 'string'
      ? options.betas
      : getModelBetas(model).join(',')
  const [envContext, repoRemoteHash] = await Promise.all([
    buildEnvContext(),
    getRepoRemoteHash(),
  ])
  const processMetrics = buildProcessMetrics()
  // Read once so the presence check and the logged value come from the same
  // call (no non-null assertion on a second read).
  const subscriptionType = getSubscriptionType()

  const metadata: EventMetadata = {
    model,
    sessionId: getSessionId(),
    userType: process.env.USER_TYPE || '',
    ...(betas.length > 0 ? { betas: betas } : {}),
    envContext,
    ...(process.env.CLAUDE_CODE_ENTRYPOINT && {
      entrypoint: process.env.CLAUDE_CODE_ENTRYPOINT,
    }),
    ...(process.env.CLAUDE_AGENT_SDK_VERSION && {
      agentSdkVersion: process.env.CLAUDE_AGENT_SDK_VERSION,
    }),
    isInteractive: String(getIsInteractive()),
    clientType: getClientType(),
    ...(processMetrics && { processMetrics }),
    sweBenchRunId: process.env.SWE_BENCH_RUN_ID || '',
    sweBenchInstanceId: process.env.SWE_BENCH_INSTANCE_ID || '',
    sweBenchTaskId: process.env.SWE_BENCH_TASK_ID || '',
    // Swarm/team agent identification
    // Priority: AsyncLocalStorage context (subagents) > env vars (swarm teammates)
    ...getAgentIdentification(),
    // Subscription tier for DAU-by-tier analytics
    ...(subscriptionType && { subscriptionType }),
    // Assistant mode tag — lives outside memoized buildEnvContext() because
    // setKairosActive() runs at main.tsx:~1648, after the first event may
    // have already fired and memoized the env. Read fresh per-event instead.
    ...(feature('KAIROS') && getKairosActive()
      ? { kairosActive: true as const }
      : {}),
    // Repo remote hash for joining with server-side repo bundle data
    ...(repoRemoteHash && { rh: repoRemoteHash }),
  }

  return metadata
}
744
745
/**
 * Core event metadata for 1P event logging (snake_case format).
 *
 * Snake_case counterpart of the core fields of EventMetadata, as produced by
 * to1PEventFormat().
 */
export type FirstPartyEventLoggingCoreMetadata = {
  session_id: string
  model: string
  user_type: string
  betas?: string
  entrypoint?: string
  agent_sdk_version?: string
  /** Real boolean here; EventMetadata carries it as the string 'true'/'false'. */
  is_interactive: boolean
  client_type: string

  // SWE-bench identifiers (only set when non-empty)
  swe_bench_run_id?: string
  swe_bench_instance_id?: string
  swe_bench_task_id?: string

  // Swarm/team agent identification
  agent_id?: string
  parent_session_id?: string
  agent_type?: 'teammate' | 'subagent' | 'standalone'
  team_name?: string
}
767
/**
 * Complete event logging metadata format for 1P events.
 */
export type FirstPartyEventLoggingMetadata = {
  /** Environment metadata, typed by the proto-generated EnvironmentMetadata. */
  env: EnvironmentMetadata
  /** Base64-encoded JSON of the process metrics, when available. */
  process?: string
  /**
   * auth is a top-level field on ClaudeCodeInternalEvent (proto PublicApiAuth).
   * account_id is intentionally omitted — only UUID fields are populated client-side.
   */
  auth?: PublicApiAuth
  /**
   * core fields correspond to the top level of ClaudeCodeInternalEvent.
   * They get directly exported to their individual columns in the BigQuery tables
   */
  core: FirstPartyEventLoggingCoreMetadata
  /**
   * additional fields are populated in the additional_metadata field of the
   * ClaudeCodeInternalEvent proto. Includes but is not limited to information
   * that differs by event type.
   */
  additional: Record<string, unknown>
}
785
/**
 * Builds the snake_case `env` section of a 1P event.
 *
 * IMPORTANT: env is typed as the proto-generated EnvironmentMetadata so that
 * adding a field here that the proto doesn't define is a compile error. The
 * generated toJSON() serializer silently drops unknown keys — a hand-written
 * parallel type previously let #11318, #13924, #19448, and coworker_type all
 * ship fields that never reached BQ.
 * Adding a field? Update the monorepo proto first (go/cc-logging):
 *   event_schemas/.../claude_code/v1/claude_code_internal_event.proto
 * then run `bun run generate:proto` here.
 */
function buildFirstPartyEnv(
  envContext: EnvContext,
  userMetadata: CoreUserData,
): EnvironmentMetadata {
  // Always-present fields.
  const env: EnvironmentMetadata = {
    platform: envContext.platform,
    platform_raw: envContext.platformRaw,
    arch: envContext.arch,
    node_version: envContext.nodeVersion,
    terminal: envContext.terminal || 'unknown',
    package_managers: envContext.packageManagers,
    runtimes: envContext.runtimes,
    is_running_with_bun: envContext.isRunningWithBun,
    is_ci: envContext.isCi,
    is_claubbit: envContext.isClaubbit,
    is_claude_code_remote: envContext.isClaudeCodeRemote,
    is_local_agent_mode: envContext.isLocalAgentMode,
    is_conductor: envContext.isConductor,
    is_github_action: envContext.isGithubAction,
    is_claude_code_action: envContext.isClaudeCodeAction,
    is_claude_ai_auth: envContext.isClaudeAiAuth,
    version: envContext.version,
    build_time: envContext.buildTime,
    deployment_environment: envContext.deploymentEnvironment,
  }

  // Optional fields: copied only when the source value is truthy.
  if (envContext.remoteEnvironmentType) {
    env.remote_environment_type = envContext.remoteEnvironmentType
  }
  if (feature('COWORKER_TYPE_TELEMETRY') && envContext.coworkerType) {
    env.coworker_type = envContext.coworkerType
  }
  if (envContext.claudeCodeContainerId) {
    env.claude_code_container_id = envContext.claudeCodeContainerId
  }
  if (envContext.claudeCodeRemoteSessionId) {
    env.claude_code_remote_session_id = envContext.claudeCodeRemoteSessionId
  }
  if (envContext.tags) {
    // The comma-separated tag string becomes a list of trimmed, non-empty tags.
    env.tags = envContext.tags
      .split(',')
      .map(t => t.trim())
      .filter(Boolean)
  }
  if (envContext.githubEventName) {
    env.github_event_name = envContext.githubEventName
  }
  if (envContext.githubActionsRunnerEnvironment) {
    env.github_actions_runner_environment =
      envContext.githubActionsRunnerEnvironment
  }
  if (envContext.githubActionsRunnerOs) {
    env.github_actions_runner_os = envContext.githubActionsRunnerOs
  }
  if (envContext.githubActionRef) {
    env.github_action_ref = envContext.githubActionRef
  }
  if (envContext.wslVersion) {
    env.wsl_version = envContext.wslVersion
  }
  if (envContext.linuxDistroId) {
    env.linux_distro_id = envContext.linuxDistroId
  }
  if (envContext.linuxDistroVersion) {
    env.linux_distro_version = envContext.linuxDistroVersion
  }
  if (envContext.linuxKernel) {
    env.linux_kernel = envContext.linuxKernel
  }
  if (envContext.vcs) {
    env.vcs = envContext.vcs
  }
  if (envContext.versionBase) {
    env.version_base = envContext.versionBase
  }

  // Map userMetadata to output fields.
  // Based on src/utils/user.ts getUser(), but with fields present in other
  // parts of ClaudeCodeInternalEvent deduplicated.
  // Convert camelCase GitHubActionsMetadata to snake_case for 1P API
  // Note: github_actions_metadata is placed inside env (EnvironmentMetadata)
  // rather than at the top level of ClaudeCodeInternalEvent
  const ghMeta = userMetadata.githubActionsMetadata
  if (ghMeta) {
    env.github_actions_metadata = {
      actor_id: ghMeta.actorId,
      repository_id: ghMeta.repositoryId,
      repository_owner_id: ghMeta.repositoryOwnerId,
    }
  }

  return env
}

/**
 * Builds the snake_case `core` section of a 1P event. Optional fields are
 * included only when their source value is truthy.
 */
function buildFirstPartyCore(
  metadata: EventMetadata,
): FirstPartyEventLoggingCoreMetadata {
  return {
    session_id: metadata.sessionId,
    model: metadata.model,
    user_type: metadata.userType,
    // EventMetadata stores this as the string 'true' / 'false'.
    is_interactive: metadata.isInteractive === 'true',
    client_type: metadata.clientType,
    ...(metadata.betas ? { betas: metadata.betas } : {}),
    ...(metadata.entrypoint ? { entrypoint: metadata.entrypoint } : {}),
    ...(metadata.agentSdkVersion
      ? { agent_sdk_version: metadata.agentSdkVersion }
      : {}),
    ...(metadata.sweBenchRunId
      ? { swe_bench_run_id: metadata.sweBenchRunId }
      : {}),
    ...(metadata.sweBenchInstanceId
      ? { swe_bench_instance_id: metadata.sweBenchInstanceId }
      : {}),
    ...(metadata.sweBenchTaskId
      ? { swe_bench_task_id: metadata.sweBenchTaskId }
      : {}),
    // Swarm/team agent identification
    ...(metadata.agentId ? { agent_id: metadata.agentId } : {}),
    ...(metadata.parentSessionId
      ? { parent_session_id: metadata.parentSessionId }
      : {}),
    ...(metadata.agentType ? { agent_type: metadata.agentType } : {}),
    ...(metadata.teamName ? { team_name: metadata.teamName } : {}),
  }
}

/**
 * Convert metadata to 1P event logging format (snake_case fields).
 *
 * The /api/event_logging/batch endpoint expects snake_case field names
 * for environment and core metadata.
 *
 * @param metadata - Core event metadata
 * @param userMetadata - User data; supplies the GitHub Actions metadata placed
 *   inside `env` and the account/organization UUIDs used for `auth`
 * @param additionalMetadata - Additional metadata to include; its keys take
 *   precedence over the built-in `additional` entries
 * @returns Metadata formatted for 1P event logging
 */
export function to1PEventFormat(
  metadata: EventMetadata,
  userMetadata: CoreUserData,
  additionalMetadata: Record<string, unknown> = {},
): FirstPartyEventLoggingMetadata {
  const { envContext, processMetrics, rh, kairosActive, skillMode, observerMode } =
    metadata

  const env = buildFirstPartyEnv(envContext, userMetadata)
  const core = buildFirstPartyCore(metadata)

  // auth is only emitted when at least one UUID is known.
  const { accountUuid, organizationUuid } = userMetadata
  const auth: PublicApiAuth | undefined =
    accountUuid || organizationUuid
      ? { account_uuid: accountUuid, organization_uuid: organizationUuid }
      : undefined

  return {
    env,
    // Process metrics travel as base64-encoded JSON.
    ...(processMetrics
      ? {
          process: Buffer.from(jsonStringify(processMetrics)).toString(
            'base64',
          ),
        }
      : {}),
    ...(auth ? { auth } : {}),
    core,
    additional: {
      ...(rh ? { rh } : {}),
      ...(kairosActive ? { is_assistant_mode: true } : {}),
      ...(skillMode ? { skill_mode: skillMode } : {}),
      ...(observerMode ? { observer_mode: observerMode } : {}),
      ...additionalMetadata,
    },
  }
}