source dump of claude code
at main 307 lines 9.1 kB view raw
import axios from 'axios'
import { createHash } from 'crypto'
import memoize from 'lodash-es/memoize.js'
import { getOrCreateUserID } from '../../utils/config.js'
import { logError } from '../../utils/log.js'
import { getCanonicalName } from '../../utils/model/model.js'
import { getAPIProvider } from '../../utils/model/providers.js'
import { MODEL_COSTS } from '../../utils/modelCost.js'
import { isAnalyticsDisabled } from './config.js'
import { getEventMetadata } from './metadata.js'

/** Datadog logs intake URL that batched events are POSTed to. */
const DATADOG_LOGS_ENDPOINT =
  'https://http-intake.logs.us5.datadoghq.com/api/v2/logs'
/** Token sent in the `DD-API-KEY` header of every intake request. */
const DATADOG_CLIENT_TOKEN = 'pubbbf48e6d78dae54bceaa4acf463299bf'
/** Delay before a partially filled batch is sent, unless overridden via env. */
const DEFAULT_FLUSH_INTERVAL_MS = 15000
/** Batch length at which a flush is triggered immediately. */
const MAX_BATCH_SIZE = 100
/** Per-request timeout passed to axios. */
const NETWORK_TIMEOUT_MS = 5000

/**
 * Allowlist of event names. `trackDatadogEvent` silently drops any event
 * whose name is not in this set.
 */
const DATADOG_ALLOWED_EVENTS = new Set([
  'chrome_bridge_connection_succeeded',
  'chrome_bridge_connection_failed',
  'chrome_bridge_disconnected',
  'chrome_bridge_tool_call_completed',
  'chrome_bridge_tool_call_error',
  'chrome_bridge_tool_call_started',
  'chrome_bridge_tool_call_timeout',
  'tengu_api_error',
  'tengu_api_success',
  'tengu_brief_mode_enabled',
  'tengu_brief_mode_toggled',
  'tengu_brief_send',
  'tengu_cancel',
  'tengu_compact_failed',
  'tengu_exit',
  'tengu_flicker',
  'tengu_init',
  'tengu_model_fallback_triggered',
  'tengu_oauth_error',
  'tengu_oauth_success',
  'tengu_oauth_token_refresh_failure',
  'tengu_oauth_token_refresh_success',
  'tengu_oauth_token_refresh_lock_acquiring',
  'tengu_oauth_token_refresh_lock_acquired',
  'tengu_oauth_token_refresh_starting',
  'tengu_oauth_token_refresh_completed',
  'tengu_oauth_token_refresh_lock_releasing',
  'tengu_oauth_token_refresh_lock_released',
  'tengu_query_error',
  'tengu_session_file_read',
  'tengu_started',
  'tengu_tool_use_error',
  'tengu_tool_use_granted_in_prompt_permanent',
  'tengu_tool_use_granted_in_prompt_temporary',
  'tengu_tool_use_rejected_in_prompt',
  'tengu_tool_use_success',
  'tengu_uncaught_exception',
  'tengu_unhandled_rejection',
  'tengu_voice_recording_started',
  'tengu_voice_toggled',
  'tengu_team_mem_sync_pull',
  'tengu_team_mem_sync_push',
  'tengu_team_mem_sync_started',
  'tengu_team_mem_entries_capped',
])

/**
 * Event-data fields that are additionally emitted as `ddtags` entries
 * (`<snake_case_name>:<value>`) whenever they are present on an event.
 */
const TAG_FIELDS = [
  'arch',
  'clientType',
  'errorType',
  'http_status_range',
  'http_status',
  'kairosActive',
  'model',
  'platform',
  'provider',
  'skillMode',
  'subscriptionType',
  'toolName',
  'userBucket',
  'userType',
  'version',
  'versionBase',
]

/**
 * Converts a camelCase identifier to snake_case by prefixing every ASCII
 * uppercase letter with `_` and lowercasing it. Strings without uppercase
 * letters (e.g. `http_status`) are returned unchanged.
 */
function camelToSnakeCase(str: string): string {
  return str.replace(/[A-Z]/g, letter => `_${letter.toLowerCase()}`)
}

/** Shape of a single log entry in the JSON array sent to the intake endpoint. */
type DatadogLog = {
  ddsource: string
  ddtags: string
  message: string
  service: string
  hostname: string
  [key: string]: unknown
}

/** Events queued since the last flush. */
let logBatch: DatadogLog[] = []
/** Pending scheduled flush, or null when none is scheduled. */
let flushTimer: NodeJS.Timeout | null = null
/** Cached result of `initializeDatadog`; null until it has run. */
let datadogInitialized: boolean | null = null
/**
 * Flushes whose HTTP request has been started but has not settled yet.
 * Tracked so that `shutdownDatadog` can wait for them before the process
 * exits.
 */
const inFlightFlushes = new Set<Promise<void>>()

/**
 * POSTs one batch to the intake endpoint. Request failures are passed to
 * `logError` and not rethrown; a failed batch is dropped rather than retried.
 */
async function sendLogs(logs: DatadogLog[]): Promise<void> {
  try {
    await axios.post(DATADOG_LOGS_ENDPOINT, logs, {
      headers: {
        'Content-Type': 'application/json',
        'DD-API-KEY': DATADOG_CLIENT_TOKEN,
      },
      timeout: NETWORK_TIMEOUT_MS,
    })
  } catch (error) {
    logError(error)
  }
}

/**
 * Sends the currently queued batch, if any.
 *
 * The queue is swapped for a fresh array before the request starts, so
 * events tracked while the request is in flight land in the next batch.
 * The returned promise is registered in `inFlightFlushes` until it settles.
 */
function flushLogs(): Promise<void> {
  if (logBatch.length === 0) return Promise.resolve()

  const logsToSend = logBatch
  logBatch = []

  const request = sendLogs(logsToSend).finally(() => {
    inFlightFlushes.delete(request)
  })
  inFlightFlushes.add(request)
  return request
}

/**
 * Schedules a flush after `getFlushIntervalMs()` unless one is already
 * scheduled. The timer is unref'd so it does not keep the process alive.
 */
function scheduleFlush(): void {
  if (flushTimer) return

  flushTimer = setTimeout(() => {
    flushTimer = null
    void flushLogs()
  }, getFlushIntervalMs()).unref()
}

/**
 * Decides once (memoized) whether Datadog logging is enabled and caches the
 * answer in `datadogInitialized`.
 *
 * @returns false when `isAnalyticsDisabled()` reports true, otherwise true.
 */
export const initializeDatadog = memoize(async (): Promise<boolean> => {
  datadogInitialized = !isAnalyticsDisabled()
  return datadogInitialized
})

/**
 * Flush remaining Datadog logs and shut down.
 * Called from gracefulShutdown() before process.exit() since
 * forceExit() prevents the beforeExit handler from firing.
 *
 * Besides sending whatever is still queued, this waits for flushes that were
 * started earlier (by the timer or by a full batch) and have not settled yet.
 * Without that wait, those requests would be cut off by process.exit().
 * Each request is bounded by NETWORK_TIMEOUT_MS.
 */
export async function shutdownDatadog(): Promise<void> {
  if (flushTimer) {
    clearTimeout(flushTimer)
    flushTimer = null
  }
  // flushLogs() registers its request synchronously, so the spread below
  // sees both the final flush and any earlier ones still in flight.
  await Promise.all([flushLogs(), ...inFlightFlushes])
}

/**
 * Queues an analytics event for delivery to Datadog.
 *
 * The event is dropped without error when NODE_ENV is not 'production', when
 * the API provider is not 'firstParty', when analytics are disabled, or when
 * `eventName` is not in `DATADOG_ALLOWED_EVENTS`. Any error raised while
 * building the log entry is passed to `logError` and not rethrown.
 *
 * @param eventName  Event name; sent as the log `message` and as an
 *                   `event:<name>` tag.
 * @param properties Event properties, merged over the shared event metadata.
 */
// NOTE: use via src/services/analytics/index.ts > logEvent
export async function trackDatadogEvent(
  eventName: string,
  properties: { [key: string]: boolean | number | undefined },
): Promise<void> {
  if (process.env.NODE_ENV !== 'production') {
    return
  }

  // Don't send events for 3P providers (Bedrock, Vertex, Foundry)
  if (getAPIProvider() !== 'firstParty') {
    return
  }

  // Fast path: use cached result if available to avoid await overhead
  let initialized = datadogInitialized
  if (initialized === null) {
    initialized = await initializeDatadog()
  }
  if (!initialized || !DATADOG_ALLOWED_EVENTS.has(eventName)) {
    return
  }

  try {
    const metadata = await getEventMetadata({
      model: properties.model,
      betas: properties.betas,
    })
    // Destructure to avoid duplicate envContext (once nested, once flattened)
    const { envContext, ...restMetadata } = metadata
    const allData: Record<string, unknown> = {
      ...restMetadata,
      ...envContext,
      ...properties,
      userBucket: getUserBucket(),
    }

    // Normalize MCP tool names to "mcp" for cardinality reduction
    if (
      typeof allData.toolName === 'string' &&
      allData.toolName.startsWith('mcp__')
    ) {
      allData.toolName = 'mcp'
    }

    // Normalize model names for cardinality reduction (external users only)
    if (process.env.USER_TYPE !== 'ant' && typeof allData.model === 'string') {
      const shortName = getCanonicalName(allData.model.replace(/\[1m]$/i, ''))
      allData.model = shortName in MODEL_COSTS ? shortName : 'other'
    }

    // Truncate dev version to base + date (remove timestamp and sha for cardinality reduction)
    // e.g. "2.0.53-dev.20251124.t173302.sha526cc6a" -> "2.0.53-dev.20251124"
    if (typeof allData.version === 'string') {
      allData.version = allData.version.replace(
        /^(\d+\.\d+\.\d+-dev\.\d{8})\.t\d+\.sha[a-f0-9]+$/,
        '$1',
      )
    }

    // Transform status to http_status and http_status_range to avoid Datadog reserved field
    if (allData.status !== undefined && allData.status !== null) {
      const statusCode = String(allData.status)
      allData.http_status = statusCode

      // Determine status range (1xx, 2xx, 3xx, 4xx, 5xx)
      const firstDigit = statusCode.charAt(0)
      if (firstDigit >= '1' && firstDigit <= '5') {
        allData.http_status_range = `${firstDigit}xx`
      }

      // Remove original status field to avoid conflict with Datadog's reserved field
      delete allData.status
    }

    // Build ddtags with high-cardinality fields for filtering.
    // event:<name> is prepended so the event name is searchable via the
    // log search API — the `message` field (where eventName also lives)
    // is a DD reserved field and is NOT queryable from dashboard widget
    // queries or the aggregation API. See scripts/release/MONITORING.md.
    const tags = [
      `event:${eventName}`,
      ...TAG_FIELDS.filter(
        field => allData[field] !== undefined && allData[field] !== null,
      ).map(field => `${camelToSnakeCase(field)}:${allData[field]}`),
    ]

    const log: DatadogLog = {
      ddsource: 'nodejs',
      ddtags: tags.join(','),
      message: eventName,
      service: 'claude-code',
      hostname: 'claude-code',
      env: process.env.USER_TYPE,
    }

    // Add all fields as searchable attributes (not duplicated in tags)
    for (const [key, value] of Object.entries(allData)) {
      if (value !== undefined && value !== null) {
        log[camelToSnakeCase(key)] = value
      }
    }

    logBatch.push(log)

    // Flush immediately if batch is full, otherwise schedule
    if (logBatch.length >= MAX_BATCH_SIZE) {
      if (flushTimer) {
        clearTimeout(flushTimer)
        flushTimer = null
      }
      void flushLogs()
    } else {
      scheduleFlush()
    }
  } catch (error) {
    logError(error)
  }
}

const NUM_USER_BUCKETS = 30

/**
 * Gets a 'bucket' that the user ID falls into.
 *
 * For alerting purposes, we want to alert on the number of users impacted
 * by an issue, rather than the number of events - often a small number of users
 * can generate a large number of events (e.g. due to retries). To approximate
 * this without ruining cardinality by counting user IDs directly, we hash the user ID
 * and assign it to one of a fixed number of buckets.
 *
 * This allows us to estimate the number of unique users by counting unique buckets,
 * while preserving user privacy and reducing cardinality.
 *
 * The bucket is the first 8 hex digits of the SHA-256 of the user ID, taken
 * modulo NUM_USER_BUCKETS; the result is memoized.
 */
const getUserBucket = memoize((): number => {
  const userId = getOrCreateUserID()
  const hash = createHash('sha256').update(userId).digest('hex')
  return parseInt(hash.slice(0, 8), 16) % NUM_USER_BUCKETS
})

/**
 * Returns the flush delay in milliseconds: the integer parsed from
 * CLAUDE_CODE_DATADOG_FLUSH_INTERVAL_MS when that yields a non-zero number,
 * otherwise DEFAULT_FLUSH_INTERVAL_MS.
 */
function getFlushIntervalMs(): number {
  // Allow tests to override to not block on the default flush interval.
  return (
    parseInt(process.env.CLAUDE_CODE_DATADOG_FLUSH_INTERVAL_MS || '', 10) ||
    DEFAULT_FLUSH_INTERVAL_MS
  )
}