utils/telemetry/betaSessionTracing.ts at main

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / utils / telemetry / betaSessionTracing.ts
at main 491 lines 16 kB view raw
wrap content
oppi.li dump from zip 4d ago
63aada3f
  1/**
  2 * Beta Session Tracing for Claude Code
  3 *
  4 * This module contains beta tracing features enabled when
  5 * ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT are set.
  6 *
  7 * For external users, tracing is enabled in SDK/headless mode, or in
  8 * interactive mode when the org is allowlisted via the
  9 * tengu_trace_lantern GrowthBook gate.
 10 * For ant users, tracing is enabled in all modes.
 11 *
 12 * Visibility Rules:
 13 * | Content          | External | Ant  |
 14 * |------------------|----------|------|
 * | System prompts   | ✅       | ✅   |
 * | Model output     | ✅       | ✅   |
 * | Thinking output  | ❌       | ✅   |
 * | Tools            | ✅       | ✅   |
 * | new_context      | ✅       | ✅   |
 20 *
 21 * Features:
 22 * - Per-agent message tracking with hash-based deduplication
 23 * - System prompt logging (once per unique hash)
 24 * - Hook execution spans
 25 * - Detailed new_context attributes for LLM requests
 26 */
 27
 28import type { Span } from '@opentelemetry/api'
 29import { createHash } from 'crypto'
 30import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
 31import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
 32import { sanitizeToolNameForAnalytics } from '../../services/analytics/metadata.js'
 33import type { AssistantMessage, UserMessage } from '../../types/message.js'
 34import { isEnvTruthy } from '../envUtils.js'
 35import { jsonParse, jsonStringify } from '../slowOperations.js'
 36import { logOTelEvent } from './events.js'
 37
// Message type for API calls (UserMessage or AssistantMessage).
// This is the element type of the `messagesForAPI` array accepted by
// addBetaLLMRequestAttributes and the input type of hashMessage.
type APIMessage = UserMessage | AssistantMessage
 40
 41/**
 42 * Track hashes we've already logged this session (system prompts, tools, etc).
 43 *
 44 * WHY: System prompts and tool schemas are large and rarely change within a session.
 45 * Sending full content on every request would be wasteful. Instead, we hash and
 46 * only log the full content once per unique hash.
 47 */
 48const seenHashes = new Set<string>()
 49
 50/**
 51 * Track the last reported message hash per querySource (agent) for incremental context.
 52 *
 53 * WHY: When debugging traces, we want to see what NEW information was added each turn,
 54 * not the entire conversation history (which can be huge). By tracking the last message
 55 * we reported per agent, we can compute and send only the delta (new messages since
 56 * the last request). This is tracked per-agent (querySource) because different agents
 57 * (main thread, subagents, warmup requests) have independent conversation contexts.
 58 */
 59const lastReportedMessageHash = new Map<string, string>()
 60
 61/**
 62 * Clear tracking state after compaction.
 63 * Old hashes are irrelevant once messages have been replaced.
 64 */
 65export function clearBetaTracingState(): void {
 66  seenHashes.clear()
 67  lastReportedMessageHash.clear()
 68}
 69
/**
 * Default cap used by truncateContent. It is compared against
 * `content.length`, so it counts string length units rather than encoded bytes.
 */
const MAX_CONTENT_SIZE = 60 * 1024 // 60KB (Honeycomb limit is 64KB, staying safe)
 71
 72/**
 73 * Check if beta detailed tracing is enabled.
 74 * - Requires ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT
 75 * - For external users, enabled in SDK/headless mode OR when org is
 76 *   allowlisted via the tengu_trace_lantern GrowthBook gate
 77 */
 78export function isBetaTracingEnabled(): boolean {
 79  const baseEnabled =
 80    isEnvTruthy(process.env.ENABLE_BETA_TRACING_DETAILED) &&
 81    Boolean(process.env.BETA_TRACING_ENDPOINT)
 82
 83  if (!baseEnabled) {
 84    return false
 85  }
 86
 87  // For external users, enable in SDK/headless mode OR when org is allowlisted.
 88  // Gate reads from disk cache, so first run after allowlisting returns false;
 89  // works from second run onward (same behavior as enhanced_telemetry_beta).
 90  if (process.env.USER_TYPE !== 'ant') {
 91    return (
 92      getIsNonInteractiveSession() ||
 93      getFeatureValue_CACHED_MAY_BE_STALE('tengu_trace_lantern', false)
 94    )
 95  }
 96
 97  return true
 98}
 99
/**
 * Truncate content so it fits within the tracing backend's per-field limit.
 *
 * Size is measured with `content.length` (UTF-16 code units), not bytes.
 * NOTE(review): text that is mostly non-ASCII can therefore still exceed a
 * byte-based backend limit — confirm whether the limit is enforced in bytes.
 *
 * When truncation happens, a marker naming the limit is appended after the
 * kept prefix, so the returned string is slightly longer than `maxSize`.
 *
 * @param content - Text to cap
 * @param maxSize - Maximum number of characters to keep (defaults to
 *   MAX_CONTENT_SIZE). Negative values are treated as 0.
 * @returns The (possibly shortened) text and whether it was shortened
 */
export function truncateContent(
  content: string,
  maxSize: number = MAX_CONTENT_SIZE,
): { content: string; truncated: boolean } {
  if (content.length <= maxSize) {
    return { content, truncated: false }
  }

  // A negative limit would make slice() count from the end of the string and
  // keep almost everything; clamp it so "keep nothing" is the result instead.
  const limit = Math.max(0, maxSize)

  let kept = content.slice(0, limit)
  // If the cut landed between the two halves of a surrogate pair, drop the
  // dangling high surrogate so the output stays well-formed text.
  const lastUnit = kept.charCodeAt(kept.length - 1)
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    kept = kept.slice(0, -1)
  }

  // Name the limit that was actually applied. For the default 60 * 1024 this
  // yields the same "60KB" text as before.
  const limitLabel =
    limit > 0 && limit % 1024 === 0
      ? `${limit / 1024}KB`
      : `${limit}-character`

  return {
    content: `${kept}\n\n[TRUNCATED - Content exceeds ${limitLabel} limit]`,
    truncated: true,
  }
}
118
/**
 * Compute a compact fingerprint of a string: the SHA-256 digest rendered as
 * hex and cut down to its first 12 characters.
 *
 * @param content - Text to fingerprint
 * @returns A 12-character lowercase hex string
 */
function shortHash(content: string): string {
  const hasher = createHash('sha256')
  hasher.update(content)
  const hexDigest = hasher.digest('hex')
  return hexDigest.substring(0, 12)
}
125
/**
 * Build the tracking key for a system prompt: the literal prefix "sp_"
 * followed by the short hash of the prompt text.
 *
 * @param systemPrompt - Full system prompt text
 * @returns Key of the form `sp_<12 hex chars>`
 */
function hashSystemPrompt(systemPrompt: string): string {
  const digest = shortHash(systemPrompt)
  return 'sp_' + digest
}
132
/**
 * Build the tracking key for a message: the literal prefix "msg_" followed by
 * the short hash of the message's serialized `message.content`.
 * Only the content is hashed, so two messages with identical content produce
 * the same key.
 *
 * @param message - User or assistant message to fingerprint
 * @returns Key of the form `msg_<12 hex chars>`
 */
function hashMessage(message: APIMessage): string {
  const serialized = jsonStringify(message.message.content)
  const digest = shortHash(serialized)
  return 'msg_' + digest
}
140
// Matches a single <system-reminder>…</system-reminder> block anywhere in the
// text. The body is captured lazily so each match ends at the first closing
// tag; one optional newline directly inside each tag is excluded from it.
const SYSTEM_REMINDER_BLOCK_REGEX =
  /<system-reminder>\n?([\s\S]*?)\n?<\/system-reminder>/g

/**
 * Check if text consists entirely of system reminders, i.e. one or more
 * <system-reminder> blocks with nothing but whitespace around and between
 * them.
 *
 * The blocks are walked one by one rather than matched with a single
 * start-to-end pattern. A single anchored lazy pattern would also accept
 * "<system-reminder>a</system-reminder> user text <system-reminder>b</system-reminder>",
 * which would file real user text under system reminders.
 *
 * @param text - Raw text of a message, text block, or tool result
 * @returns The trimmed inner content (multiple blocks are joined with a blank
 *   line), or null when the text contains anything other than reminder
 *   blocks or when the reminders are empty
 */
function extractSystemReminderContent(text: string): string | null {
  const trimmed = text.trim()
  const bodies: string[] = []
  let cursor = 0

  for (const block of trimmed.matchAll(SYSTEM_REMINDER_BLOCK_REGEX)) {
    const start = block.index ?? 0
    // Anything other than whitespace before this block means the text is
    // not purely reminders.
    if (trimmed.slice(cursor, start).trim() !== '') {
      return null
    }
    const body = (block[1] ?? '').trim()
    if (body) {
      bodies.push(body)
    }
    cursor = start + block[0].length
  }

  // Trailing non-whitespace (this also covers text with no blocks at all).
  if (trimmed.slice(cursor).trim() !== '') {
    return null
  }

  return bodies.length > 0 ? bodies.join('\n\n') : null
}
153
/**
 * Result of formatting messages - separates regular content from system reminders.
 */
interface FormattedMessages {
  /** Rendered user text and tool results, each prefixed with a `[USER]` or `[TOOL RESULT: <id>]` label line */
  contextParts: string[]
  /** Inner text of entries that consisted entirely of a system reminder */
  systemReminders: string[]
}

/**
 * Format user messages for new_context display, separating system reminders.
 * Only handles user messages (assistant messages are filtered out before this is called).
 *
 * String content and `text` blocks are labelled `[USER]`; `tool_result`
 * blocks are labelled with their tool_use_id. Any other block type is
 * skipped. Each piece of text is first checked with
 * extractSystemReminderContent: when it yields content, that content goes to
 * `systemReminders` instead of `contextParts`.
 *
 * @param messages - User messages to render
 * @returns Regular context parts and system reminders, in message order
 */
function formatMessagesForContext(messages: UserMessage[]): FormattedMessages {
  const contextParts: string[] = []
  const systemReminders: string[] = []

  // File one piece of text either as a system reminder or as labelled context.
  const route = (raw: string, label: string): void => {
    const reminderContent = extractSystemReminderContent(raw)
    if (reminderContent) {
      systemReminders.push(reminderContent)
    } else {
      contextParts.push(`${label}\n${raw}`)
    }
  }

  for (const message of messages) {
    const content = message.message.content

    if (typeof content === 'string') {
      route(content, '[USER]')
      continue
    }
    if (!Array.isArray(content)) {
      continue
    }

    for (const block of content) {
      if (block.type === 'text') {
        route(block.text, '[USER]')
      } else if (block.type === 'tool_result') {
        // A tool_result may carry no content at all. Serializing `undefined`
        // would not produce a string (jsonStringify presumably mirrors
        // JSON.stringify here — TODO confirm), and the reminder check would
        // then throw, so treat a missing result as empty text.
        let resultContent: string
        if (typeof block.content === 'string') {
          resultContent = block.content
        } else if (block.content == null) {
          resultContent = ''
        } else {
          resultContent = jsonStringify(block.content)
        }
        // Tool results can also contain system reminders (e.g., malware warning)
        route(resultContent, `[TOOL RESULT: ${block.tool_use_id}]`)
      }
    }
  }

  return { contextParts, systemReminders }
}
209
/**
 * Optional per-request context passed to addBetaLLMRequestAttributes.
 * Each field is acted on only when it is present and non-empty.
 */
export interface LLMRequestNewContext {
  /** System prompt (typically only on first request or if changed) */
  systemPrompt?: string
  /** Query source identifying the agent/purpose (e.g., 'repl_main_thread', 'agent:builtin') */
  querySource?: string
  /** Tool schemas sent with the request, as a JSON string (it is parsed and treated as an array of tool objects) */
  tools?: string
}
218
/**
 * Attach the user's prompt to an interaction span as `new_context`.
 *
 * Does nothing unless beta tracing is enabled. The prompt is prefixed with a
 * `[USER PROMPT]` label line and passed through truncateContent. When it had
 * to be shortened, `new_context_truncated` and `new_context_original_length`
 * (the length of the unlabelled prompt) are set as well.
 *
 * @param span - Interaction span to annotate
 * @param userPrompt - Raw prompt text entered by the user
 */
export function addBetaInteractionAttributes(
  span: Span,
  userPrompt: string,
): void {
  if (!isBetaTracingEnabled()) {
    return
  }

  const labelled = `[USER PROMPT]\n${userPrompt}`
  const clipped = truncateContent(labelled)

  const attributes: Record<string, string | number | boolean> = {
    new_context: clipped.content,
  }
  if (clipped.truncated) {
    attributes.new_context_truncated = true
    attributes.new_context_original_length = userPrompt.length
  }

  span.setAttributes(attributes)
}
242
/**
 * Add beta attributes to an LLM request span.
 *
 * Does nothing unless beta tracing is enabled. Otherwise up to three
 * independent pieces of information are recorded:
 * - the system prompt (hash, preview and length on the span; full text logged
 *   once per unique hash),
 * - the tool schemas (name/hash list on the span; each schema logged once per
 *   unique hash),
 * - `new_context` / `system_reminders`: the user messages added since the
 *   previous request from the same querySource.
 *
 * @param span - LLM request span to annotate
 * @param newContext - Optional system prompt, tools JSON and querySource
 * @param messagesForAPI - Full message list being sent with this request
 */
export function addBetaLLMRequestAttributes(
  span: Span,
  newContext?: LLMRequestNewContext,
  messagesForAPI?: APIMessage[],
): void {
  if (!isBetaTracingEnabled()) {
    return
  }

  if (newContext?.systemPrompt) {
    recordSystemPromptOnSpan(span, newContext.systemPrompt)
  }

  if (newContext?.tools) {
    recordToolsOnSpan(span, newContext.tools)
  }

  // new_context uses hash-based tracking (visible to all users)
  if (messagesForAPI && messagesForAPI.length > 0 && newContext?.querySource) {
    recordNewContextOnSpan(span, newContext.querySource, messagesForAPI)
  }
}

/**
 * Put the system prompt's hash, 500-character preview and length on the span,
 * and log the full (possibly truncated) prompt the first time its hash is seen.
 */
function recordSystemPromptOnSpan(span: Span, systemPrompt: string): void {
  const promptHash = hashSystemPrompt(systemPrompt)

  // Always add hash, preview, and length to the span
  span.setAttribute('system_prompt_hash', promptHash)
  span.setAttribute('system_prompt_preview', systemPrompt.slice(0, 500))
  span.setAttribute('system_prompt_length', systemPrompt.length)

  // Log the full system prompt only once per unique hash this session
  if (seenHashes.has(promptHash)) {
    return
  }
  seenHashes.add(promptHash)

  const { content: truncatedPrompt, truncated } = truncateContent(systemPrompt)

  void logOTelEvent('system_prompt', {
    system_prompt_hash: promptHash,
    system_prompt: truncatedPrompt,
    system_prompt_length: String(systemPrompt.length),
    ...(truncated && { system_prompt_truncated: 'true' }),
  })
}

/**
 * Put a name/hash list of the request's tools on the span and log each tool's
 * full schema the first time its hash is seen. Sets `tools_parse_error` when
 * the tools string is not a usable JSON array.
 */
function recordToolsOnSpan(span: Span, tools: string): void {
  try {
    const parsed: unknown = jsonParse(tools)
    if (!Array.isArray(parsed)) {
      // Valid JSON, but not the expected list of tool objects.
      span.setAttribute('tools_parse_error', true)
      return
    }
    const toolsArray = parsed as Record<string, unknown>[]

    // Build array of {name, hash} for each tool
    const toolsWithHashes = toolsArray.map(tool => {
      const toolJson = jsonStringify(tool)
      const toolHash = shortHash(toolJson)
      return {
        name: typeof tool.name === 'string' ? tool.name : 'unknown',
        hash: toolHash,
        json: toolJson,
      }
    })

    // Set span attribute with array of name/hash pairs
    span.setAttribute(
      'tools',
      jsonStringify(
        toolsWithHashes.map(({ name, hash }) => ({ name, hash })),
      ),
    )
    span.setAttribute('tools_count', toolsWithHashes.length)

    // Log each tool's full description once per unique hash
    for (const { name, hash, json } of toolsWithHashes) {
      if (!seenHashes.has(`tool_${hash}`)) {
        seenHashes.add(`tool_${hash}`)

        const { content: truncatedTool, truncated } = truncateContent(json)

        void logOTelEvent('tool', {
          tool_name: sanitizeToolNameForAnalytics(name),
          tool_hash: hash,
          tool: truncatedTool,
          ...(truncated && { tool_truncated: 'true' }),
        })
      }
    }
  } catch {
    // Parsing failed, or an entry had an unexpected shape. Only a flag is
    // recorded on the span; the raw tools string is not logged.
    span.setAttribute('tools_parse_error', true)
  }
}

/**
 * Report the user messages added since the previous request from the same
 * querySource as `new_context` (regular content) and `system_reminders`, then
 * remember the last message of this request for next time.
 */
function recordNewContextOnSpan(
  span: Span,
  querySource: string,
  messagesForAPI: APIMessage[],
): void {
  const lastHash = lastReportedMessageHash.get(querySource)

  // Find where the last reported message is in the array.
  // The search runs from the END: hashes are content-based, so an older
  // message with identical content (e.g. the user typing the same short reply
  // twice) would otherwise match first and cause already-reported history to
  // be sent again, which can also push the genuinely new content past the
  // truncation limit. The last reported message was the final element of the
  // previous request, so its true position is the latest match.
  let startIndex = 0
  if (lastHash) {
    for (let i = messagesForAPI.length - 1; i >= 0; i--) {
      const msg = messagesForAPI[i]
      if (msg && hashMessage(msg) === lastHash) {
        startIndex = i + 1 // Start after the last reported message
        break
      }
    }
    // If lastHash not found, startIndex stays 0 (send everything)
  }

  // Get new messages (filter out assistant messages - we only want user input/tool results)
  const newMessages = messagesForAPI
    .slice(startIndex)
    .filter((m): m is UserMessage => m.type === 'user')

  if (newMessages.length === 0) {
    return
  }

  // Format new messages, separating system reminders from regular content
  const { contextParts, systemReminders } =
    formatMessagesForContext(newMessages)

  // Set new_context (regular user content and tool results)
  if (contextParts.length > 0) {
    const fullContext = contextParts.join('\n\n---\n\n')
    const { content: truncatedContext, truncated } =
      truncateContent(fullContext)

    span.setAttributes({
      new_context: truncatedContext,
      new_context_message_count: newMessages.length,
      ...(truncated && {
        new_context_truncated: true,
        new_context_original_length: fullContext.length,
      }),
    })
  }

  // Set system_reminders as a separate attribute
  if (systemReminders.length > 0) {
    const fullReminders = systemReminders.join('\n\n---\n\n')
    const { content: truncatedReminders, truncated: remindersTruncated } =
      truncateContent(fullReminders)

    span.setAttributes({
      system_reminders: truncatedReminders,
      system_reminders_count: systemReminders.length,
      ...(remindersTruncated && {
        system_reminders_truncated: true,
        system_reminders_original_length: fullReminders.length,
      }),
    })
  }

  // Update last reported hash to the last message in the array
  const lastMessage = messagesForAPI[messagesForAPI.length - 1]
  if (lastMessage) {
    lastReportedMessageHash.set(querySource, hashMessage(lastMessage))
  }
}
401
/**
 * Copy the model's response text into the attributes used to end an LLM
 * request span.
 *
 * Does nothing when beta tracing is disabled or no metadata was supplied.
 * `response.model_output` is written for every user type;
 * `response.thinking_output` is written only when USER_TYPE is 'ant'. Each
 * value goes through truncateContent, and when it was shortened the matching
 * `_truncated` flag and `_original_length` are added.
 *
 * @param endAttributes - Attribute bag to mutate in place
 * @param metadata - Optional model and thinking output text
 */
export function addBetaLLMResponseAttributes(
  endAttributes: Record<string, string | number | boolean>,
  metadata?: {
    modelOutput?: string
    thinkingOutput?: string
  },
): void {
  if (!isBetaTracingEnabled() || !metadata) {
    return
  }

  const { modelOutput, thinkingOutput } = metadata

  // model_output (text content) - visible to all users
  if (modelOutput !== undefined) {
    const clippedOutput = truncateContent(modelOutput)
    endAttributes['response.model_output'] = clippedOutput.content
    if (clippedOutput.truncated) {
      endAttributes['response.model_output_truncated'] = true
      endAttributes['response.model_output_original_length'] =
        modelOutput.length
    }
  }

  // thinking_output - ant-only
  const isAnt = process.env.USER_TYPE === 'ant'
  if (isAnt && thinkingOutput !== undefined) {
    const clippedThinking = truncateContent(thinkingOutput)
    endAttributes['response.thinking_output'] = clippedThinking.content
    if (clippedThinking.truncated) {
      endAttributes['response.thinking_output_truncated'] = true
      endAttributes['response.thinking_output_original_length'] =
        thinkingOutput.length
    }
  }
}
444
/**
 * Attach a tool's serialized input to the span that starts the tool call.
 *
 * Does nothing unless beta tracing is enabled. The input is prefixed with a
 * `[TOOL INPUT: <name>]` label line and passed through truncateContent. When
 * it had to be shortened, `tool_input_truncated` and
 * `tool_input_original_length` (the length of the unlabelled input) are set
 * as well.
 *
 * @param span - Tool span to annotate
 * @param toolName - Name shown in the label line
 * @param toolInput - Serialized tool input
 */
export function addBetaToolInputAttributes(
  span: Span,
  toolName: string,
  toolInput: string,
): void {
  if (!isBetaTracingEnabled()) {
    return
  }

  const labelled = `[TOOL INPUT: ${toolName}]\n${toolInput}`
  const clipped = truncateContent(labelled)

  if (clipped.truncated) {
    span.setAttributes({
      tool_input: clipped.content,
      tool_input_truncated: true,
      tool_input_original_length: toolInput.length,
    })
  } else {
    span.setAttributes({ tool_input: clipped.content })
  }
}
469
/**
 * Record a tool's result as `new_context` in the attributes used to end a
 * tool span.
 *
 * Does nothing unless beta tracing is enabled. The result is prefixed with a
 * `[TOOL RESULT: <name>]` label line and passed through truncateContent. When
 * it had to be shortened, `new_context_truncated` and
 * `new_context_original_length` (the length of the unlabelled result) are
 * added.
 *
 * @param endAttributes - Attribute bag to mutate in place
 * @param toolName - Value interpolated into the label line
 * @param toolResult - Tool result text
 */
export function addBetaToolResultAttributes(
  endAttributes: Record<string, string | number | boolean>,
  toolName: string | number | boolean,
  toolResult: string,
): void {
  if (!isBetaTracingEnabled()) {
    return
  }

  const labelled = `[TOOL RESULT: ${toolName}]\n${toolResult}`
  const clipped = truncateContent(labelled)

  endAttributes['new_context'] = clipped.content
  if (!clipped.truncated) {
    return
  }
  endAttributes['new_context_truncated'] = true
  endAttributes['new_context_original_length'] = toolResult.length
}