utils/contextAnalysis.ts at main · oppi.li/claude-code

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / utils / contextAnalysis.ts
at main 272 lines 7.7 kB view raw
wrap content
oppi.li dump from zip 4d ago
63aada3f
  1import type { BetaContentBlock } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
  2import type {
  3  ContentBlock,
  4  ContentBlockParam,
  5} from '@anthropic-ai/sdk/resources/index.mjs'
  6import { roughTokenCountEstimation as countTokens } from '../services/tokenEstimation.js'
  7import type {
  8  AssistantMessage,
  9  Message,
 10  UserMessage,
 11} from '../types/message.js'
 12import { normalizeMessagesForAPI } from './messages.js'
 13import { jsonStringify } from './slowOperations.js'
 14
 15type TokenStats = {
 16  toolRequests: Map<string, number>
 17  toolResults: Map<string, number>
 18  humanMessages: number
 19  assistantMessages: number
 20  localCommandOutputs: number
 21  other: number
 22  attachments: Map<string, number>
 23  duplicateFileReads: Map<string, { count: number; tokens: number }>
 24  total: number
 25}
 26
 27export function analyzeContext(messages: Message[]): TokenStats {
 28  const stats: TokenStats = {
 29    toolRequests: new Map(),
 30    toolResults: new Map(),
 31    humanMessages: 0,
 32    assistantMessages: 0,
 33    localCommandOutputs: 0,
 34    other: 0,
 35    attachments: new Map(),
 36    duplicateFileReads: new Map(),
 37    total: 0,
 38  }
 39
 40  const toolIdsToToolNames = new Map<string, string>()
 41  const readToolIdToFilePath = new Map<string, string>()
 42  const fileReadStats = new Map<
 43    string,
 44    { count: number; totalTokens: number }
 45  >()
 46
 47  messages.forEach(msg => {
 48    if (msg.type === 'attachment') {
 49      const type = msg.attachment.type || 'unknown'
 50      stats.attachments.set(type, (stats.attachments.get(type) || 0) + 1)
 51    }
 52  })
 53
 54  const normalizedMessages = normalizeMessagesForAPI(messages)
 55  normalizedMessages.forEach(msg => {
 56    const { content } = msg.message
 57
 58    // Not sure if this path is still used, but adding as a fallback
 59    if (typeof content === 'string') {
 60      const tokens = countTokens(content)
 61      stats.total += tokens
 62      // Check if this is a local command output
 63      if (msg.type === 'user' && content.includes('local-command-stdout')) {
 64        stats.localCommandOutputs += tokens
 65      } else {
 66        stats[msg.type === 'user' ? 'humanMessages' : 'assistantMessages'] +=
 67          tokens
 68      }
 69    } else {
 70      content.forEach(block =>
 71        processBlock(
 72          block,
 73          msg,
 74          stats,
 75          toolIdsToToolNames,
 76          readToolIdToFilePath,
 77          fileReadStats,
 78        ),
 79      )
 80    }
 81  })
 82
 83  // Calculate duplicate file reads
 84  fileReadStats.forEach((data, path) => {
 85    if (data.count > 1) {
 86      const averageTokensPerRead = Math.floor(data.totalTokens / data.count)
 87      const duplicateTokens = averageTokensPerRead * (data.count - 1)
 88
 89      stats.duplicateFileReads.set(path, {
 90        count: data.count,
 91        tokens: duplicateTokens,
 92      })
 93    }
 94  })
 95
 96  return stats
 97}
 98
 99function processBlock(
100  block: ContentBlockParam | ContentBlock | BetaContentBlock,
101  message: UserMessage | AssistantMessage,
102  stats: TokenStats,
103  toolIds: Map<string, string>,
104  readToolPaths: Map<string, string>,
105  fileReads: Map<string, { count: number; totalTokens: number }>,
106): void {
107  const tokens = countTokens(jsonStringify(block))
108  stats.total += tokens
109
110  switch (block.type) {
111    case 'text':
112      // Check if this is a local command output
113      if (
114        message.type === 'user' &&
115        'text' in block &&
116        block.text.includes('local-command-stdout')
117      ) {
118        stats.localCommandOutputs += tokens
119      } else {
120        stats[
121          message.type === 'user' ? 'humanMessages' : 'assistantMessages'
122        ] += tokens
123      }
124      break
125
126    case 'tool_use': {
127      if ('name' in block && 'id' in block) {
128        const toolName = block.name || 'unknown'
129        increment(stats.toolRequests, toolName, tokens)
130        toolIds.set(block.id, toolName)
131
132        // Track Read tool file paths
133        if (
134          toolName === 'Read' &&
135          'input' in block &&
136          block.input &&
137          typeof block.input === 'object' &&
138          'file_path' in block.input
139        ) {
140          const path = String(
141            (block.input as Record<string, unknown>).file_path,
142          )
143          readToolPaths.set(block.id, path)
144        }
145      }
146      break
147    }
148
149    case 'tool_result': {
150      if ('tool_use_id' in block) {
151        const toolName = toolIds.get(block.tool_use_id) || 'unknown'
152        increment(stats.toolResults, toolName, tokens)
153
154        // Track file read tokens
155        if (toolName === 'Read') {
156          const path = readToolPaths.get(block.tool_use_id)
157          if (path) {
158            const current = fileReads.get(path) || { count: 0, totalTokens: 0 }
159            fileReads.set(path, {
160              count: current.count + 1,
161              totalTokens: current.totalTokens + tokens,
162            })
163          }
164        }
165      }
166      break
167    }
168
169    case 'image':
170    case 'server_tool_use':
171    case 'web_search_tool_result':
172    case 'search_result':
173    case 'document':
174    case 'thinking':
175    case 'redacted_thinking':
176    case 'code_execution_tool_result':
177    case 'mcp_tool_use':
178    case 'mcp_tool_result':
179    case 'container_upload':
180    case 'web_fetch_tool_result':
181    case 'bash_code_execution_tool_result':
182    case 'text_editor_code_execution_tool_result':
183    case 'tool_search_tool_result':
184    case 'compaction':
185      // Don't care about these for now..
186      stats['other'] += tokens
187      break
188  }
189}
190
/**
 * Adds `value` to the number stored under `key`, treating a missing key as 0.
 *
 * @param map Counter map, mutated in place.
 * @param key Entry to update.
 * @param value Amount to add.
 */
function increment(map: Map<string, number>, key: string, value: number): void {
  map.set(key, (map.get(key) ?? 0) + value)
}
194
/**
 * Flattens token statistics into a `name -> number` record of metrics.
 *
 * Always emitted: the fixed `*_tokens` totals, one `attachment_<type>_count`
 * per attachment type, one `tool_request_<tool>_tokens` /
 * `tool_result_<tool>_tokens` per tool, and the duplicate-read totals.
 * The `*_percent` metrics (whole-number shares of `stats.total`) are only
 * emitted when `stats.total` is greater than zero.
 *
 * @param stats Token statistics to convert.
 * @returns Flat metrics record.
 */
export function tokenStatsToStatsigMetrics(
  stats: TokenStats,
): Record<string, number> {
  const { total } = stats

  const sum = (values: Iterable<number>): number => {
    let acc = 0
    for (const value of values) acc += value
    return acc
  }
  // Only called when total > 0 (see the guard below).
  const percentOfTotal = (tokens: number): number =>
    Math.round((tokens / total) * 100)

  const metrics: Record<string, number> = {
    total_tokens: total,
    human_message_tokens: stats.humanMessages,
    assistant_message_tokens: stats.assistantMessages,
    local_command_output_tokens: stats.localCommandOutputs,
    other_tokens: stats.other,
  }

  for (const [type, count] of stats.attachments) {
    metrics[`attachment_${type}_count`] = count
  }
  for (const [tool, tokens] of stats.toolRequests) {
    metrics[`tool_request_${tool}_tokens`] = tokens
  }
  for (const [tool, tokens] of stats.toolResults) {
    metrics[`tool_result_${tool}_tokens`] = tokens
  }

  const duplicateTotal = sum(
    [...stats.duplicateFileReads.values()].map(entry => entry.tokens),
  )
  metrics.duplicate_read_tokens = duplicateTotal
  metrics.duplicate_read_file_count = stats.duplicateFileReads.size

  // Percentages are meaningless (division by zero) for an empty context.
  if (total <= 0) {
    return metrics
  }

  metrics.human_message_percent = percentOfTotal(stats.humanMessages)
  metrics.assistant_message_percent = percentOfTotal(stats.assistantMessages)
  metrics.local_command_output_percent = percentOfTotal(
    stats.localCommandOutputs,
  )
  metrics.duplicate_read_percent = percentOfTotal(duplicateTotal)

  metrics.tool_request_percent = percentOfTotal(
    sum(stats.toolRequests.values()),
  )
  metrics.tool_result_percent = percentOfTotal(sum(stats.toolResults.values()))

  // Add individual tool request percentages
  for (const [tool, tokens] of stats.toolRequests) {
    metrics[`tool_request_${tool}_percent`] = percentOfTotal(tokens)
  }

  // Add individual tool result percentages
  for (const [tool, tokens] of stats.toolResults) {
    metrics[`tool_result_${tool}_percent`] = percentOfTotal(tokens)
  }

  return metrics
}