source dump of claude code
at main 272 lines 7.7 kB view raw
import type { BetaContentBlock } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
import type {
  ContentBlock,
  ContentBlockParam,
} from '@anthropic-ai/sdk/resources/index.mjs'
import { roughTokenCountEstimation as countTokens } from '../services/tokenEstimation.js'
import type {
  AssistantMessage,
  Message,
  UserMessage,
} from '../types/message.js'
import { normalizeMessagesForAPI } from './messages.js'
import { jsonStringify } from './slowOperations.js'

/**
 * Token usage of a conversation, split by where the tokens came from.
 *
 * All numeric fields are token estimates produced by `countTokens`, except
 * `attachments`, which holds a per-type *message count*.
 */
type TokenStats = {
  /** Tokens spent on `tool_use` blocks, keyed by tool name. */
  toolRequests: Map<string, number>
  /** Tokens spent on `tool_result` blocks, keyed by the originating tool name. */
  toolResults: Map<string, number>
  /** Tokens in user text that is not local command output. */
  humanMessages: number
  /** Tokens in non-user (assistant) text. */
  assistantMessages: number
  /** Tokens in user text containing the `local-command-stdout` marker. */
  localCommandOutputs: number
  /** Tokens in every block type that has no dedicated bucket. */
  other: number
  /** Number of attachment messages, keyed by attachment type. */
  attachments: Map<string, number>
  /** Files read more than once: read count and estimated redundant tokens. */
  duplicateFileReads: Map<string, { count: number; tokens: number }>
  /** Sum of the token estimates of all processed content. */
  total: number
}

/**
 * Walks a conversation and attributes its estimated token usage to
 * categories (human text, assistant text, tool requests/results, ...).
 *
 * Attachments are counted from the raw `messages`; everything else is
 * measured on the output of `normalizeMessagesForAPI(messages)`.
 *
 * @param messages - The conversation to analyze.
 * @returns The per-category statistics, including duplicate file reads.
 */
export function analyzeContext(messages: Message[]): TokenStats {
  const stats: TokenStats = {
    toolRequests: new Map(),
    toolResults: new Map(),
    humanMessages: 0,
    assistantMessages: 0,
    localCommandOutputs: 0,
    other: 0,
    attachments: new Map(),
    duplicateFileReads: new Map(),
    total: 0,
  }

  // tool_use id -> tool name, so a later tool_result can be attributed.
  const toolIdsToToolNames = new Map<string, string>()
  // tool_use id -> file path, for Read tool calls only.
  const readToolIdToFilePath = new Map<string, string>()
  // file path -> number of Read results seen and their combined tokens.
  const fileReadStats = new Map<
    string,
    { count: number; totalTokens: number }
  >()

  // Attachments are tallied by count (not tokens) on the raw messages,
  // before normalization.
  messages.forEach(msg => {
    if (msg.type === 'attachment') {
      const type = msg.attachment.type || 'unknown'
      stats.attachments.set(type, (stats.attachments.get(type) || 0) + 1)
    }
  })

  const normalizedMessages = normalizeMessagesForAPI(messages)
  normalizedMessages.forEach(msg => {
    const { content } = msg.message

    // Not sure if this path is still used, but adding as a fallback
    if (typeof content === 'string') {
      const tokens = countTokens(content)
      stats.total += tokens
      // Check if this is a local command output
      if (msg.type === 'user' && content.includes('local-command-stdout')) {
        stats.localCommandOutputs += tokens
      } else {
        stats[msg.type === 'user' ? 'humanMessages' : 'assistantMessages'] +=
          tokens
      }
    } else {
      content.forEach(block =>
        processBlock(
          block,
          msg,
          stats,
          toolIdsToToolNames,
          readToolIdToFilePath,
          fileReadStats,
        ),
      )
    }
  })

  // Calculate duplicate file reads: every read after the first counts as
  // redundant, costed at the (floored) average size of a read of that file.
  fileReadStats.forEach((data, path) => {
    if (data.count > 1) {
      const averageTokensPerRead = Math.floor(data.totalTokens / data.count)
      const duplicateTokens = averageTokensPerRead * (data.count - 1)

      stats.duplicateFileReads.set(path, {
        count: data.count,
        tokens: duplicateTokens,
      })
    }
  })

  return stats
}

/**
 * Adds one content block's estimated tokens to `stats.total` and to the
 * bucket matching the block type. Mutates `stats` and the three lookup maps.
 *
 * @param block - The content block to account for.
 * @param message - The message that contains `block`; its `type` decides
 *   whether text counts as human or assistant output.
 * @param stats - Accumulator that is updated in place.
 * @param toolIds - tool_use id -> tool name; written on `tool_use`, read on
 *   `tool_result`. A result whose id is not in the map is filed under
 *   `'unknown'`.
 * @param readToolPaths - tool_use id -> file path for `Read` tool calls.
 * @param fileReads - Per-path read count and token total, updated for each
 *   `Read` result whose path is known.
 */
function processBlock(
  block: ContentBlockParam | ContentBlock | BetaContentBlock,
  message: UserMessage | AssistantMessage,
  stats: TokenStats,
  toolIds: Map<string, string>,
  readToolPaths: Map<string, string>,
  fileReads: Map<string, { count: number; totalTokens: number }>,
): void {
  // The estimate is taken over the serialized block, not just its text.
  const tokens = countTokens(jsonStringify(block))
  stats.total += tokens

  switch (block.type) {
    case 'text':
      // Check if this is a local command output
      if (
        message.type === 'user' &&
        'text' in block &&
        block.text.includes('local-command-stdout')
      ) {
        stats.localCommandOutputs += tokens
      } else {
        stats[
          message.type === 'user' ? 'humanMessages' : 'assistantMessages'
        ] += tokens
      }
      break

    case 'tool_use': {
      if ('name' in block && 'id' in block) {
        const toolName = block.name || 'unknown'
        increment(stats.toolRequests, toolName, tokens)
        toolIds.set(block.id, toolName)

        // Track Read tool file paths
        if (
          toolName === 'Read' &&
          'input' in block &&
          block.input &&
          typeof block.input === 'object' &&
          'file_path' in block.input
        ) {
          const filePath = (block.input as Record<string, unknown>).file_path
          // Only genuine string paths are tracked. Coercing other values
          // with String() would merge unrelated reads under keys such as
          // "undefined" or "[object Object]" and report false duplicates.
          if (typeof filePath === 'string') {
            readToolPaths.set(block.id, filePath)
          }
        }
      }
      break
    }

    case 'tool_result': {
      if ('tool_use_id' in block) {
        const toolName = toolIds.get(block.tool_use_id) || 'unknown'
        increment(stats.toolResults, toolName, tokens)

        // Track file read tokens
        if (toolName === 'Read') {
          const path = readToolPaths.get(block.tool_use_id)
          if (path) {
            const current = fileReads.get(path) || { count: 0, totalTokens: 0 }
            fileReads.set(path, {
              count: current.count + 1,
              totalTokens: current.totalTokens + tokens,
            })
          }
        }
      }
      break
    }

    case 'image':
    case 'server_tool_use':
    case 'web_search_tool_result':
    case 'search_result':
    case 'document':
    case 'thinking':
    case 'redacted_thinking':
    case 'code_execution_tool_result':
    case 'mcp_tool_use':
    case 'mcp_tool_result':
    case 'container_upload':
    case 'web_fetch_tool_result':
    case 'bash_code_execution_tool_result':
    case 'text_editor_code_execution_tool_result':
    case 'tool_search_tool_result':
    case 'compaction':
    default:
      // Don't care about these for now..
      // `default` also catches block types missing from the list above:
      // their tokens are already in stats.total, so they must land in a
      // bucket for the categories to keep adding up to the total.
      stats['other'] += tokens
      break
  }
}

/**
 * Adds `value` to the number stored under `key`, treating a missing key as 0.
 */
function increment(map: Map<string, number>, key: string, value: number): void {
  map.set(key, (map.get(key) || 0) + value)
}

/**
 * Flattens a `TokenStats` into a flat name -> number record (presumably for
 * Statsig logging, going by the function name).
 *
 * Absolute token/count metrics are always emitted. The `*_percent` metrics
 * are whole-number percentages of `stats.total` and are emitted only when
 * `stats.total > 0`, which avoids dividing by zero.
 *
 * @param stats - Statistics as produced by `analyzeContext`.
 * @returns The flattened metrics.
 */
export function tokenStatsToStatsigMetrics(
  stats: TokenStats,
): Record<string, number> {
  const metrics: Record<string, number> = {
    total_tokens: stats.total,
    human_message_tokens: stats.humanMessages,
    assistant_message_tokens: stats.assistantMessages,
    local_command_output_tokens: stats.localCommandOutputs,
    other_tokens: stats.other,
  }

  stats.attachments.forEach((count, type) => {
    metrics[`attachment_${type}_count`] = count
  })

  stats.toolRequests.forEach((tokens, tool) => {
    metrics[`tool_request_${tool}_tokens`] = tokens
  })

  stats.toolResults.forEach((tokens, tool) => {
    metrics[`tool_result_${tool}_tokens`] = tokens
  })

  const duplicateTotal = [...stats.duplicateFileReads.values()].reduce(
    (sum, d) => sum + d.tokens,
    0,
  )

  metrics.duplicate_read_tokens = duplicateTotal
  metrics.duplicate_read_file_count = stats.duplicateFileReads.size

  if (stats.total > 0) {
    // Share of the whole conversation, rounded to a whole percent.
    const percentOfTotal = (tokens: number): number =>
      Math.round((tokens / stats.total) * 100)

    metrics.human_message_percent = percentOfTotal(stats.humanMessages)
    metrics.assistant_message_percent = percentOfTotal(stats.assistantMessages)
    metrics.local_command_output_percent = percentOfTotal(
      stats.localCommandOutputs,
    )
    metrics.duplicate_read_percent = percentOfTotal(duplicateTotal)

    const toolRequestTotal = [...stats.toolRequests.values()].reduce(
      (sum, v) => sum + v,
      0,
    )
    const toolResultTotal = [...stats.toolResults.values()].reduce(
      (sum, v) => sum + v,
      0,
    )

    metrics.tool_request_percent = percentOfTotal(toolRequestTotal)
    metrics.tool_result_percent = percentOfTotal(toolResultTotal)

    // Add individual tool request percentages
    stats.toolRequests.forEach((tokens, tool) => {
      metrics[`tool_request_${tool}_percent`] = percentOfTotal(tokens)
    })

    // Add individual tool result percentages
    stats.toolResults.forEach((tokens, tool) => {
      metrics[`tool_result_${tool}_percent`] = percentOfTotal(tokens)
    })
  }

  return metrics
}