source dump of claude code
at main 495 lines 17 kB view raw
import type { Anthropic } from '@anthropic-ai/sdk'
import type { BetaMessageParam as MessageParam } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
// @aws-sdk/client-bedrock-runtime is imported dynamically in countTokensWithBedrock()
// to defer ~279KB of AWS SDK code until a Bedrock call is actually made
import type { CountTokensCommandInput } from '@aws-sdk/client-bedrock-runtime'
import { getAPIProvider } from 'src/utils/model/providers.js'
import { VERTEX_COUNT_TOKENS_ALLOWED_BETAS } from '../constants/betas.js'
import type { Attachment } from '../utils/attachments.js'
import { getModelBetas } from '../utils/betas.js'
import { getVertexRegionForModel, isEnvTruthy } from '../utils/envUtils.js'
import { logError } from '../utils/log.js'
import { normalizeAttachmentForAPI } from '../utils/messages.js'
import {
  createBedrockRuntimeClient,
  getInferenceProfileBackingModel,
  isFoundationModel,
} from '../utils/model/bedrock.js'
import {
  getDefaultSonnetModel,
  getMainLoopModel,
  getSmallFastModel,
  normalizeModelStringForAPI,
} from '../utils/model/model.js'
import { jsonStringify } from '../utils/slowOperations.js'
import { isToolReferenceBlock } from '../utils/toolSearch.js'
import { getAPIMetadata, getExtraBodyParams } from './api/claude.js'
import { getAnthropicClient } from './api/client.js'
import { withTokenCountVCR } from './vcr.js'

// Minimal values for token counting with thinking enabled
// API constraint: max_tokens must be greater than thinking.budget_tokens
const TOKEN_COUNT_THINKING_BUDGET = 1024
const TOKEN_COUNT_MAX_TOKENS = 2048

/**
 * Check if messages contain thinking blocks
 */
function hasThinkingBlocks(
  messages: Anthropic.Beta.Messages.BetaMessageParam[],
): boolean {
  for (const message of messages) {
    // Thinking blocks only ever appear in assistant messages with array content.
    if (message.role === 'assistant' && Array.isArray(message.content)) {
      for (const block of message.content) {
        if (
          typeof block === 'object' &&
          block !== null &&
          'type' in block &&
          (block.type === 'thinking' || block.type === 'redacted_thinking')
        ) {
          return true
        }
      }
    }
  }
  return false
}

/**
 * Strip tool search-specific fields from messages before sending for token counting.
 * This removes 'caller' from tool_use blocks and 'tool_reference' from tool_result content.
 * These fields are only valid with the tool search beta and will cause errors otherwise.
 *
 * Note: We use 'as unknown as' casts because the SDK types don't include tool search beta fields,
 * but at runtime these fields may exist from API responses when tool search was enabled.
 */
function stripToolSearchFieldsFromMessages(
  messages: Anthropic.Beta.Messages.BetaMessageParam[],
): Anthropic.Beta.Messages.BetaMessageParam[] {
  return messages.map(message => {
    // String content can't contain tool blocks; pass it through untouched.
    if (!Array.isArray(message.content)) {
      return message
    }

    const normalizedContent = message.content.map(block => {
      // Strip 'caller' from tool_use blocks (assistant messages)
      if (block.type === 'tool_use') {
        // Destructure to exclude any extra fields like 'caller'
        const toolUse =
          block as Anthropic.Beta.Messages.BetaToolUseBlockParam & {
            caller?: unknown
          }
        // Rebuild with only the fields the non-beta API accepts.
        return {
          type: 'tool_use' as const,
          id: toolUse.id,
          name: toolUse.name,
          input: toolUse.input,
        }
      }

      // Strip tool_reference blocks from tool_result content (user messages)
      if (block.type === 'tool_result') {
        const toolResult =
          block as Anthropic.Beta.Messages.BetaToolResultBlockParam
        if (Array.isArray(toolResult.content)) {
          const filteredContent = (toolResult.content as unknown[]).filter(
            c => !isToolReferenceBlock(c),
          ) as typeof toolResult.content

          // If filtering removed everything, substitute a placeholder so the
          // tool_result isn't sent with empty content.
          if (filteredContent.length === 0) {
            return {
              ...toolResult,
              content: [{ type: 'text' as const, text: '[tool references]' }],
            }
          }
          // Only allocate a new block when something was actually removed.
          if (filteredContent.length !== toolResult.content.length) {
            return {
              ...toolResult,
              content: filteredContent,
            }
          }
        }
      }

      return block
    })

    return {
      ...message,
      content: normalizedContent,
    }
  })
}

/**
 * Count tokens for a single string of user content via the token-counting API.
 *
 * @param content - raw text to count; wrapped in a single user message.
 * @returns the token count, 0 for empty content, or null if counting failed.
 */
export async function countTokensWithAPI(
  content: string,
): Promise<number | null> {
  // Special case for empty content - API doesn't accept empty messages
  if (!content) {
    return 0
  }

  const message: Anthropic.Beta.Messages.BetaMessageParam = {
    role: 'user',
    content: content,
  }

  return countMessagesTokensWithAPI([message], [])
}

/**
 * Count tokens for a messages + tools payload using the provider-appropriate
 * endpoint: Bedrock goes through countTokensWithBedrock(); Anthropic/Vertex use
 * the SDK's beta countTokens API. Calls are wrapped in the token-count VCR for
 * record/replay.
 *
 * @returns the input token count, or null on any failure (errors are logged,
 *   never thrown).
 */
export async function countMessagesTokensWithAPI(
  messages: Anthropic.Beta.Messages.BetaMessageParam[],
  tools: Anthropic.Beta.Messages.BetaToolUnion[],
): Promise<number | null> {
  return withTokenCountVCR(messages, tools, async () => {
    try {
      const model = getMainLoopModel()
      const betas = getModelBetas(model)
      const containsThinking = hasThinkingBlocks(messages)

      if (getAPIProvider() === 'bedrock') {
        // @anthropic-sdk/bedrock-sdk doesn't support countTokens currently
        return countTokensWithBedrock({
          model: normalizeModelStringForAPI(model),
          messages,
          tools,
          betas,
          containsThinking,
        })
      }

      const anthropic = await getAnthropicClient({
        maxRetries: 1,
        model,
        source: 'count_tokens',
      })

      // Vertex only accepts an allow-listed subset of betas on countTokens.
      const filteredBetas =
        getAPIProvider() === 'vertex'
          ? betas.filter(b => VERTEX_COUNT_TOKENS_ALLOWED_BETAS.has(b))
          : betas

      const response = await anthropic.beta.messages.countTokens({
        model: normalizeModelStringForAPI(model),
        messages:
          // When we pass tools and no messages, we need to pass a dummy message
          // to get an accurate tool token count.
          messages.length > 0 ? messages : [{ role: 'user', content: 'foo' }],
        tools,
        ...(filteredBetas.length > 0 && { betas: filteredBetas }),
        // Enable thinking if messages contain thinking blocks
        ...(containsThinking && {
          thinking: {
            type: 'enabled',
            budget_tokens: TOKEN_COUNT_THINKING_BUDGET,
          },
        }),
      })

      if (typeof response.input_tokens !== 'number') {
        // Vertex client throws
        // Bedrock client succeeds with { Output: { __type: 'com.amazon.coral.service#UnknownOperationException' }, Version: '1.0' }
        return null
      }

      return response.input_tokens
    } catch (error) {
      logError(error)
      return null
    }
  })
}

/**
 * Cheap local token estimate: assumes ~`bytesPerToken` characters per token
 * (default 4). Used when the API-based count is unavailable.
 */
export function roughTokenCountEstimation(
  content: string,
  bytesPerToken: number = 4,
): number {
  return Math.round(content.length / bytesPerToken)
}

/**
 * Returns an estimated bytes-per-token ratio for a given file extension.
 * Dense JSON has many single-character tokens (`{`, `}`, `:`, `,`, `"`)
 * which makes the real ratio closer to 2 rather than the default 4.
 */
export function bytesPerTokenForFileType(fileExtension: string): number {
  switch (fileExtension) {
    case 'json':
    case 'jsonl':
    case 'jsonc':
      return 2
    default:
      return 4
  }
}

/**
 * Like {@link roughTokenCountEstimation} but uses a more accurate
 * bytes-per-token ratio when the file type is known.
 *
 * This matters when the API-based token count is unavailable (e.g. on
 * Bedrock) and we fall back to the rough estimate — an underestimate can
 * let an oversized tool result slip into the conversation.
 */
export function roughTokenCountEstimationForFileType(
  content: string,
  fileExtension: string,
): number {
  return roughTokenCountEstimation(
    content,
    bytesPerTokenForFileType(fileExtension),
  )
}

/**
 * Estimates token count for a Message object by extracting and analyzing its text content.
 * This provides a more reliable estimate than getTokenUsage for messages that may have been compacted.
 * Uses Haiku for token counting (Haiku 4.5 supports thinking blocks), except:
 * - Vertex global region: uses Sonnet (Haiku not available)
 * - Bedrock with thinking blocks: uses Sonnet (Haiku 3.5 doesn't support thinking)
 *
 * NOTE(review): unlike countMessagesTokensWithAPI, this function has no
 * try/catch — API errors propagate to the caller despite the `number | null`
 * return type; confirm callers handle rejection.
 */
export async function countTokensViaHaikuFallback(
  messages: Anthropic.Beta.Messages.BetaMessageParam[],
  tools: Anthropic.Beta.Messages.BetaToolUnion[],
): Promise<number | null> {
  // Check if messages contain thinking blocks
  const containsThinking = hasThinkingBlocks(messages)

  // If we're on Vertex and using global region, always use Sonnet since Haiku is not available there.
  const isVertexGlobalEndpoint =
    isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX) &&
    getVertexRegionForModel(getSmallFastModel()) === 'global'
  // If we're on Bedrock with thinking blocks, use Sonnet since Haiku 3.5 doesn't support thinking
  const isBedrockWithThinking =
    isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK) && containsThinking
  // If we're on Vertex with thinking blocks, use Sonnet since Haiku 3.5 doesn't support thinking
  const isVertexWithThinking =
    isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX) && containsThinking
  // Otherwise always use Haiku - Haiku 4.5 supports thinking blocks.
  // WARNING: if you change this to use a non-Haiku model, this request will fail in 1P unless it uses getCLISyspromptPrefix.
  // Note: We don't need Sonnet for tool_reference blocks because we strip them via
  // stripToolSearchFieldsFromMessages() before sending.
  // Use getSmallFastModel() to respect ANTHROPIC_SMALL_FAST_MODEL env var for Bedrock users
  // with global inference profiles (see issue #10883).
  const model =
    isVertexGlobalEndpoint || isBedrockWithThinking || isVertexWithThinking
      ? getDefaultSonnetModel()
      : getSmallFastModel()
  const anthropic = await getAnthropicClient({
    maxRetries: 1,
    model,
    source: 'count_tokens',
  })

  // Strip tool search-specific fields (caller, tool_reference) before sending
  // These fields are only valid with the tool search beta header
  const normalizedMessages = stripToolSearchFieldsFromMessages(messages)

  // messages.create rejects an empty messages array, so send a 1-token dummy.
  const messagesToSend: MessageParam[] =
    normalizedMessages.length > 0
      ? (normalizedMessages as MessageParam[])
      : [{ role: 'user', content: 'count' }]

  const betas = getModelBetas(model)
  // Filter betas for Vertex - some betas (like web-search) cause 400 errors
  // on certain Vertex endpoints. See issue #10789.
  const filteredBetas =
    getAPIProvider() === 'vertex'
      ? betas.filter(b => VERTEX_COUNT_TOKENS_ALLOWED_BETAS.has(b))
      : betas

  // biome-ignore lint/plugin: token counting needs specialized parameters (thinking, betas) that sideQuery doesn't support
  const response = await anthropic.beta.messages.create({
    model: normalizeModelStringForAPI(model),
    // max_tokens: 1 keeps generation (and cost) minimal; with thinking enabled
    // the API requires max_tokens > thinking.budget_tokens, hence the larger value.
    max_tokens: containsThinking ? TOKEN_COUNT_MAX_TOKENS : 1,
    messages: messagesToSend,
    tools: tools.length > 0 ? tools : undefined,
    ...(filteredBetas.length > 0 && { betas: filteredBetas }),
    metadata: getAPIMetadata(),
    ...getExtraBodyParams(),
    // Enable thinking if messages contain thinking blocks
    ...(containsThinking && {
      thinking: {
        type: 'enabled',
        budget_tokens: TOKEN_COUNT_THINKING_BUDGET,
      },
    }),
  })

  const usage = response.usage
  const inputTokens = usage.input_tokens
  // Cache-creation and cache-read tokens still occupy context, so include them
  // in the total; `|| 0` covers providers that omit the cache fields.
  const cacheCreationTokens = usage.cache_creation_input_tokens || 0
  const cacheReadTokens = usage.cache_read_input_tokens || 0

  return inputTokens + cacheCreationTokens + cacheReadTokens
}

/**
 * Sum of {@link roughTokenCountEstimationForMessage} over a list of CLI messages.
 */
export function roughTokenCountEstimationForMessages(
  messages: readonly {
    type: string
    message?: { content?: unknown }
    attachment?: Attachment
  }[],
): number {
  let totalTokens = 0
  for (const message of messages) {
    totalTokens += roughTokenCountEstimationForMessage(message)
  }
  return totalTokens
}

/**
 * Rough token estimate for one CLI message.
 *
 * - user/assistant messages: estimated from their content blocks.
 * - attachments: normalized to API user messages first, then summed.
 * - anything else contributes 0.
 */
export function roughTokenCountEstimationForMessage(message: {
  type: string
  message?: { content?: unknown }
  attachment?: Attachment
}): number {
  if (
    (message.type === 'assistant' || message.type === 'user') &&
    message.message?.content
  ) {
    return roughTokenCountEstimationForContent(
      message.message?.content as
        | string
        | Array<Anthropic.ContentBlock>
        | Array<Anthropic.ContentBlockParam>
        | undefined,
    )
  }

  if (message.type === 'attachment' && message.attachment) {
    const userMessages = normalizeAttachmentForAPI(message.attachment)
    let total = 0
    for (const userMsg of userMessages) {
      total += roughTokenCountEstimationForContent(userMsg.message.content)
    }
    return total
  }

  return 0
}

/**
 * Rough token estimate for message content: a plain string is estimated
 * directly; an array of blocks is summed block by block; missing content is 0.
 */
function roughTokenCountEstimationForContent(
  content:
    | string
    | Array<Anthropic.ContentBlock>
    | Array<Anthropic.ContentBlockParam>
    | undefined,
): number {
  if (!content) {
    return 0
  }
  if (typeof content === 'string') {
    return roughTokenCountEstimation(content)
  }
  let totalTokens = 0
  for (const block of content) {
    totalTokens += roughTokenCountEstimationForBlock(block)
  }
  return totalTokens
}

/**
 * Rough token estimate for a single content block, dispatching on block type.
 * Images/documents use a flat constant; everything else is estimated from the
 * character length of its text (or serialized) payload.
 */
function roughTokenCountEstimationForBlock(
  block: string | Anthropic.ContentBlock | Anthropic.ContentBlockParam,
): number {
  if (typeof block === 'string') {
    return roughTokenCountEstimation(block)
  }
  if (block.type === 'text') {
    return roughTokenCountEstimation(block.text)
  }
  if (block.type === 'image' || block.type === 'document') {
    // https://platform.claude.com/docs/en/build-with-claude/vision#calculate-image-costs
    // tokens = (width px * height px)/750
    // Images are resized to max 2000x2000 (5333 tokens). Use a conservative
    // estimate that matches microCompact's IMAGE_MAX_TOKEN_SIZE to avoid
    // underestimating and triggering auto-compact too late.
    //
    // document: base64 PDF in source.data. Must NOT reach the
    // jsonStringify catch-all — a 1MB PDF is ~1.33M base64 chars →
    // ~325k estimated tokens, vs the ~2000 the API actually charges.
    // Same constant as microCompact's calculateToolResultTokens.
    return 2000
  }
  if (block.type === 'tool_result') {
    return roughTokenCountEstimationForContent(block.content)
  }
  if (block.type === 'tool_use') {
    // input is the JSON the model generated — arbitrarily large (bash
    // commands, Edit diffs, file contents). Stringify once for the
    // char count; the API re-serializes anyway so this is what it sees.
    return roughTokenCountEstimation(
      block.name + jsonStringify(block.input ?? {}),
    )
  }
  if (block.type === 'thinking') {
    return roughTokenCountEstimation(block.thinking)
  }
  if (block.type === 'redacted_thinking') {
    return roughTokenCountEstimation(block.data)
  }
  // server_tool_use, web_search_tool_result, mcp_tool_use, etc. —
  // text-like payloads (tool inputs, search results, no base64).
  // Stringify-length tracks the serialized form the API sees; the
  // key/bracket overhead is single-digit percent on real blocks.
  return roughTokenCountEstimation(jsonStringify(block))
}

/**
 * Count tokens via the Bedrock runtime CountTokens API, used because the
 * Anthropic Bedrock SDK has no countTokens support. The AWS SDK command class
 * is imported dynamically to keep ~279KB of code off the startup path.
 *
 * @returns the input token count, or null if the backing foundation-model ID
 *   cannot be resolved or the request fails (errors are logged, never thrown).
 */
async function countTokensWithBedrock({
  model,
  messages,
  tools,
  betas,
  containsThinking,
}: {
  model: string
  messages: Anthropic.Beta.Messages.BetaMessageParam[]
  tools: Anthropic.Beta.Messages.BetaToolUnion[]
  betas: string[]
  containsThinking: boolean
}): Promise<number | null> {
  try {
    const client = await createBedrockRuntimeClient()
    // Bedrock CountTokens requires a model ID, not an inference profile / ARN
    const modelId = isFoundationModel(model)
      ? model
      : await getInferenceProfileBackingModel(model)
    if (!modelId) {
      return null
    }

    // Body mirrors what an InvokeModel call would send, so the count matches
    // what an actual generation request would be charged.
    const requestBody = {
      anthropic_version: 'bedrock-2023-05-31',
      // When we pass tools and no messages, we need to pass a dummy message
      // to get an accurate tool token count.
      messages:
        messages.length > 0 ? messages : [{ role: 'user', content: 'foo' }],
      max_tokens: containsThinking ? TOKEN_COUNT_MAX_TOKENS : 1,
      ...(tools.length > 0 && { tools }),
      ...(betas.length > 0 && { anthropic_beta: betas }),
      ...(containsThinking && {
        thinking: {
          type: 'enabled',
          budget_tokens: TOKEN_COUNT_THINKING_BUDGET,
        },
      }),
    }

    const { CountTokensCommand } = await import(
      '@aws-sdk/client-bedrock-runtime'
    )
    const input: CountTokensCommandInput = {
      modelId,
      input: {
        invokeModel: {
          body: new TextEncoder().encode(jsonStringify(requestBody)),
        },
      },
    }
    const response = await client.send(new CountTokensCommand(input))
    const tokenCount = response.inputTokens ?? null
    return tokenCount
  } catch (error) {
    logError(error)
    return null
  }
}