source dump of claude code
at main 221 lines 6.9 kB view raw
// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
import { CONTEXT_1M_BETA_HEADER } from '../constants/betas.js'
import { getGlobalConfig } from './config.js'
import { isEnvTruthy } from './envUtils.js'
import { getCanonicalName } from './model/model.js'
import { getModelCapability } from './model/modelCapabilities.js'
// NOTE(review): `resolveAntModel` is called below but no import for it is
// visible here — presumably an ANT-ONLY import was stripped from this copy.
// Confirm and restore it before building.

// Model context window size (200k tokens for all models right now)
export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000

// Maximum output tokens for compact operations
export const COMPACT_MAX_OUTPUT_TOKENS = 20_000

// Default max output tokens
const MAX_OUTPUT_TOKENS_DEFAULT = 32_000
const MAX_OUTPUT_TOKENS_UPPER_LIMIT = 64_000

// Capped default for slot-reservation optimization. BQ p99 output = 4,911
// tokens, so 32k/64k defaults over-reserve 8-16× slot capacity. With the cap
// enabled, <1% of requests hit the limit; those get one clean retry at 64k
// (see query.ts max_output_tokens_escalate). Cap is applied in
// claude.ts:getMaxOutputTokensForModel to avoid the growthbook→betas→context
// import cycle.
export const CAPPED_DEFAULT_MAX_TOKENS = 8_000
export const ESCALATED_MAX_TOKENS = 64_000

/**
 * Check if 1M context is disabled via environment variable.
 * Used by C4E admins to disable 1M context for HIPAA compliance.
 *
 * @returns `true` when `CLAUDE_CODE_DISABLE_1M_CONTEXT` is set to a truthy
 *   value as judged by `isEnvTruthy`.
 */
export function is1mContextDisabled(): boolean {
  return isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT)
}

/**
 * Check whether a model string carries the explicit `[1m]` opt-in marker.
 *
 * The marker is matched case-insensitively and anywhere in the string (the
 * pattern is not anchored to the end). Always `false` when 1M context is
 * disabled via the environment.
 *
 * @param model - Model identifier as supplied by the caller.
 */
export function has1mContext(model: string): boolean {
  if (is1mContextDisabled()) {
    return false
  }
  return /\[1m\]/i.test(model)
}

/**
 * Check whether a model is in the hard-coded set that supports 1M context.
 *
 * The test is a substring match on the canonical model name. Always `false`
 * when 1M context is disabled via the environment.
 *
 * @param model - Model identifier; canonicalized with `getCanonicalName`.
 */
// @[MODEL LAUNCH]: Update this pattern if the new model supports 1M context
export function modelSupports1M(model: string): boolean {
  if (is1mContextDisabled()) {
    return false
  }
  const canonical = getCanonicalName(model)
  return canonical.includes('claude-sonnet-4') || canonical.includes('opus-4-6')
}

/**
 * Resolve the context window size (in tokens) to use for a model.
 *
 * Sources are consulted in this order; the first one that applies wins:
 *  1. `CLAUDE_CODE_MAX_CONTEXT_TOKENS` (only when `USER_TYPE === 'ant'`), if it
 *     parses to a positive integer.
 *  2. An explicit `[1m]` marker in the model string → 1,000,000.
 *  3. The model capability's `max_input_tokens`, if it is at least 100,000
 *     (clamped to the default when 1M context is disabled).
 *  4. The 1M beta header in `betas` combined with a 1M-capable model → 1,000,000.
 *  5. The Sonnet 1M experiment treatment → 1,000,000.
 *  6. `contextWindow` of the resolved ant model (only when `USER_TYPE === 'ant'`).
 *  7. `MODEL_CONTEXT_WINDOW_DEFAULT`.
 *
 * @param model - Model identifier, optionally carrying a `[1m]` marker.
 * @param betas - Beta headers in effect for the request, if any.
 * @returns The context window size in tokens.
 */
export function getContextWindowForModel(
  model: string,
  betas?: string[],
): number {
  // Allow override via environment variable (ant-only)
  // This takes precedence over all other context window resolution, including 1M detection,
  // so users can cap the effective context window for local decisions (auto-compact, etc.)
  // while still using a 1M-capable endpoint.
  if (
    process.env.USER_TYPE === 'ant' &&
    process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS
  ) {
    const override = parseInt(process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS, 10)
    // Unparseable or non-positive values are ignored and resolution continues.
    if (!Number.isNaN(override) && override > 0) {
      return override
    }
  }

  // [1m] marker — explicit client-side opt-in, respected over all detection
  if (has1mContext(model)) {
    return 1_000_000
  }

  const cap = getModelCapability(model)
  // Capability values below 100k are not used; resolution falls through.
  if (cap?.max_input_tokens && cap.max_input_tokens >= 100_000) {
    if (
      cap.max_input_tokens > MODEL_CONTEXT_WINDOW_DEFAULT &&
      is1mContextDisabled()
    ) {
      // 1M context is disabled: never report more than the default window.
      return MODEL_CONTEXT_WINDOW_DEFAULT
    }
    return cap.max_input_tokens
  }

  if (betas?.includes(CONTEXT_1M_BETA_HEADER) && modelSupports1M(model)) {
    return 1_000_000
  }
  if (getSonnet1mExpTreatmentEnabled(model)) {
    return 1_000_000
  }
  if (process.env.USER_TYPE === 'ant') {
    const antModel = resolveAntModel(model)
    if (antModel?.contextWindow) {
      return antModel.contextWindow
    }
  }
  return MODEL_CONTEXT_WINDOW_DEFAULT
}

/**
 * Check whether the Sonnet 1M experiment treatment applies to a model.
 *
 * Requires all of: 1M context not disabled, no explicit `[1m]` marker on the
 * model, a canonical name containing `sonnet-4-6`, and the
 * `coral_reef_sonnet` entry of the global config's `clientDataCache` equal to
 * the string `'true'`.
 *
 * @param model - Model identifier.
 */
export function getSonnet1mExpTreatmentEnabled(model: string): boolean {
  if (is1mContextDisabled()) {
    return false
  }
  // Only applies to sonnet 4.6 without an explicit [1m] suffix
  if (has1mContext(model)) {
    return false
  }
  if (!getCanonicalName(model).includes('sonnet-4-6')) {
    return false
  }
  return getGlobalConfig().clientDataCache?.['coral_reef_sonnet'] === 'true'
}

/**
 * Calculate context window usage percentage from token usage data.
 *
 * The used percentage is the sum of input, cache-creation and cache-read
 * tokens divided by the window size, rounded to the nearest integer and
 * clamped to the range 0–100. `remaining` is `100 - used`.
 *
 * @param currentUsage - Token usage counters, or `null` if none are available.
 * @param contextWindowSize - Context window size in tokens.
 * @returns Used and remaining percentages, or `null` for both when there is
 *   no usage data or the window size is not a positive finite number.
 */
export function calculateContextPercentages(
  currentUsage: {
    input_tokens: number
    cache_creation_input_tokens: number
    cache_read_input_tokens: number
  } | null,
  contextWindowSize: number,
): { used: number | null; remaining: number | null } {
  if (!currentUsage) {
    return { used: null, remaining: null }
  }

  // A zero, negative or non-finite window cannot yield a meaningful
  // percentage (0 / 0 would otherwise leak NaN to callers), so report it the
  // same way as missing usage data.
  if (!Number.isFinite(contextWindowSize) || contextWindowSize <= 0) {
    return { used: null, remaining: null }
  }

  const totalInputTokens =
    currentUsage.input_tokens +
    currentUsage.cache_creation_input_tokens +
    currentUsage.cache_read_input_tokens

  const usedPercentage = Math.round(
    (totalInputTokens / contextWindowSize) * 100,
  )
  const clampedUsed = Math.min(100, Math.max(0, usedPercentage))

  return {
    used: clampedUsed,
    remaining: 100 - clampedUsed,
  }
}

/**
 * Returns the model's default and upper limit for max output tokens.
 *
 * When `USER_TYPE === 'ant'` and the model resolves to an ant model, that
 * model's values are returned directly (falling back to the module defaults
 * for any missing field) and the capability lookup is skipped. Otherwise the
 * values come from a substring match on the canonical model name, and a
 * capability `max_tokens` of at least 4,096 then replaces the upper limit and
 * caps the default.
 *
 * @param model - Model identifier.
 * @returns `default` and `upperLimit` output token counts.
 */
export function getModelMaxOutputTokens(model: string): {
  default: number
  upperLimit: number
} {
  let defaultTokens: number
  let upperLimit: number

  if (process.env.USER_TYPE === 'ant') {
    const antModel = resolveAntModel(model.toLowerCase())
    if (antModel) {
      defaultTokens = antModel.defaultMaxTokens ?? MAX_OUTPUT_TOKENS_DEFAULT
      upperLimit = antModel.upperMaxTokensLimit ?? MAX_OUTPUT_TOKENS_UPPER_LIMIT
      return { default: defaultTokens, upperLimit }
    }
  }

  const m = getCanonicalName(model)

  // Branch order matters: these are substring matches, so the more specific
  // names ('opus-4-6', 'sonnet-4-6', 'opus-4-5') must be tested before the
  // broader ones ('sonnet-4', 'opus-4') that would also match them.
  if (m.includes('opus-4-6')) {
    defaultTokens = 64_000
    upperLimit = 128_000
  } else if (m.includes('sonnet-4-6')) {
    defaultTokens = 32_000
    upperLimit = 128_000
  } else if (
    m.includes('opus-4-5') ||
    m.includes('sonnet-4') ||
    m.includes('haiku-4')
  ) {
    defaultTokens = 32_000
    upperLimit = 64_000
  } else if (m.includes('opus-4-1') || m.includes('opus-4')) {
    defaultTokens = 32_000
    upperLimit = 32_000
  } else if (m.includes('claude-3-opus')) {
    defaultTokens = 4_096
    upperLimit = 4_096
  } else if (m.includes('claude-3-sonnet')) {
    defaultTokens = 8_192
    upperLimit = 8_192
  } else if (m.includes('claude-3-haiku')) {
    defaultTokens = 4_096
    upperLimit = 4_096
  } else if (m.includes('3-5-sonnet') || m.includes('3-5-haiku')) {
    defaultTokens = 8_192
    upperLimit = 8_192
  } else if (m.includes('3-7-sonnet')) {
    defaultTokens = 32_000
    upperLimit = 64_000
  } else {
    defaultTokens = MAX_OUTPUT_TOKENS_DEFAULT
    upperLimit = MAX_OUTPUT_TOKENS_UPPER_LIMIT
  }

  const cap = getModelCapability(model)
  // Capability values below 4,096 are ignored and the table values stand.
  if (cap?.max_tokens && cap.max_tokens >= 4_096) {
    upperLimit = cap.max_tokens
    // Keep the default within the (possibly lowered) upper limit.
    defaultTokens = Math.min(defaultTokens, upperLimit)
  }

  return { default: defaultTokens, upperLimit }
}

/**
 * Returns the max thinking budget tokens for a given model. The max
 * thinking tokens should be strictly less than the max output tokens,
 * so this is the model's output-token upper limit minus one.
 *
 * Deprecated since newer models use adaptive thinking rather than a
 * strict thinking token budget.
 *
 * @param model - Model identifier.
 */
export function getMaxThinkingTokensForModel(model: string): number {
  return getModelMaxOutputTokens(model).upperLimit - 1
}