utils/context.ts at main · oppi.li/claude-code

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / utils / context.ts
at main 221 lines 6.9 kB view raw
wrap content
oppi.li dump from zip 9d ago
63aada3f
  1// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
  2import { CONTEXT_1M_BETA_HEADER } from '../constants/betas.js'
  3import { getGlobalConfig } from './config.js'
  4import { isEnvTruthy } from './envUtils.js'
  5import { getCanonicalName } from './model/model.js'
  6import { getModelCapability } from './model/modelCapabilities.js'
  7
// Default model context window size, in tokens. getContextWindowForModel
// falls back to this value when no override, [1m] opt-in, capability data,
// beta header, experiment, or ant model config yields a different size, and
// clamps capability-reported windows to it when 1M context is disabled.
export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000

// Maximum output tokens for compact operations
export const COMPACT_MAX_OUTPUT_TOKENS = 20_000

// Fallback default and upper limit for max output tokens. Used by
// getModelMaxOutputTokens for models it does not recognize, and for ant
// models whose config omits the corresponding field.
const MAX_OUTPUT_TOKENS_DEFAULT = 32_000
const MAX_OUTPUT_TOKENS_UPPER_LIMIT = 64_000

// Capped default for slot-reservation optimization. BQ p99 output = 4,911
// tokens, so 32k/64k defaults over-reserve 8-16× slot capacity. With the cap
// enabled, <1% of requests hit the limit; those get one clean retry at 64k
// (see query.ts max_output_tokens_escalate). Cap is applied in
// claude.ts:getMaxOutputTokensForModel to avoid the growthbook→betas→context
// import cycle.
export const CAPPED_DEFAULT_MAX_TOKENS = 8_000
export const ESCALATED_MAX_TOKENS = 64_000
 26
 27/**
 28 * Check if 1M context is disabled via environment variable.
 29 * Used by C4E admins to disable 1M context for HIPAA compliance.
 30 */
 31export function is1mContextDisabled(): boolean {
 32  return isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_1M_CONTEXT)
 33}
 34
 35export function has1mContext(model: string): boolean {
 36  if (is1mContextDisabled()) {
 37    return false
 38  }
 39  return /\[1m\]/i.test(model)
 40}
 41
 42// @[MODEL LAUNCH]: Update this pattern if the new model supports 1M context
 43export function modelSupports1M(model: string): boolean {
 44  if (is1mContextDisabled()) {
 45    return false
 46  }
 47  const canonical = getCanonicalName(model)
 48  return canonical.includes('claude-sonnet-4') || canonical.includes('opus-4-6')
 49}
 50
 51export function getContextWindowForModel(
 52  model: string,
 53  betas?: string[],
 54): number {
 55  // Allow override via environment variable (ant-only)
 56  // This takes precedence over all other context window resolution, including 1M detection,
 57  // so users can cap the effective context window for local decisions (auto-compact, etc.)
 58  // while still using a 1M-capable endpoint.
 59  if (
 60    process.env.USER_TYPE === 'ant' &&
 61    process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS
 62  ) {
 63    const override = parseInt(process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS, 10)
 64    if (!isNaN(override) && override > 0) {
 65      return override
 66    }
 67  }
 68
 69  // [1m] suffix — explicit client-side opt-in, respected over all detection
 70  if (has1mContext(model)) {
 71    return 1_000_000
 72  }
 73
 74  const cap = getModelCapability(model)
 75  if (cap?.max_input_tokens && cap.max_input_tokens >= 100_000) {
 76    if (
 77      cap.max_input_tokens > MODEL_CONTEXT_WINDOW_DEFAULT &&
 78      is1mContextDisabled()
 79    ) {
 80      return MODEL_CONTEXT_WINDOW_DEFAULT
 81    }
 82    return cap.max_input_tokens
 83  }
 84
 85  if (betas?.includes(CONTEXT_1M_BETA_HEADER) && modelSupports1M(model)) {
 86    return 1_000_000
 87  }
 88  if (getSonnet1mExpTreatmentEnabled(model)) {
 89    return 1_000_000
 90  }
 91  if (process.env.USER_TYPE === 'ant') {
 92    const antModel = resolveAntModel(model)
 93    if (antModel?.contextWindow) {
 94      return antModel.contextWindow
 95    }
 96  }
 97  return MODEL_CONTEXT_WINDOW_DEFAULT
 98}
 99
/**
 * Reports whether the Sonnet 1M experiment treatment applies to `model`.
 *
 * All of the following must hold: 1M context is not disabled, the model has
 * no explicit `[1m]` suffix, its canonical name contains 'sonnet-4-6', and
 * the 'coral_reef_sonnet' entry of the global config's clientDataCache is the
 * string 'true'.
 *
 * @param model - Model name to evaluate.
 */
export function getSonnet1mExpTreatmentEnabled(model: string): boolean {
  // Only sonnet 4.6 without an explicit [1m] suffix is eligible.
  const ruledOut = is1mContextDisabled() || has1mContext(model)
  if (ruledOut || !getCanonicalName(model).includes('sonnet-4-6')) {
    return false
  }
  const cachedFlag = getGlobalConfig().clientDataCache?.['coral_reef_sonnet']
  return cachedFlag === 'true'
}
113
/**
 * Calculate context window usage percentages from token usage data.
 *
 * The used percentage is the sum of input, cache-creation and cache-read
 * tokens divided by the context window size, rounded to the nearest integer
 * and clamped to [0, 100]; `remaining` is `100 - used`.
 *
 * @param currentUsage - Token counts, or null when no usage data exists.
 * @param contextWindowSize - Context window size in tokens.
 * @returns Used and remaining percentages. Both are null when there is no
 *   usage data or when a percentage cannot be computed (zero, negative or NaN
 *   window size, or a NaN token total) — never NaN.
 */
export function calculateContextPercentages(
  currentUsage: {
    input_tokens: number
    cache_creation_input_tokens: number
    cache_read_input_tokens: number
  } | null,
  contextWindowSize: number,
): { used: number | null; remaining: number | null } {
  if (!currentUsage) {
    return { used: null, remaining: null }
  }

  // `!(x > 0)` rejects zero, negatives and NaN in one comparison. Without it,
  // 0 / 0 produced NaN percentages and other non-positive sizes produced a
  // meaningless clamped value.
  if (!(contextWindowSize > 0)) {
    return { used: null, remaining: null }
  }

  const totalInputTokens =
    currentUsage.input_tokens +
    currentUsage.cache_creation_input_tokens +
    currentUsage.cache_read_input_tokens

  const usageRatio = totalInputTokens / contextWindowSize
  // A NaN token count (or Infinity / Infinity) would otherwise survive the
  // rounding and clamping below and leak out as NaN.
  if (Number.isNaN(usageRatio)) {
    return { used: null, remaining: null }
  }

  const usedPercentage = Math.round(usageRatio * 100)
  const clampedUsed = Math.min(100, Math.max(0, usedPercentage))

  return {
    used: clampedUsed,
    remaining: 100 - clampedUsed,
  }
}
145
/**
 * Returns the model's default and upper limit for max output tokens.
 *
 * When USER_TYPE is 'ant' and resolveAntModel knows the (lower-cased) model,
 * its configured values are returned directly, with the module fallbacks
 * filling in any missing field. Otherwise the canonical model name is matched
 * against an ordered tier table, and capability data (when it reports at
 * least 4,096 max tokens) replaces the upper limit and caps the default.
 *
 * @param model - Model name.
 * @returns `default` and `upperLimit` output-token counts.
 */
export function getModelMaxOutputTokens(model: string): {
  default: number
  upperLimit: number
} {
  if (process.env.USER_TYPE === 'ant') {
    const antModel = resolveAntModel(model.toLowerCase())
    if (antModel) {
      return {
        default: antModel.defaultMaxTokens ?? MAX_OUTPUT_TOKENS_DEFAULT,
        upperLimit:
          antModel.upperMaxTokensLimit ?? MAX_OUTPUT_TOKENS_UPPER_LIMIT,
      }
    }
  }

  // Order matters: the first tier with a matching substring wins, so specific
  // names ('opus-4-6', 'sonnet-4-6') must precede broader ones ('opus-4',
  // 'sonnet-4').
  const tiers: ReadonlyArray<{
    substrings: readonly string[]
    defaultTokens: number
    upperLimit: number
  }> = [
    { substrings: ['opus-4-6'], defaultTokens: 64_000, upperLimit: 128_000 },
    { substrings: ['sonnet-4-6'], defaultTokens: 32_000, upperLimit: 128_000 },
    {
      substrings: ['opus-4-5', 'sonnet-4', 'haiku-4'],
      defaultTokens: 32_000,
      upperLimit: 64_000,
    },
    {
      substrings: ['opus-4-1', 'opus-4'],
      defaultTokens: 32_000,
      upperLimit: 32_000,
    },
    { substrings: ['claude-3-opus'], defaultTokens: 4_096, upperLimit: 4_096 },
    { substrings: ['claude-3-sonnet'], defaultTokens: 8_192, upperLimit: 8_192 },
    { substrings: ['claude-3-haiku'], defaultTokens: 4_096, upperLimit: 4_096 },
    {
      substrings: ['3-5-sonnet', '3-5-haiku'],
      defaultTokens: 8_192,
      upperLimit: 8_192,
    },
    { substrings: ['3-7-sonnet'], defaultTokens: 32_000, upperLimit: 64_000 },
  ]

  const canonicalName = getCanonicalName(model)
  const matchedTier = tiers.find(tier =>
    tier.substrings.some(fragment => canonicalName.includes(fragment)),
  )

  let defaultTokens = matchedTier
    ? matchedTier.defaultTokens
    : MAX_OUTPUT_TOKENS_DEFAULT
  let upperLimit = matchedTier
    ? matchedTier.upperLimit
    : MAX_OUTPUT_TOKENS_UPPER_LIMIT

  // Capability data, when present and at least 4,096, replaces the upper
  // limit; the default is then capped so it never exceeds that limit.
  const capabilityMax = getModelCapability(model)?.max_tokens
  if (capabilityMax && capabilityMax >= 4_096) {
    upperLimit = capabilityMax
    defaultTokens = Math.min(defaultTokens, capabilityMax)
  }

  return { default: defaultTokens, upperLimit }
}
211
/**
 * Returns the max thinking budget tokens for a given model: one less than
 * the model's upper limit for max output tokens, since the thinking budget
 * should be strictly less than the max output tokens.
 *
 * Deprecated since newer models use adaptive thinking rather than a
 * strict thinking token budget.
 *
 * @param model - Model name.
 */
export function getMaxThinkingTokensForModel(model: string): number {
  const { upperLimit } = getModelMaxOutputTokens(model)
  return upperLimit - 1
}