// Source dump of Claude Code.
// Snapshot at `main` (original listing: 515 lines, 17 kB).
import { APIError } from '@anthropic-ai/sdk'
import type { MessageParam } from '@anthropic-ai/sdk/resources/index.mjs'
import isEqual from 'lodash-es/isEqual.js'
import { getIsNonInteractiveSession } from '../bootstrap/state.js'
import { isClaudeAISubscriber } from '../utils/auth.js'
import { getModelBetas } from '../utils/betas.js'
import { getGlobalConfig, saveGlobalConfig } from '../utils/config.js'
import { logError } from '../utils/log.js'
import { getSmallFastModel } from '../utils/model/model.js'
import { isEssentialTrafficOnly } from '../utils/privacyLevel.js'
import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from './analytics/index.js'
import { logEvent } from './analytics/index.js'
import { getAPIMetadata } from './api/claude.js'
import { getAnthropicClient } from './api/client.js'
import {
  processRateLimitHeaders,
  shouldProcessRateLimits,
} from './rateLimitMocking.js'

// Re-export message functions from centralized location
export {
  getRateLimitErrorMessage,
  getRateLimitWarning,
  getUsingOverageText,
} from './rateLimitMessages.js'

type QuotaStatus = 'allowed' | 'allowed_warning' | 'rejected'

type RateLimitType =
  | 'five_hour'
  | 'seven_day'
  | 'seven_day_opus'
  | 'seven_day_sonnet'
  | 'overage'

export type { RateLimitType }

type EarlyWarningThreshold = {
  utilization: number // 0-1 scale: trigger warning when usage >= this
  timePct: number // 0-1 scale: trigger warning when time elapsed <= this
}

type EarlyWarningConfig = {
  rateLimitType: RateLimitType
  claimAbbrev: '5h' | '7d'
  windowSeconds: number
  thresholds: EarlyWarningThreshold[]
}

// Early warning configurations in priority order (checked first to last)
// Used as fallback when server doesn't send surpassed-threshold header
// Warns users when they're consuming quota faster than the time window allows
const EARLY_WARNING_CONFIGS: EarlyWarningConfig[] = [
  {
    rateLimitType: 'five_hour',
    claimAbbrev: '5h',
    windowSeconds: 5 * 60 * 60,
    thresholds: [{ utilization: 0.9, timePct: 0.72 }],
  },
  {
    rateLimitType: 'seven_day',
    claimAbbrev: '7d',
    windowSeconds: 7 * 24 * 60 * 60,
    thresholds: [
      { utilization: 0.75, timePct: 0.6 },
      { utilization: 0.5, timePct: 0.35 },
      { utilization: 0.25, timePct: 0.15 },
    ],
  },
]

// Maps claim abbreviations to rate limit types for header-based detection
const EARLY_WARNING_CLAIM_MAP: Record<string, RateLimitType> = {
  '5h': 'five_hour',
  '7d': 'seven_day',
  overage: 'overage',
}

const RATE_LIMIT_DISPLAY_NAMES: Record<RateLimitType, string> = {
  five_hour: 'session limit',
  seven_day: 'weekly limit',
  seven_day_opus: 'Opus limit',
  seven_day_sonnet: 'Sonnet limit',
  overage: 'extra usage limit',
}

/**
 * Look up the user-facing label for a rate limit type.
 *
 * @param type - Rate limit type to describe.
 * @returns The label from RATE_LIMIT_DISPLAY_NAMES, or `type` itself when the
 *   table has no (non-empty) entry for it.
 */
export function getRateLimitDisplayName(type: RateLimitType): string {
  return RATE_LIMIT_DISPLAY_NAMES[type] || type
}

/**
 * Build the name of a per-claim unified rate limit header, i.e.
 * `anthropic-ratelimit-unified-<claimAbbrev>-<field>`.
 *
 * @param claimAbbrev - Claim abbreviation as used in the header name (e.g. '5h').
 * @param field - Trailing header segment.
 */
function unifiedClaimHeader(
  claimAbbrev: string,
  field: 'utilization' | 'reset' | 'surpassed-threshold',
): string {
  return `anthropic-ratelimit-unified-${claimAbbrev}-${field}`
}

/**
 * Convert a raw header value to a finite number.
 *
 * @param raw - Value returned by `Headers.get()`.
 * @returns The parsed number, or `undefined` when the header is absent, blank,
 *   or does not parse to a finite number. Callers therefore never store `NaN`
 *   (or a `0` produced from an empty string) in limit state.
 */
function parseNumericHeader(raw: string | null): number | undefined {
  if (raw === null || raw.trim() === '') {
    return undefined
  }
  const parsed = Number(raw)
  return Number.isFinite(parsed) ? parsed : undefined
}

/**
 * Calculate what fraction of a time window has elapsed.
 * Used for time-relative early warning fallback.
 * @param resetsAt - Unix epoch timestamp in seconds when the limit resets
 * @param windowSeconds - Duration of the window in seconds
 * @returns fraction (0-1) of the window that has elapsed
 */
function computeTimeProgress(resetsAt: number, windowSeconds: number): number {
  const nowSeconds = Date.now() / 1000
  const windowStart = resetsAt - windowSeconds
  const elapsed = nowSeconds - windowStart
  // Clamp so a reset time in the past or far in the future stays within [0, 1].
  return Math.max(0, Math.min(1, elapsed / windowSeconds))
}

// Reason why overage is disabled/rejected
// These values come from the API's unified limiter
export type OverageDisabledReason =
  | 'overage_not_provisioned' // Overage is not provisioned for this org or seat tier
  | 'org_level_disabled' // Organization doesn't have overage enabled
  | 'org_level_disabled_until' // Organization overage temporarily disabled
  | 'out_of_credits' // Organization has insufficient credits
  | 'seat_tier_level_disabled' // Seat tier doesn't have overage enabled
  | 'member_level_disabled' // Account specifically has overage disabled
  | 'seat_tier_zero_credit_limit' // Seat tier has a zero credit limit
  | 'group_zero_credit_limit' // Resolved group limit has a zero credit limit
  | 'member_zero_credit_limit' // Account has a zero credit limit
  | 'org_service_level_disabled' // Org service specifically has overage disabled
  | 'org_service_zero_credit_limit' // Org service has a zero credit limit
  | 'no_limits_configured' // No overage limits configured for account
  | 'unknown' // Unknown reason, should not happen

export type ClaudeAILimits = {
  status: QuotaStatus
  // unifiedRateLimitFallbackAvailable is currently used to warn users that set
  // their model to Opus whenever they are about to run out of quota. It does
  // not change the actual model that is used.
  unifiedRateLimitFallbackAvailable: boolean
  resetsAt?: number
  rateLimitType?: RateLimitType
  utilization?: number
  overageStatus?: QuotaStatus
  overageResetsAt?: number
  overageDisabledReason?: OverageDisabledReason
  isUsingOverage?: boolean
  surpassedThreshold?: number
}

// Exported for testing only
export let currentLimits: ClaudeAILimits = {
  status: 'allowed',
  unifiedRateLimitFallbackAvailable: false,
  isUsingOverage: false,
}

/**
 * Raw per-window utilization from response headers, tracked on every API
 * response (unlike currentLimits.utilization which is only set when a warning
 * threshold fires). Exposed to statusline scripts via getRawUtilization().
 */
type RawWindowUtilization = {
  utilization: number // 0-1 fraction
  resets_at: number // unix epoch seconds
}
type RawUtilization = {
  five_hour?: RawWindowUtilization
  seven_day?: RawWindowUtilization
}
let rawUtilization: RawUtilization = {}

/**
 * @returns The most recently recorded raw per-window utilization; an empty
 *   object when nothing has been recorded or the state was cleared.
 */
export function getRawUtilization(): RawUtilization {
  return rawUtilization
}

/**
 * Read the 5h and 7d utilization/reset header pairs.
 *
 * A window is included only when both of its headers are present and parse to
 * finite numbers, so malformed values never reach statusline consumers.
 *
 * @param headers - Response headers to read.
 * @returns Utilization keyed by window; windows with missing or invalid
 *   headers are omitted.
 */
function extractRawUtilization(headers: globalThis.Headers): RawUtilization {
  const result: RawUtilization = {}
  for (const [key, abbrev] of [
    ['five_hour', '5h'],
    ['seven_day', '7d'],
  ] as const) {
    const utilization = parseNumericHeader(
      headers.get(unifiedClaimHeader(abbrev, 'utilization')),
    )
    const resetsAt = parseNumericHeader(
      headers.get(unifiedClaimHeader(abbrev, 'reset')),
    )
    if (utilization !== undefined && resetsAt !== undefined) {
      result[key] = { utilization, resets_at: resetsAt }
    }
  }
  return result
}

type StatusChangeListener = (limits: ClaudeAILimits) => void
export const statusListeners: Set<StatusChangeListener> = new Set()

/**
 * Store `limits` as the current limits, notify every registered status
 * listener, and log a `tengu_claudeai_limits_status_changed` analytics event.
 *
 * @param limits - New limits state to publish.
 */
export function emitStatusChange(limits: ClaudeAILimits) {
  currentLimits = limits
  statusListeners.forEach(listener => listener(limits))
  // Reported as 0 when no reset time is known.
  const hoursTillReset = Math.round(
    (limits.resetsAt ? limits.resetsAt - Date.now() / 1000 : 0) / (60 * 60),
  )

  logEvent('tengu_claudeai_limits_status_changed', {
    status:
      limits.status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    unifiedRateLimitFallbackAvailable: limits.unifiedRateLimitFallbackAvailable,
    hoursTillReset,
  })
}

/**
 * Send a one-token request with the small/fast model and return the raw
 * response so the caller can read its rate limit headers.
 */
async function makeTestQuery() {
  const model = getSmallFastModel()
  const anthropic = await getAnthropicClient({
    maxRetries: 0,
    model,
    source: 'quota_check',
  })
  const messages: MessageParam[] = [{ role: 'user', content: 'quota' }]
  const betas = getModelBetas(model)
  // biome-ignore lint/plugin: quota check needs raw response access via asResponse()
  return anthropic.beta.messages
    .create({
      model,
      max_tokens: 1,
      messages,
      metadata: getAPIMetadata(),
      ...(betas.length > 0 ? { betas } : {}),
    })
    .asResponse()
}

/**
 * Refresh the quota state by issuing a minimal test request and processing
 * the rate limit headers of its response (or of the resulting APIError).
 *
 * Does nothing when only essential traffic is allowed, when rate limits should
 * not be processed, or in non-interactive sessions.
 */
export async function checkQuotaStatus(): Promise<void> {
  // Skip network requests if nonessential traffic is disabled
  if (isEssentialTrafficOnly()) {
    return
  }

  // Check if we should process rate limits (real subscriber or mock testing)
  if (!shouldProcessRateLimits(isClaudeAISubscriber())) {
    return
  }

  // In non-interactive mode (-p), the real query follows immediately and
  // extractQuotaStatusFromHeaders() will update limits from its response
  // headers (claude.ts), so skip this pre-check API call.
  if (getIsNonInteractiveSession()) {
    return
  }

  try {
    // Make a minimal request to check quota
    const raw = await makeTestQuery()

    // Update limits based on the response
    extractQuotaStatusFromHeaders(raw.headers)
  } catch (error) {
    // Only API errors carry rate limit information; any other failure is
    // ignored here and leaves the current limits untouched.
    if (error instanceof APIError) {
      extractQuotaStatusFromError(error)
    }
  }
}

/**
 * Check if early warning should be triggered based on surpassed-threshold header.
 * Returns ClaudeAILimits if a threshold was surpassed, null otherwise.
 *
 * Claims are checked in EARLY_WARNING_CLAIM_MAP order and the first one whose
 * surpassed-threshold header is present wins. Numeric fields that are missing
 * or malformed are reported as `undefined`.
 */
function getHeaderBasedEarlyWarning(
  headers: globalThis.Headers,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  // Check each claim type for surpassed threshold header
  for (const [claimAbbrev, rateLimitType] of Object.entries(
    EARLY_WARNING_CLAIM_MAP,
  )) {
    const surpassedThreshold = headers.get(
      unifiedClaimHeader(claimAbbrev, 'surpassed-threshold'),
    )

    // The mere presence of the header means a warning threshold was crossed.
    if (surpassedThreshold === null) {
      continue
    }

    return {
      status: 'allowed_warning',
      resetsAt: parseNumericHeader(
        headers.get(unifiedClaimHeader(claimAbbrev, 'reset')),
      ),
      rateLimitType,
      utilization: parseNumericHeader(
        headers.get(unifiedClaimHeader(claimAbbrev, 'utilization')),
      ),
      unifiedRateLimitFallbackAvailable,
      isUsingOverage: false,
      surpassedThreshold: parseNumericHeader(surpassedThreshold),
    }
  }

  return null
}

/**
 * Check if time-relative early warning should be triggered for a rate limit type.
 * Fallback when server doesn't send surpassed-threshold header.
 * Returns ClaudeAILimits if thresholds are exceeded, null otherwise.
 *
 * Returns null as well when the utilization or reset header is missing or
 * not a finite number.
 */
function getTimeRelativeEarlyWarning(
  headers: globalThis.Headers,
  config: EarlyWarningConfig,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  const { rateLimitType, claimAbbrev, windowSeconds, thresholds } = config

  const utilization = parseNumericHeader(
    headers.get(unifiedClaimHeader(claimAbbrev, 'utilization')),
  )
  const resetsAt = parseNumericHeader(
    headers.get(unifiedClaimHeader(claimAbbrev, 'reset')),
  )

  if (utilization === undefined || resetsAt === undefined) {
    return null
  }

  const timeProgress = computeTimeProgress(resetsAt, windowSeconds)

  // Check if any threshold is exceeded: high usage early in the window
  const shouldWarn = thresholds.some(
    t => utilization >= t.utilization && timeProgress <= t.timePct,
  )

  if (!shouldWarn) {
    return null
  }

  return {
    status: 'allowed_warning',
    resetsAt,
    rateLimitType,
    utilization,
    unifiedRateLimitFallbackAvailable,
    isUsingOverage: false,
  }
}

/**
 * Get early warning limits using header-based detection with time-relative fallback.
 * 1. First checks for surpassed-threshold header (new server-side approach)
 * 2. Falls back to time-relative thresholds (client-side calculation)
 */
function getEarlyWarningFromHeaders(
  headers: globalThis.Headers,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  // Try header-based detection first (preferred when API sends the header)
  const headerBasedWarning = getHeaderBasedEarlyWarning(
    headers,
    unifiedRateLimitFallbackAvailable,
  )
  if (headerBasedWarning) {
    return headerBasedWarning
  }

  // Fallback: Use time-relative thresholds (client-side calculation)
  // This catches users burning quota faster than sustainable
  for (const config of EARLY_WARNING_CONFIGS) {
    const timeRelativeWarning = getTimeRelativeEarlyWarning(
      headers,
      config,
      unifiedRateLimitFallbackAvailable,
    )
    if (timeRelativeWarning) {
      return timeRelativeWarning
    }
  }

  return null
}

/**
 * Derive a ClaudeAILimits value from the unified rate limit headers.
 *
 * When the reported status is 'allowed' or 'allowed_warning', the result is
 * either the early-warning limits (if any threshold fired) or plain 'allowed';
 * a server-side 'allowed_warning' without a fired threshold is downgraded to
 * 'allowed'.
 *
 * @param headers - Response headers to read.
 * @returns The limits described by the headers.
 */
function computeNewLimitsFromHeaders(
  headers: globalThis.Headers,
): ClaudeAILimits {
  const status =
    (headers.get('anthropic-ratelimit-unified-status') as QuotaStatus) ||
    'allowed'
  const resetsAt = parseNumericHeader(
    headers.get('anthropic-ratelimit-unified-reset'),
  )
  const unifiedRateLimitFallbackAvailable =
    headers.get('anthropic-ratelimit-unified-fallback') === 'available'

  // Headers for rate limit type and overage support
  const rateLimitType = headers.get(
    'anthropic-ratelimit-unified-representative-claim',
  ) as RateLimitType | null
  const overageStatus = headers.get(
    'anthropic-ratelimit-unified-overage-status',
  ) as QuotaStatus | null
  const overageResetsAt = parseNumericHeader(
    headers.get('anthropic-ratelimit-unified-overage-reset'),
  )

  // Reason why overage is disabled (spending cap or wallet empty)
  const overageDisabledReason = headers.get(
    'anthropic-ratelimit-unified-overage-disabled-reason',
  ) as OverageDisabledReason | null

  // Determine if we're using overage (standard limits rejected but overage allowed)
  const isUsingOverage =
    status === 'rejected' &&
    (overageStatus === 'allowed' || overageStatus === 'allowed_warning')

  // Check for early warning (surpassed-threshold header, then time-relative
  // fallback). If status is allowed/allowed_warning and a threshold fired,
  // show the warning.
  let finalStatus: QuotaStatus = status
  if (status === 'allowed' || status === 'allowed_warning') {
    const earlyWarning = getEarlyWarningFromHeaders(
      headers,
      unifiedRateLimitFallbackAvailable,
    )
    if (earlyWarning) {
      return earlyWarning
    }
    // No early warning threshold surpassed
    finalStatus = 'allowed'
  }

  return {
    status: finalStatus,
    resetsAt,
    unifiedRateLimitFallbackAvailable,
    ...(rateLimitType && { rateLimitType }),
    ...(overageStatus && { overageStatus }),
    ...(overageResetsAt && { overageResetsAt }),
    ...(overageDisabledReason && { overageDisabledReason }),
    isUsingOverage,
  }
}

/**
 * Cache the extra usage disabled reason from API headers.
 * The global config is only written when the value differs from the cached one.
 */
function cacheExtraUsageDisabledReason(headers: globalThis.Headers): void {
  // A null reason means extra usage is enabled (no disabled reason header)
  const reason =
    headers.get('anthropic-ratelimit-unified-overage-disabled-reason') ?? null
  const cached = getGlobalConfig().cachedExtraUsageDisabledReason
  if (cached !== reason) {
    saveGlobalConfig(current => ({
      ...current,
      cachedExtraUsageDisabledReason: reason,
    }))
  }
}

/**
 * Update the quota state from the rate limit headers of an API response.
 *
 * When rate limits should not be processed, the raw utilization is cleared and
 * any non-default limits are reset to 'allowed'. Otherwise the headers are
 * parsed and a status change is emitted only if the limits actually changed.
 *
 * @param headers - Headers of the API response.
 */
export function extractQuotaStatusFromHeaders(
  headers: globalThis.Headers,
): void {
  // Check if we need to process rate limits
  const isSubscriber = isClaudeAISubscriber()

  if (!shouldProcessRateLimits(isSubscriber)) {
    // If we have any rate limit state, clear it
    rawUtilization = {}
    if (currentLimits.status !== 'allowed' || currentLimits.resetsAt) {
      const defaultLimits: ClaudeAILimits = {
        status: 'allowed',
        unifiedRateLimitFallbackAvailable: false,
        isUsingOverage: false,
      }
      emitStatusChange(defaultLimits)
    }
    return
  }

  // Process headers (applies mocks from /mock-limits command if active)
  const headersToUse = processRateLimitHeaders(headers)
  rawUtilization = extractRawUtilization(headersToUse)
  const newLimits = computeNewLimitsFromHeaders(headersToUse)

  // Cache extra usage status (persists across sessions)
  cacheExtraUsageDisabledReason(headersToUse)

  if (!isEqual(currentLimits, newLimits)) {
    emitStatusChange(newLimits)
  }
}

/**
 * Update the quota state from a failed API call.
 *
 * Only HTTP 429 errors are considered, and only when rate limits should be
 * processed. The resulting status is always 'rejected'; the remaining fields
 * come from the error's headers when present, otherwise from the current
 * limits. Exceptions raised while processing are logged, not rethrown.
 *
 * @param error - API error raised by the request.
 */
export function extractQuotaStatusFromError(error: APIError): void {
  if (
    !shouldProcessRateLimits(isClaudeAISubscriber()) ||
    error.status !== 429
  ) {
    return
  }

  try {
    let newLimits: ClaudeAILimits = { ...currentLimits }
    if (error.headers) {
      // Process headers (applies mocks from /mock-limits command if active)
      const headersToUse = processRateLimitHeaders(error.headers)
      rawUtilization = extractRawUtilization(headersToUse)
      newLimits = computeNewLimitsFromHeaders(headersToUse)

      // Cache extra usage status (persists across sessions)
      cacheExtraUsageDisabledReason(headersToUse)
    }
    // For errors, always set status to rejected even if headers are not
    // present.
    newLimits.status = 'rejected'

    if (!isEqual(currentLimits, newLimits)) {
      emitStatusChange(newLimits)
    }
  } catch (e) {
    // Narrow instead of asserting: a thrown value is not guaranteed to be an Error.
    logError(e instanceof Error ? e : new Error(String(e)))
  }
}