import { APIError } from '@anthropic-ai/sdk' import type { MessageParam } from '@anthropic-ai/sdk/resources/index.mjs' import isEqual from 'lodash-es/isEqual.js' import { getIsNonInteractiveSession } from '../bootstrap/state.js' import { isClaudeAISubscriber } from '../utils/auth.js' import { getModelBetas } from '../utils/betas.js' import { getGlobalConfig, saveGlobalConfig } from '../utils/config.js' import { logError } from '../utils/log.js' import { getSmallFastModel } from '../utils/model/model.js' import { isEssentialTrafficOnly } from '../utils/privacyLevel.js' import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from './analytics/index.js' import { logEvent } from './analytics/index.js' import { getAPIMetadata } from './api/claude.js' import { getAnthropicClient } from './api/client.js' import { processRateLimitHeaders, shouldProcessRateLimits, } from './rateLimitMocking.js' // Re-export message functions from centralized location export { getRateLimitErrorMessage, getRateLimitWarning, getUsingOverageText, } from './rateLimitMessages.js' type QuotaStatus = 'allowed' | 'allowed_warning' | 'rejected' type RateLimitType = | 'five_hour' | 'seven_day' | 'seven_day_opus' | 'seven_day_sonnet' | 'overage' export type { RateLimitType } type EarlyWarningThreshold = { utilization: number // 0-1 scale: trigger warning when usage >= this timePct: number // 0-1 scale: trigger warning when time elapsed <= this } type EarlyWarningConfig = { rateLimitType: RateLimitType claimAbbrev: '5h' | '7d' windowSeconds: number thresholds: EarlyWarningThreshold[] } // Early warning configurations in priority order (checked first to last) // Used as fallback when server doesn't send surpassed-threshold header // Warns users when they're consuming quota faster than the time window allows const EARLY_WARNING_CONFIGS: EarlyWarningConfig[] = [ { rateLimitType: 'five_hour', claimAbbrev: '5h', windowSeconds: 5 * 60 * 60, thresholds: [{ utilization: 0.9, timePct: 0.72 }], }, { rateLimitType: 'seven_day', claimAbbrev: '7d', windowSeconds: 7 * 24 * 60 * 60, thresholds: [ { utilization: 0.75, timePct: 0.6 }, { utilization: 0.5, timePct: 0.35 }, { utilization: 0.25, timePct: 0.15 }, ], }, ] // Maps claim abbreviations to rate limit types for header-based detection const EARLY_WARNING_CLAIM_MAP: Record = { '5h': 'five_hour', '7d': 'seven_day', overage: 'overage', } const RATE_LIMIT_DISPLAY_NAMES: Record = { five_hour: 'session limit', seven_day: 'weekly limit', seven_day_opus: 'Opus limit', seven_day_sonnet: 'Sonnet limit', overage: 'extra usage limit', } export function getRateLimitDisplayName(type: RateLimitType): string { return RATE_LIMIT_DISPLAY_NAMES[type] || type } /** * Calculate what fraction of a time window has elapsed. * Used for time-relative early warning fallback. * @param resetsAt - Unix epoch timestamp in seconds when the limit resets * @param windowSeconds - Duration of the window in seconds * @returns fraction (0-1) of the window that has elapsed */ function computeTimeProgress(resetsAt: number, windowSeconds: number): number { const nowSeconds = Date.now() / 1000 const windowStart = resetsAt - windowSeconds const elapsed = nowSeconds - windowStart return Math.max(0, Math.min(1, elapsed / windowSeconds)) } // Reason why overage is disabled/rejected // These values come from the API's unified limiter export type OverageDisabledReason = | 'overage_not_provisioned' // Overage is not provisioned for this org or seat tier | 'org_level_disabled' // Organization doesn't have overage enabled | 'org_level_disabled_until' // Organization overage temporarily disabled | 'out_of_credits' // Organization has insufficient credits | 'seat_tier_level_disabled' // Seat tier doesn't have overage enabled | 'member_level_disabled' // Account specifically has overage disabled | 'seat_tier_zero_credit_limit' // Seat tier has a zero credit limit | 'group_zero_credit_limit' // Resolved group limit has a zero credit limit | 'member_zero_credit_limit' // Account has a zero credit limit | 'org_service_level_disabled' // Org service specifically has overage disabled | 'org_service_zero_credit_limit' // Org service has a zero credit limit | 'no_limits_configured' // No overage limits configured for account | 'unknown' // Unknown reason, should not happen export type ClaudeAILimits = { status: QuotaStatus // unifiedRateLimitFallbackAvailable is currently used to warn users that set // their model to Opus whenever they are about to run out of quota. It does // not change the actual model that is used. unifiedRateLimitFallbackAvailable: boolean resetsAt?: number rateLimitType?: RateLimitType utilization?: number overageStatus?: QuotaStatus overageResetsAt?: number overageDisabledReason?: OverageDisabledReason isUsingOverage?: boolean surpassedThreshold?: number } // Exported for testing only export let currentLimits: ClaudeAILimits = { status: 'allowed', unifiedRateLimitFallbackAvailable: false, isUsingOverage: false, } /** * Raw per-window utilization from response headers, tracked on every API * response (unlike currentLimits.utilization which is only set when a warning * threshold fires). Exposed to statusline scripts via getRawUtilization(). */ type RawWindowUtilization = { utilization: number // 0-1 fraction resets_at: number // unix epoch seconds } type RawUtilization = { five_hour?: RawWindowUtilization seven_day?: RawWindowUtilization } let rawUtilization: RawUtilization = {} export function getRawUtilization(): RawUtilization { return rawUtilization } function extractRawUtilization(headers: globalThis.Headers): RawUtilization { const result: RawUtilization = {} for (const [key, abbrev] of [ ['five_hour', '5h'], ['seven_day', '7d'], ] as const) { const util = headers.get( `anthropic-ratelimit-unified-${abbrev}-utilization`, ) const reset = headers.get(`anthropic-ratelimit-unified-${abbrev}-reset`) if (util !== null && reset !== null) { result[key] = { utilization: Number(util), resets_at: Number(reset) } } } return result } type StatusChangeListener = (limits: ClaudeAILimits) => void export const statusListeners: Set = new Set() export function emitStatusChange(limits: ClaudeAILimits) { currentLimits = limits statusListeners.forEach(listener => listener(limits)) const hoursTillReset = Math.round( (limits.resetsAt ? limits.resetsAt - Date.now() / 1000 : 0) / (60 * 60), ) logEvent('tengu_claudeai_limits_status_changed', { status: limits.status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, unifiedRateLimitFallbackAvailable: limits.unifiedRateLimitFallbackAvailable, hoursTillReset, }) } async function makeTestQuery() { const model = getSmallFastModel() const anthropic = await getAnthropicClient({ maxRetries: 0, model, source: 'quota_check', }) const messages: MessageParam[] = [{ role: 'user', content: 'quota' }] const betas = getModelBetas(model) // biome-ignore lint/plugin: quota check needs raw response access via asResponse() return anthropic.beta.messages .create({ model, max_tokens: 1, messages, metadata: getAPIMetadata(), ...(betas.length > 0 ? { betas } : {}), }) .asResponse() } export async function checkQuotaStatus(): Promise { // Skip network requests if nonessential traffic is disabled if (isEssentialTrafficOnly()) { return } // Check if we should process rate limits (real subscriber or mock testing) if (!shouldProcessRateLimits(isClaudeAISubscriber())) { return } // In non-interactive mode (-p), the real query follows immediately and // extractQuotaStatusFromHeaders() will update limits from its response // headers (claude.ts), so skip this pre-check API call. if (getIsNonInteractiveSession()) { return } try { // Make a minimal request to check quota const raw = await makeTestQuery() // Update limits based on the response extractQuotaStatusFromHeaders(raw.headers) } catch (error) { if (error instanceof APIError) { extractQuotaStatusFromError(error) } } } /** * Check if early warning should be triggered based on surpassed-threshold header. * Returns ClaudeAILimits if a threshold was surpassed, null otherwise. */ function getHeaderBasedEarlyWarning( headers: globalThis.Headers, unifiedRateLimitFallbackAvailable: boolean, ): ClaudeAILimits | null { // Check each claim type for surpassed threshold header for (const [claimAbbrev, rateLimitType] of Object.entries( EARLY_WARNING_CLAIM_MAP, )) { const surpassedThreshold = headers.get( `anthropic-ratelimit-unified-${claimAbbrev}-surpassed-threshold`, ) // If threshold header is present, user has crossed a warning threshold if (surpassedThreshold !== null) { const utilizationHeader = headers.get( `anthropic-ratelimit-unified-${claimAbbrev}-utilization`, ) const resetHeader = headers.get( `anthropic-ratelimit-unified-${claimAbbrev}-reset`, ) const utilization = utilizationHeader ? Number(utilizationHeader) : undefined const resetsAt = resetHeader ? Number(resetHeader) : undefined return { status: 'allowed_warning', resetsAt, rateLimitType: rateLimitType as RateLimitType, utilization, unifiedRateLimitFallbackAvailable, isUsingOverage: false, surpassedThreshold: Number(surpassedThreshold), } } } return null } /** * Check if time-relative early warning should be triggered for a rate limit type. * Fallback when server doesn't send surpassed-threshold header. * Returns ClaudeAILimits if thresholds are exceeded, null otherwise. */ function getTimeRelativeEarlyWarning( headers: globalThis.Headers, config: EarlyWarningConfig, unifiedRateLimitFallbackAvailable: boolean, ): ClaudeAILimits | null { const { rateLimitType, claimAbbrev, windowSeconds, thresholds } = config const utilizationHeader = headers.get( `anthropic-ratelimit-unified-${claimAbbrev}-utilization`, ) const resetHeader = headers.get( `anthropic-ratelimit-unified-${claimAbbrev}-reset`, ) if (utilizationHeader === null || resetHeader === null) { return null } const utilization = Number(utilizationHeader) const resetsAt = Number(resetHeader) const timeProgress = computeTimeProgress(resetsAt, windowSeconds) // Check if any threshold is exceeded: high usage early in the window const shouldWarn = thresholds.some( t => utilization >= t.utilization && timeProgress <= t.timePct, ) if (!shouldWarn) { return null } return { status: 'allowed_warning', resetsAt, rateLimitType, utilization, unifiedRateLimitFallbackAvailable, isUsingOverage: false, } } /** * Get early warning limits using header-based detection with time-relative fallback. * 1. First checks for surpassed-threshold header (new server-side approach) * 2. Falls back to time-relative thresholds (client-side calculation) */ function getEarlyWarningFromHeaders( headers: globalThis.Headers, unifiedRateLimitFallbackAvailable: boolean, ): ClaudeAILimits | null { // Try header-based detection first (preferred when API sends the header) const headerBasedWarning = getHeaderBasedEarlyWarning( headers, unifiedRateLimitFallbackAvailable, ) if (headerBasedWarning) { return headerBasedWarning } // Fallback: Use time-relative thresholds (client-side calculation) // This catches users burning quota faster than sustainable for (const config of EARLY_WARNING_CONFIGS) { const timeRelativeWarning = getTimeRelativeEarlyWarning( headers, config, unifiedRateLimitFallbackAvailable, ) if (timeRelativeWarning) { return timeRelativeWarning } } return null } function computeNewLimitsFromHeaders( headers: globalThis.Headers, ): ClaudeAILimits { const status = (headers.get('anthropic-ratelimit-unified-status') as QuotaStatus) || 'allowed' const resetsAtHeader = headers.get('anthropic-ratelimit-unified-reset') const resetsAt = resetsAtHeader ? Number(resetsAtHeader) : undefined const unifiedRateLimitFallbackAvailable = headers.get('anthropic-ratelimit-unified-fallback') === 'available' // Headers for rate limit type and overage support const rateLimitType = headers.get( 'anthropic-ratelimit-unified-representative-claim', ) as RateLimitType | null const overageStatus = headers.get( 'anthropic-ratelimit-unified-overage-status', ) as QuotaStatus | null const overageResetsAtHeader = headers.get( 'anthropic-ratelimit-unified-overage-reset', ) const overageResetsAt = overageResetsAtHeader ? Number(overageResetsAtHeader) : undefined // Reason why overage is disabled (spending cap or wallet empty) const overageDisabledReason = headers.get( 'anthropic-ratelimit-unified-overage-disabled-reason', ) as OverageDisabledReason | null // Determine if we're using overage (standard limits rejected but overage allowed) const isUsingOverage = status === 'rejected' && (overageStatus === 'allowed' || overageStatus === 'allowed_warning') // Check for early warning based on surpassed-threshold header // If status is allowed/allowed_warning and we find a surpassed threshold, show warning let finalStatus: QuotaStatus = status if (status === 'allowed' || status === 'allowed_warning') { const earlyWarning = getEarlyWarningFromHeaders( headers, unifiedRateLimitFallbackAvailable, ) if (earlyWarning) { return earlyWarning } // No early warning threshold surpassed finalStatus = 'allowed' } return { status: finalStatus, resetsAt, unifiedRateLimitFallbackAvailable, ...(rateLimitType && { rateLimitType }), ...(overageStatus && { overageStatus }), ...(overageResetsAt && { overageResetsAt }), ...(overageDisabledReason && { overageDisabledReason }), isUsingOverage, } } /** * Cache the extra usage disabled reason from API headers. */ function cacheExtraUsageDisabledReason(headers: globalThis.Headers): void { // A null reason means extra usage is enabled (no disabled reason header) const reason = headers.get('anthropic-ratelimit-unified-overage-disabled-reason') ?? null const cached = getGlobalConfig().cachedExtraUsageDisabledReason if (cached !== reason) { saveGlobalConfig(current => ({ ...current, cachedExtraUsageDisabledReason: reason, })) } } export function extractQuotaStatusFromHeaders( headers: globalThis.Headers, ): void { // Check if we need to process rate limits const isSubscriber = isClaudeAISubscriber() if (!shouldProcessRateLimits(isSubscriber)) { // If we have any rate limit state, clear it rawUtilization = {} if (currentLimits.status !== 'allowed' || currentLimits.resetsAt) { const defaultLimits: ClaudeAILimits = { status: 'allowed', unifiedRateLimitFallbackAvailable: false, isUsingOverage: false, } emitStatusChange(defaultLimits) } return } // Process headers (applies mocks from /mock-limits command if active) const headersToUse = processRateLimitHeaders(headers) rawUtilization = extractRawUtilization(headersToUse) const newLimits = computeNewLimitsFromHeaders(headersToUse) // Cache extra usage status (persists across sessions) cacheExtraUsageDisabledReason(headersToUse) if (!isEqual(currentLimits, newLimits)) { emitStatusChange(newLimits) } } export function extractQuotaStatusFromError(error: APIError): void { if ( !shouldProcessRateLimits(isClaudeAISubscriber()) || error.status !== 429 ) { return } try { let newLimits = { ...currentLimits } if (error.headers) { // Process headers (applies mocks from /mock-limits command if active) const headersToUse = processRateLimitHeaders(error.headers) rawUtilization = extractRawUtilization(headersToUse) newLimits = computeNewLimitsFromHeaders(headersToUse) // Cache extra usage status (persists across sessions) cacheExtraUsageDisabledReason(headersToUse) } // For errors, always set status to rejected even if headers are not present. newLimits.status = 'rejected' if (!isEqual(currentLimits, newLimits)) { emitStatusChange(newLimits) } } catch (e) { logError(e as Error) } }