services/claudeAiLimits.ts at main · nonbinary.computer/claude-code

nonbinary.computer / claude-code
forked from oppi.li/claude-code
fork atom
source dump of claude code
fork atom
claude-code / services / claudeAiLimits.ts
at main 515 lines 17 kB view raw
wrap content
oppi.li dump from zip 11d ago
63aada3f
  1import { APIError } from '@anthropic-ai/sdk'
  2import type { MessageParam } from '@anthropic-ai/sdk/resources/index.mjs'
  3import isEqual from 'lodash-es/isEqual.js'
  4import { getIsNonInteractiveSession } from '../bootstrap/state.js'
  5import { isClaudeAISubscriber } from '../utils/auth.js'
  6import { getModelBetas } from '../utils/betas.js'
  7import { getGlobalConfig, saveGlobalConfig } from '../utils/config.js'
  8import { logError } from '../utils/log.js'
  9import { getSmallFastModel } from '../utils/model/model.js'
 10import { isEssentialTrafficOnly } from '../utils/privacyLevel.js'
 11import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from './analytics/index.js'
 12import { logEvent } from './analytics/index.js'
 13import { getAPIMetadata } from './api/claude.js'
 14import { getAnthropicClient } from './api/client.js'
 15import {
 16  processRateLimitHeaders,
 17  shouldProcessRateLimits,
 18} from './rateLimitMocking.js'
 19
 20// Re-export message functions from centralized location
 21export {
 22  getRateLimitErrorMessage,
 23  getRateLimitWarning,
 24  getUsingOverageText,
 25} from './rateLimitMessages.js'
 26
 27type QuotaStatus = 'allowed' | 'allowed_warning' | 'rejected'
 28
 29type RateLimitType =
 30  | 'five_hour'
 31  | 'seven_day'
 32  | 'seven_day_opus'
 33  | 'seven_day_sonnet'
 34  | 'overage'
 35
 36export type { RateLimitType }
 37
 38type EarlyWarningThreshold = {
 39  utilization: number // 0-1 scale: trigger warning when usage >= this
 40  timePct: number // 0-1 scale: trigger warning when time elapsed <= this
 41}
 42
 43type EarlyWarningConfig = {
 44  rateLimitType: RateLimitType
 45  claimAbbrev: '5h' | '7d'
 46  windowSeconds: number
 47  thresholds: EarlyWarningThreshold[]
 48}
 49
 50// Early warning configurations in priority order (checked first to last)
 51// Used as fallback when server doesn't send surpassed-threshold header
 52// Warns users when they're consuming quota faster than the time window allows
 53const EARLY_WARNING_CONFIGS: EarlyWarningConfig[] = [
 54  {
 55    rateLimitType: 'five_hour',
 56    claimAbbrev: '5h',
 57    windowSeconds: 5 * 60 * 60,
 58    thresholds: [{ utilization: 0.9, timePct: 0.72 }],
 59  },
 60  {
 61    rateLimitType: 'seven_day',
 62    claimAbbrev: '7d',
 63    windowSeconds: 7 * 24 * 60 * 60,
 64    thresholds: [
 65      { utilization: 0.75, timePct: 0.6 },
 66      { utilization: 0.5, timePct: 0.35 },
 67      { utilization: 0.25, timePct: 0.15 },
 68    ],
 69  },
 70]
 71
 72// Maps claim abbreviations to rate limit types for header-based detection
 73const EARLY_WARNING_CLAIM_MAP: Record<string, RateLimitType> = {
 74  '5h': 'five_hour',
 75  '7d': 'seven_day',
 76  overage: 'overage',
 77}
 78
 79const RATE_LIMIT_DISPLAY_NAMES: Record<RateLimitType, string> = {
 80  five_hour: 'session limit',
 81  seven_day: 'weekly limit',
 82  seven_day_opus: 'Opus limit',
 83  seven_day_sonnet: 'Sonnet limit',
 84  overage: 'extra usage limit',
 85}
 86
 87export function getRateLimitDisplayName(type: RateLimitType): string {
 88  return RATE_LIMIT_DISPLAY_NAMES[type] || type
 89}
 90
 91/**
 92 * Calculate what fraction of a time window has elapsed.
 93 * Used for time-relative early warning fallback.
 94 * @param resetsAt - Unix epoch timestamp in seconds when the limit resets
 95 * @param windowSeconds - Duration of the window in seconds
 96 * @returns fraction (0-1) of the window that has elapsed
 97 */
 98function computeTimeProgress(resetsAt: number, windowSeconds: number): number {
 99  const nowSeconds = Date.now() / 1000
100  const windowStart = resetsAt - windowSeconds
101  const elapsed = nowSeconds - windowStart
102  return Math.max(0, Math.min(1, elapsed / windowSeconds))
103}
104
/**
 * Reason why overage is disabled/rejected.
 * These values come from the API's unified limiter.
 */
export type OverageDisabledReason =
  // Overage is not provisioned for this org or seat tier
  | 'overage_not_provisioned'
  // Organization doesn't have overage enabled
  | 'org_level_disabled'
  // Organization overage temporarily disabled
  | 'org_level_disabled_until'
  // Organization has insufficient credits
  | 'out_of_credits'
  // Seat tier doesn't have overage enabled
  | 'seat_tier_level_disabled'
  // Account specifically has overage disabled
  | 'member_level_disabled'
  // Seat tier has a zero credit limit
  | 'seat_tier_zero_credit_limit'
  // Resolved group limit has a zero credit limit
  | 'group_zero_credit_limit'
  // Account has a zero credit limit
  | 'member_zero_credit_limit'
  // Org service specifically has overage disabled
  | 'org_service_level_disabled'
  // Org service has a zero credit limit
  | 'org_service_zero_credit_limit'
  // No overage limits configured for account
  | 'no_limits_configured'
  // Unknown reason, should not happen
  | 'unknown'
121
/** Snapshot of the quota state derived from rate-limit response headers. */
export type ClaudeAILimits = {
  status: QuotaStatus
  // unifiedRateLimitFallbackAvailable is currently used to warn users that set
  // their model to Opus whenever they are about to run out of quota. It does
  // not change the actual model that is used.
  unifiedRateLimitFallbackAvailable: boolean
  // Unix epoch seconds at which the limit resets
  resetsAt?: number
  rateLimitType?: RateLimitType
  // Usage fraction parsed from the utilization header
  utilization?: number
  overageStatus?: QuotaStatus
  // Parsed from the overage reset header
  overageResetsAt?: number
  overageDisabledReason?: OverageDisabledReason
  // True when the standard limit is rejected but overage is still allowed
  isUsingOverage?: boolean
  // Numeric value of the surpassed-threshold header, when one was sent
  surpassedThreshold?: number
}

// Most recently emitted limits; starts out as "allowed" with no overage.
// Exported for testing only
export let currentLimits: ClaudeAILimits = {
  status: 'allowed',
  unifiedRateLimitFallbackAvailable: false,
  isUsingOverage: false,
}
144
/**
 * Raw utilization of a single rate-limit window, as read from response
 * headers.
 */
type RawWindowUtilization = {
  utilization: number // 0-1 fraction
  resets_at: number // unix epoch seconds
}

/**
 * Raw per-window utilization from response headers, tracked on every API
 * response (unlike currentLimits.utilization which is only set when a warning
 * threshold fires). A window is absent when its headers were not available.
 */
type RawUtilization = {
  five_hour?: RawWindowUtilization
  seven_day?: RawWindowUtilization
}

// Latest raw utilization snapshot; replaced wholesale whenever headers are
// processed.
let rawUtilization: RawUtilization = {}

/**
 * Returns the latest raw per-window utilization snapshot.
 * Exposed to statusline scripts.
 */
export function getRawUtilization(): RawUtilization {
  return rawUtilization
}
163
/**
 * Reads the per-window utilization and reset headers for the 5-hour and 7-day
 * windows.
 *
 * A window is included only when both of its headers are present and both
 * parse to finite numbers. Missing, empty or non-numeric values cause that
 * window to be omitted, so callers never receive NaN or a 0 that was
 * fabricated from an empty header.
 *
 * @param headers - Response headers to inspect
 * @returns an object with `five_hour` and/or `seven_day` entries for the
 *   windows whose headers were usable
 */
function extractRawUtilization(headers: globalThis.Headers): RawUtilization {
  const result: RawUtilization = {}
  const windows = [
    ['five_hour', '5h'],
    ['seven_day', '7d'],
  ] as const

  for (const [key, abbrev] of windows) {
    const util = headers.get(
      `anthropic-ratelimit-unified-${abbrev}-utilization`,
    )
    const reset = headers.get(`anthropic-ratelimit-unified-${abbrev}-reset`)
    if (util === null || reset === null) {
      continue
    }
    // Number('') is 0, which would masquerade as a real reading
    if (util.trim() === '' || reset.trim() === '') {
      continue
    }

    const utilization = Number(util)
    const resetsAt = Number(reset)
    if (!Number.isFinite(utilization) || !Number.isFinite(resetsAt)) {
      continue
    }

    result[key] = { utilization, resets_at: resetsAt }
  }
  return result
}
180
/** Callback invoked with the new limits whenever a status change is emitted. */
type StatusChangeListener = (limits: ClaudeAILimits) => void

/** Registered status-change listeners. */
export const statusListeners: Set<StatusChangeListener> = new Set()

/**
 * Records `limits` as the current limits, notifies every registered listener,
 * and logs an analytics event describing the change.
 * @param limits - The new limits to publish
 */
export function emitStatusChange(limits: ClaudeAILimits) {
  currentLimits = limits

  for (const notify of statusListeners) {
    notify(limits)
  }

  // resetsAt is in epoch seconds; an absent reset time is reported as 0 hours
  const secondsTillReset = limits.resetsAt
    ? limits.resetsAt - Date.now() / 1000
    : 0
  const hoursTillReset = Math.round(secondsTillReset / (60 * 60))

  logEvent('tengu_claudeai_limits_status_changed', {
    status:
      limits.status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    unifiedRateLimitFallbackAvailable: limits.unifiedRateLimitFallbackAvailable,
    hoursTillReset,
  })
}
198
/**
 * Sends a minimal one-token request with the small fast model and returns the
 * raw HTTP response (via asResponse()) so the caller can read its headers.
 * Retries are disabled and the request is tagged with source 'quota_check'.
 */
async function makeTestQuery() {
  const model = getSmallFastModel()
  const client = await getAnthropicClient({
    maxRetries: 0,
    model,
    source: 'quota_check',
  })
  const messages: MessageParam[] = [{ role: 'user', content: 'quota' }]
  const betas = getModelBetas(model)
  // Only send the betas field when there is at least one beta to enable
  const betaParams = betas.length > 0 ? { betas } : {}

  // biome-ignore lint/plugin: quota check needs raw response access via asResponse()
  return client.beta.messages
    .create({
      model,
      max_tokens: 1,
      messages,
      metadata: getAPIMetadata(),
      ...betaParams,
    })
    .asResponse()
}
219
/**
 * Proactively refreshes the quota status by sending a minimal test request
 * and feeding its response headers (or its API error) into the limit
 * extraction functions.
 *
 * No request is made when:
 * - nonessential network traffic is disabled,
 * - rate limits should not be processed (not a real subscriber and no mock
 *   testing), or
 * - the session is non-interactive (-p): the real query follows immediately
 *   and extractQuotaStatusFromHeaders() will update limits from its response
 *   headers (claude.ts).
 *
 * Failures that are not APIError instances are ignored.
 */
export async function checkQuotaStatus(): Promise<void> {
  const shouldSkip =
    isEssentialTrafficOnly() ||
    !shouldProcessRateLimits(isClaudeAISubscriber()) ||
    getIsNonInteractiveSession()
  if (shouldSkip) {
    return
  }

  try {
    // Minimal request whose only purpose is to obtain rate-limit headers
    const response = await makeTestQuery()
    extractQuotaStatusFromHeaders(response.headers)
  } catch (err) {
    if (!(err instanceof APIError)) {
      // Best-effort check: anything other than an API error is dropped
      return
    }
    extractQuotaStatusFromError(err)
  }
}
250
/**
 * Server-driven early warning detection.
 *
 * Walks the known claim abbreviations in EARLY_WARNING_CLAIM_MAP and returns
 * a warning for the first one whose surpassed-threshold header is present.
 * Utilization and reset time are filled in from that claim's headers when
 * they are non-empty, and left undefined otherwise.
 *
 * @param headers - Response headers to inspect
 * @param unifiedRateLimitFallbackAvailable - Copied into the returned limits
 * @returns ClaudeAILimits with status 'allowed_warning' if a threshold was
 *   surpassed, null otherwise
 */
function getHeaderBasedEarlyWarning(
  headers: globalThis.Headers,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  const claims = Object.entries(EARLY_WARNING_CLAIM_MAP)

  for (const [claimAbbrev, rateLimitType] of claims) {
    const surpassedThreshold = headers.get(
      `anthropic-ratelimit-unified-${claimAbbrev}-surpassed-threshold`,
    )
    if (surpassedThreshold === null) {
      // No threshold crossed for this claim; try the next one
      continue
    }

    const utilizationHeader = headers.get(
      `anthropic-ratelimit-unified-${claimAbbrev}-utilization`,
    )
    const resetHeader = headers.get(
      `anthropic-ratelimit-unified-${claimAbbrev}-reset`,
    )

    return {
      status: 'allowed_warning',
      resetsAt: resetHeader ? Number(resetHeader) : undefined,
      rateLimitType,
      utilization: utilizationHeader ? Number(utilizationHeader) : undefined,
      unifiedRateLimitFallbackAvailable,
      isUsingOverage: false,
      surpassedThreshold: Number(surpassedThreshold),
    }
  }

  return null
}
295
/**
 * Client-side early warning for one rate-limit window, used when the server
 * did not send a surpassed-threshold header.
 *
 * Reads the window's utilization and reset headers, computes how far through
 * the window we are, and warns if any configured threshold sees usage at or
 * above its utilization level while time progress is at or below its timePct.
 *
 * @param headers - Response headers to inspect
 * @param config - Window definition and thresholds to evaluate
 * @param unifiedRateLimitFallbackAvailable - Copied into the returned limits
 * @returns ClaudeAILimits with status 'allowed_warning' if a threshold is
 *   exceeded; null if either header is missing or no threshold matches
 */
function getTimeRelativeEarlyWarning(
  headers: globalThis.Headers,
  config: EarlyWarningConfig,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  const utilizationText = headers.get(
    `anthropic-ratelimit-unified-${config.claimAbbrev}-utilization`,
  )
  const resetText = headers.get(
    `anthropic-ratelimit-unified-${config.claimAbbrev}-reset`,
  )
  if (utilizationText === null || resetText === null) {
    return null
  }

  const utilization = Number(utilizationText)
  const resetsAt = Number(resetText)
  const timeProgress = computeTimeProgress(resetsAt, config.windowSeconds)

  // High usage early in the window: the first matching threshold is enough
  for (const threshold of config.thresholds) {
    const usageHigh = utilization >= threshold.utilization
    const stillEarly = timeProgress <= threshold.timePct
    if (usageHigh && stillEarly) {
      return {
        status: 'allowed_warning',
        resetsAt,
        rateLimitType: config.rateLimitType,
        utilization,
        unifiedRateLimitFallbackAvailable,
        isUsingOverage: false,
      }
    }
  }

  return null
}
341
/**
 * Resolves the early warning (if any) for a set of response headers.
 *
 * Detection order:
 * 1. The surpassed-threshold header (server-side detection), preferred
 *    whenever the API sends it.
 * 2. Time-relative thresholds computed on the client, evaluated for each
 *    entry of EARLY_WARNING_CONFIGS in order; the first match wins.
 *
 * @param headers - Response headers to inspect
 * @param unifiedRateLimitFallbackAvailable - Forwarded to the detectors
 * @returns the first warning found, or null when nothing triggers
 */
function getEarlyWarningFromHeaders(
  headers: globalThis.Headers,
  unifiedRateLimitFallbackAvailable: boolean,
): ClaudeAILimits | null {
  const serverWarning = getHeaderBasedEarlyWarning(
    headers,
    unifiedRateLimitFallbackAvailable,
  )
  if (serverWarning) {
    return serverWarning
  }

  // Fallback: catches users burning quota faster than is sustainable
  for (const windowConfig of EARLY_WARNING_CONFIGS) {
    const clientWarning = getTimeRelativeEarlyWarning(
      headers,
      windowConfig,
      unifiedRateLimitFallbackAvailable,
    )
    if (clientWarning) {
      return clientWarning
    }
  }

  return null
}
375
/**
 * Builds a ClaudeAILimits snapshot from the unified rate-limit headers.
 *
 * When the reported status is 'allowed' or 'allowed_warning', early-warning
 * detection decides the outcome: a detected warning is returned as-is,
 * otherwise the status is reported as plain 'allowed'. Any other status is
 * passed through unchanged. Optional fields are only set when their header
 * yields a truthy value.
 *
 * @param headers - Response headers to read
 * @returns the limits described by the headers
 */
function computeNewLimitsFromHeaders(
  headers: globalThis.Headers,
): ClaudeAILimits {
  // A missing or empty status header is treated as 'allowed'
  const statusHeader = headers.get(
    'anthropic-ratelimit-unified-status',
  ) as QuotaStatus | null
  const status: QuotaStatus = statusHeader ? statusHeader : 'allowed'

  const resetsAtHeader = headers.get('anthropic-ratelimit-unified-reset')
  const resetsAt = resetsAtHeader ? Number(resetsAtHeader) : undefined

  const unifiedRateLimitFallbackAvailable =
    headers.get('anthropic-ratelimit-unified-fallback') === 'available'

  // Headers for rate limit type and overage support
  const rateLimitType = headers.get(
    'anthropic-ratelimit-unified-representative-claim',
  ) as RateLimitType | null
  const overageStatus = headers.get(
    'anthropic-ratelimit-unified-overage-status',
  ) as QuotaStatus | null
  const overageResetsAtHeader = headers.get(
    'anthropic-ratelimit-unified-overage-reset',
  )
  const overageResetsAt = overageResetsAtHeader
    ? Number(overageResetsAtHeader)
    : undefined

  // Reason why overage is disabled (spending cap or wallet empty)
  const overageDisabledReason = headers.get(
    'anthropic-ratelimit-unified-overage-disabled-reason',
  ) as OverageDisabledReason | null

  const withinStandardLimits =
    status === 'allowed' || status === 'allowed_warning'
  if (withinStandardLimits) {
    const earlyWarning = getEarlyWarningFromHeaders(
      headers,
      unifiedRateLimitFallbackAvailable,
    )
    if (earlyWarning) {
      return earlyWarning
    }
  }

  const limits: ClaudeAILimits = {
    // With no early warning threshold surpassed, an allowed/allowed_warning
    // status is reported as plain 'allowed'
    status: withinStandardLimits ? 'allowed' : status,
    resetsAt,
    unifiedRateLimitFallbackAvailable,
  }
  if (rateLimitType) {
    limits.rateLimitType = rateLimitType
  }
  if (overageStatus) {
    limits.overageStatus = overageStatus
  }
  if (overageResetsAt) {
    limits.overageResetsAt = overageResetsAt
  }
  if (overageDisabledReason) {
    limits.overageDisabledReason = overageDisabledReason
  }
  // Using overage: standard limits rejected but overage allowed
  limits.isUsingOverage =
    status === 'rejected' &&
    (overageStatus === 'allowed' || overageStatus === 'allowed_warning')

  return limits
}
437
/**
 * Persists the overage ("extra usage") disabled reason from the response
 * headers into the global config, writing only when the value has changed.
 *
 * @param headers - Response headers to read the disabled reason from
 */
function cacheExtraUsageDisabledReason(headers: globalThis.Headers): void {
  // No disabled-reason header means extra usage is enabled; stored as null
  const reason =
    headers.get('anthropic-ratelimit-unified-overage-disabled-reason') ?? null

  const previouslyCached = getGlobalConfig().cachedExtraUsageDisabledReason
  if (previouslyCached === reason) {
    // Unchanged: skip the config write
    return
  }

  saveGlobalConfig(current => ({
    ...current,
    cachedExtraUsageDisabledReason: reason,
  }))
}
453
/**
 * Updates the module's quota state from a successful response's headers.
 *
 * When rate limits should be processed, the headers (after mock processing)
 * refresh the raw utilization, the cached extra-usage disabled reason, and,
 * if the computed limits differ from the current ones, the current limits
 * via emitStatusChange(). Otherwise any existing rate-limit state is cleared.
 *
 * @param headers - Response headers from an API call
 */
export function extractQuotaStatusFromHeaders(
  headers: globalThis.Headers,
): void {
  const isSubscriber = isClaudeAISubscriber()

  if (shouldProcessRateLimits(isSubscriber)) {
    // Process headers (applies mocks from /mock-limits command if active)
    const headersToUse = processRateLimitHeaders(headers)
    rawUtilization = extractRawUtilization(headersToUse)
    const newLimits = computeNewLimitsFromHeaders(headersToUse)

    // Cache extra usage status (persists across sessions)
    cacheExtraUsageDisabledReason(headersToUse)

    if (!isEqual(currentLimits, newLimits)) {
      emitStatusChange(newLimits)
    }
    return
  }

  // Rate limits are not applicable: drop whatever state we were holding
  rawUtilization = {}
  const hasLimitState =
    currentLimits.status !== 'allowed' || Boolean(currentLimits.resetsAt)
  if (hasLimitState) {
    emitStatusChange({
      status: 'allowed',
      unifiedRateLimitFallbackAvailable: false,
      isUsingOverage: false,
    })
  }
}
486
/**
 * Updates the module's quota state from an API error.
 *
 * Only 429 errors are considered, and only when rate limits should be
 * processed. The new limits come from the error's headers when present,
 * otherwise from a copy of the current limits; in both cases the status is
 * forced to 'rejected'. Exceptions raised while processing are logged, not
 * rethrown.
 *
 * @param error - API error returned by a request
 */
export function extractQuotaStatusFromError(error: APIError): void {
  const shouldProcess = shouldProcessRateLimits(isClaudeAISubscriber())
  if (!shouldProcess || error.status !== 429) {
    return
  }

  try {
    let updatedLimits: ClaudeAILimits
    if (error.headers) {
      // Process headers (applies mocks from /mock-limits command if active)
      const headersToUse = processRateLimitHeaders(error.headers)
      rawUtilization = extractRawUtilization(headersToUse)
      updatedLimits = computeNewLimitsFromHeaders(headersToUse)

      // Cache extra usage status (persists across sessions)
      cacheExtraUsageDisabledReason(headersToUse)
    } else {
      updatedLimits = { ...currentLimits }
    }

    // For errors, always set status to rejected even if headers are not present.
    updatedLimits.status = 'rejected'

    if (!isEqual(currentLimits, updatedLimits)) {
      emitStatusChange(updatedLimits)
    }
  } catch (e) {
    logError(e as Error)
  }
}