source dump of claude code
at main 822 lines 28 kB view raw
import { feature } from 'bun:bundle'
import type Anthropic from '@anthropic-ai/sdk'
import {
  APIConnectionError,
  APIError,
  APIUserAbortError,
} from '@anthropic-ai/sdk'
import type { QuerySource } from 'src/constants/querySource.js'
import type { SystemAPIErrorMessage } from 'src/types/message.js'
import { isAwsCredentialsProviderError } from 'src/utils/aws.js'
import { logForDebugging } from 'src/utils/debug.js'
import { logError } from 'src/utils/log.js'
import { createSystemAPIErrorMessage } from 'src/utils/messages.js'
import { getAPIProviderForStatsig } from 'src/utils/model/providers.js'
import {
  clearApiKeyHelperCache,
  clearAwsCredentialsCache,
  clearGcpCredentialsCache,
  getClaudeAIOAuthTokens,
  handleOAuth401Error,
  isClaudeAISubscriber,
  isEnterpriseSubscriber,
} from '../../utils/auth.js'
import { isEnvTruthy } from '../../utils/envUtils.js'
import { errorMessage } from '../../utils/errors.js'
import {
  type CooldownReason,
  handleFastModeOverageRejection,
  handleFastModeRejectedByAPI,
  isFastModeCooldown,
  isFastModeEnabled,
  triggerFastModeCooldown,
} from '../../utils/fastMode.js'
import { isNonCustomOpusModel } from '../../utils/model/model.js'
import { disableKeepAlive } from '../../utils/proxy.js'
import { sleep } from '../../utils/sleep.js'
import type { ThinkingConfig } from '../../utils/thinking.js'
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
import {
  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  logEvent,
} from '../analytics/index.js'
import {
  checkMockRateLimitError,
  isMockRateLimitError,
} from '../rateLimitMocking.js'
import { REPEATED_529_ERROR_MESSAGE } from './errors.js'
import { extractConnectionErrorDetails } from './errorUtils.js'

// Factory handed to `sleep(..., { abortError })` so an aborted wait rejects
// with the SDK's user-abort error type.
function abortError(): APIUserAbortError {
  return new APIUserAbortError()
}

// Retry budget used when neither the caller nor CLAUDE_CODE_MAX_RETRIES sets one.
const DEFAULT_MAX_RETRIES = 10
// Smallest output-token allowance the context-overflow adjustment will accept.
const FLOOR_OUTPUT_TOKENS = 3000
// Number of counted 529s after which fallback / the custom overloaded error fires.
const MAX_529_RETRIES = 3
// Base of the exponential backoff, in milliseconds (doubled per attempt).
export const BASE_DELAY_MS = 500

// Query sources whose result the user is actively waiting on. Only these are
// retried on 529. Background work (summaries, titles, suggestions,
// classifiers) gives up at once: during a capacity cascade each retry is
// 3-10× gateway amplification, and the user never sees those fail anyway.
// A new source is no-retry by default — list it here only when the user
// blocks on its result.
const FOREGROUND_529_RETRY_SOURCES = new Set<QuerySource>([
  'repl_main_thread',
  'repl_main_thread:outputStyle:custom',
  'repl_main_thread:outputStyle:Explanatory',
  'repl_main_thread:outputStyle:Learning',
  'sdk',
  'agent:custom',
  'agent:default',
  'agent:builtin',
  'compact',
  'hook_agent',
  'hook_prompt',
  'verification_agent',
  'side_question',
  // Security classifiers have to finish for auto-mode to be correct.
  // yoloClassifier.ts tags its queries 'auto_mode' ('yolo_classifier' is
  // type-only). bash_classifier is ant-only, so it sits behind a feature gate
  // and the string tree-shakes out of external builds (excluded-strings.txt).
  'auto_mode',
  ...(feature('BASH_CLASSIFIER') ? (['bash_classifier'] as const) : []),
])

/**
 * Decide whether a 529 from the given query source should be retried.
 *
 * @param querySource - tag of the call path that issued the request
 * @returns true for untagged callers and for foreground sources
 */
function shouldRetry529(querySource: QuerySource | undefined): boolean {
  // Untagged call paths are retried — the conservative choice.
  if (querySource === undefined) {
    return true
  }
  return FOREGROUND_529_RETRY_SOURCES.has(querySource)
}

// CLAUDE_CODE_UNATTENDED_RETRY: for unattended sessions (ant-only). Retries 429/529
// indefinitely with higher backoff and periodic keep-alive yields so the host
// environment does not mark the session idle mid-wait.
// TODO(ANT-344): the keep-alive via SystemAPIErrorMessage yields is a stopgap
// until there's a dedicated keep-alive channel.
// Persistent (unattended) retry tuning:
// - backoff between attempts is capped at 5 minutes,
// - any single wait (including server-directed ones) is capped at 6 hours,
// - long waits are sliced into 30s chunks, each preceded by a yield.
const PERSISTENT_MAX_BACKOFF_MS = 5 * 60 * 1000
const PERSISTENT_RESET_CAP_MS = 6 * 60 * 60 * 1000
const HEARTBEAT_INTERVAL_MS = 30_000

/** True when the UNATTENDED_RETRY feature is built in and the env var opts in. */
function isPersistentRetryEnabled(): boolean {
  return feature('UNATTENDED_RETRY')
    ? isEnvTruthy(process.env.CLAUDE_CODE_UNATTENDED_RETRY)
    : false
}

/** True for 529 (overloaded) errors and for APIError responses with status 429. */
function isTransientCapacityError(error: unknown): boolean {
  return (
    is529Error(error) || (error instanceof APIError && error.status === 429)
  )
}

/** True for APIConnectionError whose extracted code is ECONNRESET or EPIPE. */
function isStaleConnectionError(error: unknown): boolean {
  if (!(error instanceof APIConnectionError)) {
    return false
  }
  const details = extractConnectionErrorDetails(error)
  return details?.code === 'ECONNRESET' || details?.code === 'EPIPE'
}

/**
 * Mutable per-call state handed to every `operation` invocation. The retry
 * loop updates it between attempts (e.g. clears `fastMode`, sets
 * `maxTokensOverride` after a context-overflow 400).
 */
export interface RetryContext {
  maxTokensOverride?: number
  model: string
  thinkingConfig: ThinkingConfig
  fastMode?: boolean
}

interface RetryOptions {
  maxRetries?: number
  model: string
  fallbackModel?: string
  thinkingConfig: ThinkingConfig
  fastMode?: boolean
  signal?: AbortSignal
  querySource?: QuerySource
  /**
   * Pre-seed the consecutive 529 counter. Used when this retry loop is a
   * non-streaming fallback after a streaming 529 — the streaming 529 should
   * count toward MAX_529_RETRIES so total 529s-before-fallback is consistent
   * regardless of which request mode hit the overload.
   */
  initialConsecutive529Errors?: number
}

/**
 * Thrown by the retry loop when it gives up. Carries the triggering error and
 * the retry context as it stood at that point; message and stack are copied
 * from the original error when available.
 */
export class CannotRetryError extends Error {
  constructor(
    public readonly originalError: unknown,
    public readonly retryContext: RetryContext,
  ) {
    const message = errorMessage(originalError)
    super(message)
    // NOTE(review): the name is 'RetryError', not the class name. Left as-is
    // in case callers or telemetry match on this string — confirm.
    this.name = 'RetryError'

    // Preserve the original stack trace if available
    if (originalError instanceof Error && originalError.stack) {
      this.stack = originalError.stack
    }
  }
}

/** Thrown to tell the caller to re-issue the request against `fallbackModel`. */
export class FallbackTriggeredError extends Error {
  constructor(
    public readonly originalModel: string,
    public readonly fallbackModel: string,
  ) {
    super(`Model fallback triggered: ${originalModel} -> ${fallbackModel}`)
    this.name = 'FallbackTriggeredError'
  }
}

/**
 * Run `operation` with retries, yielding a SystemAPIErrorMessage before each
 * wait that follows an APIError.
 *
 * The loop makes up to `maxRetries + 1` attempts (unbounded for 429/529 in
 * persistent mode). A client is obtained from `getClient` on the first attempt
 * and again after auth failures or stale-connection errors.
 *
 * @param getClient - factory for the API client
 * @param operation - the request; receives the client, the loop's attempt
 *   number (starting at 1) and the mutable retry context
 * @param options - retry budget, model/fallback model, abort signal, etc.
 * @returns the value resolved by `operation` (as the generator's return value)
 * @throws APIUserAbortError when `options.signal` is aborted
 * @throws FallbackTriggeredError when enough 529s were counted and a
 *   `fallbackModel` is configured
 * @throws CannotRetryError when the error is not retryable or the budget is
 *   exhausted
 * @throws the original APIError when a context-overflow 400 leaves less than
 *   FLOOR_OUTPUT_TOKENS of room
 */
export async function* withRetry<T>(
  getClient: () => Promise<Anthropic>,
  operation: (
    client: Anthropic,
    attempt: number,
    context: RetryContext,
  ) => Promise<T>,
  options: RetryOptions,
): AsyncGenerator<SystemAPIErrorMessage, T> {
  const maxRetries = getMaxRetries(options)
  const retryContext: RetryContext = {
    model: options.model,
    thinkingConfig: options.thinkingConfig,
    ...(isFastModeEnabled() && { fastMode: options.fastMode }),
  }
  let client: Anthropic | null = null
  // NOTE(review): this counter is never reset when a non-529 error intervenes,
  // so it counts 529s seen in this loop rather than strictly consecutive
  // ones — confirm that is intended.
  let consecutive529Errors = options.initialConsecutive529Errors ?? 0
  let lastError: unknown
  let persistentAttempt = 0
  for (let attempt = 1; attempt <= maxRetries + 1; attempt++) {
    if (options.signal?.aborted) {
      throw new APIUserAbortError()
    }

    // Capture whether fast mode is active before this attempt
    // (fallback may change the state mid-loop)
    const wasFastModeActive = isFastModeEnabled()
      ? retryContext.fastMode && !isFastModeCooldown()
      : false

    try {
      // Check for mock rate limits (used by /mock-limits command for Ant employees)
      if (process.env.USER_TYPE === 'ant') {
        const mockError = checkMockRateLimitError(
          retryContext.model,
          wasFastModeActive,
        )
        if (mockError) {
          throw mockError
        }
      }

      // Get a fresh client instance on first attempt or after authentication errors
      // - 401 for first-party API authentication failures
      // - 403 "OAuth token has been revoked" (another process refreshed the token)
      // - Bedrock-specific auth errors (403 or CredentialsProviderError)
      // - Vertex-specific auth errors (credential refresh failures, 401)
      // - ECONNRESET/EPIPE: stale keep-alive socket; disable pooling and reconnect
      const isStaleConnection = isStaleConnectionError(lastError)
      if (
        isStaleConnection &&
        getFeatureValue_CACHED_MAY_BE_STALE(
          'tengu_disable_keepalive_on_econnreset',
          false,
        )
      ) {
        logForDebugging(
          'Stale connection (ECONNRESET/EPIPE) — disabling keep-alive for retry',
        )
        disableKeepAlive()
      }

      if (
        client === null ||
        (lastError instanceof APIError && lastError.status === 401) ||
        isOAuthTokenRevokedError(lastError) ||
        isBedrockAuthError(lastError) ||
        isVertexAuthError(lastError) ||
        isStaleConnection
      ) {
        // On 401 "token expired" or 403 "token revoked", force a token refresh
        if (
          (lastError instanceof APIError && lastError.status === 401) ||
          isOAuthTokenRevokedError(lastError)
        ) {
          const failedAccessToken = getClaudeAIOAuthTokens()?.accessToken
          if (failedAccessToken) {
            await handleOAuth401Error(failedAccessToken)
          }
        }
        client = await getClient()
      }

      return await operation(client, attempt, retryContext)
    } catch (error) {
      lastError = error
      logForDebugging(
        `API error (attempt ${attempt}/${maxRetries + 1}): ${error instanceof APIError ? `${error.status} ${error.message}` : errorMessage(error)}`,
        { level: 'error' },
      )

      // Fast mode fallback: on 429/529, either wait and retry (short delays)
      // or fall back to standard speed (long delays) to avoid cache thrashing.
      // Skip in persistent mode: the short-retry path below loops with fast
      // mode still active, so its `continue` never reaches the attempt clamp
      // and the for-loop terminates. Persistent sessions want the chunked
      // keep-alive path instead of fast-mode cache-preservation anyway.
      if (
        wasFastModeActive &&
        !isPersistentRetryEnabled() &&
        error instanceof APIError &&
        (error.status === 429 || is529Error(error))
      ) {
        // If the 429 is specifically because extra usage (overage) is not
        // available, permanently disable fast mode with a specific message.
        const overageReason = error.headers?.get(
          'anthropic-ratelimit-unified-overage-disabled-reason',
        )
        if (overageReason !== null && overageReason !== undefined) {
          handleFastModeOverageRejection(overageReason)
          retryContext.fastMode = false
          continue
        }

        const retryAfterMs = getRetryAfterMs(error)
        if (retryAfterMs !== null && retryAfterMs < SHORT_RETRY_THRESHOLD_MS) {
          // Short retry-after: wait and retry with fast mode still active
          // to preserve prompt cache (same model name on retry).
          await sleep(retryAfterMs, options.signal, { abortError })
          continue
        }
        // Long or unknown retry-after: enter cooldown (switches to standard
        // speed model), with a minimum floor to avoid flip-flopping.
        const cooldownMs = Math.max(
          retryAfterMs ?? DEFAULT_FAST_MODE_FALLBACK_HOLD_MS,
          MIN_COOLDOWN_MS,
        )
        const cooldownReason: CooldownReason = is529Error(error)
          ? 'overloaded'
          : 'rate_limit'
        triggerFastModeCooldown(Date.now() + cooldownMs, cooldownReason)
        if (isFastModeEnabled()) {
          retryContext.fastMode = false
        }
        continue
      }

      // Fast mode fallback: if the API rejects the fast mode parameter
      // (e.g., org doesn't have fast mode enabled), permanently disable fast
      // mode and retry at standard speed.
      if (wasFastModeActive && isFastModeNotEnabledError(error)) {
        handleFastModeRejectedByAPI()
        retryContext.fastMode = false
        continue
      }

      // Non-foreground sources bail immediately on 529 — no retry amplification
      // during capacity cascades. User never sees these fail.
      if (is529Error(error) && !shouldRetry529(options.querySource)) {
        logEvent('tengu_api_529_background_dropped', {
          query_source:
            options.querySource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        })
        throw new CannotRetryError(error, retryContext)
      }

      // Track consecutive 529 errors
      if (
        is529Error(error) &&
        // If FALLBACK_FOR_ALL_PRIMARY_MODELS is not set, fall through only if the primary model is a non-custom Opus model.
        // TODO: Revisit if the isNonCustomOpusModel check should still exist, or if isNonCustomOpusModel is a stale artifact of when Claude Code was hardcoded on Opus.
        (process.env.FALLBACK_FOR_ALL_PRIMARY_MODELS ||
          (!isClaudeAISubscriber() && isNonCustomOpusModel(options.model)))
      ) {
        consecutive529Errors++
        if (consecutive529Errors >= MAX_529_RETRIES) {
          // Check if fallback model is specified
          if (options.fallbackModel) {
            logEvent('tengu_api_opus_fallback_triggered', {
              original_model:
                options.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
              fallback_model:
                options.fallbackModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
              provider: getAPIProviderForStatsig(),
            })

            // Throw special error to indicate fallback was triggered
            throw new FallbackTriggeredError(
              options.model,
              options.fallbackModel,
            )
          }

          if (
            process.env.USER_TYPE === 'external' &&
            !process.env.IS_SANDBOX &&
            !isPersistentRetryEnabled()
          ) {
            logEvent('tengu_api_custom_529_overloaded_error', {})
            throw new CannotRetryError(
              new Error(REPEATED_529_ERROR_MESSAGE),
              retryContext,
            )
          }
        }
      }

      // Only retry if the error indicates we should
      const persistent =
        isPersistentRetryEnabled() && isTransientCapacityError(error)
      if (attempt > maxRetries && !persistent) {
        throw new CannotRetryError(error, retryContext)
      }

      // AWS/GCP errors aren't always APIError, but can be retried
      const handledCloudAuthError =
        handleAwsCredentialError(error) || handleGcpCredentialError(error)
      if (
        !handledCloudAuthError &&
        (!(error instanceof APIError) || !shouldRetry(error))
      ) {
        throw new CannotRetryError(error, retryContext)
      }

      // Handle max tokens context overflow errors by adjusting max_tokens for the next attempt
      // NOTE: With extended-context-window beta, this 400 error should not occur.
      // The API now returns 'model_context_window_exceeded' stop_reason instead.
      // Keeping for backward compatibility.
      if (error instanceof APIError) {
        const overflowData = parseMaxTokensContextOverflowError(error)
        if (overflowData) {
          const { inputTokens, contextLimit } = overflowData

          const safetyBuffer = 1000
          const availableContext = Math.max(
            0,
            contextLimit - inputTokens - safetyBuffer,
          )
          if (availableContext < FLOOR_OUTPUT_TOKENS) {
            logError(
              new Error(
                `availableContext ${availableContext} is less than FLOOR_OUTPUT_TOKENS ${FLOOR_OUTPUT_TOKENS}`,
              ),
            )
            throw error
          }
          // Ensure we have enough tokens for thinking + at least 1 output token
          const minRequired =
            (retryContext.thinkingConfig.type === 'enabled'
              ? retryContext.thinkingConfig.budgetTokens
              : 0) + 1
          const adjustedMaxTokens = Math.max(
            FLOOR_OUTPUT_TOKENS,
            availableContext,
            minRequired,
          )
          retryContext.maxTokensOverride = adjustedMaxTokens

          logEvent('tengu_max_tokens_context_overflow_adjustment', {
            inputTokens,
            contextLimit,
            adjustedMaxTokens,
            attempt,
          })

          continue
        }
      }

      // For other errors, proceed with normal retry logic
      // Get retry-after header if available
      const retryAfter = getRetryAfter(error)
      let delayMs: number
      if (persistent && error instanceof APIError && error.status === 429) {
        persistentAttempt++
        // Window-based limits (e.g. 5hr Max/Pro) include a reset timestamp.
        // Wait until reset rather than polling every 5 min uselessly.
        const resetDelay = getRateLimitResetDelayMs(error)
        delayMs =
          resetDelay ??
          Math.min(
            getRetryDelay(
              persistentAttempt,
              retryAfter,
              PERSISTENT_MAX_BACKOFF_MS,
            ),
            PERSISTENT_RESET_CAP_MS,
          )
      } else if (persistent) {
        persistentAttempt++
        // Retry-After is a server directive and bypasses maxDelayMs inside
        // getRetryDelay (intentional — honoring it is correct). Cap at the
        // 6hr reset-cap here so a pathological header can't wait unbounded.
        delayMs = Math.min(
          getRetryDelay(
            persistentAttempt,
            retryAfter,
            PERSISTENT_MAX_BACKOFF_MS,
          ),
          PERSISTENT_RESET_CAP_MS,
        )
      } else {
        delayMs = getRetryDelay(attempt, retryAfter)
      }

      // In persistent mode the for-loop `attempt` is clamped at maxRetries+1;
      // use persistentAttempt for telemetry/yields so they show the true count.
      const reportedAttempt = persistent ? persistentAttempt : attempt
      logEvent('tengu_api_retry', {
        attempt: reportedAttempt,
        delayMs: delayMs,
        error: (error as APIError)
          .message as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        status: (error as APIError).status,
        provider: getAPIProviderForStatsig(),
      })

      if (persistent) {
        if (delayMs > 60_000) {
          logEvent('tengu_api_persistent_retry_wait', {
            status: (error as APIError).status,
            delayMs,
            attempt: reportedAttempt,
            provider: getAPIProviderForStatsig(),
          })
        }
        // Chunk long sleeps so the host sees periodic stdout activity and
        // does not mark the session idle. Each yield surfaces as
        // {type:'system', subtype:'api_retry'} on stdout via QueryEngine.
        let remaining = delayMs
        while (remaining > 0) {
          if (options.signal?.aborted) throw new APIUserAbortError()
          if (error instanceof APIError) {
            yield createSystemAPIErrorMessage(
              error,
              remaining,
              reportedAttempt,
              maxRetries,
            )
          }
          const chunk = Math.min(remaining, HEARTBEAT_INTERVAL_MS)
          await sleep(chunk, options.signal, { abortError })
          remaining -= chunk
        }
        // Clamp so the for-loop never terminates. Backoff uses the separate
        // persistentAttempt counter which keeps growing to the 5-min cap.
        if (attempt >= maxRetries) attempt = maxRetries
      } else {
        if (error instanceof APIError) {
          yield createSystemAPIErrorMessage(error, delayMs, attempt, maxRetries)
        }
        await sleep(delayMs, options.signal, { abortError })
      }
    }
  }

  // Reached when the loop ends via a `continue` on the final attempt.
  throw new CannotRetryError(lastError, retryContext)
}

/**
 * Read the Retry-After header from an error, whether its headers are a plain
 * object or a `Headers` instance. Returns null when absent.
 */
function getRetryAfter(error: unknown): string | null {
  return (
    ((error as { headers?: { 'retry-after'?: string } }).headers?.[
      'retry-after'
    ] ||
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      ((error as APIError).headers as Headers)?.get?.('retry-after')) ??
    null
  )
}

/**
 * Convert a Retry-After value expressed in seconds to milliseconds.
 *
 * @returns null when the header is missing or does not start with an integer;
 *   otherwise the delay in ms, clamped so a negative header never yields a
 *   negative delay
 */
function parseRetryAfterMs(retryAfterHeader?: string | null): number | null {
  if (!retryAfterHeader) {
    return null
  }
  const seconds = parseInt(retryAfterHeader, 10)
  if (isNaN(seconds)) {
    return null
  }
  return Math.max(0, seconds) * 1000
}

/**
 * Compute how long to wait before the next attempt.
 *
 * A parseable Retry-After header wins outright and is NOT capped by
 * `maxDelayMs`. Otherwise the delay is exponential backoff from BASE_DELAY_MS,
 * capped at `maxDelayMs`, plus up to 25% random jitter.
 *
 * @param attempt - attempt number, starting at 1
 * @param retryAfterHeader - raw Retry-After header value, if any
 * @param maxDelayMs - cap for the backoff component (default 32000)
 * @returns delay in milliseconds, never negative
 */
export function getRetryDelay(
  attempt: number,
  retryAfterHeader?: string | null,
  maxDelayMs = 32000,
): number {
  const retryAfterMs = parseRetryAfterMs(retryAfterHeader)
  if (retryAfterMs !== null) {
    return retryAfterMs
  }

  const baseDelay = Math.min(
    BASE_DELAY_MS * Math.pow(2, attempt - 1),
    maxDelayMs,
  )
  const jitter = Math.random() * 0.25 * baseDelay
  return baseDelay + jitter
}

/**
 * Extract the three token counts from a 400 "input length and `max_tokens`
 * exceed context limit" error message.
 *
 * @returns the parsed counts, or undefined when the error is not that 400 or
 *   the message does not match the expected format
 */
export function parseMaxTokensContextOverflowError(error: APIError):
  | {
      inputTokens: number
      maxTokens: number
      contextLimit: number
    }
  | undefined {
  if (error.status !== 400 || !error.message) {
    return undefined
  }

  if (
    !error.message.includes(
      'input length and `max_tokens` exceed context limit',
    )
  ) {
    return undefined
  }

  // Example format: "input length and `max_tokens` exceed context limit: 188059 + 20000 > 200000"
  const regex =
    /input length and `max_tokens` exceed context limit: (\d+) \+ (\d+) > (\d+)/
  const match = error.message.match(regex)

  if (!match || match.length !== 4) {
    return undefined
  }

  if (!match[1] || !match[2] || !match[3]) {
    logError(
      new Error(
        'Unable to parse max_tokens from max_tokens exceed context limit error message',
      ),
    )
    return undefined
  }
  const inputTokens = parseInt(match[1], 10)
  const maxTokens = parseInt(match[2], 10)
  const contextLimit = parseInt(match[3], 10)

  if (isNaN(inputTokens) || isNaN(maxTokens) || isNaN(contextLimit)) {
    return undefined
  }

  return { inputTokens, maxTokens, contextLimit }
}

// TODO: Replace with a response header check once the API adds a dedicated
// header for fast-mode rejection (e.g., x-fast-mode-rejected). String-matching
// the error message is fragile and will break if the API wording changes.
function isFastModeNotEnabledError(error: unknown): boolean {
  if (!(error instanceof APIError)) {
    return false
  }
  return (
    error.status === 400 &&
    (error.message?.includes('Fast mode is not enabled') ?? false)
  )
}

/**
 * True for an APIError with status 529, or whose message carries the
 * overloaded_error type marker.
 */
export function is529Error(error: unknown): boolean {
  if (!(error instanceof APIError)) {
    return false
  }

  // Check for 529 status code or overloaded error in message
  return (
    error.status === 529 ||
    // See below: the SDK sometimes fails to properly pass the 529 status code during streaming
    (error.message?.includes('"type":"overloaded_error"') ?? false)
  )
}

/** True for a 403 APIError whose message says the OAuth token was revoked. */
function isOAuthTokenRevokedError(error: unknown): boolean {
  return (
    error instanceof APIError &&
    error.status === 403 &&
    (error.message?.includes('OAuth token has been revoked') ?? false)
  )
}

/** True, only when Bedrock is in use, for credential-provider errors and 403s. */
function isBedrockAuthError(error: unknown): boolean {
  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_BEDROCK)) {
    // AWS libs reject without an API call if .aws holds a past Expiration value
    // otherwise, API calls that receive expired tokens give generic 403
    // "The security token included in the request is invalid"
    if (
      isAwsCredentialsProviderError(error) ||
      (error instanceof APIError && error.status === 403)
    ) {
      return true
    }
  }
  return false
}

/**
 * Clear AWS auth caches if appropriate.
 * @returns true if action was taken.
 */
function handleAwsCredentialError(error: unknown): boolean {
  if (isBedrockAuthError(error)) {
    clearAwsCredentialsCache()
    return true
  }
  return false
}

// google-auth-library throws plain Error (no typed name like AWS's
// CredentialsProviderError). Match common SDK-level credential-failure messages.
function isGoogleAuthLibraryCredentialError(error: unknown): boolean {
  if (!(error instanceof Error)) return false
  const msg = error.message
  return (
    msg.includes('Could not load the default credentials') ||
    msg.includes('Could not refresh access token') ||
    msg.includes('invalid_grant')
  )
}

/** True, only when Vertex is in use, for credential-library failures and 401s. */
function isVertexAuthError(error: unknown): boolean {
  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_VERTEX)) {
    // SDK-level: google-auth-library fails in prepareOptions() before the HTTP call
    if (isGoogleAuthLibraryCredentialError(error)) {
      return true
    }
    // Server-side: Vertex returns 401 for expired/invalid tokens
    if (error instanceof APIError && error.status === 401) {
      return true
    }
  }
  return false
}

/**
 * Clear GCP auth caches if appropriate.
 * @returns true if action was taken.
 */
function handleGcpCredentialError(error: unknown): boolean {
  if (isVertexAuthError(error)) {
    clearGcpCredentialsCache()
    return true
  }
  return false
}

/**
 * Decide whether an APIError is worth another attempt. Checks run in order;
 * the first matching rule wins. Side effect: a 401 that reaches the status
 * checks clears the API-key-helper cache.
 */
function shouldRetry(error: APIError): boolean {
  // Never retry mock errors - they're from /mock-limits command for testing
  if (isMockRateLimitError(error)) {
    return false
  }

  // Persistent mode: 429/529 always retryable, bypass subscriber gates and
  // x-should-retry header.
  if (isPersistentRetryEnabled() && isTransientCapacityError(error)) {
    return true
  }

  // CCR mode: auth is via infrastructure-provided JWTs, so a 401/403 is a
  // transient blip (auth service flap, network hiccup) rather than bad
  // credentials. Bypass x-should-retry:false — the server assumes we'd retry
  // the same bad key, but our key is fine.
  if (
    isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) &&
    (error.status === 401 || error.status === 403)
  ) {
    return true
  }

  // Check for overloaded errors first by examining the message content
  // The SDK sometimes fails to properly pass the 529 status code during streaming,
  // so we need to check the error message directly
  if (error.message?.includes('"type":"overloaded_error"')) {
    return true
  }

  // Check for max tokens context overflow errors that we can handle
  if (parseMaxTokensContextOverflowError(error)) {
    return true
  }

  // Note this is not a standard header.
  const shouldRetryHeader = error.headers?.get('x-should-retry')

  // If the server explicitly says whether or not to retry, obey.
  // For Max and Pro users, should-retry is true, but in several hours, so we shouldn't.
  // Enterprise users can retry because they typically use PAYG instead of rate limits.
  if (
    shouldRetryHeader === 'true' &&
    (!isClaudeAISubscriber() || isEnterpriseSubscriber())
  ) {
    return true
  }

  // Ants can ignore x-should-retry: false for 5xx server errors only.
  // For other status codes (401, 403, 400, 429, etc.), respect the header.
  if (shouldRetryHeader === 'false') {
    const is5xxError = error.status !== undefined && error.status >= 500
    if (!(process.env.USER_TYPE === 'ant' && is5xxError)) {
      return false
    }
  }

  if (error instanceof APIConnectionError) {
    return true
  }

  if (!error.status) return false

  // Retry on request timeouts.
  if (error.status === 408) return true

  // Retry on lock timeouts.
  if (error.status === 409) return true

  // Retry on rate limits, but not for ClaudeAI Subscription users
  // Enterprise users can retry because they typically use PAYG instead of rate limits
  if (error.status === 429) {
    return !isClaudeAISubscriber() || isEnterpriseSubscriber()
  }

  // Clear API key cache on 401 and allow retry.
  // OAuth token handling is done in the main retry loop via handleOAuth401Error.
  if (error.status === 401) {
    clearApiKeyHelperCache()
    return true
  }

  // Retry on 403 "token revoked" (same refresh logic as 401, see above)
  if (isOAuthTokenRevokedError(error)) {
    return true
  }

  // Retry internal errors.
  if (error.status && error.status >= 500) return true

  return false
}

/**
 * Retry budget from CLAUDE_CODE_MAX_RETRIES, or DEFAULT_MAX_RETRIES.
 *
 * A value that does not parse to a non-negative integer is ignored. Returning
 * NaN or a negative number here would make the retry loop's
 * `attempt <= maxRetries + 1` condition false on entry, so no request would
 * ever be attempted.
 */
export function getDefaultMaxRetries(): number {
  const raw = process.env.CLAUDE_CODE_MAX_RETRIES
  if (raw) {
    const parsed = parseInt(raw, 10)
    if (Number.isFinite(parsed) && parsed >= 0) {
      return parsed
    }
  }
  return DEFAULT_MAX_RETRIES
}

/** Caller-supplied `maxRetries` if set, else the environment/default budget. */
function getMaxRetries(options: RetryOptions): number {
  return options.maxRetries ?? getDefaultMaxRetries()
}

const DEFAULT_FAST_MODE_FALLBACK_HOLD_MS = 30 * 60 * 1000 // 30 minutes
const SHORT_RETRY_THRESHOLD_MS = 20 * 1000 // 20 seconds
const MIN_COOLDOWN_MS = 10 * 60 * 1000 // 10 minutes

/** Retry-After of an APIError in milliseconds, or null if absent/unparseable. */
function getRetryAfterMs(error: APIError): number | null {
  return parseRetryAfterMs(getRetryAfter(error))
}

/**
 * Milliseconds until the time given by the
 * `anthropic-ratelimit-unified-reset` header (read as Unix seconds), capped at
 * PERSISTENT_RESET_CAP_MS. Returns null when the header is missing,
 * non-numeric, or not in the future.
 */
function getRateLimitResetDelayMs(error: APIError): number | null {
  const resetHeader = error.headers?.get?.('anthropic-ratelimit-unified-reset')
  if (!resetHeader) return null
  const resetUnixSec = Number(resetHeader)
  if (!Number.isFinite(resetUnixSec)) return null
  const delayMs = resetUnixSec * 1000 - Date.now()
  if (delayMs <= 0) return null
  return Math.min(delayMs, PERSISTENT_RESET_CAP_MS)
}