tools/AgentTool/runAgent.ts at main · nonbinary.computer/claude-code

nonbinary.computer / claude-code
forked from oppi.li/claude-code
fork atom
source dump of claude code
fork atom
claude-code / tools / AgentTool / runAgent.ts
at main 973 lines 36 kB view raw
wrap content
oppi.li dump from zip 15d ago
63aada3f
  1import { feature } from 'bun:bundle'
  2import type { UUID } from 'crypto'
  3import { randomUUID } from 'crypto'
  4import uniqBy from 'lodash-es/uniqBy.js'
  5import { logForDebugging } from 'src/utils/debug.js'
  6import { getProjectRoot, getSessionId } from '../../bootstrap/state.js'
  7import { getCommand, getSkillToolCommands, hasCommand } from '../../commands.js'
  8import {
  9  DEFAULT_AGENT_PROMPT,
 10  enhanceSystemPromptWithEnvDetails,
 11} from '../../constants/prompts.js'
 12import type { QuerySource } from '../../constants/querySource.js'
 13import { getSystemContext, getUserContext } from '../../context.js'
 14import type { CanUseToolFn } from '../../hooks/useCanUseTool.js'
 15import { query } from '../../query.js'
 16import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
 17import { getDumpPromptsPath } from '../../services/api/dumpPrompts.js'
 18import { cleanupAgentTracking } from '../../services/api/promptCacheBreakDetection.js'
 19import {
 20  connectToServer,
 21  fetchToolsForClient,
 22} from '../../services/mcp/client.js'
 23import { getMcpConfigByName } from '../../services/mcp/config.js'
 24import type {
 25  MCPServerConnection,
 26  ScopedMcpServerConfig,
 27} from '../../services/mcp/types.js'
 28import type { Tool, Tools, ToolUseContext } from '../../Tool.js'
 29import { killShellTasksForAgent } from '../../tasks/LocalShellTask/killShellTasks.js'
 30import type { Command } from '../../types/command.js'
 31import type { AgentId } from '../../types/ids.js'
 32import type {
 33  AssistantMessage,
 34  Message,
 35  ProgressMessage,
 36  RequestStartEvent,
 37  StreamEvent,
 38  SystemCompactBoundaryMessage,
 39  TombstoneMessage,
 40  ToolUseSummaryMessage,
 41  UserMessage,
 42} from '../../types/message.js'
 43import { createAttachmentMessage } from '../../utils/attachments.js'
 44import { AbortError } from '../../utils/errors.js'
 45import { getDisplayPath } from '../../utils/file.js'
 46import {
 47  cloneFileStateCache,
 48  createFileStateCacheWithSizeLimit,
 49  READ_FILE_STATE_CACHE_SIZE,
 50} from '../../utils/fileStateCache.js'
 51import {
 52  type CacheSafeParams,
 53  createSubagentContext,
 54} from '../../utils/forkedAgent.js'
 55import { registerFrontmatterHooks } from '../../utils/hooks/registerFrontmatterHooks.js'
 56import { clearSessionHooks } from '../../utils/hooks/sessionHooks.js'
 57import { executeSubagentStartHooks } from '../../utils/hooks.js'
 58import { createUserMessage } from '../../utils/messages.js'
 59import { getAgentModel } from '../../utils/model/agent.js'
 60import type { ModelAlias } from '../../utils/model/aliases.js'
 61import {
 62  clearAgentTranscriptSubdir,
 63  recordSidechainTranscript,
 64  setAgentTranscriptSubdir,
 65  writeAgentMetadata,
 66} from '../../utils/sessionStorage.js'
 67import {
 68  isRestrictedToPluginOnly,
 69  isSourceAdminTrusted,
 70} from '../../utils/settings/pluginOnlyPolicy.js'
 71import {
 72  asSystemPrompt,
 73  type SystemPrompt,
 74} from '../../utils/systemPromptType.js'
 75import {
 76  isPerfettoTracingEnabled,
 77  registerAgent as registerPerfettoAgent,
 78  unregisterAgent as unregisterPerfettoAgent,
 79} from '../../utils/telemetry/perfettoTracing.js'
 80import type { ContentReplacementState } from '../../utils/toolResultStorage.js'
 81import { createAgentId } from '../../utils/uuid.js'
 82import { resolveAgentTools } from './agentToolUtils.js'
 83import { type AgentDefinition, isBuiltInAgent } from './loadAgentsDir.js'
 84
 85/**
 86 * Initialize agent-specific MCP servers
 87 * Agents can define their own MCP servers in their frontmatter that are additive
 88 * to the parent's MCP clients. These servers are connected when the agent starts
 89 * and cleaned up when the agent finishes.
 90 *
 91 * @param agentDefinition The agent definition with optional mcpServers
 92 * @param parentClients MCP clients inherited from parent context
 93 * @returns Merged clients (parent + agent-specific), agent MCP tools, and cleanup function
 94 */
 95async function initializeAgentMcpServers(
 96  agentDefinition: AgentDefinition,
 97  parentClients: MCPServerConnection[],
 98): Promise<{
 99  clients: MCPServerConnection[]
100  tools: Tools
101  cleanup: () => Promise<void>
102}> {
103  // If no agent-specific servers defined, return parent clients as-is
104  if (!agentDefinition.mcpServers?.length) {
105    return {
106      clients: parentClients,
107      tools: [],
108      cleanup: async () => {},
109    }
110  }
111
112  // When MCP is locked to plugin-only, skip frontmatter MCP servers for
113  // USER-CONTROLLED agents only. Plugin, built-in, and policySettings agents
114  // are admin-trusted — their frontmatter MCP is part of the admin-approved
115  // surface. Blocking them (as the first cut did) breaks plugin agents that
116  // legitimately need MCP, contradicting "plugin-provided always loads."
117  const agentIsAdminTrusted = isSourceAdminTrusted(agentDefinition.source)
118  if (isRestrictedToPluginOnly('mcp') && !agentIsAdminTrusted) {
119    logForDebugging(
120      `[Agent: ${agentDefinition.agentType}] Skipping MCP servers: strictPluginOnlyCustomization locks MCP to plugin-only (agent source: ${agentDefinition.source})`,
121    )
122    return {
123      clients: parentClients,
124      tools: [],
125      cleanup: async () => {},
126    }
127  }
128
129  const agentClients: MCPServerConnection[] = []
130  // Track which clients were newly created (inline definitions) vs. shared from parent
131  // Only newly created clients should be cleaned up when the agent finishes
132  const newlyCreatedClients: MCPServerConnection[] = []
133  const agentTools: Tool[] = []
134
135  for (const spec of agentDefinition.mcpServers) {
136    let config: ScopedMcpServerConfig | null = null
137    let name: string
138    let isNewlyCreated = false
139
140    if (typeof spec === 'string') {
141      // Reference by name - look up in existing MCP configs
142      // This uses the memoized connectToServer, so we may get a shared client
143      name = spec
144      config = getMcpConfigByName(spec)
145      if (!config) {
146        logForDebugging(
147          `[Agent: ${agentDefinition.agentType}] MCP server not found: ${spec}`,
148          { level: 'warn' },
149        )
150        continue
151      }
152    } else {
153      // Inline definition as { [name]: config }
154      // These are agent-specific servers that should be cleaned up
155      const entries = Object.entries(spec)
156      if (entries.length !== 1) {
157        logForDebugging(
158          `[Agent: ${agentDefinition.agentType}] Invalid MCP server spec: expected exactly one key`,
159          { level: 'warn' },
160        )
161        continue
162      }
163      const [serverName, serverConfig] = entries[0]!
164      name = serverName
165      config = {
166        ...serverConfig,
167        scope: 'dynamic' as const,
168      } as ScopedMcpServerConfig
169      isNewlyCreated = true
170    }
171
172    // Connect to the server
173    const client = await connectToServer(name, config)
174    agentClients.push(client)
175    if (isNewlyCreated) {
176      newlyCreatedClients.push(client)
177    }
178
179    // Fetch tools if connected
180    if (client.type === 'connected') {
181      const tools = await fetchToolsForClient(client)
182      agentTools.push(...tools)
183      logForDebugging(
184        `[Agent: ${agentDefinition.agentType}] Connected to MCP server '${name}' with ${tools.length} tools`,
185      )
186    } else {
187      logForDebugging(
188        `[Agent: ${agentDefinition.agentType}] Failed to connect to MCP server '${name}': ${client.type}`,
189        { level: 'warn' },
190      )
191    }
192  }
193
194  // Create cleanup function for agent-specific servers
195  // Only clean up newly created clients (inline definitions), not shared/referenced ones
196  // Shared clients (referenced by string name) are memoized and used by the parent context
197  const cleanup = async () => {
198    for (const client of newlyCreatedClients) {
199      if (client.type === 'connected') {
200        try {
201          await client.cleanup()
202        } catch (error) {
203          logForDebugging(
204            `[Agent: ${agentDefinition.agentType}] Error cleaning up MCP server '${client.name}': ${error}`,
205            { level: 'warn' },
206          )
207        }
208      }
209    }
210  }
211
212  // Return merged clients (parent + agent-specific) and agent tools
213  return {
214    clients: [...parentClients, ...agentClients],
215    tools: agentTools,
216    cleanup,
217  }
218}
219
/**
 * Union of the message shapes accepted by isRecordableMessage. The variants
 * that should be written to the sidechain transcript are selected from this
 * union by that type guard.
 */
type QueryMessage =
  | Message
  | StreamEvent
  | RequestStartEvent
  | TombstoneMessage
  | ToolUseSummaryMessage
226
/**
 * Type guard that narrows a QueryMessage to the variants that get recorded.
 *
 * Accepts assistant, user, and progress messages unconditionally. A system
 * message is accepted only when it carries a `subtype` equal to
 * 'compact_boundary'. Every other message type is rejected.
 */
function isRecordableMessage(
  msg: QueryMessage,
): msg is
  | AssistantMessage
  | UserMessage
  | ProgressMessage
  | SystemCompactBoundaryMessage {
  switch (msg.type) {
    case 'assistant':
    case 'user':
    case 'progress':
      return true
    case 'system':
      // Only the compact-boundary flavour of system message is recordable.
      return 'subtype' in msg && msg.subtype === 'compact_boundary'
    default:
      return false
  }
}
247
248export async function* runAgent({
249  agentDefinition,
250  promptMessages,
251  toolUseContext,
252  canUseTool,
253  isAsync,
254  canShowPermissionPrompts,
255  forkContextMessages,
256  querySource,
257  override,
258  model,
259  maxTurns,
260  preserveToolUseResults,
261  availableTools,
262  allowedTools,
263  onCacheSafeParams,
264  contentReplacementState,
265  useExactTools,
266  worktreePath,
267  description,
268  transcriptSubdir,
269  onQueryProgress,
270}: {
271  agentDefinition: AgentDefinition
272  promptMessages: Message[]
273  toolUseContext: ToolUseContext
274  canUseTool: CanUseToolFn
275  isAsync: boolean
276  /** Whether this agent can show permission prompts. Defaults to !isAsync.
277   * Set to true for in-process teammates that run async but share the terminal. */
278  canShowPermissionPrompts?: boolean
279  forkContextMessages?: Message[]
280  querySource: QuerySource
281  override?: {
282    userContext?: { [k: string]: string }
283    systemContext?: { [k: string]: string }
284    systemPrompt?: SystemPrompt
285    abortController?: AbortController
286    agentId?: AgentId
287  }
288  model?: ModelAlias
289  maxTurns?: number
290  /** Preserve toolUseResult on messages for subagents with viewable transcripts */
291  preserveToolUseResults?: boolean
292  /** Precomputed tool pool for the worker agent. Computed by the caller
293   * (AgentTool.tsx) to avoid a circular dependency between runAgent and tools.ts.
294   * Always contains the full tool pool assembled with the worker's own permission
295   * mode, independent of the parent's tool restrictions. */
296  availableTools: Tools
297  /** Tool permission rules to add to the agent's session allow rules.
298   * When provided, replaces ALL allow rules so the agent only has what's
299   * explicitly listed (parent approvals don't leak through). */
300  allowedTools?: string[]
301  /** Optional callback invoked with CacheSafeParams after constructing the agent's
302   * system prompt, context, and tools. Used by background summarization to fork
303   * the agent's conversation for periodic progress summaries. */
304  onCacheSafeParams?: (params: CacheSafeParams) => void
305  /** Replacement state reconstructed from a resumed sidechain transcript so
306   * the same tool results are re-replaced (prompt cache stability). When
307   * omitted, createSubagentContext clones the parent's state. */
308  contentReplacementState?: ContentReplacementState
309  /** When true, use availableTools directly without filtering through
310   * resolveAgentTools(). Also inherits the parent's thinkingConfig and
311   * isNonInteractiveSession instead of overriding them. Used by the fork
312   * subagent path to produce byte-identical API request prefixes for
313   * prompt cache hits. */
314  useExactTools?: boolean
315  /** Worktree path if the agent was spawned with isolation: "worktree".
316   * Persisted to metadata so resume can restore the correct cwd. */
317  worktreePath?: string
318  /** Original task description from AgentTool input. Persisted to metadata
319   * so a resumed agent's notification can show the original description. */
320  description?: string
321  /** Optional subdirectory under subagents/ to group this agent's transcript
322   * with related ones (e.g. workflows/<runId> for workflow subagents). */
323  transcriptSubdir?: string
324  /** Optional callback fired on every message yielded by query() — including
325   * stream_event deltas that runAgent otherwise drops. Use to detect liveness
326   * during long single-block streams (e.g. thinking) where no assistant
327   * message is yielded for >60s. */
328  onQueryProgress?: () => void
329}): AsyncGenerator<Message, void> {
330  // Track subagent usage for feature discovery
331
332  const appState = toolUseContext.getAppState()
333  const permissionMode = appState.toolPermissionContext.mode
334  // Always-shared channel to the root AppState store. toolUseContext.setAppState
335  // is a no-op when the *parent* is itself an async agent (nested async→async),
336  // so session-scoped writes (hooks, bash tasks) must go through this instead.
337  const rootSetAppState =
338    toolUseContext.setAppStateForTasks ?? toolUseContext.setAppState
339
340  const resolvedAgentModel = getAgentModel(
341    agentDefinition.model,
342    toolUseContext.options.mainLoopModel,
343    model,
344    permissionMode,
345  )
346
347  const agentId = override?.agentId ? override.agentId : createAgentId()
348
349  // Route this agent's transcript into a grouping subdirectory if requested
350  // (e.g. workflow subagents write to subagents/workflows/<runId>/).
351  if (transcriptSubdir) {
352    setAgentTranscriptSubdir(agentId, transcriptSubdir)
353  }
354
355  // Register agent in Perfetto trace for hierarchy visualization
356  if (isPerfettoTracingEnabled()) {
357    const parentId = toolUseContext.agentId ?? getSessionId()
358    registerPerfettoAgent(agentId, agentDefinition.agentType, parentId)
359  }
360
361  // Log API calls path for subagents (ant-only)
362  if (process.env.USER_TYPE === 'ant') {
363    logForDebugging(
364      `[Subagent ${agentDefinition.agentType}] API calls: ${getDisplayPath(getDumpPromptsPath(agentId))}`,
365    )
366  }
367
368  // Handle message forking for context sharing
369  // Filter out incomplete tool calls from parent messages to avoid API errors
370  const contextMessages: Message[] = forkContextMessages
371    ? filterIncompleteToolCalls(forkContextMessages)
372    : []
373  const initialMessages: Message[] = [...contextMessages, ...promptMessages]
374
375  const agentReadFileState =
376    forkContextMessages !== undefined
377      ? cloneFileStateCache(toolUseContext.readFileState)
378      : createFileStateCacheWithSizeLimit(READ_FILE_STATE_CACHE_SIZE)
379
380  const [baseUserContext, baseSystemContext] = await Promise.all([
381    override?.userContext ?? getUserContext(),
382    override?.systemContext ?? getSystemContext(),
383  ])
384
385  // Read-only agents (Explore, Plan) don't act on commit/PR/lint rules from
386  // CLAUDE.md — the main agent has full context and interprets their output.
387  // Dropping claudeMd here saves ~5-15 Gtok/week across 34M+ Explore spawns.
388  // Explicit override.userContext from callers is preserved untouched.
389  // Kill-switch defaults true; flip tengu_slim_subagent_claudemd=false to revert.
390  const shouldOmitClaudeMd =
391    agentDefinition.omitClaudeMd &&
392    !override?.userContext &&
393    getFeatureValue_CACHED_MAY_BE_STALE('tengu_slim_subagent_claudemd', true)
394  const { claudeMd: _omittedClaudeMd, ...userContextNoClaudeMd } =
395    baseUserContext
396  const resolvedUserContext = shouldOmitClaudeMd
397    ? userContextNoClaudeMd
398    : baseUserContext
399
400  // Explore/Plan are read-only search agents — the parent-session-start
401  // gitStatus (up to 40KB, explicitly labeled stale) is dead weight. If they
402  // need git info they run `git status` themselves and get fresh data.
403  // Saves ~1-3 Gtok/week fleet-wide.
404  const { gitStatus: _omittedGitStatus, ...systemContextNoGit } =
405    baseSystemContext
406  const resolvedSystemContext =
407    agentDefinition.agentType === 'Explore' ||
408    agentDefinition.agentType === 'Plan'
409      ? systemContextNoGit
410      : baseSystemContext
411
412  // Override permission mode if agent defines one
413  // However, don't override if parent is in bypassPermissions or acceptEdits mode - those should always take precedence
414  // For async agents, also set shouldAvoidPermissionPrompts since they can't show UI
415  const agentPermissionMode = agentDefinition.permissionMode
416  const agentGetAppState = () => {
417    const state = toolUseContext.getAppState()
418    let toolPermissionContext = state.toolPermissionContext
419
420    // Override permission mode if agent defines one (unless parent is bypassPermissions, acceptEdits, or auto)
421    if (
422      agentPermissionMode &&
423      state.toolPermissionContext.mode !== 'bypassPermissions' &&
424      state.toolPermissionContext.mode !== 'acceptEdits' &&
425      !(
426        feature('TRANSCRIPT_CLASSIFIER') &&
427        state.toolPermissionContext.mode === 'auto'
428      )
429    ) {
430      toolPermissionContext = {
431        ...toolPermissionContext,
432        mode: agentPermissionMode,
433      }
434    }
435
436    // Set flag to auto-deny prompts for agents that can't show UI
437    // Use explicit canShowPermissionPrompts if provided, otherwise:
438    //   - bubble mode: always show prompts (bubbles to parent terminal)
439    //   - default: !isAsync (sync agents show prompts, async agents don't)
440    const shouldAvoidPrompts =
441      canShowPermissionPrompts !== undefined
442        ? !canShowPermissionPrompts
443        : agentPermissionMode === 'bubble'
444          ? false
445          : isAsync
446    if (shouldAvoidPrompts) {
447      toolPermissionContext = {
448        ...toolPermissionContext,
449        shouldAvoidPermissionPrompts: true,
450      }
451    }
452
453    // For background agents that can show prompts, await automated checks
454    // (classifier, permission hooks) before showing the permission dialog.
455    // Since these are background agents, waiting is fine — the user should
456    // only be interrupted when automated checks can't resolve the permission.
457    // This applies to bubble mode (always) and explicit canShowPermissionPrompts.
458    if (isAsync && !shouldAvoidPrompts) {
459      toolPermissionContext = {
460        ...toolPermissionContext,
461        awaitAutomatedChecksBeforeDialog: true,
462      }
463    }
464
465    // Scope tool permissions: when allowedTools is provided, use them as session rules.
466    // IMPORTANT: Preserve cliArg rules (from SDK's --allowedTools) since those are
467    // explicit permissions from the SDK consumer that should apply to all agents.
468    // Only clear session-level rules from the parent to prevent unintended leakage.
469    if (allowedTools !== undefined) {
470      toolPermissionContext = {
471        ...toolPermissionContext,
472        alwaysAllowRules: {
473          // Preserve SDK-level permissions from --allowedTools
474          cliArg: state.toolPermissionContext.alwaysAllowRules.cliArg,
475          // Use the provided allowedTools as session-level permissions
476          session: [...allowedTools],
477        },
478      }
479    }
480
481    // Override effort level if agent defines one
482    const effortValue =
483      agentDefinition.effort !== undefined
484        ? agentDefinition.effort
485        : state.effortValue
486
487    if (
488      toolPermissionContext === state.toolPermissionContext &&
489      effortValue === state.effortValue
490    ) {
491      return state
492    }
493    return {
494      ...state,
495      toolPermissionContext,
496      effortValue,
497    }
498  }
499
500  const resolvedTools = useExactTools
501    ? availableTools
502    : resolveAgentTools(agentDefinition, availableTools, isAsync).resolvedTools
503
504  const additionalWorkingDirectories = Array.from(
505    appState.toolPermissionContext.additionalWorkingDirectories.keys(),
506  )
507
508  const agentSystemPrompt = override?.systemPrompt
509    ? override.systemPrompt
510    : asSystemPrompt(
511        await getAgentSystemPrompt(
512          agentDefinition,
513          toolUseContext,
514          resolvedAgentModel,
515          additionalWorkingDirectories,
516          resolvedTools,
517        ),
518      )
519
520  // Determine abortController:
521  // - Override takes precedence
522  // - Async agents get a new unlinked controller (runs independently)
523  // - Sync agents share parent's controller
524  const agentAbortController = override?.abortController
525    ? override.abortController
526    : isAsync
527      ? new AbortController()
528      : toolUseContext.abortController
529
530  // Execute SubagentStart hooks and collect additional context
531  const additionalContexts: string[] = []
532  for await (const hookResult of executeSubagentStartHooks(
533    agentId,
534    agentDefinition.agentType,
535    agentAbortController.signal,
536  )) {
537    if (
538      hookResult.additionalContexts &&
539      hookResult.additionalContexts.length > 0
540    ) {
541      additionalContexts.push(...hookResult.additionalContexts)
542    }
543  }
544
545  // Add SubagentStart hook context as a user message (consistent with SessionStart/UserPromptSubmit)
546  if (additionalContexts.length > 0) {
547    const contextMessage = createAttachmentMessage({
548      type: 'hook_additional_context',
549      content: additionalContexts,
550      hookName: 'SubagentStart',
551      toolUseID: randomUUID(),
552      hookEvent: 'SubagentStart',
553    })
554    initialMessages.push(contextMessage)
555  }
556
557  // Register agent's frontmatter hooks (scoped to agent lifecycle)
558  // Pass isAgent=true to convert Stop hooks to SubagentStop (since subagents trigger SubagentStop)
559  // Same admin-trusted gate for frontmatter hooks: under ["hooks"] alone
560  // (skills/agents not locked), user agents still load — block their
561  // frontmatter-hook REGISTRATION here where source is known, rather than
562  // blanket-blocking all session hooks at execution time (which would
563  // also kill plugin agents' hooks).
564  const hooksAllowedForThisAgent =
565    !isRestrictedToPluginOnly('hooks') ||
566    isSourceAdminTrusted(agentDefinition.source)
567  if (agentDefinition.hooks && hooksAllowedForThisAgent) {
568    registerFrontmatterHooks(
569      rootSetAppState,
570      agentId,
571      agentDefinition.hooks,
572      `agent '${agentDefinition.agentType}'`,
573      true, // isAgent - converts Stop to SubagentStop
574    )
575  }
576
577  // Preload skills from agent frontmatter
578  const skillsToPreload = agentDefinition.skills ?? []
579  if (skillsToPreload.length > 0) {
580    const allSkills = await getSkillToolCommands(getProjectRoot())
581
582    // Filter valid skills and warn about missing ones
583    const validSkills: Array<{
584      skillName: string
585      skill: (typeof allSkills)[0] & { type: 'prompt' }
586    }> = []
587
588    for (const skillName of skillsToPreload) {
589      // Resolve the skill name, trying multiple strategies:
590      // 1. Exact match (hasCommand checks name, userFacingName, aliases)
591      // 2. Fully-qualified with agent's plugin prefix (e.g., "my-skill" → "plugin:my-skill")
592      // 3. Suffix match on ":skillName" for plugin-namespaced skills
593      const resolvedName = resolveSkillName(
594        skillName,
595        allSkills,
596        agentDefinition,
597      )
598      if (!resolvedName) {
599        logForDebugging(
600          `[Agent: ${agentDefinition.agentType}] Warning: Skill '${skillName}' specified in frontmatter was not found`,
601          { level: 'warn' },
602        )
603        continue
604      }
605
606      const skill = getCommand(resolvedName, allSkills)
607      if (skill.type !== 'prompt') {
608        logForDebugging(
609          `[Agent: ${agentDefinition.agentType}] Warning: Skill '${skillName}' is not a prompt-based skill`,
610          { level: 'warn' },
611        )
612        continue
613      }
614      validSkills.push({ skillName, skill })
615    }
616
617    // Load all skill contents concurrently and add to initial messages
618    const { formatSkillLoadingMetadata } = await import(
619      '../../utils/processUserInput/processSlashCommand.js'
620    )
621    const loaded = await Promise.all(
622      validSkills.map(async ({ skillName, skill }) => ({
623        skillName,
624        skill,
625        content: await skill.getPromptForCommand('', toolUseContext),
626      })),
627    )
628    for (const { skillName, skill, content } of loaded) {
629      logForDebugging(
630        `[Agent: ${agentDefinition.agentType}] Preloaded skill '${skillName}'`,
631      )
632
633      // Add command-message metadata so the UI shows which skill is loading
634      const metadata = formatSkillLoadingMetadata(
635        skillName,
636        skill.progressMessage,
637      )
638
639      initialMessages.push(
640        createUserMessage({
641          content: [{ type: 'text', text: metadata }, ...content],
642          isMeta: true,
643        }),
644      )
645    }
646  }
647
648  // Initialize agent-specific MCP servers (additive to parent's servers)
649  const {
650    clients: mergedMcpClients,
651    tools: agentMcpTools,
652    cleanup: mcpCleanup,
653  } = await initializeAgentMcpServers(
654    agentDefinition,
655    toolUseContext.options.mcpClients,
656  )
657
658  // Merge agent MCP tools with resolved agent tools, deduplicating by name.
659  // resolvedTools is already deduplicated (see resolveAgentTools), so skip
660  // the spread + uniqBy overhead when there are no agent-specific MCP tools.
661  const allTools =
662    agentMcpTools.length > 0
663      ? uniqBy([...resolvedTools, ...agentMcpTools], 'name')
664      : resolvedTools
665
666  // Build agent-specific options
667  const agentOptions: ToolUseContext['options'] = {
668    isNonInteractiveSession: useExactTools
669      ? toolUseContext.options.isNonInteractiveSession
670      : isAsync
671        ? true
672        : (toolUseContext.options.isNonInteractiveSession ?? false),
673    appendSystemPrompt: toolUseContext.options.appendSystemPrompt,
674    tools: allTools,
675    commands: [],
676    debug: toolUseContext.options.debug,
677    verbose: toolUseContext.options.verbose,
678    mainLoopModel: resolvedAgentModel,
679    // For fork children (useExactTools), inherit thinking config to match the
680    // parent's API request prefix for prompt cache hits. For regular
681    // sub-agents, disable thinking to control output token costs.
682    thinkingConfig: useExactTools
683      ? toolUseContext.options.thinkingConfig
684      : { type: 'disabled' as const },
685    mcpClients: mergedMcpClients,
686    mcpResources: toolUseContext.options.mcpResources,
687    agentDefinitions: toolUseContext.options.agentDefinitions,
688    // Fork children (useExactTools path) need querySource on context.options
689    // for the recursive-fork guard at AgentTool.tsx call() — it checks
690    // options.querySource === 'agent:builtin:fork'. This survives autocompact
691    // (which rewrites messages, not context.options). Without this, the guard
692    // reads undefined and only the message-scan fallback fires — which
693    // autocompact defeats by replacing the fork-boilerplate message.
694    ...(useExactTools && { querySource }),
695  }
696
697  // Create subagent context using shared helper
698  // - Sync agents share setAppState, setResponseLength, abortController with parent
699  // - Async agents are fully isolated (but with explicit unlinked abortController)
700  const agentToolUseContext = createSubagentContext(toolUseContext, {
701    options: agentOptions,
702    agentId,
703    agentType: agentDefinition.agentType,
704    messages: initialMessages,
705    readFileState: agentReadFileState,
706    abortController: agentAbortController,
707    getAppState: agentGetAppState,
708    // Sync agents share these callbacks with parent
709    shareSetAppState: !isAsync,
710    shareSetResponseLength: true, // Both sync and async contribute to response metrics
711    criticalSystemReminder_EXPERIMENTAL:
712      agentDefinition.criticalSystemReminder_EXPERIMENTAL,
713    contentReplacementState,
714  })
715
716  // Preserve tool use results for subagents with viewable transcripts (in-process teammates)
717  if (preserveToolUseResults) {
718    agentToolUseContext.preserveToolUseResults = true
719  }
720
721  // Expose cache-safe params for background summarization (prompt cache sharing)
722  if (onCacheSafeParams) {
723    onCacheSafeParams({
724      systemPrompt: agentSystemPrompt,
725      userContext: resolvedUserContext,
726      systemContext: resolvedSystemContext,
727      toolUseContext: agentToolUseContext,
728      forkContextMessages: initialMessages,
729    })
730  }
731
732  // Record initial messages before the query loop starts, plus the agentType
733  // so resume can route correctly when subagent_type is omitted. Both writes
734  // are fire-and-forget — persistence failure shouldn't block the agent.
735  void recordSidechainTranscript(initialMessages, agentId).catch(_err =>
736    logForDebugging(`Failed to record sidechain transcript: ${_err}`),
737  )
738  void writeAgentMetadata(agentId, {
739    agentType: agentDefinition.agentType,
740    ...(worktreePath && { worktreePath }),
741    ...(description && { description }),
742  }).catch(_err => logForDebugging(`Failed to write agent metadata: ${_err}`))
743
744  // Track the last recorded message UUID for parent chain continuity
745  let lastRecordedUuid: UUID | null = initialMessages.at(-1)?.uuid ?? null
746
747  try {
748    for await (const message of query({
749      messages: initialMessages,
750      systemPrompt: agentSystemPrompt,
751      userContext: resolvedUserContext,
752      systemContext: resolvedSystemContext,
753      canUseTool,
754      toolUseContext: agentToolUseContext,
755      querySource,
756      maxTurns: maxTurns ?? agentDefinition.maxTurns,
757    })) {
758      onQueryProgress?.()
759      // Forward subagent API request starts to parent's metrics display
760      // so TTFT/OTPS update during subagent execution.
761      if (
762        message.type === 'stream_event' &&
763        message.event.type === 'message_start' &&
764        message.ttftMs != null
765      ) {
766        toolUseContext.pushApiMetricsEntry?.(message.ttftMs)
767        continue
768      }
769
770      // Yield attachment messages (e.g., structured_output) without recording them
771      if (message.type === 'attachment') {
772        // Handle max turns reached signal from query.ts
773        if (message.attachment.type === 'max_turns_reached') {
774          logForDebugging(
775            `[Agent
776: $
777{
778  agentDefinition.agentType
779}
780] Reached max turns limit ($
781{
782  message.attachment.maxTurns
783}
784)`,
785          )
786          break
787        }
788        yield message
789        continue
790      }
791
792      if (isRecordableMessage(message)) {
793        // Record only the new message with correct parent (O(1) per message)
794        await recordSidechainTranscript(
795          [message],
796          agentId,
797          lastRecordedUuid,
798        ).catch(err =>
799          logForDebugging(`Failed to record sidechain transcript: ${err}`),
800        )
801        if (message.type !== 'progress') {
802          lastRecordedUuid = message.uuid
803        }
804        yield message
805      }
806    }
807
808    if (agentAbortController.signal.aborted) {
809      throw new AbortError()
810    }
811
812    // Run callback if provided (only built-in agents have callbacks)
813    if (isBuiltInAgent(agentDefinition) && agentDefinition.callback) {
814      agentDefinition.callback()
815    }
816  } finally {
817    // Clean up agent-specific MCP servers (runs on normal completion, abort, or error)
818    await mcpCleanup()
819    // Clean up agent's session hooks
820    if (agentDefinition.hooks) {
821      clearSessionHooks(rootSetAppState, agentId)
822    }
823    // Clean up prompt cache tracking state for this agent
824    if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
825      cleanupAgentTracking(agentId)
826    }
827    // Release cloned file state cache memory
828    agentToolUseContext.readFileState.clear()
829    // Release the cloned fork context messages
830    initialMessages.length = 0
831    // Release perfetto agent registry entry
832    unregisterPerfettoAgent(agentId)
833    // Release transcript subdir mapping
834    clearAgentTranscriptSubdir(agentId)
835    // Release this agent's todos entry. Without this, every subagent that
836    // called TodoWrite leaves a key in AppState.todos forever (even after all
837    // items complete, the value is [] but the key stays). Whale sessions
838    // spawn hundreds of agents; each orphaned key is a small leak that adds up.
839    rootSetAppState(prev => {
840      if (!(agentId in prev.todos)) return prev
841      const { [agentId]: _removed, ...todos } = prev.todos
842      return { ...prev, todos }
843    })
844    // Kill any background bash tasks this agent spawned. Without this, a
845    // `run_in_background` shell loop (e.g. test fixture fake-logs.sh) outlives
846    // the agent as a PPID=1 zombie once the main session eventually exits.
847    killShellTasksForAgent(agentId, toolUseContext.getAppState, rootSetAppState)
848    /* eslint-disable @typescript-eslint/no-require-imports */
849    if (feature('MONITOR_TOOL')) {
850      const mcpMod =
851        require('../../tasks/MonitorMcpTask/MonitorMcpTask.js') as typeof import('../../tasks/MonitorMcpTask/MonitorMcpTask.js')
852      mcpMod.killMonitorMcpTasksForAgent(
853        agentId,
854        toolUseContext.getAppState,
855        rootSetAppState,
856      )
857    }
858    /* eslint-enable @typescript-eslint/no-require-imports */
859  }
860}
861
/**
 * Drops every assistant message that still has an unanswered tool call.
 *
 * A tool call counts as answered when some user message in `messages` carries
 * a `tool_result` block whose `tool_use_id` equals the call's `id`. An
 * assistant message is removed as a whole if at least one of its `tool_use`
 * blocks is unanswered; sending such orphaned calls to the API is rejected.
 *
 * @param messages - Conversation to filter; the array itself is not modified.
 * @returns A new array, in the original order, without the offending
 *   assistant messages. Non-assistant messages and assistant messages whose
 *   content is not an array are always kept.
 */
export function filterIncompleteToolCalls(messages: Message[]): Message[] {
  // Pass 1: gather the ID of every tool use that has a matching result.
  const answeredToolUseIds = new Set<string>()
  messages.forEach(candidate => {
    if (candidate?.type !== 'user') return
    const blocks = (candidate as UserMessage).message.content
    if (!Array.isArray(blocks)) return
    for (const block of blocks) {
      if (block.type === 'tool_result' && block.tool_use_id) {
        answeredToolUseIds.add(block.tool_use_id)
      }
    }
  })

  // Pass 2: keep a message unless it is an assistant message holding a
  // tool_use block whose ID never showed up in pass 1.
  return messages.filter(candidate => {
    if (candidate?.type !== 'assistant') return true
    const blocks = (candidate as AssistantMessage).message.content
    if (!Array.isArray(blocks)) return true
    return blocks.every(
      block =>
        block.type !== 'tool_use' ||
        !block.id ||
        answeredToolUseIds.has(block.id),
    )
  })
}
905
/**
 * Builds the system prompt for an agent run.
 *
 * The agent definition's own prompt is passed through
 * enhanceSystemPromptWithEnvDetails together with the model, the additional
 * working directories, and the names of the resolved tools. If obtaining or
 * enhancing that prompt throws, DEFAULT_AGENT_PROMPT is enhanced with the
 * same arguments instead; the error itself is discarded.
 *
 * @param agentDefinition - Definition whose getSystemPrompt supplies the base prompt.
 * @param toolUseContext - Forwarded to getSystemPrompt.
 * @param resolvedAgentModel - Model identifier handed to the enhancer.
 * @param additionalWorkingDirectories - Extra directories handed to the enhancer.
 * @param resolvedTools - Tools whose names form the enabled-tool set.
 * @returns The result of enhanceSystemPromptWithEnvDetails.
 */
async function getAgentSystemPrompt(
  agentDefinition: AgentDefinition,
  toolUseContext: Pick<ToolUseContext, 'options'>,
  resolvedAgentModel: string,
  additionalWorkingDirectories: string[],
  resolvedTools: readonly Tool[],
): Promise<string[]> {
  const enabledToolNames = new Set(
    Array.from(resolvedTools, tool => tool.name),
  )

  // Both the primary and the fallback path differ only in the base prompts.
  const withEnvDetails = (
    basePrompts: Parameters<typeof enhanceSystemPromptWithEnvDetails>[0],
  ) =>
    enhanceSystemPromptWithEnvDetails(
      basePrompts,
      resolvedAgentModel,
      additionalWorkingDirectories,
      enabledToolNames,
    )

  try {
    const definitionPrompt = agentDefinition.getSystemPrompt({ toolUseContext })
    // Awaited here so a rejection from the enhancer also reaches the catch.
    return await withEnvDetails([definitionPrompt])
  } catch {
    return withEnvDetails([DEFAULT_AGENT_PROMPT])
  }
}
933
/**
 * Maps a skill name as written in agent frontmatter onto the name of a
 * registered command.
 *
 * Plugin skills are registered under namespaced names (e.g.
 * "my-plugin:my-skill") while agents refer to them by bare name (e.g.
 * "my-skill"). The following lookups are attempted in order and the first
 * hit wins:
 *
 * 1. The name exactly as given, checked with hasCommand
 * 2. The name qualified with the part of the agent's agentType before the
 *    first ":" (plugin agents have an agentType like "pluginName:agentName")
 * 3. The first entry of allSkills whose name ends with ":skillName"
 *
 * @returns The matching command name, or null when every lookup misses.
 */
function resolveSkillName(
  skillName: string,
  allSkills: Command[],
  agentDefinition: AgentDefinition,
): string | null {
  const isRegistered = (candidate: string) => hasCommand(candidate, allSkills)

  // 1. The bare name may already be registered.
  if (isRegistered(skillName)) return skillName

  // 2. Qualify with the leading segment of the agent type, if there is one.
  const [pluginPrefix] = agentDefinition.agentType.split(':')
  const qualifiedName = pluginPrefix ? `${pluginPrefix}:${skillName}` : null
  if (qualifiedName !== null && isRegistered(qualifiedName)) {
    return qualifiedName
  }

  // 3. Fall back to any skill namespaced under some other prefix.
  const namespacedTail = `:${skillName}`
  const namespacedMatch = allSkills.find(cmd =>
    cmd.name.endsWith(namespacedTail),
  )
  return namespacedMatch?.name ?? null
}