source dump of claude code
at main 1183 lines 39 kB view raw
1import type { Base64ImageSource } from '@anthropic-ai/sdk/resources/index.mjs' 2import { readdir, readFile as readFileAsync } from 'fs/promises' 3import * as path from 'path' 4import { posix, win32 } from 'path' 5import { z } from 'zod/v4' 6import { 7 PDF_AT_MENTION_INLINE_THRESHOLD, 8 PDF_EXTRACT_SIZE_THRESHOLD, 9 PDF_MAX_PAGES_PER_READ, 10} from '../../constants/apiLimits.js' 11import { hasBinaryExtension } from '../../constants/files.js' 12import { memoryFreshnessNote } from '../../memdir/memoryAge.js' 13import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js' 14import { logEvent } from '../../services/analytics/index.js' 15import { 16 type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 17 getFileExtensionForAnalytics, 18} from '../../services/analytics/metadata.js' 19import { 20 countTokensWithAPI, 21 roughTokenCountEstimationForFileType, 22} from '../../services/tokenEstimation.js' 23import { 24 activateConditionalSkillsForPaths, 25 addSkillDirectories, 26 discoverSkillDirsForPaths, 27} from '../../skills/loadSkillsDir.js' 28import type { ToolUseContext } from '../../Tool.js' 29import { buildTool, type ToolDef } from '../../Tool.js' 30import { getCwd } from '../../utils/cwd.js' 31import { getClaudeConfigHomeDir, isEnvTruthy } from '../../utils/envUtils.js' 32import { getErrnoCode, isENOENT } from '../../utils/errors.js' 33import { 34 addLineNumbers, 35 FILE_NOT_FOUND_CWD_NOTE, 36 findSimilarFile, 37 getFileModificationTimeAsync, 38 suggestPathUnderCwd, 39} from '../../utils/file.js' 40import { logFileOperation } from '../../utils/fileOperationAnalytics.js' 41import { formatFileSize } from '../../utils/format.js' 42import { getFsImplementation } from '../../utils/fsOperations.js' 43import { 44 compressImageBufferWithTokenLimit, 45 createImageMetadataText, 46 detectImageFormatFromBuffer, 47 type ImageDimensions, 48 ImageResizeError, 49 maybeResizeAndDownsampleImageBuffer, 50} from '../../utils/imageResizer.js' 
51import { lazySchema } from '../../utils/lazySchema.js' 52import { logError } from '../../utils/log.js' 53import { isAutoMemFile } from '../../utils/memoryFileDetection.js' 54import { createUserMessage } from '../../utils/messages.js' 55import { getCanonicalName, getMainLoopModel } from '../../utils/model/model.js' 56import { 57 mapNotebookCellsToToolResult, 58 readNotebook, 59} from '../../utils/notebook.js' 60import { expandPath } from '../../utils/path.js' 61import { extractPDFPages, getPDFPageCount, readPDF } from '../../utils/pdf.js' 62import { 63 isPDFExtension, 64 isPDFSupported, 65 parsePDFPageRange, 66} from '../../utils/pdfUtils.js' 67import { 68 checkReadPermissionForTool, 69 matchingRuleForInput, 70} from '../../utils/permissions/filesystem.js' 71import type { PermissionDecision } from '../../utils/permissions/PermissionResult.js' 72import { matchWildcardPattern } from '../../utils/permissions/shellRuleMatching.js' 73import { readFileInRange } from '../../utils/readFileInRange.js' 74import { semanticNumber } from '../../utils/semanticNumber.js' 75import { jsonStringify } from '../../utils/slowOperations.js' 76import { BASH_TOOL_NAME } from '../BashTool/toolName.js' 77import { getDefaultFileReadingLimits } from './limits.js' 78import { 79 DESCRIPTION, 80 FILE_READ_TOOL_NAME, 81 FILE_UNCHANGED_STUB, 82 LINE_FORMAT_INSTRUCTION, 83 OFFSET_INSTRUCTION_DEFAULT, 84 OFFSET_INSTRUCTION_TARGETED, 85 renderPromptTemplate, 86} from './prompt.js' 87import { 88 getToolUseSummary, 89 renderToolResultMessage, 90 renderToolUseErrorMessage, 91 renderToolUseMessage, 92 renderToolUseTag, 93 userFacingName, 94} from './UI.js' 95 96// Device files that would hang the process: infinite output or blocking input. 97// Checked by path only (no I/O). Safe devices like /dev/null are intentionally omitted. 
const BLOCKED_DEVICE_PATHS = new Set([
  // Infinite output — never reach EOF
  '/dev/zero',
  '/dev/random',
  '/dev/urandom',
  '/dev/full',
  // Blocks waiting for input
  '/dev/stdin',
  '/dev/tty',
  '/dev/console',
  // Nonsensical to read
  '/dev/stdout',
  '/dev/stderr',
  // fd aliases for stdin/stdout/stderr
  '/dev/fd/0',
  '/dev/fd/1',
  '/dev/fd/2',
])

/**
 * Returns true when `filePath` names a device file that must not be read.
 * Pure string comparison — performs no filesystem I/O.
 */
function isBlockedDevicePath(filePath: string): boolean {
  if (BLOCKED_DEVICE_PATHS.has(filePath)) return true
  // /proc/self/fd/0-2 and /proc/<pid>/fd/0-2 are Linux aliases for stdio
  if (
    filePath.startsWith('/proc/') &&
    (filePath.endsWith('/fd/0') ||
      filePath.endsWith('/fd/1') ||
      filePath.endsWith('/fd/2'))
  )
    return true
  return false
}

// Narrow no-break space (U+202F) used by some macOS versions in screenshot filenames
const THIN_SPACE = String.fromCharCode(8239)

/**
 * Resolves macOS screenshot paths that may have different space characters.
 * macOS uses either a regular space or a narrow no-break space (U+202F)
 * before AM/PM in screenshot filenames depending on the macOS version.
 *
 * @param filePath - The normalized file path to resolve
 * @returns The same path with the alternate space character swapped in
 *   (to try if the original doesn't exist), or undefined when the filename
 *   does not match the screenshot AM/PM pattern.
 */
function getAlternateScreenshotPath(filePath: string): string | undefined {
  const filename = path.basename(filePath)
  const amPmPattern = /^(.+)([ \u202F])(AM|PM)(\.png)$/
  const match = filename.match(amPmPattern)
  if (!match) return undefined

  const currentSpace = match[2]
  const alternateSpace = currentSpace === ' ' ? THIN_SPACE : ' '
  // NOTE(review): replace() swaps the first occurrence in the FULL path —
  // presumably the "<space>AM.png" suffix only appears in the basename;
  // a directory name containing the same suffix would be rewritten instead.
  return filePath.replace(
    `${currentSpace}${match[3]}${match[4]}`,
    `${alternateSpace}${match[3]}${match[4]}`,
  )
}

// File read listeners - allows other services to be notified when files are read
type FileReadListener = (filePath: string, content: string) => void
const fileReadListeners: FileReadListener[] = []

/**
 * Subscribes `listener` to successful text-file reads.
 * @returns An unsubscribe function that removes the listener.
 */
export function registerFileReadListener(
  listener: FileReadListener,
): () => void {
  fileReadListeners.push(listener)
  return () => {
    const i = fileReadListeners.indexOf(listener)
    if (i >= 0) fileReadListeners.splice(i, 1)
  }
}

/** Thrown when a file's content exceeds the per-read token budget. */
export class MaxFileReadTokenExceededError extends Error {
  constructor(
    public tokenCount: number,
    public maxTokens: number,
  ) {
    super(
      `File content (${tokenCount} tokens) exceeds maximum allowed tokens (${maxTokens}). Use offset and limit parameters to read specific portions of the file, or search for specific content instead of reading the whole file.`,
    )
    this.name = 'MaxFileReadTokenExceededError'
  }
}

// Common image extensions
const IMAGE_EXTENSIONS = new Set(['png', 'jpg', 'jpeg', 'gif', 'webp'])

/**
 * Detects if a file path is a session-related file for analytics logging.
 * Only matches files within the Claude config directory (e.g., ~/.claude).
 * Returns the type of session file or null if not a session file.
 */
function detectSessionFileType(
  filePath: string,
): 'session_memory' | 'session_transcript' | null {
  const configDir = getClaudeConfigHomeDir()

  // Only match files within the Claude config directory
  if (!filePath.startsWith(configDir)) {
    return null
  }

  // Normalize path to use forward slashes for consistent matching across platforms
  const normalizedPath = filePath.split(win32.sep).join(posix.sep)

  // Session memory files: ~/.claude/session-memory/*.md (including summary.md)
  if (
    normalizedPath.includes('/session-memory/') &&
    normalizedPath.endsWith('.md')
  ) {
    return 'session_memory'
  }

  // Session JSONL transcript files: ~/.claude/projects/*/*.jsonl
  if (
    normalizedPath.includes('/projects/') &&
    normalizedPath.endsWith('.jsonl')
  ) {
    return 'session_transcript'
  }

  return null
}

// Tool input: absolute path, plus optional line range (text files) or
// page range (PDFs). strictObject rejects unknown keys.
const inputSchema = lazySchema(() =>
  z.strictObject({
    file_path: z.string().describe('The absolute path to the file to read'),
    offset: semanticNumber(z.number().int().nonnegative().optional()).describe(
      'The line number to start reading from. Only provide if the file is too large to read at once',
    ),
    limit: semanticNumber(z.number().int().positive().optional()).describe(
      'The number of lines to read. Only provide if the file is too large to read at once.',
    ),
    pages: z
      .string()
      .optional()
      .describe(
        `Page range for PDF files (e.g., "1-5", "3", "10-20"). Only applicable to PDF files.
Maximum ${PDF_MAX_PAGES_PER_READ} pages per request.`,
      ),
  }),
)
type InputSchema = ReturnType<typeof inputSchema>

export type Input = z.infer<InputSchema>

// Tool output: discriminated union over the kinds of content Read can
// return (text, image, notebook, pdf, extracted pdf pages, dedup stub).
const outputSchema = lazySchema(() => {
  // Define the media types supported for images
  const imageMediaTypes = z.enum([
    'image/jpeg',
    'image/png',
    'image/gif',
    'image/webp',
  ])

  return z.discriminatedUnion('type', [
    z.object({
      type: z.literal('text'),
      file: z.object({
        filePath: z.string().describe('The path to the file that was read'),
        content: z.string().describe('The content of the file'),
        numLines: z
          .number()
          .describe('Number of lines in the returned content'),
        startLine: z.number().describe('The starting line number'),
        totalLines: z.number().describe('Total number of lines in the file'),
      }),
    }),
    z.object({
      type: z.literal('image'),
      file: z.object({
        base64: z.string().describe('Base64-encoded image data'),
        type: imageMediaTypes.describe('The MIME type of the image'),
        originalSize: z.number().describe('Original file size in bytes'),
        dimensions: z
          .object({
            originalWidth: z
              .number()
              .optional()
              .describe('Original image width in pixels'),
            originalHeight: z
              .number()
              .optional()
              .describe('Original image height in pixels'),
            displayWidth: z
              .number()
              .optional()
              .describe('Displayed image width in pixels (after resizing)'),
            displayHeight: z
              .number()
              .optional()
              .describe('Displayed image height in pixels (after resizing)'),
          })
          .optional()
          .describe('Image dimension info for coordinate mapping'),
      }),
    }),
    z.object({
      type: z.literal('notebook'),
      file: z.object({
        filePath: z.string().describe('The path to the notebook file'),
        cells: z.array(z.any()).describe('Array of notebook cells'),
      }),
    }),
    z.object({
      type: z.literal('pdf'),
      file: z.object({
        filePath: z.string().describe('The path to the PDF file'),
        base64: z.string().describe('Base64-encoded PDF data'),
        originalSize: z.number().describe('Original file size in bytes'),
      }),
    }),
    z.object({
      type: z.literal('parts'),
      file: z.object({
        filePath: z.string().describe('The path to the PDF file'),
        originalSize: z.number().describe('Original file size in bytes'),
        count: z.number().describe('Number of pages extracted'),
        outputDir: z
          .string()
          .describe('Directory containing extracted page images'),
      }),
    }),
    z.object({
      type: z.literal('file_unchanged'),
      file: z.object({
        filePath: z.string().describe('The path to the file'),
      }),
    }),
  ])
})
type OutputSchema = ReturnType<typeof outputSchema>

export type Output = z.infer<OutputSchema>

export const FileReadTool = buildTool({
  name: FILE_READ_TOOL_NAME,
  searchHint: 'read files, images, PDFs, notebooks',
  // Output is bounded by maxTokens (validateContentTokens). Persisting to a
  // file the model reads back with Read is circular — never persist.
  maxResultSizeChars: Infinity,
  strict: true,
  async description() {
    return DESCRIPTION
  },
  // Builds the tool's prompt from the configured read limits: optional
  // max-size warning and either the targeted or default offset nudge.
  async prompt() {
    const limits = getDefaultFileReadingLimits()
    const maxSizeInstruction = limits.includeMaxSizeInPrompt
      ? `. Files larger than ${formatFileSize(limits.maxSizeBytes)} will return an error; use offset and limit for larger files`
      : ''
    const offsetInstruction = limits.targetedRangeNudge
      ? OFFSET_INSTRUCTION_TARGETED
      : OFFSET_INSTRUCTION_DEFAULT
    return renderPromptTemplate(
      pickLineFormatInstruction(),
      maxSizeInstruction,
      offsetInstruction,
    )
  },
  get inputSchema(): InputSchema {
    return inputSchema()
  },
  get outputSchema(): OutputSchema {
    return outputSchema()
  },
  userFacingName,
  getToolUseSummary,
  getActivityDescription(input) {
    const summary = getToolUseSummary(input)
    return summary ? `Reading ${summary}` : 'Reading file'
  },
  isConcurrencySafe() {
    return true
  },
  isReadOnly() {
    return true
  },
  toAutoClassifierInput(input) {
    return input.file_path
  },
  isSearchOrReadCommand() {
    return { isSearch: false, isRead: true }
  },
  getPath({ file_path }): string {
    return file_path || getCwd()
  },
  backfillObservableInput(input) {
    // hooks.mdx documents file_path as absolute; expand so hook allowlists
    // can't be bypassed via ~ or relative paths.
    if (typeof input.file_path === 'string') {
      input.file_path = expandPath(input.file_path)
    }
  },
  async preparePermissionMatcher({ file_path }) {
    return pattern => matchWildcardPattern(pattern, file_path)
  },
  async checkPermissions(input, context): Promise<PermissionDecision> {
    const appState = context.getAppState()
    return checkReadPermissionForTool(
      FileReadTool,
      input,
      appState.toolPermissionContext,
    )
  },
  renderToolUseMessage,
  renderToolUseTag,
  renderToolResultMessage,
  // UI.tsx:140 — ALL types render summary chrome only: "Read N lines",
  // "Read image (42KB)". Never the content itself. The model-facing
  // serialization (below) sends content + CYBER_RISK_MITIGATION_REMINDER
  // + line prefixes; UI shows none of it. Nothing to index. Caught by
  // the render-fidelity test when this initially claimed file.content.
  extractSearchText() {
    return ''
  },
  renderToolUseErrorMessage,
  /**
   * I/O-free input validation: page-range syntax and size, permission deny
   * rules, UNC paths, binary extensions, and blocked device files. All
   * filesystem access is deliberately deferred until after permission is
   * granted (see the UNC check below).
   */
  async validateInput({ file_path, pages }, toolUseContext: ToolUseContext) {
    // Validate pages parameter (pure string parsing, no I/O)
    if (pages !== undefined) {
      const parsed = parsePDFPageRange(pages)
      if (!parsed) {
        return {
          result: false,
          message: `Invalid pages parameter: "${pages}". Use formats like "1-5", "3", or "10-20". Pages are 1-indexed.`,
          errorCode: 7,
        }
      }
      // Open-ended ranges (lastPage === Infinity) are treated as oversized.
      const rangeSize =
        parsed.lastPage === Infinity
          ? PDF_MAX_PAGES_PER_READ + 1
          : parsed.lastPage - parsed.firstPage + 1
      if (rangeSize > PDF_MAX_PAGES_PER_READ) {
        return {
          result: false,
          message: `Page range "${pages}" exceeds maximum of ${PDF_MAX_PAGES_PER_READ} pages per request. Please use a smaller range.`,
          errorCode: 8,
        }
      }
    }

    // Path expansion + deny rule check (no I/O)
    const fullFilePath = expandPath(file_path)

    const appState = toolUseContext.getAppState()
    const denyRule = matchingRuleForInput(
      fullFilePath,
      appState.toolPermissionContext,
      'read',
      'deny',
    )
    if (denyRule !== null) {
      return {
        result: false,
        message:
          'File is in a directory that is denied by your permission settings.',
        errorCode: 1,
      }
    }

    // SECURITY: UNC path check (no I/O) — defer filesystem operations
    // until after user grants permission to prevent NTLM credential leaks
    const isUncPath =
      fullFilePath.startsWith('\\\\') || fullFilePath.startsWith('//')
    if (isUncPath) {
      return { result: true }
    }

    // Binary extension check (string check on extension only, no I/O).
    // PDF, images, and SVG are excluded - this tool renders them natively.
    const ext = path.extname(fullFilePath).toLowerCase()
    if (
      hasBinaryExtension(fullFilePath) &&
      !isPDFExtension(ext) &&
      !IMAGE_EXTENSIONS.has(ext.slice(1))
    ) {
      return {
        result: false,
        message: `This tool cannot read binary files. The file appears to be a binary ${ext} file. Please use appropriate tools for binary file analysis.`,
        errorCode: 4,
      }
    }

    // Block specific device files that would hang (infinite output or blocking input).
    // This is a path-based check with no I/O — safe special files like /dev/null are allowed.
    if (isBlockedDevicePath(fullFilePath)) {
      return {
        result: false,
        message: `Cannot read '${file_path}': this device file would block or produce infinite output.`,
        errorCode: 9,
      }
    }

    return { result: true }
  },
  async call(
    { file_path, offset = 1, limit = undefined, pages },
    context,
    _canUseTool?,
    parentMessage?,
  ) {
    const { readFileState, fileReadingLimits } = context

    // Per-call limit overrides fall back to the global defaults.
    const defaults = getDefaultFileReadingLimits()
    const maxSizeBytes =
      fileReadingLimits?.maxSizeBytes ?? defaults.maxSizeBytes
    const maxTokens = fileReadingLimits?.maxTokens ?? defaults.maxTokens

    // Telemetry: track when callers override default read limits.
    // Only fires on override (low volume) — event count = override frequency.
    if (fileReadingLimits !== undefined) {
      logEvent('tengu_file_read_limits_override', {
        hasMaxTokens: fileReadingLimits.maxTokens !== undefined,
        hasMaxSizeBytes: fileReadingLimits.maxSizeBytes !== undefined,
      })
    }

    const ext = path.extname(file_path).toLowerCase().slice(1)
    // Use expandPath for consistent path normalization with FileEditTool/FileWriteTool
    // (especially handles whitespace trimming and Windows path separators)
    const fullFilePath = expandPath(file_path)

    // Dedup: if we've already read this exact range and the file hasn't
    // changed on disk, return a stub instead of re-sending the full content.
    // The earlier Read tool_result is still in context — two full copies
    // waste cache_creation tokens on every subsequent turn. BQ proxy shows
    // ~18% of Read calls are same-file collisions (up to 2.64% of fleet
    // cache_creation). Only applies to text/notebook reads — images/PDFs
    // aren't cached in readFileState so won't match here.
    //
    // Ant soak: 1,734 dedup hits in 2h, no Read error regression.
    // Killswitch pattern: GB can disable if the stub message confuses
    // the model externally.
    // 3P default: killswitch off = dedup enabled. Client-side only — no
    // server support needed, safe for Bedrock/Vertex/Foundry.
    const dedupKillswitch = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_read_dedup_killswitch',
      false,
    )
    const existingState = dedupKillswitch
      ? undefined
      : readFileState.get(fullFilePath)
    // Only dedup entries that came from a prior Read (offset is always set
    // by Read). Edit/Write store offset=undefined — their readFileState
    // entry reflects post-edit mtime, so deduping against it would wrongly
    // point the model at the pre-edit Read content.
    if (
      existingState &&
      !existingState.isPartialView &&
      existingState.offset !== undefined
    ) {
      const rangeMatch =
        existingState.offset === offset && existingState.limit === limit
      if (rangeMatch) {
        try {
          // Unchanged mtime is the "file hasn't changed" signal.
          const mtimeMs = await getFileModificationTimeAsync(fullFilePath)
          if (mtimeMs === existingState.timestamp) {
            const analyticsExt = getFileExtensionForAnalytics(fullFilePath)
            logEvent('tengu_file_read_dedup', {
              ...(analyticsExt !== undefined && { ext: analyticsExt }),
            })
            return {
              data: {
                type: 'file_unchanged' as const,
                file: { filePath: file_path },
              },
            }
          }
        } catch {
          // stat failed — fall through to full read
        }
      }
    }

    // Discover skills from this file's path (fire-and-forget, non-blocking)
    // Skip in simple mode - no skills available
    const cwd = getCwd()
    if (!isEnvTruthy(process.env.CLAUDE_CODE_SIMPLE)) {
      const newSkillDirs = await discoverSkillDirsForPaths([fullFilePath], cwd)
      if (newSkillDirs.length > 0) {
        // Store discovered dirs for attachment display
        for (const dir of newSkillDirs) {
          context.dynamicSkillDirTriggers?.add(dir)
        }
        // Don't await - let skill loading happen in the background
        addSkillDirectories(newSkillDirs).catch(() => {})
      }

      // Activate conditional skills whose path patterns match this file
      activateConditionalSkillsForPaths([fullFilePath], cwd)
    }

    try {
      return await callInner(
        file_path,
        fullFilePath,
        fullFilePath,
        ext,
        offset,
        limit,
        pages,
        maxSizeBytes,
        maxTokens,
        readFileState,
        context,
        parentMessage?.message.id,
      )
    } catch (error) {
      // Handle file-not-found: suggest similar files
      const code = getErrnoCode(error)
      if (code === 'ENOENT') {
        // macOS screenshots may use a thin space or regular space before
        // AM/PM — try the alternate before giving up.
        const altPath = getAlternateScreenshotPath(fullFilePath)
        if (altPath) {
          try {
            // Retry with the alternate space character as the resolved path.
            return await callInner(
              file_path,
              fullFilePath,
              altPath,
              ext,
              offset,
              limit,
              pages,
              maxSizeBytes,
              maxTokens,
              readFileState,
              context,
              parentMessage?.message.id,
            )
          } catch (altError) {
            if (!isENOENT(altError)) {
              throw altError
            }
            // Alt path also missing — fall through to friendly error
          }
        }

        const similarFilename = findSimilarFile(fullFilePath)
        const cwdSuggestion = await suggestPathUnderCwd(fullFilePath)
        let message = `File does not exist. ${FILE_NOT_FOUND_CWD_NOTE} ${getCwd()}.`
        if (cwdSuggestion) {
          message += ` Did you mean ${cwdSuggestion}?`
        } else if (similarFilename) {
          message += ` Did you mean ${similarFilename}?`
        }
        throw new Error(message)
      }
      throw error
    }
  },
  /**
   * Maps a tool Output to the tool_result block sent back to the model.
   * PDF/parts results return short metadata strings only — their heavy
   * content travels as supplemental user messages created in callInner.
   */
  mapToolResultToToolResultBlockParam(data, toolUseID) {
    switch (data.type) {
      case 'image': {
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: [
            {
              type: 'image',
              source: {
                type: 'base64',
                data: data.file.base64,
                media_type: data.file.type,
              },
            },
          ],
        }
      }
      case 'notebook':
        return mapNotebookCellsToToolResult(data.file.cells, toolUseID)
      case 'pdf':
        // Return PDF metadata only - the actual content is sent as a supplemental DocumentBlockParam
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: `PDF file read: ${data.file.filePath} (${formatFileSize(data.file.originalSize)})`,
        }
      case 'parts':
        // Extracted page images are read and sent as image blocks in mapToolResultToAPIMessage
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: `PDF pages extracted: ${data.file.count} page(s) from ${data.file.filePath} (${formatFileSize(data.file.originalSize)})`,
        }
      case 'file_unchanged':
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: FILE_UNCHANGED_STUB,
        }
      case 'text': {
        let content: string

        if (data.file.content) {
          // Freshness note (auto-memory files) + numbered lines + optional
          // cyber-risk reminder.
          content =
            memoryFileFreshnessPrefix(data) +
            formatFileLines(data.file) +
            (shouldIncludeFileReadMitigation()
              ? CYBER_RISK_MITIGATION_REMINDER
              : '')
        } else {
          // Determine the appropriate warning message
          content =
            data.file.totalLines === 0
              ? '<system-reminder>Warning: the file exists but the contents are empty.</system-reminder>'
              : `<system-reminder>Warning: the file exists but is shorter than the provided offset (${data.file.startLine}). The file has ${data.file.totalLines} lines.</system-reminder>`
        }

        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content,
        }
      }
    }
  },
} satisfies ToolDef<InputSchema, Output>)

/** Returns the line-number formatting instruction used in the tool prompt. */
function pickLineFormatInstruction(): string {
  return LINE_FORMAT_INSTRUCTION
}

/** Format file content with line numbers. */
function formatFileLines(file: { content: string; startLine: number }): string {
  return addLineNumbers(file)
}

export const CYBER_RISK_MITIGATION_REMINDER =
  '\n\n<system-reminder>\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n</system-reminder>\n'

// Models where cyber risk mitigation should be skipped
const MITIGATION_EXEMPT_MODELS = new Set(['claude-opus-4-6'])

/** True unless the current main-loop model is exempt from the cyber-risk reminder. */
function shouldIncludeFileReadMitigation(): boolean {
  const shortName = getCanonicalName(getMainLoopModel())
  return !MITIGATION_EXEMPT_MODELS.has(shortName)
}

/**
 * Side-channel from call() to mapToolResultToToolResultBlockParam: mtime
 * of auto-memory files, keyed by the `data` object identity. Avoids
 * adding a presentation-only field to the output schema (which flows
 * into SDK types) and avoids sync fs in the mapper. WeakMap auto-GCs
 * when the data object becomes unreachable after rendering.
 */
const memoryFileMtimes = new WeakMap<object, number>()

/** Freshness note for `data` if call() recorded an mtime for it; '' otherwise. */
function memoryFileFreshnessPrefix(data: object): string {
  const mtimeMs = memoryFileMtimes.get(data)
  if (mtimeMs === undefined) return ''
  return memoryFreshnessNote(mtimeMs)
}

/**
 * Throws MaxFileReadTokenExceededError when `content` exceeds the token
 * budget. Uses a cheap local estimate first and only calls the counting
 * API when the estimate is above a quarter of the budget; falls back to
 * the estimate if the API returns nothing.
 */
async function validateContentTokens(
  content: string,
  ext: string,
  maxTokens?: number,
): Promise<void> {
  const effectiveMaxTokens =
    maxTokens ?? getDefaultFileReadingLimits().maxTokens

  const tokenEstimate = roughTokenCountEstimationForFileType(content, ext)
  if (!tokenEstimate || tokenEstimate <= effectiveMaxTokens / 4) return

  const tokenCount = await countTokensWithAPI(content)
  const effectiveCount = tokenCount ?? tokenEstimate

  if (effectiveCount > effectiveMaxTokens) {
    throw new MaxFileReadTokenExceededError(effectiveCount, effectiveMaxTokens)
  }
}

type ImageResult = {
  type: 'image'
  file: {
    base64: string
    type: Base64ImageSource['media_type']
    originalSize: number
    dimensions?: ImageDimensions
  }
}

/** Wraps a processed image buffer in the tool's image result shape. */
function createImageResponse(
  buffer: Buffer,
  mediaType: string,
  originalSize: number,
  dimensions?: ImageDimensions,
): ImageResult {
  return {
    type: 'image',
    file: {
      base64: buffer.toString('base64'),
      type: `image/${mediaType}` as Base64ImageSource['media_type'],
      originalSize,
      dimensions,
    },
  }
}

/**
 * Inner implementation of call, separated to allow ENOENT handling in the outer call.
 *
 * @param fullFilePath - expanded path; used as the readFileState cache key
 * @param resolvedFilePath - path actually read from disk (may be the
 *   alternate macOS screenshot path; otherwise equals fullFilePath)
 */
async function callInner(
  file_path: string,
  fullFilePath: string,
  resolvedFilePath: string,
  ext: string,
  offset: number,
  limit: number | undefined,
  pages: string | undefined,
  maxSizeBytes: number,
  maxTokens: number,
  readFileState: ToolUseContext['readFileState'],
  context: ToolUseContext,
  messageId: string | undefined,
): Promise<{
  data: Output
  newMessages?: ReturnType<typeof createUserMessage>[]
}> {
  // --- Notebook ---
  if (ext === 'ipynb') {
    const cells = await readNotebook(resolvedFilePath)
    const cellsJson = jsonStringify(cells)

    const cellsJsonBytes = Buffer.byteLength(cellsJson)
    if (cellsJsonBytes > maxSizeBytes) {
      throw new Error(
        `Notebook content (${formatFileSize(cellsJsonBytes)}) exceeds maximum allowed size (${formatFileSize(maxSizeBytes)}). ` +
          `Use ${BASH_TOOL_NAME} with jq to read specific portions:\n` +
          ` cat "${file_path}" | jq '.cells[:20]' # First 20 cells\n` +
          ` cat "${file_path}" | jq '.cells[100:120]' # Cells 100-120\n` +
          ` cat "${file_path}" | jq '.cells | length' # Count total cells\n` +
          ` cat "${file_path}" | jq '.cells[] | select(.cell_type=="code") | .source' # All code sources`,
      )
    }

    await validateContentTokens(cellsJson, ext, maxTokens)

    // Get mtime via async stat (single call, no prior existence check)
    const stats = await getFsImplementation().stat(resolvedFilePath)
    readFileState.set(fullFilePath, {
      content: cellsJson,
      timestamp: Math.floor(stats.mtimeMs),
      offset,
      limit,
    })
    context.nestedMemoryAttachmentTriggers?.add(fullFilePath)

    const data = {
      type: 'notebook' as const,
      file: { filePath: file_path, cells },
    }

    logFileOperation({
      operation: 'read',
      tool: 'FileReadTool',
      filePath: fullFilePath,
      content: cellsJson,
    })

    return { data }
  }

  // --- Image (single read, no double-read) ---
  if (IMAGE_EXTENSIONS.has(ext)) {
    // Images have their own size limits (token budget + compression) —
    // don't apply the text maxSizeBytes cap.
    const data = await readImageWithTokenBudget(resolvedFilePath, maxTokens)
    context.nestedMemoryAttachmentTriggers?.add(fullFilePath)

    logFileOperation({
      operation: 'read',
      tool: 'FileReadTool',
      filePath: fullFilePath,
      content: data.file.base64,
    })

    // When dimensions are known, attach a meta message so the model can
    // map coordinates between original and displayed sizes.
    const metadataText = data.file.dimensions
      ? createImageMetadataText(data.file.dimensions)
      : null

    return {
      data,
      ...(metadataText && {
        newMessages: [
          createUserMessage({ content: metadataText, isMeta: true }),
        ],
      }),
    }
  }

  // --- PDF ---
  if (isPDFExtension(ext)) {
    if (pages) {
      // Explicit page range: extract pages to images and send them as
      // image blocks in a meta user message.
      const parsedRange = parsePDFPageRange(pages)
      const extractResult = await extractPDFPages(
        resolvedFilePath,
        parsedRange ?? undefined,
      )
      if (!extractResult.success) {
        throw new Error(extractResult.error.message)
      }
      logEvent('tengu_pdf_page_extraction', {
        success: true,
        pageCount: extractResult.data.file.count,
        fileSize: extractResult.data.file.originalSize,
        hasPageRange: true,
      })
      logFileOperation({
        operation: 'read',
        tool: 'FileReadTool',
        filePath: fullFilePath,
        content: `PDF pages ${pages}`,
      })
      const entries = await readdir(extractResult.data.file.outputDir)
      const imageFiles = entries.filter(f => f.endsWith('.jpg')).sort()
      const imageBlocks = await Promise.all(
        imageFiles.map(async f => {
          const imgPath = path.join(extractResult.data.file.outputDir, f)
          const imgBuffer = await readFileAsync(imgPath)
          const resized = await maybeResizeAndDownsampleImageBuffer(
            imgBuffer,
            imgBuffer.length,
            'jpeg',
          )
          return {
            type: 'image' as const,
            source: {
              type: 'base64' as const,
              media_type:
                `image/${resized.mediaType}` as Base64ImageSource['media_type'],
              data: resized.buffer.toString('base64'),
            },
          }
        }),
      )
      return {
        data: extractResult.data,
        ...(imageBlocks.length > 0 && {
          newMessages: [
            createUserMessage({ content: imageBlocks, isMeta: true }),
          ],
        }),
      }
    }

    const pageCount = await getPDFPageCount(resolvedFilePath)
    if (pageCount !== null && pageCount > PDF_AT_MENTION_INLINE_THRESHOLD) {
      throw new Error(
        `This PDF has ${pageCount} pages, which is too many to read at once. ` +
          `Use the pages parameter to read specific page ranges (e.g., pages: "1-5"). ` +
          `Maximum ${PDF_MAX_PAGES_PER_READ} pages per request.`,
      )
    }

    const fs = getFsImplementation()
    const stats = await fs.stat(resolvedFilePath)
    // Fall back to page extraction when the model can't take inline PDFs
    // or the file is too large to inline.
    const shouldExtractPages =
      !isPDFSupported() || stats.size > PDF_EXTRACT_SIZE_THRESHOLD

    if (shouldExtractPages) {
      const extractResult = await extractPDFPages(resolvedFilePath)
      if (extractResult.success) {
        logEvent('tengu_pdf_page_extraction', {
          success: true,
          pageCount: extractResult.data.file.count,
          fileSize: extractResult.data.file.originalSize,
        })
      } else {
        logEvent('tengu_pdf_page_extraction', {
          success: false,
          available: extractResult.error.reason !== 'unavailable',
          fileSize: stats.size,
        })
      }
    }

    if (!isPDFSupported()) {
      throw new Error(
        'Reading full PDFs is not supported with this model. Use a newer model (Sonnet 3.5 v2 or later), ' +
          `or use the pages parameter to read specific page ranges (e.g., pages: "1-5", maximum ${PDF_MAX_PAGES_PER_READ} pages per request). ` +
          'Page extraction requires poppler-utils: install with `brew install poppler` on macOS or `apt-get install poppler-utils` on Debian/Ubuntu.',
      )
    }

    const readResult = await readPDF(resolvedFilePath)
    if (!readResult.success) {
      throw new Error(readResult.error.message)
    }
    const pdfData = readResult.data
    logFileOperation({
      operation: 'read',
      tool: 'FileReadTool',
      filePath: fullFilePath,
      content: pdfData.file.base64,
    })

    // Full document travels as a supplemental meta message; the tool_result
    // itself only carries metadata (see mapToolResultToToolResultBlockParam).
    return {
      data: pdfData,
      newMessages: [
        createUserMessage({
          content: [
            {
              type: 'document',
              source: {
                type: 'base64',
                media_type: 'application/pdf',
                data: pdfData.file.base64,
              },
            },
          ],
          isMeta: true,
        }),
      ],
    }
  }

  // --- Text file (single async read via readFileInRange) ---
  // offset is 1-based (default 1); readFileInRange takes a 0-based line offset.
  const lineOffset = offset === 0 ? 0 : offset - 1
  const { content, lineCount, totalLines, totalBytes, readBytes, mtimeMs } =
    await readFileInRange(
      resolvedFilePath,
      lineOffset,
      limit,
      // Cap bytes only for whole-file reads; an explicit limit bounds the read.
      limit === undefined ? maxSizeBytes : undefined,
      context.abortController.signal,
    )

  await validateContentTokens(content, ext, maxTokens)

  readFileState.set(fullFilePath, {
    content,
    timestamp: Math.floor(mtimeMs),
    offset,
    limit,
  })
  context.nestedMemoryAttachmentTriggers?.add(fullFilePath)

  // Snapshot before iterating — a listener that unsubscribes mid-callback
  // would splice the live array and skip the next listener.
  for (const listener of fileReadListeners.slice()) {
    listener(resolvedFilePath, content)
  }

  const data = {
    type: 'text' as const,
    file: {
      filePath: file_path,
      content,
      numLines: lineCount,
      startLine: offset,
      totalLines,
    },
  }
  if (isAutoMemFile(fullFilePath)) {
    // Recorded for the freshness-note prefix in the result mapper.
    memoryFileMtimes.set(data, mtimeMs)
  }

  logFileOperation({
    operation: 'read',
    tool: 'FileReadTool',
    filePath: fullFilePath,
    content,
  })

  const sessionFileType = detectSessionFileType(fullFilePath)
  const analyticsExt = getFileExtensionForAnalytics(fullFilePath)
  logEvent('tengu_session_file_read', {
    totalLines,
    readLines: lineCount,
    totalBytes,
    readBytes,
    offset,
    ...(limit !== undefined && { limit }),
    ...(analyticsExt !== undefined && { ext: analyticsExt }),
    ...(messageId !== undefined && {
      messageID:
        messageId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    }),
    is_session_memory: sessionFileType === 'session_memory',
    is_session_transcript: sessionFileType === 'session_transcript',
  })

  return { data }
}

/**
 * Reads an image file and applies token-based compression if needed.
 * Reads the file ONCE, then applies standard resize. If the result exceeds
 * the token limit, applies aggressive compression from the same buffer.
1092 * 1093 * @param filePath - Path to the image file 1094 * @param maxTokens - Maximum token budget for the image 1095 * @returns Image data with appropriate compression applied 1096 */ 1097export async function readImageWithTokenBudget( 1098 filePath: string, 1099 maxTokens: number = getDefaultFileReadingLimits().maxTokens, 1100 maxBytes?: number, 1101): Promise<ImageResult> { 1102 // Read file ONCE — capped to maxBytes to avoid OOM on huge files 1103 const imageBuffer = await getFsImplementation().readFileBytes( 1104 filePath, 1105 maxBytes, 1106 ) 1107 const originalSize = imageBuffer.length 1108 1109 if (originalSize === 0) { 1110 throw new Error(`Image file is empty: ${filePath}`) 1111 } 1112 1113 const detectedMediaType = detectImageFormatFromBuffer(imageBuffer) 1114 const detectedFormat = detectedMediaType.split('/')[1] || 'png' 1115 1116 // Try standard resize 1117 let result: ImageResult 1118 try { 1119 const resized = await maybeResizeAndDownsampleImageBuffer( 1120 imageBuffer, 1121 originalSize, 1122 detectedFormat, 1123 ) 1124 result = createImageResponse( 1125 resized.buffer, 1126 resized.mediaType, 1127 originalSize, 1128 resized.dimensions, 1129 ) 1130 } catch (e) { 1131 if (e instanceof ImageResizeError) throw e 1132 logError(e) 1133 result = createImageResponse(imageBuffer, detectedFormat, originalSize) 1134 } 1135 1136 // Check if it fits in token budget 1137 const estimatedTokens = Math.ceil(result.file.base64.length * 0.125) 1138 if (estimatedTokens > maxTokens) { 1139 // Aggressive compression from the SAME buffer (no re-read) 1140 try { 1141 const compressed = await compressImageBufferWithTokenLimit( 1142 imageBuffer, 1143 maxTokens, 1144 detectedMediaType, 1145 ) 1146 return { 1147 type: 'image', 1148 file: { 1149 base64: compressed.base64, 1150 type: compressed.mediaType, 1151 originalSize, 1152 }, 1153 } 1154 } catch (e) { 1155 logError(e) 1156 // Fallback: heavily compressed version from the SAME buffer 1157 try { 1158 const sharpModule 
= await import('sharp') 1159 const sharp = 1160 ( 1161 sharpModule as { 1162 default?: typeof sharpModule 1163 } & typeof sharpModule 1164 ).default || sharpModule 1165 1166 const fallbackBuffer = await sharp(imageBuffer) 1167 .resize(400, 400, { 1168 fit: 'inside', 1169 withoutEnlargement: true, 1170 }) 1171 .jpeg({ quality: 20 }) 1172 .toBuffer() 1173 1174 return createImageResponse(fallbackBuffer, 'jpeg', originalSize) 1175 } catch (error) { 1176 logError(error) 1177 return createImageResponse(imageBuffer, detectedFormat, originalSize) 1178 } 1179 } 1180 } 1181 1182 return result 1183}