source dump of claude code
at main 1183 lines 39 kB view raw
1import type { Base64ImageSource } from '@anthropic-ai/sdk/resources/index.mjs' 2import { readdir, readFile as readFileAsync } from 'fs/promises' 3import * as path from 'path' 4import { posix, win32 } from 'path' 5import { z } from 'zod/v4' 6import { 7 PDF_AT_MENTION_INLINE_THRESHOLD, 8 PDF_EXTRACT_SIZE_THRESHOLD, 9 PDF_MAX_PAGES_PER_READ, 10} from '../../constants/apiLimits.js' 11import { hasBinaryExtension } from '../../constants/files.js' 12import { memoryFreshnessNote } from '../../memdir/memoryAge.js' 13import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js' 14import { logEvent } from '../../services/analytics/index.js' 15import { 16 type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, 17 getFileExtensionForAnalytics, 18} from '../../services/analytics/metadata.js' 19import { 20 countTokensWithAPI, 21 roughTokenCountEstimationForFileType, 22} from '../../services/tokenEstimation.js' 23import { 24 activateConditionalSkillsForPaths, 25 addSkillDirectories, 26 discoverSkillDirsForPaths, 27} from '../../skills/loadSkillsDir.js' 28import type { ToolUseContext } from '../../Tool.js' 29import { buildTool, type ToolDef } from '../../Tool.js' 30import { getCwd } from '../../utils/cwd.js' 31import { getClaudeConfigHomeDir, isEnvTruthy } from '../../utils/envUtils.js' 32import { getErrnoCode, isENOENT } from '../../utils/errors.js' 33import { 34 addLineNumbers, 35 FILE_NOT_FOUND_CWD_NOTE, 36 findSimilarFile, 37 getFileModificationTimeAsync, 38 suggestPathUnderCwd, 39} from '../../utils/file.js' 40import { logFileOperation } from '../../utils/fileOperationAnalytics.js' 41import { formatFileSize } from '../../utils/format.js' 42import { getFsImplementation } from '../../utils/fsOperations.js' 43import { 44 compressImageBufferWithTokenLimit, 45 createImageMetadataText, 46 detectImageFormatFromBuffer, 47 type ImageDimensions, 48 ImageResizeError, 49 maybeResizeAndDownsampleImageBuffer, 50} from '../../utils/imageResizer.js' 
51import { lazySchema } from '../../utils/lazySchema.js' 52import { logError } from '../../utils/log.js' 53import { isAutoMemFile } from '../../utils/memoryFileDetection.js' 54import { createUserMessage } from '../../utils/messages.js' 55import { getCanonicalName, getMainLoopModel } from '../../utils/model/model.js' 56import { 57 mapNotebookCellsToToolResult, 58 readNotebook, 59} from '../../utils/notebook.js' 60import { expandPath } from '../../utils/path.js' 61import { extractPDFPages, getPDFPageCount, readPDF } from '../../utils/pdf.js' 62import { 63 isPDFExtension, 64 isPDFSupported, 65 parsePDFPageRange, 66} from '../../utils/pdfUtils.js' 67import { 68 checkReadPermissionForTool, 69 matchingRuleForInput, 70} from '../../utils/permissions/filesystem.js' 71import type { PermissionDecision } from '../../utils/permissions/PermissionResult.js' 72import { matchWildcardPattern } from '../../utils/permissions/shellRuleMatching.js' 73import { readFileInRange } from '../../utils/readFileInRange.js' 74import { semanticNumber } from '../../utils/semanticNumber.js' 75import { jsonStringify } from '../../utils/slowOperations.js' 76import { BASH_TOOL_NAME } from '../BashTool/toolName.js' 77import { getDefaultFileReadingLimits } from './limits.js' 78import { 79 DESCRIPTION, 80 FILE_READ_TOOL_NAME, 81 FILE_UNCHANGED_STUB, 82 LINE_FORMAT_INSTRUCTION, 83 OFFSET_INSTRUCTION_DEFAULT, 84 OFFSET_INSTRUCTION_TARGETED, 85 renderPromptTemplate, 86} from './prompt.js' 87import { 88 getToolUseSummary, 89 renderToolResultMessage, 90 renderToolUseErrorMessage, 91 renderToolUseMessage, 92 renderToolUseTag, 93 userFacingName, 94} from './UI.js' 95 96// Device files that would hang the process: infinite output or blocking input. 97// Checked by path only (no I/O). Safe devices like /dev/null are intentionally omitted. 
const BLOCKED_DEVICE_PATHS = new Set([
  // Infinite output — never reach EOF
  '/dev/zero',
  '/dev/random',
  '/dev/urandom',
  '/dev/full',
  // Blocks waiting for input
  '/dev/stdin',
  '/dev/tty',
  '/dev/console',
  // Nonsensical to read
  '/dev/stdout',
  '/dev/stderr',
  // fd aliases for stdin/stdout/stderr
  '/dev/fd/0',
  '/dev/fd/1',
  '/dev/fd/2',
])

/**
 * Returns true when `filePath` names a device file that must not be read.
 * Pure string comparison — performs no filesystem I/O.
 */
function isBlockedDevicePath(filePath: string): boolean {
  if (BLOCKED_DEVICE_PATHS.has(filePath)) return true
  // /proc/self/fd/0-2 and /proc/<pid>/fd/0-2 are Linux aliases for stdio
  if (
    filePath.startsWith('/proc/') &&
    (filePath.endsWith('/fd/0') ||
      filePath.endsWith('/fd/1') ||
      filePath.endsWith('/fd/2'))
  )
    return true
  return false
}

// Narrow no-break space (U+202F) used by some macOS versions in screenshot filenames
const THIN_SPACE = String.fromCharCode(8239)

/**
 * Resolves macOS screenshot paths that may have different space characters.
 * macOS uses either a regular space or a narrow no-break space (U+202F)
 * before AM/PM in screenshot filenames depending on the macOS version.
 *
 * @param filePath - The normalized file path to resolve
 * @returns The same path with the alternate space character swapped in
 *   (to try if the original doesn't exist), or undefined when the filename
 *   does not match the screenshot AM/PM pattern.
 */
function getAlternateScreenshotPath(filePath: string): string | undefined {
  const filename = path.basename(filePath)
  const amPmPattern = /^(.+)([ \u202F])(AM|PM)(\.png)$/
  const match = filename.match(amPmPattern)
  if (!match) return undefined

  const currentSpace = match[2]
  const alternateSpace = currentSpace === ' ' ? THIN_SPACE : ' '
  // NOTE(review): replace() swaps the first occurrence in the FULL path —
  // presumably the "<space>AM.png" suffix only appears in the basename;
  // a directory name containing the same suffix would be rewritten instead.
  return filePath.replace(
    `${currentSpace}${match[3]}${match[4]}`,
    `${alternateSpace}${match[3]}${match[4]}`,
  )
}

// File read listeners - allows other services to be notified when files are read
type FileReadListener = (filePath: string, content: string) => void
const fileReadListeners: FileReadListener[] = []

/**
 * Subscribes `listener` to successful text-file reads.
 * @returns An unsubscribe function that removes the listener.
 */
export function registerFileReadListener(
  listener: FileReadListener,
): () => void {
  fileReadListeners.push(listener)
  return () => {
    const i = fileReadListeners.indexOf(listener)
    if (i >= 0) fileReadListeners.splice(i, 1)
  }
}

/** Thrown when a file's content exceeds the per-read token budget. */
export class MaxFileReadTokenExceededError extends Error {
  constructor(
    public tokenCount: number,
    public maxTokens: number,
  ) {
    super(
      `File content (${tokenCount} tokens) exceeds maximum allowed tokens (${maxTokens}). Use offset and limit parameters to read specific portions of the file, or search for specific content instead of reading the whole file.`,
    )
    this.name = 'MaxFileReadTokenExceededError'
  }
}

// Common image extensions
const IMAGE_EXTENSIONS = new Set(['png', 'jpg', 'jpeg', 'gif', 'webp'])

/**
 * Detects if a file path is a session-related file for analytics logging.
 * Only matches files within the Claude config directory (e.g., ~/.claude).
 * Returns the type of session file or null if not a session file.
 */
function detectSessionFileType(
  filePath: string,
): 'session_memory' | 'session_transcript' | null {
  const configDir = getClaudeConfigHomeDir()

  // Only match files within the Claude config directory
  if (!filePath.startsWith(configDir)) {
    return null
  }

  // Normalize path to use forward slashes for consistent matching across platforms
  const normalizedPath = filePath.split(win32.sep).join(posix.sep)

  // Session memory files: ~/.claude/session-memory/*.md (including summary.md)
  if (
    normalizedPath.includes('/session-memory/') &&
    normalizedPath.endsWith('.md')
  ) {
    return 'session_memory'
  }

  // Session JSONL transcript files: ~/.claude/projects/*/*.jsonl
  if (
    normalizedPath.includes('/projects/') &&
    normalizedPath.endsWith('.jsonl')
  ) {
    return 'session_transcript'
  }

  return null
}

// Tool input: absolute path, plus optional line range (text files) or
// page range (PDFs). strictObject rejects unknown keys.
const inputSchema = lazySchema(() =>
  z.strictObject({
    file_path: z.string().describe('The absolute path to the file to read'),
    offset: semanticNumber(z.number().int().nonnegative().optional()).describe(
      'The line number to start reading from. Only provide if the file is too large to read at once',
    ),
    limit: semanticNumber(z.number().int().positive().optional()).describe(
      'The number of lines to read. Only provide if the file is too large to read at once.',
    ),
    pages: z
      .string()
      .optional()
      .describe(
        `Page range for PDF files (e.g., "1-5", "3", "10-20"). Only applicable to PDF files.
Maximum ${PDF_MAX_PAGES_PER_READ} pages per request.`,
      ),
  }),
)
type InputSchema = ReturnType<typeof inputSchema>

export type Input = z.infer<InputSchema>

// Tool output: discriminated union over the kinds of content Read can
// return (text, image, notebook, pdf, extracted pdf pages, dedup stub).
const outputSchema = lazySchema(() => {
  // Define the media types supported for images
  const imageMediaTypes = z.enum([
    'image/jpeg',
    'image/png',
    'image/gif',
    'image/webp',
  ])

  return z.discriminatedUnion('type', [
    z.object({
      type: z.literal('text'),
      file: z.object({
        filePath: z.string().describe('The path to the file that was read'),
        content: z.string().describe('The content of the file'),
        numLines: z
          .number()
          .describe('Number of lines in the returned content'),
        startLine: z.number().describe('The starting line number'),
        totalLines: z.number().describe('Total number of lines in the file'),
      }),
    }),
    z.object({
      type: z.literal('image'),
      file: z.object({
        base64: z.string().describe('Base64-encoded image data'),
        type: imageMediaTypes.describe('The MIME type of the image'),
        originalSize: z.number().describe('Original file size in bytes'),
        dimensions: z
          .object({
            originalWidth: z
              .number()
              .optional()
              .describe('Original image width in pixels'),
            originalHeight: z
              .number()
              .optional()
              .describe('Original image height in pixels'),
            displayWidth: z
              .number()
              .optional()
              .describe('Displayed image width in pixels (after resizing)'),
            displayHeight: z
              .number()
              .optional()
              .describe('Displayed image height in pixels (after resizing)'),
          })
          .optional()
          .describe('Image dimension info for coordinate mapping'),
      }),
    }),
    z.object({
      type: z.literal('notebook'),
      file: z.object({
        filePath: z.string().describe('The path to the notebook file'),
        cells: z.array(z.any()).describe('Array of notebook cells'),
      }),
    }),
    z.object({
      type: z.literal('pdf'),
      file: z.object({
        filePath: z.string().describe('The path to the PDF file'),
        base64: z.string().describe('Base64-encoded PDF data'),
        originalSize: z.number().describe('Original file size in bytes'),
      }),
    }),
    z.object({
      type: z.literal('parts'),
      file: z.object({
        filePath: z.string().describe('The path to the PDF file'),
        originalSize: z.number().describe('Original file size in bytes'),
        count: z.number().describe('Number of pages extracted'),
        outputDir: z
          .string()
          .describe('Directory containing extracted page images'),
      }),
    }),
    z.object({
      type: z.literal('file_unchanged'),
      file: z.object({
        filePath: z.string().describe('The path to the file'),
      }),
    }),
  ])
})
type OutputSchema = ReturnType<typeof outputSchema>

export type Output = z.infer<OutputSchema>

export const FileReadTool = buildTool({
  name: FILE_READ_TOOL_NAME,
  searchHint: 'read files, images, PDFs, notebooks',
  // Output is bounded by maxTokens (validateContentTokens). Persisting to a
  // file the model reads back with Read is circular — never persist.
  maxResultSizeChars: Infinity,
  strict: true,
  async description() {
    return DESCRIPTION
  },
  // Builds the tool's prompt from the configured read limits: optional
  // max-size warning and either the targeted or default offset nudge.
  async prompt() {
    const limits = getDefaultFileReadingLimits()
    const maxSizeInstruction = limits.includeMaxSizeInPrompt
      ? `. Files larger than ${formatFileSize(limits.maxSizeBytes)} will return an error; use offset and limit for larger files`
      : ''
    const offsetInstruction = limits.targetedRangeNudge
      ? OFFSET_INSTRUCTION_TARGETED
      : OFFSET_INSTRUCTION_DEFAULT
    return renderPromptTemplate(
      pickLineFormatInstruction(),
      maxSizeInstruction,
      offsetInstruction,
    )
  },
  get inputSchema(): InputSchema {
    return inputSchema()
  },
  get outputSchema(): OutputSchema {
    return outputSchema()
  },
  userFacingName,
  getToolUseSummary,
  getActivityDescription(input) {
    const summary = getToolUseSummary(input)
    return summary ? `Reading ${summary}` : 'Reading file'
  },
  isConcurrencySafe() {
    return true
  },
  isReadOnly() {
    return true
  },
  toAutoClassifierInput(input) {
    return input.file_path
  },
  isSearchOrReadCommand() {
    return { isSearch: false, isRead: true }
  },
  getPath({ file_path }): string {
    return file_path || getCwd()
  },
  backfillObservableInput(input) {
    // hooks.mdx documents file_path as absolute; expand so hook allowlists
    // can't be bypassed via ~ or relative paths.
    if (typeof input.file_path === 'string') {
      input.file_path = expandPath(input.file_path)
    }
  },
  async preparePermissionMatcher({ file_path }) {
    return pattern => matchWildcardPattern(pattern, file_path)
  },
  async checkPermissions(input, context): Promise<PermissionDecision> {
    const appState = context.getAppState()
    return checkReadPermissionForTool(
      FileReadTool,
      input,
      appState.toolPermissionContext,
    )
  },
  renderToolUseMessage,
  renderToolUseTag,
  renderToolResultMessage,
  // UI.tsx:140 — ALL types render summary chrome only: "Read N lines",
  // "Read image (42KB)". Never the content itself. The model-facing
  // serialization (below) sends content + CYBER_RISK_MITIGATION_REMINDER
  // + line prefixes; UI shows none of it. Nothing to index. Caught by
  // the render-fidelity test when this initially claimed file.content.
  extractSearchText() {
    return ''
  },
  renderToolUseErrorMessage,
  /**
   * I/O-free input validation: page-range syntax and size, permission deny
   * rules, UNC paths, binary extensions, and blocked device files. All
   * filesystem access is deliberately deferred until after permission is
   * granted (see the UNC check below).
   */
  async validateInput({ file_path, pages }, toolUseContext: ToolUseContext) {
    // Validate pages parameter (pure string parsing, no I/O)
    if (pages !== undefined) {
      const parsed = parsePDFPageRange(pages)
      if (!parsed) {
        return {
          result: false,
          message: `Invalid pages parameter: "${pages}". Use formats like "1-5", "3", or "10-20". Pages are 1-indexed.`,
          errorCode: 7,
        }
      }
      // Open-ended ranges (lastPage === Infinity) are treated as oversized.
      const rangeSize =
        parsed.lastPage === Infinity
          ? PDF_MAX_PAGES_PER_READ + 1
          : parsed.lastPage - parsed.firstPage + 1
      if (rangeSize > PDF_MAX_PAGES_PER_READ) {
        return {
          result: false,
          message: `Page range "${pages}" exceeds maximum of ${PDF_MAX_PAGES_PER_READ} pages per request. Please use a smaller range.`,
          errorCode: 8,
        }
      }
    }

    // Path expansion + deny rule check (no I/O)
    const fullFilePath = expandPath(file_path)

    const appState = toolUseContext.getAppState()
    const denyRule = matchingRuleForInput(
      fullFilePath,
      appState.toolPermissionContext,
      'read',
      'deny',
    )
    if (denyRule !== null) {
      return {
        result: false,
        message:
          'File is in a directory that is denied by your permission settings.',
        errorCode: 1,
      }
    }

    // SECURITY: UNC path check (no I/O) — defer filesystem operations
    // until after user grants permission to prevent NTLM credential leaks
    const isUncPath =
      fullFilePath.startsWith('\\\\') || fullFilePath.startsWith('//')
    if (isUncPath) {
      return { result: true }
    }

    // Binary extension check (string check on extension only, no I/O).
    // PDF, images, and SVG are excluded - this tool renders them natively.
    const ext = path.extname(fullFilePath).toLowerCase()
    if (
      hasBinaryExtension(fullFilePath) &&
      !isPDFExtension(ext) &&
      !IMAGE_EXTENSIONS.has(ext.slice(1))
    ) {
      return {
        result: false,
        message: `This tool cannot read binary files. The file appears to be a binary ${ext} file. Please use appropriate tools for binary file analysis.`,
        errorCode: 4,
      }
    }

    // Block specific device files that would hang (infinite output or blocking input).
    // This is a path-based check with no I/O — safe special files like /dev/null are allowed.
    if (isBlockedDevicePath(fullFilePath)) {
      return {
        result: false,
        message: `Cannot read '${file_path}': this device file would block or produce infinite output.`,
        errorCode: 9,
      }
    }

    return { result: true }
  },
  async call(
    { file_path, offset = 1, limit = undefined, pages },
    context,
    _canUseTool?,
    parentMessage?,
  ) {
    const { readFileState, fileReadingLimits } = context

    // Per-call limit overrides fall back to the global defaults.
    const defaults = getDefaultFileReadingLimits()
    const maxSizeBytes =
      fileReadingLimits?.maxSizeBytes ?? defaults.maxSizeBytes
    const maxTokens = fileReadingLimits?.maxTokens ?? defaults.maxTokens

    // Telemetry: track when callers override default read limits.
    // Only fires on override (low volume) — event count = override frequency.
    if (fileReadingLimits !== undefined) {
      logEvent('tengu_file_read_limits_override', {
        hasMaxTokens: fileReadingLimits.maxTokens !== undefined,
        hasMaxSizeBytes: fileReadingLimits.maxSizeBytes !== undefined,
      })
    }

    const ext = path.extname(file_path).toLowerCase().slice(1)
    // Use expandPath for consistent path normalization with FileEditTool/FileWriteTool
    // (especially handles whitespace trimming and Windows path separators)
    const fullFilePath = expandPath(file_path)

    // Dedup: if we've already read this exact range and the file hasn't
    // changed on disk, return a stub instead of re-sending the full content.
    // The earlier Read tool_result is still in context — two full copies
    // waste cache_creation tokens on every subsequent turn. BQ proxy shows
    // ~18% of Read calls are same-file collisions (up to 2.64% of fleet
    // cache_creation). Only applies to text/notebook reads — images/PDFs
    // aren't cached in readFileState so won't match here.
    //
    // Ant soak: 1,734 dedup hits in 2h, no Read error regression.
    // Killswitch pattern: GB can disable if the stub message confuses
    // the model externally.
    // 3P default: killswitch off = dedup enabled. Client-side only — no
    // server support needed, safe for Bedrock/Vertex/Foundry.
    const dedupKillswitch = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_read_dedup_killswitch',
      false,
    )
    const existingState = dedupKillswitch
      ? undefined
      : readFileState.get(fullFilePath)
    // Only dedup entries that came from a prior Read (offset is always set
    // by Read). Edit/Write store offset=undefined — their readFileState
    // entry reflects post-edit mtime, so deduping against it would wrongly
    // point the model at the pre-edit Read content.
    if (
      existingState &&
      !existingState.isPartialView &&
      existingState.offset !== undefined
    ) {
      const rangeMatch =
        existingState.offset === offset && existingState.limit === limit
      if (rangeMatch) {
        try {
          // Unchanged mtime is the "file hasn't changed" signal.
          const mtimeMs = await getFileModificationTimeAsync(fullFilePath)
          if (mtimeMs === existingState.timestamp) {
            const analyticsExt = getFileExtensionForAnalytics(fullFilePath)
            logEvent('tengu_file_read_dedup', {
              ...(analyticsExt !== undefined && { ext: analyticsExt }),
            })
            return {
              data: {
                type: 'file_unchanged' as const,
                file: { filePath: file_path },
              },
            }
          }
        } catch {
          // stat failed — fall through to full read
        }
      }
    }

    // Discover skills from this file's path (fire-and-forget, non-blocking)
    // Skip in simple mode - no skills available
    const cwd = getCwd()
    if (!isEnvTruthy(process.env.CLAUDE_CODE_SIMPLE)) {
      const newSkillDirs = await discoverSkillDirsForPaths([fullFilePath], cwd)
      if (newSkillDirs.length > 0) {
        // Store discovered dirs for attachment display
        for (const dir of newSkillDirs) {
          context.dynamicSkillDirTriggers?.add(dir)
        }
        // Don't await - let skill loading happen in the background
        addSkillDirectories(newSkillDirs).catch(() => {})
      }

      // Activate conditional skills whose path patterns match this file
      activateConditionalSkillsForPaths([fullFilePath], cwd)
    }

    try {
      return await callInner(
        file_path,
        fullFilePath,
        fullFilePath,
        ext,
        offset,
        limit,
        pages,
        maxSizeBytes,
        maxTokens,
        readFileState,
        context,
        parentMessage?.message.id,
      )
    } catch (error) {
      // Handle file-not-found: suggest similar files
      const code = getErrnoCode(error)
      if (code === 'ENOENT') {
        // macOS screenshots may use a thin space or regular space before
        // AM/PM — try the alternate before giving up.
        const altPath = getAlternateScreenshotPath(fullFilePath)
        if (altPath) {
          try {
            // Retry with the alternate space character as the resolved path.
            return await callInner(
              file_path,
              fullFilePath,
              altPath,
              ext,
              offset,
              limit,
              pages,
              maxSizeBytes,
              maxTokens,
              readFileState,
              context,
              parentMessage?.message.id,
            )
          } catch (altError) {
            if (!isENOENT(altError)) {
              throw altError
            }
            // Alt path also missing — fall through to friendly error
          }
        }

        const similarFilename = findSimilarFile(fullFilePath)
        const cwdSuggestion = await suggestPathUnderCwd(fullFilePath)
        let message = `File does not exist. ${FILE_NOT_FOUND_CWD_NOTE} ${getCwd()}.`
        if (cwdSuggestion) {
          message += ` Did you mean ${cwdSuggestion}?`
        } else if (similarFilename) {
          message += ` Did you mean ${similarFilename}?`
        }
        throw new Error(message)
      }
      throw error
    }
  },
  /**
   * Maps a tool Output to the tool_result block sent back to the model.
   * PDF/parts results return short metadata strings only — their heavy
   * content travels as supplemental user messages created in callInner.
   */
  mapToolResultToToolResultBlockParam(data, toolUseID) {
    switch (data.type) {
      case 'image': {
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: [
            {
              type: 'image',
              source: {
                type: 'base64',
                data: data.file.base64,
                media_type: data.file.type,
              },
            },
          ],
        }
      }
      case 'notebook':
        return mapNotebookCellsToToolResult(data.file.cells, toolUseID)
      case 'pdf':
        // Return PDF metadata only - the actual content is sent as a supplemental DocumentBlockParam
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: `PDF file read: ${data.file.filePath} (${formatFileSize(data.file.originalSize)})`,
        }
      case 'parts':
        // Extracted page images are read and sent as image blocks in mapToolResultToAPIMessage
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: `PDF pages extracted: ${data.file.count} page(s) from ${data.file.filePath} (${formatFileSize(data.file.originalSize)})`,
        }
      case 'file_unchanged':
        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content: FILE_UNCHANGED_STUB,
        }
      case 'text': {
        let content: string

        if (data.file.content) {
          // Freshness note (auto-memory files) + numbered lines + optional
          // cyber-risk reminder.
          content =
            memoryFileFreshnessPrefix(data) +
            formatFileLines(data.file) +
            (shouldIncludeFileReadMitigation()
              ? CYBER_RISK_MITIGATION_REMINDER
              : '')
        } else {
          // Determine the appropriate warning message
          content =
            data.file.totalLines === 0
              ? '<system-reminder>Warning: the file exists but the contents are empty.</system-reminder>'
              : `<system-reminder>Warning: the file exists but is shorter than the provided offset (${data.file.startLine}). The file has ${data.file.totalLines} lines.</system-reminder>`
        }

        return {
          tool_use_id: toolUseID,
          type: 'tool_result',
          content,
        }
      }
    }
  },
} satisfies ToolDef<InputSchema, Output>)

/** Returns the line-number formatting instruction used in the tool prompt. */
function pickLineFormatInstruction(): string {
  return LINE_FORMAT_INSTRUCTION
}

/** Format file content with line numbers. */
function formatFileLines(file: { content: string; startLine: number }): string {
  return addLineNumbers(file)
}

export const CYBER_RISK_MITIGATION_REMINDER =
  '\n\n<system-reminder>\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n</system-reminder>\n'

// Models where cyber risk mitigation should be skipped
const MITIGATION_EXEMPT_MODELS = new Set(['claude-opus-4-6'])

/** True unless the current main-loop model is exempt from the cyber-risk reminder. */
function shouldIncludeFileReadMitigation(): boolean {
  const shortName = getCanonicalName(getMainLoopModel())
  return !MITIGATION_EXEMPT_MODELS.has(shortName)
}

/**
 * Side-channel from call() to mapToolResultToToolResultBlockParam: mtime
 * of auto-memory files, keyed by the `data` object identity. Avoids
 * adding a presentation-only field to the output schema (which flows
 * into SDK types) and avoids sync fs in the mapper. WeakMap auto-GCs
 * when the data object becomes unreachable after rendering.
 */
const memoryFileMtimes = new WeakMap<object, number>()

/** Freshness note for `data` if call() recorded an mtime for it; '' otherwise. */
function memoryFileFreshnessPrefix(data: object): string {
  const mtimeMs = memoryFileMtimes.get(data)
  if (mtimeMs === undefined) return ''
  return memoryFreshnessNote(mtimeMs)
}

/**
 * Throws MaxFileReadTokenExceededError when `content` exceeds the token
 * budget. Uses a cheap local estimate first and only calls the counting
 * API when the estimate is above a quarter of the budget; falls back to
 * the estimate if the API returns nothing.
 */
async function validateContentTokens(
  content: string,
  ext: string,
  maxTokens?: number,
): Promise<void> {
  const effectiveMaxTokens =
    maxTokens ?? getDefaultFileReadingLimits().maxTokens

  const tokenEstimate = roughTokenCountEstimationForFileType(content, ext)
  if (!tokenEstimate || tokenEstimate <= effectiveMaxTokens / 4) return

  const tokenCount = await countTokensWithAPI(content)
  const effectiveCount = tokenCount ?? tokenEstimate

  if (effectiveCount > effectiveMaxTokens) {
    throw new MaxFileReadTokenExceededError(effectiveCount, effectiveMaxTokens)
  }
}

type ImageResult = {
  type: 'image'
  file: {
    base64: string
    type: Base64ImageSource['media_type']
    originalSize: number
    dimensions?: ImageDimensions
  }
}

/** Wraps a processed image buffer in the tool's image result shape. */
function createImageResponse(
  buffer: Buffer,
  mediaType: string,
  originalSize: number,
  dimensions?: ImageDimensions,
): ImageResult {
  return {
    type: 'image',
    file: {
      base64: buffer.toString('base64'),
      type: `image/${mediaType}` as Base64ImageSource['media_type'],
      originalSize,
      dimensions,
    },
  }
}

/**
 * Inner implementation of call, separated to allow ENOENT handling in the outer call.
 *
 * @param fullFilePath - expanded path; used as the readFileState cache key
 * @param resolvedFilePath - path actually read from disk (may be the
 *   alternate macOS screenshot path; otherwise equals fullFilePath)
 */
async function callInner(
  file_path: string,
  fullFilePath: string,
  resolvedFilePath: string,
  ext: string,
  offset: number,
  limit: number | undefined,
  pages: string | undefined,
  maxSizeBytes: number,
  maxTokens: number,
  readFileState: ToolUseContext['readFileState'],
  context: ToolUseContext,
  messageId: string | undefined,
): Promise<{
  data: Output
  newMessages?: ReturnType<typeof createUserMessage>[]
}> {
  // --- Notebook ---
  if (ext === 'ipynb') {
    const cells = await readNotebook(resolvedFilePath)
    const cellsJson = jsonStringify(cells)

    const cellsJsonBytes = Buffer.byteLength(cellsJson)
    if (cellsJsonBytes > maxSizeBytes) {
      throw new Error(
        `Notebook content (${formatFileSize(cellsJsonBytes)}) exceeds maximum allowed size (${formatFileSize(maxSizeBytes)}). ` +
          `Use ${BASH_TOOL_NAME} with jq to read specific portions:\n` +
          ` cat "${file_path}" | jq '.cells[:20]' # First 20 cells\n` +
          ` cat "${file_path}" | jq '.cells[100:120]' # Cells 100-120\n` +
          ` cat "${file_path}" | jq '.cells | length' # Count total cells\n` +
          ` cat "${file_path}" | jq '.cells[] | select(.cell_type=="code") | .source' # All code sources`,
      )
    }

    await validateContentTokens(cellsJson, ext, maxTokens)

    // Get mtime via async stat (single call, no prior existence check)
    const stats = await getFsImplementation().stat(resolvedFilePath)
    readFileState.set(fullFilePath, {
      content: cellsJson,
      timestamp: Math.floor(stats.mtimeMs),
      offset,
      limit,
    })
    context.nestedMemoryAttachmentTriggers?.add(fullFilePath)

    const data = {
      type: 'notebook' as const,
      file: { filePath: file_path, cells },
    }

    logFileOperation({
      operation: 'read',
      tool: 'FileReadTool',
      filePath: fullFilePath,
      content: cellsJson,
    })

    return { data }
  }

  // --- Image (single read, no double-read) ---
  if (IMAGE_EXTENSIONS.has(ext)) {
    // Images have their own size limits (token budget + compression) —
    // don't apply the text maxSizeBytes cap.
    const data = await readImageWithTokenBudget(resolvedFilePath, maxTokens)
    context.nestedMemoryAttachmentTriggers?.add(fullFilePath)

    logFileOperation({
      operation: 'read',
      tool: 'FileReadTool',
      filePath: fullFilePath,
      content: data.file.base64,
    })

    // When dimensions are known, attach a meta message so the model can
    // map coordinates between original and displayed sizes.
    const metadataText = data.file.dimensions
      ? createImageMetadataText(data.file.dimensions)
      : null

    return {
      data,
      ...(metadataText && {
        newMessages: [
          createUserMessage({ content: metadataText, isMeta: true }),
        ],
      }),
    }
  }

  // --- PDF ---
  if (isPDFExtension(ext)) {
    if (pages) {
      // Explicit page range: extract pages to images and send them as
      // image blocks in a meta user message.
      const parsedRange = parsePDFPageRange(pages)
      const extractResult = await extractPDFPages(
        resolvedFilePath,
        parsedRange ?? undefined,
      )
      if (!extractResult.success) {
        throw new Error(extractResult.error.message)
      }
      logEvent('tengu_pdf_page_extraction', {
        success: true,
        pageCount: extractResult.data.file.count,
        fileSize: extractResult.data.file.originalSize,
        hasPageRange: true,
      })
      logFileOperation({
        operation: 'read',
        tool: 'FileReadTool',
        filePath: fullFilePath,
        content: `PDF pages ${pages}`,
      })
      const entries = await readdir(extractResult.data.file.outputDir)
      const imageFiles = entries.filter(f => f.endsWith('.jpg')).sort()
      const imageBlocks = await Promise.all(
        imageFiles.map(async f => {
          const imgPath = path.join(extractResult.data.file.outputDir, f)
          const imgBuffer = await readFileAsync(imgPath)
          const resized = await maybeResizeAndDownsampleImageBuffer(
            imgBuffer,
            imgBuffer.length,
            'jpeg',
          )
          return {
            type: 'image' as const,
            source: {
              type: 'base64' as const,
              media_type:
                `image/${resized.mediaType}` as Base64ImageSource['media_type'],
              data: resized.buffer.toString('base64'),
            },
          }
        }),
      )
      return {
        data: extractResult.data,
        ...(imageBlocks.length > 0 && {
          newMessages: [
            createUserMessage({ content: imageBlocks, isMeta: true }),
          ],
        }),
      }
    }

    const pageCount = await getPDFPageCount(resolvedFilePath)
    if (pageCount !== null && pageCount > PDF_AT_MENTION_INLINE_THRESHOLD) {
      throw new Error(
        `This PDF has ${pageCount} pages, which is too many to read at once. ` +
          `Use the pages parameter to read specific page ranges (e.g., pages: "1-5"). ` +
          `Maximum ${PDF_MAX_PAGES_PER_READ} pages per request.`,
      )
    }

    const fs = getFsImplementation()
    const stats = await fs.stat(resolvedFilePath)
    // Fall back to page extraction when the model can't take inline PDFs
    // or the file is too large to inline.
    const shouldExtractPages =
      !isPDFSupported() || stats.size > PDF_EXTRACT_SIZE_THRESHOLD

    if (shouldExtractPages) {
      const extractResult = await extractPDFPages(resolvedFilePath)
      if (extractResult.success) {
        logEvent('tengu_pdf_page_extraction', {
          success: true,
          pageCount: extractResult.data.file.count,
          fileSize: extractResult.data.file.originalSize,
        })
      } else {
        logEvent('tengu_pdf_page_extraction', {
          success: false,
          available: extractResult.error.reason !== 'unavailable',
          fileSize: stats.size,
        })
      }
    }

    if (!isPDFSupported()) {
      throw new Error(
        'Reading full PDFs is not supported with this model. Use a newer model (Sonnet 3.5 v2 or later), ' +
          `or use the pages parameter to read specific page ranges (e.g., pages: "1-5", maximum ${PDF_MAX_PAGES_PER_READ} pages per request). ` +
          'Page extraction requires poppler-utils: install with `brew install poppler` on macOS or `apt-get install poppler-utils` on Debian/Ubuntu.',
      )
    }

    const readResult = await readPDF(resolvedFilePath)
    if (!readResult.success) {
      throw new Error(readResult.error.message)
    }
    const pdfData = readResult.data
    logFileOperation({
      operation: 'read',
      tool: 'FileReadTool',
      filePath: fullFilePath,
      content: pdfData.file.base64,
    })

    // Full document travels as a supplemental meta message; the tool_result
    // itself only carries metadata (see mapToolResultToToolResultBlockParam).
    return {
      data: pdfData,
      newMessages: [
        createUserMessage({
          content: [
            {
              type: 'document',
              source: {
                type: 'base64',
                media_type: 'application/pdf',
                data: pdfData.file.base64,
              },
            },
          ],
          isMeta: true,
        }),
      ],
    }
  }

  // --- Text file (single async read via readFileInRange) ---
  // offset is 1-based (default 1); readFileInRange takes a 0-based line offset.
  const lineOffset = offset === 0 ? 0 : offset - 1
  const { content, lineCount, totalLines, totalBytes, readBytes, mtimeMs } =
    await readFileInRange(
      resolvedFilePath,
      lineOffset,
      limit,
      // Cap bytes only for whole-file reads; an explicit limit bounds the read.
      limit === undefined ? maxSizeBytes : undefined,
      context.abortController.signal,
    )

  await validateContentTokens(content, ext, maxTokens)

  readFileState.set(fullFilePath, {
    content,
    timestamp: Math.floor(mtimeMs),
    offset,
    limit,
  })
  context.nestedMemoryAttachmentTriggers?.add(fullFilePath)

  // Snapshot before iterating — a listener that unsubscribes mid-callback
  // would splice the live array and skip the next listener.
  for (const listener of fileReadListeners.slice()) {
    listener(resolvedFilePath, content)
  }

  const data = {
    type: 'text' as const,
    file: {
      filePath: file_path,
      content,
      numLines: lineCount,
      startLine: offset,
      totalLines,
    },
  }
  if (isAutoMemFile(fullFilePath)) {
    // Recorded for the freshness-note prefix in the result mapper.
    memoryFileMtimes.set(data, mtimeMs)
  }

  logFileOperation({
    operation: 'read',
    tool: 'FileReadTool',
    filePath: fullFilePath,
    content,
  })

  const sessionFileType = detectSessionFileType(fullFilePath)
  const analyticsExt = getFileExtensionForAnalytics(fullFilePath)
  logEvent('tengu_session_file_read', {
    totalLines,
    readLines: lineCount,
    totalBytes,
    readBytes,
    offset,
    ...(limit !== undefined && { limit }),
    ...(analyticsExt !== undefined && { ext: analyticsExt }),
    ...(messageId !== undefined && {
      messageID:
        messageId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    }),
    is_session_memory: sessionFileType === 'session_memory',
    is_session_transcript: sessionFileType === 'session_transcript',
  })

  return { data }
}

/**
 * Reads an image file and applies token-based compression if needed.
 * Reads the file ONCE, then applies standard resize. If the result exceeds
 * the token limit, applies aggressive compression from the same buffer.
1092 * 1093 * @param filePath - Path to the image file 1094 * @param maxTokens - Maximum token budget for the image 1095 * @returns Image data with appropriate compression applied 1096 */ 1097export async function readImageWithTokenBudget( 1098 filePath: string, 1099 maxTokens: number = getDefaultFileReadingLimits().maxTokens, 1100 maxBytes?: number, 1101): Promise<ImageResult> { 1102 // Read file ONCE — capped to maxBytes to avoid OOM on huge files 1103 const imageBuffer = await getFsImplementation().readFileBytes( 1104 filePath, 1105 maxBytes, 1106 ) 1107 const originalSize = imageBuffer.length 1108 1109 if (originalSize === 0) { 1110 throw new Error(`Image file is empty: ${filePath}`) 1111 } 1112 1113 const detectedMediaType = detectImageFormatFromBuffer(imageBuffer) 1114 const detectedFormat = detectedMediaType.split('/')[1] || 'png' 1115 1116 // Try standard resize 1117 let result: ImageResult 1118 try { 1119 const resized = await maybeResizeAndDownsampleImageBuffer( 1120 imageBuffer, 1121 originalSize, 1122 detectedFormat, 1123 ) 1124 result = createImageResponse( 1125 resized.buffer, 1126 resized.mediaType, 1127 originalSize, 1128 resized.dimensions, 1129 ) 1130 } catch (e) { 1131 if (e instanceof ImageResizeError) throw e 1132 logError(e) 1133 result = createImageResponse(imageBuffer, detectedFormat, originalSize) 1134 } 1135 1136 // Check if it fits in token budget 1137 const estimatedTokens = Math.ceil(result.file.base64.length * 0.125) 1138 if (estimatedTokens > maxTokens) { 1139 // Aggressive compression from the SAME buffer (no re-read) 1140 try { 1141 const compressed = await compressImageBufferWithTokenLimit( 1142 imageBuffer, 1143 maxTokens, 1144 detectedMediaType, 1145 ) 1146 return { 1147 type: 'image', 1148 file: { 1149 base64: compressed.base64, 1150 type: compressed.mediaType, 1151 originalSize, 1152 }, 1153 } 1154 } catch (e) { 1155 logError(e) 1156 // Fallback: heavily compressed version from the SAME buffer 1157 try { 1158 const sharpModule 
= await import('sharp') 1159 const sharp = 1160 ( 1161 sharpModule as { 1162 default?: typeof sharpModule 1163 } & typeof sharpModule 1164 ).default || sharpModule 1165 1166 const fallbackBuffer = await sharp(imageBuffer) 1167 .resize(400, 400, { 1168 fit: 'inside', 1169 withoutEnlargement: true, 1170 }) 1171 .jpeg({ quality: 20 }) 1172 .toBuffer() 1173 1174 return createImageResponse(fallbackBuffer, 'jpeg', originalSize) 1175 } catch (error) { 1176 logError(error) 1177 return createImageResponse(imageBuffer, detectedFormat, originalSize) 1178 } 1179 } 1180 } 1181 1182 return result 1183}