utils/mcpOutputStorage.ts at main · oppi.li/claude-code

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / utils / mcpOutputStorage.ts
at main 189 lines 7.1 kB view raw
wrap content
oppi.li dump from zip 2d ago
63aada3f
  1import { writeFile } from 'fs/promises'
  2import { join } from 'path'
  3import {
  4  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  5  logEvent,
  6} from '../services/analytics/index.js'
  7import type { MCPResultType } from '../services/mcp/client.js'
  8import { toError } from './errors.js'
  9import { formatFileSize } from './format.js'
 10import { logError } from './log.js'
 11import { ensureToolResultsDir, getToolResultsDir } from './toolResultStorage.js'
 12
 13/**
 14 * Generates a format description string based on the MCP result type and schema.
 15 */
 16export function getFormatDescription(
 17  type: MCPResultType,
 18  schema?: unknown,
 19): string {
 20  switch (type) {
 21    case 'toolResult':
 22      return 'Plain text'
 23    case 'structuredContent':
 24      return schema ? `JSON with schema: ${schema}` : 'JSON'
 25    case 'contentArray':
 26      return schema ? `JSON array with schema: ${schema}` : 'JSON array'
 27  }
 28}
 29
 30/**
 31 * Generates instruction text for Claude to read from a saved output file.
 32 *
 33 * @param rawOutputPath - Path to the saved output file
 34 * @param contentLength - Length of the content in characters
 35 * @param formatDescription - Description of the content format
 36 * @param maxReadLength - Optional max chars for Read tool (for Bash output context)
 37 * @returns Instruction text to include in the tool result
 38 */
 39export function getLargeOutputInstructions(
 40  rawOutputPath: string,
 41  contentLength: number,
 42  formatDescription: string,
 43  maxReadLength?: number,
 44): string {
 45  const baseInstructions =
 46    `Error: result (${contentLength.toLocaleString()} characters) exceeds maximum allowed tokens. Output has been saved to ${rawOutputPath}.\n` +
 47    `Format: ${formatDescription}\n` +
 48    `Use offset and limit parameters to read specific portions of the file, search within it for specific content, and jq to make structured queries.\n` +
 49    `REQUIREMENTS FOR SUMMARIZATION/ANALYSIS/REVIEW:\n` +
 50    `- You MUST read the content from the file at ${rawOutputPath} in sequential chunks until 100% of the content has been read.\n`
 51
 52  const truncationWarning = maxReadLength
 53    ? `- If you receive truncation warnings when reading the file ("[N lines truncated]"), reduce the chunk size until you have read 100% of the content without truncation ***DO NOT PROCEED UNTIL YOU HAVE DONE THIS***. Bash output is limited to ${maxReadLength.toLocaleString()} chars.\n`
 54    : `- If you receive truncation warnings when reading the file, reduce the chunk size until you have read 100% of the content without truncation.\n`
 55
 56  const completionRequirement = `- Before producing ANY summary or analysis, you MUST explicitly describe what portion of the content you have read. ***If you did not read the entire content, you MUST explicitly state this.***\n`
 57
 58  return baseInstructions + truncationWarning + completionRequirement
 59}
 60
 61/**
 62 * Map a mime type to a file extension. Conservative: known types get their
 63 * proper extension; unknown types get 'bin'. The extension matters because
 64 * the Read tool dispatches on it (PDFs, images, etc. need the right ext).
 65 */
 66export function extensionForMimeType(mimeType: string | undefined): string {
 67  if (!mimeType) return 'bin'
 68  // Strip any charset/boundary parameter
 69  const mt = (mimeType.split(';')[0] ?? '').trim().toLowerCase()
 70  switch (mt) {
 71    case 'application/pdf':
 72      return 'pdf'
 73    case 'application/json':
 74      return 'json'
 75    case 'text/csv':
 76      return 'csv'
 77    case 'text/plain':
 78      return 'txt'
 79    case 'text/html':
 80      return 'html'
 81    case 'text/markdown':
 82      return 'md'
 83    case 'application/zip':
 84      return 'zip'
 85    case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
 86      return 'docx'
 87    case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
 88      return 'xlsx'
 89    case 'application/vnd.openxmlformats-officedocument.presentationml.presentation':
 90      return 'pptx'
 91    case 'application/msword':
 92      return 'doc'
 93    case 'application/vnd.ms-excel':
 94      return 'xls'
 95    case 'audio/mpeg':
 96      return 'mp3'
 97    case 'audio/wav':
 98      return 'wav'
 99    case 'audio/ogg':
100      return 'ogg'
101    case 'video/mp4':
102      return 'mp4'
103    case 'video/webm':
104      return 'webm'
105    case 'image/png':
106      return 'png'
107    case 'image/jpeg':
108      return 'jpg'
109    case 'image/gif':
110      return 'gif'
111    case 'image/webp':
112      return 'webp'
113    case 'image/svg+xml':
114      return 'svg'
115    default:
116      return 'bin'
117  }
118}
119
/**
 * Heuristic for whether a content-type header indicates binary content that
 * should be saved to disk rather than put into the model context.
 * Text-ish types (text/*, json, xml, javascript, form data) are treated as
 * non-binary; every other non-empty type is treated as binary.
 *
 * @param contentType - Content-Type header value; parameters after `;`,
 *   surrounding whitespace and letter case are ignored.
 * @returns `false` for an empty header or a text-ish type, `true` otherwise.
 */
export function isBinaryContentType(contentType: string): boolean {
  if (!contentType) return false
  const mt = (contentType.split(';')[0] ?? '').trim().toLowerCase()
  if (mt.startsWith('text/')) return false
  // Structured text formats delivered with an application/ type. Use suffix
  // or exact match rather than substring so 'openxmlformats' (docx/xlsx) stays binary.
  if (mt.endsWith('+json') || mt === 'application/json') return false
  if (mt.endsWith('+xml') || mt === 'application/xml') return false
  if (mt.startsWith('application/javascript')) return false
  if (mt === 'application/x-www-form-urlencoded') return false
  return true
}
137
/**
 * Outcome of persisting binary content to disk.
 *
 * Success carries the written file's path, its size in bytes and the
 * extension that was chosen; failure carries only an error message.
 * Distinguish the two with an `'error' in result` check.
 */
export type PersistBinaryResult =
  | { filepath: string; size: number; ext: string }
  | { error: string }
141
/**
 * Write raw binary bytes to the tool-results directory with a mime-derived
 * extension. Unlike persistToolResult (which stringifies), this writes the
 * bytes as-is so the resulting file can be opened with native tools (Read
 * for PDFs, pandas for xlsx, etc.).
 *
 * Only a failed write is converted into an `{ error }` result (after being
 * logged). `ensureToolResultsDir()` is awaited outside the try block, so a
 * rejection from it propagates to the caller as a rejected promise.
 *
 * @param bytes - Raw content to write unchanged.
 * @param mimeType - Mime type used to pick the file extension; also reported
 *   in the analytics event ('unknown' when undefined).
 * @param persistId - Base name of the file; the result is written to
 *   `<tool-results dir>/<persistId>.<ext>`.
 * @returns The written file's path, byte size and extension, or an error
 *   message when the write fails.
 */
export async function persistBinaryContent(
  bytes: Buffer,
  mimeType: string | undefined,
  persistId: string,
): Promise<PersistBinaryResult> {
  await ensureToolResultsDir()
  const ext = extensionForMimeType(mimeType)
  const filepath = join(getToolResultsDir(), `${persistId}.${ext}`)

  try {
    await writeFile(filepath, bytes)
  } catch (error) {
    const err = toError(error)
    logError(err)
    return { error: err.message }
  }

  // mime type and extension are safe fixed-vocabulary strings (not paths/code)
  // NOTE(review): mimeType is logged exactly as received, not normalized —
  // confirm callers only pass header-derived mime types here.
  logEvent('tengu_binary_content_persisted', {
    mimeType: (mimeType ??
      'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    sizeBytes: bytes.length,
    ext: ext as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })

  return { filepath, size: bytes.length, ext }
}
175
/**
 * Build a short message telling Claude where binary content was saved.
 * Just states the path — no prescriptive hint, since what the model can
 * actually do with the file depends on provider/tooling.
 *
 * @param filepath - Where the content was written.
 * @param mimeType - Mime type of the content; undefined or empty is reported
 *   as 'unknown type'.
 * @param size - Size passed to `formatFileSize` for display.
 * @param sourceDescription - Prefix placed directly before "Binary content"
 *   with no separator added, so it must carry its own trailing
 *   punctuation/space if any is wanted.
 * @returns The single-line message.
 */
export function getBinaryBlobSavedMessage(
  filepath: string,
  mimeType: string | undefined,
  size: number,
  sourceDescription: string,
): string {
  // `||` rather than `??` so an empty mime type also falls back.
  const mt = mimeType || 'unknown type'
  return `${sourceDescription}Binary content (${mt}, ${formatFileSize(size)}) saved to ${filepath}`
}