// Source dump of Claude Code.
// At main: 189 lines, 7.1 kB (view raw).
import { writeFile } from 'fs/promises'
import { join } from 'path'
import {
  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  logEvent,
} from '../services/analytics/index.js'
import type { MCPResultType } from '../services/mcp/client.js'
import { toError } from './errors.js'
import { formatFileSize } from './format.js'
import { logError } from './log.js'
import { ensureToolResultsDir, getToolResultsDir } from './toolResultStorage.js'

/**
 * Render a schema value for inclusion in a human-readable format description.
 *
 * Strings are passed through untouched. Anything else is JSON-serialised so
 * that object schemas do not collapse to "[object Object]" when interpolated.
 * Falls back to `String()` when the value cannot be serialised (for example
 * circular structures or values JSON.stringify maps to `undefined`).
 */
function describeSchema(schema: unknown): string {
  if (typeof schema === 'string') return schema
  try {
    const json: string | undefined = JSON.stringify(schema)
    return json ?? String(schema)
  } catch {
    return String(schema)
  }
}

/**
 * Generates a format description string based on the MCP result type and schema.
 *
 * @param type - Which kind of MCP result the content came from
 * @param schema - Optional schema description; falsy values are treated as absent
 * @returns A short label such as 'Plain text', 'JSON' or 'JSON with schema: …'
 */
export function getFormatDescription(
  type: MCPResultType,
  schema?: unknown,
): string {
  switch (type) {
    case 'toolResult':
      return 'Plain text'
    case 'structuredContent':
      return schema ? `JSON with schema: ${describeSchema(schema)}` : 'JSON'
    case 'contentArray':
      return schema
        ? `JSON array with schema: ${describeSchema(schema)}`
        : 'JSON array'
  }
}

/**
 * Generates instruction text for Claude to read from a saved output file.
 *
 * @param rawOutputPath - Path to the saved output file
 * @param contentLength - Length of the content in characters
 * @param formatDescription - Description of the content format
 * @param maxReadLength - Optional max chars for Read tool (for Bash output context)
 * @returns Instruction text to include in the tool result
 */
export function getLargeOutputInstructions(
  rawOutputPath: string,
  contentLength: number,
  formatDescription: string,
  maxReadLength?: number,
): string {
  const baseInstructions =
    `Error: result (${contentLength.toLocaleString()} characters) exceeds maximum allowed tokens. Output has been saved to ${rawOutputPath}.\n` +
    `Format: ${formatDescription}\n` +
    `Use offset and limit parameters to read specific portions of the file, search within it for specific content, and jq to make structured queries.\n` +
    `REQUIREMENTS FOR SUMMARIZATION/ANALYSIS/REVIEW:\n` +
    `- You MUST read the content from the file at ${rawOutputPath} in sequential chunks until 100% of the content has been read.\n`

  // A truthy maxReadLength selects the stricter wording that also states the
  // Bash output limit; otherwise the generic truncation note is used.
  const truncationWarning = maxReadLength
    ? `- If you receive truncation warnings when reading the file ("[N lines truncated]"), reduce the chunk size until you have read 100% of the content without truncation ***DO NOT PROCEED UNTIL YOU HAVE DONE THIS***. Bash output is limited to ${maxReadLength.toLocaleString()} chars.\n`
    : `- If you receive truncation warnings when reading the file, reduce the chunk size until you have read 100% of the content without truncation.\n`

  const completionRequirement = `- Before producing ANY summary or analysis, you MUST explicitly describe what portion of the content you have read. ***If you did not read the entire content, you MUST explicitly state this.***\n`

  return baseInstructions + truncationWarning + completionRequirement
}

/**
 * Reduce a mime type / content-type header value to its bare media type:
 * drops any `;`-separated parameters (charset, boundary, ...), trims
 * surrounding whitespace and lower-cases the result.
 */
function baseMimeType(value: string): string {
  return (value.split(';')[0] ?? '').trim().toLowerCase()
}

/** Known media types and the file extension each one is saved with. */
const MIME_TYPE_EXTENSIONS: ReadonlyMap<string, string> = new Map([
  ['application/pdf', 'pdf'],
  ['application/json', 'json'],
  ['text/csv', 'csv'],
  ['text/plain', 'txt'],
  ['text/html', 'html'],
  ['text/markdown', 'md'],
  ['application/zip', 'zip'],
  [
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'docx',
  ],
  ['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'xlsx'],
  [
    'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    'pptx',
  ],
  ['application/msword', 'doc'],
  ['application/vnd.ms-excel', 'xls'],
  ['audio/mpeg', 'mp3'],
  ['audio/wav', 'wav'],
  ['audio/ogg', 'ogg'],
  ['video/mp4', 'mp4'],
  ['video/webm', 'webm'],
  ['image/png', 'png'],
  ['image/jpeg', 'jpg'],
  ['image/gif', 'gif'],
  ['image/webp', 'webp'],
  ['image/svg+xml', 'svg'],
])

/**
 * Map a mime type to a file extension. Conservative: known types get their
 * proper extension; unknown types get 'bin'. The extension matters because
 * the Read tool dispatches on it (PDFs, images, etc. need the right ext).
 *
 * @param mimeType - Mime type, optionally carrying parameters such as charset
 * @returns The extension without a leading dot; 'bin' for missing/unknown types
 */
export function extensionForMimeType(mimeType: string | undefined): string {
  if (!mimeType) return 'bin'
  return MIME_TYPE_EXTENSIONS.get(baseMimeType(mimeType)) ?? 'bin'
}

/**
 * Heuristic for whether a content-type header indicates binary content that
 * should be saved to disk rather than put into the model context.
 * Text-ish types (text/*, json, xml, form data) are treated as non-binary.
 *
 * @param contentType - Content-type header value; an empty value is non-binary
 * @returns true when the content should be treated as binary
 */
export function isBinaryContentType(contentType: string): boolean {
  if (!contentType) return false
  const mt = baseMimeType(contentType)
  if (mt.startsWith('text/')) return false
  // Structured text formats delivered with an application/ type. Use suffix
  // or exact match rather than substring so 'openxmlformats' (docx/xlsx) stays binary.
  if (mt.endsWith('+json') || mt === 'application/json') return false
  if (mt.endsWith('+xml') || mt === 'application/xml') return false
  if (mt.startsWith('application/javascript')) return false
  if (mt === 'application/x-www-form-urlencoded') return false
  return true
}

/**
 * Outcome of persistBinaryContent: either the location, byte size and
 * extension of the written file, or an error message.
 */
export type PersistBinaryResult =
  | { filepath: string; size: number; ext: string }
  | { error: string }

/**
 * Write raw binary bytes to the tool-results directory with a mime-derived
 * extension. Unlike persistToolResult (which stringifies), this writes the
 * bytes as-is so the resulting file can be opened with native tools (Read
 * for PDFs, pandas for xlsx, etc.).
 *
 * @param bytes - Raw content to write
 * @param mimeType - Mime type used to pick the file extension
 * @param persistId - Base name (without extension) for the saved file
 * @returns The saved file's details, or `{ error }` if preparing the
 *   directory or writing the file failed
 */
export async function persistBinaryContent(
  bytes: Buffer,
  mimeType: string | undefined,
  persistId: string,
): Promise<PersistBinaryResult> {
  const ext = extensionForMimeType(mimeType)
  let filepath: string

  try {
    // Directory preparation and path construction live inside the try so any
    // failure on the way to disk is logged and reported through the `error`
    // variant instead of rejecting the returned promise.
    await ensureToolResultsDir()
    filepath = join(getToolResultsDir(), `${persistId}.${ext}`)
    await writeFile(filepath, bytes)
  } catch (error) {
    const err = toError(error)
    logError(err)
    return { error: err.message }
  }

  // mime type and extension are safe fixed-vocabulary strings (not paths/code)
  logEvent('tengu_binary_content_persisted', {
    mimeType: (mimeType ??
      'unknown') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    sizeBytes: bytes.length,
    ext: ext as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  })

  return { filepath, size: bytes.length, ext }
}

/**
 * Build a short message telling Claude where binary content was saved.
 * Just states the path — no prescriptive hint, since what the model can
 * actually do with the file depends on provider/tooling.
 *
 * @param filepath - Where the content was written
 * @param mimeType - Mime type of the content; shown as 'unknown type' when missing or empty
 * @param size - Size in bytes, rendered through formatFileSize
 * @param sourceDescription - Prefix placed verbatim before the message (no separator is added)
 * @returns The one-line message
 */
export function getBinaryBlobSavedMessage(
  filepath: string,
  mimeType: string | undefined,
  size: number,
  sourceDescription: string,
): string {
  const mt = mimeType || 'unknown type'
  return `${sourceDescription}Binary content (${mt}, ${formatFileSize(size)}) saved to ${filepath}`
}