source dump of claude code
at main 156 lines 2.6 kB view raw
/**
 * Binary file extensions to skip for text-based operations.
 * These files can't be meaningfully compared as text and are often large.
 *
 * Every entry is lowercase and begins with a dot; `hasBinaryExtension`
 * lowercases the candidate suffix before looking it up here.
 */
export const BINARY_EXTENSIONS = new Set([
  // Images
  '.png',
  '.jpg',
  '.jpeg',
  '.gif',
  '.bmp',
  '.ico',
  '.webp',
  '.tiff',
  '.tif',
  // Videos
  '.mp4',
  '.mov',
  '.avi',
  '.mkv',
  '.webm',
  '.wmv',
  '.flv',
  '.m4v',
  '.mpeg',
  '.mpg',
  // Audio
  '.mp3',
  '.wav',
  '.ogg',
  '.flac',
  '.aac',
  '.m4a',
  '.wma',
  '.aiff',
  '.opus',
  // Archives
  '.zip',
  '.tar',
  '.gz',
  '.bz2',
  '.7z',
  '.rar',
  '.xz',
  '.z',
  '.tgz',
  '.iso',
  // Executables/binaries
  '.exe',
  '.dll',
  '.so',
  '.dylib',
  '.bin',
  '.o',
  '.a',
  '.obj',
  '.lib',
  '.app',
  '.msi',
  '.deb',
  '.rpm',
  // Documents (PDF is here; FileReadTool excludes it at the call site)
  '.pdf',
  '.doc',
  '.docx',
  '.xls',
  '.xlsx',
  '.ppt',
  '.pptx',
  '.odt',
  '.ods',
  '.odp',
  // Fonts
  '.ttf',
  '.otf',
  '.woff',
  '.woff2',
  '.eot',
  // Bytecode / VM artifacts
  '.pyc',
  '.pyo',
  '.class',
  '.jar',
  '.war',
  '.ear',
  '.node',
  '.wasm',
  '.rlib',
  // Database files
  '.sqlite',
  '.sqlite3',
  '.db',
  '.mdb',
  '.idx',
  // Design / 3D
  '.psd',
  '.ai',
  '.eps',
  '.sketch',
  '.fig',
  '.xd',
  '.blend',
  '.3ds',
  '.max',
  // Flash
  '.swf',
  '.fla',
  // Lock/profiling data
  '.lockb',
  '.dat',
  '.data',
])

/**
 * Check if a file path has a binary extension.
 *
 * The extension is everything from the last `.` in the path to its end,
 * compared case-insensitively against `BINARY_EXTENSIONS`. Only the final
 * suffix is considered, so `archive.tar.gz` is matched as `.gz`.
 *
 * @param filePath - Path or bare file name to inspect.
 * @returns `true` when the suffix after the last dot is a known binary
 *   extension; `false` otherwise, including when the path contains no dot.
 */
export function hasBinaryExtension(filePath: string): boolean {
  const dotIndex = filePath.lastIndexOf('.')
  // No dot means no extension. Without this guard, slice(-1) would look up
  // the final character of the path, which only misses because every entry
  // in the set starts with a dot.
  if (dotIndex === -1) {
    return false
  }
  // If the last dot sits in a directory name, the suffix contains a path
  // separator and therefore never matches an entry.
  return BINARY_EXTENSIONS.has(filePath.slice(dotIndex).toLowerCase())
}

/**
 * Number of bytes to read for binary content detection.
 */
const BINARY_CHECK_SIZE = 8192

/**
 * Fraction of inspected bytes that may be non-whitespace control characters
 * before the content is classified as binary.
 */
const BINARY_NON_PRINTABLE_RATIO = 0.1

/**
 * Check if a buffer contains binary content by looking for null bytes
 * or a high proportion of control characters.
 *
 * Only the first `BINARY_CHECK_SIZE` bytes are inspected (or the full buffer
 * if it is smaller). Bytes counted as non-printable are those below 32 other
 * than tab, newline and carriage return. Bytes of 127 and above are not
 * counted, so multi-byte UTF-8 text is not flagged by the ratio check.
 *
 * @param buffer - Raw bytes from the start of the file.
 * @returns `true` if a null byte is found in the inspected range, or if more
 *   than 10% of the inspected bytes are control characters; `false`
 *   otherwise, including for an empty buffer.
 */
export function isBinaryContent(buffer: Buffer): boolean {
  // Check first BINARY_CHECK_SIZE bytes (or full buffer if smaller)
  const checkSize = Math.min(buffer.length, BINARY_CHECK_SIZE)

  // Nothing to inspect: treat as text explicitly rather than relying on
  // `0 / 0` (NaN) comparing false against the threshold.
  if (checkSize === 0) {
    return false
  }

  let nonPrintable = 0
  for (let i = 0; i < checkSize; i++) {
    const byte = buffer[i]!
    // Null byte is a strong indicator of binary
    if (byte === 0) {
      return true
    }
    // Count control bytes, excluding common whitespace (9, 10, 13)
    if (
      byte < 32 &&
      byte !== 9 && // tab
      byte !== 10 && // newline
      byte !== 13 // carriage return
    ) {
      nonPrintable++
    }
  }

  // If more than 10% non-printable, likely binary
  return nonPrintable / checkSize > BINARY_NON_PRINTABLE_RATIO
}