// Source dump of Claude Code
// (at main; 136 lines, 3.5 kB)
import { basename, extname, posix, sep } from 'path'

/**
 * File patterns that should be excluded from attribution.
 * Based on GitHub Linguist vendored patterns and common generated file patterns.
 */

// Exact file names, compared against the lower-cased basename of the path.
const EXCLUDED_FILENAMES: ReadonlySet<string> = new Set([
  'package-lock.json',
  'yarn.lock',
  'pnpm-lock.yaml',
  'bun.lockb',
  'bun.lock',
  'composer.lock',
  'gemfile.lock',
  'cargo.lock',
  'poetry.lock',
  'pipfile.lock',
  'shrinkwrap.json',
  'npm-shrinkwrap.json',
])

// File extensions, compared in lower case. Single-segment entries are checked
// against `extname()`; two-segment entries (e.g. `.min.js`) are checked against
// the last two dot-separated segments of the file name.
const EXCLUDED_EXTENSIONS: ReadonlySet<string> = new Set([
  '.lock',
  '.min.js',
  '.min.css',
  '.min.html',
  '.bundle.js',
  '.bundle.css',
  '.generated.ts',
  '.generated.js',
  '.d.ts', // TypeScript declaration files
])

// Directory fragments that indicate generated/vendored content. Each entry is
// matched as a substring of the normalized path (which always starts with `/`),
// so it matches the directory at any depth. Matching is case-sensitive.
const EXCLUDED_DIRECTORIES: readonly string[] = [
  '/dist/',
  '/build/',
  '/out/',
  '/output/',
  '/node_modules/',
  '/vendor/',
  '/vendored/',
  '/third_party/',
  '/third-party/',
  '/external/',
  '/.next/',
  '/.nuxt/',
  '/.svelte-kit/',
  '/coverage/',
  '/__pycache__/',
  '/.tox/',
  '/venv/',
  '/.venv/',
  '/target/release/',
  '/target/debug/',
]

// File name patterns (tested against the lower-cased basename) for cases that
// a fixed name or extension cannot express.
const EXCLUDED_FILENAME_PATTERNS: readonly RegExp[] = [
  /^.*\.min\.[a-z]+$/i, // *.min.*
  /^.*-min\.[a-z]+$/i, // *-min.*
  /^.*\.bundle\.[a-z]+$/i, // *.bundle.*
  /^.*\.generated\.[a-z]+$/i, // *.generated.*
  /^.*\.gen\.[a-z]+$/i, // *.gen.*
  /^.*\.auto\.[a-z]+$/i, // *.auto.*
  /^.*_generated\.[a-z]+$/i, // *_generated.*
  /^.*_gen\.[a-z]+$/i, // *_gen.*
  /^.*\.pb\.(go|js|ts|py|rb)$/i, // Protocol buffer generated files
  /^.*_pb2?\.py$/i, // Python protobuf files
  /^.*_pb2?_grpc\.py$/i, // Python gRPC stubs emitted next to *_pb2.py
  /^.*\.pb\.h$/i, // C++ protobuf headers
  /^.*\.pb\.cc$/i, // C++ protobuf sources emitted next to *.pb.h
  /^.*\.grpc\.[a-z]+$/i, // gRPC generated files
  /^.*\.swagger\.[a-z]+$/i, // Swagger generated files
  /^.*\.openapi\.[a-z]+$/i, // OpenAPI generated files
]

/**
 * Check if a file should be excluded from attribution based on Linguist-style rules.
 *
 * The checks run in this order and the first hit wins: exact file name,
 * single extension, two-segment compound extension, containing directory,
 * and finally the file name regex patterns.
 *
 * @param filePath - Relative file path from repository root
 * @returns true if the file should be excluded from attribution
 */
export function isGeneratedFile(filePath: string): boolean {
  // Convert host separators to `/` and force exactly one leading `/`, so a
  // top-level `dist/x.js` still contains the `/dist/` fragment.
  const normalizedPath =
    posix.sep + filePath.split(sep).join(posix.sep).replace(/^\/+/, '')
  const fileName = basename(filePath).toLowerCase()
  const ext = extname(filePath).toLowerCase()

  // Exact file name or plain extension (e.g. `yarn.lock`, `.lock`).
  if (EXCLUDED_FILENAMES.has(fileName) || EXCLUDED_EXTENSIONS.has(ext)) {
    return true
  }

  // Compound extensions like `.min.js`: `extname()` only yields the last
  // segment, so rebuild the final two segments by hand.
  const parts = fileName.split('.')
  if (parts.length > 2) {
    const compoundExt = '.' + parts.slice(-2).join('.')
    if (EXCLUDED_EXTENSIONS.has(compoundExt)) {
      return true
    }
  }

  // Generated/vendored directory anywhere along the path.
  if (EXCLUDED_DIRECTORIES.some(dir => normalizedPath.includes(dir))) {
    return true
  }

  // Remaining file name patterns.
  return EXCLUDED_FILENAME_PATTERNS.some(pattern => pattern.test(fileName))
}

/**
 * Filter a list of files to exclude generated files.
 *
 * The input array is not modified; relative order of the kept paths is preserved.
 *
 * @param files - Array of file paths
 * @returns Array of files that are not generated
 */
export function filterGeneratedFiles(files: string[]): string[] {
  return files.filter(file => !isGeneratedFile(file))
}