// source dump of claude code
// at main · 324 lines · 9.5 kB · view raw
/**
 * Client-side secret scanner for team memory (PSR M22174).
 *
 * Scans content for credentials before upload so secrets never leave the
 * user's machine. Uses a curated subset of high-confidence rules from
 * gitleaks (https://github.com/gitleaks/gitleaks, MIT license) — only
 * rules with distinctive prefixes that have near-zero false-positive
 * rates are included. Generic keyword-context rules are omitted.
 *
 * Rule IDs and regexes sourced directly from the public gitleaks config:
 * https://github.com/gitleaks/gitleaks/blob/master/config/gitleaks.toml
 *
 * JS regex notes:
 * - gitleaks uses Go regex; inline (?i) and mode groups (?-i:...) are
 *   not portable to JS. Affected rules are rewritten with explicit
 *   character classes ([a-zA-Z0-9] instead of (?i)[a-z0-9]).
 * - Trailing boundary alternations like (?:[\x60'"\s;]|\\[nr]|$) from
 *   Go regex are kept (JS $ matches end-of-string in default mode).
 */

import { capitalize } from '../../utils/stringUtils.js'

/** One detection rule: a gitleaks rule ID plus the regex that recognizes it. */
type SecretRule = {
  /** Gitleaks rule ID (kebab-case), used in labels and analytics */
  id: string
  /** Regex source, lazily compiled on first scan */
  source: string
  /** Optional JS regex flags (most rules are case-sensitive by default) */
  flags?: string
}

/** A rule that fired during a scan. The matched text is deliberately absent. */
export type SecretMatch = {
  /** Gitleaks rule ID that matched (e.g., "github-pat", "aws-access-token") */
  ruleId: string
  /** Human-readable label derived from the rule ID */
  label: string
}

// ─── Curated rules ──────────────────────────────────────────────
// High-confidence patterns from gitleaks with distinctive prefixes.
// Ordered roughly by likelihood of appearing in dev-team content.

// Anthropic API key prefix, assembled at runtime so the literal byte
// sequence isn't present in the external bundle (excluded-strings check).
// join() is not constant-folded by the minifier.
const ANT_KEY_PFX = ['sk', 'ant', 'api'].join('-')

/**
 * The curated rule table. Rules that wrap the secret in capture group 1 do so
 * because the surrounding pattern also consumes boundary characters (quotes,
 * whitespace, `;`) that are not part of the secret; rules without a group
 * match the secret text exactly.
 */
const SECRET_RULES: SecretRule[] = [
  // — Cloud providers —
  {
    id: 'aws-access-token',
    source: '\\b((?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z2-7]{16})\\b',
  },
  {
    id: 'gcp-api-key',
    source: '\\b(AIza[\\w-]{35})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },
  {
    id: 'azure-ad-client-secret',
    source:
      '(?:^|[\\\\\'"\\x60\\s>=:(,)])([a-zA-Z0-9_~.]{3}\\dQ~[a-zA-Z0-9_~.-]{31,34})(?:$|[\\\\\'"\\x60\\s<),])',
  },
  {
    id: 'digitalocean-pat',
    source: '\\b(dop_v1_[a-f0-9]{64})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },
  {
    id: 'digitalocean-access-token',
    source: '\\b(doo_v1_[a-f0-9]{64})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },

  // — AI APIs —
  {
    id: 'anthropic-api-key',
    source: `\\b(${ANT_KEY_PFX}03-[a-zA-Z0-9_\\-]{93}AA)(?:[\\x60'"\\s;]|\\\\[nr]|$)`,
  },
  {
    id: 'anthropic-admin-api-key',
    source:
      '\\b(sk-ant-admin01-[a-zA-Z0-9_\\-]{93}AA)(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },
  {
    id: 'openai-api-key',
    source:
      '\\b(sk-(?:proj|svcacct|admin)-(?:[A-Za-z0-9_-]{74}|[A-Za-z0-9_-]{58})T3BlbkFJ(?:[A-Za-z0-9_-]{74}|[A-Za-z0-9_-]{58})\\b|sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },
  {
    id: 'huggingface-access-token',
    // gitleaks: hf_(?i:[a-z]{34}) → JS: hf_[a-zA-Z]{34}
    source: '\\b(hf_[a-zA-Z]{34})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },

  // — Version control —
  {
    id: 'github-pat',
    source: 'ghp_[0-9a-zA-Z]{36}',
  },
  {
    id: 'github-fine-grained-pat',
    source: 'github_pat_\\w{82}',
  },
  {
    id: 'github-app-token',
    source: '(?:ghu|ghs)_[0-9a-zA-Z]{36}',
  },
  {
    id: 'github-oauth',
    source: 'gho_[0-9a-zA-Z]{36}',
  },
  {
    id: 'github-refresh-token',
    source: 'ghr_[0-9a-zA-Z]{36}',
  },
  {
    id: 'gitlab-pat',
    source: 'glpat-[\\w-]{20}',
  },
  {
    id: 'gitlab-deploy-token',
    source: 'gldt-[0-9a-zA-Z_\\-]{20}',
  },

  // — Communication —
  {
    id: 'slack-bot-token',
    source: 'xoxb-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*',
  },
  {
    id: 'slack-user-token',
    source: 'xox[pe](?:-[0-9]{10,13}){3}-[a-zA-Z0-9-]{28,34}',
  },
  {
    id: 'slack-app-token',
    source: 'xapp-\\d-[A-Z0-9]+-\\d+-[a-z0-9]+',
    flags: 'i',
  },
  {
    id: 'twilio-api-key',
    source: 'SK[0-9a-fA-F]{32}',
  },
  {
    id: 'sendgrid-api-token',
    // gitleaks: SG\.(?i)[a-z0-9=_\-\.]{66} → JS: explicit [a-zA-Z0-9...] class
    // (no `i` flag, so the "SG." prefix stays case-sensitive)
    source: '\\b(SG\\.[a-zA-Z0-9=_\\-.]{66})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },

  // — Dev tooling —
  {
    id: 'npm-access-token',
    source: '\\b(npm_[a-zA-Z0-9]{36})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },
  {
    id: 'pypi-upload-token',
    source: 'pypi-AgEIcHlwaS5vcmc[\\w-]{50,1000}',
  },
  {
    id: 'databricks-api-token',
    source: '\\b(dapi[a-f0-9]{32}(?:-\\d)?)(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },
  {
    id: 'hashicorp-tf-api-token',
    // gitleaks: (?i)[a-z0-9]{14}\.(?-i:atlasv1)\.[a-z0-9\-_=]{60,70}
    // → JS: mixed-case alnum prefix, case-sensitive literal "atlasv1",
    //   mixed-case suffix — all via explicit character classes
    source: '[a-zA-Z0-9]{14}\\.atlasv1\\.[a-zA-Z0-9\\-_=]{60,70}',
  },
  {
    id: 'pulumi-api-token',
    source: '\\b(pul-[a-f0-9]{40})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },
  {
    id: 'postman-api-token',
    // gitleaks: PMAK-(?i)[a-f0-9]{24}\-[a-f0-9]{34} → JS: use [a-fA-F0-9]
    source:
      '\\b(PMAK-[a-fA-F0-9]{24}-[a-fA-F0-9]{34})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },

  // — Observability —
  {
    id: 'grafana-api-key',
    source:
      '\\b(eyJrIjoi[A-Za-z0-9+/]{70,400}={0,3})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },
  {
    id: 'grafana-cloud-api-token',
    source: '\\b(glc_[A-Za-z0-9+/]{32,400}={0,3})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },
  {
    id: 'grafana-service-account-token',
    source:
      '\\b(glsa_[A-Za-z0-9]{32}_[A-Fa-f0-9]{8})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },
  {
    id: 'sentry-user-token',
    source: '\\b(sntryu_[a-f0-9]{64})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },
  {
    id: 'sentry-org-token',
    source:
      '\\bsntrys_eyJpYXQiO[a-zA-Z0-9+/]{10,200}(?:LCJyZWdpb25fdXJs|InJlZ2lvbl91cmwi|cmVnaW9uX3VybCI6)[a-zA-Z0-9+/]{10,200}={0,2}_[a-zA-Z0-9+/]{43}',
  },

  // — Payment / commerce —
  {
    id: 'stripe-access-token',
    source:
      '\\b((?:sk|rk)_(?:test|live|prod)_[a-zA-Z0-9]{10,99})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
  },
  {
    id: 'shopify-access-token',
    source: 'shpat_[a-fA-F0-9]{32}',
  },
  {
    id: 'shopify-shared-secret',
    source: 'shpss_[a-fA-F0-9]{32}',
  },

  // — Crypto —
  {
    id: 'private-key',
    source:
      '-----BEGIN[ A-Z0-9_-]{0,100}PRIVATE KEY(?: BLOCK)?-----[\\s\\S-]{64,}?-----END[ A-Z0-9_-]{0,100}PRIVATE KEY(?: BLOCK)?-----',
    flags: 'i',
  },
]

// Lazily compiled pattern cache — compile once on first scan.
let compiledRules: Array<{ id: string; re: RegExp }> | null = null

/**
 * Return the detection regexes, compiling them on the first call and reusing
 * the cached array afterwards. Each regex gets exactly the flags its rule
 * declares; no rule in SECRET_RULES sets `g`, so `test()` carries no
 * `lastIndex` state between scans.
 */
function getCompiledRules(): Array<{ id: string; re: RegExp }> {
  if (compiledRules === null) {
    compiledRules = SECRET_RULES.map(r => ({
      id: r.id,
      re: new RegExp(r.source, r.flags),
    }))
  }
  return compiledRules
}

// Words where the canonical capitalization differs from title case.
// Held in a Map (built once) rather than an object literal so that rule-ID
// parts such as "constructor" or "__proto__" cannot resolve to inherited
// Object.prototype members.
const LABEL_OVERRIDES: ReadonlyMap<string, string> = new Map([
  ['aws', 'AWS'],
  ['gcp', 'GCP'],
  ['api', 'API'],
  ['pat', 'PAT'],
  ['ad', 'AD'],
  ['tf', 'TF'],
  ['oauth', 'OAuth'],
  ['npm', 'NPM'],
  ['pypi', 'PyPI'],
  ['jwt', 'JWT'],
  ['github', 'GitHub'],
  ['gitlab', 'GitLab'],
  ['openai', 'OpenAI'],
  ['digitalocean', 'DigitalOcean'],
  ['huggingface', 'HuggingFace'],
  ['hashicorp', 'HashiCorp'],
  ['sendgrid', 'SendGrid'],
])

/**
 * Convert a gitleaks rule ID (kebab-case) to a human-readable label.
 * e.g., "github-pat" → "GitHub PAT", "aws-access-token" → "AWS Access Token"
 *
 * Each dash-separated part is replaced by its LABEL_OVERRIDES entry when one
 * exists and otherwise passed through `capitalize`; parts are joined with
 * single spaces.
 */
function ruleIdToLabel(ruleId: string): string {
  return ruleId
    .split('-')
    .map(part => LABEL_OVERRIDES.get(part) ?? capitalize(part))
    .join(' ')
}

/**
 * Scan a string for potential secrets.
 *
 * Returns one match per rule that fired (deduplicated by rule ID), in rule
 * table order. The actual matched text is intentionally NOT returned — we
 * never log or display secret values.
 *
 * @param content Text to inspect.
 * @returns The rules that matched; empty when nothing was detected.
 */
export function scanForSecrets(content: string): SecretMatch[] {
  const matches: SecretMatch[] = []
  // Guards against a rule ID appearing twice in the rule table.
  const seen = new Set<string>()

  for (const rule of getCompiledRules()) {
    if (seen.has(rule.id)) {
      continue
    }
    if (rule.re.test(content)) {
      seen.add(rule.id)
      matches.push({
        ruleId: rule.id,
        label: ruleIdToLabel(rule.id),
      })
    }
  }

  return matches
}

/**
 * Get a human-readable label for a gitleaks rule ID.
 * Falls back to kebab-to-Title conversion for unknown IDs.
 */
export function getSecretLabel(ruleId: string): string {
  return ruleIdToLabel(ruleId)
}

// Global-flag copies of the rules for redaction, compiled on first use.
let redactRules: RegExp[] | null = null

/**
 * Redact any matched secrets with [REDACTED].
 * Unlike scanForSecrets, this returns the content with spans replaced
 * so the surrounding text can still be written to disk safely.
 *
 * @param content Text that may contain secrets.
 * @returns A copy of `content` with every detected secret replaced.
 */
export function redactSecrets(content: string): string {
  redactRules ??= SECRET_RULES.map(
    r => new RegExp(r.source, (r.flags ?? '').replace('g', '') + 'g'),
  )

  let redacted = content
  for (const re of redactRules) {
    // Several patterns consume the boundary character on either side of the
    // secret. When two secrets share a single delimiter ("A B"), the first
    // match swallows it and a single global pass never sees the second one.
    // Re-run the rule until the text stops changing. Every current rule
    // matches more characters than the 10-character "[REDACTED]" it is
    // replaced with, so each changing pass shrinks the text and the loop ends.
    let before: string
    do {
      before = redacted
      // Replace only the captured group, not the full match — patterns include
      // boundary chars (space, quote, ;) outside the group that must survive.
      // For rules without a group the second callback argument is the numeric
      // match offset, so the whole match is replaced instead.
      redacted = redacted.replace(re, (match: string, g1: unknown) =>
        typeof g1 === 'string' ? match.replace(g1, '[REDACTED]') : '[REDACTED]',
      )
    } while (redacted !== before)
  }
  return redacted
}