// source dump of claude code
// at main 433 lines 12 kB view raw
/**
 * PID-Based Version Locking
 *
 * This module provides PID-based locking for running Claude Code versions.
 * Unlike mtime-based locking (which can hold locks for 30 days after a crash),
 * PID-based locking can immediately detect when a process is no longer running.
 *
 * Lock files contain JSON with the PID and metadata, and staleness is determined
 * by checking if the process is still alive.
 */

import { basename, join } from 'path'
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
import { logForDebugging } from '../debug.js'
import { isEnvDefinedFalsy, isEnvTruthy } from '../envUtils.js'
import { isENOENT, toError } from '../errors.js'
import { getFsImplementation } from '../fsOperations.js'
import { getProcessCommand } from '../genericProcessUtils.js'
import { logError } from '../log.js'
import {
  jsonParse,
  jsonStringify,
  writeFileSync_DEPRECATED,
} from '../slowOperations.js'

/**
 * Check if PID-based version locking is enabled.
 * When disabled, falls back to mtime-based locking (30-day timeout).
 *
 * Controlled by GrowthBook gate with local override:
 * - Set ENABLE_PID_BASED_VERSION_LOCKING=true to force-enable
 * - Set ENABLE_PID_BASED_VERSION_LOCKING=false to force-disable
 * - If unset, GrowthBook gate (tengu_pid_based_version_locking) controls rollout
 *
 * @returns true when PID-based locking should be used
 */
export function isPidBasedLockingEnabled(): boolean {
  const envVar = process.env.ENABLE_PID_BASED_VERSION_LOCKING
  // If env var is explicitly set, respect it
  if (isEnvTruthy(envVar)) {
    return true
  }
  if (isEnvDefinedFalsy(envVar)) {
    return false
  }
  // GrowthBook controls gradual rollout (returns false for external users)
  return getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_pid_based_version_locking',
    false,
  )
}

/**
 * Content stored in a version lock file
 */
export type VersionLockContent = {
  pid: number
  version: string
  execPath: string
  acquiredAt: number // timestamp when lock was acquired
}

/**
 * Information about a lock for diagnostic purposes
 */
export type LockInfo = {
  version: string
  pid: number
  isProcessRunning: boolean
  execPath: string
  acquiredAt: Date
  lockFilePath: string
}

// Fallback stale timeout (2 hours) - used when PID check is inconclusive
// This is much shorter than the previous 30-day timeout but still allows
// for edge cases like network filesystems where PID check might fail
const FALLBACK_STALE_MS = 2 * 60 * 60 * 1000

/**
 * Check whether a caught value is an error object carrying the given `code`
 * (the shape Node.js uses for system errors such as EPERM / ESRCH).
 */
function hasErrorCode(error: unknown, code: string): boolean {
  return (
    typeof error === 'object' &&
    error !== null &&
    (error as { code?: unknown }).code === code
  )
}

/**
 * Check if a process with the given PID is currently running.
 * Uses signal 0, which doesn't actually send a signal but performs the
 * existence/permission check.
 *
 * @param pid - Process id to probe
 * @returns true if the process exists (even if we are not allowed to signal
 *   it), false otherwise or when `pid` is not a usable process id
 */
export function isProcessRunning(pid: number): boolean {
  // PID 0 is special - it refers to the current process group, not a real process
  // PID 1 is init/systemd and is always running but shouldn't be considered for locks
  // Non-integer values can never name a process
  if (!Number.isInteger(pid) || pid <= 1) {
    return false
  }

  try {
    process.kill(pid, 0)
    return true
  } catch (error) {
    // EPERM means the process exists but belongs to another user, so it IS
    // running. Treating it as dead could let a live version's lock be removed.
    // Every other failure (e.g. ESRCH) means there is no such process.
    return hasErrorCode(error, 'EPERM')
  }
}

/**
 * Validate that a running process is actually a Claude process.
 * This helps mitigate PID reuse issues.
 *
 * @param pid - PID recorded in the lock file
 * @param expectedExecPath - Executable path recorded in the lock file
 * @returns false if the process is not running or its command line matches
 *   neither 'claude' nor the expected exec path; true otherwise (including
 *   when the command line cannot be determined)
 */
function isClaudeProcess(pid: number, expectedExecPath: string): boolean {
  if (!isProcessRunning(pid)) {
    return false
  }

  // If the PID matches our current process, we know it's valid
  // This handles test environments where the command might not contain 'claude'
  if (pid === process.pid) {
    return true
  }

  try {
    const command = getProcessCommand(pid)
    if (!command) {
      // If we can't get the command, trust the PID check
      // This is conservative - we'd rather not delete a running version
      return true
    }

    // Check if the command contains 'claude' or the expected exec path
    const normalizedCommand = command.toLowerCase()
    const normalizedExecPath = expectedExecPath.toLowerCase()

    return (
      normalizedCommand.includes('claude') ||
      normalizedCommand.includes(normalizedExecPath)
    )
  } catch {
    // If command check fails, trust the PID check
    return true
  }
}

/**
 * Read and parse a lock file's content.
 *
 * @param lockFilePath - Path of the lock file to read
 * @returns the parsed content, or null when the file is missing, unreadable,
 *   empty, not valid JSON, or lacks a numeric `pid` / non-empty string
 *   `version` / non-empty string `execPath`
 */
export function readLockContent(
  lockFilePath: string,
): VersionLockContent | null {
  const fs = getFsImplementation()

  try {
    const content = fs.readFileSync(lockFilePath, { encoding: 'utf8' })
    if (!content || content.trim() === '') {
      return null
    }

    // Treat the file as untrusted input: narrow from unknown instead of
    // asserting the shape up front.
    const parsed: unknown = jsonParse(content)
    if (typeof parsed !== 'object' || parsed === null) {
      return null
    }

    const candidate = parsed as Partial<
      Record<keyof VersionLockContent, unknown>
    >

    // Validate required fields. The string checks matter: execPath is later
    // lower-cased when matching the process command line.
    if (
      typeof candidate.pid !== 'number' ||
      typeof candidate.version !== 'string' ||
      candidate.version === '' ||
      typeof candidate.execPath !== 'string' ||
      candidate.execPath === ''
    ) {
      return null
    }

    // acquiredAt is informational only and is intentionally not validated
    return parsed as VersionLockContent
  } catch {
    return null
  }
}

/**
 * Check if a lock file represents an active lock (process still running).
 *
 * @param lockFilePath - Path of the lock file to inspect
 * @returns true when the lock's PID is alive and looks like a Claude process
 */
export function isLockActive(lockFilePath: string): boolean {
  const content = readLockContent(lockFilePath)

  if (!content) {
    return false
  }

  const { pid, execPath } = content

  // Primary check: is the process running?
  if (!isProcessRunning(pid)) {
    return false
  }

  // Secondary validation: is it actually a Claude process?
  // This helps with PID reuse scenarios
  if (!isClaudeProcess(pid, execPath)) {
    logForDebugging(
      `Lock PID ${pid} is running but does not appear to be Claude - treating as stale`,
    )
    return false
  }

  // Fallback for very old locks (> 2 hours): probe the PID once more.
  // NOTE(review): this repeats the same PID probe as above, so it only
  // catches a process that exited during this call; it does not expire old
  // locks by age alone. Confirm whether that is the intent.
  const fs = getFsImplementation()
  try {
    const stats = fs.statSync(lockFilePath)
    const age = Date.now() - stats.mtimeMs
    if (age > FALLBACK_STALE_MS) {
      // Double-check that we can still see the process
      if (!isProcessRunning(pid)) {
        return false
      }
    }
  } catch {
    // If we can't stat the file, trust the PID check
  }

  return true
}

/**
 * Write lock content to a file atomically: write a uniquely named temp file
 * next to the target, then rename it over the target.
 *
 * @param lockFilePath - Final path of the lock file
 * @param content - Lock content to serialize as JSON
 * @throws rethrows any write/rename error after best-effort temp cleanup
 */
function writeLockFile(
  lockFilePath: string,
  content: VersionLockContent,
): void {
  const fs = getFsImplementation()
  const tempPath = `${lockFilePath}.tmp.${process.pid}.${Date.now()}`

  try {
    writeFileSync_DEPRECATED(tempPath, jsonStringify(content, null, 2), {
      encoding: 'utf8',
      flush: true,
    })
    fs.renameSync(tempPath, lockFilePath)
  } catch (error) {
    // Clean up temp file on failure (best-effort)
    try {
      fs.unlinkSync(tempPath)
    } catch {
      // Ignore cleanup errors (ENOENT expected if write failed before file creation)
    }
    throw error
  }
}

/**
 * Try to acquire a lock on a version file.
 *
 * @param versionPath - Path of the version being locked (its basename is
 *   recorded as the version name)
 * @param lockFilePath - Path of the lock file to create
 * @returns a release function if successful, null if the lock is already held
 *   or could not be written
 */
export async function tryAcquireLock(
  versionPath: string,
  lockFilePath: string,
): Promise<(() => void) | null> {
  const fs = getFsImplementation()
  const versionName = basename(versionPath)

  // Check if there's an existing active lock (including by our own process)
  // Use isLockActive for consistency with cleanup - it checks both PID running AND
  // validates it's actually a Claude process (to handle PID reuse scenarios)
  if (isLockActive(lockFilePath)) {
    const existingContent = readLockContent(lockFilePath)
    logForDebugging(
      `Cannot acquire lock for ${versionName} - held by PID ${existingContent?.pid}`,
    )
    return null
  }

  // Try to acquire the lock
  const lockContent: VersionLockContent = {
    pid: process.pid,
    version: versionName,
    execPath: process.execPath,
    acquiredAt: Date.now(),
  }

  try {
    writeLockFile(lockFilePath, lockContent)

    // Verify we actually got the lock (race condition check). The rename in
    // writeLockFile is not exclusive, so a concurrent writer may have replaced
    // our file; the read-back narrows that window but cannot close it.
    const verifyContent = readLockContent(lockFilePath)
    if (verifyContent?.pid !== process.pid) {
      // Another process won the race
      logForDebugging(
        `Lost lock race for ${versionName} - now held by PID ${verifyContent?.pid}`,
      )
      return null
    }

    logForDebugging(`Acquired PID lock for ${versionName} (PID ${process.pid})`)

    // Return release function
    return () => {
      try {
        // Only release if we still own the lock
        const currentContent = readLockContent(lockFilePath)
        if (currentContent?.pid === process.pid) {
          fs.unlinkSync(lockFilePath)
          logForDebugging(`Released PID lock for ${versionName}`)
        }
      } catch (error) {
        logForDebugging(`Failed to release lock for ${versionName}: ${error}`)
      }
    }
  } catch (error) {
    logForDebugging(`Failed to acquire lock for ${versionName}: ${error}`)
    return null
  }
}

/**
 * Acquire a lock and hold it for the lifetime of the process.
 * This is used for locking the currently running version.
 *
 * @param versionPath - Path of the version being locked
 * @param lockFilePath - Path of the lock file to create
 * @returns true if the lock was acquired, false otherwise
 */
export async function acquireProcessLifetimeLock(
  versionPath: string,
  lockFilePath: string,
): Promise<boolean> {
  const release = await tryAcquireLock(versionPath, lockFilePath)

  if (!release) {
    return false
  }

  // Register cleanup on process exit. The handler may run more than once
  // (e.g. SIGINT followed by 'exit'); release() re-checks ownership first.
  const cleanup = () => {
    try {
      release()
    } catch {
      // Ignore errors during process exit
    }
  }

  // NOTE(review): per the Node.js docs, installing a SIGINT/SIGTERM listener
  // removes the default "exit on signal" behaviour, and these handlers only
  // release the lock. Presumably another handler elsewhere terminates the
  // process - confirm.
  process.on('exit', cleanup)
  process.on('SIGINT', cleanup)
  process.on('SIGTERM', cleanup)

  // Don't call release() - we want to hold the lock until process exits
  return true
}

/**
 * Execute a callback while holding a lock.
 *
 * @param versionPath - Path of the version being locked
 * @param lockFilePath - Path of the lock file to create
 * @param callback - Work to run while the lock is held; errors propagate to
 *   the caller after the lock is released
 * @returns true if the callback executed, false if lock couldn't be acquired
 */
export async function withLock(
  versionPath: string,
  lockFilePath: string,
  callback: () => void | Promise<void>,
): Promise<boolean> {
  const release = await tryAcquireLock(versionPath, lockFilePath)

  if (!release) {
    return false
  }

  try {
    await callback()
    return true
  } finally {
    release()
  }
}

/**
 * List the `.lock` entries in a locks directory.
 * A missing directory yields an empty list; any other read error is logged
 * and also yields an empty list.
 */
function listLockEntries(locksDir: string): string[] {
  const fs = getFsImplementation()

  try {
    return fs
      .readdirStringSync(locksDir)
      .filter((f: string) => f.endsWith('.lock'))
  } catch (error) {
    if (!isENOENT(error)) {
      logError(toError(error))
    }
    return []
  }
}

/**
 * Get information about all version locks for diagnostics.
 *
 * @param locksDir - Directory containing the lock files
 * @returns one entry per readable, valid lock file (unparseable files are
 *   skipped)
 */
export function getAllLockInfo(locksDir: string): LockInfo[] {
  const lockInfos: LockInfo[] = []

  for (const lockFile of listLockEntries(locksDir)) {
    const lockFilePath = join(locksDir, lockFile)
    const content = readLockContent(lockFilePath)

    if (content) {
      lockInfos.push({
        version: content.version,
        pid: content.pid,
        isProcessRunning: isProcessRunning(content.pid),
        execPath: content.execPath,
        acquiredAt: new Date(content.acquiredAt),
        lockFilePath,
      })
    }
  }

  return lockInfos
}

/**
 * Clean up stale locks (locks where the process is no longer running).
 *
 * Handles both:
 * - PID-based locks (files containing JSON with PID)
 * - Legacy proper-lockfile locks (directories created by mtime-based locking)
 *
 * @param locksDir - Directory containing the lock entries
 * @returns the number of locks cleaned up
 */
export function cleanupStaleLocks(locksDir: string): number {
  const fs = getFsImplementation()
  let cleanedCount = 0

  for (const lockEntry of listLockEntries(locksDir)) {
    const lockFilePath = join(locksDir, lockEntry)

    try {
      const stats = fs.lstatSync(lockFilePath)

      if (stats.isDirectory()) {
        // Legacy proper-lockfile directory lock - always remove when PID-based
        // locking is enabled since these are from a different locking mechanism
        fs.rmSync(lockFilePath, { recursive: true, force: true })
        cleanedCount++
        logForDebugging(`Cleaned up legacy directory lock: ${lockEntry}`)
      } else if (!isLockActive(lockFilePath)) {
        // PID-based file lock with no running process
        fs.unlinkSync(lockFilePath)
        cleanedCount++
        logForDebugging(`Cleaned up stale lock: ${lockEntry}`)
      }
    } catch (error) {
      // Individual cleanup errors are non-fatal (best-effort); keep going
      logForDebugging(`Failed to clean up lock ${lockEntry}: ${error}`)
    }
  }

  return cleanedCount
}