this repo has no description
at main 383 lines 12 kB view raw
import { createHash } from 'crypto';
import { existsSync, readFileSync, readdirSync, statSync } from 'fs';
import { homedir } from 'os';
import { dirname, join } from 'path';

import type { SessionFile } from '../types';
import { findAllCodexSessionFiles } from './codex-detector';
import { isFileProcessed } from './db';

/** File extension of Claude session transcripts. */
const SESSION_FILE_EXTENSION = '.jsonl';

/**
 * Find the git root for a given path.
 *
 * Walks from `path` up through its ancestors and returns the first directory
 * that contains a `.git` entry. The filesystem root (`/`) itself is never
 * probed.
 *
 * @param path - Directory to start searching from.
 * @returns The git root, or null if none is found.
 */
export function findGitRoot(path: string): string | null {
  let current = path;

  while (current !== '/') {
    if (existsSync(join(current, '.git'))) {
      return current;
    }
    const parent = dirname(current);
    // dirname() stops changing once it reaches the top ('/' or '.'),
    // which guarantees termination for relative paths as well.
    if (parent === current) break;
    current = parent;
  }

  return null;
}

/**
 * Get possible Claude config directories.
 *
 * Candidates are the comma-separated entries of the CLAUDE_CONFIG_DIR
 * environment variable followed by the default locations under the home
 * directory. Only directories that contain a `projects` subdirectory are
 * returned.
 *
 * @returns Existing config directories, in priority order, without duplicates.
 */
export function getClaudePaths(): string[] {
  // Drop blank entries: an empty CLAUDE_CONFIG_DIR (or a trailing comma) would
  // otherwise produce '' and make us probe './projects' relative to the cwd.
  const envPaths = (process.env.CLAUDE_CONFIG_DIR ?? '')
    .split(',')
    .map((p) => p.trim())
    .filter((p) => p.length > 0);
  const defaults = [join(homedir(), '.config', 'claude'), join(homedir(), '.claude')];

  // De-duplicate so a directory listed twice is not scanned twice.
  const candidates = Array.from(new Set([...envPaths, ...defaults]));

  return candidates.filter((p) => existsSync(join(p, 'projects')));
}

/**
 * Resolve a full encoded path by probing the filesystem.
 * Handles paths like "Users-sarah/.cache-pdf-to-markdown" where we need to find
 * where the base directory ends and the project name (with dashes) begins.
 *
 * If sessionFilePath is provided and filesystem probing fails, reads the session
 * file to extract the original cwd.
 *
 * @param encodedPath - Encoded path without the leading dash.
 * @param sessionFilePath - Optional session file used as a cwd fallback.
 * @returns The first existing interpretation, else the cwd recorded in the
 *   session file, else the path with every dash converted to a slash.
 */
function resolveFullPath(encodedPath: string, sessionFilePath?: string): string {
  const parts = encodedPath.split('-');

  // Start with "every dash is a slash" and progressively keep more of the
  // trailing dashes literal (i.e. treat a longer tail as the project name).
  for (let i = parts.length - 1; i >= 1; i--) {
    const basePath = '/' + parts.slice(0, i).join('/');
    const projectName = parts.slice(i).join('-');

    const fullPath = `${basePath}/${projectName}`;
    if (existsSync(fullPath)) {
      return fullPath;
    }
  }

  // Filesystem probing failed - try reading cwd from session file
  if (sessionFilePath !== undefined && existsSync(sessionFilePath)) {
    const cwd = extractCwdFromSessionFile(sessionFilePath);
    if (cwd !== null) {
      return cwd;
    }
  }

  // Nothing found - just convert all dashes to slashes
  return '/' + encodedPath.replace(/-/g, '/');
}

/**
 * Read the first few lines of a session file to extract the cwd.
 *
 * Each of the first 10 lines is parsed as JSON; the first object with a string
 * `cwd` property wins.
 *
 * @param filePath - Path of the session file.
 * @returns The recorded cwd, or null if the file cannot be read or no such
 *   entry is found.
 */
function extractCwdFromSessionFile(filePath: string): string | null {
  try {
    const content = readFileSync(filePath, 'utf-8');
    const lines = content.split('\n', 10);
    for (const line of lines) {
      if (!line.trim()) continue;
      try {
        const entry: unknown = JSON.parse(line);
        if (typeof entry === 'object' && entry !== null && 'cwd' in entry && typeof entry.cwd === 'string') {
          return entry.cwd;
        }
      } catch {
        // Ignore malformed JSON lines
      }
    }
  } catch {
    // Ignore file read errors - this is only a best-effort fallback
  }
  return null;
}

/**
 * Try different interpretations of a dash-separated string by progressively
 * replacing dashes with slashes from right to left.
 *
 * For "taper-calculator-apps-web", tries:
 *   1. taper-calculator-apps-web (all dashes literal)
 *   2. taper-calculator-apps/web
 *   3. taper-calculator/apps/web
 *   4. taper/calculator/apps/web (all dashes as slashes)
 *
 * @param basePath - Absolute directory that contains the project.
 * @param projectPart - Encoded remainder (project name plus optional subdirs).
 * @returns The first path that exists on the filesystem, or the literal
 *   interpretation (all dashes preserved) if none exists.
 */
function resolveProjectPath(basePath: string, projectPart: string): string {
  const parts = projectPart.split('-');

  // splitCount = how many trailing parts are treated as subdirectories.
  for (let splitCount = 0; splitCount < parts.length; splitCount++) {
    const nameParts = parts.slice(0, parts.length - splitCount);
    const subdirParts = parts.slice(parts.length - splitCount);
    const candidate = [basePath, nameParts.join('-'), ...subdirParts].join('/');

    if (existsSync(candidate)) {
      return candidate;
    }
  }

  // Nothing exists - return the literal interpretation (all dashes preserved)
  return `${basePath}/${projectPart}`;
}

/**
 * Strip date prefix from project names (common in src/tries/ experiments).
 * Pattern: "2025-12-15-todo-calendar-adhd" → "todo-calendar-adhd"
 */
function stripDatePrefix(name: string): string {
  return name.replace(/^\d{4}-\d{2}-\d{2}-/, '');
}

/**
 * Last non-empty segment of a slash-separated path, or 'unknown' when the
 * path has no non-empty segment (e.g. "" or "/").
 */
function lastPathSegment(path: string): string {
  return path.split('/').filter(Boolean).pop() ?? 'unknown';
}

/**
 * Decode project folder name back to path and extract project name.
 *
 * Claude encodes paths by replacing / with - but project folders under
 * src/a/ or src/tries/ may have dashes in their actual names.
 *
 * Since the encoding is lossy, we probe the filesystem to find the correct
 * interpretation, then use git root as the canonical project identity.
 *
 * Examples:
 *   -Users-USERNAME-src-a-drink-reminder-native
 *     -> tries: drink-reminder-native (exists!) ✓
 *     -> path: /Users/USERNAME/src/a/drink-reminder-native
 *     -> name: drink-reminder-native
 *
 *   -Users-USERNAME-src-a-taper-calculator-apps-web
 *     -> tries: taper-calculator-apps-web (doesn't exist)
 *     -> tries: taper-calculator-apps/web (doesn't exist)
 *     -> tries: taper-calculator/apps/web (exists!) ✓
 *     -> git root: /Users/USERNAME/src/a/taper-calculator
 *     -> name: taper-calculator
 *
 * @param folderName - Encoded folder name as found under `projects/`.
 * @param sessionFilePath - Optional session file whose recorded cwd is used
 *   when filesystem probing fails (only for paths outside src/a and src/tries).
 * @returns The resolved path (git root when one exists) and a display name.
 *   The home directory is reported with the name "~".
 */
export function decodeProjectFolder(folderName: string, sessionFilePath?: string): { path: string; name: string } {
  // Remove the leading dash (the encoded form of the leading '/'), but only if
  // it is actually there so other input does not lose its first character.
  const withoutLeading = folderName.startsWith('-') ? folderName.slice(1) : folderName;

  // Find the src/a/ or src/tries/ marker
  const srcMatch = /^(Users-[^-]+-src-(a|tries))-(.+)$/.exec(withoutLeading);

  let decodedPath: string;
  let isTriesProject = false;

  if (srcMatch) {
    const basePath = '/' + srcMatch[1].replace(/-/g, '/');
    const projectPart = srcMatch[3];
    decodedPath = resolveProjectPath(basePath, projectPart);
    isTriesProject = srcMatch[2] === 'tries';
  } else {
    // Fallback for paths outside src/a/ and src/tries/
    // Handle hidden folders: -- encodes /. (e.g., /.cache, /.config)
    const withHiddenFolders = withoutLeading.replace(/--/g, '/.');

    // Try to find where the base path ends and project name begins
    // by probing the filesystem progressively, with session file fallback
    decodedPath = resolveFullPath(withHiddenFolders, sessionFilePath);
  }

  // Special case: home directory should show as "~"
  if (decodedPath === homedir()) {
    return { path: decodedPath, name: '~' };
  }

  // Prefer the git root (normalizes monorepo subdirectories); otherwise use
  // the decoded path as-is.
  const gitRoot = existsSync(decodedPath) ? findGitRoot(decodedPath) : null;
  const projectPath = gitRoot ?? decodedPath;

  const baseName = lastPathSegment(projectPath);
  // Strip date prefix from tries projects
  const projectName = isTriesProject ? stripDatePrefix(baseName) : baseName;

  return { path: projectPath, name: projectName };
}

/**
 * @deprecated Use decodeProjectFolder instead
 */
export function decodeProjectPath(folderName: string): string {
  return decodeProjectFolder(folderName).path;
}

/**
 * @deprecated Use decodeProjectFolder instead
 */
export function getProjectName(projectPath: string): string {
  return lastPathSegment(projectPath);
}

/**
 * Calculate MD5 hash of a file.
 *
 * The file is read as text via Bun and the hex digest of that text is
 * returned. The hash is used for change detection, not for security.
 */
export async function getFileHash(filePath: string): Promise<string> {
  const file = Bun.file(filePath);
  const content = await file.text();
  return createHash('md5').update(content).digest('hex');
}

/**
 * Scan for all Claude session files.
 *
 * Looks at every `projects/<folder>/*.jsonl` under each Claude config
 * directory. Entries that cannot be stat'ed or listed (e.g. removed while
 * scanning, broken symlinks) are skipped instead of aborting the scan.
 *
 * @returns Session descriptors sorted by modification time, newest first.
 *   `fileHash` is left empty and filled in later.
 */
export function findAllSessionFiles(): SessionFile[] {
  const sessions: SessionFile[] = [];

  for (const claudePath of getClaudePaths()) {
    const projectsDir = join(claudePath, 'projects');
    if (!existsSync(projectsDir)) continue;

    for (const folder of readdirSync(projectsDir)) {
      const folderPath = join(projectsDir, folder);

      // Find all .jsonl files in this project folder
      let files: string[];
      try {
        if (!statSync(folderPath).isDirectory()) continue;
        files = readdirSync(folderPath).filter((f) => f.endsWith(SESSION_FILE_EXTENSION));
      } catch {
        // Entry vanished or is unreadable - skip it, keep scanning the rest
        continue;
      }
      if (files.length === 0) continue;

      // Use the first session file to help decode the project folder
      // (provides cwd fallback when the original directory no longer exists)
      const firstSessionPath = join(folderPath, files[0]);
      const { path: projectPath, name: projectName } = decodeProjectFolder(folder, firstSessionPath);

      for (const file of files) {
        const filePath = join(folderPath, file);

        let modifiedAt: Date;
        try {
          modifiedAt = statSync(filePath).mtime;
        } catch {
          // File vanished between listing and stat - skip it
          continue;
        }

        sessions.push({
          path: filePath,
          projectPath,
          projectName,
          // Strip the extension from the end only (names were filtered by suffix)
          sessionId: file.slice(0, -SESSION_FILE_EXTENSION.length),
          modifiedAt,
          fileHash: '', // Computed lazily
          source: 'claude',
        });
      }
    }
  }

  // Sort by modification time (newest first)
  sessions.sort((a, b) => b.modifiedAt.getTime() - a.modifiedAt.getTime());

  return sessions;
}

/**
 * Find unprocessed or modified session files.
 *
 * Every session's `fileHash` is computed and stored on the session object.
 *
 * @param force - When true, return every session regardless of whether it was
 *   already processed.
 * @returns Sessions for which `isFileProcessed(path, hash)` is false, or all
 *   sessions when `force` is set.
 */
export async function findUnprocessedSessions(force = false): Promise<SessionFile[]> {
  const selected: SessionFile[] = [];

  // Hash one file at a time, as the original implementation did, so only a
  // single file is being read at any moment.
  for (const session of findAllSessions()) {
    session.fileHash = await computeFileHash(session.path);

    if (force || !isFileProcessed(session.path, session.fileHash)) {
      selected.push(session);
    }
  }

  return selected;
}

/**
 * Compute file hash asynchronously (internal alias of getFileHash).
 */
async function computeFileHash(filePath: string): Promise<string> {
  return getFileHash(filePath);
}

/**
 * Filter sessions by date.
 *
 * Currently a pass-through: `_targetDate` is ignored and `sessions` is
 * returned unchanged.
 */
export function filterSessionsByDate(sessions: SessionFile[], _targetDate: string): SessionFile[] {
  // We need to peek into files to check dates, but that's expensive
  // For now, return all and let the processor filter
  return sessions;
}

/**
 * Find all sessions from all supported sources (Claude + Codex).
 *
 * @returns Combined list sorted by modification time, newest first.
 */
export function findAllSessions(): SessionFile[] {
  const claudeSessions = findAllSessionFiles();
  const codexSessions = findAllCodexSessionFiles();

  const all = [...claudeSessions, ...codexSessions];
  all.sort((a, b) => b.modifiedAt.getTime() - a.modifiedAt.getTime());

  return all;
}

/**
 * Get stats about session files.
 *
 * @returns Number of session files, number of distinct project paths, and the
 *   Claude config directories that were found.
 */
export function getSessionStats(): {
  totalFiles: number;
  totalProjects: number;
  claudePaths: string[];
} {
  const allSessions = findAllSessions();
  const projects = new Set(allSessions.map((s) => s.projectPath));

  return {
    totalFiles: allSessions.length,
    totalProjects: projects.size,
    claudePaths: getClaudePaths(),
  };
}