this repo has no description
at main 427 lines 14 kB view raw
import { createReadStream } from 'fs';
import * as readline from 'readline';

import type { ParsedMessage, ParsedSession, SessionStats, ToolUse } from '../types';

// Codex JSONL entry types
interface CodexEntry {
  timestamp?: string;
  type: 'session_meta' | 'event_msg' | 'response_item' | 'turn_context' | 'message' | 'function_call';
  payload: unknown;
}

interface CodexSessionMeta {
  id?: string;
  cwd?: string;
  cli_version?: string;
  model_provider?: string;
  git?: {
    branch: string;
    commit_hash?: string;
    repository_url?: string;
  };
}

interface CodexEventMsg {
  type: 'user_message' | 'agent_message' | 'agent_reasoning' | 'token_count';
  message?: string;
  info?: {
    total_token_usage?: {
      input_tokens: number;
      output_tokens: number;
      cached_input_tokens?: number;
      reasoning_output_tokens?: number;
    };
  };
}

interface CodexContentItem {
  type: string;
  text?: string;
}

interface CodexResponseItem {
  type: 'message' | 'function_call' | 'function_call_output' | 'custom_tool_call' | 'reasoning';
  role?: string;
  content?: CodexContentItem[];
  name?: string;
  input?: string; // For function_call/custom_tool_call (apply_patch content)
  arguments?: string; // For function_call (shell args as JSON)
  call_id?: string;
  output?: string;
  status?: string; // For custom_tool_call
}

interface ShellArgs {
  command?: unknown;
}

/**
 * Codex tool name -> Claude-equivalent name.
 * A Map (rather than an object literal) so that names such as `constructor`
 * or `toString` can never resolve to inherited Object.prototype members.
 */
const CODEX_TOOL_NAME_MAP: ReadonlyMap<string, string> = new Map([
  ['shell', 'Bash'],
  ['shell_command', 'Bash'],
  ['apply_patch', 'Edit'],
  ['update_plan', 'TodoWrite'],
]);

/** Matches an `apply_patch <<'PATCH' ... PATCH` heredoc and captures the patch body. */
const HEREDOC_PATCH_REGEX = /apply_patch\s*<<\s*['"]?PATCH['"]?\n([\s\S]*?)\n\s*PATCH/;

/** True for any non-null value whose `typeof` is `'object'` (arrays included). */
function isObject(value: unknown): value is object {
  return typeof value === 'object' && value !== null;
}

/** Returns `value` when it is a finite number, otherwise 0. */
function numberOrZero(value: unknown): number {
  return typeof value === 'number' && Number.isFinite(value) ? value : 0;
}

/** Reads `git.branch` from an untrusted value; returns '' when absent or not a string. */
function readGitBranch(git: unknown): string {
  if (!isObject(git)) return '';
  const branch = (git as { branch?: unknown }).branch;
  return typeof branch === 'string' ? branch : '';
}

/**
 * Increment `counts[key]`, treating only own properties as existing counters.
 * Without the own-property check a key like `toString` would read the inherited
 * function and the "count" would become a concatenated string.
 */
function incrementCount(counts: Record<string, number>, key: string): void {
  const current = Object.prototype.hasOwnProperty.call(counts, key) ? (counts[key] ?? 0) : 0;
  counts[key] = current + 1;
}

/**
 * Get the "effective date" for a timestamp using a 3am boundary.
 * Work done before 3am counts as the previous day (aligns with sleep cycle).
 *
 * Both the 3-hour shift and the returned YYYY-MM-DD are computed in the
 * machine's local time zone, so the boundary really is 3am local time.
 *
 * @param timestamp - Any string accepted by the `Date` constructor.
 * @returns The local calendar date, formatted `YYYY-MM-DD`.
 * @throws RangeError if `timestamp` cannot be parsed as a date.
 */
function getEffectiveDate(timestamp: string): string {
  const d = new Date(timestamp);
  if (Number.isNaN(d.getTime())) {
    throw new RangeError(`Invalid time value: ${timestamp}`);
  }
  d.setHours(d.getHours() - 3);
  const year = d.getFullYear().toString().padStart(4, '0');
  const month = (d.getMonth() + 1).toString().padStart(2, '0');
  const day = d.getDate().toString().padStart(2, '0');
  return `${year}-${month}-${day}`;
}

/**
 * Stream-parse a Codex JSONL session file.
 *
 * Blank lines, lines that are not valid JSON, and JSON values that are not
 * objects (e.g. `null`, numbers, strings) are skipped. The readline interface
 * and the underlying file stream are released when iteration ends, including
 * when the consumer stops early.
 *
 * @param filePath - Path of the `.jsonl` file to read.
 */
async function* parseCodexJSONLStream(filePath: string): AsyncGenerator<CodexEntry> {
  const input = createReadStream(filePath);
  const rl = readline.createInterface({
    input,
    crlfDelay: Infinity,
  });

  try {
    for await (const line of rl) {
      if (!line.trim()) continue;
      let parsed: unknown;
      try {
        parsed = JSON.parse(line);
      } catch {
        // Skip invalid JSON lines
        continue;
      }
      // Downstream code reads properties off every entry, so only objects are usable.
      if (isObject(parsed)) {
        yield parsed as CodexEntry;
      }
    }
  } finally {
    rl.close();
    input.destroy();
  }
}

/**
 * Extract file paths from apply_patch unified diff format.
 * Format: "*** Add File: path" or "*** Update File: path" or "*** Delete File: path"
 *
 * @param patchContent - Raw patch text.
 * @returns Unique, trimmed paths in order of first appearance.
 */
function extractFilesFromPatch(patchContent: string): string[] {
  const seen = new Set<string>();
  const regex = /\*\*\* (?:Add|Update|Delete) File:\s*(.+)/g;
  for (const match of patchContent.matchAll(regex)) {
    const filePath = (match[1] ?? '').trim();
    if (filePath !== '') {
      seen.add(filePath);
    }
  }
  return [...seen];
}

/**
 * Map Codex tool names to Claude-equivalent names for consistent tracking.
 * Unknown names are returned unchanged.
 */
function mapCodexToolName(name: string): string {
  return CODEX_TOOL_NAME_MAP.get(name) ?? name;
}

/**
 * Shorten `str` to at most `maxLength` characters, ending with '...' when cut.
 */
function truncate(str: string, maxLength: number): string {
  if (str.length <= maxLength) return str;
  return str.slice(0, maxLength - 3) + '...';
}

/**
 * Summarize tool input for display (truncate long content).
 *
 * - `shell` / `shell_command`: the command from the JSON `arguments` (array
 *   joined with spaces), or the raw `arguments` text if it is not valid JSON.
 * - `apply_patch`: the first touched file plus a "(+N more)" suffix, or the
 *   truncated patch text when no file headers are found.
 * - anything else: an empty string.
 */
function summarizeCodexToolInput(name: string, payload: CodexResponseItem): string {
  const MAX_LENGTH = 200;

  if ((name === 'shell' || name === 'shell_command') && typeof payload.arguments === 'string') {
    try {
      const args = JSON.parse(payload.arguments) as ShellArgs;
      let cmd = '';
      if (Array.isArray(args.command)) {
        cmd = args.command.join(' ');
      } else if (
        typeof args.command === 'string' ||
        typeof args.command === 'number' ||
        typeof args.command === 'boolean'
      ) {
        cmd = String(args.command);
      }
      return truncate(cmd, MAX_LENGTH);
    } catch {
      return truncate(payload.arguments, MAX_LENGTH);
    }
  }

  if (name === 'apply_patch' && typeof payload.input === 'string') {
    // Extract first file path from patch
    const files = extractFilesFromPatch(payload.input);
    const firstFile = files[0];
    if (firstFile !== undefined) {
      const additionalFiles = files.length - 1;
      return additionalFiles === 0 ? firstFile : `${firstFile} (+${additionalFiles.toString()} more)`;
    }
    return truncate(payload.input, MAX_LENGTH);
  }

  return '';
}

/**
 * Extract text content from a Codex message content array.
 * Text parts are joined with newlines; non-text parts and malformed items are ignored.
 */
function extractTextFromContent(content: CodexContentItem[] | undefined): string {
  if (content === undefined || !Array.isArray(content)) return '';
  const texts: string[] = [];
  for (const item of content as unknown[]) {
    if (!isObject(item)) continue;
    const part = item as Partial<CodexContentItem>;
    // Handle both new format ('text') and old format ('input_text', 'output_text')
    if (
      (part.type === 'text' || part.type === 'input_text' || part.type === 'output_text') &&
      typeof part.text === 'string'
    ) {
      texts.push(part.text);
    }
  }
  return texts.join('\n');
}

/**
 * Parse a Codex session file into the unified ParsedSession format.
 *
 * Handles both the typed entry format (`session_meta`, `event_msg`,
 * `response_item`) and the older format with top-level `message` /
 * `function_call` entries and an untyped first line.
 *
 * @param filePath - Path of the Codex `.jsonl` session file.
 * @param projectPath - Passed through unchanged to the result.
 * @param projectName - Passed through unchanged to the result.
 * @returns The parsed session. When the file holds no usable timestamp,
 *   `startTime`/`endTime` default to the current time. When it holds no
 *   session id, the id falls back to the file name without `.jsonl`.
 */
export async function parseCodexSessionFile(
  filePath: string,
  projectPath: string,
  projectName: string,
): Promise<ParsedSession> {
  const messages: ParsedMessage[] = [];
  const toolCalls: Record<string, number> = {};
  let sessionId = '';
  let gitBranch = '';
  let startTime = '';
  let endTime = '';
  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  let userMessages = 0;
  let assistantMessages = 0;

  // NOTE(review): collected but not part of the returned object — confirm whether
  // ParsedSession is meant to expose the changed files.
  const filesChanged = new Set<string>();

  // Record one tool invocation as an assistant message carrying a single tool use.
  const recordToolUse = (toolUse: ToolUse, timestamp: string): void => {
    assistantMessages++;
    messages.push({
      type: 'assistant',
      timestamp,
      text: '',
      toolUses: [toolUse],
    });
  };

  for await (const entry of parseCodexJSONLStream(filePath)) {
    const rawEntry = entry as unknown as Record<string, unknown>;
    const timestamp = typeof entry.timestamp === 'string' ? entry.timestamp : '';

    // Track timestamps (skip entries without timestamps - common in old format).
    // Unparseable strings are ignored so they can never become endTime and make
    // getEffectiveDate throw for the whole session.
    if (timestamp !== '' && !Number.isNaN(Date.parse(timestamp))) {
      if (startTime === '' || timestamp < startTime) startTime = timestamp;
      if (endTime === '' || timestamp > endTime) endTime = timestamp;
    }

    // Handle session_meta (first line) - new format
    if (entry.type === 'session_meta') {
      const meta: Partial<CodexSessionMeta> = isObject(entry.payload) ? (entry.payload as CodexSessionMeta) : {};
      sessionId = typeof meta.id === 'string' ? meta.id : '';
      gitBranch = readGitBranch(meta.git);
      continue;
    }

    // Handle old format first line (pre-October 2025): {id, timestamp, git, ...} without type
    if (rawEntry.id !== undefined && rawEntry.type === undefined && rawEntry.git !== undefined) {
      sessionId = typeof rawEntry.id === 'string' ? rawEntry.id : '';
      gitBranch = readGitBranch(rawEntry.git);
      continue;
    }

    // Handle event_msg (user/assistant text messages)
    if (entry.type === 'event_msg') {
      if (!isObject(entry.payload)) continue;
      const payload = entry.payload as CodexEventMsg;

      if (payload.type === 'user_message') {
        userMessages++;
        messages.push({
          type: 'user',
          timestamp,
          text: payload.message ?? '',
          toolUses: [],
        });
      } else if (payload.type === 'agent_message') {
        assistantMessages++;
        messages.push({
          type: 'assistant',
          timestamp,
          text: payload.message ?? '',
          toolUses: [],
        });
      } else if (payload.type === 'token_count') {
        const usage = payload.info?.total_token_usage;
        if (isObject(usage)) {
          // Track final token counts (total_token_usage accumulates)
          // NOTE(review): confirm that input_tokens/output_tokens in Codex logs do not
          // already include the cached/reasoning counts — if they do, this double-counts.
          totalInputTokens = numberOrZero(usage.input_tokens) + numberOrZero(usage.cached_input_tokens);
          totalOutputTokens = numberOrZero(usage.output_tokens) + numberOrZero(usage.reasoning_output_tokens);
        }
      }
      continue;
    }

    // Handle response_item (function calls and custom tool calls)
    if (entry.type === 'response_item') {
      if (!isObject(entry.payload)) continue;
      const payload = entry.payload as CodexResponseItem;

      // Function calls and custom tool calls (equivalent to Claude tool_use)
      if (
        (payload.type === 'function_call' || payload.type === 'custom_tool_call') &&
        typeof payload.name === 'string'
      ) {
        const mappedName = mapCodexToolName(payload.name);
        incrementCount(toolCalls, mappedName);

        // Extract files from apply_patch
        if (payload.name === 'apply_patch' && typeof payload.input === 'string') {
          for (const file of extractFilesFromPatch(payload.input)) {
            filesChanged.add(file);
          }
        }

        recordToolUse(
          {
            name: mappedName,
            input: summarizeCodexToolInput(payload.name, payload),
            rawInput: payload as unknown as Record<string, unknown>,
          },
          timestamp,
        );
      }

      // Agent text messages from response_item
      if (payload.type === 'message' && payload.role === 'assistant' && payload.content !== undefined) {
        const text = extractTextFromContent(payload.content);
        if (text !== '') {
          assistantMessages++;
          messages.push({
            type: 'assistant',
            timestamp,
            text,
            toolUses: [],
          });
        }
      }
      continue;
    }

    // Handle old format: top-level function_call (pre-October 2025)
    // Old format: {"type":"function_call","name":"shell","arguments":"{\"command\":[\"bash\",\"-lc\",\"apply_patch...\"]}"}
    if (entry.type === 'function_call') {
      const argsStr = rawEntry.arguments;
      if (rawEntry.name === 'shell' && typeof argsStr === 'string') {
        let command: unknown;
        try {
          command = (JSON.parse(argsStr) as ShellArgs | null)?.command;
        } catch {
          // Invalid JSON in arguments
          command = undefined;
        }

        // Only the [shell, flag, script] form is recognized; the script is argv[2].
        // NOTE(review): shell calls with fewer than three argv entries are not counted —
        // confirm that is intended.
        const shellCmd: unknown = Array.isArray(command) && command.length >= 3 ? command[2] : undefined;
        if (typeof shellCmd === 'string') {
          // Check if this is a shell command containing apply_patch
          const patchMatch = HEREDOC_PATCH_REGEX.exec(shellCmd);
          if (patchMatch !== null) {
            // Extract the patch content from the heredoc
            incrementCount(toolCalls, 'Edit');
            const files = extractFilesFromPatch(patchMatch[1] ?? '');
            for (const file of files) {
              filesChanged.add(file);
            }
            const fileList = files.join(', ');
            recordToolUse(
              {
                name: 'Edit',
                input: `apply_patch: ${fileList !== '' ? fileList : 'file changes'}`,
                rawInput: rawEntry,
              },
              timestamp,
            );
          } else {
            // Regular shell command
            incrementCount(toolCalls, 'Bash');
            recordToolUse(
              {
                name: 'Bash',
                input: shellCmd.substring(0, 100),
                rawInput: rawEntry,
              },
              timestamp,
            );
          }
        }
      }
      continue;
    }

    // Handle old format: top-level message (pre-October 2025)
    // Old format: {"type":"message","role":"user/assistant","content":[{"type":"input_text/output_text","text":"..."}]}
    if (entry.type === 'message') {
      const msgEntry = entry as unknown as {
        type: string;
        role: string;
        content?: CodexContentItem[];
        timestamp?: string;
      };
      const text = extractTextFromContent(msgEntry.content);

      // Skip environment_context messages (just contain cwd/approval policy info)
      if (text !== '' && !text.includes('<environment_context>')) {
        if (msgEntry.role === 'user') {
          userMessages++;
          messages.push({
            type: 'user',
            timestamp,
            text,
            toolUses: [],
          });
        } else if (msgEntry.role === 'assistant') {
          assistantMessages++;
          messages.push({
            type: 'assistant',
            timestamp,
            text,
            toolUses: [],
          });
        }
      }
    }
  }

  // Fallback to filename for sessionId (accept both '/' and '\' separators,
  // strip only a trailing ".jsonl")
  if (sessionId === '') {
    const filename = filePath.split(/[\\/]/).pop() ?? '';
    const stem = filename.replace(/\.jsonl$/, '');
    sessionId = stem !== '' ? stem : 'unknown';
  }

  // Provide default timestamps if none found
  const now = new Date().toISOString();
  if (startTime === '') startTime = now;
  if (endTime === '') endTime = startTime;

  // Derive date from endTime with 3am boundary
  const date = getEffectiveDate(endTime);

  const stats: SessionStats = {
    userMessages,
    assistantMessages,
    toolCalls,
    totalInputTokens,
    totalOutputTokens,
  };

  return {
    sessionId,
    filePath,
    projectPath,
    projectName,
    gitBranch,
    startTime,
    endTime,
    date,
    messages,
    stats,
  };
}