this repo has no description
1import { createReadStream } from 'fs';
2import * as readline from 'readline';
3
4import type { ParsedMessage, ParsedSession, SessionStats, ToolUse } from '../types';
5
// Codex JSONL entry types

/** Values accepted in the top-level `type` field of a session log line. */
type CodexEntryType =
  | 'session_meta'
  | 'event_msg'
  | 'response_item'
  | 'turn_context'
  | 'message'
  | 'function_call';

/**
 * One parsed line of a Codex JSONL session file.
 *
 * `payload` stays `unknown` here; the session parser casts it to a concrete
 * shape after inspecting `type`.
 */
interface CodexEntry {
  type: CodexEntryType;
  timestamp?: string;
  payload: unknown;
}
12
/** Git details nested under the session metadata. */
interface CodexSessionGitInfo {
  branch: string;
  commit_hash?: string;
  repository_url?: string;
}

/**
 * Payload shape used for `session_meta` entries.
 * Every field is optional; the parser falls back to empty strings when one is absent.
 */
interface CodexSessionMeta {
  id?: string;
  cwd?: string;
  cli_version?: string;
  model_provider?: string;
  git?: CodexSessionGitInfo;
}
24
/** Token counters carried by a `token_count` event. */
interface CodexTotalTokenUsage {
  input_tokens: number;
  output_tokens: number;
  cached_input_tokens?: number;
  reasoning_output_tokens?: number;
}

/**
 * Payload shape used for `event_msg` entries.
 * `message` is read for user/agent messages; `info` is read for `token_count` events.
 */
interface CodexEventMsg {
  type: 'user_message' | 'agent_message' | 'agent_reasoning' | 'token_count';
  message?: string;
  info?: {
    total_token_usage?: CodexTotalTokenUsage;
  };
}
37
/**
 * One element of a message `content` array.
 * Only items whose `type` is a text variant and that carry `text` are used by the parser.
 */
interface CodexContentItem {
  /** Content kind, e.g. 'text', 'input_text' or 'output_text'. */
  type: string;
  /** Text body, when the item has one. */
  text?: string;
}
42
/** Values accepted in the `type` field of a `response_item` payload. */
type CodexResponseItemType =
  | 'message'
  | 'function_call'
  | 'function_call_output'
  | 'custom_tool_call'
  | 'reasoning';

/**
 * Payload shape used for `response_item` entries.
 * Which optional fields are present depends on `type`.
 */
interface CodexResponseItem {
  type: CodexResponseItemType;
  role?: string;
  content?: CodexContentItem[];
  name?: string;
  /** For function_call/custom_tool_call (apply_patch content). */
  input?: string;
  /** For function_call (shell args as JSON). */
  arguments?: string;
  call_id?: string;
  output?: string;
  /** For custom_tool_call. */
  status?: string;
}
54
/**
 * Get the "effective date" for a timestamp using a 3am boundary.
 * Work done before 3am local time counts as the previous day (aligns with sleep cycle).
 *
 * The date is built from local calendar fields. Reading it from `toISOString()`
 * would report the UTC date and move the boundary away from 3am for anyone
 * outside UTC.
 *
 * @param timestamp - Any string accepted by the `Date` constructor (ISO 8601 in session files).
 * @returns The effective local date formatted as `YYYY-MM-DD`.
 * @throws RangeError if `timestamp` cannot be parsed into a valid date.
 */
function getEffectiveDate(timestamp: string): string {
  const shifted = new Date(timestamp);
  if (Number.isNaN(shifted.getTime())) {
    throw new RangeError(`Invalid time value: ${timestamp}`);
  }

  // Move the clock back 3 hours so 00:00-02:59 lands on the previous calendar day.
  shifted.setHours(shifted.getHours() - 3);

  const year = shifted.getFullYear().toString().padStart(4, '0');
  const month = (shifted.getMonth() + 1).toString().padStart(2, '0');
  const day = shifted.getDate().toString().padStart(2, '0');
  return `${year}-${month}-${day}`;
}
64
/**
 * Stream-parse a Codex JSONL session file.
 *
 * Reads the file line by line and yields one entry per line that holds a JSON object.
 * Blank lines, lines that are not valid JSON, and valid JSON that is not an object
 * (`null`, numbers, strings, arrays) are skipped, so consumers can read entry
 * properties without a null check.
 *
 * @param filePath - Path of the `.jsonl` file to read.
 * @returns An async generator of parsed entries, in file order.
 */
async function* parseCodexJSONLStream(filePath: string): AsyncGenerator<CodexEntry> {
  const input = createReadStream(filePath);
  const rl = readline.createInterface({
    input,
    crlfDelay: Infinity,
  });

  try {
    for await (const line of rl) {
      if (line.trim() === '') continue;

      let parsed: unknown;
      try {
        parsed = JSON.parse(line);
      } catch {
        // Skip invalid JSON lines
        continue;
      }

      // JSON.parse accepts any JSON value; only objects can be session entries.
      if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
        yield parsed as CodexEntry;
      }
    }
  } finally {
    // Runs on normal completion and when the consumer stops iterating early.
    rl.close();
    input.destroy();
  }
}
83
/**
 * Collect the file paths named by the header lines of an apply_patch payload.
 * Recognised headers: "*** Add File: path", "*** Update File: path", "*** Delete File: path".
 *
 * @param patchContent - Raw patch text.
 * @returns Trimmed, de-duplicated paths in order of first appearance.
 */
function extractFilesFromPatch(patchContent: string): string[] {
  const headerPattern = /\*\*\* (?:Add|Update|Delete) File:\s*(.+)/g;
  // A Set keeps insertion order, so it handles both de-duplication and ordering.
  const seen = new Set<string>();

  for (const header of patchContent.matchAll(headerPattern)) {
    const candidate = header[1].trim();
    if (candidate === '') continue;
    seen.add(candidate);
  }

  return [...seen];
}
101
/**
 * Codex tool name -> Claude-equivalent tool name.
 *
 * Kept in a Map rather than an object literal: tool names come from session
 * files, and looking up a name such as "constructor" or "toString" on a plain
 * object returns an inherited Object.prototype member instead of `undefined`.
 */
const CODEX_TOOL_NAME_MAP: ReadonlyMap<string, string> = new Map([
  ['shell', 'Bash'],
  ['shell_command', 'Bash'],
  ['apply_patch', 'Edit'],
  ['update_plan', 'TodoWrite'],
]);

/**
 * Map Codex tool names to Claude-equivalent names for consistent tracking.
 *
 * @param name - Tool name as recorded in the Codex session.
 * @returns The mapped name, or `name` unchanged when no mapping exists.
 */
function mapCodexToolName(name: string): string {
  return CODEX_TOOL_NAME_MAP.get(name) ?? name;
}
114
/**
 * Decoded JSON arguments of a shell tool call.
 * `command` is `unknown` because callers check for an array or a primitive before using it.
 */
interface ShellArgs {
  command?: unknown;
}
118
/**
 * Build a short, display-friendly summary of a tool call's input.
 *
 * - shell / shell_command: the command from the JSON `arguments`, truncated; if the
 *   arguments cannot be decoded, the raw argument string, truncated.
 * - apply_patch: the first patched file plus a "(+N more)" suffix, or the truncated
 *   patch text when no file header is found.
 * - anything else, or a missing input field: the empty string.
 *
 * @param name - Original (unmapped) Codex tool name.
 * @param payload - The response item describing the call.
 * @returns The summary text.
 */
function summarizeCodexToolInput(name: string, payload: CodexResponseItem): string {
  const limit = 200;

  // Render the decoded `command` value: arrays are space-joined, primitives stringified.
  const describeCommand = (command: unknown): string => {
    if (Array.isArray(command)) return command.join(' ');
    switch (typeof command) {
      case 'string':
      case 'number':
      case 'boolean':
        return String(command);
      default:
        return '';
    }
  };

  switch (name) {
    case 'shell':
    case 'shell_command': {
      const rawArgs = payload.arguments;
      if (rawArgs === undefined) return '';
      try {
        const decoded = JSON.parse(rawArgs) as ShellArgs;
        return truncate(describeCommand(decoded.command), limit);
      } catch {
        // Undecodable arguments: show the raw string instead.
        return truncate(rawArgs, limit);
      }
    }

    case 'apply_patch': {
      const patchText = payload.input;
      if (patchText === undefined) return '';
      const [firstFile, ...otherFiles] = extractFilesFromPatch(patchText);
      if (firstFile === undefined) return truncate(patchText, limit);
      return otherFiles.length === 0 ? firstFile : `${firstFile} (+${otherFiles.length.toString()} more)`;
    }

    default:
      return '';
  }
}
156
/**
 * Shorten `str` to at most `maxLength` UTF-16 code units, ending with "..." when cut.
 *
 * - Strings that already fit are returned unchanged.
 * - When `maxLength` is smaller than the ellipsis, only that many dots are returned,
 *   so the result never exceeds `maxLength`.
 * - The cut never lands between the two halves of a surrogate pair.
 *
 * @param str - Text to shorten.
 * @param maxLength - Maximum length of the result in UTF-16 code units.
 * @returns The original or shortened text.
 */
function truncate(str: string, maxLength: number): string {
  if (str.length <= maxLength) return str;

  const ELLIPSIS = '...';
  if (maxLength < ELLIPSIS.length) {
    return ELLIPSIS.slice(0, Math.max(0, maxLength));
  }

  let keep = maxLength - ELLIPSIS.length;
  // If the last kept unit is a high surrogate, its low half would be cut off; drop it too.
  const lastKept = str.charCodeAt(keep - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) keep -= 1;

  return str.slice(0, keep) + ELLIPSIS;
}
161
/**
 * Extract text content from a Codex message content array.
 *
 * Items are kept when their `type` is 'text' (new format) or 'input_text' /
 * 'output_text' (old format) and their `text` is a string. The array comes from
 * parsed JSON, so entries that are not objects and non-string `text` values are
 * skipped instead of throwing.
 *
 * @param content - The message `content` array, if any.
 * @returns The matching texts joined with newlines, or '' when there are none.
 */
function extractTextFromContent(content: CodexContentItem[] | undefined): string {
  if (!Array.isArray(content)) return '';

  const texts: string[] = [];
  for (const item of content) {
    // Runtime data may contain null or primitive entries despite the declared type.
    if (typeof item !== 'object' || item === null) continue;

    const isTextItem = item.type === 'text' || item.type === 'input_text' || item.type === 'output_text';
    if (isTextItem && typeof item.text === 'string') {
      texts.push(item.text);
    }
  }
  return texts.join('\n');
}
179
/**
 * Parse a Codex session file into the unified ParsedSession format.
 *
 * Streams the JSONL file once and folds each entry into messages, tool-call counts
 * and token totals. Two layouts are handled:
 * - new format: `session_meta`, `event_msg` and `response_item` entries with a `payload`;
 * - old format (pre-October 2025): an untyped first line carrying `id` and `git`, plus
 *   top-level `function_call` and `message` entries.
 *
 * Malformed input (non-object lines, missing payloads, unparseable timestamps,
 * undecodable shell arguments) is skipped rather than aborting the parse.
 *
 * @param filePath - Path of the `.jsonl` session file; its base name is the fallback session id.
 * @param projectPath - Copied into the result unchanged.
 * @param projectName - Copied into the result unchanged.
 * @returns The parsed session. `startTime`/`endTime` are the smallest/largest parseable
 *   timestamps seen (current time if none); `date` is `getEffectiveDate(endTime)`.
 */
export async function parseCodexSessionFile(
  filePath: string,
  projectPath: string,
  projectName: string,
): Promise<ParsedSession> {
  const SESSION_FILE_EXTENSION = '.jsonl';
  // Old-format shell calls embed patches as a heredoc: apply_patch << 'PATCH' ... PATCH
  const heredocPatchRegex = /apply_patch\s*<<\s*['"]?PATCH['"]?\n([\s\S]*?)\n\s*PATCH/;

  const messages: ParsedMessage[] = [];
  // Counted in a Map, not a plain object: tool names come from the file, and a name
  // such as "constructor" would read an inherited Object.prototype member as the count.
  const toolCallCounts = new Map<string, number>();
  // NOTE(review): collected from apply_patch calls but not part of the returned
  // session; confirm whether ParsedSession should expose it.
  const filesChanged = new Set<string>();

  let sessionId = '';
  let gitBranch = '';
  let startTime = '';
  let endTime = '';
  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  let userMessages = 0;
  let assistantMessages = 0;

  // Increment the counter for one (already mapped) tool name.
  const countToolCall = (toolName: string): void => {
    toolCallCounts.set(toolName, (toolCallCounts.get(toolName) ?? 0) + 1);
  };

  // Append a message and bump the matching per-role counter.
  const recordMessage = (
    role: 'user' | 'assistant',
    timestamp: string,
    text: string,
    toolUses: ToolUse[] = [],
  ): void => {
    if (role === 'user') {
      userMessages++;
    } else {
      assistantMessages++;
    }
    messages.push({ type: role, timestamp, text, toolUses });
  };

  for await (const entry of parseCodexJSONLStream(filePath)) {
    // A JSON line can hold any value (e.g. `null`); only objects are entries.
    if (typeof entry !== 'object' || entry === null) continue;

    // Untyped view for fields that only exist in the old format.
    const rawEntry = entry as unknown as Record<string, unknown>;
    const timestamp = typeof entry.timestamp === 'string' ? entry.timestamp : '';

    // Track timestamps (skip entries without timestamps - common in old format).
    // Unparseable values are ignored so the final date derivation cannot throw.
    if (timestamp !== '' && !Number.isNaN(Date.parse(timestamp))) {
      if (startTime === '' || timestamp < startTime) startTime = timestamp;
      if (endTime === '' || timestamp > endTime) endTime = timestamp;
    }

    // Handle session_meta (first line) - new format
    if (entry.type === 'session_meta') {
      const meta = (entry.payload ?? {}) as CodexSessionMeta;
      sessionId = typeof meta.id === 'string' ? meta.id : '';
      const branch: unknown = meta.git?.branch;
      gitBranch = typeof branch === 'string' ? branch : '';
      continue;
    }

    // Handle old format first line (pre-October 2025): {id, timestamp, git, ...} without type
    if (rawEntry.id !== undefined && rawEntry.type === undefined && rawEntry.git !== undefined) {
      sessionId = typeof rawEntry.id === 'string' ? rawEntry.id : '';
      const git = rawEntry.git as { branch?: unknown } | null;
      const branch = git?.branch;
      gitBranch = typeof branch === 'string' ? branch : '';
      continue;
    }

    // Handle event_msg (user/assistant text messages)
    if (entry.type === 'event_msg') {
      const payload = (entry.payload ?? {}) as CodexEventMsg;

      if (payload.type === 'user_message') {
        recordMessage('user', timestamp, payload.message ?? '');
      } else if (payload.type === 'agent_message') {
        recordMessage('assistant', timestamp, payload.message ?? '');
      } else if (payload.type === 'token_count') {
        const usage = payload.info?.total_token_usage;
        if (usage !== undefined && usage !== null) {
          // Track final token counts (total_token_usage accumulates, so the last one wins).
          // NOTE(review): cached/reasoning counts are added on top of input/output;
          // confirm they are not already included in those totals.
          totalInputTokens = usage.input_tokens + (usage.cached_input_tokens ?? 0);
          totalOutputTokens = usage.output_tokens + (usage.reasoning_output_tokens ?? 0);
        }
      }
      continue;
    }

    // Handle response_item (function calls and custom tool calls)
    if (entry.type === 'response_item') {
      const payload = (entry.payload ?? {}) as CodexResponseItem;

      // Function calls and custom tool calls (equivalent to Claude tool_use)
      if (
        (payload.type === 'function_call' || payload.type === 'custom_tool_call') &&
        typeof payload.name === 'string'
      ) {
        const mappedName = mapCodexToolName(payload.name);
        countToolCall(mappedName);

        // Extract files from apply_patch
        if (payload.name === 'apply_patch' && payload.input !== undefined) {
          for (const file of extractFilesFromPatch(payload.input)) {
            filesChanged.add(file);
          }
        }

        const toolUse: ToolUse = {
          name: mappedName,
          input: summarizeCodexToolInput(payload.name, payload),
          rawInput: payload as unknown as Record<string, unknown>,
        };

        // Add as assistant message with tool use
        recordMessage('assistant', timestamp, '', [toolUse]);
      }

      // Agent text messages from response_item
      if (payload.type === 'message' && payload.role === 'assistant' && payload.content !== undefined) {
        const text = extractTextFromContent(payload.content);
        if (text !== '') {
          recordMessage('assistant', timestamp, text);
        }
      }
    }

    // Handle old format: top-level function_call (pre-October 2025)
    // Old format: {"type":"function_call","name":"shell","arguments":"{\"command\":[\"bash\",\"-lc\",\"apply_patch...\"]}"}
    // Only shell calls whose command array has a string third element are recorded.
    if (entry.type === 'function_call' && rawEntry.name === 'shell' && typeof rawEntry.arguments === 'string') {
      let args: ShellArgs | null = null;
      try {
        args = JSON.parse(rawEntry.arguments) as ShellArgs | null;
      } catch {
        // Invalid JSON in arguments
      }

      const command: unknown = args?.command;
      const shellCmd: unknown = Array.isArray(command) && command.length >= 3 ? command[2] : undefined;

      if (typeof shellCmd === 'string') {
        const patchMatch = heredocPatchRegex.exec(shellCmd);
        if (patchMatch !== null) {
          // Shell command wrapping apply_patch: treat it as an Edit and pull
          // the patch content out of the heredoc.
          countToolCall('Edit');
          const files = extractFilesFromPatch(patchMatch[1] ?? '');
          for (const file of files) {
            filesChanged.add(file);
          }

          const fileList = files.join(', ');
          recordMessage('assistant', timestamp, '', [
            {
              name: 'Edit',
              input: `apply_patch: ${fileList !== '' ? fileList : 'file changes'}`,
              rawInput: rawEntry,
            },
          ]);
        } else {
          // Regular shell command
          countToolCall('Bash');
          recordMessage('assistant', timestamp, '', [
            {
              name: 'Bash',
              input: shellCmd.substring(0, 100),
              rawInput: rawEntry,
            },
          ]);
        }
      }
    }

    // Handle old format: top-level message (pre-October 2025)
    // Old format: {"type":"message","role":"user/assistant","content":[{"type":"input_text/output_text","text":"..."}]}
    if (entry.type === 'message') {
      const text = extractTextFromContent(rawEntry.content as CodexContentItem[] | undefined);

      // Skip environment_context messages (just contain cwd/approval policy info)
      if (text !== '' && !text.includes('<environment_context>')) {
        if (rawEntry.role === 'user') {
          recordMessage('user', timestamp, text);
        } else if (rawEntry.role === 'assistant') {
          recordMessage('assistant', timestamp, text);
        }
      }
    }
  }

  // Fallback to filename for sessionId: take the last path segment (either
  // separator style) and strip only a trailing ".jsonl".
  if (sessionId === '') {
    const filename = filePath.split(/[\\/]/).pop() ?? '';
    const stem = filename.endsWith(SESSION_FILE_EXTENSION)
      ? filename.slice(0, -SESSION_FILE_EXTENSION.length)
      : filename;
    sessionId = stem !== '' ? stem : 'unknown';
  }

  // Provide default timestamps if none found
  const now = new Date().toISOString();
  if (startTime === '') startTime = now;
  if (endTime === '') endTime = startTime;

  // Derive date from endTime with 3am boundary
  const date = getEffectiveDate(endTime);

  const stats: SessionStats = {
    userMessages,
    assistantMessages,
    // Object.fromEntries defines own properties, so every tool name becomes a real key.
    toolCalls: Object.fromEntries(toolCallCounts),
    totalInputTokens,
    totalOutputTokens,
  };

  return {
    sessionId,
    filePath,
    projectPath,
    projectName,
    gitBranch,
    startTime,
    endTime,
    date,
    messages,
    stats,
  };
}
427}