this repo has no description
1import { createReadStream } from 'fs';
2import * as readline from 'readline';
3import type {
4 RawSessionEntry,
5 ParsedSession,
6 ParsedMessage,
7 ToolUse,
8 SessionStats,
9 MessageContent,
10} from '../types';
11
/**
 * Stream-parse a JSONL session file, yielding one entry per JSON-object line.
 *
 * Blank lines, lines that are not valid JSON, and lines whose JSON value is
 * not a plain object (`null`, primitives, arrays) are skipped, so consumers
 * can safely read properties off every yielded entry.
 *
 * The parsed objects are NOT validated against `RawSessionEntry`; the cast
 * only records the shape callers expect.
 *
 * @param filePath - Path of the `.jsonl` file to read.
 * @returns An async generator over the parsed entries, in file order.
 */
export async function* parseJSONLStream(
  filePath: string
): AsyncGenerator<RawSessionEntry> {
  const input = createReadStream(filePath);
  const rl = readline.createInterface({
    input,
    // Always treat "\r\n" as a single line break.
    crlfDelay: Infinity,
  });

  try {
    for await (const line of rl) {
      if (!line.trim()) continue;

      let parsed: unknown;
      try {
        parsed = JSON.parse(line);
      } catch {
        // Best-effort parsing: skip lines that are not valid JSON.
        continue;
      }

      // `null`, primitives and arrays are valid JSON but not session entries.
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        continue;
      }

      // Yield outside the try/catch above so an error thrown into the
      // generator by the consumer is not mistaken for a parse failure.
      yield parsed as RawSessionEntry;
    }
  } finally {
    // Runs on completion, on failure, and when the consumer stops early.
    rl.close();
    input.destroy();
  }
}
32
/**
 * Parse a session file into a structured format.
 *
 * Streams the JSONL file once. For every entry that has not been seen before
 * (by `uuid`) it records a message, tracks the earliest and latest timestamp,
 * and accumulates token and tool-call statistics.
 *
 * @param filePath - Path of the `.jsonl` session file.
 * @param projectPath - Copied verbatim into the result.
 * @param projectName - Copied verbatim into the result.
 * @returns The parsed session. `sessionId` falls back to the file name
 *   without its `.jsonl` suffix (or `'unknown'`), `startTime` falls back to
 *   the current time, `endTime` falls back to `startTime`, and `date` is the
 *   part of `startTime` before the `T`.
 */
export async function parseSessionFile(
  filePath: string,
  projectPath: string,
  projectName: string
): Promise<ParsedSession> {
  const messages: ParsedMessage[] = [];
  // Counted in a Map so tool names such as "constructor" or "__proto__"
  // cannot collide with Object.prototype members.
  const toolCallCounts = new Map<string, number>();
  let sessionId = '';
  let gitBranch = '';
  let startTime = '';
  let endTime = '';
  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  let userMessages = 0;
  let assistantMessages = 0;

  const seen = new Set<string>();

  for await (const entry of parseJSONLStream(filePath)) {
    // Deduplication - use uuid (unique per chunk) not message.id (same across
    // streaming chunks). Entries without a uuid have no key to compare, so
    // they are always kept rather than collapsed into a single entry.
    if (entry.uuid) {
      if (seen.has(entry.uuid)) continue;
      seen.add(entry.uuid);
    }

    // Metadata comes from the first entry that carries it.
    if (!sessionId && entry.sessionId) {
      sessionId = entry.sessionId;
    }
    if (!gitBranch && entry.gitBranch) {
      gitBranch = entry.gitBranch;
    }

    // Track the earliest and latest timestamp; entries without one are
    // ignored here. NOTE(review): this is a string comparison, so it assumes
    // every timestamp uses the same ISO-8601 format and timezone — confirm.
    const timestamp = entry.timestamp;
    if (timestamp) {
      if (!startTime || timestamp < startTime) startTime = timestamp;
      if (!endTime || timestamp > endTime) endTime = timestamp;
    }

    // Token usage is only read from assistant entries. Cache-creation and
    // cache-read tokens are folded into the input total.
    // NOTE(review): if several entries share one message.id and each repeats
    // the same usage block, these totals over-count — confirm the log format.
    if (entry.type === 'assistant' && entry.message?.usage) {
      const usage = entry.message.usage;
      totalInputTokens +=
        (usage.input_tokens ?? 0) +
        (usage.cache_creation_input_tokens ?? 0) +
        (usage.cache_read_input_tokens ?? 0);
      totalOutputTokens += usage.output_tokens ?? 0;
    }

    // Parse message content
    const text = extractText(entry.message?.content);
    const toolUses = extractToolUses(entry.message?.content);

    // Count tool calls
    for (const tool of toolUses) {
      toolCallCounts.set(tool.name, (toolCallCounts.get(tool.name) ?? 0) + 1);
    }

    if (entry.type === 'user') userMessages++;
    if (entry.type === 'assistant') assistantMessages++;

    messages.push({
      type: entry.type,
      timestamp: entry.timestamp,
      text,
      toolUses,
    });
  }

  // Use the file name as sessionId fallback. Both path separators are
  // accepted and only a trailing ".jsonl" is stripped.
  if (!sessionId) {
    sessionId =
      filePath.split(/[\\/]/).pop()?.replace(/\.jsonl$/, '') || 'unknown';
  }

  // Provide default timestamps if none were found.
  if (!startTime) {
    startTime = new Date().toISOString();
  }
  if (!endTime) {
    endTime = startTime;
  }

  // Derive date from startTime
  const date = startTime.split('T')[0];

  const stats: SessionStats = {
    userMessages,
    assistantMessages,
    // Object.fromEntries defines own properties, so unusual tool names
    // survive the conversion back to a plain record.
    toolCalls: Object.fromEntries(toolCallCounts),
    totalInputTokens,
    totalOutputTokens,
  };

  return {
    sessionId,
    filePath,
    projectPath,
    projectName,
    gitBranch,
    startTime,
    endTime,
    date,
    messages,
    stats,
  };
}
142
/**
 * Extract the plain text of a message.
 *
 * @param content - The message content: an array of content items, a bare
 *   string, or `undefined`.
 * @returns For a string, the string itself. For an array, the non-empty text
 *   of every `type: 'text'` item joined with newlines. Otherwise `''`.
 */
function extractText(content: MessageContent[] | string | undefined): string {
  // A bare string is already the text; returning '' for it would lose the
  // whole message.
  // NOTE(review): assumes string content is the message's plain text —
  // confirm against whatever produces RawSessionEntry.
  if (typeof content === 'string') return content;
  if (!content || !Array.isArray(content)) return '';

  const texts: string[] = [];
  for (const item of content) {
    if (item.type !== 'text') continue;

    // Handle both formats: { text: "..." } and { content: "..." }
    const text = 'text' in item ? item.text : 'content' in item ? item.content : '';
    // Keep only non-empty strings so malformed items cannot leak
    // "[object Object]" or similar into the joined output.
    if (typeof text === 'string' && text) texts.push(text);
  }
  return texts.join('\n');
}
159
/**
 * Extract tool uses from message content.
 *
 * @param content - The message content; anything that is not an array yields
 *   an empty result.
 * @returns One `ToolUse` per `type: 'tool_use'` item, in content order. Each
 *   carries the tool name, a display summary of its input, and the input
 *   exactly as logged (`rawInput`).
 */
function extractToolUses(content: MessageContent[] | undefined): ToolUse[] {
  if (!content || !Array.isArray(content)) return [];

  const tools: ToolUse[] = [];
  for (const item of content) {
    if (item.type !== 'tool_use') continue;

    tools.push({
      name: item.name,
      // Summarize an empty object when the logged call has no input, so a
      // single malformed entry cannot abort parsing of the whole session.
      input: summarizeToolInput(item.name, item.input ?? {}),
      rawInput: item.input,
    });
  }
  return tools;
}
178
/**
 * For each tool with a dedicated summary, the input field used as that
 * summary. Tools not listed here are summarized by their JSON-encoded input.
 */
const SUMMARY_FIELD_BY_TOOL = new Map<string, string>([
  ['Bash', 'command'],
  ['Read', 'file_path'],
  ['Write', 'file_path'],
  ['Edit', 'file_path'],
  ['Glob', 'pattern'],
  ['Grep', 'pattern'],
  ['Task', 'description'],
]);

/**
 * Build a short display string for a tool call's input.
 *
 * @param toolName - Name of the tool that was called.
 * @param input - The tool's input object.
 * @returns The tool's characteristic field (or the JSON-encoded input for
 *   tools without one), truncated to 200 characters.
 */
function summarizeToolInput(
  toolName: string,
  input: Record<string, unknown>
): string {
  const limit = 200;
  const field = SUMMARY_FIELD_BY_TOOL.get(toolName);

  const summary =
    field === undefined ? JSON.stringify(input) : String(input[field] || '');
  return truncate(summary, limit);
}
206
/**
 * Shorten `str` so the result is never longer than `maxLength` UTF-16 code
 * units.
 *
 * @param str - Text to shorten.
 * @param maxLength - Maximum length of the result, ellipsis included.
 * @returns `str` unchanged when it already fits; otherwise a prefix of `str`
 *   followed by `'...'`. When `maxLength` is smaller than the ellipsis, only
 *   as many dots as fit are returned.
 */
function truncate(str: string, maxLength: number): string {
  if (str.length <= maxLength) return str;

  const ellipsis = '...';
  // No room for any text: a negative slice end would otherwise count from the
  // end of the string and return something longer than maxLength.
  if (maxLength < ellipsis.length) {
    return ellipsis.slice(0, Math.max(0, maxLength));
  }

  let cut = maxLength - ellipsis.length;
  // Do not end on the first half of a surrogate pair (e.g. an emoji).
  const lastUnit = str.charCodeAt(cut - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;

  return str.slice(0, cut) + ellipsis;
}
211
/**
 * Overall category that `classifyWork` assigns to a set of changed files.
 */
export type WorkType =
  | 'feature'
  | 'infrastructure'
  | 'tests'
  | 'docs'
  | 'mixed';
216
/**
 * Number of changed files counted under each category by `classifyWork`.
 */
export interface WorkScope {
  /** Feature files whose path looks like frontend code. */
  frontend: number;
  /** Feature files not counted as frontend. */
  backend: number;
  /** Files matching the test path rules. */
  tests: number;
  /** Files matching the type/interface path rules. */
  types: number;
  /** Files matching the config/devops path rules. */
  config: number;
  /** Files matching the documentation path rules. */
  docs: number;
}
225
/**
 * Result of classifying a set of changed files.
 */
export interface WorkClassification {
  /** The overall category chosen for the change set. */
  type: WorkType;
  /** Human-readable explanation of why that category was chosen. */
  signals: string[];
  /** Per-category file counts. */
  scope: WorkScope;
  /** Short scope label, e.g., "frontend, backend" or "tests". */
  scopeSummary: string;
}
232
/**
 * Decide whether a file path looks like frontend code.
 *
 * Matching is case-insensitive. A path qualifies when it contains one of the
 * frontend directory segments (slashes on both sides) or ends with a UI file
 * extension.
 *
 * @param file - File path to inspect.
 * @returns `true` when the path matches a frontend directory or extension.
 */
function isFrontend(file: string): boolean {
  const normalized = file.toLowerCase();

  const directoryMarkers = [
    '/components/',
    '/pages/',
    '/screens/',
    '/views/',
    '/ui/',
    '/app/',
    '/apps/web/',
    '/web/',
    '/frontend/',
    '/client/',
  ];
  if (directoryMarkers.some((marker) => normalized.includes(marker))) {
    return true;
  }

  const uiExtensions = ['.tsx', '.jsx', '.css', '.scss'];
  return uiExtensions.some((extension) => normalized.endsWith(extension));
}
255
/**
 * Decide whether a file path looks like backend code.
 *
 * Matching is case-insensitive. A path qualifies when it contains one of the
 * backend directory segments, or when it is a `.ts` file that is not a test,
 * spec or declaration file and does not look like frontend code.
 *
 * @param file - File path to inspect.
 * @returns `true` when the path matches a backend directory or the `.ts`
 *   fallback rule.
 */
function isBackend(file: string): boolean {
  const normalized = file.toLowerCase();

  const directoryMarkers = [
    '/api/',
    '/server/',
    '/services/',
    '/lib/',
    '/core/',
    '/packages/',
    '/backend/',
    '/handlers/',
    '/routes/',
    '/controllers/',
    '/models/',
    '/utils/',
  ];
  if (directoryMarkers.some((marker) => normalized.includes(marker))) {
    return true;
  }

  // Fallback: plain TypeScript sources outside any recognized directory.
  if (!normalized.endsWith('.ts')) return false;

  const excludedSuffixes = ['.test.ts', '.spec.ts', '.d.ts'];
  if (excludedSuffixes.some((suffix) => normalized.endsWith(suffix))) {
    return false;
  }

  return !isFrontend(file);
}
277
/**
 * Pick the single `WorkScope` category a changed file is counted under.
 *
 * Rules are checked in order — tests, types, config, docs — and the first
 * match wins. Anything left is feature code: `frontend` when `isFrontend`
 * matches, `backend` otherwise.
 */
function categorizeFile(file: string): keyof WorkScope {
  const lower = file.toLowerCase();
  const filename = file.split('/').pop() || '';

  // Tests
  if (
    lower.includes('.test.') ||
    lower.includes('.spec.') ||
    lower.includes('__tests__') ||
    lower.includes('/test/') ||
    lower.includes('/tests/')
  ) {
    return 'tests';
  }

  // Types/interfaces
  if (
    filename === 'types.ts' ||
    filename === 'interfaces.ts' ||
    lower.endsWith('.d.ts') ||
    lower.includes('/types/') ||
    lower.includes('/interfaces/')
  ) {
    return 'types';
  }

  // Config/devops
  if (
    lower.includes('.config.') ||
    lower.includes('/config/') ||
    lower.includes('.github/') ||
    lower.includes('dockerfile') ||
    lower.includes('.yml') ||
    lower.includes('.yaml') ||
    filename.startsWith('.') ||
    filename === 'package.json' ||
    filename === 'tsconfig.json'
  ) {
    return 'config';
  }

  // Docs
  if (
    lower.endsWith('.md') ||
    lower.includes('/docs/') ||
    lower.includes('/documentation/')
  ) {
    return 'docs';
  }

  // Feature work: everything that is not frontend counts as backend.
  return isFrontend(file) ? 'frontend' : 'backend';
}

/**
 * Build the short scope label: frontend/backend/both when feature code was
 * touched, otherwise the first non-empty category among tests, docs, config
 * and types. Returns `''` when every count is zero.
 */
function summarizeScope(scope: WorkScope): string {
  const hasFrontend = scope.frontend > 0;
  const hasBackend = scope.backend > 0;

  if (hasFrontend && hasBackend) return 'frontend, backend';
  if (hasFrontend) return 'frontend';
  if (hasBackend) return 'backend';
  if (scope.tests > 0) return 'tests';
  if (scope.docs > 0) return 'docs';
  if (scope.config > 0) return 'config';
  // Checked last so labels for all other combinations stay as they were.
  if (scope.types > 0) return 'types';
  return '';
}

/**
 * Classify the type of work based on file paths.
 *
 * Each file is counted under exactly one category. The overall type is the
 * first of tests, docs, infrastructure (types + config) or feature whose
 * count exceeds 50% of the files; if none does, the result is `'mixed'` with
 * one signal per non-empty category.
 *
 * @param files - Paths of the changed files.
 * @returns The classification. An empty `files` array yields type `'mixed'`
 *   with the signal `'no files changed'` and an empty scope summary.
 */
export function classifyWork(files: string[]): WorkClassification {
  const signals: string[] = [];
  const scope: WorkScope = {
    frontend: 0,
    backend: 0,
    tests: 0,
    types: 0,
    config: 0,
    docs: 0,
  };

  for (const file of files) {
    scope[categorizeFile(file)]++;
  }

  const total = files.length;
  if (total === 0) {
    return {
      type: 'mixed',
      signals: ['no files changed'],
      scope,
      scopeSummary: '',
    };
  }

  const scopeSummary = summarizeScope(scope);
  const featureFiles = scope.frontend + scope.backend;

  // Determine primary type (>50% of files)
  const threshold = total * 0.5;

  if (scope.tests > threshold) {
    signals.push(`${scope.tests}/${total} files are tests`);
    return { type: 'tests', signals, scope, scopeSummary };
  }

  if (scope.docs > threshold) {
    signals.push(`${scope.docs}/${total} files are documentation`);
    return { type: 'docs', signals, scope, scopeSummary };
  }

  if (scope.types + scope.config > threshold) {
    // The signal names only the larger of the two groups; config wins a tie.
    if (scope.types > scope.config) {
      signals.push(`${scope.types}/${total} files are types`);
    } else {
      signals.push(`${scope.config}/${total} files are config`);
    }
    return { type: 'infrastructure', signals, scope, scopeSummary };
  }

  if (featureFiles > threshold) {
    signals.push(`${featureFiles}/${total} files are feature code`);
    return { type: 'feature', signals, scope, scopeSummary };
  }

  // Mixed - build a description
  if (featureFiles > 0) signals.push(`${featureFiles} feature`);
  if (scope.tests > 0) signals.push(`${scope.tests} test`);
  if (scope.types > 0) signals.push(`${scope.types} type`);
  if (scope.config > 0) signals.push(`${scope.config} config`);
  if (scope.docs > 0) signals.push(`${scope.docs} doc`);

  return { type: 'mixed', signals, scope, scopeSummary };
}
424
/**
 * Create a condensed transcript for LLM summarization.
 *
 * Leads with an action summary so implementation work is captured even when
 * the conversation excerpt is cut short. Sections, in order:
 *  1. Project, branch (when set) and duration.
 *  2. Work type and scope, classified from the files written or edited.
 *  3. Files created (`Write`), files edited (`Edit`) and commands run
 *     (`Bash`), taken from assistant tool uses.
 *  4. The first user/assistant messages that have text, each clipped.
 *  5. The ten most-used tools with their call counts.
 *
 * @param session - The parsed session to condense.
 * @returns The transcript as a newline-joined string.
 */
export function createCondensedTranscript(session: ParsedSession): string {
  const MAX_FILES_LISTED = 15;
  const MAX_COMMANDS_LISTED = 5;
  const MAX_COMMAND_LENGTH = 100;
  const MAX_CONVERSATION_MESSAGES = 20;
  const MAX_USER_TEXT = 300;
  const MAX_ASSISTANT_TEXT = 200;
  const MAX_TOOLS_LISTED = 10;

  const parts: string[] = [];

  parts.push(`Project: ${session.projectName}`);
  if (session.gitBranch) {
    parts.push(`Branch: ${session.gitBranch}`);
  }
  parts.push(`Duration: ${formatDuration(session.startTime, session.endTime)}`);
  parts.push('');

  // LEAD with files changed - this is the most important signal of actual work.
  // Sets keep first-seen order and make the duplicate check O(1).
  const filesWritten = new Set<string>();
  const filesEdited = new Set<string>();
  const commandsShown: string[] = [];
  let commandCount = 0;

  for (const msg of session.messages) {
    if (msg.type !== 'assistant') continue;

    for (const tool of msg.toolUses) {
      // rawInput may be missing on malformed entries; read it defensively.
      const raw = (tool.rawInput ?? {}) as Record<string, unknown>;

      if (tool.name === 'Write') {
        const path = String(raw.file_path || '');
        if (path) filesWritten.add(path);
      } else if (tool.name === 'Edit') {
        const path = String(raw.file_path || '');
        if (path) filesEdited.add(path);
      } else if (tool.name === 'Bash') {
        const cmd = String(raw.command || '').slice(0, MAX_COMMAND_LENGTH);
        if (!cmd) continue;
        // Count every command but keep only the ones that will be listed.
        commandCount++;
        if (commandsShown.length < MAX_COMMANDS_LISTED) commandsShown.push(cmd);
      }
    }
  }

  // Classify the work based on file paths. A file that was both written and
  // edited is passed once, so it cannot skew the classification thresholds.
  const allFiles = [...new Set([...filesWritten, ...filesEdited])];
  const classification = classifyWork(allFiles);
  parts.push(`WORK TYPE: ${classification.type}`);
  if (classification.scopeSummary) {
    parts.push(`SCOPE: ${classification.scopeSummary}`);
  }
  parts.push('');

  // Emits "TITLE (total):", the listed items, an omission line when some
  // items are not listed, and a blank separator. Emits nothing when empty.
  const pushListSection = (
    title: string,
    shown: string[],
    total: number,
    bullet: string
  ): void => {
    if (total === 0) return;
    parts.push(`${title} (${total}):`);
    for (const item of shown) parts.push(` ${bullet} ${item}`);
    if (total > shown.length) parts.push(` ... and ${total - shown.length} more`);
    parts.push('');
  };

  // Show action summary at the TOP
  pushListSection(
    'FILES CREATED',
    [...filesWritten].slice(0, MAX_FILES_LISTED),
    filesWritten.size,
    '-'
  );
  pushListSection(
    'FILES EDITED',
    [...filesEdited].slice(0, MAX_FILES_LISTED),
    filesEdited.size,
    '-'
  );
  pushListSection('COMMANDS RUN', commandsShown, commandCount, '$');

  // Then show conversation context (but less of it)
  parts.push('CONVERSATION:');
  let messageCount = 0;
  for (const msg of session.messages) {
    if (messageCount >= MAX_CONVERSATION_MESSAGES) break; // Limit to avoid overwhelming

    if (msg.type === 'user' && msg.text) {
      parts.push(`User: ${msg.text.slice(0, MAX_USER_TEXT)}`);
      messageCount++;
    } else if (msg.type === 'assistant' && msg.text) {
      parts.push(`Assistant: ${msg.text.slice(0, MAX_ASSISTANT_TEXT)}`);
      messageCount++;
    }
  }

  // Add stats at end
  parts.push('');
  const toolSummary = Object.entries(session.stats.toolCalls)
    .sort((a, b) => b[1] - a[1])
    .slice(0, MAX_TOOLS_LISTED)
    .map(([name, count]) => `${name}(${count})`)
    .join(', ');
  if (toolSummary) {
    parts.push(`Tool usage: ${toolSummary}`);
  }

  return parts.join('\n');
}
527
/**
 * Format the time between two timestamps for display.
 *
 * @param start - Start timestamp, in any form `new Date()` can parse.
 * @param end - End timestamp, in any form `new Date()` can parse.
 * @returns `"N min"` for durations under an hour, `"Hh Mm"` otherwise, or
 *   `'unknown'` when either timestamp is missing or unparseable, or when
 *   `end` precedes `start`.
 */
function formatDuration(start: string, end: string): string {
  if (!start || !end) return 'unknown';

  const diffMs = new Date(end).getTime() - new Date(start).getTime();
  // NaN means a timestamp failed to parse; a negative value means the two
  // timestamps are inconsistent. Neither yields a meaningful duration.
  if (Number.isNaN(diffMs) || diffMs < 0) return 'unknown';

  const minutes = Math.floor(diffMs / 60000);
  if (minutes < 60) return `${minutes} min`;

  const hours = Math.floor(minutes / 60);
  const remainingMinutes = minutes % 60;
  return `${hours}h ${remainingMinutes}m`;
}