this repo has no description
at experiment/session-classification 541 lines 15 kB view raw
import { createReadStream } from 'fs';
import * as path from 'path';
import * as readline from 'readline';
import type {
  RawSessionEntry,
  ParsedSession,
  ParsedMessage,
  ToolUse,
  SessionStats,
  MessageContent,
} from '../types';

/**
 * Stream-parse a JSONL session file, yielding one entry per non-blank line.
 *
 * Lines that are not valid JSON, or that parse to something other than a plain
 * object (e.g. `null`, a number, an array), are skipped so that callers can
 * safely read properties off every yielded entry.
 *
 * @param filePath - Path of the JSONL file to read.
 * @returns An async generator of the parsed entries, in file order.
 */
export async function* parseJSONLStream(
  filePath: string
): AsyncGenerator<RawSessionEntry> {
  const input = createReadStream(filePath);
  const rl = readline.createInterface({ input, crlfDelay: Infinity });

  try {
    for await (const line of rl) {
      if (!line.trim()) continue;

      let parsed: unknown;
      try {
        parsed = JSON.parse(line);
      } catch {
        // Skip invalid JSON lines
        continue;
      }

      // A bare scalar or array is valid JSON but not a session entry.
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        continue;
      }

      // Yield outside the JSON try/catch so errors raised by the consumer are
      // not mistaken for parse failures.
      yield parsed as RawSessionEntry;
    }
  } finally {
    // Release the file handle even when the consumer stops iterating early.
    rl.close();
    input.destroy();
  }
}

/**
 * Parse a session file into a structured format.
 *
 * Reads every entry of the file, de-duplicates entries by `uuid`, collects
 * message text and tool uses, and accumulates token and message counts.
 *
 * @param filePath - Path of the JSONL session file.
 * @param projectPath - Copied verbatim into the result.
 * @param projectName - Copied verbatim into the result.
 * @returns The parsed session, including per-session statistics.
 */
export async function parseSessionFile(
  filePath: string,
  projectPath: string,
  projectName: string
): Promise<ParsedSession> {
  const messages: ParsedMessage[] = [];
  const toolCalls: Record<string, number> = {};
  let sessionId = '';
  let gitBranch = '';
  let startTime = '';
  let endTime = '';
  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  let userMessages = 0;
  let assistantMessages = 0;

  const seen = new Set<string>();

  for await (const entry of parseJSONLStream(filePath)) {
    // Deduplication - use uuid (unique per chunk) not message.id (same across streaming chunks).
    // Entries without a uuid are never treated as duplicates of each other.
    if (entry.uuid) {
      if (seen.has(entry.uuid)) continue;
      seen.add(entry.uuid);
    }

    // Extract metadata from the first entry that carries it
    if (!sessionId && entry.sessionId) {
      sessionId = entry.sessionId;
    }
    if (!gitBranch && entry.gitBranch) {
      gitBranch = entry.gitBranch;
    }

    // Track the earliest and latest timestamps; entries without one are ignored here
    const timestamp = entry.timestamp;
    if (timestamp) {
      if (!startTime || timestamp < startTime) {
        startTime = timestamp;
      }
      if (!endTime || timestamp > endTime) {
        endTime = timestamp;
      }
    }

    // Extract token usage from assistant messages; cache tokens count as input
    if (entry.type === 'assistant' && entry.message?.usage) {
      const usage = entry.message.usage;
      totalInputTokens += usage.input_tokens ?? 0;
      totalOutputTokens += usage.output_tokens ?? 0;
      totalInputTokens += usage.cache_creation_input_tokens ?? 0;
      totalInputTokens += usage.cache_read_input_tokens ?? 0;
    }

    // Parse message content
    const text = extractText(entry.message?.content);
    const toolUses = extractToolUses(entry.message?.content);

    // Count tool calls
    for (const tool of toolUses) {
      toolCalls[tool.name] = (toolCalls[tool.name] || 0) + 1;
    }

    if (entry.type === 'user') userMessages++;
    if (entry.type === 'assistant') assistantMessages++;

    messages.push({
      type: entry.type,
      timestamp: entry.timestamp,
      text,
      toolUses,
    });
  }

  // Use the file name (without its .jsonl extension) as sessionId fallback
  if (!sessionId) {
    sessionId = path.basename(filePath, '.jsonl') || 'unknown';
  }

  // Provide default timestamps if none found
  const now = new Date().toISOString();
  if (!startTime) {
    startTime = now;
  }
  if (!endTime) {
    endTime = startTime;
  }

  // Derive date from startTime (the part before the 'T' separator)
  const date = startTime.split('T')[0];

  const stats: SessionStats = {
    userMessages,
    assistantMessages,
    toolCalls,
    totalInputTokens,
    totalOutputTokens,
  };

  return {
    sessionId,
    filePath,
    projectPath,
    projectName,
    gitBranch,
    startTime,
    endTime,
    date,
    messages,
    stats,
  };
}

/**
 * Extract text from message content.
 *
 * For an array, the text of every `type === 'text'` item is joined with
 * newlines. A bare string is returned as-is.
 *
 * NOTE(review): the bare-string case assumes some logs store message content
 * as a plain string rather than a block array - confirm against '../types'.
 */
function extractText(content: MessageContent[] | string | undefined): string {
  if (typeof content === 'string') return content;
  if (!content || !Array.isArray(content)) return '';

  const texts: string[] = [];
  for (const item of content) {
    if (item.type === 'text') {
      // Handle both formats: { text: "..." } and { content: "..." }
      const text = 'text' in item ? item.text : 'content' in item ? item.content : '';
      if (text) texts.push(text);
    }
  }
  return texts.join('\n');
}

/**
 * Extract tool uses from message content.
 *
 * Each `type === 'tool_use'` item becomes a ToolUse holding the tool name, a
 * short display summary of its input, and the untouched raw input.
 */
function extractToolUses(content: MessageContent[] | undefined): ToolUse[] {
  if (!content || !Array.isArray(content)) return [];

  const tools: ToolUse[] = [];
  for (const item of content) {
    if (item.type === 'tool_use') {
      tools.push({
        name: item.name,
        input: summarizeToolInput(item.name, item.input),
        rawInput: item.input,
      });
    }
  }
  return tools;
}

/**
 * Summarize tool input for display (truncate long content).
 *
 * Known tools are summarized by their most informative field; any other tool
 * falls back to the JSON form of its input. A missing input yields ''.
 */
function summarizeToolInput(
  toolName: string,
  input: Record<string, unknown>
): string {
  const MAX_LENGTH = 200;
  // Malformed log entries may omit the input object entirely.
  const fields: Record<string, unknown> = input ?? {};

  switch (toolName) {
    case 'Bash':
      return truncate(String(fields.command || ''), MAX_LENGTH);
    case 'Read':
    case 'Write':
    case 'Edit':
      return truncate(String(fields.file_path || ''), MAX_LENGTH);
    case 'Glob':
    case 'Grep':
      return truncate(String(fields.pattern || ''), MAX_LENGTH);
    case 'Task':
      return truncate(String(fields.description || ''), MAX_LENGTH);
    default:
      return input == null ? '' : truncate(JSON.stringify(input) ?? '', MAX_LENGTH);
  }
}

/**
 * Shorten `str` to at most `maxLength` characters, ending with '...' when cut.
 * When `maxLength` is too small to hold any text plus the ellipsis, the string
 * is cut hard instead.
 */
function truncate(str: string, maxLength: number): string {
  if (str.length <= maxLength) return str;
  if (maxLength < 3) return str.slice(0, Math.max(0, maxLength));
  return str.slice(0, maxLength - 3) + '...';
}

/**
 * Work type classification based on files changed
 */
export type WorkType = 'feature' | 'infrastructure' | 'tests' | 'docs' | 'mixed';

/** Number of changed files that fell into each category. */
export interface WorkScope {
  frontend: number;
  backend: number;
  tests: number;
  types: number;
  config: number;
  docs: number;
}

export interface WorkClassification {
  type: WorkType;
  signals: string[]; // Human-readable explanation of why
  scope: WorkScope;
  scopeSummary: string; // e.g., "frontend, backend" or "tests"
}

/** Directory fragments that mark a path as frontend code. */
const FRONTEND_DIRS = [
  '/components/',
  '/pages/',
  '/screens/',
  '/views/',
  '/ui/',
  '/app/',
  '/apps/web/',
  '/web/',
  '/frontend/',
  '/client/',
];

/** File extensions that mark a path as frontend code. */
const FRONTEND_EXTENSIONS = ['.tsx', '.jsx', '.css', '.scss'];

/** Directory fragments that mark a path as backend code. */
const BACKEND_DIRS = [
  '/api/',
  '/server/',
  '/services/',
  '/lib/',
  '/core/',
  '/packages/',
  '/backend/',
  '/handlers/',
  '/routes/',
  '/controllers/',
  '/models/',
  '/utils/',
];

/**
 * Check if a file path looks like frontend code (case-insensitive match on
 * well-known directory names and file extensions).
 */
function isFrontend(file: string): boolean {
  const lower = file.toLowerCase();
  return (
    FRONTEND_DIRS.some((dir) => lower.includes(dir)) ||
    FRONTEND_EXTENSIONS.some((ext) => lower.endsWith(ext))
  );
}

/**
 * Check if a file path looks like backend code: either it sits in a well-known
 * backend directory, or it is a plain `.ts` file that is not a test, not a
 * declaration file, and not frontend.
 */
function isBackend(file: string): boolean {
  const lower = file.toLowerCase();
  if (BACKEND_DIRS.some((dir) => lower.includes(dir))) return true;

  return (
    lower.endsWith('.ts') &&
    !lower.endsWith('.test.ts') &&
    !lower.endsWith('.spec.ts') &&
    !lower.endsWith('.d.ts') &&
    !isFrontend(file)
  );
}

/**
 * Classify the type of work based on file paths.
 *
 * Each file is assigned to the first matching category in this order: tests,
 * types, config, docs, then feature code (frontend or backend). A category
 * that covers more than half of the files decides the work type; otherwise the
 * result is 'mixed'.
 *
 * @param files - Paths of the files that were changed.
 * @returns The work type, per-category counts, and a short scope summary.
 */
export function classifyWork(files: string[]): WorkClassification {
  const signals: string[] = [];
  const scope: WorkScope = {
    frontend: 0,
    backend: 0,
    tests: 0,
    types: 0,
    config: 0,
    docs: 0,
  };

  let featureFiles = 0;

  for (const file of files) {
    const lower = file.toLowerCase();
    const filename = file.split('/').pop() || '';

    // Tests
    if (
      lower.includes('.test.') ||
      lower.includes('.spec.') ||
      lower.includes('__tests__') ||
      lower.includes('/test/') ||
      lower.includes('/tests/')
    ) {
      scope.tests++;
      continue;
    }

    // Types/interfaces
    if (
      filename === 'types.ts' ||
      filename === 'interfaces.ts' ||
      lower.endsWith('.d.ts') ||
      lower.includes('/types/') ||
      lower.includes('/interfaces/')
    ) {
      scope.types++;
      continue;
    }

    // Config/devops
    if (
      lower.includes('.config.') ||
      lower.includes('/config/') ||
      lower.includes('.github/') ||
      lower.includes('dockerfile') ||
      lower.includes('.yml') ||
      lower.includes('.yaml') ||
      filename.startsWith('.') ||
      filename === 'package.json' ||
      filename === 'tsconfig.json'
    ) {
      scope.config++;
      continue;
    }

    // Docs
    if (
      lower.endsWith('.md') ||
      lower.includes('/docs/') ||
      lower.includes('/documentation/')
    ) {
      scope.docs++;
      continue;
    }

    // Feature work - classify as frontend or backend
    featureFiles++;
    if (isFrontend(file)) {
      scope.frontend++;
    } else if (isBackend(file)) {
      scope.backend++;
    } else {
      // Anything left unclassified is also counted as backend
      scope.backend++;
    }
  }

  const total = files.length;
  if (total === 0) {
    return {
      type: 'mixed',
      signals: ['no files changed'],
      scope,
      scopeSummary: '',
    };
  }

  // Build scope summary - frontend/backend first, then the first non-feature
  // category that has any files
  let scopeSummary = '';
  const hasFrontend = scope.frontend > 0;
  const hasBackend = scope.backend > 0;
  if (hasFrontend && hasBackend) {
    scopeSummary = 'frontend, backend';
  } else if (hasFrontend) {
    scopeSummary = 'frontend';
  } else if (hasBackend) {
    scopeSummary = 'backend';
  } else if (scope.tests > 0) {
    scopeSummary = 'tests';
  } else if (scope.docs > 0) {
    scopeSummary = 'docs';
  } else if (scope.config > 0) {
    scopeSummary = 'config';
  } else if (scope.types > 0) {
    // Without this branch a types-only change would report an empty scope
    scopeSummary = 'types';
  }

  // Determine primary type (>50% of files)
  const threshold = total * 0.5;

  if (scope.tests > threshold) {
    signals.push(`${scope.tests}/${total} files are tests`);
    return { type: 'tests', signals, scope, scopeSummary };
  }

  if (scope.docs > threshold) {
    signals.push(`${scope.docs}/${total} files are documentation`);
    return { type: 'docs', signals, scope, scopeSummary };
  }

  if (scope.types + scope.config > threshold) {
    if (scope.types > scope.config) {
      signals.push(`${scope.types}/${total} files are types`);
    } else {
      signals.push(`${scope.config}/${total} files are config`);
    }
    return { type: 'infrastructure', signals, scope, scopeSummary };
  }

  if (featureFiles > threshold) {
    signals.push(`${featureFiles}/${total} files are feature code`);
    return { type: 'feature', signals, scope, scopeSummary };
  }

  // Mixed - build a description
  if (featureFiles > 0) signals.push(`${featureFiles} feature`);
  if (scope.tests > 0) signals.push(`${scope.tests} test`);
  if (scope.types > 0) signals.push(`${scope.types} type`);
  if (scope.config > 0) signals.push(`${scope.config} config`);
  if (scope.docs > 0) signals.push(`${scope.docs} doc`);

  return { type: 'mixed', signals, scope, scopeSummary };
}

/** Read one field of a tool's raw input as a string; '' when absent or falsy. */
function readToolField(rawInput: unknown, key: string): string {
  if (typeof rawInput !== 'object' || rawInput === null) return '';
  const value = (rawInput as Record<string, unknown>)[key];
  return String(value || '');
}

/**
 * Create a condensed transcript for LLM summarization.
 * Leads with action summary (files changed) to ensure implementation work is captured.
 *
 * Layout: project header, work type and scope, files created, files edited,
 * commands run, a capped excerpt of the conversation, and a tool usage line.
 *
 * @param session - The parsed session to condense.
 * @returns The transcript as a single newline-joined string.
 */
export function createCondensedTranscript(session: ParsedSession): string {
  const MAX_FILES_SHOWN = 15;
  const MAX_COMMANDS_SHOWN = 5;
  const parts: string[] = [];

  parts.push(`Project: ${session.projectName}`);
  if (session.gitBranch) {
    parts.push(`Branch: ${session.gitBranch}`);
  }
  parts.push(`Duration: ${formatDuration(session.startTime, session.endTime)}`);
  parts.push('');

  // LEAD with files changed - this is the most important signal of actual work.
  // Sets keep first-seen order while making the duplicate check O(1).
  const writtenSet = new Set<string>();
  const editedSet = new Set<string>();
  const commandsShown: string[] = [];
  let totalCommands = 0;

  for (const msg of session.messages) {
    if (msg.type !== 'assistant') continue;

    for (const tool of msg.toolUses) {
      if (tool.name === 'Write') {
        const target = readToolField(tool.rawInput, 'file_path');
        if (target) writtenSet.add(target);
      } else if (tool.name === 'Edit') {
        const target = readToolField(tool.rawInput, 'file_path');
        if (target) editedSet.add(target);
      } else if (tool.name === 'Bash') {
        const cmd = readToolField(tool.rawInput, 'command').slice(0, 100);
        if (cmd) {
          // Count every command, but only keep the ones that will be displayed
          totalCommands++;
          if (commandsShown.length < MAX_COMMANDS_SHOWN) {
            commandsShown.push(cmd);
          }
        }
      }
    }
  }

  const filesWritten = [...writtenSet];
  const filesEdited = [...editedSet];

  // Classify the work based on file paths. A file that was both created and
  // edited is counted once so it does not skew the >50% thresholds.
  const allFiles = [...new Set([...filesWritten, ...filesEdited])];
  const classification = classifyWork(allFiles);
  parts.push(`WORK TYPE: ${classification.type}`);
  if (classification.scopeSummary) {
    parts.push(`SCOPE: ${classification.scopeSummary}`);
  }
  parts.push('');

  // Show action summary at the TOP
  if (filesWritten.length > 0) {
    parts.push(`FILES CREATED (${filesWritten.length}):`);
    filesWritten.slice(0, MAX_FILES_SHOWN).forEach((f) => parts.push(` - ${f}`));
    if (filesWritten.length > MAX_FILES_SHOWN) {
      parts.push(` ... and ${filesWritten.length - MAX_FILES_SHOWN} more`);
    }
    parts.push('');
  }

  if (filesEdited.length > 0) {
    parts.push(`FILES EDITED (${filesEdited.length}):`);
    filesEdited.slice(0, MAX_FILES_SHOWN).forEach((f) => parts.push(` - ${f}`));
    if (filesEdited.length > MAX_FILES_SHOWN) {
      parts.push(` ... and ${filesEdited.length - MAX_FILES_SHOWN} more`);
    }
    parts.push('');
  }

  if (totalCommands > 0) {
    parts.push(`COMMANDS RUN (${totalCommands}):`);
    commandsShown.forEach((c) => parts.push(` $ ${c}`));
    if (totalCommands > commandsShown.length) {
      parts.push(` ... and ${totalCommands - commandsShown.length} more`);
    }
    parts.push('');
  }

  // Then show conversation context (but less of it)
  parts.push('CONVERSATION:');
  let messageCount = 0;
  for (const msg of session.messages) {
    if (messageCount > 20) break; // Limit to avoid overwhelming

    if (msg.type === 'user' && msg.text) {
      const text = msg.text.slice(0, 300);
      parts.push(`User: ${text}`);
      messageCount++;
    } else if (msg.type === 'assistant' && msg.text) {
      const text = msg.text.slice(0, 200);
      parts.push(`Assistant: ${text}`);
      messageCount++;
    }
  }

  // Add stats at end: the ten most-used tools, most frequent first
  parts.push('');
  const toolSummary = Object.entries(session.stats.toolCalls)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .map(([name, count]) => `${name}(${count})`)
    .join(', ');
  if (toolSummary) {
    parts.push(`Tool usage: ${toolSummary}`);
  }

  return parts.join('\n');
}

/**
 * Format the span between two timestamps as "N min" or "Hh Mm".
 * Returns 'unknown' when either timestamp is missing or unparsable, or when
 * the end precedes the start.
 */
function formatDuration(start: string, end: string): string {
  if (!start || !end) return 'unknown';

  const diffMs = new Date(end).getTime() - new Date(start).getTime();
  // An invalid date makes getTime() return NaN, which would render as "NaNh NaNm"
  if (!Number.isFinite(diffMs) || diffMs < 0) return 'unknown';

  const minutes = Math.floor(diffMs / 60000);
  if (minutes < 60) return `${minutes} min`;

  const hours = Math.floor(minutes / 60);
  const remainingMinutes = minutes % 60;
  return `${hours}h ${remainingMinutes}m`;
}