src/core/session-reader.ts at experiment/session-classification

alice.mosphere.at / worklog
fork atom
this repo has no description
fork atom
worklog / src / core / session-reader.ts
at experiment/session-classification 541 lines 15 kB view raw
wrap content
alice Add work type classification for session scope 3mo ago
569f6259
  1import { createReadStream } from 'fs';
  2import * as readline from 'readline';
  3import type {
  4  RawSessionEntry,
  5  ParsedSession,
  6  ParsedMessage,
  7  ToolUse,
  8  SessionStats,
  9  MessageContent,
 10} from '../types';
 11
 12/**
 13 * Stream-parse a JSONL session file
 14 */
 15export async function* parseJSONLStream(
 16  filePath: string
 17): AsyncGenerator<RawSessionEntry> {
 18  const rl = readline.createInterface({
 19    input: createReadStream(filePath),
 20    crlfDelay: Infinity,
 21  });
 22
 23  for await (const line of rl) {
 24    if (!line.trim()) continue;
 25    try {
 26      yield JSON.parse(line) as RawSessionEntry;
 27    } catch {
 28      // Skip invalid JSON lines
 29    }
 30  }
 31}
 32
 33/**
 34 * Parse a session file into a structured format
 35 */
 36export async function parseSessionFile(
 37  filePath: string,
 38  projectPath: string,
 39  projectName: string
 40): Promise<ParsedSession> {
 41  const messages: ParsedMessage[] = [];
 42  const toolCalls: Record<string, number> = {};
 43  let sessionId = '';
 44  let gitBranch = '';
 45  let startTime = '';
 46  let endTime = '';
 47  let totalInputTokens = 0;
 48  let totalOutputTokens = 0;
 49  let userMessages = 0;
 50  let assistantMessages = 0;
 51
 52  const seen = new Set<string>();
 53
 54  for await (const entry of parseJSONLStream(filePath)) {
 55    // Deduplication - use uuid (unique per chunk) not message.id (same across streaming chunks)
 56    if (seen.has(entry.uuid)) continue;
 57    seen.add(entry.uuid);
 58
 59    // Extract metadata from first entry
 60    if (!sessionId && entry.sessionId) {
 61      sessionId = entry.sessionId;
 62    }
 63    if (!gitBranch && entry.gitBranch) {
 64      gitBranch = entry.gitBranch;
 65    }
 66
 67    // Track timestamps
 68    if (!startTime || entry.timestamp < startTime) {
 69      startTime = entry.timestamp;
 70    }
 71    if (!endTime || entry.timestamp > endTime) {
 72      endTime = entry.timestamp;
 73    }
 74
 75    // Extract token usage from assistant messages
 76    if (entry.type === 'assistant' && entry.message?.usage) {
 77      const usage = entry.message.usage;
 78      totalInputTokens += usage.input_tokens || 0;
 79      totalOutputTokens += usage.output_tokens || 0;
 80      totalInputTokens += usage.cache_creation_input_tokens || 0;
 81      totalInputTokens += usage.cache_read_input_tokens || 0;
 82    }
 83
 84    // Parse message content
 85    const text = extractText(entry.message?.content);
 86    const toolUses = extractToolUses(entry.message?.content);
 87
 88    // Count tool calls
 89    for (const tool of toolUses) {
 90      toolCalls[tool.name] = (toolCalls[tool.name] || 0) + 1;
 91    }
 92
 93    if (entry.type === 'user') userMessages++;
 94    if (entry.type === 'assistant') assistantMessages++;
 95
 96    messages.push({
 97      type: entry.type,
 98      timestamp: entry.timestamp,
 99      text,
100      toolUses,
101    });
102  }
103
104  // Use filename as sessionId fallback
105  if (!sessionId) {
106    sessionId = filePath.split('/').pop()?.replace('.jsonl', '') || 'unknown';
107  }
108
109  // Provide default timestamps if none found
110  const now = new Date().toISOString();
111  if (!startTime) {
112    startTime = now;
113  }
114  if (!endTime) {
115    endTime = startTime;
116  }
117
118  // Derive date from startTime
119  const date = startTime.split('T')[0];
120
121  const stats: SessionStats = {
122    userMessages,
123    assistantMessages,
124    toolCalls,
125    totalInputTokens,
126    totalOutputTokens,
127  };
128
129  return {
130    sessionId,
131    filePath,
132    projectPath,
133    projectName,
134    gitBranch,
135    startTime,
136    endTime,
137    date,
138    messages,
139    stats,
140  };
141}
142
/**
 * Collect the plain text of a message.
 *
 * Content may be a bare string or an array of content blocks. A string is
 * returned as-is. For arrays, only blocks with `type === 'text'` contribute:
 * their text is read from `text` when that key exists, otherwise from
 * `content`. Empty pieces are dropped and the rest are joined with newlines.
 *
 * @param content - Message content; anything that is neither a string nor an
 *   array yields `''`.
 * @returns The message text, or `''` when there is none.
 */
function extractText(content: MessageContent[] | string | undefined): string {
  // A bare string is the whole message text; without this branch it would
  // fail the array check below and be reported as empty.
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  const texts: string[] = [];
  for (const item of content) {
    if (item.type !== 'text') continue;
    // Handle both formats: { text: "..." } and { content: "..." }
    const text = 'text' in item ? item.text : 'content' in item ? item.content : '';
    if (text) texts.push(text);
  }
  return texts.join('\n');
}
159
/**
 * Pull every `tool_use` block out of a message's content.
 *
 * Each result keeps the tool name, a short display summary of its input and
 * the untouched input object.
 *
 * @param content - Content blocks of a message; non-array values yield `[]`.
 * @returns One `ToolUse` per `tool_use` block, in their original order.
 */
function extractToolUses(content: MessageContent[] | undefined): ToolUse[] {
  if (!Array.isArray(content)) return [];

  return content.reduce<ToolUse[]>((found, block) => {
    if (block.type === 'tool_use') {
      const summary = summarizeToolInput(block.name, block.input);
      found.push({ name: block.name, input: summary, rawInput: block.input });
    }
    return found;
  }, []);
}
178
/**
 * Produce a short, display-friendly summary of a tool call's input.
 *
 * Known tools are summarized by a single field: `command` for Bash,
 * `file_path` for Read/Write/Edit, `pattern` for Glob/Grep and `description`
 * for Task (a missing or falsy field becomes `''`). Any other tool falls back
 * to the JSON-serialized input. The result is truncated to 200 characters.
 *
 * @param toolName - Name of the tool that was called.
 * @param input - The tool call's input object.
 * @returns The summary string.
 */
function summarizeToolInput(
  toolName: string,
  input: Record<string, unknown>
): string {
  const MAX_LENGTH = 200;

  // Which input field best describes a call to each known tool.
  const summaryFieldByTool = new Map<string, string>([
    ['Bash', 'command'],
    ['Read', 'file_path'],
    ['Write', 'file_path'],
    ['Edit', 'file_path'],
    ['Glob', 'pattern'],
    ['Grep', 'pattern'],
    ['Task', 'description'],
  ]);

  const field = summaryFieldByTool.get(toolName);
  const summary =
    field === undefined ? JSON.stringify(input) : String(input[field] || '');

  return truncate(summary, MAX_LENGTH);
}
206
/**
 * Shorten `str` to at most `maxLength` UTF-16 code units, marking the cut
 * with a trailing `...`.
 *
 * @param str - Text to shorten.
 * @param maxLength - Maximum length of the result, ellipsis included.
 * @returns `str` unchanged when it already fits, otherwise a prefix of `str`
 *   followed by `...`. When `maxLength` leaves no room for any text, as much
 *   of the ellipsis as fits is returned (`''` for zero or negative values).
 */
function truncate(str: string, maxLength: number): string {
  const ELLIPSIS = '...';

  if (str.length <= maxLength) return str;

  // Too small to hold any text before the marker. Without this guard the
  // slice index below goes negative and the result exceeds maxLength.
  if (maxLength <= ELLIPSIS.length) {
    return ELLIPSIS.slice(0, Math.max(0, maxLength));
  }

  let end = maxLength - ELLIPSIS.length;

  // Do not cut between the two halves of a surrogate pair: if the last kept
  // code unit is a high surrogate, drop it as well.
  const lastKept = str.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;

  return str.slice(0, end) + ELLIPSIS;
}
211
/**
 * Overall category of a session's work, derived from the files it changed.
 *
 * `mixed` is used when no single category dominates.
 */
export type WorkType =
  | 'feature'
  | 'infrastructure'
  | 'tests'
  | 'docs'
  | 'mixed';
216
/**
 * Number of changed files per area of the codebase.
 */
export interface WorkScope {
  /** Files counted as frontend code. */
  frontend: number;
  /** Files counted as backend code. */
  backend: number;
  /** Files counted as tests. */
  tests: number;
  /** Files counted as type or interface definitions. */
  types: number;
  /** Files counted as configuration. */
  config: number;
  /** Files counted as documentation. */
  docs: number;
}
225
/**
 * Result of classifying the files changed in a session.
 */
export interface WorkClassification {
  /** The dominant kind of work. */
  type: WorkType;
  /** Human-readable explanation of why this type was chosen. */
  signals: string[];
  /** Per-area file counts behind the decision. */
  scope: WorkScope;
  /** Short description of the areas touched, e.g. "frontend, backend" or "tests". */
  scopeSummary: string;
}
232
/**
 * Heuristic check for whether a file path looks like frontend code.
 *
 * The comparison is case-insensitive. A path matches when it contains one of
 * the known UI directory segments or ends with a UI file extension.
 *
 * @param file - File path to inspect.
 * @returns `true` when the path looks like frontend code.
 */
function isFrontend(file: string): boolean {
  const FRONTEND_DIRS = [
    '/components/',
    '/pages/',
    '/screens/',
    '/views/',
    '/ui/',
    '/app/',
    '/apps/web/',
    '/web/',
    '/frontend/',
    '/client/',
  ];
  const FRONTEND_EXTENSIONS = ['.tsx', '.jsx', '.css', '.scss'];

  const normalized = file.toLowerCase();
  if (FRONTEND_DIRS.some((dir) => normalized.includes(dir))) return true;
  return FRONTEND_EXTENSIONS.some((ext) => normalized.endsWith(ext));
}
255
/**
 * Heuristic check for whether a file path looks like backend code.
 *
 * The comparison is case-insensitive. A path matches when it contains one of
 * the known server-side directory segments. Failing that, a `.ts` file also
 * counts as backend unless it is a test, spec or declaration file, or looks
 * like frontend code.
 *
 * @param file - File path to inspect.
 * @returns `true` when the path looks like backend code.
 */
function isBackend(file: string): boolean {
  const BACKEND_DIRS = [
    '/api/',
    '/server/',
    '/services/',
    '/lib/',
    '/core/',
    '/packages/',
    '/backend/',
    '/handlers/',
    '/routes/',
    '/controllers/',
    '/models/',
    '/utils/',
  ];
  const NON_SOURCE_TS_SUFFIXES = ['.test.ts', '.spec.ts', '.d.ts'];

  const normalized = file.toLowerCase();
  if (BACKEND_DIRS.some((dir) => normalized.includes(dir))) return true;

  // Fallback: plain TypeScript sources that are not claimed by anything else.
  if (!normalized.endsWith('.ts')) return false;
  if (NON_SOURCE_TS_SUFFIXES.some((suffix) => normalized.endsWith(suffix))) {
    return false;
  }
  return !isFrontend(file);
}
277
/**
 * Classify the type of work based on the paths of the changed files.
 *
 * Every path is counted in exactly one bucket, checked in this order: tests,
 * types, config, docs, then feature code (split into frontend and backend).
 * The order matters: a path matching several rules goes to the first one.
 *
 * The work type is the first category that covers strictly more than half of
 * the files, tried as tests, docs, infrastructure (types and config
 * combined), then feature. If none does, the type is `mixed` and the signals
 * list every non-empty bucket. An empty file list is also `mixed`.
 *
 * @param files - Paths of the files that were changed.
 * @returns The work type, the signals explaining it, the per-bucket counts
 *   and a short scope summary.
 */
export function classifyWork(files: string[]): WorkClassification {
  const scope: WorkScope = {
    frontend: 0,
    backend: 0,
    tests: 0,
    types: 0,
    config: 0,
    docs: 0,
  };

  const total = files.length;
  if (total === 0) {
    return {
      type: 'mixed',
      signals: ['no files changed'],
      scope,
      scopeSummary: '',
    };
  }

  const TEST_MARKERS = ['.test.', '.spec.', '__tests__', '/test/', '/tests/'];
  const TYPE_FILENAMES = ['types.ts', 'interfaces.ts'];
  const TYPE_DIRS = ['/types/', '/interfaces/'];
  const CONFIG_MARKERS = ['.config.', '/config/', '.github/', 'dockerfile', '.yml', '.yaml'];
  const CONFIG_FILENAMES = ['package.json', 'tsconfig.json'];
  const DOC_DIRS = ['/docs/', '/documentation/'];

  const containsAny = (text: string, fragments: string[]): boolean =>
    fragments.some((fragment) => text.includes(fragment));

  let featureFiles = 0;

  for (const file of files) {
    const lower = file.toLowerCase();
    // Directory markers are matched against the lower-cased path, exact
    // file names against the original casing.
    const filename = file.split('/').pop() || '';

    // Tests
    if (containsAny(lower, TEST_MARKERS)) {
      scope.tests += 1;
      continue;
    }

    // Types/interfaces
    if (
      TYPE_FILENAMES.includes(filename) ||
      lower.endsWith('.d.ts') ||
      containsAny(lower, TYPE_DIRS)
    ) {
      scope.types += 1;
      continue;
    }

    // Config/devops (any dotfile counts as config)
    if (
      containsAny(lower, CONFIG_MARKERS) ||
      filename.startsWith('.') ||
      CONFIG_FILENAMES.includes(filename)
    ) {
      scope.config += 1;
      continue;
    }

    // Docs
    if (lower.endsWith('.md') || containsAny(lower, DOC_DIRS)) {
      scope.docs += 1;
      continue;
    }

    // Everything else is feature work, attributed to frontend or backend.
    featureFiles += 1;
    if (isFrontend(file)) {
      scope.frontend += 1;
    } else if (isBackend(file)) {
      scope.backend += 1;
    } else {
      // Unclassified files default to backend
      scope.backend += 1;
    }
  }

  // Scope summary: frontend and/or backend when there is feature code,
  // otherwise the first non-empty bucket among tests, docs and config.
  const featureSides: string[] = [];
  if (scope.frontend > 0) featureSides.push('frontend');
  if (scope.backend > 0) featureSides.push('backend');

  let scopeSummary = featureSides.join(', ');
  if (!scopeSummary) {
    if (scope.tests > 0) scopeSummary = 'tests';
    else if (scope.docs > 0) scopeSummary = 'docs';
    else if (scope.config > 0) scopeSummary = 'config';
  }

  const verdict = (
    type: WorkClassification['type'],
    signals: string[]
  ): WorkClassification => ({ type, signals, scope, scopeSummary });

  // A category wins only with strictly more than 50% of the files.
  const half = total * 0.5;

  if (scope.tests > half) {
    return verdict('tests', [`${scope.tests}/${total} files are tests`]);
  }

  if (scope.docs > half) {
    return verdict('docs', [`${scope.docs}/${total} files are documentation`]);
  }

  if (scope.types + scope.config > half) {
    // Report whichever of the two is larger; config wins a tie.
    const reason =
      scope.types > scope.config
        ? `${scope.types}/${total} files are types`
        : `${scope.config}/${total} files are config`;
    return verdict('infrastructure', [reason]);
  }

  if (featureFiles > half) {
    return verdict('feature', [`${featureFiles}/${total} files are feature code`]);
  }

  // Mixed - describe every non-empty bucket
  const tallies: Array<[number, string]> = [
    [featureFiles, 'feature'],
    [scope.tests, 'test'],
    [scope.types, 'type'],
    [scope.config, 'config'],
    [scope.docs, 'doc'],
  ];
  const mixedSignals = tallies
    .filter(([count]) => count > 0)
    .map(([count, label]) => `${count} ${label}`);

  return verdict('mixed', mixedSignals);
}
424
/**
 * Create a condensed, plain-text transcript of a session for LLM summarization.
 *
 * The output leads with the concrete actions taken (work type, files created
 * and edited, shell commands) so that implementation work is captured, then
 * adds a truncated slice of the conversation and a tool-usage summary.
 *
 * @param session - Parsed session to condense.
 * @returns The transcript as newline-joined text.
 */
export function createCondensedTranscript(session: ParsedSession): string {
  const MAX_FILES_LISTED = 15;
  const MAX_COMMANDS_LISTED = 5;
  const MAX_COMMAND_LENGTH = 100;

  const parts: string[] = [];

  parts.push(`Project: ${session.projectName}`);
  if (session.gitBranch) {
    parts.push(`Branch: ${session.gitBranch}`);
  }
  parts.push(`Duration: ${formatDuration(session.startTime, session.endTime)}`);
  parts.push('');

  // Read one field of a raw tool input as a string ('' when absent or falsy).
  const inputField = (rawInput: unknown, key: string): string => {
    if (typeof rawInput !== 'object' || rawInput === null) return '';
    return String((rawInput as Record<string, unknown>)[key] || '');
  };

  // LEAD with files changed - this is the most important signal of actual work.
  // Sets keep first-seen order while deduplicating in constant time.
  const writtenSet = new Set<string>();
  const editedSet = new Set<string>();
  const commandsShown: string[] = [];
  let commandCount = 0;

  for (const msg of session.messages) {
    // Only tool calls made by the assistant are considered
    if (msg.type !== 'assistant') continue;

    for (const tool of msg.toolUses) {
      if (tool.name === 'Write') {
        const path = inputField(tool.rawInput, 'file_path');
        if (path) writtenSet.add(path);
      } else if (tool.name === 'Edit') {
        const path = inputField(tool.rawInput, 'file_path');
        if (path) editedSet.add(path);
      } else if (tool.name === 'Bash') {
        const cmd = inputField(tool.rawInput, 'command').slice(0, MAX_COMMAND_LENGTH);
        if (!cmd) continue;
        // Count every command, but keep only the ones that will be shown.
        commandCount++;
        if (commandsShown.length < MAX_COMMANDS_LISTED) commandsShown.push(cmd);
      }
    }
  }

  const filesWritten = Array.from(writtenSet);
  const filesEdited = Array.from(editedSet);

  // Classify the work based on file paths. A file that was both written and
  // edited must count once, otherwise it skews the majority thresholds.
  const allFiles = Array.from(new Set([...filesWritten, ...filesEdited]));
  const classification = classifyWork(allFiles);
  parts.push(`WORK TYPE: ${classification.type}`);
  if (classification.scopeSummary) {
    parts.push(`SCOPE: ${classification.scopeSummary}`);
  }
  parts.push('');

  // Emit a headed, capped list of files; nothing at all when the list is empty.
  const pushFileList = (heading: string, files: string[]): void => {
    if (files.length === 0) return;
    parts.push(`${heading} (${files.length}):`);
    for (const f of files.slice(0, MAX_FILES_LISTED)) {
      parts.push(`  - ${f}`);
    }
    if (files.length > MAX_FILES_LISTED) {
      parts.push(`  ... and ${files.length - MAX_FILES_LISTED} more`);
    }
    parts.push('');
  };

  // Show action summary at the TOP
  pushFileList('FILES CREATED', filesWritten);
  pushFileList('FILES EDITED', filesEdited);

  if (commandCount > 0) {
    // The header reports the real number of commands; the list is capped.
    parts.push(`COMMANDS RUN (${commandCount}):`);
    for (const c of commandsShown) {
      parts.push(`  $ ${c}`);
    }
    if (commandCount > commandsShown.length) {
      parts.push(`  ... and ${commandCount - commandsShown.length} more`);
    }
    parts.push('');
  }

  // Then show conversation context (but less of it)
  parts.push('CONVERSATION:');
  let messageCount = 0;
  for (const msg of session.messages) {
    // Limit to avoid overwhelming. The check runs before each message, so up
    // to 21 messages are emitted.
    if (messageCount > 20) break;

    if (msg.type === 'user' && msg.text) {
      parts.push(`User: ${msg.text.slice(0, 300)}`);
      messageCount++;
    } else if (msg.type === 'assistant' && msg.text) {
      parts.push(`Assistant: ${msg.text.slice(0, 200)}`);
      messageCount++;
    }
  }

  // Add stats at end: the ten most-used tools, most frequent first
  parts.push('');
  const toolSummary = Object.entries(session.stats.toolCalls)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .map(([name, count]) => `${name}(${count})`)
    .join(', ');
  if (toolSummary) {
    parts.push(`Tool usage: ${toolSummary}`);
  }

  return parts.join('\n');
}
527
/**
 * Format the time between two timestamps as a short human-readable duration.
 *
 * @param start - Start timestamp, in any format `Date` can parse.
 * @param end - End timestamp, in any format `Date` can parse.
 * @returns `"N min"` for durations under an hour, `"Hh Mm"` otherwise, or
 *   `'unknown'` when either timestamp is missing or unparseable, or when
 *   `end` is earlier than `start`.
 */
function formatDuration(start: string, end: string): string {
  if (!start || !end) return 'unknown';

  const diffMs = new Date(end).getTime() - new Date(start).getTime();

  // An unparseable timestamp makes the difference NaN, which would otherwise
  // be rendered as "NaNh NaNm"; a negative span is not a meaningful duration.
  if (!Number.isFinite(diffMs) || diffMs < 0) return 'unknown';

  const minutes = Math.floor(diffMs / 60000);
  if (minutes < 60) return `${minutes} min`;

  const hours = Math.floor(minutes / 60);
  const remainingMinutes = minutes % 60;
  return `${hours}h ${remainingMinutes}m`;
}