packages/ai-parser/src/ai-parser.service.ts at main

mokkenstorm.dev / cv-manager
fork atom
because I got bored of customising my CV for every job
fork atom
cv-manager / packages / ai-parser / src / ai-parser.service.ts
at main 106 lines 3.0 kB view raw
wrap content
mokkenstorm.dev refactor(ai-parser): improve prompts, schemas, and context-aware parsing 7w ago
8b59e1df
  1import type { AIProvider } from '@cv/ai-provider';
  2import { ParsedCVDataSchema, type ParsedCVData } from './schemas';
  3import {
  4  CV_SYSTEM_PROMPT,
  5  buildCvUserPrompt,
  6  type ExistingUserContext,
  7} from './prompts';
  8
/**
 * Configuration for CV parser service.
 *
 * Both fields are optional; CVParserService substitutes its own defaults
 * for any field that is omitted (or explicitly null/undefined).
 */
export interface CVParserConfig {
  /**
   * Temperature forwarded to the AI provider on every completion request.
   * CVParserService falls back to 0.1 when this is not set.
   */
  temperature?: number;
  /**
   * Maximum tokens forwarded to the AI provider on every completion request.
   * CVParserService falls back to 8192 when this is not set, and raises an
   * error if the provider reports a 'length' finish reason.
   */
  maxTokens?: number;
}
 18
 19/**
 20 * Service for parsing CV text using AI
 21 * Uses dependency injection for the AI provider
 22 */
 23export class CVParserService {
 24  private provider: AIProvider;
 25  private temperature: number;
 26  private maxTokens: number;
 27
 28  constructor(provider: AIProvider, config?: CVParserConfig) {
 29    this.provider = provider;
 30    this.temperature = config?.temperature ?? 0.1;
 31    this.maxTokens = config?.maxTokens ?? 8192;
 32  }
 33
 34  /**
 35   * Parse CV text using the AI provider
 36   * @param cvText Raw text from CV (extracted from PDF, DOCX, etc.)
 37   * @param context Optional existing user data for deduplication and disambiguation
 38   * @returns Structured CV data matching ParsedCVDataSchema
 39   */
 40  async parseCVText(
 41    cvText: string,
 42    context?: ExistingUserContext,
 43  ): Promise<ParsedCVData> {
 44    if (!cvText || cvText.trim().length === 0) {
 45      throw new Error('CV text cannot be empty');
 46    }
 47
 48    try {
 49      const response = await this.provider.complete({
 50        systemPrompt: CV_SYSTEM_PROMPT,
 51        prompt: buildCvUserPrompt(cvText, context),
 52        temperature: this.temperature,
 53        maxTokens: this.maxTokens,
 54      });
 55
 56      if (response.finishReason === 'length') {
 57        throw new Error(
 58          'LLM response was truncated (hit max token limit). ' +
 59            `Increase maxTokens (currently ${this.maxTokens}) to allow longer responses.`
 60        );
 61      }
 62
 63      // Extract JSON from response (handle markdown code blocks)
 64      const rawJson = this.extractJson(response.content);
 65
 66      // Parse and validate with Zod
 67      const parsed = ParsedCVDataSchema.parse(JSON.parse(rawJson));
 68
 69      return parsed;
 70    } catch (error) {
 71      if (error instanceof SyntaxError) {
 72        throw new Error(
 73          `Failed to parse LLM response as JSON: ${error.message}`
 74        );
 75      }
 76
 77      if (error instanceof Error && 'issues' in error) {
 78        // Zod validation error
 79        throw new Error(`CV data validation failed: ${error.message}`);
 80      }
 81
 82      throw error;
 83    }
 84  }
 85
 86  /**
 87   * Extract JSON from LLM response
 88   * Handles markdown code blocks and other formatting
 89   */
 90  private extractJson(text: string): string {
 91    // Try to extract from markdown code block
 92    const codeBlockMatch = text.match(/```(?:json)?\n?([\s\S]*?)```/);
 93    if (codeBlockMatch?.[1]) {
 94      return codeBlockMatch[1].trim();
 95    }
 96
 97    // Try to extract raw JSON object
 98    const jsonMatch = text.match(/\{[\s\S]*\}/);
 99    if (jsonMatch) {
100      return jsonMatch[0];
101    }
102
103    // If no JSON found, return as-is and let JSON.parse fail with clear error
104    return text;
105  }
106}