because I got bored of customising my CV for every job
at main 106 lines 3.0 kB view raw
import type { AIProvider } from '@cv/ai-provider';
import { ParsedCVDataSchema, type ParsedCVData } from './schemas';
import {
  CV_SYSTEM_PROMPT,
  buildCvUserPrompt,
  type ExistingUserContext,
} from './prompts';

/**
 * Configuration for CV parser service
 */
export interface CVParserConfig {
  /** Temperature for AI completions (the service falls back to 0.1 when omitted) */
  temperature?: number;
  /** Maximum tokens for AI completions (the service falls back to 8192 when omitted) */
  maxTokens?: number;
}

/**
 * Service for parsing CV text using AI
 * Uses dependency injection for the AI provider
 */
export class CVParserService {
  /**
   * Matches the first fenced markdown block and captures its content.
   * The optional info string is skipped when it is either a word that ends
   * the fence line (e.g. "JSON", "json5", "javascript") or the literal "json"
   * glued directly to the payload. Case-insensitive; tolerates CRLF.
   */
  private static readonly FENCED_BLOCK =
    /```[ \t]*(?:[a-z][\w+-]*(?=[ \t]*\r?\n)|json)?[ \t]*\r?\n?([\s\S]*?)```/i;

  /** Greedy match from the first `{` to the last `}` in the text. */
  private static readonly RAW_OBJECT = /\{[\s\S]*\}/;

  private readonly provider: AIProvider;
  private readonly temperature: number;
  private readonly maxTokens: number;

  /**
   * @param provider AI provider used to run completions
   * @param config Optional overrides; temperature defaults to 0.1 and maxTokens to 8192
   */
  constructor(provider: AIProvider, config?: CVParserConfig) {
    this.provider = provider;
    this.temperature = config?.temperature ?? 0.1;
    this.maxTokens = config?.maxTokens ?? 8192;
  }

  /**
   * Parse CV text using the AI provider
   * @param cvText Raw text from CV (extracted from PDF, DOCX, etc.)
   * @param context Optional existing user data for deduplication and disambiguation
   * @returns Structured CV data matching ParsedCVDataSchema
   * @throws Error when `cvText` is empty or whitespace-only, when the completion
   *   reports `finishReason === 'length'`, when the response is not valid JSON,
   *   or when the parsed JSON fails schema validation. Errors raised by the
   *   provider itself propagate unchanged.
   */
  async parseCVText(
    cvText: string,
    context?: ExistingUserContext,
  ): Promise<ParsedCVData> {
    if (!cvText || cvText.trim().length === 0) {
      throw new Error('CV text cannot be empty');
    }

    // The provider call stays outside the try blocks below so that a provider
    // failure is never relabelled as a JSON or validation problem.
    const response = await this.provider.complete({
      systemPrompt: CV_SYSTEM_PROMPT,
      prompt: buildCvUserPrompt(cvText, context),
      temperature: this.temperature,
      maxTokens: this.maxTokens,
    });

    if (response.finishReason === 'length') {
      throw new Error(
        'LLM response was truncated (hit max token limit). ' +
          `Increase maxTokens (currently ${this.maxTokens}) to allow longer responses.`,
      );
    }

    // Extract JSON from response (handle markdown code blocks)
    const rawJson = this.extractJson(response.content);

    let data: unknown;
    try {
      data = JSON.parse(rawJson);
    } catch (error) {
      if (error instanceof SyntaxError) {
        throw CVParserService.wrapError(
          `Failed to parse LLM response as JSON: ${error.message}`,
          error,
        );
      }
      throw error;
    }

    // Parse and validate with Zod
    try {
      return ParsedCVDataSchema.parse(data);
    } catch (error) {
      if (error instanceof Error && 'issues' in error) {
        // Zod validation error
        throw CVParserService.wrapError(
          `CV data validation failed: ${error.message}`,
          error,
        );
      }
      throw error;
    }
  }

  /**
   * Extract JSON from LLM response
   * Handles markdown code blocks (with or without a language tag) and JSON
   * objects embedded in surrounding prose.
   * @param text Raw completion text
   * @returns The best JSON candidate, or `text` unchanged when none is found
   */
  private extractJson(text: string): string {
    // Try to extract from markdown code block; an empty block is ignored so
    // the raw-object fallback still gets a chance.
    const fencedContent = text.match(CVParserService.FENCED_BLOCK)?.[1]?.trim();
    if (fencedContent) {
      return fencedContent;
    }

    // Try to extract raw JSON object
    const objectMatch = text.match(CVParserService.RAW_OBJECT);
    if (objectMatch) {
      return objectMatch[0];
    }

    // If no JSON found, return as-is and let JSON.parse fail with clear error
    return text;
  }

  /**
   * Build an Error with the given message that keeps the original failure
   * reachable through its `cause` property.
   */
  private static wrapError(message: string, cause: unknown): Error {
    // Assigned after construction (instead of the two-argument Error
    // constructor) so this type-checks on lib targets older than ES2022.
    return Object.assign(new Error(message), { cause });
  }
}