// Built because I got bored of customising my CV for every job.
1import type { AIProvider } from '@cv/ai-provider';
2import { ParsedCVDataSchema, type ParsedCVData } from './schemas';
3import {
4 CV_SYSTEM_PROMPT,
5 buildCvUserPrompt,
6 type ExistingUserContext,
7} from './prompts';
8
/**
 * Optional settings accepted by the CV parser service.
 * Every field may be omitted.
 */
export interface CVParserConfig {
  /** Upper limit on the number of tokens for AI completions */
  maxTokens?: number;
  /** Sampling temperature used for AI completions */
  temperature?: number;
}
18
/**
 * Service that converts raw CV text into structured data by prompting an AI model.
 *
 * The AI provider is injected through the constructor rather than created here,
 * so callers decide which provider implementation is used.
 */
export class CVParserService {
  private readonly provider: AIProvider;
  private readonly temperature: number;
  private readonly maxTokens: number;

  /**
   * @param provider AI provider used to run the completion request
   * @param config Optional overrides; temperature falls back to 0.1 and
   *   maxTokens to 8192 when not supplied
   */
  constructor(provider: AIProvider, config?: CVParserConfig) {
    this.provider = provider;
    this.temperature = config?.temperature ?? 0.1;
    this.maxTokens = config?.maxTokens ?? 8192;
  }

  /**
   * Parse CV text using the AI provider.
   *
   * @param cvText Raw text from CV (extracted from PDF, DOCX, etc.)
   * @param context Optional existing user data, forwarded to the user-prompt builder
   * @returns Structured CV data that passed ParsedCVDataSchema validation
   * @throws Error when `cvText` is empty or whitespace-only
   * @throws Error when the provider reports `finishReason === 'length'` (truncated output)
   * @throws Error when the extracted payload is not valid JSON
   * @throws Error when the parsed JSON fails schema validation
   */
  async parseCVText(
    cvText: string,
    context?: ExistingUserContext,
  ): Promise<ParsedCVData> {
    if (!cvText || cvText.trim().length === 0) {
      throw new Error('CV text cannot be empty');
    }

    try {
      const response = await this.provider.complete({
        systemPrompt: CV_SYSTEM_PROMPT,
        prompt: buildCvUserPrompt(cvText, context),
        temperature: this.temperature,
        maxTokens: this.maxTokens,
      });

      // A truncated completion cannot contain complete JSON, so fail early
      // with an actionable message instead of a confusing parse error.
      if (response.finishReason === 'length') {
        throw new Error(
          'LLM response was truncated (hit max token limit). ' +
            `Increase maxTokens (currently ${this.maxTokens}) to allow longer responses.`
        );
      }

      // Strip markdown fences / surrounding prose before parsing
      const rawJson = this.extractJson(response.content);

      // Parse, then validate the shape against the schema
      return ParsedCVDataSchema.parse(JSON.parse(rawJson));
    } catch (error) {
      if (error instanceof SyntaxError) {
        throw new Error(
          `Failed to parse LLM response as JSON: ${error.message}`
        );
      }

      // Schema validation errors are recognised by their `issues` property
      if (error instanceof Error && 'issues' in error) {
        throw new Error(`CV data validation failed: ${error.message}`);
      }

      // Anything else (including the truncation error above) propagates unchanged
      throw error;
    }
  }

  /**
   * Extract the JSON payload from an LLM response.
   *
   * Two candidates are considered, in order: the contents of the first
   * markdown code fence, then the span from the first `{` to the last `}`.
   * The first candidate that is syntactically valid JSON is returned. This
   * matters when the fence match is not the payload at all, e.g. a raw JSON
   * response whose string value itself contains a fenced snippet, or a fence
   * carrying an unexpected language tag.
   *
   * When no candidate is valid JSON, the result is the fenced contents if
   * present, otherwise the brace span, otherwise the input unchanged, so the
   * caller's JSON.parse fails with a clear error.
   *
   * @param text Raw response content from the provider
   * @returns The best JSON candidate found in `text`
   */
  private extractJson(text: string): string {
    const fencedMatch = text.match(/```(?:json)?\n?([\s\S]*?)```/);
    // An empty fence is treated as "no fence"
    const fenced = fencedMatch?.[1] ? fencedMatch[1].trim() : undefined;

    const bracedMatch = text.match(/\{[\s\S]*\}/);
    const braced = bracedMatch ? bracedMatch[0] : undefined;

    for (const candidate of [fenced, braced]) {
      if (candidate !== undefined && CVParserService.isValidJson(candidate)) {
        return candidate;
      }
    }

    // Nothing parses: hand back the most likely payload and let JSON.parse report why
    return fenced ?? braced ?? text;
  }

  /** Report whether `candidate` is syntactically valid JSON. */
  private static isValidJson(candidate: string): boolean {
    try {
      JSON.parse(candidate);
      return true;
    } catch {
      return false;
    }
  }
}