src/regen.ts at main · chadfowler.com/phoenix

chadfowler.com / phoenix
fork atom
Reference implementation for the Phoenix Architecture. Work in progress. aicoding.leaflet.pub/
ai coding crazy
fork atom
phoenix / src / regen.ts
at main 728 lines 26 kB view raw
wrap content
Chad Fowler feat: code gen reliability 5% → 89% — template assembly + SQL repair 20hrs ago
d0ea5dab
  1/**
  2 * Regeneration Engine — generates code for each IU.
  3 *
  4 * Two modes:
  5 * - Stub mode (no LLM): produces typed skeletons with throw stubs.
  6 * - LLM mode: sends IU contract + canonical requirements to an LLM
  7 *   and produces real, working implementations.
  8 *
  9 * The LLM provider is pluggable (Anthropic, OpenAI, etc.)
 10 * and auto-detected from env vars.
 11 */
 12
 13import { execSync } from 'node:child_process';
 14import { writeFileSync, mkdirSync, unlinkSync, existsSync } from 'node:fs';
 15import { join, dirname } from 'node:path';
 16import type { ImplementationUnit } from './models/iu.js';
 17import type { CanonicalNode } from './models/canonical.js';
 18import type { IUManifest, RegenMetadata, FileManifestEntry } from './models/manifest.js';
 19import type { LLMProvider } from './llm/provider.js';
 20import { buildPrompt, getSystemPrompt } from './llm/prompt.js';
 21import type { ResolvedTarget } from './models/architecture.js';
 22import { sha256 } from './semhash.js';
 23
 24const TOOLCHAIN_VERSION = 'phoenix-regen/0.1.0';
 25
 26export interface RegenResult {
 27  iu_id: string;
 28  files: Map<string, string>;    // path → content
 29  manifest: IUManifest;
 30}
 31
 32export interface RegenContext {
 33  /** LLM provider for real code generation. Omit for stub mode. */
 34  llm?: LLMProvider;
 35  /** All canonical nodes (needed for LLM prompt context). */
 36  canonNodes?: CanonicalNode[];
 37  /** All IUs (for sibling module context). */
 38  allIUs?: ImplementationUnit[];
 39  /** Project root directory (for typecheck-and-retry). */
 40  projectRoot?: string;
 41  /** Architecture target (e.g., sqlite-web-api). */
 42  target?: ResolvedTarget | null;
 43  /** Callback for progress reporting. */
 44  onProgress?: (iu: ImplementationUnit, status: 'start' | 'done' | 'error', message?: string) => void;
 45}
 46
 47/**
 48 * Generate code for a single IU.
 49 * Uses LLM if provided in context, otherwise falls back to stubs.
 50 */
 51export async function generateIU(iu: ImplementationUnit, ctx?: RegenContext): Promise<RegenResult> {
 52  const files = new Map<string, string>();
 53  const modelId = ctx?.llm ? `${ctx.llm.name}/${ctx.llm.model}` : 'stub-generator/1.0';
 54
 55  for (const outputPath of iu.output_files) {
 56    let content: string;
 57
 58    if (ctx?.llm && ctx.canonNodes) {
 59      ctx.onProgress?.(iu, 'start', `Generating ${iu.name} via ${ctx.llm.name}…`);
 60      try {
 61        content = await generateWithLLM(iu, ctx.llm, ctx.canonNodes, ctx.allIUs, ctx.projectRoot, ctx.target);
 62        ctx.onProgress?.(iu, 'done');
 63      } catch (err) {
 64        const msg = err instanceof Error ? err.message : String(err);
 65        ctx.onProgress?.(iu, 'error', msg);
 66        // Fall back to stub on LLM failure
 67        content = ctx.target ? generateArchStub(iu) : generateModule(iu);
 68      }
 69    } else {
 70      content = ctx?.target ? generateArchStub(iu) : generateModule(iu);
 71    }
 72
 73    files.set(outputPath, content);
 74  }
 75
 76  // Build manifest entries
 77  const fileEntries: Record<string, FileManifestEntry> = {};
 78  for (const [path, content] of files) {
 79    fileEntries[path] = {
 80      path,
 81      content_hash: sha256(content),
 82      size: content.length,
 83    };
 84  }
 85
 86  const now = new Date().toISOString();
 87  const promptpackHash = sha256(JSON.stringify(iu.contract));
 88
 89  const metadata: RegenMetadata = {
 90    model_id: modelId,
 91    promptpack_hash: promptpackHash,
 92    toolchain_version: TOOLCHAIN_VERSION,
 93    generated_at: now,
 94  };
 95
 96  return {
 97    iu_id: iu.iu_id,
 98    files,
 99    manifest: {
100      iu_id: iu.iu_id,
101      iu_name: iu.name,
102      files: fileEntries,
103      regen_metadata: metadata,
104    },
105  };
106}
107
/**
 * Regenerate every IU in the given list, one after another.
 * Each generation is awaited before the next begins so that LLM rate limits
 * are respected.
 *
 * @param ius - Implementation units to generate, processed in order.
 * @param ctx - Optional context forwarded unchanged to each generation.
 * @returns One result per IU, in input order.
 */
export async function generateAll(ius: ImplementationUnit[], ctx?: RegenContext): Promise<RegenResult[]> {
  const collected: RegenResult[] = [];
  for (const unit of ius) {
    const outcome = await generateIU(unit, ctx);
    collected.push(outcome);
  }
  return collected;
}
118
119// ─── LLM Generation ─────────────────────────────────────────────────────────
120
/** Maximum number of LLM fix-up rounds attempted after a failed typecheck. */
const MAX_RETRIES = 2;

/**
 * Generate code for an IU using an LLM provider.
 *
 * Two modes:
 * - Template mode (the target's runtime provides `moduleTemplate`): the LLM
 *   response is passed through `assembleFromTemplate`, which fixes up the
 *   structural parts of the module.
 * - Freeform mode (no template): the response is only stripped of markdown
 *   fences.
 *
 * When `projectRoot` is given and the IU has at least one output file, the
 * result is typechecked and, on errors, sent back to the LLM for up to
 * `MAX_RETRIES` fix rounds. The code produced by the last fix round is
 * returned without a further typecheck.
 *
 * @param iu - Unit to generate.
 * @param llm - Provider used for the initial and the fix-up generations.
 * @param canonNodes - Canonical nodes passed to the prompt builder.
 * @param allIUs - All units; used to name sibling modules in the prompt.
 * @param projectRoot - Enables typecheck-and-retry when provided.
 * @param target - Architecture target, forwarded to the prompt builders.
 * @returns The generated module source.
 */
async function generateWithLLM(
  iu: ImplementationUnit,
  llm: LLMProvider,
  canonNodes: CanonicalNode[],
  allIUs?: ImplementationUnit[],
  projectRoot?: string,
  target?: ResolvedTarget | null,
): Promise<string> {
  const primaryFile = iu.output_files[0];

  // Siblings are the other IUs whose first output file lives under this IU's
  // directory. Compare against "<dir>/" so that "src/user" does not also match
  // "src/users/…"; with no directory part, every other IU counts as a sibling.
  const iuDir = primaryFile?.split('/').slice(0, -1).join('/') ?? '';
  const dirPrefix = iuDir ? `${iuDir}/` : '';
  const siblings = (allIUs ?? [])
    .filter(other => other.iu_id !== iu.iu_id && other.output_files[0]?.startsWith(dirPrefix))
    .map(other => other.name);

  const systemPrompt = getSystemPrompt(target);
  const prompt = buildPrompt(iu, canonNodes, siblings, target);
  const template = target?.runtime.moduleTemplate;

  // Template responses get their structure repaired; freeform ones are only unfenced.
  const finalize = (response: string): string =>
    template ? assembleFromTemplate(template, response, iu) : cleanCodeResponse(response);

  let code = finalize(await llm.generate(prompt, {
    system: systemPrompt,
    // Lower temperature in template mode for more deterministic output.
    temperature: template ? 0.1 : 0.2,
    maxTokens: 8192,
  }));

  // Typecheck-and-retry loop
  if (projectRoot && primaryFile) {
    for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
      const errors = typecheckFile(projectRoot, primaryFile, code);
      if (!errors) break; // clean

      // Feed the errors back to the LLM together with the current code.
      const fixResponse = await llm.generate(buildFixPrompt(code, errors), {
        system: systemPrompt,
        temperature: 0.1,
        maxTokens: 8192,
      });
      code = finalize(fixResponse);
    }
  }

  return code;
}
195
/**
 * Repair LLM-generated code so its structural parts match what the template
 * requires.
 *
 * The LLM generates a full module. This function:
 * 1. Takes the template lines before the `__MIGRATIONS__` marker as the fixed
 *    header (empty when the marker is absent or on the first line).
 * 2. Drops LLM import lines that mention hono, db.js, better-sqlite3 or zod.
 * 3. Keeps only the first `const router = new Hono()` and adds one when the
 *    body mentions neither `const router` nor `new Hono()`.
 * 4. Removes `export default router` and any `_phoenix` export from the body
 *    and appends fresh ones at the end.
 * 5. Rewrites double-quoted SQL literals (`datetime("now")`, `WHEN "x" THEN`,
 *    `DEFAULT "x"`) to single quotes.
 *
 * @param template - Module template; only its header portion is used here.
 * @param llmResponse - Raw LLM output, possibly wrapped in markdown fences.
 * @param iu - Unit whose identity is written into the `_phoenix` metadata.
 * @returns The assembled module source.
 */
function assembleFromTemplate(template: string, llmResponse: string, iu: ImplementationUnit): string {
  // Escapes a value for embedding inside a single-quoted string literal, so a
  // name such as "User's profile" cannot break the generated module.
  const quoteSafe = (value: string): string =>
    String(value).replace(/\\/g, '\\\\').replace(/'/g, "\\'").replace(/\r?\n/g, '\\n');

  const code = cleanCodeResponse(llmResponse);

  // Extract the template's fixed header (imports)
  const templateLines = template.split('\n');
  const headerEnd = templateLines.findIndex(l => l.includes('__MIGRATIONS__'));
  const templateHeader = templateLines.slice(0, Math.max(headerEnd, 0)).join('\n');

  // Strip the LLM's imports of modules the template already imports.
  const templateModules = ['hono', 'db.js', 'better-sqlite3', 'zod'];
  let body = code
    .split('\n')
    .filter(line => {
      const trimmed = line.trim();
      return !(trimmed.startsWith('import ') && templateModules.some(mod => trimmed.includes(mod)));
    })
    .join('\n')
    .trim();

  // Remove any duplicate "const router = new Hono()" — keep the first occurrence only.
  const routerDecls = (body.match(/const router\s*=\s*new Hono\(\)/g) ?? []).length;
  if (routerDecls > 1) {
    let found = false;
    body = body.split('\n').filter(line => {
      if (line.includes('const router') && line.includes('new Hono()')) {
        if (found) return false;
        found = true;
      }
      return true;
    }).join('\n');
  }

  // Remove any "export default router" — it is re-added at the end.
  body = body.replace(/\nexport\s+default\s+router\s*;?\s*/g, '\n');

  // Remove any existing _phoenix export (with or without its doc comment).
  body = body.replace(/\/\*\*[^]*?_phoenix[^]*?\*\/\s*export\s+const\s+_phoenix\s*=\s*\{[^}]*\}\s*as\s+const\s*;?\s*/g, '');
  body = body.replace(/export\s+const\s+_phoenix\s*=\s*\{[^}]*\}\s*as\s+const\s*;?\s*/g, '');

  // Ensure router declaration exists
  if (!body.includes('const router') && !body.includes('new Hono()')) {
    body = 'const router = new Hono();\n\n' + body;
  }

  // Build the phoenix metadata
  const phoenixMeta = `/** @internal Phoenix VCS traceability — do not remove. */
export const _phoenix = {
  iu_id: '${quoteSafe(iu.iu_id)}',
  name: '${quoteSafe(iu.name)}',
  risk_tier: '${quoteSafe(iu.risk_tier)}',
  canon_ids: [${iu.source_canon_ids.length} as const],
} as const;`;

  // Fix SQL double-quote issue: SQLite treats "x" as an identifier, string
  // literals need 'x'. The whitespace-tolerant patterns also cover the exact
  // `datetime("now")` / `< datetime("now")` spellings.
  body = body.replace(/datetime\s*\(\s*"now"\s*\)/g, "datetime('now')");
  body = body.replace(/date\s*\(\s*"now"\s*\)/g, "date('now')");
  body = body.replace(/WHEN "(\w+)" THEN/g, "WHEN '$1' THEN");
  body = body.replace(/DEFAULT "([^"]+)"/g, "DEFAULT '$1'");

  // Assemble: template header + LLM body + exports + metadata
  return `${templateHeader}\n\n${body}\n\nexport default router;\n\n${phoenixMeta}\n`;
}
281
/** Compiler settings written into the fallback tsconfig. */
const MINIMAL_COMPILER_OPTIONS = {
  target: 'ES2022',
  module: 'Node16',
  moduleResolution: 'Node16',
  strict: true,
  esModuleInterop: true,
  skipLibCheck: true,
  outDir: 'dist',
  rootDir: 'src',
};

/** Serialized tsconfig.json used when the project has none of its own. */
const MINIMAL_TSCONFIG = JSON.stringify(
  { compilerOptions: MINIMAL_COMPILER_OPTIONS, include: ['src'] },
  null,
  2,
);
295
/**
 * Typecheck a single file by writing it to disk and running tsc.
 *
 * Side effects: writes `content` to `projectRoot/filePath` (creating parent
 * directories) and, when the project has no tsconfig.json, writes a minimal
 * one that is left in place afterwards.
 *
 * @param projectRoot - Directory tsc is run in.
 * @param filePath - Path of the file relative to `projectRoot`; also used to
 *   filter the compiler output.
 * @param content - Source text to write and check.
 * @returns The error lines mentioning `filePath` (or, if there are none, the
 *   whole compiler output), or null when tsc succeeds or fails without
 *   producing any output.
 */
function typecheckFile(projectRoot: string, filePath: string, content: string): string | null {
  const fullPath = join(projectRoot, filePath);
  mkdirSync(dirname(fullPath), { recursive: true });
  writeFileSync(fullPath, content, 'utf8');

  // Ensure tsconfig.json exists for tsc
  const tsconfigPath = join(projectRoot, 'tsconfig.json');
  if (!existsSync(tsconfigPath)) {
    writeFileSync(tsconfigPath, MINIMAL_TSCONFIG, 'utf8');
  }

  try {
    execSync('npx tsc --noEmit 2>&1', {
      cwd: projectRoot,
      timeout: 30000,
      stdio: 'pipe',
    });
    return null; // clean
  } catch (err: unknown) {
    const execErr = err as { stdout?: Buffer; stderr?: Buffer };
    const output = (execErr.stdout?.toString() || '') + (execErr.stderr?.toString() || '');
    // Prefer the diagnostics that belong to this file.
    const fileErrors = output
      .split('\n')
      .filter(line => line.includes(filePath))
      .join('\n')
      .trim();
    // A failure with no output at all (e.g. a timeout) carries nothing that
    // can be fed back as a code error; report it as null rather than as an
    // empty string so the result honours the "string or null" contract.
    return fileErrors || output.trim() || null;
  }
}
331
/**
 * Compose the follow-up prompt that asks the LLM to repair a module.
 *
 * @param code - Current module source, embedded in a typescript fence.
 * @param errors - Compiler output, embedded verbatim.
 * @returns The full prompt text.
 */
function buildFixPrompt(code: string, errors: string): string {
  const fence = '```';
  const rules = [
    '- Output ONLY the fixed TypeScript module. No markdown fences, no explanation.',
    '- Do NOT import external packages. Use only Node.js built-in modules.',
    "- For WebSocket features, use node:http — do NOT import 'ws'.",
    '- For DOM/browser code, use string HTML templates — no DOM APIs.',
    '- The code must compile under strict mode.',
    '- Keep all existing exports and the _phoenix metadata constant.',
  ];

  return [
    'The following TypeScript module has compilation errors. Fix them.',
    '',
    '## Current code:',
    `${fence}typescript`,
    code,
    fence,
    '',
    '## TypeScript errors:',
    errors,
    '',
    '## Rules:',
    ...rules,
    '',
    'Output the complete fixed TypeScript module now.',
  ].join('\n');
}
356
/**
 * Strip markdown code fences from an LLM response.
 *
 * - If the whole (trimmed) response is one fenced block, its content is
 *   returned. Any language tag is accepted (`ts`, `typescript`, `tsx`, `js`,
 *   none, …) and CRLF line endings around the fences are tolerated.
 * - Otherwise, if the response contains a fenced block whose content includes
 *   `export`, that block's content is returned (commentary before/after the
 *   fence is dropped).
 * - Otherwise the trimmed response is returned unchanged.
 *
 * @param raw - Raw LLM output.
 * @returns The code without surrounding fences.
 */
function cleanCodeResponse(raw: string): string {
  const text = raw.trim();

  // Whole response is a single fenced block.
  const whole = text.match(/^```[\w+-]*\s*\n([\s\S]*?)\r?\n```\s*$/);
  if (whole) {
    // Stop here: a fence found *inside* the unwrapped module (e.g. within a
    // template literal) is part of the code and must not be extracted.
    return whole[1];
  }

  // Fenced block with text before/after it.
  const inner = text.match(/```[\w+-]*\s*\n([\s\S]*?)\r?\n```/);
  if (inner && inner[1].includes('export')) {
    return inner[1];
  }

  return text;
}
377
378// ─── Module Generation ───────────────────────────────────────────────────────
379
/**
 * Generate a minimal Hono router stub for architecture mode.
 * Ensures fallback code still produces a valid default-export router: the
 * module declares a router with a single GET `/` route that returns a
 * "Not yet implemented" JSON payload, plus the `_phoenix` metadata constant.
 *
 * IU-provided values are escaped before being placed inside single-quoted
 * literals, so names containing apostrophes or backslashes still yield valid
 * TypeScript.
 *
 * @param iu - Unit whose name, id and risk tier are embedded in the stub.
 * @returns Source text of the stub module.
 */
function generateArchStub(iu: ImplementationUnit): string {
  // Escapes a value for embedding inside a single-quoted string literal.
  const quoteSafe = (value: string): string =>
    String(value).replace(/\\/g, '\\\\').replace(/'/g, "\\'").replace(/\r?\n/g, '\\n');

  const name = quoteSafe(iu.name);

  return `import { Hono } from 'hono';

const router = new Hono();

router.get('/', (c) => c.json({ stub: true, module: '${name}', message: 'Not yet implemented' }));

export default router;

/** @internal Phoenix VCS traceability — do not remove. */
export const _phoenix = {
  iu_id: '${quoteSafe(iu.iu_id)}',
  name: '${name}',
  risk_tier: '${quoteSafe(iu.risk_tier)}',
  canon_ids: [${iu.source_canon_ids.length} as const],
} as const;
`;
}
402
/**
 * Generate a typed TypeScript skeleton module from an IU contract.
 *
 * The emitted module contains, in order:
 * - a header comment with the IU name and risk tier;
 * - a `<Name>Config` interface for invariants that map to config fields;
 * - `<Name>Input` / `<Name>Result` interfaces for contract inputs/outputs;
 * - placeholder type aliases for types referenced by extracted operations;
 * - one throwing function per extracted operation, or a single entry-point
 *   function when no operation could be extracted;
 * - the `_phoenix` metadata constant.
 *
 * Free text from the IU is sanitised before being embedded: `*\/` sequences
 * cannot close a generated comment early, values inside single-quoted
 * literals are escaped, and input/output names that are not valid
 * identifiers are emitted as quoted property keys.
 *
 * @param iu - Unit whose contract drives the generated skeleton.
 * @returns Source text of the skeleton module.
 */
function generateModule(iu: ImplementationUnit): string {
  // Keeps free text from terminating the surrounding block comment.
  const commentSafe = (text: string): string => text.replace(/\*\//g, '*\\/');
  // Escapes a value for embedding inside a single-quoted string literal.
  const quoteSafe = (value: string): string =>
    String(value).replace(/\\/g, '\\\\').replace(/'/g, "\\'").replace(/\r?\n/g, '\\n');
  // Emits a property key verbatim when it is a valid identifier, quoted otherwise.
  const propertyKey = (key: string): string =>
    /^[A-Za-z_$][A-Za-z0-9_$]*$/.test(key) ? key : JSON.stringify(key);

  const lines: string[] = [];
  const moduleName = toPascalCase(iu.name);
  const configName = `${moduleName}Config`;

  // Header
  lines.push(`/**`);
  lines.push(` * ${commentSafe(iu.name)}`);
  lines.push(` *`);
  lines.push(` * AUTO-GENERATED by Phoenix VCS — DO NOT EDIT DIRECTLY`);
  lines.push(` * Risk Tier: ${iu.risk_tier}`);
  lines.push(` */`);
  lines.push('');

  // Config interface from constraints/invariants
  if (iu.contract.invariants.length > 0) {
    const fields = iu.contract.invariants
      .map(inv => ({ inv, field: constraintToConfigField(inv) }))
      .filter((x): x is { inv: string; field: { name: string; type: string } } => x.field !== null);

    if (fields.length > 0) {
      lines.push(`/**`);
      lines.push(` * Configuration and constraints for ${commentSafe(iu.name)}.`);
      lines.push(` */`);
      lines.push(`export interface ${configName} {`);
      for (const { inv, field } of fields) {
        lines.push(`  /** ${commentSafe(inv)} */`);
        lines.push(`  ${field.name}: ${field.type};`);
      }
      lines.push('}');
      lines.push('');
    }
  }

  // Input/output interfaces
  const inputTypeName = `${moduleName}Input`;
  const outputTypeName = `${moduleName}Result`;

  if (iu.contract.inputs.length > 0) {
    lines.push(`export interface ${inputTypeName} {`);
    for (const inp of iu.contract.inputs) {
      lines.push(`  ${propertyKey(inp)}: unknown;`);
    }
    lines.push('}');
    lines.push('');
  }

  if (iu.contract.outputs.length > 0) {
    lines.push(`export interface ${outputTypeName} {`);
    for (const out of iu.contract.outputs) {
      lines.push(`  ${propertyKey(out)}: unknown;`);
    }
    lines.push('}');
    lines.push('');
  }

  // Extract distinct operations from requirement statements
  const operations = extractOperations(iu);

  if (operations.length > 0) {
    // Declare a placeholder alias for every type the operations reference
    // that is neither a primitive nor one of the interfaces emitted above.
    const knownTypes = new Set(['unknown', 'void', 'boolean', 'string', 'number', 'object',
      inputTypeName, outputTypeName, configName]);
    const placeholders = new Set<string>();
    for (const op of operations) {
      for (const t of extractTypeRefs(op.params, op.returnType)) {
        if (!knownTypes.has(t)) placeholders.add(t);
      }
    }
    for (const t of placeholders) {
      lines.push(`/** Placeholder type — replace with your domain model. */`);
      lines.push(`export type ${t} = Record<string, unknown>;`);
      lines.push('');
    }

    for (const op of operations) {
      lines.push(`/**`);
      lines.push(` * ${commentSafe(op.description)}`);
      lines.push(` */`);
      lines.push(`export function ${op.name}(${op.params}): ${op.returnType} {`);
      lines.push(`  // TODO: implement`);
      lines.push(`  throw new Error('Not implemented: ${op.name}');`);
      lines.push('}');
      lines.push('');
    }
  } else {
    // Fallback: single entry-point function
    const funcName = toCamelCase(iu.name);
    const params = iu.contract.inputs.length > 0
      ? `input: ${inputTypeName}`
      : '';
    const ret = iu.contract.outputs.length > 0 ? outputTypeName : 'void';
    lines.push(`/**`);
    lines.push(` * ${commentSafe(iu.contract.description.split('.')[0] || iu.name)}.`);
    lines.push(` */`);
    lines.push(`export function ${funcName}(${params}): ${ret} {`);
    lines.push(`  // TODO: implement`);
    lines.push(`  throw new Error('Not implemented: ${funcName}');`);
    lines.push('}');
    lines.push('');
  }

  // Phoenix metadata (compact)
  lines.push(`/** @internal Phoenix VCS traceability — do not remove. */`);
  lines.push(`export const _phoenix = {`);
  lines.push(`  iu_id: '${quoteSafe(iu.iu_id)}',`);
  lines.push(`  name: '${quoteSafe(iu.name)}',`);
  lines.push(`  risk_tier: '${quoteSafe(iu.risk_tier)}',`);
  lines.push(`  canon_ids: [${iu.source_canon_ids.length} as const],`);
  lines.push('} as const;');
  lines.push('');

  return lines.join('\n');
}
524
525// ─── Operation Extraction ────────────────────────────────────────────────────
526
/** A function signature derived from a requirement statement. */
interface Operation {
  /** Function name; the detected verb. */
  name: string;
  /** Statement the operation was derived from, used as its doc comment. */
  description: string;
  /** Parameter list source text, e.g. `token: Token`. */
  params: string;
  /** Return type source text. */
  returnType: string;
}
533
/**
 * Extract distinct function operations from an IU's contract description.
 *
 * The description is split into statements on ". ". Each statement is
 * assigned to the first verb pattern it matches, and one operation is emitted
 * per detected verb, described by the first statement of its group. At most
 * eight operations are returned.
 *
 * The parameter name and type are derived from the statement's subject. When
 * no subject is found, or it does not convert to a valid identifier (empty or
 * starting with a digit), the operation falls back to `input: unknown`.
 *
 * @param iu - Unit whose `contract.description` is analysed.
 * @returns Operations in order of first appearance of their verb.
 */
function extractOperations(iu: ImplementationUnit): Operation[] {
  // Parse requirements for action verbs
  const patterns: { pattern: RegExp; verb: string }[] = [
    { pattern: /\bmust (?:support |handle )?creat(?:e|ing)\b/i, verb: 'create' },
    { pattern: /\bmust (?:support |handle )?validat(?:e|ing)\b/i, verb: 'validate' },
    { pattern: /\bmust (?:support |handle )?verif(?:y|ying)\b/i, verb: 'verify' },
    { pattern: /\bmust (?:support |handle )?authenticat(?:e|ing)\b/i, verb: 'authenticate' },
    { pattern: /\bmust (?:support |handle )?delet(?:e|ing)\b/i, verb: 'delete' },
    { pattern: /\bmust (?:support |handle )?updat(?:e|ing)\b/i, verb: 'update' },
    { pattern: /\bmust (?:support |handle )?search(?:ing)?\b/i, verb: 'search' },
    { pattern: /\bmust (?:support |handle )?send(?:ing)?\b/i, verb: 'send' },
    { pattern: /\bmust (?:support |handle )?deliver(?:y|ing)?\b/i, verb: 'deliver' },
    { pattern: /\bmust (?:support |handle )?publish(?:ing)?\b/i, verb: 'publish' },
    { pattern: /\bmust (?:support |handle )?rout(?:e|ing)\b/i, verb: 'route' },
    { pattern: /\bmust (?:support |handle )?log(?:ging)?\b/i, verb: 'log' },
    { pattern: /\bmust (?:support |handle )?reject(?:ed|ing)?\b/i, verb: 'reject' },
    { pattern: /\bmust (?:be )?rate.?limit(?:ed|ing)?\b/i, verb: 'rateLimit' },
    { pattern: /\bmust (?:support |handle )?retr(?:y|ying|ied)\b/i, verb: 'retry' },
    { pattern: /\bmust (?:support |handle )?configur(?:e|ing|able)\b/i, verb: 'configure' },
    { pattern: /\bmust (?:support |handle )?expos(?:e|ing)\b/i, verb: 'expose' },
    { pattern: /\bmust (?:support |handle )?implement(?:ing)?\b/i, verb: 'handle' },
    { pattern: /\bmust (?:support |handle )?inject(?:ing)?\b/i, verb: 'inject' },
    { pattern: /\bmust (?:support |handle )?stor(?:e|ing)\b/i, verb: 'store' },
    { pattern: /\bmust (?:support |handle )?archiv(?:e|ing)\b/i, verb: 'archive' },
    { pattern: /\bmust (?:support |handle )?mark(?:ing)?\b/i, verb: 'mark' },
    { pattern: /\bmust (?:support |handle )?process(?:ing|ed)?\b/i, verb: 'process' },
  ];

  // Group statements by detected verb. A Map keeps first-seen order and makes
  // each verb unique, so no separate de-duplication is needed.
  const verbGroups = new Map<string, string[]>();
  for (const statement of iu.contract.description.split('. ').filter(Boolean)) {
    const hit = patterns.find(({ pattern }) => pattern.test(statement));
    if (!hit) continue;
    const group = verbGroups.get(hit.verb);
    if (group) {
      group.push(statement);
    } else {
      verbGroups.set(hit.verb, [statement]);
    }
  }

  const voidVerbs = new Set(['delete', 'log', 'archive', 'mark']);
  const ops: Operation[] = [];

  // Generate one function per unique verb
  for (const [verb, statements] of verbGroups) {
    // Derive params from the object being acted on
    const subject = extractSubject(statements[0], verb);
    const candidateName = subject ? toCamelCase(subject) : '';
    // The naming helpers emit only letters and digits, so a leading letter is
    // all that is needed for a valid identifier.
    const usable = /^[A-Za-z]/.test(candidateName);
    const paramName = usable ? candidateName : 'input';
    const paramType = usable && subject ? toPascalCase(subject) : 'unknown';

    let returnType = paramType;
    if (verb === 'validate' || verb === 'verify') {
      returnType = 'boolean';
    } else if (verb === 'search') {
      returnType = `${paramType}[]`;
    } else if (voidVerbs.has(verb)) {
      returnType = 'void';
    }

    ops.push({
      name: verb,
      description: statements[0],
      params: `${paramName}: ${paramType}`,
      returnType,
    });
  }

  // Limit to reasonable number
  return ops.slice(0, 8);
}
611
/**
 * Try to extract the object/subject from a requirement statement.
 *
 * Looks for "must [support|handle] <verb…> <object>" and returns up to the
 * first two words of the object, with a leading article/quantifier and any
 * parenthesised text removed and the last word naively singularised.
 *
 * "the service must validate JWT tokens" → "JWT token"
 * "the gateway must reject expired tokens" → "expired token"
 * "the service must support creating users" → "user"
 *
 * The verb is matched by its stem (a trailing "e"/"y" is dropped) so that
 * inflected forms such as "creating" or "verified" are recognised too.
 *
 * @param statement - Requirement sentence to inspect.
 * @param verb - Base form of the verb to look for.
 * @returns The subject phrase, or null when the pattern does not match.
 */
function extractSubject(statement: string, verb: string): string | null {
  // Stem + escape: "create" → "creat", and regex metacharacters are neutralised.
  const stem = verb.replace(/[ey]$/, '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Pattern: "must <verb> <object>", the object ending at a preposition,
  // punctuation, or the end of the statement.
  const regex = new RegExp(`must\\s+(?:support\\s+|handle\\s+)?${stem}\\w*\\s+(.+?)(?:\\s+(?:with|from|to|for|on|in|at|by|using|via|when|after|before)\\b|[.;,]|$)`, 'i');
  const match = statement.match(regex);
  if (!match) return null;

  const raw = match[1]
    .replace(/^(?:a|an|the|all|each|every|new)\s+/i, '')
    .replace(/\s*\(.*?\)/g, '')
    .trim();

  // Take the core noun — typically 1-2 meaningful words
  const words = raw.split(/\s+/)
    .filter(w => w.length > 1)
    .slice(0, 2);
  if (words.length === 0) return null;

  // Singularize simple plurals, but leave words ending in "ss", "us" or "is"
  // ("address", "status", "analysis") alone — those are not plurals.
  const last = words.length - 1;
  words[last] = words[last].replace(/([^sui])s$/, '$1');
  return words.join(' ');
}
639
/**
 * Convert a constraint statement to a config field.
 *
 * - A number followed by a unit ("5 per minute", "100 characters",
 *   "30 seconds", …) yields a numeric field named after the constraint's
 *   subject: `<subject>RateLimitPer<Unit>` for rate limits,
 *   `<subject>Ttl<Unit>` for expiry/TTL/window constraints, and
 *   `<subject>Max<Unit>` otherwise. "per <x>" units are rendered as `Per<X>`
 *   (e.g. `RateLimitPerMinute`, `MaxPerDay`).
 * - A constraint containing "configurable" yields `<subject>Config`.
 * - Anything else returns null: such constraints are better expressed as code
 *   logic than as configuration.
 *
 * @param constraint - Invariant/constraint sentence.
 * @returns The field name and type, or null when no field applies.
 */
function constraintToConfigField(constraint: string): { name: string; type: string } | null {
  // Numeric limits: "rate limited to 5 per minute", "limited to 100 characters"
  const numMatch = constraint.match(/(\d+)\s*(per\s+\w+|characters|bytes|kb|mb|seconds?|minutes?|hours?|days?|retries|attempts)/i);
  if (numMatch) {
    const subject = extractConstraintSubject(constraint);
    const rawUnit = numMatch[2].toLowerCase();
    // Split "per minute" into its parts so the name does not end up as
    // "…PerPerminute".
    const perUnit = rawUnit.match(/^per\s+(\w+)$/);
    const unitName = capitalize(perUnit ? perUnit[1] : rawUnit);

    if (/rate.?limit/i.test(constraint)) {
      return { name: `${subject}RateLimitPer${unitName}`, type: 'number' };
    }
    const unitSuffix = perUnit ? `Per${unitName}` : unitName;
    if (/expir|ttl|window/i.test(constraint)) {
      return { name: `${subject}Ttl${unitSuffix}`, type: 'number' };
    }
    return { name: `${subject}Max${unitSuffix}`, type: 'number' };
  }

  // Configurable things: "CORS headers must be configurable per route"
  if (/\bconfigurable\b/i.test(constraint)) {
    const subject = extractConstraintSubject(constraint);
    return { name: `${subject}Config`, type: 'Record<string, unknown>' };
  }

  // Skip vague "must not" / "never" constraints — they're invariants, not config
  return null;
}
669
/**
 * Extract a short camelCase subject identifier from a constraint.
 *
 * The statement is lower-cased, stripped of filler words and punctuation, and
 * the first two remaining meaningful words are joined in camelCase. Words of
 * two characters or fewer, common verbs/prepositions, and tokens starting
 * with a digit are ignored, so the result never starts with a digit.
 *
 * "the service must not send more than 10 emails" → "emails"
 * "uploads are limited to 100 characters" → "uploadsCharacters"
 *
 * @param statement - Constraint sentence.
 * @returns The identifier, or "value" when nothing meaningful remains.
 */
function extractConstraintSubject(statement: string): string {
  const words = statement
    .toLowerCase()
    .replace(/\b(?:the|a|an|must|be|is|are|not|no|shall|never|always|service|gateway|system)\b/g, '')
    .replace(/[^a-z0-9\s]/g, '')
    .trim()
    .split(/\s+/)
    .filter(w => w.length > 2);

  // Skip common verbs/prepositions, and numeric tokens such as "100": the
  // result is used as the start of an identifier.
  const skip = new Set(['send', 'store', 'access', 'more', 'than', 'per', 'with', 'for', 'from', 'limited', 'exceed', 'larger']);
  const meaningful = words.filter(w => !skip.has(w) && !/^\d/.test(w));
  return toCamelCase(meaningful.slice(0, 2).join(' ')) || 'value';
}
689
/** Upper-case the first character of `s`, leaving the remainder untouched. */
function capitalize(s: string): string {
  const head = s.slice(0, 1);
  const tail = s.slice(1);
  return `${head.toUpperCase()}${tail}`;
}
693
/**
 * Collect the capitalised type names mentioned in a parameter list and a
 * return type.
 *
 * "jwtToken: JwtToken" → ["JwtToken"]
 * "User[]" (return type) → ["User"]
 *
 * @param params - Parameter list source text; every ": Type" whose type
 *   starts with an upper-case letter contributes one entry.
 * @param returnType - Return type source text; a trailing "[]" is removed and
 *   the rest is included when it starts with an upper-case letter.
 * @returns Parameter types first, then the return type; duplicates are kept.
 */
function extractTypeRefs(params: string, returnType: string): string[] {
  const fromParams = Array.from(params.matchAll(/:\s*([A-Z][A-Za-z0-9]*)/g), hit => hit[1]);
  const elementType = returnType.replace(/\[\]$/, '');
  return /^[A-Z]/.test(elementType) ? [...fromParams, elementType] : fromParams;
}
709
710// ─── Naming Utilities ────────────────────────────────────────────────────────
711
/**
 * Convert arbitrary text to camelCase. Runs of ASCII letters/digits are the
 * words; everything else acts as a separator. The first word is lower-cased,
 * later words are capitalised with the rest lower-cased.
 */
function toCamelCase(str: string): string {
  const words = str.match(/[a-zA-Z0-9]+/g) ?? [];
  let result = '';
  words.forEach((word, position) => {
    const lower = word.toLowerCase();
    result += position === 0 ? lower : word.charAt(0).toUpperCase() + lower.slice(1);
  });
  return result;
}
720
/**
 * Convert arbitrary text to PascalCase. Runs of ASCII letters/digits are the
 * words; everything else acts as a separator. Every word is capitalised with
 * the rest lower-cased.
 */
function toPascalCase(str: string): string {
  const words = str.match(/[a-zA-Z0-9]+/g) ?? [];
  return words.reduce(
    (joined, word) => joined + word.charAt(0).toUpperCase() + word.toLowerCase().slice(1),
    '',
  );
}