Reference implementation for the Phoenix Architecture. Work in progress. aicoding.leaflet.pub/
ai coding crazy
at main 87 lines 2.9 kB view raw
/**
 * Evaluation model — durable behavioral truth surface.
 *
 * Evaluations bind to behavior at IU boundaries, not to implementation internals.
 * They survive regeneration. The separating question: "Would this assertion still
 * be meaningful if the entire implementation were replaced tomorrow?"
 *
 * Distinct from implementation tests, which die with the code they describe.
 * (See: Fowler, The Phoenix Architecture, Chapter 5)
 *
 * This module declares types only; it has no runtime behavior.
 */

/** What the evaluation asserts about */
export type EvaluationBinding =
  | 'domain_rule' // business logic invariant
  | 'boundary_contract' // input/output shape at IU boundary
  | 'constraint' // latency, throughput, error rate
  | 'invariant' // property that holds across all states
  | 'failure_mode'; // behavior under error conditions

/** How confident we are the evaluation captures real behavior */
export type EvaluationOrigin =
  | 'specified' // derived from spec/intent
  | 'characterization' // captured from existing implementation (legacy)
  | 'incident' // added after a production incident
  | 'audit'; // added during evaluation audit

/**
 * A single behavioral evaluation attached to an IU boundary.
 *
 * Fields marked optional (`?`) may be absent; all others are required.
 */
export interface Evaluation {
  /** Unique ID, content-addressed */
  eval_id: string;
  /** Human-readable name */
  name: string;
  /** Which IU boundary this evaluates */
  iu_id: string;
  /** What this evaluation binds to */
  binding: EvaluationBinding;
  /** How this evaluation was created */
  origin: EvaluationOrigin;
  /** Behavioral assertion in human-readable form */
  assertion: string;
  /**
   * Given/When/Then specification:
   * - given: preconditions
   * - when: action at the boundary
   * - then: expected observable outcome
   */
  given: string;
  when: string;
  then: string;
  /** Canonical node IDs this evaluation covers */
  canon_ids: string[];
  /** Whether this is a conservation-layer evaluation (surface stability) */
  conservation: boolean;
  /** Provenance: why this evaluation exists */
  rationale?: string;
  /** Link to incident/decision that motivated this */
  provenance_ref?: string;
  /**
   * Created timestamp.
   * NOTE(review): stored as a string; the format is not specified here
   * (presumably ISO 8601 — confirm against the code that writes it).
   */
  created_at: string;
  /** Last verified timestamp (same string representation as `created_at` — TODO confirm) */
  last_verified_at?: string;
  /** Status of last verification */
  last_status?: 'pass' | 'fail' | 'untested';
}

/**
 * Evaluation coverage report for an IU
 */
export interface EvaluationCoverage {
  iu_id: string;
  iu_name: string;
  total_evaluations: number;
  /** Counts keyed by binding; the `Record` type requires an entry for every `EvaluationBinding` member. */
  by_binding: Record<EvaluationBinding, number>;
  /** Counts keyed by origin; the `Record` type requires an entry for every `EvaluationOrigin` member. */
  by_origin: Record<EvaluationOrigin, number>;
  canon_ids_covered: string[];
  canon_ids_uncovered: string[];
  /**
   * NOTE(review): presumably covered / (covered + uncovered) canonical IDs,
   * in [0, 1] — confirm against the code that computes it.
   */
  coverage_ratio: number;
  /** NOTE(review): presumably the number of evaluations with `conservation: true` — confirm. */
  conservation_count: number;
  /** Gap analysis */
  gaps: EvaluationGap[];
}

/**
 * One entry in the gap analysis of an `EvaluationCoverage` report.
 */
export interface EvaluationGap {
  category:
    | 'missing_boundary'
    | 'missing_invariant'
    | 'missing_failure_mode'
    | 'untested'
    | 'stale';
  subject: string;
  message: string;
  recommended_action: string;
}