// Reference implementation for the Phoenix Architecture. Work in progress.
// aicoding.leaflet.pub/
// ai
// coding
// crazy
/**
 * Evaluation model — durable behavioral truth surface.
 *
 * Evaluations bind to behavior at IU boundaries, not to implementation internals.
 * They survive regeneration. The separating question: "Would this assertion still
 * be meaningful if the entire implementation were replaced tomorrow?"
 *
 * Distinct from implementation tests, which die with the code they describe.
 * (See: Fowler, The Phoenix Architecture, Chapter 5)
 */

/**
 * What an evaluation asserts about.
 *
 * Each member names the kind of behavior the assertion is bound to:
 * - `domain_rule`: business logic invariant
 * - `boundary_contract`: input/output shape at an IU boundary
 * - `constraint`: latency, throughput, error rate
 * - `invariant`: property that holds across all states
 * - `failure_mode`: behavior under error conditions
 */
export type EvaluationBinding =
  | 'domain_rule' // business logic invariant
  | 'boundary_contract' // input/output shape at IU boundary
  | 'constraint' // latency, throughput, error rate
  | 'invariant' // property that holds across all states
  | 'failure_mode'; // behavior under error conditions

/**
 * How an evaluation came to exist, which indicates how confident we are that
 * it captures real behavior.
 *
 * - `specified`: derived from spec/intent
 * - `characterization`: captured from an existing (legacy) implementation
 * - `incident`: added after a production incident
 * - `audit`: added during an evaluation audit
 */
export type EvaluationOrigin =
  | 'specified' // derived from spec/intent
  | 'characterization' // captured from existing implementation (legacy)
  | 'incident' // added after a production incident
  | 'audit'; // added during evaluation audit

/**
 * A single behavioral evaluation attached to an IU boundary.
 *
 * Carries the assertion in both free-text and Given/When/Then form, the
 * canonical nodes it covers, and optional provenance and verification state.
 */
export interface Evaluation {
  /** Unique ID, content-addressed */
  eval_id: string;
  /** Human-readable name */
  name: string;
  /** Which IU boundary this evaluates */
  iu_id: string;
  /** What this evaluation binds to */
  binding: EvaluationBinding;
  /** How this evaluation was created */
  origin: EvaluationOrigin;
  /** Behavioral assertion in human-readable form */
  assertion: string;
  /** Given/When/Then specification: preconditions */
  given: string;
  /** Given/When/Then specification: action at the boundary */
  when: string;
  /** Given/When/Then specification: expected observable outcome */
  then: string;
  /** Canonical node IDs this evaluation covers */
  canon_ids: string[];
  /** Whether this is a conservation-layer evaluation (surface stability) */
  conservation: boolean;
  /** Provenance: why this evaluation exists */
  rationale?: string;
  /** Link to incident/decision that motivated this */
  provenance_ref?: string;
  /** Created timestamp (string format is not fixed by this type) */
  created_at: string;
  /** Last verified timestamp; absent when no verification has been recorded */
  last_verified_at?: string;
  /** Status of last verification */
  last_status?: 'pass' | 'fail' | 'untested';
}

/**
 * Evaluation coverage report for an IU.
 *
 * Summarizes how many evaluations exist for the IU, how they are distributed
 * by binding and origin, which canonical nodes are covered, and detected gaps.
 */
export interface EvaluationCoverage {
  /** ID of the IU this report describes */
  iu_id: string;
  /** Name of the IU this report describes */
  iu_name: string;
  /** Number of evaluations counted in this report */
  total_evaluations: number;
  /** Evaluation count per binding; every binding key must be present */
  by_binding: Record<EvaluationBinding, number>;
  /** Evaluation count per origin; every origin key must be present */
  by_origin: Record<EvaluationOrigin, number>;
  /** Canonical node IDs reported as covered */
  canon_ids_covered: string[];
  /** Canonical node IDs reported as not covered */
  canon_ids_uncovered: string[];
  /**
   * Coverage ratio.
   * NOTE(review): presumably covered / (covered + uncovered) in [0, 1];
   * the formula is not visible in this file, confirm against the producer.
   */
  coverage_ratio: number;
  /**
   * NOTE(review): presumably the number of evaluations whose `conservation`
   * flag is true; confirm against the producer.
   */
  conservation_count: number;
  /** Gap analysis */
  gaps: EvaluationGap[];
}

/**
 * One finding from evaluation gap analysis.
 */
export interface EvaluationGap {
  /** Kind of gap detected */
  category: 'missing_boundary' | 'missing_invariant' | 'missing_failure_mode' | 'untested' | 'stale';
  /** What the gap is about (the identifier scheme is not fixed by this type) */
  subject: string;
  /** Description of the gap */
  message: string;
  /** Suggested step to close the gap */
  recommended_action: string;
}