src/models/evaluation.ts at main · chadfowler.com/phoenix

chadfowler.com / phoenix

fork atom

Reference implementation for the Phoenix Architecture. Work in progress. aicoding.leaflet.pub/

ai coding crazy

fork atom

phoenix / src / models / evaluation.ts

at main 87 lines 2.9 kB view raw

wrap content

Chad Fowler feat: implement Fowler gap analysis — evaluations, pace layers, conceptual mass, negative knowledge, replacement audit 4w ago

f5187379

 1/**
 2 * Evaluation model — durable behavioral truth surface.
 3 *
 4 * Evaluations bind to behavior at IU boundaries, not to implementation internals.
 5 * They survive regeneration. The separating question: "Would this assertion still
 6 * be meaningful if the entire implementation were replaced tomorrow?"
 7 *
 8 * Distinct from implementation tests, which die with the code they describe.
 9 * (See: Fowler, The Phoenix Architecture, Chapter 5)
10 */
11
/**
 * The aspect of behavior an evaluation makes its assertion about.
 *
 * - `domain_rule`: business logic invariant
 * - `boundary_contract`: input/output shape at an IU boundary
 * - `constraint`: latency, throughput, error rate
 * - `invariant`: property that holds across all states
 * - `failure_mode`: behavior under error conditions
 */
export type EvaluationBinding =
  | 'domain_rule'
  | 'boundary_contract'
  | 'constraint'
  | 'invariant'
  | 'failure_mode';
19
/**
 * How an evaluation came to exist, which in turn indicates how confident
 * we can be that it captures real behavior.
 *
 * - `specified`: derived from spec/intent
 * - `characterization`: captured from an existing (legacy) implementation
 * - `incident`: added after a production incident
 * - `audit`: added during an evaluation audit
 */
export type EvaluationOrigin =
  | 'specified'
  | 'characterization'
  | 'incident'
  | 'audit';
26
/**
 * One behavioral evaluation attached to an IU boundary.
 *
 * The assertion is carried twice: as a free-form sentence (`assertion`) and
 * as a Given/When/Then triple (`given`, `when`, `then`).
 */
export interface Evaluation {
  /** Content-addressed unique identifier. */
  eval_id: string;

  /** Name intended for human readers. */
  name: string;

  /** ID of the IU boundary being evaluated. */
  iu_id: string;

  /** The kind of behavior this evaluation binds to. */
  binding: EvaluationBinding;

  /** The way this evaluation was created. */
  origin: EvaluationOrigin;

  /** The behavioral assertion, phrased for human readers. */
  assertion: string;

  /** Given: the preconditions. */
  given: string;

  /** When: the action performed at the boundary. */
  when: string;

  /** Then: the expected observable outcome. */
  then: string;

  /** IDs of the canonical nodes covered by this evaluation. */
  canon_ids: Array<string>;

  /** True for conservation-layer evaluations (surface stability). */
  conservation: boolean;

  /** Provenance: the reason this evaluation exists. */
  rationale?: string;

  /** Link to the incident or decision that motivated this evaluation. */
  provenance_ref?: string;

  /** Creation timestamp (the string format is not fixed by this type). */
  created_at: string;

  /** Timestamp of the most recent verification, if any. */
  last_verified_at?: string;

  /** Outcome of the most recent verification. */
  last_status?: 'pass' | 'fail' | 'untested';
}
64
/**
 * Coverage report describing the evaluations that exist for a single IU.
 */
export interface EvaluationCoverage {
  /** ID of the IU this report is about. */
  iu_id: string;

  /** Name of the IU this report is about. */
  iu_name: string;

  /** Number of evaluations counted for the IU. */
  total_evaluations: number;

  /** Evaluation counts keyed by binding; every binding has an entry. */
  by_binding: Record<EvaluationBinding, number>;

  /** Evaluation counts keyed by origin; every origin has an entry. */
  by_origin: Record<EvaluationOrigin, number>;

  /** Canonical node IDs that are covered. */
  canon_ids_covered: Array<string>;

  /** Canonical node IDs that are not covered. */
  canon_ids_uncovered: Array<string>;

  /**
   * Coverage expressed as a ratio.
   * NOTE(review): presumably covered / (covered + uncovered) canonical IDs;
   * the formula is not defined in this file — confirm with the producer.
   */
  coverage_ratio: number;

  /**
   * NOTE(review): presumably the number of evaluations flagged as
   * conservation-layer — confirm with the producer.
   */
  conservation_count: number;

  /** Gap analysis results. */
  gaps: Array<EvaluationGap>;
}
81
/**
 * A single finding produced by evaluation gap analysis.
 */
export interface EvaluationGap {
  /** The kind of gap that was found. */
  category:
    | 'missing_boundary'
    | 'missing_invariant'
    | 'missing_failure_mode'
    | 'untested'
    | 'stale';

  /** What the gap concerns (free-form; the format is not fixed by this type). */
  subject: string;

  /** Description of the gap. */
  message: string;

  /** Suggested remediation. */
  recommended_action: string;
}