Reference implementation for the Phoenix Architecture. Work in progress.
aicoding.leaflet.pub/
ai
coding
crazy
/**
 * Semantic hashing for clauses.
 *
 * Two hash types:
 * - clause_semhash: content identity (normalized text only)
 * - context_semhash_cold: local structural context (content + section + neighbors)
 */
8
9import { createHash } from 'node:crypto';
10
/**
 * Compute the SHA-256 digest of a string.
 *
 * @param input - Text to hash; it is fed to the hasher as UTF-8.
 * @returns The digest encoded as a hex string.
 */
export function sha256(input: string): string {
  return createHash('sha256').update(input, 'utf8').digest('hex');
}
17
/**
 * Compute clause_semhash — pure content identity.
 *
 * The hash covers the given text and nothing else; this function performs no
 * normalization of its own, so the caller must pass text that is already
 * normalized.
 *
 * @param normalizedText - Normalized clause text.
 * @returns SHA-256 hex digest of `normalizedText`.
 */
export function clauseSemhash(normalizedText: string): string {
  return sha256(normalizedText);
}
24
/**
 * Compute context_semhash_cold — content + local structural context.
 *
 * The hashed input is built from four fields, in this order:
 * - normalized text
 * - section path (heading hierarchy), segments joined with '/'
 * - previous clause's semhash (or empty string)
 * - next clause's semhash (or empty string)
 *
 * The fields are joined with a NUL character ('\x00') and the result is
 * hashed with SHA-256.
 *
 * NOTE(review): because segments are joined with '/', a single segment that
 * contains '/' hashes the same as the equivalent deeper path (['a/b'] vs
 * ['a', 'b']); likewise a NUL inside any field is not escaped. The encoding is
 * kept as-is because changing it would change every existing hash.
 *
 * @param normalizedText - Normalized clause text.
 * @param sectionPath - Heading hierarchy for the clause; not mutated.
 * @param prevClauseSemhash - Semhash of the preceding clause, or '' if none.
 * @param nextClauseSemhash - Semhash of the following clause, or '' if none.
 * @returns SHA-256 hex digest of the combined fields.
 */
export function contextSemhashCold(
  normalizedText: string,
  sectionPath: readonly string[],
  prevClauseSemhash: string,
  nextClauseSemhash: string,
): string {
  const parts = [
    normalizedText,
    sectionPath.join('/'),
    prevClauseSemhash,
    nextClauseSemhash,
  ];
  // NUL separates the fields so adjacent fields cannot run together.
  return sha256(parts.join('\x00'));
}
48
/**
 * Compute content-addressed clause ID.
 *
 * The ID is the SHA-256 hex digest of the source document ID, the section path
 * (segments joined with '/'), and the normalized text, joined in that order
 * with a NUL character ('\x00'). Identical inputs always yield the same ID.
 *
 * NOTE(review): '/' inside a path segment and NUL inside any field are not
 * escaped, so such inputs can produce the same ID as a differently-split
 * input. The encoding is kept as-is to preserve existing IDs.
 *
 * @param sourceDocId - Identifier of the document the clause comes from.
 * @param sectionPath - Heading hierarchy for the clause; not mutated.
 * @param normalizedText - Normalized clause text.
 * @returns SHA-256 hex digest identifying the clause.
 */
export function clauseId(
  sourceDocId: string,
  sectionPath: readonly string[],
  normalizedText: string,
): string {
  return sha256([sourceDocId, sectionPath.join('/'), normalizedText].join('\x00'));
}
58}