Reference implementation for the Phoenix Architecture. Work in progress. aicoding.leaflet.pub/
ai coding crazy
at main 58 lines 1.4 kB view raw
/**
 * Semantic hashing for clauses.
 *
 * Two hash types:
 * - clause_semhash: content identity (normalized text only)
 * - context_semhash_cold: local structural context (content + section + neighbors)
 *
 * Composite hashes are built by joining their fields with a NUL character
 * and hashing the result, so the field order is part of the hash identity.
 */

import { createHash } from 'node:crypto';

/** Separator placed between the fields of a composite hash input. */
const FIELD_SEPARATOR = '\x00';

/** Separator placed between the segments of a section path. */
const PATH_SEPARATOR = '/';

/**
 * Flatten a heading hierarchy into a single string.
 *
 * Segments are joined without escaping, so `['a/b']` and `['a', 'b']`
 * flatten to the same string (and therefore hash identically). An empty
 * path flattens to the empty string.
 */
function joinSectionPath(sectionPath: readonly string[]): string {
  return sectionPath.join(PATH_SEPARATOR);
}

/**
 * Hash an ordered list of fields as one NUL-separated string.
 */
function hashFields(fields: readonly string[]): string {
  return sha256(fields.join(FIELD_SEPARATOR));
}

/**
 * Compute SHA-256 hex digest of input string.
 *
 * @param input - Text to hash; it is fed to the hash as UTF-8.
 * @returns The digest in hexadecimal form.
 */
export function sha256(input: string): string {
  const hasher = createHash('sha256');
  hasher.update(input, 'utf8');
  return hasher.digest('hex');
}

/**
 * Compute clause_semhash — pure content identity.
 *
 * @param normalizedText - Clause text; hashed exactly as given.
 * @returns SHA-256 hex digest of `normalizedText`.
 */
export function clauseSemhash(normalizedText: string): string {
  return sha256(normalizedText);
}

/**
 * Compute context_semhash_cold — content + local structural context.
 *
 * The hashed fields, in order:
 * - normalized text
 * - section path (heading hierarchy, joined with '/')
 * - previous clause's semhash (or empty string)
 * - next clause's semhash (or empty string)
 *
 * @param normalizedText - Clause text; hashed exactly as given.
 * @param sectionPath - Heading hierarchy of the clause; not mutated.
 * @param prevClauseSemhash - Semhash of the preceding clause, or `''`.
 * @param nextClauseSemhash - Semhash of the following clause, or `''`.
 * @returns SHA-256 hex digest of the NUL-joined fields.
 */
export function contextSemhashCold(
  normalizedText: string,
  sectionPath: readonly string[],
  prevClauseSemhash: string,
  nextClauseSemhash: string,
): string {
  return hashFields([
    normalizedText,
    joinSectionPath(sectionPath),
    prevClauseSemhash,
    nextClauseSemhash,
  ]);
}

/**
 * Compute content-addressed clause ID.
 *
 * The hashed fields, in order: source document ID, section path (joined
 * with '/'), normalized text.
 *
 * @param sourceDocId - Identifier of the document the clause came from.
 * @param sectionPath - Heading hierarchy of the clause; not mutated.
 * @param normalizedText - Clause text; hashed exactly as given.
 * @returns SHA-256 hex digest of the NUL-joined fields.
 */
export function clauseId(
  sourceDocId: string,
  sectionPath: readonly string[],
  normalizedText: string,
): string {
  return hashFields([sourceDocId, joinSectionPath(sectionPath), normalizedText]);
}