Anonymize your writing style: a Zig WASM engine detects authorship markers, and a fine-tuned LLM rewrites the text to remove them. Runs entirely in-browser. fantasma.qstorage.quilibrium.com/
wasm privacy qwen zig
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

web updates

Entire-Checkpoint: c46fc6ba87b8

chris e5600d15 8f8df595

+3907 -540
+5
.gitignore
··· 5 5 6 6 # Model weights 7 7 weights/ 8 + QwenTheBard/ 8 9 *.bin 10 + *.safetensors 11 + *.gguf 9 12 10 13 # Distribution 11 14 dist/ ··· 15 18 .gemini/ 16 19 .opencode/ 17 20 .claude/ 21 + 22 + FANTASMA-PRD.md 18 23 19 24 # OS 20 25 .DS_Store
+8 -15
dist.sh
··· 2 2 set -euo pipefail 3 3 4 4 ROOT="$(cd "$(dirname "$0")" && pwd)" 5 - DIST="$ROOT/dist" 5 + DIST="$ROOT/web/dist" 6 6 7 7 echo "=== Fantasma dist ===" 8 8 ··· 12 12 zig build wasm 13 13 echo " fantasma.wasm: $(du -h zig-out/bin/fantasma.wasm | cut -f1)" 14 14 15 - # Assemble dist/ 16 - echo "[2/3] Assembling dist/..." 17 - rm -rf "$DIST" 18 - mkdir -p "$DIST" 15 + # Copy WASM to public/ so Vite can serve it 16 + cp zig-out/bin/fantasma.wasm "$ROOT/web/public/" 19 17 20 - cp zig-out/bin/fantasma.wasm "$DIST/" 21 - cp web/index.html "$DIST/" 22 - cp web/style.css "$DIST/" 23 - cp web/app.js "$DIST/" 24 - cp web/oat.min.css "$DIST/" 25 - cp web/oat.min.js "$DIST/" 26 - cp web/sw.js "$DIST/" 27 - cp web/manifest.json "$DIST/" 28 - cp web/favicon.svg "$DIST/" 18 + # Build frontend with Vite 19 + echo "[2/3] Building frontend..." 20 + cd "$ROOT/web" 21 + bun run build 29 22 30 - echo " $(ls "$DIST" | wc -l) files -> dist/" 23 + echo " $(find "$DIST" -type f | wc -l) files -> web/dist/" 31 24 echo "" 32 25 ls -lh "$DIST" 33 26 echo ""
+1143
docs/plans/2026-03-06-int4-simd-inference-wiring.md
··· 1 + # INT4 Quantization + WASM SIMD + Inference Wiring 2 + 3 + > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. 4 + 5 + **Goal:** Shrink model from 1.75GB BF16 to ~400MB INT4, add WASM SIMD128 dot products, and wire the inference engine into the rewrite pipeline so `neutralize()` actually calls the model. 6 + 7 + **Architecture:** Python script converts BF16 safetensors to a custom Q4MF binary format (group_size=128, asymmetric quantization). Zig loads Q4MF and dequantizes on-the-fly in `Tensor.dot()`. SIMD128 via `@Vector(4, f32)` accelerates dot products for all data formats. An adapter function bridges the inference engine to `rewrite.zig`'s `InferenceFn` interface using file-level globals for model state. 8 + 9 + **Tech Stack:** Python 3 + numpy + safetensors (converter), Zig 0.15.2 (engine), WASM SIMD128 (browser perf) 10 + 11 + --- 12 + 13 + ### Task 1: Python Q4MF Converter Script 14 + 15 + **Files:** 16 + - Create: `scripts/convert_q4.py` 17 + 18 + **Context:** The Q4MF binary format packs weights as INT4 with per-group (128 elements) f16 scale and f16 zero point. Small tensors (numel <= 1024 or 1D) stay BF16/F32. Only `model.language_model.*` tensors are included (skip `mtp.*` tensors — those are multi-token prediction heads, unused for inference). 
19 + 20 + Q4MF format: 21 + ``` 22 + Header (16 bytes): 23 + magic: u32 LE = 0x46344D51 ("Q4MF") 24 + version: u32 LE = 1 25 + num_tensors: u32 LE 26 + group_size: u32 LE = 128 27 + 28 + Per tensor: 29 + name_len: u16 LE 30 + name: [name_len] bytes (UTF-8) 31 + ndim: u8 32 + shape: [ndim] u32 LE 33 + dtype: u8 (0=Q4_GROUP, 1=F32, 2=BF16) 34 + data_len: u32 LE 35 + data: [data_len] bytes 36 + 37 + Q4_GROUP data layout (per group of 128 weights): 38 + scale: f16 LE (2 bytes) 39 + zero: f16 LE (2 bytes) 40 + packed: 64 bytes (128 nibbles, 2 per byte, low nibble first) 41 + Total: 68 bytes per group 42 + 43 + Asymmetric quantization: 44 + scale = (max - min) / 15.0 45 + zero = min 46 + quantized = clamp(round((val - zero) / scale), 0, 15) 47 + dequantized = quantized * scale + zero 48 + ``` 49 + 50 + **Step 1: Write the converter script** 51 + 52 + ```python 53 + #!/usr/bin/env python3 54 + """Convert BF16 safetensors to INT4 group-quantized Q4MF format.""" 55 + import struct 56 + import sys 57 + import json 58 + import numpy as np 59 + 60 + MAGIC = 0x46344D51 61 + VERSION = 1 62 + GROUP_SIZE = 128 63 + DTYPE_Q4 = 0 64 + DTYPE_F32 = 1 65 + DTYPE_BF16 = 2 66 + MIN_QUANTIZE_NUMEL = 1024 67 + 68 + 69 + def bf16_to_f32(raw: bytes) -> np.ndarray: 70 + u16 = np.frombuffer(raw, dtype=np.uint16) 71 + u32 = u16.astype(np.uint32) << 16 72 + return u32.view(np.float32) 73 + 74 + 75 + def quantize_group(values: np.ndarray) -> tuple[np.float16, np.float16, bytes]: 76 + vmin, vmax = values.min(), values.max() 77 + if vmax == vmin: 78 + return np.float16(0.0), np.float16(vmin), bytes(len(values) // 2) 79 + 80 + scale = np.float16((vmax - vmin) / 15.0) 81 + zero = np.float16(vmin) 82 + scale_f32, zero_f32 = float(scale), float(zero) 83 + 84 + if scale_f32 == 0.0: 85 + return scale, zero, bytes(len(values) // 2) 86 + 87 + q = np.clip(np.round((values - zero_f32) / scale_f32), 0, 15).astype(np.uint8) 88 + packed = q[0::2] | (q[1::2] << 4) 89 + return scale, zero, packed.tobytes() 90 + 
91 + 92 + def convert(input_path: str, output_path: str): 93 + with open(input_path, "rb") as f: 94 + header_len = struct.unpack("<Q", f.read(8))[0] 95 + header = json.loads(f.read(header_len)) 96 + data_start = 8 + header_len 97 + 98 + tensor_meta = {k: v for k, v in header.items() if k != "__metadata__"} 99 + 100 + # Only keep model.language_model.* tensors (skip mtp.*) 101 + model_tensors = { 102 + k: v for k, v in tensor_meta.items() if k.startswith("model.language_model.") 103 + } 104 + 105 + tensors = [] 106 + with open(input_path, "rb") as f: 107 + for name in sorted(model_tensors): 108 + info = model_tensors[name] 109 + shape = info["shape"] 110 + dtype_str = info["dtype"] 111 + offsets = info["data_offsets"] 112 + data_len = offsets[1] - offsets[0] 113 + 114 + f.seek(data_start + offsets[0]) 115 + raw = f.read(data_len) 116 + 117 + numel = 1 118 + for s in shape: 119 + numel *= s 120 + 121 + if dtype_str == "F32": 122 + tensors.append((name, shape, DTYPE_F32, raw)) 123 + print(f" F32 {name}: {shape}") 124 + elif numel > MIN_QUANTIZE_NUMEL and len(shape) >= 2: 125 + values = bf16_to_f32(raw) 126 + # Pad to multiple of GROUP_SIZE 127 + remainder = len(values) % GROUP_SIZE 128 + if remainder != 0: 129 + values = np.concatenate( 130 + [values, np.zeros(GROUP_SIZE - remainder, dtype=np.float32)] 131 + ) 132 + 133 + num_groups = len(values) // GROUP_SIZE 134 + q4_data = bytearray() 135 + for g in range(num_groups): 136 + group = values[g * GROUP_SIZE : (g + 1) * GROUP_SIZE] 137 + scale, zero, packed = quantize_group(group) 138 + q4_data += struct.pack("<e", scale) 139 + q4_data += struct.pack("<e", zero) 140 + q4_data += packed 141 + 142 + tensors.append((name, shape, DTYPE_Q4, bytes(q4_data))) 143 + ratio = len(q4_data) / data_len * 100 144 + print(f" Q4 {name}: {shape} ({ratio:.0f}% of original)") 145 + else: 146 + tensors.append((name, shape, DTYPE_BF16, raw)) 147 + print(f" BF16 {name}: {shape}") 148 + 149 + with open(output_path, "wb") as f: 150 + 
f.write(struct.pack("<IIII", MAGIC, VERSION, len(tensors), GROUP_SIZE)) 151 + 152 + for name, shape, dtype, data in tensors: 153 + name_bytes = name.encode("utf-8") 154 + f.write(struct.pack("<H", len(name_bytes))) 155 + f.write(name_bytes) 156 + f.write(struct.pack("<B", len(shape))) 157 + for s in shape: 158 + f.write(struct.pack("<I", s)) 159 + f.write(struct.pack("<B", dtype)) 160 + f.write(struct.pack("<I", len(data))) 161 + f.write(data) 162 + 163 + import os 164 + 165 + size_mb = os.path.getsize(output_path) / 1e6 166 + print(f"\nWrote {output_path}: {len(tensors)} tensors, {size_mb:.1f} MB") 167 + 168 + 169 + if __name__ == "__main__": 170 + input_path = ( 171 + sys.argv[1] 172 + if len(sys.argv) > 1 173 + else "QwenTheBard/model.safetensors-00001-of-00001.safetensors" 174 + ) 175 + output_path = sys.argv[2] if len(sys.argv) > 2 else "QwenTheBard/model.q4" 176 + convert(input_path, output_path) 177 + ``` 178 + 179 + **Step 2: Run the converter and verify output** 180 + 181 + ```bash 182 + python3 scripts/convert_q4.py 183 + # Expected: prints each tensor with Q4/BF16/F32 label 184 + # Expected: "Wrote QwenTheBard/model.q4: ~200 tensors, ~400 MB" 185 + ls -lh QwenTheBard/model.q4 186 + # Expected: ~400M file 187 + ``` 188 + 189 + **Step 3: Verify Q4MF header is parseable** 190 + 191 + ```bash 192 + python3 -c " 193 + import struct 194 + with open('QwenTheBard/model.q4', 'rb') as f: 195 + magic, version, num_tensors, group_size = struct.unpack('<IIII', f.read(16)) 196 + assert magic == 0x46344D51, f'Bad magic: {hex(magic)}' 197 + assert version == 1 198 + assert group_size == 128 199 + print(f'Q4MF v{version}: {num_tensors} tensors, group_size={group_size}') 200 + # Read first tensor to verify structure 201 + name_len = struct.unpack('<H', f.read(2))[0] 202 + name = f.read(name_len).decode() 203 + ndim = struct.unpack('<B', f.read(1))[0] 204 + shape = [struct.unpack('<I', f.read(4))[0] for _ in range(ndim)] 205 + dtype = struct.unpack('<B', f.read(1))[0] 206 
+ data_len = struct.unpack('<I', f.read(4))[0] 207 + dtypes = ['Q4', 'F32', 'BF16'] 208 + print(f'First tensor: {name} shape={shape} dtype={dtypes[dtype]} data={data_len} bytes') 209 + " 210 + # Expected: valid header, first tensor name matches model.language_model.* 211 + ``` 212 + 213 + **Step 4: Commit** 214 + 215 + ```bash 216 + git add scripts/convert_q4.py 217 + git commit -m "feat: add Python Q4MF converter for INT4 quantization" 218 + ``` 219 + 220 + --- 221 + 222 + ### Task 2: Q4 Data Structures and Dequantization in Zig 223 + 224 + **Files:** 225 + - Modify: `src/inference/quantized.zig` 226 + 227 + **Context:** Add `Q4Data` struct to `Tensor` and implement element-wise and group-wise dequantization. The `get()` and `dot()` methods must transparently handle Q4 data alongside existing BF16/F32 paths. 228 + 229 + **Step 1: Write failing test for Q4 dequantization** 230 + 231 + Add at the bottom of `src/inference/quantized.zig`: 232 + 233 + ```zig 234 + test "q4 dequantization" { 235 + // Manually construct a single group of 128 elements: 236 + // scale = 1.0 (f16), zero = 0.0 (f16), nibbles all encode their index mod 16 237 + var group_data: [68]u8 = undefined; 238 + 239 + // scale = 1.0 as f16 = 0x3C00 240 + group_data[0] = 0x00; 241 + group_data[1] = 0x3C; 242 + // zero = 0.0 as f16 = 0x0000 243 + group_data[2] = 0x00; 244 + group_data[3] = 0x00; 245 + 246 + // Pack nibbles: element 2i = i%16, element 2i+1 = (i+1)%16 247 + // Actually, simpler: set all nibbles to known values 248 + // byte[j] = low_nibble | (high_nibble << 4) 249 + // elements 0,1 → byte 0; elements 2,3 → byte 1; etc. 
250 + for (0..64) |j| { 251 + const low: u8 = @intCast((j * 2) % 16); 252 + const high: u8 = @intCast((j * 2 + 1) % 16); 253 + group_data[4 + j] = low | (high << 4); 254 + } 255 + 256 + const q4 = Q4Data{ 257 + .raw = &group_data, 258 + .num_elements = 128, 259 + .group_size = 128, 260 + }; 261 + 262 + // Element 0: nibble=0, dequant = 0 * 1.0 + 0.0 = 0.0 263 + try std.testing.expectApproxEqAbs(@as(f32, 0.0), q4.get(0), 1e-3); 264 + // Element 1: nibble=1, dequant = 1 * 1.0 + 0.0 = 1.0 265 + try std.testing.expectApproxEqAbs(@as(f32, 1.0), q4.get(1), 1e-3); 266 + // Element 14: nibble=14, dequant = 14.0 267 + try std.testing.expectApproxEqAbs(@as(f32, 14.0), q4.get(14), 1e-3); 268 + // Element 15: nibble=15, dequant = 15.0 269 + try std.testing.expectApproxEqAbs(@as(f32, 15.0), q4.get(15), 1e-3); 270 + // Element 16: nibble=0 (wraps mod 16), dequant = 0.0 271 + try std.testing.expectApproxEqAbs(@as(f32, 0.0), q4.get(16), 1e-3); 272 + } 273 + 274 + test "q4 dequantization with offset" { 275 + // Two groups: second group has scale=2.0, zero=10.0 276 + var data: [68 * 2]u8 = undefined; 277 + 278 + // Group 0: scale=1.0, zero=0.0, all nibbles=5 279 + data[0] = 0x00; 280 + data[1] = 0x3C; // f16 1.0 281 + data[2] = 0x00; 282 + data[3] = 0x00; // f16 0.0 283 + @memset(data[4..68], 0x55); // nibble 5 in both low and high 284 + 285 + // Group 1: scale=2.0 (0x4000), zero=10.0 (0x4900) 286 + data[68] = 0x00; 287 + data[69] = 0x40; // f16 2.0 288 + data[70] = 0x00; 289 + data[71] = 0x49; // f16 10.0 290 + @memset(data[72..136], 0x33); // nibble 3 in both low and high 291 + 292 + const q4 = Q4Data{ 293 + .raw = &data, 294 + .num_elements = 256, 295 + .group_size = 128, 296 + }; 297 + 298 + // Group 0, element 0: 5 * 1.0 + 0.0 = 5.0 299 + try std.testing.expectApproxEqAbs(@as(f32, 5.0), q4.get(0), 1e-2); 300 + // Group 1, element 128: 3 * 2.0 + 10.0 = 16.0 301 + try std.testing.expectApproxEqAbs(@as(f32, 16.0), q4.get(128), 1e-1); 302 + } 303 + ``` 304 + 305 + **Step 2: Run 
test to verify it fails** 306 + 307 + ```bash 308 + cd /home/christian/dev/klearu/klearu-sdc/fantasma-zig && zig build test 2>&1 | tail -20 309 + # Expected: FAIL — Q4Data not defined 310 + ``` 311 + 312 + **Step 3: Implement Q4Data struct and update Tensor** 313 + 314 + Add `Q4Data` struct after the `Tensor` struct in `quantized.zig`, then add the `data_q4` field to `Tensor`, and update `get()` and `dot()`: 315 + 316 + ```zig 317 + // Add to Tensor struct: 318 + pub const Tensor = struct { 319 + data_bf16: ?[]const u16 = null, 320 + data_f32: ?[]const f32 = null, 321 + data_q4: ?Q4Data = null, // ADD THIS FIELD 322 + shape: [4]u32 = .{ 0, 0, 0, 0 }, 323 + ndim: u8 = 0, 324 + 325 + // ... numel() unchanged ... 326 + 327 + pub inline fn get(self: *const Tensor, idx: usize) f32 { 328 + if (self.data_f32) |f| return f[idx]; 329 + if (self.data_bf16) |b| return bf16ToF32(b[idx]); 330 + if (self.data_q4) |*q| return q.get(idx); // ADD THIS LINE 331 + return 0.0; 332 + } 333 + 334 + pub fn dot(self: *const Tensor, offset: usize, other: []const f32) f32 { 335 + const n = other.len; 336 + 337 + if (self.data_f32) |f| { 338 + return simdDotF32(f[offset .. offset + n], other); 339 + } 340 + 341 + if (self.data_bf16) |b| { 342 + return simdDotBf16(b[offset .. 
offset + n], other); 343 + } 344 + 345 + if (self.data_q4) |*q| { 346 + return q.dot(offset, other); 347 + } 348 + 349 + return 0.0; 350 + } 351 + }; 352 + 353 + // Add after Tensor struct: 354 + pub const Q4Data = struct { 355 + raw: []const u8, 356 + num_elements: u32, 357 + group_size: u32, 358 + 359 + const GROUP_HEADER: usize = 4; // 2 bytes scale + 2 bytes zero 360 + const NIBBLES_PER_BYTE: usize = 2; 361 + 362 + fn groupBytes(self: *const Q4Data) usize { 363 + return GROUP_HEADER + self.group_size / NIBBLES_PER_BYTE; 364 + } 365 + 366 + pub inline fn get(self: *const Q4Data, idx: usize) f32 { 367 + const gb = self.groupBytes(); 368 + const group_idx = idx / self.group_size; 369 + const within = idx % self.group_size; 370 + const group_offset = group_idx * gb; 371 + 372 + const scale = f16ToF32(std.mem.readInt(u16, self.raw[group_offset..][0..2], .little)); 373 + const zero = f16ToF32(std.mem.readInt(u16, self.raw[group_offset + 2 ..][0..2], .little)); 374 + 375 + const byte_idx = within / 2; 376 + const packed_byte = self.raw[group_offset + GROUP_HEADER + byte_idx]; 377 + const nibble: u8 = if (within % 2 == 0) packed_byte & 0x0F else packed_byte >> 4; 378 + 379 + return @as(f32, @floatFromInt(nibble)) * scale + zero; 380 + } 381 + 382 + pub fn dot(self: *const Q4Data, offset: usize, other: []const f32) f32 { 383 + const n = other.len; 384 + const gs = self.group_size; 385 + const gb = self.groupBytes(); 386 + var sum: f32 = 0.0; 387 + 388 + var elem_idx = offset; 389 + var other_idx: usize = 0; 390 + 391 + while (other_idx < n) { 392 + const group_idx = elem_idx / gs; 393 + const within_start = elem_idx % gs; 394 + const group_offset = group_idx * gb; 395 + const remaining_in_group = gs - within_start; 396 + const chunk = @min(remaining_in_group, n - other_idx); 397 + 398 + const scale = f16ToF32(std.mem.readInt(u16, self.raw[group_offset..][0..2], .little)); 399 + const zero = f16ToF32(std.mem.readInt(u16, self.raw[group_offset + 2 ..][0..2], .little)); 
400 + const packed = self.raw[group_offset + GROUP_HEADER ..][0 .. gs / 2]; 401 + 402 + // Dequantize chunk into stack buffer and SIMD dot 403 + var deq_buf: [256]f32 = undefined; // max group_size=256 404 + const deq = deq_buf[0..chunk]; 405 + 406 + for (0..chunk) |i| { 407 + const wi = within_start + i; 408 + const byte_idx = wi / 2; 409 + const b = packed[byte_idx]; 410 + const nibble: u8 = if (wi % 2 == 0) b & 0x0F else b >> 4; 411 + deq[i] = @as(f32, @floatFromInt(nibble)) * scale + zero; 412 + } 413 + 414 + sum += simdDotF32(deq, other[other_idx .. other_idx + chunk]); 415 + 416 + elem_idx += chunk; 417 + other_idx += chunk; 418 + } 419 + 420 + return sum; 421 + } 422 + }; 423 + ``` 424 + 425 + Also add the `f16ToF32` helper near the top of the file: 426 + 427 + ```zig 428 + /// Convert a single F16 value (u16 bits) to F32. 429 + pub inline fn f16ToF32(bits: u16) f32 { 430 + return @floatCast(@as(f16, @bitCast(bits))); 431 + } 432 + ``` 433 + 434 + **Step 4: Run tests to verify they pass** 435 + 436 + ```bash 437 + cd /home/christian/dev/klearu/klearu-sdc/fantasma-zig && zig build test 2>&1 | tail -5 438 + # Expected: All tests pass including q4 dequantization tests 439 + ``` 440 + 441 + **Step 5: Commit** 442 + 443 + ```bash 444 + git add src/inference/quantized.zig 445 + git commit -m "feat: add Q4Data struct with INT4 group dequantization" 446 + ``` 447 + 448 + --- 449 + 450 + ### Task 3: SIMD Dot Products 451 + 452 + **Files:** 453 + - Modify: `src/inference/quantized.zig` 454 + 455 + **Context:** Replace the scalar dot product loops with `@Vector(4, f32)` SIMD. This compiles to WASM SIMD128 on wasm32 and SSE/AVX on x86. Three functions: `simdDotF32` (for F32 tensors), `simdDotBf16` (for BF16 tensors, dequantizes then SIMD), and the Q4 path already uses `simdDotF32` via the dequantize-then-dot approach from Task 2. 
456 + 457 + **Step 1: Write failing test for SIMD dot product** 458 + 459 + Add at the bottom of `src/inference/quantized.zig`: 460 + 461 + ```zig 462 + test "simd dot f32" { 463 + const a = [_]f32{ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 }; 464 + const b = [_]f32{ 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0 }; 465 + const result = simdDotF32(&a, &b); 466 + // 9+16+21+24+25+24+21+16+9 = 165 467 + try std.testing.expectApproxEqAbs(@as(f32, 165.0), result, 1e-4); 468 + } 469 + 470 + test "simd dot bf16" { 471 + // 1.0 in bf16 = 0x3F80, 2.0 = 0x4000 472 + const a = [_]u16{ 0x3F80, 0x4000, 0x3F80, 0x4000 }; 473 + const b = [_]f32{ 1.0, 1.0, 1.0, 1.0 }; 474 + const result = simdDotBf16(&a, &b); 475 + // 1*1 + 2*1 + 1*1 + 2*1 = 6.0 476 + try std.testing.expectApproxEqAbs(@as(f32, 6.0), result, 1e-4); 477 + } 478 + ``` 479 + 480 + **Step 2: Run test to verify it fails** 481 + 482 + ```bash 483 + cd /home/christian/dev/klearu/klearu-sdc/fantasma-zig && zig build test 2>&1 | tail -10 484 + # Expected: FAIL — simdDotF32/simdDotBf16 not defined 485 + ``` 486 + 487 + **Step 3: Implement SIMD dot product functions** 488 + 489 + Add these functions in `quantized.zig` before the `matVec` function: 490 + 491 + ```zig 492 + /// SIMD-accelerated dot product for two f32 slices. 493 + /// Uses @Vector(4, f32) which compiles to WASM SIMD128 or SSE/AVX. 494 + pub fn simdDotF32(a: []const f32, b: []const f32) f32 { 495 + const n = a.len; 496 + const VF32x4 = @Vector(4, f32); 497 + var acc: VF32x4 = @splat(0.0); 498 + 499 + var i: usize = 0; 500 + while (i + 4 <= n) : (i += 4) { 501 + const va: VF32x4 = a[i..][0..4].*; 502 + const vb: VF32x4 = b[i..][0..4].*; 503 + acc += va * vb; 504 + } 505 + 506 + var sum = @reduce(.Add, acc); 507 + 508 + // Scalar tail 509 + while (i < n) : (i += 1) { 510 + sum += a[i] * b[i]; 511 + } 512 + 513 + return sum; 514 + } 515 + 516 + /// SIMD-accelerated dot product for BF16 weights against f32 input. 
517 + /// Dequantizes BF16 in chunks of 4, then SIMD multiplies. 518 + pub fn simdDotBf16(a_bf16: []const u16, b: []const f32) f32 { 519 + const n = a_bf16.len; 520 + const VF32x4 = @Vector(4, f32); 521 + var acc: VF32x4 = @splat(0.0); 522 + 523 + var i: usize = 0; 524 + while (i + 4 <= n) : (i += 4) { 525 + const va = VF32x4{ 526 + bf16ToF32(a_bf16[i]), 527 + bf16ToF32(a_bf16[i + 1]), 528 + bf16ToF32(a_bf16[i + 2]), 529 + bf16ToF32(a_bf16[i + 3]), 530 + }; 531 + const vb: VF32x4 = b[i..][0..4].*; 532 + acc += va * vb; 533 + } 534 + 535 + var sum = @reduce(.Add, acc); 536 + 537 + while (i < n) : (i += 1) { 538 + sum += bf16ToF32(a_bf16[i]) * b[i]; 539 + } 540 + 541 + return sum; 542 + } 543 + ``` 544 + 545 + **Step 4: Update `Tensor.dot()` to use SIMD helpers** 546 + 547 + Replace the existing `Tensor.dot()` method with the version from Task 2 Step 3 (which calls `simdDotF32` and `simdDotBf16`). The full replacement: 548 + 549 + ```zig 550 + pub fn dot(self: *const Tensor, offset: usize, other: []const f32) f32 { 551 + const n = other.len; 552 + 553 + if (self.data_f32) |f| { 554 + return simdDotF32(f[offset .. offset + n], other); 555 + } 556 + 557 + if (self.data_bf16) |b| { 558 + return simdDotBf16(b[offset .. 
offset + n], other); 559 + } 560 + 561 + if (self.data_q4) |*q| { 562 + return q.dot(offset, other); 563 + } 564 + 565 + return 0.0; 566 + } 567 + ``` 568 + 569 + **Step 5: Run all tests** 570 + 571 + ```bash 572 + cd /home/christian/dev/klearu/klearu-sdc/fantasma-zig && zig build test 2>&1 | tail -5 573 + # Expected: All tests pass including SIMD dot tests 574 + ``` 575 + 576 + **Step 6: Commit** 577 + 578 + ```bash 579 + git add src/inference/quantized.zig 580 + git commit -m "feat: add SIMD128 dot products for f32, bf16, and q4 tensors" 581 + ``` 582 + 583 + --- 584 + 585 + ### Task 4: Q4MF Loader in model.zig 586 + 587 + **Files:** 588 + - Modify: `src/inference/model.zig` 589 + - Modify: `src/inference/quantized.zig` (add Q4MF parser) 590 + 591 + **Context:** Parse the Q4MF binary format and construct `ModelWeights` with `Tensor` views pointing into the loaded data. For BF16/F32 tensors in the Q4MF file, copy to aligned allocations since the packed file has no alignment guarantees. For Q4 tensors, store as `Q4Data` pointing directly into the file buffer. 592 + 593 + **Step 1: Add Q4MF format constants and parser to `quantized.zig`** 594 + 595 + Add at the bottom of `quantized.zig` before the tests: 596 + 597 + ```zig 598 + // ============================================================ 599 + // Q4MF format parser 600 + // ============================================================ 601 + 602 + pub const Q4MF_MAGIC: u32 = 0x46344D51; 603 + pub const Q4MF_VERSION: u32 = 1; 604 + 605 + pub const Q4MFTensorInfo = struct { 606 + name: []const u8, 607 + shape: [4]u32, 608 + ndim: u8, 609 + dtype: u8, // 0=Q4, 1=F32, 2=BF16 610 + data: []const u8, 611 + }; 612 + 613 + pub const Q4MFHeader = struct { 614 + num_tensors: u32, 615 + group_size: u32, 616 + tensors: []Q4MFTensorInfo, 617 + }; 618 + 619 + /// Parse a Q4MF file buffer and return tensor metadata. 620 + /// The returned tensor infos point into file_data (zero-copy for data). 
621 + pub fn parseQ4MF( 622 + allocator: std.mem.Allocator, 623 + file_data: []const u8, 624 + ) !Q4MFHeader { 625 + if (file_data.len < 16) return SafetensorsError.InvalidFormat; 626 + 627 + const magic = std.mem.readInt(u32, file_data[0..4], .little); 628 + if (magic != Q4MF_MAGIC) return SafetensorsError.InvalidFormat; 629 + 630 + const version = std.mem.readInt(u32, file_data[4..8], .little); 631 + if (version != Q4MF_VERSION) return SafetensorsError.InvalidFormat; 632 + 633 + const num_tensors = std.mem.readInt(u32, file_data[8..12], .little); 634 + const group_size = std.mem.readInt(u32, file_data[12..16], .little); 635 + 636 + const tensors = try allocator.alloc(Q4MFTensorInfo, num_tensors); 637 + 638 + var pos: usize = 16; 639 + for (0..num_tensors) |ti| { 640 + if (pos + 2 > file_data.len) return SafetensorsError.InvalidFormat; 641 + const name_len = std.mem.readInt(u16, file_data[pos..][0..2], .little); 642 + pos += 2; 643 + 644 + const name = file_data[pos .. pos + name_len]; 645 + pos += name_len; 646 + 647 + const ndim = file_data[pos]; 648 + pos += 1; 649 + 650 + var shape: [4]u32 = .{ 0, 0, 0, 0 }; 651 + for (0..ndim) |d| { 652 + shape[d] = std.mem.readInt(u32, file_data[pos..][0..4], .little); 653 + pos += 4; 654 + } 655 + 656 + const dtype = file_data[pos]; 657 + pos += 1; 658 + 659 + const data_len = std.mem.readInt(u32, file_data[pos..][0..4], .little); 660 + pos += 4; 661 + 662 + const data = file_data[pos .. pos + data_len]; 663 + pos += data_len; 664 + 665 + tensors[ti] = .{ 666 + .name = name, 667 + .shape = shape, 668 + .ndim = ndim, 669 + .dtype = dtype, 670 + .data = data, 671 + }; 672 + } 673 + 674 + return .{ 675 + .num_tensors = num_tensors, 676 + .group_size = group_size, 677 + .tensors = tensors, 678 + }; 679 + } 680 + 681 + /// Create a Tensor from Q4MF tensor info. 682 + /// For Q4 data, creates a Q4Data view. For F32/BF16, copies to aligned allocation. 
683 + pub fn tensorFromQ4MF(info: *const Q4MFTensorInfo, group_size: u32, allocator: std.mem.Allocator) !Tensor { 684 + var t = Tensor{ 685 + .shape = info.shape, 686 + .ndim = info.ndim, 687 + }; 688 + 689 + var numel: u32 = 1; 690 + for (info.shape[0..info.ndim]) |s| numel *= s; 691 + 692 + switch (info.dtype) { 693 + 0 => { 694 + // Q4 — zero-copy view into file data 695 + t.data_q4 = .{ 696 + .raw = info.data, 697 + .num_elements = numel, 698 + .group_size = group_size, 699 + }; 700 + }, 701 + 1 => { 702 + // F32 — copy to aligned allocation 703 + const f32_data = try allocator.alloc(f32, numel); 704 + for (0..numel) |i| { 705 + const bits = std.mem.readInt(u32, info.data[i * 4 ..][0..4], .little); 706 + f32_data[i] = @bitCast(bits); 707 + } 708 + t.data_f32 = f32_data; 709 + }, 710 + 2 => { 711 + // BF16 — copy to aligned allocation 712 + const bf16_data = try allocator.alloc(u16, numel); 713 + for (0..numel) |i| { 714 + bf16_data[i] = std.mem.readInt(u16, info.data[i * 2 ..][0..2], .little); 715 + } 716 + t.data_bf16 = bf16_data; 717 + }, 718 + else => return SafetensorsError.UnsupportedDtype, 719 + } 720 + 721 + return t; 722 + } 723 + ``` 724 + 725 + **Step 2: Add `loadWeightsQ4` to `model.zig`** 726 + 727 + Add this function after `loadWeights` in `model.zig`: 728 + 729 + ```zig 730 + /// Load model weights from Q4MF format (INT4 quantized). 731 + /// file_data must remain valid for the lifetime of the returned ModelWeights. 
732 + pub fn loadWeightsQ4(allocator: std.mem.Allocator, file_data: []const u8, max_seq_len: u32) !ModelWeights { 733 + const header = try q.parseQ4MF(allocator, file_data); 734 + // Note: header.tensors is allocated but we don't free it — small and needed during loading 735 + 736 + var weights: ModelWeights = undefined; 737 + weights.allocator = allocator; 738 + 739 + // Embedding 740 + weights.embed_tokens = try findAndLoadQ4(header, "model.language_model.embed_tokens.weight", allocator); 741 + weights.norm = try findAndLoadQ4(header, "model.language_model.norm.weight", allocator); 742 + 743 + // Layers 744 + for (0..Config.num_layers) |layer_idx| { 745 + var lw: LayerWeights = undefined; 746 + 747 + lw.input_layernorm = try findAndLoadLayerQ4(header, layer_idx, "input_layernorm.weight", allocator); 748 + lw.post_attention_layernorm = try findAndLoadLayerQ4(header, layer_idx, "post_attention_layernorm.weight", allocator); 749 + 750 + lw.gate_proj = try findAndLoadLayerQ4(header, layer_idx, "mlp.gate_proj.weight", allocator); 751 + lw.up_proj = try findAndLoadLayerQ4(header, layer_idx, "mlp.up_proj.weight", allocator); 752 + lw.down_proj = try findAndLoadLayerQ4(header, layer_idx, "mlp.down_proj.weight", allocator); 753 + 754 + if (Config.layer_is_full_attn[layer_idx]) { 755 + lw.full_attn = FullAttentionWeights{ 756 + .q_proj = try findAndLoadLayerQ4(header, layer_idx, "self_attn.q_proj.weight", allocator), 757 + .k_proj = try findAndLoadLayerQ4(header, layer_idx, "self_attn.k_proj.weight", allocator), 758 + .v_proj = try findAndLoadLayerQ4(header, layer_idx, "self_attn.v_proj.weight", allocator), 759 + .o_proj = try findAndLoadLayerQ4(header, layer_idx, "self_attn.o_proj.weight", allocator), 760 + .q_norm = try findAndLoadLayerQ4(header, layer_idx, "self_attn.q_norm.weight", allocator), 761 + .k_norm = try findAndLoadLayerQ4(header, layer_idx, "self_attn.k_norm.weight", allocator), 762 + }; 763 + lw.delta_net = null; 764 + } else { 765 + lw.delta_net = 
DeltaNetWeights{ 766 + .in_proj_qkv = try findAndLoadLayerQ4(header, layer_idx, "linear_attn.in_proj_qkv.weight", allocator), 767 + .in_proj_z = try findAndLoadLayerQ4(header, layer_idx, "linear_attn.in_proj_z.weight", allocator), 768 + .in_proj_a = try findAndLoadLayerQ4(header, layer_idx, "linear_attn.in_proj_a.weight", allocator), 769 + .in_proj_b = try findAndLoadLayerQ4(header, layer_idx, "linear_attn.in_proj_b.weight", allocator), 770 + .conv1d = try findAndLoadLayerQ4(header, layer_idx, "linear_attn.conv1d.weight", allocator), 771 + .A_log = try findAndLoadLayerQ4(header, layer_idx, "linear_attn.A_log", allocator), 772 + .dt_bias = try findAndLoadLayerQ4(header, layer_idx, "linear_attn.dt_bias", allocator), 773 + .norm = try findAndLoadLayerQ4(header, layer_idx, "linear_attn.norm.weight", allocator), 774 + .out_proj = try findAndLoadLayerQ4(header, layer_idx, "linear_attn.out_proj.weight", allocator), 775 + }; 776 + lw.full_attn = null; 777 + } 778 + 779 + weights.layers[layer_idx] = lw; 780 + } 781 + 782 + // Precompute RoPE cos/sin tables (identical to loadWeights) 783 + const rotary_dim = Config.rotary_dim; 784 + weights.rope_cos = try allocator.alloc(f32, max_seq_len * rotary_dim); 785 + weights.rope_sin = try allocator.alloc(f32, max_seq_len * rotary_dim); 786 + 787 + const half_dim = rotary_dim / 2; 788 + for (0..max_seq_len) |pos| { 789 + for (0..half_dim) |i| { 790 + const freq = 1.0 / std.math.pow(f32, Config.rope_theta, @as(f32, @floatFromInt(2 * i)) / @as(f32, @floatFromInt(rotary_dim))); 791 + const angle = @as(f32, @floatFromInt(pos)) * freq; 792 + const cos_val = @cos(angle); 793 + const sin_val = @sin(angle); 794 + weights.rope_cos[pos * rotary_dim + i] = cos_val; 795 + weights.rope_cos[pos * rotary_dim + half_dim + i] = cos_val; 796 + weights.rope_sin[pos * rotary_dim + i] = sin_val; 797 + weights.rope_sin[pos * rotary_dim + half_dim + i] = sin_val; 798 + } 799 + } 800 + 801 + return weights; 802 + } 803 + 804 + fn findAndLoadQ4(header: 
q.Q4MFHeader, name: []const u8, allocator: std.mem.Allocator) !Tensor { 805 + for (header.tensors) |*info| { 806 + if (std.mem.eql(u8, info.name, name)) { 807 + return try q.tensorFromQ4MF(info, header.group_size, allocator); 808 + } 809 + } 810 + return q.SafetensorsError.TensorNotFound; 811 + } 812 + 813 + fn findAndLoadLayerQ4(header: q.Q4MFHeader, layer_idx: usize, suffix: []const u8, allocator: std.mem.Allocator) !Tensor { 814 + var name_buf: [256]u8 = undefined; 815 + const name = std.fmt.bufPrint(&name_buf, "model.language_model.layers.{d}.{s}", .{ layer_idx, suffix }) catch return error.PathTooLong; 816 + return findAndLoadQ4(header, name, allocator); 817 + } 818 + ``` 819 + 820 + **Step 3: Update CLI to auto-detect Q4MF** 821 + 822 + In `src/main.zig`, in the `generate` command, replace the weight loading section (around lines 307-316) with auto-detection: 823 + 824 + ```zig 825 + // Try Q4 first, fall back to BF16 safetensors 826 + var q4_path_buf: [1024]u8 = undefined; 827 + const q4_path = std.fmt.bufPrint(&q4_path_buf, "{s}/model.q4", .{model_dir}) catch "QwenTheBard/model.q4"; 828 + 829 + const max_seq_len: u32 = 512; 830 + var weights: inference_model.ModelWeights = undefined; 831 + 832 + if (std.fs.cwd().openFile(q4_path, .{})) |q4_file| { 833 + const q4_size = try q4_file.getEndPos(); 834 + const q4_data = try allocator.alloc(u8, q4_size); 835 + const q4_read = try q4_file.readAll(q4_data); 836 + q4_file.close(); 837 + if (q4_read != q4_size) { 838 + try stderr.print("Incomplete Q4 read\n", .{}); 839 + try stderr.flush(); 840 + return; 841 + } 842 + weights = inference_model.loadWeightsQ4(allocator, q4_data, max_seq_len) catch |e| { 843 + try stderr.print("Failed to load Q4 weights: {}\n", .{e}); 844 + try stderr.flush(); 845 + return; 846 + }; 847 + try stderr.print("Loaded Q4 model from {s}\n", .{q4_path}); 848 + try stderr.flush(); 849 + } else |_| { 850 + try stderr.print("Loading BF16 model from {s}...\n", .{model_dir}); 851 + try 
stderr.flush(); 852 + weights = inference_model.loadWeights(allocator, model_dir, max_seq_len) catch |e| { 853 + try stderr.print("Failed to load weights: {}\n", .{e}); 854 + try stderr.flush(); 855 + return; 856 + }; 857 + } 858 + ``` 859 + 860 + Also remove the `_ = &weights;` line that follows the old weight loading. 861 + 862 + **Step 4: Build and test with Q4 model** 863 + 864 + ```bash 865 + cd /home/christian/dev/klearu/klearu-sdc/fantasma-zig && zig build test 2>&1 | tail -5 866 + # Expected: all tests pass (unit tests don't load actual model) 867 + 868 + zig build 869 + ./zig-out/bin/fantasma generate QwenTheBard "What is the capital of France?" 870 + # Expected: loads Q4 model (if model.q4 exists), generates coherent text 871 + # Compare output quality with BF16: should be similar but not identical 872 + ``` 873 + 874 + **Step 5: Commit** 875 + 876 + ```bash 877 + git add src/inference/quantized.zig src/inference/model.zig src/main.zig 878 + git commit -m "feat: add Q4MF loader with auto-detection (Q4 vs BF16)" 879 + ``` 880 + 881 + --- 882 + 883 + ### Task 5: Wire Inference into Rewrite Pipeline 884 + 885 + **Files:** 886 + - Modify: `src/main.zig` (add globals, adapter function, update WASM exports) 887 + - Modify: `src/neutralizer/rewrite.zig` (no changes needed — InferenceFn interface is ready) 888 + 889 + **Context:** Create an inference adapter function that matches the `InferenceFn = *const fn (prompt: []const u8, output_buf: []u8) usize` signature. It uses file-level globals for model state. Remove `wasm32` guards so inference compiles for both targets. Update WASM `init()` to accept both weights and tokenizer data, and `neutralize()` to call the real pipeline. 890 + 891 + **Step 1: Remove wasm32 conditional imports** 892 + 893 + In `src/main.zig`, replace lines 17-21: 894 + 895 + ```zig 896 + // OLD: 897 + pub const inference_model = if (builtin.target.cpu.arch != .wasm32) @import("inference/model.zig") else struct {}; 898 + // ... 
etc 899 + 900 + // NEW: 901 + pub const inference_model = @import("inference/model.zig"); 902 + pub const inference_tokenizer = @import("inference/tokenizer.zig"); 903 + pub const inference_sampler = @import("inference/sampler.zig"); 904 + pub const inference_kv_cache = @import("inference/kv_cache.zig"); 905 + pub const inference_quantized = @import("inference/quantized.zig"); 906 + ``` 907 + 908 + Also remove the `if (@import("builtin").target.cpu.arch != .wasm32)` guard around the comptime test block at the bottom. 909 + 910 + **Step 2: Add global model state and inference adapter** 911 + 912 + Add after the `output_buf`/`output_len` declarations in `main.zig`: 913 + 914 + ```zig 915 + // ============================================================ 916 + // Global model state (for inference adapter) 917 + // ============================================================ 918 + 919 + var g_weights: ?inference_model.ModelWeights = null; 920 + var g_tokenizer: ?inference_tokenizer.Tokenizer = null; 921 + var g_cache: ?inference_kv_cache.ModelCache = null; 922 + var g_fwd_state: ?inference_model.ForwardState = null; 923 + var g_sampler: inference_sampler.Sampler = inference_sampler.Sampler.init(0.7, 40, 42); 924 + 925 + const MAX_SEQ_LEN: u32 = 512; 926 + const MAX_NEW_TOKENS: u32 = 256; 927 + 928 + /// Inference adapter matching InferenceFn signature. 929 + /// Takes a plain text prompt (already formatted with MARKERS + TEXT), 930 + /// wraps in ChatML, tokenizes, runs forward passes, returns decoded bytes. 
931 + fn inferenceAdapter(prompt: []const u8, out_buf: []u8) usize { 932 + const allocator = getAllocator(); 933 + const tokenizer = if (g_tokenizer) |*t| t else return 0; 934 + const weights = if (g_weights) |*w| w else return 0; 935 + const model_cache = if (g_cache) |*c| c else return 0; 936 + const fwd_state = if (g_fwd_state) |*s| s else return 0; 937 + 938 + // Reset cache for each independent neutralization 939 + model_cache.reset(); 940 + 941 + // Tokenize with ChatML wrapping 942 + var token_buf: [2048]u32 = undefined; 943 + const prompt_len = tokenizer.encodeChatML( 944 + "/no_think\nYou are a stylometric neutralizer. Rewrite the text to remove authorship markers while preserving meaning exactly.", 945 + prompt, 946 + allocator, 947 + &token_buf, 948 + ) catch return 0; 949 + 950 + const Config = inference_kv_cache.Config; 951 + 952 + // Prefill 953 + var pos: u32 = 0; 954 + for (token_buf[0..prompt_len]) |tok| { 955 + _ = inference_model.forward(weights, model_cache, fwd_state, tok, pos); 956 + pos += 1; 957 + } 958 + 959 + // Decode 960 + var out_pos: usize = 0; 961 + var generated: u32 = 0; 962 + var decode_tmp: [512]u8 = undefined; 963 + 964 + while (generated < MAX_NEW_TOKENS and pos < MAX_SEQ_LEN) { 965 + const logits_copy = allocator.alloc(f32, Config.vocab_size) catch return out_pos; 966 + defer allocator.free(logits_copy); 967 + @memcpy(logits_copy, fwd_state.logits); 968 + const next_token = g_sampler.sample(logits_copy); 969 + 970 + if (next_token == Config.eos_token_id or next_token == Config.im_end_id) break; 971 + 972 + // Decode token to bytes 973 + const byte_len = tokenizer.decodeToBytes(next_token, &decode_tmp); 974 + if (byte_len > 0 and out_pos + byte_len <= out_buf.len) { 975 + @memcpy(out_buf[out_pos ..
out_pos + byte_len], decode_tmp[0..byte_len]); 976 + out_pos += byte_len; 977 + } 978 + 979 + _ = inference_model.forward(weights, model_cache, fwd_state, next_token, pos); 980 + pos += 1; 981 + generated += 1; 982 + } 983 + 984 + return out_pos; 985 + } 986 + ``` 987 + 988 + **Step 3: Update WASM exports** 989 + 990 + Replace the `init` export: 991 + 992 + ```zig 993 + /// Initialize model from Q4MF weight data and tokenizer JSON. 994 + /// For WASM: JS passes pointers from IndexedDB blobs. 995 + /// For native: called from CLI after reading files. 996 + export fn init(weights_ptr: [*]const u8, weights_len: u32) bool { 997 + return initFromData(weights_ptr[0..weights_len], null, null); 998 + } 999 + 1000 + /// Extended init that also loads tokenizer from raw JSON bytes. 1001 + export fn init_with_tokenizer( 1002 + weights_ptr: [*]const u8, 1003 + weights_len: u32, 1004 + tok_ptr: [*]const u8, 1005 + tok_len: u32, 1006 + ) bool { 1007 + return initFromData(weights_ptr[0..weights_len], tok_ptr, tok_len); 1008 + } 1009 + 1010 + fn initFromData(weights_data: []const u8, tok_ptr: ?[*]const u8, tok_len: ?u32) bool { 1011 + const allocator = getAllocator(); 1012 + 1013 + // Load tokenizer 1014 + if (tok_ptr) |tp| { 1015 + if (tok_len) |tl| { 1016 + g_tokenizer = inference_tokenizer.Tokenizer.loadFromJson(allocator, tp[0..tl]) catch return false; 1017 + } 1018 + } 1019 + 1020 + // Load Q4 weights 1021 + g_weights = inference_model.loadWeightsQ4(allocator, weights_data, MAX_SEQ_LEN) catch return false; 1022 + 1023 + // Initialize cache and forward state 1024 + g_cache = inference_kv_cache.ModelCache.init(allocator, MAX_SEQ_LEN) catch return false; 1025 + g_fwd_state = inference_model.ForwardState.init(allocator) catch return false; 1026 + 1027 + return true; 1028 + } 1029 + ``` 1030 + 1031 + Replace the `neutralize` export: 1032 + 1033 + ```zig 1034 + /// Full neutralization: extract markers, run model, verify. 
1035 + export fn neutralize(text_ptr: [*]const u8, text_len: u32) bool { 1036 + const text = text_ptr[0..text_len]; 1037 + const allocator = getAllocator(); 1038 + 1039 + // If model not loaded, fall back to profile-only 1040 + if (g_weights == null or g_tokenizer == null) { 1041 + return profile(text_ptr, text_len); 1042 + } 1043 + 1044 + const result = rewrite.neutralize(text, inferenceAdapter, allocator) catch { 1045 + // Fall back to profile on error 1046 + return profile(text_ptr, text_len); 1047 + }; 1048 + 1049 + // Serialize result to JSON 1050 + var jw = json.JsonWriter.init(&output_buf); 1051 + jw.objectBegin(); 1052 + 1053 + jw.key("clean_text"); 1054 + jw.stringValue(result.clean_text); 1055 + jw.key("markers_found"); 1056 + jw.intValue(result.markers_found); 1057 + jw.key("markers_fixed"); 1058 + jw.intValue(result.markers_fixed); 1059 + jw.key("deviation_before"); 1060 + jw.floatValue(result.deviation_before); 1061 + jw.key("deviation_after"); 1062 + jw.floatValue(result.deviation_after); 1063 + 1064 + jw.objectEnd(); 1065 + output_len = @intCast(jw.written().len); 1066 + return true; 1067 + } 1068 + ``` 1069 + 1070 + **Step 4: Build both targets** 1071 + 1072 + ```bash 1073 + cd /home/christian/dev/klearu/klearu-sdc/fantasma-zig 1074 + zig build test 2>&1 | tail -5 1075 + # Expected: all tests pass 1076 + 1077 + zig build 2>&1 | tail -5 1078 + # Expected: native build succeeds 1079 + 1080 + zig build wasm 2>&1 | tail -5 1081 + # Expected: WASM build succeeds (inference now compiles for wasm32) 1082 + ``` 1083 + 1084 + **Step 5: Integration test with native CLI** 1085 + 1086 + ```bash 1087 + # Generate with Q4 model through the full pipeline 1088 + ./zig-out/bin/fantasma generate QwenTheBard "The quick brown fox jumps over the lazy dog." 
1089 + # Expected: generates text (verifies Q4 + SIMD + inference works end-to-end) 1090 + ``` 1091 + 1092 + **Step 6: Commit** 1093 + 1094 + ```bash 1095 + git add src/main.zig 1096 + git commit -m "feat: wire inference into rewrite pipeline, enable WASM inference" 1097 + ``` 1098 + 1099 + --- 1100 + 1101 + ### Task 6: Verify WASM Build Size and Update .gitignore 1102 + 1103 + **Files:** 1104 + - Modify: `.gitignore` 1105 + 1106 + **Step 1: Check WASM binary size** 1107 + 1108 + ```bash 1109 + cd /home/christian/dev/klearu/klearu-sdc/fantasma-zig 1110 + zig build wasm 1111 + ls -lh zig-out/bin/fantasma.wasm 1112 + # Expected: larger than before (~27KB) but reasonable (< 200KB without weights) 1113 + ``` 1114 + 1115 + **Step 2: Update .gitignore for Q4 weights** 1116 + 1117 + Add to `.gitignore`: 1118 + 1119 + ``` 1120 + # Model weights 1121 + QwenTheBard/model.q4 1122 + *.q4 1123 + ``` 1124 + 1125 + **Step 3: Commit** 1126 + 1127 + ```bash 1128 + git add .gitignore 1129 + git commit -m "chore: add Q4 weights to gitignore" 1130 + ``` 1131 + 1132 + --- 1133 + 1134 + ## Verification Checklist 1135 + 1136 + After all tasks are complete, verify: 1137 + 1138 + 1. `python3 scripts/convert_q4.py` produces `QwenTheBard/model.q4` (~400MB) 1139 + 2. `zig build test` — all unit tests pass 1140 + 3. `zig build` — native build succeeds 1141 + 4. `zig build wasm` — WASM build succeeds with inference included 1142 + 5. `./zig-out/bin/fantasma generate QwenTheBard "Hello"` — generates with Q4 model 1143 + 6. Compare Q4 vs BF16 output: run same prompt with both, outputs should be similar quality
+202
src/inference/kv_cache.zig
··· 1 + const std = @import("std"); 2 + 3 + // ============================================================ 4 + // Model configuration constants (Qwen3.5-0.8B) 5 + // ============================================================ 6 + pub const Config = struct { 7 + pub const vocab_size: u32 = 248320; 8 + pub const hidden_size: u32 = 1024; 9 + pub const num_layers: u32 = 24; 10 + pub const rms_norm_eps: f32 = 1e-6; 11 + 12 + // Full attention config 13 + pub const num_attention_heads: u32 = 8; // query heads (actual, not including gate) 14 + pub const num_kv_heads: u32 = 2; 15 + pub const head_dim: u32 = 256; 16 + pub const attn_output_dim: u32 = num_attention_heads * head_dim; // 2048 17 + 18 + // RoPE 19 + pub const partial_rotary_factor: f32 = 0.25; 20 + pub const rotary_dim: u32 = @intFromFloat(@as(f32, head_dim) * partial_rotary_factor); // 64 21 + pub const rope_theta: f32 = 10_000_000.0; 22 + pub const mrope_section: [3]u32 = .{ 11, 11, 10 }; // total = 32 = rotary_dim/2 23 + 24 + // GatedDeltaNet (linear attention) config 25 + pub const linear_num_heads: u32 = 16; 26 + pub const linear_key_head_dim: u32 = 128; 27 + pub const linear_value_head_dim: u32 = 128; 28 + pub const linear_key_dim: u32 = linear_num_heads * linear_key_head_dim; // 2048 29 + pub const linear_value_dim: u32 = linear_num_heads * linear_value_head_dim; // 2048 30 + pub const linear_conv_dim: u32 = linear_key_dim * 2 + linear_value_dim; // 6144 31 + pub const linear_conv_kernel: u32 = 4; 32 + 33 + // MLP 34 + pub const intermediate_size: u32 = 3584; 35 + 36 + // Special tokens 37 + pub const eos_token_id: u32 = 248046; // <|im_end|> 38 + pub const im_start_id: u32 = 248045; 39 + pub const im_end_id: u32 = 248046; 40 + 41 + // Layer types: false = linear_attention, true = full_attention 42 + pub const layer_is_full_attn: [num_layers]bool = .{ 43 + false, false, false, true, // 0-3 44 + false, false, false, true, // 4-7 45 + false, false, false, true, // 8-11 46 + false, false, false, true, // 
12-15 47 + false, false, false, true, // 16-19 48 + false, false, false, true, // 20-23 49 + }; 50 + }; 51 + 52 + // ============================================================ 53 + // Cache for autoregressive generation 54 + // ============================================================ 55 + 56 + /// KV cache for full attention layers. 57 + /// Stores key and value vectors for all past positions. 58 + pub const KVCache = struct { 59 + /// [max_seq_len][num_kv_heads * head_dim] 60 + keys: []f32, 61 + /// [max_seq_len][num_kv_heads * head_dim] 62 + values: []f32, 63 + len: u32, 64 + max_len: u32, 65 + 66 + pub fn init(allocator: std.mem.Allocator, max_seq_len: u32) !KVCache { 67 + const kv_dim = Config.num_kv_heads * Config.head_dim; 68 + return .{ 69 + .keys = try allocator.alloc(f32, max_seq_len * kv_dim), 70 + .values = try allocator.alloc(f32, max_seq_len * kv_dim), 71 + .len = 0, 72 + .max_len = max_seq_len, 73 + }; 74 + } 75 + 76 + pub fn deinit(self: *KVCache, allocator: std.mem.Allocator) void { 77 + allocator.free(self.keys); 78 + allocator.free(self.values); 79 + } 80 + 81 + /// Append a new key/value pair for the current position. 82 + pub fn append(self: *KVCache, key: []const f32, value: []const f32) void { 83 + const kv_dim = Config.num_kv_heads * Config.head_dim; 84 + const offset = self.len * kv_dim; 85 + @memcpy(self.keys[offset .. offset + kv_dim], key); 86 + @memcpy(self.values[offset .. offset + kv_dim], value); 87 + self.len += 1; 88 + } 89 + 90 + /// Get the key vector at a given position. 91 + pub fn getKey(self: *const KVCache, pos: u32) []const f32 { 92 + const kv_dim = Config.num_kv_heads * Config.head_dim; 93 + const offset = pos * kv_dim; 94 + return self.keys[offset .. offset + kv_dim]; 95 + } 96 + 97 + /// Get the value vector at a given position. 
98 + pub fn getValue(self: *const KVCache, pos: u32) []const f32 { 99 + const kv_dim = Config.num_kv_heads * Config.head_dim; 100 + const offset = pos * kv_dim; 101 + return self.values[offset .. offset + kv_dim]; 102 + } 103 + 104 + pub fn reset(self: *KVCache) void { 105 + self.len = 0; 106 + } 107 + }; 108 + 109 + /// Recurrent state for GatedDeltaNet (linear attention) layers. 110 + /// Stores the recurrent state matrix S[num_heads][key_dim][value_dim] 111 + /// and the conv1d state buffer. 112 + pub const DeltaNetState = struct { 113 + /// Recurrent state: [num_heads * key_dim * value_dim] 114 + recurrent: []f32, 115 + /// Conv1d state: [conv_dim * (kernel_size - 1)] 116 + conv_state: []f32, 117 + 118 + pub fn init(allocator: std.mem.Allocator) !DeltaNetState { 119 + const state_size = Config.linear_num_heads * Config.linear_key_head_dim * Config.linear_value_head_dim; 120 + const conv_size = Config.linear_conv_dim * (Config.linear_conv_kernel - 1); 121 + const recurrent = try allocator.alloc(f32, state_size); 122 + const conv_state = try allocator.alloc(f32, conv_size); 123 + @memset(recurrent, 0.0); 124 + @memset(conv_state, 0.0); 125 + return .{ 126 + .recurrent = recurrent, 127 + .conv_state = conv_state, 128 + }; 129 + } 130 + 131 + pub fn deinit(self: *DeltaNetState, allocator: std.mem.Allocator) void { 132 + allocator.free(self.recurrent); 133 + allocator.free(self.conv_state); 134 + } 135 + 136 + pub fn reset(self: *DeltaNetState) void { 137 + @memset(self.recurrent, 0.0); 138 + @memset(self.conv_state, 0.0); 139 + } 140 + 141 + /// Get a mutable slice of the recurrent state for a given head. 142 + /// Returns [key_dim][value_dim] as a flat slice of length key_dim * value_dim. 143 + pub fn getHeadState(self: *DeltaNetState, head: u32) []f32 { 144 + const head_state_size = Config.linear_key_head_dim * Config.linear_value_head_dim; 145 + const offset = head * head_state_size; 146 + return self.recurrent[offset .. 
offset + head_state_size]; 147 + } 148 + }; 149 + 150 + /// Combined cache for all layers. 151 + pub const ModelCache = struct { 152 + kv_caches: [Config.num_layers]?KVCache, 153 + delta_states: [Config.num_layers]?DeltaNetState, 154 + 155 + pub fn init(allocator: std.mem.Allocator, max_seq_len: u32) !ModelCache { 156 + var cache = ModelCache{ 157 + .kv_caches = .{null} ** Config.num_layers, 158 + .delta_states = .{null} ** Config.num_layers, 159 + }; 160 + 161 + for (0..Config.num_layers) |i| { 162 + if (Config.layer_is_full_attn[i]) { 163 + cache.kv_caches[i] = try KVCache.init(allocator, max_seq_len); 164 + } else { 165 + cache.delta_states[i] = try DeltaNetState.init(allocator); 166 + } 167 + } 168 + 169 + return cache; 170 + } 171 + 172 + pub fn deinit(self: *ModelCache, allocator: std.mem.Allocator) void { 173 + for (0..Config.num_layers) |i| { 174 + if (self.kv_caches[i]) |*kv| kv.deinit(allocator); 175 + if (self.delta_states[i]) |*ds| ds.deinit(allocator); 176 + } 177 + } 178 + 179 + pub fn reset(self: *ModelCache) void { 180 + for (0..Config.num_layers) |i| { 181 + if (self.kv_caches[i]) |*kv| kv.reset(); 182 + if (self.delta_states[i]) |*ds| ds.reset(); 183 + } 184 + } 185 + }; 186 + 187 + test "kv cache append and retrieve" { 188 + const allocator = std.testing.allocator; 189 + var kv = try KVCache.init(allocator, 16); 190 + defer kv.deinit(allocator); 191 + 192 + var key: [Config.num_kv_heads * Config.head_dim]f32 = undefined; 193 + var value: [Config.num_kv_heads * Config.head_dim]f32 = undefined; 194 + @memset(&key, 1.0); 195 + @memset(&value, 2.0); 196 + 197 + kv.append(&key, &value); 198 + try std.testing.expectEqual(@as(u32, 1), kv.len); 199 + 200 + const retrieved_k = kv.getKey(0); 201 + try std.testing.expectEqual(@as(f32, 1.0), retrieved_k[0]); 202 + }
+631
src/inference/model.zig
··· 1 + const std = @import("std"); 2 + const q = @import("quantized.zig"); 3 + const cache = @import("kv_cache.zig"); 4 + const Config = cache.Config; 5 + const Tensor = q.Tensor; 6 + 7 + // ============================================================ 8 + // Model weight structure 9 + // ============================================================ 10 + 11 + /// Weights for a full attention layer 12 + const FullAttentionWeights = struct { 13 + q_proj: Tensor, // [num_heads * head_dim * 2, hidden_size] (includes gate) 14 + k_proj: Tensor, // [num_kv_heads * head_dim, hidden_size] 15 + v_proj: Tensor, // [num_kv_heads * head_dim, hidden_size] 16 + o_proj: Tensor, // [hidden_size, num_heads * head_dim] 17 + q_norm: Tensor, // [head_dim] 18 + k_norm: Tensor, // [head_dim] 19 + }; 20 + 21 + /// Weights for a GatedDeltaNet (linear attention) layer 22 + const DeltaNetWeights = struct { 23 + in_proj_qkv: Tensor, // [conv_dim, hidden_size] 24 + in_proj_z: Tensor, // [value_dim, hidden_size] 25 + in_proj_a: Tensor, // [num_heads, hidden_size] 26 + in_proj_b: Tensor, // [num_heads, hidden_size] 27 + conv1d: Tensor, // [conv_dim, 1, kernel_size] 28 + A_log: Tensor, // [num_heads] (F32) 29 + dt_bias: Tensor, // [num_heads] 30 + norm: Tensor, // [value_head_dim] (F32) 31 + out_proj: Tensor, // [hidden_size, value_dim] 32 + }; 33 + 34 + /// Weights for a single decoder layer 35 + const LayerWeights = struct { 36 + input_layernorm: Tensor, // [hidden_size] 37 + post_attention_layernorm: Tensor, // [hidden_size] 38 + 39 + // Attention (one of these is active) 40 + full_attn: ?FullAttentionWeights, 41 + delta_net: ?DeltaNetWeights, 42 + 43 + // MLP 44 + gate_proj: Tensor, // [intermediate_size, hidden_size] 45 + up_proj: Tensor, // [intermediate_size, hidden_size] 46 + down_proj: Tensor, // [hidden_size, intermediate_size] 47 + }; 48 + 49 + /// Complete model weights 50 + pub const ModelWeights = struct { 51 + embed_tokens: Tensor, // [vocab_size, hidden_size] 52 + norm: Tensor, // 
[hidden_size] 53 + layers: [Config.num_layers]LayerWeights, 54 + 55 + // RoPE precomputed cos/sin tables 56 + rope_cos: []f32, // [max_seq_len, rotary_dim] 57 + rope_sin: []f32, // [max_seq_len, rotary_dim] 58 + 59 + allocator: std.mem.Allocator, 60 + }; 61 + 62 + // ============================================================ 63 + // Weight loading from safetensors 64 + // ============================================================ 65 + 66 + pub fn loadWeights(allocator: std.mem.Allocator, model_dir: []const u8, max_seq_len: u32) !ModelWeights { 67 + // Build path to safetensors file 68 + var path_buf: [1024]u8 = undefined; 69 + const path = std.fmt.bufPrint(&path_buf, "{s}/model.safetensors-00001-of-00001.safetensors", .{model_dir}) catch return error.PathTooLong; 70 + 71 + // Memory-map the file 72 + const file = try std.fs.cwd().openFile(path, .{}); 73 + defer file.close(); 74 + const file_size = try file.getEndPos(); 75 + 76 + // Read entire file into memory (mmap would be better but this works cross-platform) 77 + const file_data = try allocator.alloc(u8, file_size); 78 + // Note: we don't free file_data — it backs all tensor views for the model lifetime 79 + const bytes_read = try file.readAll(file_data); 80 + if (bytes_read != file_size) return error.IncompleteRead; 81 + 82 + // Parse safetensors header 83 + var tensor_infos: [512]q.TensorInfo = undefined; 84 + const header = try q.parseSafetensorsHeader(file_data, &tensor_infos); 85 + 86 + // Helper: find tensor by name 87 + const infos = tensor_infos[0..header.count]; 88 + 89 + var weights: ModelWeights = undefined; 90 + weights.allocator = allocator; 91 + 92 + // Embedding 93 + weights.embed_tokens = try findAndLoad(infos, "model.language_model.embed_tokens.weight", file_data); 94 + weights.norm = try findAndLoad(infos, "model.language_model.norm.weight", file_data); 95 + 96 + // Layers 97 + for (0..Config.num_layers) |layer_idx| { 98 + var lw: LayerWeights = undefined; 99 + 100 + // Layer norms 101 + 
lw.input_layernorm = try findAndLoadLayer(infos, layer_idx, "input_layernorm.weight", file_data); 102 + lw.post_attention_layernorm = try findAndLoadLayer(infos, layer_idx, "post_attention_layernorm.weight", file_data); 103 + 104 + // MLP 105 + lw.gate_proj = try findAndLoadLayer(infos, layer_idx, "mlp.gate_proj.weight", file_data); 106 + lw.up_proj = try findAndLoadLayer(infos, layer_idx, "mlp.up_proj.weight", file_data); 107 + lw.down_proj = try findAndLoadLayer(infos, layer_idx, "mlp.down_proj.weight", file_data); 108 + 109 + if (Config.layer_is_full_attn[layer_idx]) { 110 + lw.full_attn = FullAttentionWeights{ 111 + .q_proj = try findAndLoadLayer(infos, layer_idx, "self_attn.q_proj.weight", file_data), 112 + .k_proj = try findAndLoadLayer(infos, layer_idx, "self_attn.k_proj.weight", file_data), 113 + .v_proj = try findAndLoadLayer(infos, layer_idx, "self_attn.v_proj.weight", file_data), 114 + .o_proj = try findAndLoadLayer(infos, layer_idx, "self_attn.o_proj.weight", file_data), 115 + .q_norm = try findAndLoadLayer(infos, layer_idx, "self_attn.q_norm.weight", file_data), 116 + .k_norm = try findAndLoadLayer(infos, layer_idx, "self_attn.k_norm.weight", file_data), 117 + }; 118 + lw.delta_net = null; 119 + } else { 120 + lw.delta_net = DeltaNetWeights{ 121 + .in_proj_qkv = try findAndLoadLayer(infos, layer_idx, "linear_attn.in_proj_qkv.weight", file_data), 122 + .in_proj_z = try findAndLoadLayer(infos, layer_idx, "linear_attn.in_proj_z.weight", file_data), 123 + .in_proj_a = try findAndLoadLayer(infos, layer_idx, "linear_attn.in_proj_a.weight", file_data), 124 + .in_proj_b = try findAndLoadLayer(infos, layer_idx, "linear_attn.in_proj_b.weight", file_data), 125 + .conv1d = try findAndLoadLayer(infos, layer_idx, "linear_attn.conv1d.weight", file_data), 126 + .A_log = try findAndLoadLayer(infos, layer_idx, "linear_attn.A_log", file_data), 127 + .dt_bias = try findAndLoadLayer(infos, layer_idx, "linear_attn.dt_bias", file_data), 128 + .norm = try 
findAndLoadLayer(infos, layer_idx, "linear_attn.norm.weight", file_data), 129 + .out_proj = try findAndLoadLayer(infos, layer_idx, "linear_attn.out_proj.weight", file_data), 130 + }; 131 + lw.full_attn = null; 132 + } 133 + 134 + weights.layers[layer_idx] = lw; 135 + } 136 + 137 + // Precompute RoPE cos/sin tables 138 + const rotary_dim = Config.rotary_dim; 139 + weights.rope_cos = try allocator.alloc(f32, max_seq_len * rotary_dim); 140 + weights.rope_sin = try allocator.alloc(f32, max_seq_len * rotary_dim); 141 + 142 + // inv_freq = 1 / (theta ^ (2i / dim)) for i in 0..dim/2 143 + // HuggingFace layout: emb = cat(freqs, freqs) → [f0,f1,...,f31,f0,f1,...,f31] 144 + const half_dim = rotary_dim / 2; 145 + for (0..max_seq_len) |pos| { 146 + for (0..half_dim) |i| { 147 + const freq = 1.0 / std.math.pow(f32, Config.rope_theta, @as(f32, @floatFromInt(2 * i)) / @as(f32, @floatFromInt(rotary_dim))); 148 + const angle = @as(f32, @floatFromInt(pos)) * freq; 149 + const cos_val = @cos(angle); 150 + const sin_val = @sin(angle); 151 + // First half and second half are identical (cat(freqs, freqs)) 152 + weights.rope_cos[pos * rotary_dim + i] = cos_val; 153 + weights.rope_cos[pos * rotary_dim + half_dim + i] = cos_val; 154 + weights.rope_sin[pos * rotary_dim + i] = sin_val; 155 + weights.rope_sin[pos * rotary_dim + half_dim + i] = sin_val; 156 + } 157 + } 158 + 159 + return weights; 160 + } 161 + 162 + fn findAndLoad(infos: []const q.TensorInfo, name: []const u8, file_data: []const u8) !Tensor { 163 + for (infos) |*info| { 164 + if (std.mem.eql(u8, info.name, name)) { 165 + return try q.tensorFromInfo(info, file_data); 166 + } 167 + } 168 + return q.SafetensorsError.TensorNotFound; 169 + } 170 + 171 + fn findAndLoadLayer(infos: []const q.TensorInfo, layer_idx: usize, suffix: []const u8, file_data: []const u8) !Tensor { 172 + var name_buf: [256]u8 = undefined; 173 + const name = std.fmt.bufPrint(&name_buf, "model.language_model.layers.{d}.{s}", .{ layer_idx, suffix }) catch 
return error.PathTooLong; 174 + return findAndLoad(infos, name, file_data); 175 + } 176 + 177 + // ============================================================ 178 + // Forward pass - single token 179 + // ============================================================ 180 + 181 + /// Scratch buffers for a single forward pass 182 + pub const ForwardState = struct { 183 + hidden: [Config.hidden_size]f32, 184 + residual: [Config.hidden_size]f32, 185 + normed: [Config.hidden_size]f32, 186 + 187 + // Attention temporaries 188 + q_buf: [Config.num_attention_heads * Config.head_dim * 2]f32, // includes gate 189 + k_buf: [Config.num_kv_heads * Config.head_dim]f32, 190 + v_buf: [Config.num_kv_heads * Config.head_dim]f32, 191 + attn_out: [Config.attn_output_dim]f32, 192 + 193 + // DeltaNet temporaries 194 + dn_qkv: [Config.linear_conv_dim]f32, 195 + dn_z: [Config.linear_value_dim]f32, 196 + dn_a: [Config.linear_num_heads]f32, 197 + dn_b: [Config.linear_num_heads]f32, 198 + dn_out: [Config.linear_value_dim]f32, 199 + 200 + // MLP temporaries 201 + mlp_gate: [Config.intermediate_size]f32, 202 + mlp_up: [Config.intermediate_size]f32, 203 + mlp_down: [Config.hidden_size]f32, 204 + 205 + // Logits (reuses space since it's only needed at the end) 206 + logits: []f32, 207 + 208 + allocator: std.mem.Allocator, 209 + 210 + pub fn init(allocator: std.mem.Allocator) !ForwardState { 211 + var state: ForwardState = undefined; 212 + state.allocator = allocator; 213 + state.logits = try allocator.alloc(f32, Config.vocab_size); 214 + return state; 215 + } 216 + 217 + pub fn deinit(self: *ForwardState) void { 218 + self.allocator.free(self.logits); 219 + } 220 + }; 221 + 222 + /// Run a single forward pass for one token. 223 + /// Returns logits over the vocabulary. 224 + pub fn forward( 225 + weights: *const ModelWeights, 226 + model_cache: *cache.ModelCache, 227 + state: *ForwardState, 228 + token: u32, 229 + pos: u32, 230 + ) []f32 { 231 + // 1. 
Embedding lookup 232 + const embed_offset = @as(usize, token) * Config.hidden_size; 233 + for (0..Config.hidden_size) |i| { 234 + state.hidden[i] = weights.embed_tokens.get(embed_offset + i); 235 + } 236 + 237 + // 2. Process each layer 238 + for (0..Config.num_layers) |layer_idx| { 239 + const lw = &weights.layers[layer_idx]; 240 + 241 + // Save residual 242 + @memcpy(&state.residual, &state.hidden); 243 + 244 + // Pre-attention RMSNorm 245 + q.rmsNorm(&state.normed, &state.hidden, &lw.input_layernorm, Config.rms_norm_eps); 246 + 247 + // Attention (full or delta) 248 + if (Config.layer_is_full_attn[layer_idx]) { 249 + fullAttentionForward(weights, lw, &model_cache.kv_caches[layer_idx].?, state, pos); 250 + } else { 251 + deltaNetForward(lw, &model_cache.delta_states[layer_idx].?, state); 252 + } 253 + 254 + // Residual connection 255 + for (0..Config.hidden_size) |i| { 256 + state.hidden[i] = state.residual[i] + state.hidden[i]; 257 + } 258 + 259 + // Save residual for MLP 260 + @memcpy(&state.residual, &state.hidden); 261 + 262 + // Pre-MLP RMSNorm 263 + q.rmsNorm(&state.normed, &state.hidden, &lw.post_attention_layernorm, Config.rms_norm_eps); 264 + 265 + // MLP (SwiGLU) 266 + mlpForward(lw, state); 267 + 268 + // Residual connection 269 + for (0..Config.hidden_size) |i| { 270 + state.hidden[i] = state.residual[i] + state.hidden[i]; 271 + } 272 + } 273 + 274 + // 3. Final RMSNorm 275 + q.rmsNorm(&state.normed, &state.hidden, &weights.norm, Config.rms_norm_eps); 276 + 277 + // 4. 
    // LM head (tied weights: logits = embed_tokens^T @ normed)
    // embed_tokens is [vocab_size, hidden_size]; row i is the output embedding
    // for token i, so each logit is a dot product against the normed hidden.
    for (0..Config.vocab_size) |i| {
        state.logits[i] = weights.embed_tokens.dot(i * Config.hidden_size, &state.normed);
    }

    return state.logits;
}

// ============================================================
// Full Attention (GQA with output gate + QK-norm + partial RoPE)
// ============================================================

/// One decode step of gated grouped-query attention for a single layer.
/// Reads the normalized hidden state from `state.normed`, appends this
/// position's K/V to `kv`, and writes the o_proj output to `state.hidden`.
/// `pos` is the absolute token position, used to index the precomputed
/// RoPE cos/sin tables in `weights`.
fn fullAttentionForward(
    weights: *const ModelWeights,
    lw: *const LayerWeights,
    kv: *cache.KVCache,
    state: *ForwardState,
    pos: u32,
) void {
    const attn = &lw.full_attn.?;
    const num_heads = Config.num_attention_heads;
    const num_kv_heads = Config.num_kv_heads;
    const head_dim = Config.head_dim;
    const kv_groups = num_heads / num_kv_heads; // query heads sharing one KV head (GQA)
    const rotary_dim = Config.rotary_dim;

    // Project Q (includes gate), K, V
    q.matVec(&attn.q_proj, &state.normed, &state.q_buf);
    q.matVec(&attn.k_proj, &state.normed, &state.k_buf);
    q.matVec(&attn.v_proj, &state.normed, &state.v_buf);

    // Split Q into query and gate: q_buf is [num_heads * head_dim * 2],
    // organized as [num_heads][head_dim * 2]. Within each head, the first
    // head_dim elements are the query and the second head_dim are the gate.
    var gate_buf: [Config.attn_output_dim]f32 = undefined;
    for (0..num_heads) |h| {
        const src_offset = h * head_dim * 2;
        const dst_offset = h * head_dim;
        for (0..head_dim) |d| {
            state.attn_out[dst_offset + d] = state.q_buf[src_offset + d]; // query
            gate_buf[dst_offset + d] = state.q_buf[src_offset + head_dim + d]; // gate
        }
    }
    // state.attn_out temporarily holds the packed query; copy it out so
    // attn_out can be reused for the attention output below.
    var query: [Config.attn_output_dim]f32 = undefined;
    @memcpy(&query, &state.attn_out);

    // QK-norm: RMS-normalize each head of Q and K (per-head shared weights).
    for (0..num_heads) |h| {
        const offset = h * head_dim;
        q.rmsNorm(
            query[offset .. offset + head_dim],
            query[offset .. offset + head_dim],
            &attn.q_norm,
            Config.rms_norm_eps,
        );
    }
    for (0..num_kv_heads) |h| {
        const offset = h * head_dim;
        q.rmsNorm(
            state.k_buf[offset .. offset + head_dim],
            state.k_buf[offset .. offset + head_dim],
            &attn.k_norm,
            Config.rms_norm_eps,
        );
    }

    // Apply partial RoPE (only the first rotary_dim dims of each head rotate).
    const cos = weights.rope_cos[pos * rotary_dim .. (pos + 1) * rotary_dim];
    const sin = weights.rope_sin[pos * rotary_dim .. (pos + 1) * rotary_dim];

    for (0..num_heads) |h| {
        applyRope(query[h * head_dim ..][0..head_dim], cos, sin, rotary_dim);
    }
    for (0..num_kv_heads) |h| {
        applyRope(state.k_buf[h * head_dim ..][0..head_dim], cos, sin, rotary_dim);
    }

    // Store K, V in cache
    kv.append(&state.k_buf, &state.v_buf);

    // Compute attention for each query head
    const scale = 1.0 / @sqrt(@as(f32, @floatFromInt(head_dim)));
    const seq_len = kv.len;

    for (0..num_heads) |h| {
        const kv_h = h / kv_groups; // which KV head this query head uses
        const q_offset = h * head_dim;

        var max_score: f32 = -std.math.inf(f32);

        // Scratch for attention scores. Reuses mlp_gate, which is free at this
        // point because the MLP runs after attention. Assumes seq_len <=
        // intermediate_size — TODO confirm the context limit guarantees this.
        const scores = state.mlp_gate[0..seq_len];

        for (0..seq_len) |t| {
            const cached_k = kv.getKey(@intCast(t));
            const k_head = cached_k[kv_h * head_dim .. (kv_h + 1) * head_dim];

            var score: f32 = 0.0;
            for (0..head_dim) |d| {
                score += query[q_offset + d] * k_head[d];
            }
            score *= scale;
            scores[t] = score;
            if (score > max_score) max_score = score;
        }

        // Numerically stable softmax over the scores (shift by max).
        var sum_exp: f32 = 0.0;
        for (scores) |*s| {
            s.* = @exp(s.* - max_score);
            sum_exp += s.*;
        }
        if (sum_exp > 0.0) {
            const inv_sum = 1.0 / sum_exp;
            for (scores) |*s| {
                s.* *= inv_sum;
            }
        }

        // Weighted sum of cached values into this head's output slot.
        const out_offset = h * head_dim;
        @memset(state.attn_out[out_offset .. out_offset + head_dim], 0.0);
        for (0..seq_len) |t| {
            const cached_v = kv.getValue(@intCast(t));
            const v_head = cached_v[kv_h * head_dim .. (kv_h + 1) * head_dim];
            const w = scores[t];
            for (0..head_dim) |d| {
                state.attn_out[out_offset + d] += w * v_head[d];
            }
        }
    }

    // Output gate: attn_out *= sigmoid(gate), elementwise across all heads.
    // gate_buf is already flat [num_heads * head_dim], matching attn_out.
    for (0..Config.attn_output_dim) |i| {
        state.attn_out[i] *= q.sigmoid(gate_buf[i]);
    }

    // Output projection: hidden = o_proj @ attn_out
    q.matVec(&attn.o_proj, &state.attn_out, &state.hidden);
}

/// Apply rotary position embedding to the first rotary_dim elements of x.
/// x is [head_dim], only first rotary_dim elements are rotated.
fn applyRope(x: []f32, cos: []const f32, sin: []const f32, rotary_dim: u32) void {
    // HuggingFace "rotate_half" convention (NOT interleaved pairs):
    //   x1 = x[0..half], x2 = x[half..], rotate_half(x) = cat(-x2, x1)
    //   x_embed = x * cos + rotate_half(x) * sin
    // i.e.  x'[i]      = x[i] * cos[i]           - x[i+half] * sin[i]
    //       x'[i+half] = x[i+half] * cos[i+half] + x[i] * sin[i+half]
    // cos/sin are [rotary_dim] with the half-table duplicated, so
    // cos[i] == cos[i+half] (and likewise for sin) — TODO confirm the tables
    // are built that way in the weight loader.
    const half = rotary_dim / 2;
    var i: usize = 0;
    while (i < half) : (i += 1) {
        const x0 = x[i];
        const x1 = x[i + half];
        x[i] = x0 * cos[i] - x1 * sin[i];
        x[i + half] = x1 * cos[i + half] + x0 * sin[i + half];
    }
}

// ============================================================
// GatedDeltaNet (linear attention) - recurrent mode
// ============================================================

/// One recurrent decode step of GatedDeltaNet for a single layer.
/// Reads `state.normed`, updates the per-layer causal-conv and delta-rule
/// recurrent state in `delta_state`, and writes the out_proj result to
/// `state.hidden`.
fn deltaNetForward(
    lw: *const LayerWeights,
    delta_state: *cache.DeltaNetState,
    state: *ForwardState,
) void {
    const dn = &lw.delta_net.?;
    const num_heads = Config.linear_num_heads;
    const key_dim = Config.linear_key_head_dim;
    const value_dim = Config.linear_value_head_dim;
    const conv_dim = Config.linear_conv_dim;
    const kernel_size = Config.linear_conv_kernel;

    // 1. Project QKV
    q.matVec(&dn.in_proj_qkv, &state.normed, &state.dn_qkv);

    // 2. Project Z (gate), A, B
    q.matVec(&dn.in_proj_z, &state.normed, &state.dn_z);
    q.matVec(&dn.in_proj_a, &state.normed, &state.dn_a);
    q.matVec(&dn.in_proj_b, &state.normed, &state.dn_b);

    // 3. Causal conv1d update (single step).
    // conv_state is [conv_dim, kernel_size - 1], stored row-major as
    // [conv_dim][state_cols]. The full window for one channel is
    // [conv_state..., new_input] = kernel_size values. The convolution is
    // computed BEFORE the state shift so it sees the pre-update window.
    const state_cols = kernel_size - 1;
    for (0..conv_dim) |ch| {
        const cs_base = ch * state_cols;

        // Depthwise convolution: dot(window, per-channel kernel)
        var conv_out: f32 = 0.0;
        for (0..state_cols) |k| {
            conv_out += delta_state.conv_state[cs_base + k] * dn.conv1d.get(ch * kernel_size + k);
        }
        conv_out += state.dn_qkv[ch] * dn.conv1d.get(ch * kernel_size + state_cols);

        // Update state: shift left and append the new input
        var j: usize = 0;
        while (j + 1 < state_cols) : (j += 1) {
            delta_state.conv_state[cs_base + j] = delta_state.conv_state[cs_base + j + 1];
        }
        delta_state.conv_state[cs_base + state_cols - 1] = state.dn_qkv[ch];

        // SiLU activation, written back in place over the pre-conv value
        state.dn_qkv[ch] = q.silu(conv_out);
    }

    // 4. Split QKV: dn_qkv is laid out as [key_dim total, key_dim total, value_dim total]
    const key_total = Config.linear_key_dim; // 2048
    const q_start: usize = 0;
    const k_start = key_total;
    const v_start = key_total * 2;

    // 5. Per-head decay g and write-gate beta:
    //   g    = -exp(A_log) * softplus(a + dt_bias)
    //   beta = sigmoid(b)
    var g_vals: [Config.linear_num_heads]f32 = undefined;
    var beta_vals: [Config.linear_num_heads]f32 = undefined;
    for (0..num_heads) |h| {
        const a_log = dn.A_log.get(h);
        const a_val = state.dn_a[h];
        const dt_b = dn.dt_bias.get(h);
        g_vals[h] = -@exp(a_log) * q.softplus(a_val + dt_b);
        beta_vals[h] = q.sigmoid(state.dn_b[h]);
    }

    // 6. Recurrent delta rule (single step per head). For each head h:
    //   q_t = l2norm(q slice) * scale,  k_t = l2norm(k slice),  v_t = v slice
    //   S   = S * exp(g[h]) + k_t (v_t - S^T k_t)^T * beta[h]
    //   out = S^T q_t
    const scale = 1.0 / @sqrt(@as(f32, @floatFromInt(key_dim)));

    for (0..num_heads) |h| {
        const state_matrix = delta_state.getHeadState(@intCast(h));

        // Extract and L2-normalize Q and K for this head
        var q_head: [Config.linear_key_head_dim]f32 = undefined;
        var k_head: [Config.linear_key_head_dim]f32 = undefined;

        @memcpy(&q_head, state.dn_qkv[q_start + h * key_dim ..][0..key_dim]);
        @memcpy(&k_head, state.dn_qkv[k_start + h * key_dim ..][0..key_dim]);

        q.l2Norm(&q_head);
        q.l2Norm(&k_head);

        // Scale the query (1/sqrt(key_dim))
        for (&q_head) |*qv| {
            qv.* *= scale;
        }

        const v_head = state.dn_qkv[v_start + h * value_dim ..][0..value_dim];

        // Decay the state: S *= exp(g)
        const decay = @exp(g_vals[h]);
        for (state_matrix) |*s| {
            s.* *= decay;
        }

        // kv_mem[v] = sum_d(S[d,v] * k[d]) — the Python reference is
        // (last_recurrent_state * k_t.unsqueeze(-1)).sum(dim=-2).
        // S is stored row-major as [key_dim][value_dim].
        var kv_mem: [Config.linear_value_head_dim]f32 = undefined;
        for (0..value_dim) |v| {
            var sum: f32 = 0.0;
            for (0..key_dim) |d| {
                sum += state_matrix[d * value_dim + v] * k_head[d];
            }
            kv_mem[v] = sum;
        }

        // delta = (v_t - kv_mem) * beta
        var delta: [Config.linear_value_head_dim]f32 = undefined;
        for (0..value_dim) |v| {
            delta[v] = (v_head[v] - kv_mem[v]) * beta_vals[h];
        }

        // Rank-1 state update: S[d,v] += k[d] * delta[v]
        for (0..key_dim) |d| {
            for (0..value_dim) |v| {
                state_matrix[d * value_dim + v] += k_head[d] * delta[v];
            }
        }

        // Output: out[v] = sum_d(S[d,v] * q[d])
        for (0..value_dim) |v| {
            var sum: f32 = 0.0;
            for (0..key_dim) |d| {
                sum += state_matrix[d * value_dim + v] * q_head[d];
            }
            state.dn_out[h * value_dim + v] = sum;
        }
    }

    // 7. Gated RMS norm: out = rmsNorm(dn_out) * silu(z), applied per head
    //    (the norm weight is [value_head_dim]).
    for (0..num_heads) |h| {
        const offset = h * value_dim;
        q.rmsNormGated(
            state.dn_out[offset .. offset + value_dim],
            state.dn_out[offset .. offset + value_dim],
            state.dn_z[offset .. offset + value_dim],
            &dn.norm,
            Config.rms_norm_eps,
        );
    }

    // 8. Output projection: hidden = out_proj @ dn_out
    q.matVec(&dn.out_proj, &state.dn_out, &state.hidden);
}

// ============================================================
// SwiGLU MLP
// ============================================================

/// SwiGLU feed-forward: hidden = down_proj @ (silu(gate_proj @ normed) * (up_proj @ normed)).
/// Reads `state.normed`, clobbers `state.mlp_gate`/`state.mlp_up`, writes `state.hidden`.
fn mlpForward(lw: *const LayerWeights, state: *ForwardState) void {
    q.matVec(&lw.gate_proj, &state.normed, &state.mlp_gate);
    q.matVec(&lw.up_proj, &state.normed, &state.mlp_up);

    // gate = silu(gate) * up, fused in place
    for (0..Config.intermediate_size) |i| {
        state.mlp_gate[i] = q.silu(state.mlp_gate[i]) * state.mlp_up[i];
    }

    // hidden = down_proj @ gate
    q.matVec(&lw.down_proj, state.mlp_gate[0..Config.intermediate_size], &state.hidden);
}
+396
src/inference/quantized.zig
const std = @import("std");

/// BF16 to F32 conversion and tensor utilities for weight loading.
/// Weights are stored as BF16 (u16) and converted to F32 on the fly.

/// Convert a single BF16 value (u16) to F32.
pub inline fn bf16ToF32(v: u16) f32 {
    // BF16 is exactly the upper 16 bits of an F32's bit pattern.
    const bits: u32 = @as(u32, v) << 16;
    return @bitCast(bits);
}

/// Convert a single F32 to BF16 (truncation, no round-to-nearest).
pub inline fn f32ToBf16(v: f32) u16 {
    const bits: u32 = @bitCast(v);
    return @intCast(bits >> 16);
}

/// A view into a weight tensor stored as BF16 or F32.
/// Does not own the data — points into the mmap'd safetensors buffer.
/// Exactly one of data_bf16 / data_f32 is set for a loaded tensor; an
/// unset (default) view silently reads as 0.0 everywhere.
pub const Tensor = struct {
    /// Raw BF16 data (u16 elements)
    data_bf16: ?[]const u16 = null,
    /// Raw F32 data (some tensors like A_log are stored as F32)
    data_f32: ?[]const f32 = null,
    /// Shape dimensions (unused trailing entries are 0)
    shape: [4]u32 = .{ 0, 0, 0, 0 },
    /// Number of dimensions
    ndim: u8 = 0,

    /// Total number of elements (product of the first ndim shape entries).
    pub fn numel(self: *const Tensor) usize {
        if (self.ndim == 0) return 0;
        var n: usize = 1;
        for (self.shape[0..self.ndim]) |d| {
            n *= d;
        }
        return n;
    }

    /// Get a single F32 value from the tensor by flat index.
    /// Returns 0.0 if the view has no backing data.
    pub inline fn get(self: *const Tensor, idx: usize) f32 {
        if (self.data_f32) |f| return f[idx];
        if (self.data_bf16) |b| return bf16ToF32(b[idx]);
        return 0.0;
    }

    /// Perform dot product: sum(self[offset..offset+len] * other[0..len])
    /// where self is the weight tensor and other is an f32 slice.
    /// Returns 0.0 if the view has no backing data.
    pub fn dot(self: *const Tensor, offset: usize, other: []const f32) f32 {
        const n = other.len;
        var sum: f32 = 0.0;

        if (self.data_f32) |f| {
            const slice = f[offset .. offset + n];
            for (slice, other) |a, b| {
                sum += a * b;
            }
            return sum;
        }

        if (self.data_bf16) |b| {
            const slice = b[offset .. offset + n];
            // Unroll by 4: amortizes loop overhead around the bf16 converts.
            // Note the 4 products are summed within one statement, which
            // fixes a specific FP accumulation order.
            const n4 = n & ~@as(usize, 3);
            var i: usize = 0;
            while (i < n4) : (i += 4) {
                sum += bf16ToF32(slice[i]) * other[i] +
                    bf16ToF32(slice[i + 1]) * other[i + 1] +
                    bf16ToF32(slice[i + 2]) * other[i + 2] +
                    bf16ToF32(slice[i + 3]) * other[i + 3];
            }
            // Scalar tail for the remaining 0..3 elements
            while (i < n) : (i += 1) {
                sum += bf16ToF32(slice[i]) * other[i];
            }
            return sum;
        }

        return 0.0;
    }
};

/// Matrix-vector multiply: out = weight @ input
/// weight shape: [out_dim, in_dim], input shape: [in_dim], out shape: [out_dim]
pub fn matVec(weight: *const Tensor, input: []const f32, out: []f32) void {
    const out_dim = out.len;
    for (0..out_dim) |i| {
        out[i] = weight.dot(i * input.len, input);
    }
}

/// Matrix-vector multiply and accumulate: out += weight @ input
pub fn matVecAdd(weight: *const Tensor, input: []const f32, out: []f32) void {
    const out_dim = out.len;
    for (0..out_dim) |i| {
        out[i] += weight.dot(i * input.len, input);
    }
}

/// RMS normalization: out = x * rsqrt(mean(x^2) + eps) * (1 + weight)
/// Qwen3.5 uses (1+weight) scaling since weights are initialized to 0.
/// `out` may alias `x` (callers normalize in place).
pub fn rmsNorm(out: []f32, x: []const f32, weight: *const Tensor, eps: f32) void {
    const n = x.len;
    var sum_sq: f32 = 0.0;
    for (x) |v| {
        sum_sq += v * v;
    }
    const rms = 1.0 / @sqrt(sum_sq / @as(f32, @floatFromInt(n)) + eps);
    for (0..n) |i| {
        out[i] = x[i] * rms * (1.0 + weight.get(i));
    }
}

/// RMS normalization with gating: out = rmsNorm(x) * silu(gate)
/// Used in GatedDeltaNet for the output normalization.
pub fn rmsNormGated(out: []f32, x: []const f32, gate: []const f32, weight: *const Tensor, eps: f32) void {
    const n = x.len;
    var sum_sq: f32 = 0.0;
    for (x) |v| {
        sum_sq += v * v;
    }
    const rms = 1.0 / @sqrt(sum_sq / @as(f32, @floatFromInt(n)) + eps);
    for (0..n) |i| {
        // NOTE(review): unlike rmsNorm above, the gated norm scales by
        // `weight` directly (no 1+weight) — confirm against the reference
        // GatedDeltaNet implementation.
        const normed = x[i] * rms * weight.get(i);
        const g = gate[i];
        const silu_g = g * sigmoid(g);
        out[i] = normed * silu_g;
    }
}

/// SiLU (swish): x * sigmoid(x).
pub inline fn silu(x: f32) f32 {
    return x * sigmoid(x);
}

/// Logistic sigmoid: 1 / (1 + exp(-x)).
pub inline fn sigmoid(x: f32) f32 {
    return 1.0 / (1.0 + @exp(-x));
}

/// Numerically stable softplus: log(1 + exp(x)).
/// The naive form overflows to +inf for x >~ 88 (f32 exp limit); this uses
/// the identity softplus(x) = max(x, 0) + log1p(exp(-|x|)), which is
/// well-behaved for all magnitudes and agrees with the naive form in the
/// non-overflowing range.
pub inline fn softplus(x: f32) f32 {
    return @max(x, 0.0) + std.math.log1p(@exp(-@abs(x)));
}

/// L2-normalize a vector in place: x /= sqrt(sum(x^2) + eps).
/// (eps = 1e-6 guards against division by zero for the all-zero vector.)
pub fn l2Norm(x: []f32) void {
    const eps: f32 = 1e-6;
    var sum_sq: f32 = 0.0;
    for (x) |v| {
        sum_sq += v * v;
    }
    const inv_norm = 1.0 / @sqrt(sum_sq + eps);
    for (x) |*v| {
        v.* *= inv_norm;
    }
}

// ============================================================
// Safetensors parser
// ============================================================

pub const SafetensorsError = error{
    InvalidFormat,
    UnsupportedDtype,
    TensorNotFound,
};

/// Metadata for one tensor, extracted from the safetensors JSON header.
pub const TensorInfo = struct {
    name: []const u8, // points into the header JSON, not owned
    dtype: []const u8, // e.g. "BF16", "F32"
    shape: [4]u32,
    ndim: u8,
    data_offset: usize, // absolute offset into the file (header already added)
    data_len: usize,
};

/// Parse safetensors header and return tensor metadata.
/// The file format is: [8 bytes: header_len LE u64] [header_len bytes: JSON] [raw tensor data]
/// Fills `out_tensors` in header order, skipping the "__metadata__" entry.
/// Silently stops once `out_tensors` is full — callers must size it for the
/// expected tensor count. Returned data_offsets are absolute file offsets.
/// The JSON is scanned by hand (avoid std.json for WASM compat).
pub fn parseSafetensorsHeader(
    file_data: []const u8,
    out_tensors: []TensorInfo,
) SafetensorsError!struct { count: usize, data_start: usize } {
    if (file_data.len < 8) return SafetensorsError.InvalidFormat;

    const header_len = std.mem.readInt(u64, file_data[0..8], .little);
    if (8 + header_len > file_data.len) return SafetensorsError.InvalidFormat;

    const data_start = 8 + @as(usize, @intCast(header_len));
    const header_json = file_data[8..data_start];

    var count: usize = 0;
    var pos: usize = 0;

    while (pos < header_json.len and count < out_tensors.len) {
        // Find next key (tensor name) - skip __metadata__
        const key_start = findChar(header_json, pos, '"') orelse break;
        const key_end = findChar(header_json, key_start + 1, '"') orelse break;
        // Note: `name` aliases the header bytes inside file_data; it stays
        // valid only as long as file_data does.
        const key = header_json[key_start + 1 .. key_end];
        pos = key_end + 1;

        if (std.mem.eql(u8, key, "__metadata__")) {
            // Skip metadata object
            pos = skipJsonValue(header_json, pos);
            continue;
        }

        // Parse tensor info object
        const obj_start = findChar(header_json, pos, '{') orelse break;
        pos = obj_start + 1;

        var info = TensorInfo{
            .name = key,
            .dtype = "",
            .shape = .{ 0, 0, 0, 0 },
            .ndim = 0,
            .data_offset = 0,
            .data_len = 0,
        };

        // Parse fields within the tensor info object
        var brace_depth: u32 = 1;
        while (pos < header_json.len and brace_depth > 0) {
            if (header_json[pos] == '}') {
                brace_depth -= 1;
                pos += 1;
                continue;
            }

            const fk_start = findChar(header_json, pos, '"') orelse break;
            const fk_end = findChar(header_json, fk_start + 1, '"') orelse break;
            const field_key = header_json[fk_start + 1 .. fk_end];
            pos = fk_end + 1;

            // Skip colon and any following space
            while (pos < header_json.len and header_json[pos] != ':') pos += 1;
            pos += 1;
            while (pos < header_json.len and header_json[pos] == ' ') pos += 1;

            if (std.mem.eql(u8, field_key, "dtype")) {
                const ds = findChar(header_json, pos, '"') orelse break;
                const de = findChar(header_json, ds + 1, '"') orelse break;
                info.dtype = header_json[ds + 1 .. de];
                pos = de + 1;
            } else if (std.mem.eql(u8, field_key, "shape")) {
                // Parse array of ints; dims beyond 4 are silently dropped
                const arr_start = findChar(header_json, pos, '[') orelse break;
                const arr_end = findChar(header_json, arr_start, ']') orelse break;
                var dim_pos = arr_start + 1;
                while (dim_pos < arr_end and info.ndim < 4) {
                    while (dim_pos < arr_end and !isDigit(header_json[dim_pos])) dim_pos += 1;
                    if (dim_pos >= arr_end) break;
                    var num: u32 = 0;
                    while (dim_pos < arr_end and isDigit(header_json[dim_pos])) {
                        num = num * 10 + @as(u32, header_json[dim_pos] - '0');
                        dim_pos += 1;
                    }
                    info.shape[info.ndim] = num;
                    info.ndim += 1;
                }
                pos = arr_end + 1;
            } else if (std.mem.eql(u8, field_key, "data_offsets")) {
                // Parse [start, end] — offsets are relative to the data section
                const arr_start = findChar(header_json, pos, '[') orelse break;
                const arr_end = findChar(header_json, arr_start, ']') orelse break;
                var dim_pos = arr_start + 1;
                var offsets: [2]usize = .{ 0, 0 };
                var oi: usize = 0;
                while (dim_pos < arr_end and oi < 2) {
                    while (dim_pos < arr_end and !isDigit(header_json[dim_pos])) dim_pos += 1;
                    if (dim_pos >= arr_end) break;
                    var num: usize = 0;
                    while (dim_pos < arr_end and isDigit(header_json[dim_pos])) {
                        num = num * 10 + @as(usize, header_json[dim_pos] - '0');
                        dim_pos += 1;
                    }
                    offsets[oi] = num;
                    oi += 1;
                }
                info.data_offset = offsets[0];
                info.data_len = offsets[1] - offsets[0];
                pos = arr_end + 1;
            } else {
                pos = skipJsonValue(header_json, pos);
            }
        }

        // Convert the section-relative offset to an absolute file offset
        info.data_offset += data_start;

        out_tensors[count] = info;
        count += 1;
    }

    return .{ .count = count, .data_start = data_start };
}

/// Create a Tensor view from a TensorInfo and the raw file data.
/// Returns a zero-copy view over `file_data` typed per the declared dtype.
/// Only BF16 and F32 are supported; anything else is UnsupportedDtype.
/// NOTE(review): the @alignCast assumes the data section is suitably aligned
/// (2 bytes for BF16, 4 for F32). The safetensors format does not guarantee
/// this — an unaligned header length would trip the safety check. Verify the
/// loader pads/aligns the buffer.
pub fn tensorFromInfo(info: *const TensorInfo, file_data: []const u8) SafetensorsError!Tensor {
    var t = Tensor{
        .shape = info.shape,
        .ndim = info.ndim,
    };

    const raw = file_data[info.data_offset .. info.data_offset + info.data_len];

    if (std.mem.eql(u8, info.dtype, "BF16") or std.mem.eql(u8, info.dtype, "bf16")) {
        const aligned: []align(@alignOf(u16)) const u8 = @alignCast(raw);
        t.data_bf16 = std.mem.bytesAsSlice(u16, aligned);
    } else if (std.mem.eql(u8, info.dtype, "F32") or std.mem.eql(u8, info.dtype, "f32")) {
        const aligned: []align(@alignOf(f32)) const u8 = @alignCast(raw);
        t.data_f32 = std.mem.bytesAsSlice(f32, aligned);
    } else {
        return SafetensorsError.UnsupportedDtype;
    }

    return t;
}

/// Find the next unescaped occurrence of `c` at or after `start`.
/// (Only checks the single preceding byte, so an escaped backslash before
/// a quote, `\\"`, is mis-detected — fine for tensor-name keys.)
fn findChar(buf: []const u8, start: usize, c: u8) ?usize {
    var i = start;
    while (i < buf.len) : (i += 1) {
        if (buf[i] == c and (i == 0 or buf[i - 1] != '\\')) return i;
    }
    return null;
}

fn isDigit(c: u8) bool {
    return c >= '0' and c <= '9';
}

/// Skip one JSON value (string/object/array/scalar) starting at or after
/// `start`, returning the position just past it. Leading ' ', ':' and ','
/// separators are consumed first.
fn skipJsonValue(buf: []const u8, start: usize) usize {
    var pos = start;
    while (pos < buf.len and (buf[pos] == ' ' or buf[pos] == ':' or buf[pos] == ',')) pos += 1;
    if (pos >= buf.len) return pos;

    return switch (buf[pos]) {
        '"' => {
            // Skip string (respecting backslash escapes on the closing quote)
            pos += 1;
            while (pos < buf.len) : (pos += 1) {
                if (buf[pos] == '"' and buf[pos - 1] != '\\') return pos + 1;
            }
            return pos;
        },
        '{' => {
            // Skip object by brace depth
            var depth: u32 = 1;
            pos += 1;
            while (pos < buf.len and depth > 0) : (pos += 1) {
                if (buf[pos] == '{') depth += 1;
                if (buf[pos] == '}') depth -= 1;
            }
            return pos;
        },
        '[' => {
            // Skip array by bracket depth
            var depth: u32 = 1;
            pos += 1;
            while (pos < buf.len and depth > 0) : (pos += 1) {
                if (buf[pos] == '[') depth += 1;
                if (buf[pos] == ']') depth -= 1;
            }
            return pos;
        },
        else => {
            // Number, bool, null: scan to the next delimiter
            while (pos < buf.len and buf[pos] != ',' and buf[pos] != '}' and buf[pos] != ']') pos += 1;
            return pos;
        },
    };
}

test "bf16 conversion" {
    // 1.0 in f32 is 0x3F800000, in bf16 is 0x3F80
    const one = bf16ToF32(0x3F80);
    try std.testing.expectEqual(@as(f32, 1.0), one);

    const back = f32ToBf16(1.0);
    try std.testing.expectEqual(@as(u16, 0x3F80), back);

    // 0.0
    const zero = bf16ToF32(0x0000);
    try std.testing.expectEqual(@as(f32, 0.0), zero);
}

test "rms norm" {
    var x = [_]f32{ 1.0, 2.0, 3.0, 4.0 };
    var out: [4]f32 = undefined;

    // Weight of all zeros means (1+0)=1 scaling (identity weight)
    var w_data = [_]f32{ 0.0, 0.0, 0.0, 0.0 };
    const w = Tensor{ .data_f32 = &w_data, .shape = .{ 4, 0, 0, 0 }, .ndim = 1 };

    rmsNorm(&out, &x, &w, 1e-6);

    // RMS of [1,2,3,4] = sqrt((1+4+9+16)/4) = sqrt(30/4) = sqrt(7.5)
    const rms_val = @sqrt(30.0 / 4.0);
    const expected_0 = 1.0 / rms_val;
    try std.testing.expectApproxEqAbs(expected_0, out[0], 1e-5);
}
+139
src/inference/sampler.zig
const std = @import("std");

/// Temperature + top-k sampling for autoregressive generation.
pub const Sampler = struct {
    temperature: f32, // <= 0 (or top_k == 1) means greedy argmax
    top_k: u32, // effective cap is 256 (size of the internal buffer)
    xoshiro: std.Random.Xoshiro256,

    pub fn init(temperature: f32, top_k: u32, seed: u64) Sampler {
        return .{
            .temperature = temperature,
            .top_k = top_k,
            .xoshiro = std.Random.Xoshiro256.init(seed),
        };
    }

    /// Sample a token index from logits.
    /// Applies temperature scaling, then top-k filtering, then categorical sampling.
    /// NOTE: mutates `logits` in place (scaled, then overwritten with the
    /// filtered softmax distribution) — callers must not reuse the buffer.
    /// NOTE(review): top_k == 0 zeroes everything and degenerates to token 0;
    /// presumably callers always pass top_k >= 1 — verify.
    pub fn sample(self: *Sampler, logits: []f32) u32 {
        const n = logits.len;
        if (n == 0) return 0;

        // Temperature scaling (skipped for temperature <= 0, which is greedy)
        if (self.temperature != 1.0 and self.temperature > 0.0) {
            const inv_temp = 1.0 / self.temperature;
            for (logits) |*l| {
                l.* *= inv_temp;
            }
        }

        // Greedy (temperature ~0, or top_k == 1)
        if (self.temperature <= 0.0 or self.top_k == 1) {
            return argmax(logits);
        }

        // Top-k: find the k-th largest value and zero out everything below
        const k = @min(self.top_k, @as(u32, @intCast(n)));

        var threshold: f32 = -std.math.inf(f32);
        if (k < n) {
            // Maintain the current top-k values in a small buffer, tracking
            // their minimum; that minimum is the filtering threshold.
            // The fixed 256-entry buffer silently caps the effective k at 256.
            var top_k_min: f32 = std.math.inf(f32);
            var top_k_vals: [256]f32 = undefined;
            const actual_k: usize = @min(k, 256);
            var count: usize = 0;

            for (logits) |l| {
                if (count < actual_k) {
                    top_k_vals[count] = l;
                    count += 1;
                    if (count == actual_k) {
                        // Buffer just filled: compute its min once
                        top_k_min = top_k_vals[0];
                        for (top_k_vals[1..actual_k]) |v| {
                            if (v < top_k_min) top_k_min = v;
                        }
                    }
                } else if (l > top_k_min) {
                    // Replace (one occurrence of) the min, then re-scan for
                    // the new min of the buffer.
                    for (&top_k_vals, 0..) |*v, i| {
                        _ = i;
                        if (v.* == top_k_min) {
                            v.* = l;
                            break;
                        }
                    }
                    top_k_min = top_k_vals[0];
                    for (top_k_vals[1..actual_k]) |v| {
                        if (v < top_k_min) top_k_min = v;
                    }
                }
            }
            threshold = top_k_min;
        }

        // Softmax with top-k filtering (ties at the threshold all survive,
        // so slightly more than k entries may remain)
        var max_val: f32 = -std.math.inf(f32);
        for (logits) |l| {
            if (l >= threshold and l > max_val) max_val = l;
        }

        var sum: f32 = 0.0;
        for (logits) |*l| {
            if (l.* >= threshold) {
                l.* = @exp(l.* - max_val);
                sum += l.*;
            } else {
                l.* = 0.0;
            }
        }

        if (sum <= 0.0) return argmax(logits);

        // Normalize to a probability distribution
        const inv_sum = 1.0 / sum;
        for (logits) |*l| {
            l.* *= inv_sum;
        }

        // Categorical sampling via inverse CDF walk
        var r = self.xoshiro.random().float(f32);
        for (logits, 0..) |p, i| {
            r -= p;
            if (r <= 0.0) return @intCast(i);
        }

        // Floating-point slack: fall back to the last index
        return @intCast(n - 1);
    }
};

/// Index of the maximum value (first occurrence wins; 0 for empty input).
fn argmax(vals: []const f32) u32 {
    var best: u32 = 0;
    var best_val: f32 = -std.math.inf(f32);
    for (vals, 0..) |v, i| {
        if (v > best_val) {
            best_val = v;
            best = @intCast(i);
        }
    }
    return best;
}

test "sampler greedy" {
    var logits = [_]f32{ 1.0, 3.0, 2.0, 0.5 };
    var s = Sampler.init(0.0, 1, 42);
    const token = s.sample(&logits);
    try std.testing.expectEqual(@as(u32, 1), token);
}

test "sampler top_k" {
    var logits = [_]f32{ 1.0, 10.0, 9.0, 0.5 };
    var s = Sampler.init(1.0, 2, 42);
    const token = s.sample(&logits);
    // Should be either 1 or 2 (top-2)
    try std.testing.expect(token == 1 or token == 2);
}
+460
src/inference/tokenizer.zig
const std = @import("std");

/// BPE tokenizer for Qwen3.5 models.
/// Loads from HuggingFace tokenizer.json format.
/// Uses byte-level BPE (GPT-2 style): bytes are mapped to unicode chars before BPE.
pub const Tokenizer = struct {
    /// Token ID → string mapping for decoding (index = token ID; unset slots are "")
    vocab: [][]const u8,
    vocab_size: u32,
    /// BPE merge rules: pair → merged token. Stored as a sorted list for lookup.
    merges: []Merge,
    merge_count: u32,
    /// String → Token ID mapping for encoding (sorted by text for binary search)
    token_to_id: []TokenEntry,
    token_count: u32,
    /// Special token IDs (detected from added_tokens during load)
    eos_id: u32,
    im_start_id: u32,
    im_end_id: u32,
    /// Allocator used for loading (also used by deinit)
    allocator: std.mem.Allocator,
    /// Backing memory for all token strings; vocab/token_to_id slices point into it
    string_pool: []u8,

    const Merge = struct {
        first: u32,
        second: u32,
        result: u32,
        rank: u32, // lower = higher priority
    };

    const TokenEntry = struct {
        text: []const u8,
        id: u32,
    };

    /// Free all loader-owned allocations. The tokenizer is unusable afterwards.
    pub fn deinit(self: *Tokenizer) void {
        self.allocator.free(self.vocab);
        self.allocator.free(self.merges);
        self.allocator.free(self.token_to_id);
        self.allocator.free(self.string_pool);
    }

    /// Load tokenizer from a tokenizer.json file using std.json.
45 + pub fn loadFromFile(allocator: std.mem.Allocator, path: []const u8) !Tokenizer { 46 + const file = try std.fs.cwd().openFile(path, .{}); 47 + defer file.close(); 48 + 49 + const file_size = try file.getEndPos(); 50 + const data = try allocator.alloc(u8, file_size); 51 + defer allocator.free(data); 52 + const bytes_read = try file.readAll(data); 53 + if (bytes_read != file_size) return error.IncompleteRead; 54 + 55 + return loadFromJson(allocator, data[0..bytes_read]); 56 + } 57 + 58 + pub fn loadFromJson(allocator: std.mem.Allocator, json_data: []const u8) !Tokenizer { 59 + // Parse using std.json 60 + var parsed = try std.json.parseFromSlice(std.json.Value, allocator, json_data, .{ 61 + .allocate = .alloc_always, 62 + .max_value_len = std.json.default_max_value_len, 63 + }); 64 + defer parsed.deinit(); 65 + 66 + const root = parsed.value; 67 + 68 + // Extract model section 69 + const model = root.object.get("model") orelse return error.InvalidFormat; 70 + const vocab_obj = model.object.get("vocab") orelse return error.InvalidFormat; 71 + const merges_arr = model.object.get("merges") orelse return error.InvalidFormat; 72 + 73 + // Count vocab entries 74 + const vocab_map = vocab_obj.object; 75 + const num_vocab_entries: u32 = @intCast(vocab_map.count()); 76 + 77 + // Find max token ID to size the vocab array 78 + var max_id: u32 = 0; 79 + var it = vocab_map.iterator(); 80 + while (it.next()) |entry| { 81 + const id: u32 = @intCast(entry.value_ptr.integer); 82 + if (id > max_id) max_id = id; 83 + } 84 + 85 + // Also check added_tokens 86 + var num_added: u32 = 0; 87 + if (root.object.get("added_tokens")) |added| { 88 + for (added.array.items) |tok| { 89 + const id: u32 = @intCast(tok.object.get("id").?.integer); 90 + if (id > max_id) max_id = id; 91 + num_added += 1; 92 + } 93 + } 94 + 95 + const total_vocab: u32 = max_id + 1; 96 + 97 + // Allocate string pool - estimate size 98 + var pool_size: usize = 0; 99 + it = vocab_map.iterator(); 100 + while (it.next()) 
|entry| { 101 + pool_size += entry.key_ptr.len + 1; 102 + } 103 + if (root.object.get("added_tokens")) |added| { 104 + for (added.array.items) |tok| { 105 + const content = tok.object.get("content").?.string; 106 + pool_size += content.len + 1; 107 + } 108 + } 109 + 110 + var string_pool = try allocator.alloc(u8, pool_size); 111 + var pool_pos: usize = 0; 112 + 113 + // Allocate vocab and token_to_id arrays 114 + const vocab = try allocator.alloc([]const u8, total_vocab); 115 + for (vocab) |*v| v.* = ""; 116 + 117 + const total_entries = num_vocab_entries + num_added; 118 + const token_to_id = try allocator.alloc(TokenEntry, total_entries); 119 + var entry_count: u32 = 0; 120 + 121 + // Fill vocab from model.vocab 122 + it = vocab_map.iterator(); 123 + while (it.next()) |entry| { 124 + const text = entry.key_ptr.*; 125 + const id: u32 = @intCast(entry.value_ptr.integer); 126 + 127 + // Copy string to pool 128 + const str_start = pool_pos; 129 + @memcpy(string_pool[pool_pos .. pool_pos + text.len], text); 130 + pool_pos += text.len; 131 + const pooled = string_pool[str_start..pool_pos]; 132 + 133 + vocab[id] = pooled; 134 + token_to_id[entry_count] = .{ .text = pooled, .id = id }; 135 + entry_count += 1; 136 + } 137 + 138 + // Add special/added tokens 139 + var eos_id: u32 = 248046; 140 + var im_start_id: u32 = 248045; 141 + var im_end_id: u32 = 248046; 142 + 143 + if (root.object.get("added_tokens")) |added| { 144 + for (added.array.items) |tok| { 145 + const id: u32 = @intCast(tok.object.get("id").?.integer); 146 + const content = tok.object.get("content").?.string; 147 + 148 + // Copy to pool 149 + const str_start = pool_pos; 150 + @memcpy(string_pool[pool_pos .. 
pool_pos + content.len], content); 151 + pool_pos += content.len; 152 + const pooled = string_pool[str_start..pool_pos]; 153 + 154 + if (id < total_vocab) { 155 + vocab[id] = pooled; 156 + } 157 + token_to_id[entry_count] = .{ .text = pooled, .id = id }; 158 + entry_count += 1; 159 + 160 + // Detect special tokens 161 + if (std.mem.eql(u8, content, "<|im_start|>")) im_start_id = id; 162 + if (std.mem.eql(u8, content, "<|im_end|>")) im_end_id = id; 163 + if (std.mem.eql(u8, content, "<|endoftext|>")) eos_id = id; 164 + } 165 + } 166 + 167 + // Sort token_to_id by text for binary search 168 + std.mem.sort(TokenEntry, token_to_id[0..entry_count], {}, struct { 169 + fn lessThan(_: void, a: TokenEntry, b: TokenEntry) bool { 170 + return std.mem.order(u8, a.text, b.text) == .lt; 171 + } 172 + }.lessThan); 173 + 174 + // Parse merges 175 + const merges_items = merges_arr.array.items; 176 + const num_merges: u32 = @intCast(merges_items.len); 177 + const merges = try allocator.alloc(Merge, num_merges); 178 + var merge_count: u32 = 0; 179 + 180 + for (merges_items, 0..) 
|merge_val, rank| { 181 + // Merges can be either "token1 token2" (string) or ["token1", "token2"] (array) 182 + var first_str: []const u8 = undefined; 183 + var second_str: []const u8 = undefined; 184 + 185 + switch (merge_val) { 186 + .string => |merge_str| { 187 + const space_idx = std.mem.indexOf(u8, merge_str, " ") orelse continue; 188 + first_str = merge_str[0..space_idx]; 189 + second_str = merge_str[space_idx + 1 ..]; 190 + }, 191 + .array => |arr| { 192 + if (arr.items.len != 2) continue; 193 + first_str = arr.items[0].string; 194 + second_str = arr.items[1].string; 195 + }, 196 + else => continue, 197 + } 198 + 199 + const first_id = lookupTokenId(token_to_id[0..entry_count], first_str) orelse continue; 200 + const second_id = lookupTokenId(token_to_id[0..entry_count], second_str) orelse continue; 201 + 202 + // The merged result is the concatenation 203 + const merged_str_buf = [_]u8{0} ** 0; 204 + _ = merged_str_buf; 205 + 206 + // Look up merged token 207 + var merged_buf: [512]u8 = undefined; 208 + if (first_str.len + second_str.len > merged_buf.len) continue; 209 + @memcpy(merged_buf[0..first_str.len], first_str); 210 + @memcpy(merged_buf[first_str.len .. first_str.len + second_str.len], second_str); 211 + const merged_key = merged_buf[0 .. 
first_str.len + second_str.len]; 212 + 213 + const result_id = lookupTokenId(token_to_id[0..entry_count], merged_key) orelse continue; 214 + 215 + merges[merge_count] = .{ 216 + .first = first_id, 217 + .second = second_id, 218 + .result = result_id, 219 + .rank = @intCast(rank), 220 + }; 221 + merge_count += 1; 222 + } 223 + 224 + return Tokenizer{ 225 + .vocab = vocab, 226 + .vocab_size = total_vocab, 227 + .merges = merges, 228 + .merge_count = merge_count, 229 + .token_to_id = token_to_id, 230 + .token_count = entry_count, 231 + .eos_id = eos_id, 232 + .im_start_id = im_start_id, 233 + .im_end_id = im_end_id, 234 + .allocator = allocator, 235 + .string_pool = string_pool, 236 + }; 237 + } 238 + 239 + /// Encode text to token IDs using BPE. 240 + pub fn encode(self: *const Tokenizer, text: []const u8, allocator: std.mem.Allocator, out: []u32) !u32 { 241 + // Step 1: Convert text bytes to byte-level BPE characters 242 + var byte_tokens = std.ArrayListUnmanaged(u32){}; 243 + defer byte_tokens.deinit(allocator); 244 + 245 + // Map each byte to its corresponding single-byte token 246 + for (text) |byte| { 247 + const ch = byteToBpeChar(byte); 248 + var buf: [4]u8 = undefined; 249 + const len = std.unicode.utf8Encode(ch, &buf) catch continue; 250 + const char_str = buf[0..len]; 251 + 252 + if (lookupTokenId(self.token_to_id[0..self.token_count], char_str)) |id| { 253 + try byte_tokens.append(allocator, id); 254 + } 255 + } 256 + 257 + if (byte_tokens.items.len == 0) return 0; 258 + 259 + // Step 2: Apply BPE merges greedily 260 + // Repeatedly find the highest-priority merge and apply it 261 + var tokens = byte_tokens.items; 262 + var len: u32 = @intCast(tokens.len); 263 + 264 + var changed = true; 265 + while (changed) { 266 + changed = false; 267 + var best_rank: u32 = std.math.maxInt(u32); 268 + var best_pos: u32 = 0; 269 + var best_result: u32 = 0; 270 + 271 + // Find the best merge 272 + var i: u32 = 0; 273 + while (i + 1 < len) : (i += 1) { 274 + if 
(self.findMerge(tokens[i], tokens[i + 1])) |merge| { 275 + if (merge.rank < best_rank) { 276 + best_rank = merge.rank; 277 + best_pos = i; 278 + best_result = merge.result; 279 + } 280 + } 281 + } 282 + 283 + if (best_rank < std.math.maxInt(u32)) { 284 + // Apply merge: replace tokens[best_pos] and tokens[best_pos+1] with best_result 285 + tokens[best_pos] = best_result; 286 + // Shift remaining tokens left 287 + var j: u32 = best_pos + 1; 288 + while (j + 1 < len) : (j += 1) { 289 + tokens[j] = tokens[j + 1]; 290 + } 291 + len -= 1; 292 + changed = true; 293 + } 294 + } 295 + 296 + // Copy to output 297 + const out_len = @min(len, @as(u32, @intCast(out.len))); 298 + @memcpy(out[0..out_len], tokens[0..out_len]); 299 + return out_len; 300 + } 301 + 302 + /// Encode a ChatML-wrapped prompt. 303 + /// Format: <|im_start|>system\n{system}<|im_end|>\n<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n 304 + pub fn encodeChatML( 305 + self: *const Tokenizer, 306 + system_prompt: ?[]const u8, 307 + user_prompt: []const u8, 308 + allocator: std.mem.Allocator, 309 + out: []u32, 310 + ) !u32 { 311 + var pos: u32 = 0; 312 + 313 + if (system_prompt) |sys| { 314 + out[pos] = self.im_start_id; 315 + pos += 1; 316 + pos += try self.encode("system\n", allocator, out[pos..]); 317 + pos += try self.encode(sys, allocator, out[pos..]); 318 + out[pos] = self.im_end_id; 319 + pos += 1; 320 + pos += try self.encode("\n", allocator, out[pos..]); 321 + } 322 + 323 + out[pos] = self.im_start_id; 324 + pos += 1; 325 + pos += try self.encode("user\n", allocator, out[pos..]); 326 + pos += try self.encode(user_prompt, allocator, out[pos..]); 327 + out[pos] = self.im_end_id; 328 + pos += 1; 329 + pos += try self.encode("\n", allocator, out[pos..]); 330 + 331 + out[pos] = self.im_start_id; 332 + pos += 1; 333 + pos += try self.encode("assistant\n", allocator, out[pos..]); 334 + 335 + return pos; 336 + } 337 + 338 + /// Decode a single token ID to its text representation. 
339 + pub fn decode(self: *const Tokenizer, token_id: u32) []const u8 { 340 + if (token_id >= self.vocab_size) return ""; 341 + return self.vocab[token_id]; 342 + } 343 + 344 + /// Decode a token and convert byte-level BPE chars back to actual bytes. 345 + pub fn decodeToBytes(self: *const Tokenizer, token_id: u32, out: []u8) u32 { 346 + const text = self.decode(token_id); 347 + var pos: u32 = 0; 348 + 349 + var i: usize = 0; 350 + while (i < text.len) { 351 + const cp_len = std.unicode.utf8ByteSequenceLength(text[i]) catch { 352 + if (pos < out.len) { 353 + out[pos] = text[i]; 354 + pos += 1; 355 + } 356 + i += 1; 357 + continue; 358 + }; 359 + if (i + cp_len > text.len) break; 360 + const cp = std.unicode.utf8Decode(text[i .. i + cp_len]) catch { 361 + if (pos < out.len) { 362 + out[pos] = text[i]; 363 + pos += 1; 364 + } 365 + i += 1; 366 + continue; 367 + }; 368 + 369 + if (bpeCharToByte(cp)) |byte| { 370 + if (pos < out.len) { 371 + out[pos] = byte; 372 + pos += 1; 373 + } 374 + } 375 + i += cp_len; 376 + } 377 + 378 + return pos; 379 + } 380 + 381 + fn findMerge(self: *const Tokenizer, first: u32, second: u32) ?*const Merge { 382 + // Linear scan (could use hash map for speed, but this is simple) 383 + for (self.merges[0..self.merge_count]) |*m| { 384 + if (m.first == first and m.second == second) return m; 385 + } 386 + return null; 387 + } 388 + }; 389 + 390 + fn lookupTokenId(entries: []const Tokenizer.TokenEntry, text: []const u8) ?u32 { 391 + // Binary search 392 + var lo: usize = 0; 393 + var hi: usize = entries.len; 394 + while (lo < hi) { 395 + const mid = lo + (hi - lo) / 2; 396 + const cmp = std.mem.order(u8, entries[mid].text, text); 397 + switch (cmp) { 398 + .lt => lo = mid + 1, 399 + .gt => hi = mid, 400 + .eq => return entries[mid].id, 401 + } 402 + } 403 + return null; 404 + } 405 + 406 + // ============================================================ 407 + // GPT-2 byte-level BPE character mapping 408 + // 
============================================================ 409 + 410 + /// Map a byte value to the corresponding unicode codepoint used in GPT-2 BPE. 411 + /// Printable ASCII bytes map to themselves; others map to a range starting at U+0100. 412 + fn byteToBpeChar(byte: u8) u21 { 413 + return byte_to_unicode_table[byte]; 414 + } 415 + 416 + /// Reverse mapping: unicode codepoint back to byte. 417 + fn bpeCharToByte(cp: u21) ?u8 { 418 + if (cp < 256) { 419 + // Check if this is a direct-mapped printable char 420 + if (byte_to_unicode_table[cp] == cp) return @intCast(cp); 421 + } 422 + // Search in the table for mapped values 423 + for (byte_to_unicode_table, 0..) |mapped, byte| { 424 + if (mapped == cp) return @intCast(byte); 425 + } 426 + return null; 427 + } 428 + 429 + /// GPT-2 byte_to_unicode mapping table. 430 + /// Printable chars (33-126, 161-172, 174-255) map to themselves. 431 + /// Other bytes (0-32, 127-160, 173) map to U+0100 onwards. 432 + const byte_to_unicode_table: [256]u21 = blk: { 433 + var table: [256]u21 = undefined; 434 + var n: u21 = 0x100; // Start of remapped range 435 + 436 + for (0..256) |i| { 437 + const b: u8 = @intCast(i); 438 + if ((b >= 33 and b <= 126) or (b >= 161 and b <= 172) or (b >= 174 and b <= 255)) { 439 + table[i] = b; 440 + } else { 441 + table[i] = n; 442 + n += 1; 443 + } 444 + } 445 + break :blk table; 446 + }; 447 + 448 + test "byte to bpe char roundtrip" { 449 + // Space (0x20 = 32) should map to Ġ (U+0120) 450 + const space_char = byteToBpeChar(' '); 451 + try std.testing.expectEqual(@as(u21, 0x0120), space_char); 452 + const back = bpeCharToByte(0x0120); 453 + try std.testing.expectEqual(@as(?u8, ' '), back); 454 + 455 + // 'A' (65) should map to itself 456 + const a_char = byteToBpeChar('A'); 457 + try std.testing.expectEqual(@as(u21, 'A'), a_char); 458 + const a_back = bpeCharToByte('A'); 459 + try std.testing.expectEqual(@as(?u8, 'A'), a_back); 460 + }
+121 -1
src/main.zig
··· 13 13 pub const json = @import("utils/json.zig"); 14 14 const alloc_utils = @import("utils/allocator.zig"); 15 15 16 + // Inference engine (native only — not compiled for WASM yet) 17 + pub const inference_model = if (builtin.target.cpu.arch != .wasm32) @import("inference/model.zig") else struct {}; 18 + pub const inference_tokenizer = if (builtin.target.cpu.arch != .wasm32) @import("inference/tokenizer.zig") else struct {}; 19 + pub const inference_sampler = if (builtin.target.cpu.arch != .wasm32) @import("inference/sampler.zig") else struct {}; 20 + pub const inference_kv_cache = if (builtin.target.cpu.arch != .wasm32) @import("inference/kv_cache.zig") else struct {}; 21 + pub const inference_quantized = if (builtin.target.cpu.arch != .wasm32) @import("inference/quantized.zig") else struct {}; 22 + 16 23 // ============================================================ 17 24 // Shared state 18 25 // ============================================================ ··· 204 211 _ = args.next(); 205 212 206 213 const command = args.next() orelse { 207 - try stderr.print("Usage: fantasma <profile|verify> [args...]\n", .{}); 214 + try stderr.print("Usage: fantasma <profile|verify|generate> [args...]\n", .{}); 208 215 try stderr.flush(); 209 216 return; 210 217 }; ··· 280 287 } 281 288 } 282 289 try stdout.flush(); 290 + } else if (std.mem.eql(u8, command, "generate")) { 291 + const model_dir = args.next() orelse "QwenTheBard"; 292 + const prompt_text = args.next() orelse "Hello, how are you?"; 293 + 294 + try stderr.print("Loading tokenizer...\n", .{}); 295 + try stderr.flush(); 296 + 297 + var tok_path_buf: [1024]u8 = undefined; 298 + const tok_path = std.fmt.bufPrint(&tok_path_buf, "{s}/tokenizer.json", .{model_dir}) catch "QwenTheBard/tokenizer.json"; 299 + 300 + var tokenizer = inference_tokenizer.Tokenizer.loadFromFile(allocator, tok_path) catch |e| { 301 + try stderr.print("Failed to load tokenizer: {}\n", .{e}); 302 + try stderr.flush(); 303 + return; 304 + }; 305 
+ defer tokenizer.deinit(); 306 + 307 + try stderr.print("Loading model weights from {s}...\n", .{model_dir}); 308 + try stderr.flush(); 309 + 310 + const max_seq_len: u32 = 512; 311 + var weights = inference_model.loadWeights(allocator, model_dir, max_seq_len) catch |e| { 312 + try stderr.print("Failed to load weights: {}\n", .{e}); 313 + try stderr.flush(); 314 + return; 315 + }; 316 + _ = &weights; 317 + 318 + try stderr.print("Model loaded. Generating...\n", .{}); 319 + try stderr.flush(); 320 + 321 + // Tokenize prompt with ChatML wrapping 322 + var token_buf: [2048]u32 = undefined; 323 + const prompt_len = tokenizer.encodeChatML( 324 + "/no_think", 325 + prompt_text, 326 + allocator, 327 + &token_buf, 328 + ) catch |e| { 329 + try stderr.print("Failed to tokenize: {}\n", .{e}); 330 + try stderr.flush(); 331 + return; 332 + }; 333 + 334 + try stderr.print("Prompt tokens: {d}\n", .{prompt_len}); 335 + try stderr.flush(); 336 + 337 + // Initialize cache and forward state 338 + const Config = inference_kv_cache.Config; 339 + var model_cache = inference_kv_cache.ModelCache.init(allocator, max_seq_len) catch |e| { 340 + try stderr.print("Failed to init cache: {}\n", .{e}); 341 + try stderr.flush(); 342 + return; 343 + }; 344 + defer model_cache.deinit(allocator); 345 + 346 + var fwd_state = inference_model.ForwardState.init(allocator) catch |e| { 347 + try stderr.print("Failed to init state: {}\n", .{e}); 348 + try stderr.flush(); 349 + return; 350 + }; 351 + defer fwd_state.deinit(); 352 + 353 + var sampler = inference_sampler.Sampler.init(0.7, 40, 42); 354 + 355 + // Prefill: process prompt tokens 356 + var pos: u32 = 0; 357 + for (token_buf[0..prompt_len], 0..) 
|tok, ti| { 358 + _ = inference_model.forward(&weights, &model_cache, &fwd_state, tok, pos); 359 + pos += 1; 360 + try stderr.print("\rPrefill: {d}/{d}", .{ ti + 1, prompt_len }); 361 + try stderr.flush(); 362 + } 363 + try stderr.print(" done\n", .{}); 364 + try stderr.flush(); 365 + 366 + // Decode: generate new tokens 367 + const max_new_tokens: u32 = 256; 368 + var decode_buf: [512]u8 = undefined; 369 + var generated: u32 = 0; 370 + while (generated < max_new_tokens and pos < max_seq_len) { 371 + // Sample next token from current logits 372 + const logits_copy = try allocator.alloc(f32, Config.vocab_size); 373 + defer allocator.free(logits_copy); 374 + @memcpy(logits_copy, fwd_state.logits); 375 + const next_token = sampler.sample(logits_copy); 376 + 377 + if (next_token == Config.eos_token_id) break; 378 + 379 + // Decode and print the token 380 + const byte_len = tokenizer.decodeToBytes(next_token, &decode_buf); 381 + if (byte_len > 0) { 382 + try stdout.writeAll(decode_buf[0..byte_len]); 383 + try stdout.flush(); 384 + } 385 + 386 + // Forward pass for next token 387 + _ = inference_model.forward(&weights, &model_cache, &fwd_state, next_token, pos); 388 + pos += 1; 389 + generated += 1; 390 + } 391 + 392 + try stdout.print("\n", .{}); 393 + try stdout.flush(); 394 + try stderr.print("\n[Generated {d} tokens]\n", .{generated}); 395 + try stderr.flush(); 283 396 } else { 284 397 try stderr.print("Unknown command: {s}\n", .{command}); 285 398 try stderr.flush(); ··· 295 408 _ = chunker; 296 409 _ = unicode; 297 410 _ = json; 411 + if (@import("builtin").target.cpu.arch != .wasm32) { 412 + _ = inference_model; 413 + _ = inference_tokenizer; 414 + _ = inference_sampler; 415 + _ = inference_kv_cache; 416 + _ = inference_quantized; 417 + } 298 418 }
+3
web/.gitignore
··· 1 + node_modules 2 + dist 3 + *.wasm
-516
web/app.js
··· 1 - // Fantasma — client-side stylometric neutralizer 2 - // WASM profiler loads directly on page load (~27KB). 3 - // Model weights are downloaded on demand and stored in IndexedDB. 4 - 5 - (function () { 6 - "use strict"; 7 - 8 - const $ = (sel) => document.querySelector(sel); 9 - 10 - // Elements 11 - const inputEl = $("#input-text"); 12 - const btnAnalyze = $("#btn-analyze"); 13 - const btnRetry = $("#btn-retry"); 14 - const resultsSection = $("#results"); 15 - const resultsSummary = $("#results-summary"); 16 - const markerCount = $("#marker-count"); 17 - const markersContainer = $("#markers-container"); 18 - const wordCount = $("#word-count"); 19 - const statusIndicator = $("#status-indicator"); 20 - const forensicToggle = $("#forensic-toggle"); 21 - const forensicPanel = $("#forensic-panel"); 22 - const forensicBody = $("#forensic-table-body"); 23 - const rewriterSection = $("#rewriter-section"); 24 - const btnDownload = $("#btn-download-model"); 25 - const downloadProgress = $("#download-progress"); 26 - const progressFill = $("#progress-fill"); 27 - const progressText = $("#progress-text"); 28 - const btnNeutralize = $("#btn-neutralize"); 29 - const btnPanic = $("#btn-panic"); 30 - 31 - let wasmInstance = null; 32 - let modelLoaded = false; 33 - 34 - const DB_NAME = "fantasma"; 35 - const DB_VERSION = 1; 36 - const STORE_NAME = "model"; 37 - const WEIGHTS_KEY = "weights"; 38 - 39 - // ============================================================ 40 - // IndexedDB helpers (model weights only) 41 - // ============================================================ 42 - 43 - function openDB() { 44 - return new Promise((resolve, reject) => { 45 - const req = indexedDB.open(DB_NAME, DB_VERSION); 46 - req.onupgradeneeded = () => req.result.createObjectStore(STORE_NAME); 47 - req.onsuccess = () => resolve(req.result); 48 - req.onerror = () => reject(req.error); 49 - }); 50 - } 51 - 52 - async function dbGet(key) { 53 - const db = await openDB(); 54 - return new 
Promise((resolve, reject) => { 55 - const tx = db.transaction(STORE_NAME, "readonly"); 56 - const req = tx.objectStore(STORE_NAME).get(key); 57 - req.onsuccess = () => resolve(req.result); 58 - req.onerror = () => reject(req.error); 59 - }); 60 - } 61 - 62 - async function dbPut(key, value) { 63 - const db = await openDB(); 64 - return new Promise((resolve, reject) => { 65 - const tx = db.transaction(STORE_NAME, "readwrite"); 66 - const req = tx.objectStore(STORE_NAME).put(value, key); 67 - req.onsuccess = () => resolve(); 68 - req.onerror = () => reject(req.error); 69 - }); 70 - } 71 - 72 - // ============================================================ 73 - // Fetch with progress 74 - // ============================================================ 75 - 76 - async function fetchWithProgress(url, onProgress) { 77 - const response = await fetch(url); 78 - if (!response.ok) throw new Error(`HTTP ${response.status}`); 79 - const total = parseInt(response.headers.get("content-length") || "0", 10); 80 - const reader = response.body.getReader(); 81 - const chunks = []; 82 - let received = 0; 83 - 84 - while (true) { 85 - const { done, value } = await reader.read(); 86 - if (done) break; 87 - chunks.push(value); 88 - received += value.length; 89 - if (onProgress) onProgress(received, total); 90 - } 91 - 92 - const result = new Uint8Array(received); 93 - let offset = 0; 94 - for (const chunk of chunks) { 95 - result.set(chunk, offset); 96 - offset += chunk.length; 97 - } 98 - return result.buffer; 99 - } 100 - 101 - function formatBytes(bytes) { 102 - if (bytes < 1024) return bytes + " B"; 103 - if (bytes < 1048576) return (bytes / 1024).toFixed(0) + " KB"; 104 - return (bytes / 1048576).toFixed(1) + " MB"; 105 - } 106 - 107 - // ============================================================ 108 - // WASM helpers 109 - // ============================================================ 110 - 111 - function wasmCall(exportName, text) { 112 - const encoder = new TextEncoder(); 113 
- const bytes = encoder.encode(text); 114 - const ptr = wasmInstance.exports.alloc(bytes.length); 115 - new Uint8Array(wasmInstance.exports.memory.buffer, ptr, bytes.length).set(bytes); 116 - 117 - const ok = wasmInstance.exports[exportName](ptr, bytes.length); 118 - wasmInstance.exports.dealloc(ptr, bytes.length); 119 - if (!ok) throw new Error(`WASM ${exportName} failed`); 120 - 121 - const outPtr = wasmInstance.exports.get_output_ptr(); 122 - const outLen = wasmInstance.exports.get_output_len(); 123 - const outBytes = new Uint8Array(wasmInstance.exports.memory.buffer, outPtr, outLen); 124 - return JSON.parse(new TextDecoder().decode(outBytes)); 125 - } 126 - 127 - function loadModelWeights(weightsBytes) { 128 - const arr = new Uint8Array(weightsBytes); 129 - const ptr = wasmInstance.exports.alloc(arr.length); 130 - new Uint8Array(wasmInstance.exports.memory.buffer, ptr, arr.length).set(arr); 131 - const ok = wasmInstance.exports.init(ptr, arr.length); 132 - wasmInstance.exports.dealloc(ptr, arr.length); 133 - return ok; 134 - } 135 - 136 - // ============================================================ 137 - // Status 138 - // ============================================================ 139 - 140 - function setStatus(state, msg) { 141 - switch (state) { 142 - case "loading": 143 - statusIndicator.textContent = "Loading..."; 144 - statusIndicator.className = "status status-loading"; 145 - break; 146 - case "ready": 147 - statusIndicator.textContent = "Ready"; 148 - statusIndicator.className = "status status-ready"; 149 - break; 150 - case "model-ready": 151 - statusIndicator.textContent = "Offline"; 152 - statusIndicator.className = "status status-ready"; 153 - break; 154 - case "error": 155 - statusIndicator.textContent = msg || "Error"; 156 - statusIndicator.className = "status status-error"; 157 - break; 158 - } 159 - } 160 - 161 - // ============================================================ 162 - // Word count 163 - // 
============================================================ 164 - 165 - inputEl.addEventListener("input", () => { 166 - const words = inputEl.value.trim().split(/\s+/).filter(Boolean).length; 167 - wordCount.textContent = words + " word" + (words !== 1 ? "s" : ""); 168 - btnAnalyze.disabled = !wasmInstance || words < 3; 169 - }); 170 - 171 - // ============================================================ 172 - // Forensic toggle 173 - // ============================================================ 174 - 175 - forensicToggle.addEventListener("change", () => { 176 - forensicPanel.hidden = !forensicToggle.checked; 177 - }); 178 - 179 - // ============================================================ 180 - // Analyze 181 - // ============================================================ 182 - 183 - btnAnalyze.addEventListener("click", () => { 184 - const text = inputEl.value.trim(); 185 - if (!text || !wasmInstance) return; 186 - 187 - btnAnalyze.disabled = true; 188 - btnAnalyze.classList.add("btn-loading"); 189 - btnAnalyze.textContent = "Analyzing"; 190 - 191 - try { 192 - const result = wasmCall("profile", text); 193 - renderProfile(result); 194 - } catch (e) { 195 - console.error("Profile error:", e); 196 - setStatus("error", "Analysis failed"); 197 - } finally { 198 - btnAnalyze.disabled = false; 199 - btnAnalyze.classList.remove("btn-loading"); 200 - btnAnalyze.textContent = "Analyze Fingerprint"; 201 - } 202 - }); 203 - 204 - // ============================================================ 205 - // Render profile 206 - // ============================================================ 207 - 208 - // Profile JSON keys → display names 209 - const FEATURE_LABELS = { 210 - avg_word_length: "Avg word length", 211 - vocabulary_richness: "Vocabulary richness", 212 - hapax_ratio: "Hapax ratio", 213 - avg_sentence_length: "Avg sentence length", 214 - sentence_length_variance: "Sentence length variance", 215 - passive_voice_ratio: "Passive voice ratio", 216 - 
contraction_rate: "Contraction rate", 217 - semicolon_rate: "Semicolon rate", 218 - comma_rate: "Comma rate", 219 - dash_rate: "Dash rate", 220 - exclamation_rate: "Exclamation rate", 221 - hyphenation_rate: "Hyphenation rate", 222 - }; 223 - 224 - // WASM marker names → profile feature keys 225 - const MARKER_TO_FEATURE = { 226 - semicolon_high: "semicolon_rate", 227 - contraction_high: "contraction_rate", 228 - comma_rate: "comma_rate", 229 - colon_high: "colon_rate", 230 - dash_preference: "dash_rate", 231 - exclamation_high: "exclamation_rate", 232 - ellipsis_high: "ellipsis_rate", 233 - parenthetical_high: "parenthetical_rate", 234 - sentence_length_high: "avg_sentence_length", 235 - sentence_length_variance_high: "sentence_length_variance", 236 - passive_voice_high: "passive_voice_ratio", 237 - vocabulary_richness_high: "vocabulary_richness", 238 - hapax_ratio_high: "hapax_ratio", 239 - hyphenation_high: "hyphenation_rate", 240 - avg_word_length: "avg_word_length", 241 - avg_syllables: "avg_syllables_per_word", 242 - }; 243 - 244 - function formatMarkerName(name) { 245 - return name.replace(/_/g, " ").replace(/\b\w/g, (c) => c.toUpperCase()); 246 - } 247 - 248 - function renderProfile(result) { 249 - // Summary 250 - markerCount.textContent = result.markers.length; 251 - resultsSummary.dataset.variant = result.markers.length === 0 ? "success" : "warning"; 252 - 253 - // Markers list 254 - markersContainer.innerHTML = ""; 255 - for (const m of result.markers) { 256 - const item = document.createElement("div"); 257 - item.className = "change-item"; 258 - const dir = m.z_score > 0 ? 
"HIGH" : "LOW"; 259 - item.innerHTML = ` 260 - <div class="change-header"> 261 - <span class="change-marker">${esc(formatMarkerName(m.name))}</span> 262 - <span class="change-arrow">${dir}</span> 263 - </div> 264 - <div class="change-reason">Value: ${m.value.toFixed(4)} &middot; Baseline: ${m.baseline.toFixed(4)} &middot; z = ${m.z_score.toFixed(2)}</div> 265 - `; 266 - markersContainer.appendChild(item); 267 - } 268 - 269 - // Build marker lookup by feature key 270 - const flagged = new Map(); 271 - for (const m of result.markers) { 272 - const featureKey = MARKER_TO_FEATURE[m.name]; 273 - if (featureKey) flagged.set(featureKey, m); 274 - } 275 - 276 - // Forensic table 277 - forensicBody.innerHTML = ""; 278 - for (const [key, label] of Object.entries(FEATURE_LABELS)) { 279 - if (result[key] === undefined) continue; 280 - const tr = document.createElement("tr"); 281 - const m = flagged.get(key); 282 - 283 - let statusCls, statusTxt; 284 - if (m) { 285 - statusCls = "forensic-warn"; 286 - statusTxt = m.z_score > 0 ? "\u26a0 High" : "\u26a0 Low"; 287 - } else { 288 - statusCls = "forensic-fixed"; 289 - statusTxt = "\u2713 Normal"; 290 - } 291 - 292 - const baseline = m ? m.baseline.toFixed(4) : "\u2014"; 293 - tr.innerHTML = ` 294 - <td>${esc(label)}</td> 295 - <td>${typeof result[key] === "boolean" ? (result[key] ? 
"Yes" : "No") : result[key].toFixed(4)}</td> 296 - <td>${baseline}</td> 297 - <td class="${statusCls}">${statusTxt}</td> 298 - `; 299 - forensicBody.appendChild(tr); 300 - } 301 - 302 - // Show rewriter section 303 - rewriterSection.hidden = false; 304 - if (modelLoaded) { 305 - btnDownload.hidden = true; 306 - btnNeutralize.hidden = false; 307 - } else { 308 - btnDownload.hidden = false; 309 - btnNeutralize.hidden = true; 310 - } 311 - 312 - resultsSection.hidden = false; 313 - resultsSection.scrollIntoView({ behavior: "smooth", block: "start" }); 314 - } 315 - 316 - // ============================================================ 317 - // Download model 318 - // ============================================================ 319 - 320 - btnDownload.addEventListener("click", async () => { 321 - btnDownload.disabled = true; 322 - downloadProgress.hidden = false; 323 - 324 - try { 325 - progressText.textContent = "Downloading model..."; 326 - progressFill.style.width = "0%"; 327 - 328 - const weightsBytes = await fetchWithProgress("model.bin", (recv, total) => { 329 - if (total > 0) { 330 - const pct = Math.round((recv / total) * 100); 331 - progressFill.style.width = pct + "%"; 332 - progressText.textContent = `${formatBytes(recv)} / ${formatBytes(total)}`; 333 - } else { 334 - progressText.textContent = formatBytes(recv); 335 - } 336 - }); 337 - 338 - await dbPut(WEIGHTS_KEY, weightsBytes); 339 - progressText.textContent = "Initializing model..."; 340 - progressFill.style.width = "100%"; 341 - 342 - const ok = loadModelWeights(weightsBytes); 343 - if (ok) { 344 - modelLoaded = true; 345 - downloadProgress.hidden = true; 346 - btnDownload.hidden = true; 347 - btnNeutralize.hidden = false; 348 - setStatus("model-ready"); 349 - } else { 350 - progressText.textContent = "Model init failed"; 351 - btnDownload.disabled = false; 352 - } 353 - } catch (e) { 354 - console.error("Download error:", e); 355 - progressText.textContent = "Model not available yet"; 356 - 
progressFill.style.width = "0%"; 357 - btnDownload.disabled = false; 358 - } 359 - }); 360 - 361 - // ============================================================ 362 - // Neutralize 363 - // ============================================================ 364 - 365 - btnNeutralize.addEventListener("click", () => { 366 - const text = inputEl.value.trim(); 367 - if (!text || !wasmInstance || !modelLoaded) return; 368 - 369 - btnNeutralize.disabled = true; 370 - btnNeutralize.classList.add("btn-loading"); 371 - btnNeutralize.textContent = "Neutralizing"; 372 - 373 - try { 374 - const result = wasmCall("neutralize", text); 375 - // TODO: render rewritten text + changes when inference engine is ready 376 - // For now, neutralize returns the same as profile 377 - renderProfile(result); 378 - } catch (e) { 379 - console.error("Neutralize error:", e); 380 - } finally { 381 - btnNeutralize.disabled = false; 382 - btnNeutralize.classList.remove("btn-loading"); 383 - btnNeutralize.textContent = "Neutralize"; 384 - } 385 - }); 386 - 387 - // ============================================================ 388 - // Panic — erase everything 389 - // ============================================================ 390 - 391 - btnPanic.addEventListener("click", async () => { 392 - // Clear all text and UI state immediately 393 - inputEl.value = ""; 394 - wordCount.textContent = "0 words"; 395 - btnAnalyze.disabled = true; 396 - resultsSection.hidden = true; 397 - markersContainer.innerHTML = ""; 398 - forensicBody.innerHTML = ""; 399 - forensicToggle.checked = false; 400 - forensicPanel.hidden = true; 401 - 402 - // Delete model from IndexedDB 403 - try { 404 - const db = await openDB(); 405 - const tx = db.transaction(STORE_NAME, "readwrite"); 406 - tx.objectStore(STORE_NAME).clear(); 407 - await new Promise((resolve, reject) => { 408 - tx.oncomplete = resolve; 409 - tx.onerror = () => reject(tx.error); 410 - }); 411 - } catch { 412 - // Best effort 413 - } 414 - 415 - // Delete the entire 
database 416 - try { 417 - indexedDB.deleteDatabase(DB_NAME); 418 - } catch { 419 - // Best effort 420 - } 421 - 422 - // Unregister service worker and clear caches 423 - if ("serviceWorker" in navigator) { 424 - const regs = await navigator.serviceWorker.getRegistrations(); 425 - for (const reg of regs) reg.unregister(); 426 - } 427 - if ("caches" in window) { 428 - const names = await caches.keys(); 429 - for (const name of names) await caches.delete(name); 430 - } 431 - 432 - // Clear any other storage 433 - try { sessionStorage.clear(); } catch {} 434 - try { localStorage.clear(); } catch {} 435 - 436 - modelLoaded = false; 437 - wasmInstance = null; 438 - setStatus("loading"); 439 - 440 - // Replace page with blank to remove DOM traces 441 - document.title = ""; 442 - document.body.innerHTML = ""; 443 - history.replaceState(null, "", "about:blank"); 444 - }); 445 - 446 - // ============================================================ 447 - // Retry 448 - // ============================================================ 449 - 450 - btnRetry.addEventListener("click", () => { 451 - resultsSection.hidden = true; 452 - inputEl.focus(); 453 - }); 454 - 455 - // ============================================================ 456 - // Helpers 457 - // ============================================================ 458 - 459 - function esc(str) { 460 - const div = document.createElement("div"); 461 - div.textContent = str; 462 - return div.innerHTML; 463 - } 464 - 465 - // ============================================================ 466 - // Init: fetch WASM directly, check IndexedDB for model 467 - // ============================================================ 468 - 469 - (async function init() { 470 - setStatus("loading"); 471 - 472 - try { 473 - const wasmBytes = await fetch("fantasma.wasm").then((r) => { 474 - if (!r.ok) throw new Error(`HTTP ${r.status}`); 475 - return r.arrayBuffer(); 476 - }); 477 - 478 - const { instance } = await WebAssembly.instantiate(wasmBytes, { 
479 - env: { 480 - log_message: (ptr, len) => { 481 - const bytes = new Uint8Array(instance.exports.memory.buffer, ptr, len); 482 - console.log("[wasm]", new TextDecoder().decode(bytes)); 483 - }, 484 - }, 485 - }); 486 - 487 - wasmInstance = instance; 488 - setStatus("ready"); 489 - 490 - // Enable analyze if text already present 491 - const words = inputEl.value.trim().split(/\s+/).filter(Boolean).length; 492 - if (words >= 3) btnAnalyze.disabled = false; 493 - 494 - // Check if model weights are already stored 495 - try { 496 - const storedWeights = await dbGet(WEIGHTS_KEY); 497 - if (storedWeights) { 498 - const ok = loadModelWeights(storedWeights); 499 - if (ok) { 500 - modelLoaded = true; 501 - setStatus("model-ready"); 502 - } 503 - } 504 - } catch { 505 - // IndexedDB not available — model download will be offered later 506 - } 507 - } catch (e) { 508 - console.error("WASM load failed:", e); 509 - setStatus("error", "Engine unavailable"); 510 - } 511 - })(); 512 - 513 - if ("serviceWorker" in navigator) { 514 - navigator.serviceWorker.register("sw.js").catch(() => {}); 515 - } 516 - })();
+153
web/bun.lock
··· 1 + { 2 + "lockfileVersion": 1, 3 + "configVersion": 1, 4 + "workspaces": { 5 + "": { 6 + "name": "fantasma-web", 7 + "dependencies": { 8 + "@knadh/oat": "^0.4.1", 9 + "zod": "^4.3.6", 10 + "zustand": "^5.0.11", 11 + }, 12 + "devDependencies": { 13 + "typescript": "^5.9.3", 14 + "vite": "^7.3.1", 15 + }, 16 + }, 17 + }, 18 + "packages": { 19 + "@esbuild/aix-ppc64": ["@esbuild/aix-ppc64@0.27.3", "", { "os": "aix", "cpu": "ppc64" }, "sha512-9fJMTNFTWZMh5qwrBItuziu834eOCUcEqymSH7pY+zoMVEZg3gcPuBNxH1EvfVYe9h0x/Ptw8KBzv7qxb7l8dg=="], 20 + 21 + "@esbuild/android-arm": ["@esbuild/android-arm@0.27.3", "", { "os": "android", "cpu": "arm" }, "sha512-i5D1hPY7GIQmXlXhs2w8AWHhenb00+GxjxRncS2ZM7YNVGNfaMxgzSGuO8o8SJzRc/oZwU2bcScvVERk03QhzA=="], 22 + 23 + "@esbuild/android-arm64": ["@esbuild/android-arm64@0.27.3", "", { "os": "android", "cpu": "arm64" }, "sha512-YdghPYUmj/FX2SYKJ0OZxf+iaKgMsKHVPF1MAq/P8WirnSpCStzKJFjOjzsW0QQ7oIAiccHdcqjbHmJxRb/dmg=="], 24 + 25 + "@esbuild/android-x64": ["@esbuild/android-x64@0.27.3", "", { "os": "android", "cpu": "x64" }, "sha512-IN/0BNTkHtk8lkOM8JWAYFg4ORxBkZQf9zXiEOfERX/CzxW3Vg1ewAhU7QSWQpVIzTW+b8Xy+lGzdYXV6UZObQ=="], 26 + 27 + "@esbuild/darwin-arm64": ["@esbuild/darwin-arm64@0.27.3", "", { "os": "darwin", "cpu": "arm64" }, "sha512-Re491k7ByTVRy0t3EKWajdLIr0gz2kKKfzafkth4Q8A5n1xTHrkqZgLLjFEHVD+AXdUGgQMq+Godfq45mGpCKg=="], 28 + 29 + "@esbuild/darwin-x64": ["@esbuild/darwin-x64@0.27.3", "", { "os": "darwin", "cpu": "x64" }, "sha512-vHk/hA7/1AckjGzRqi6wbo+jaShzRowYip6rt6q7VYEDX4LEy1pZfDpdxCBnGtl+A5zq8iXDcyuxwtv3hNtHFg=="], 30 + 31 + "@esbuild/freebsd-arm64": ["@esbuild/freebsd-arm64@0.27.3", "", { "os": "freebsd", "cpu": "arm64" }, "sha512-ipTYM2fjt3kQAYOvo6vcxJx3nBYAzPjgTCk7QEgZG8AUO3ydUhvelmhrbOheMnGOlaSFUoHXB6un+A7q4ygY9w=="], 32 + 33 + "@esbuild/freebsd-x64": ["@esbuild/freebsd-x64@0.27.3", "", { "os": "freebsd", "cpu": "x64" }, "sha512-dDk0X87T7mI6U3K9VjWtHOXqwAMJBNN2r7bejDsc+j03SEjtD9HrOl8gVFByeM0aJksoUuUVU9TBaZa2rgj0oA=="], 34 + 35 + 
"@esbuild/linux-arm": ["@esbuild/linux-arm@0.27.3", "", { "os": "linux", "cpu": "arm" }, "sha512-s6nPv2QkSupJwLYyfS+gwdirm0ukyTFNl3KTgZEAiJDd+iHZcbTPPcWCcRYH+WlNbwChgH2QkE9NSlNrMT8Gfw=="], 36 + 37 + "@esbuild/linux-arm64": ["@esbuild/linux-arm64@0.27.3", "", { "os": "linux", "cpu": "arm64" }, "sha512-sZOuFz/xWnZ4KH3YfFrKCf1WyPZHakVzTiqji3WDc0BCl2kBwiJLCXpzLzUBLgmp4veFZdvN5ChW4Eq/8Fc2Fg=="], 38 + 39 + "@esbuild/linux-ia32": ["@esbuild/linux-ia32@0.27.3", "", { "os": "linux", "cpu": "ia32" }, "sha512-yGlQYjdxtLdh0a3jHjuwOrxQjOZYD/C9PfdbgJJF3TIZWnm/tMd/RcNiLngiu4iwcBAOezdnSLAwQDPqTmtTYg=="], 40 + 41 + "@esbuild/linux-loong64": ["@esbuild/linux-loong64@0.27.3", "", { "os": "linux", "cpu": "none" }, "sha512-WO60Sn8ly3gtzhyjATDgieJNet/KqsDlX5nRC5Y3oTFcS1l0KWba+SEa9Ja1GfDqSF1z6hif/SkpQJbL63cgOA=="], 42 + 43 + "@esbuild/linux-mips64el": ["@esbuild/linux-mips64el@0.27.3", "", { "os": "linux", "cpu": "none" }, "sha512-APsymYA6sGcZ4pD6k+UxbDjOFSvPWyZhjaiPyl/f79xKxwTnrn5QUnXR5prvetuaSMsb4jgeHewIDCIWljrSxw=="], 44 + 45 + "@esbuild/linux-ppc64": ["@esbuild/linux-ppc64@0.27.3", "", { "os": "linux", "cpu": "ppc64" }, "sha512-eizBnTeBefojtDb9nSh4vvVQ3V9Qf9Df01PfawPcRzJH4gFSgrObw+LveUyDoKU3kxi5+9RJTCWlj4FjYXVPEA=="], 46 + 47 + "@esbuild/linux-riscv64": ["@esbuild/linux-riscv64@0.27.3", "", { "os": "linux", "cpu": "none" }, "sha512-3Emwh0r5wmfm3ssTWRQSyVhbOHvqegUDRd0WhmXKX2mkHJe1SFCMJhagUleMq+Uci34wLSipf8Lagt4LlpRFWQ=="], 48 + 49 + "@esbuild/linux-s390x": ["@esbuild/linux-s390x@0.27.3", "", { "os": "linux", "cpu": "s390x" }, "sha512-pBHUx9LzXWBc7MFIEEL0yD/ZVtNgLytvx60gES28GcWMqil8ElCYR4kvbV2BDqsHOvVDRrOxGySBM9Fcv744hw=="], 50 + 51 + "@esbuild/linux-x64": ["@esbuild/linux-x64@0.27.3", "", { "os": "linux", "cpu": "x64" }, "sha512-Czi8yzXUWIQYAtL/2y6vogER8pvcsOsk5cpwL4Gk5nJqH5UZiVByIY8Eorm5R13gq+DQKYg0+JyQoytLQas4dA=="], 52 + 53 + "@esbuild/netbsd-arm64": ["@esbuild/netbsd-arm64@0.27.3", "", { "os": "none", "cpu": "arm64" }, 
"sha512-sDpk0RgmTCR/5HguIZa9n9u+HVKf40fbEUt+iTzSnCaGvY9kFP0YKBWZtJaraonFnqef5SlJ8/TiPAxzyS+UoA=="], 54 + 55 + "@esbuild/netbsd-x64": ["@esbuild/netbsd-x64@0.27.3", "", { "os": "none", "cpu": "x64" }, "sha512-P14lFKJl/DdaE00LItAukUdZO5iqNH7+PjoBm+fLQjtxfcfFE20Xf5CrLsmZdq5LFFZzb5JMZ9grUwvtVYzjiA=="], 56 + 57 + "@esbuild/openbsd-arm64": ["@esbuild/openbsd-arm64@0.27.3", "", { "os": "openbsd", "cpu": "arm64" }, "sha512-AIcMP77AvirGbRl/UZFTq5hjXK+2wC7qFRGoHSDrZ5v5b8DK/GYpXW3CPRL53NkvDqb9D+alBiC/dV0Fb7eJcw=="], 58 + 59 + "@esbuild/openbsd-x64": ["@esbuild/openbsd-x64@0.27.3", "", { "os": "openbsd", "cpu": "x64" }, "sha512-DnW2sRrBzA+YnE70LKqnM3P+z8vehfJWHXECbwBmH/CU51z6FiqTQTHFenPlHmo3a8UgpLyH3PT+87OViOh1AQ=="], 60 + 61 + "@esbuild/openharmony-arm64": ["@esbuild/openharmony-arm64@0.27.3", "", { "os": "none", "cpu": "arm64" }, "sha512-NinAEgr/etERPTsZJ7aEZQvvg/A6IsZG/LgZy+81wON2huV7SrK3e63dU0XhyZP4RKGyTm7aOgmQk0bGp0fy2g=="], 62 + 63 + "@esbuild/sunos-x64": ["@esbuild/sunos-x64@0.27.3", "", { "os": "sunos", "cpu": "x64" }, "sha512-PanZ+nEz+eWoBJ8/f8HKxTTD172SKwdXebZ0ndd953gt1HRBbhMsaNqjTyYLGLPdoWHy4zLU7bDVJztF5f3BHA=="], 64 + 65 + "@esbuild/win32-arm64": ["@esbuild/win32-arm64@0.27.3", "", { "os": "win32", "cpu": "arm64" }, "sha512-B2t59lWWYrbRDw/tjiWOuzSsFh1Y/E95ofKz7rIVYSQkUYBjfSgf6oeYPNWHToFRr2zx52JKApIcAS/D5TUBnA=="], 66 + 67 + "@esbuild/win32-ia32": ["@esbuild/win32-ia32@0.27.3", "", { "os": "win32", "cpu": "ia32" }, "sha512-QLKSFeXNS8+tHW7tZpMtjlNb7HKau0QDpwm49u0vUp9y1WOF+PEzkU84y9GqYaAVW8aH8f3GcBck26jh54cX4Q=="], 68 + 69 + "@esbuild/win32-x64": ["@esbuild/win32-x64@0.27.3", "", { "os": "win32", "cpu": "x64" }, "sha512-4uJGhsxuptu3OcpVAzli+/gWusVGwZZHTlS63hh++ehExkVT8SgiEf7/uC/PclrPPkLhZqGgCTjd0VWLo6xMqA=="], 70 + 71 + "@knadh/oat": ["@knadh/oat@0.4.1", "", {}, "sha512-Cdpuy1/828ICY5A9mjGYbDszt3tE4OiS0kcbjkHmBRwtFGZ/4oKA0cNkqJEgyNGdabhMlfiGTvKEflKMaax8mA=="], 72 + 73 + "@rollup/rollup-android-arm-eabi": ["@rollup/rollup-android-arm-eabi@4.59.0", "", { "os": 
"android", "cpu": "arm" }, "sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg=="], 74 + 75 + "@rollup/rollup-android-arm64": ["@rollup/rollup-android-arm64@4.59.0", "", { "os": "android", "cpu": "arm64" }, "sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q=="], 76 + 77 + "@rollup/rollup-darwin-arm64": ["@rollup/rollup-darwin-arm64@4.59.0", "", { "os": "darwin", "cpu": "arm64" }, "sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg=="], 78 + 79 + "@rollup/rollup-darwin-x64": ["@rollup/rollup-darwin-x64@4.59.0", "", { "os": "darwin", "cpu": "x64" }, "sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w=="], 80 + 81 + "@rollup/rollup-freebsd-arm64": ["@rollup/rollup-freebsd-arm64@4.59.0", "", { "os": "freebsd", "cpu": "arm64" }, "sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA=="], 82 + 83 + "@rollup/rollup-freebsd-x64": ["@rollup/rollup-freebsd-x64@4.59.0", "", { "os": "freebsd", "cpu": "x64" }, "sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg=="], 84 + 85 + "@rollup/rollup-linux-arm-gnueabihf": ["@rollup/rollup-linux-arm-gnueabihf@4.59.0", "", { "os": "linux", "cpu": "arm" }, "sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw=="], 86 + 87 + "@rollup/rollup-linux-arm-musleabihf": ["@rollup/rollup-linux-arm-musleabihf@4.59.0", "", { "os": "linux", "cpu": "arm" }, "sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA=="], 88 + 89 + "@rollup/rollup-linux-arm64-gnu": ["@rollup/rollup-linux-arm64-gnu@4.59.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA=="], 90 + 91 + "@rollup/rollup-linux-arm64-musl": 
["@rollup/rollup-linux-arm64-musl@4.59.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA=="], 92 + 93 + "@rollup/rollup-linux-loong64-gnu": ["@rollup/rollup-linux-loong64-gnu@4.59.0", "", { "os": "linux", "cpu": "none" }, "sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg=="], 94 + 95 + "@rollup/rollup-linux-loong64-musl": ["@rollup/rollup-linux-loong64-musl@4.59.0", "", { "os": "linux", "cpu": "none" }, "sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q=="], 96 + 97 + "@rollup/rollup-linux-ppc64-gnu": ["@rollup/rollup-linux-ppc64-gnu@4.59.0", "", { "os": "linux", "cpu": "ppc64" }, "sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA=="], 98 + 99 + "@rollup/rollup-linux-ppc64-musl": ["@rollup/rollup-linux-ppc64-musl@4.59.0", "", { "os": "linux", "cpu": "ppc64" }, "sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA=="], 100 + 101 + "@rollup/rollup-linux-riscv64-gnu": ["@rollup/rollup-linux-riscv64-gnu@4.59.0", "", { "os": "linux", "cpu": "none" }, "sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg=="], 102 + 103 + "@rollup/rollup-linux-riscv64-musl": ["@rollup/rollup-linux-riscv64-musl@4.59.0", "", { "os": "linux", "cpu": "none" }, "sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg=="], 104 + 105 + "@rollup/rollup-linux-s390x-gnu": ["@rollup/rollup-linux-s390x-gnu@4.59.0", "", { "os": "linux", "cpu": "s390x" }, "sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w=="], 106 + 107 + "@rollup/rollup-linux-x64-gnu": ["@rollup/rollup-linux-x64-gnu@4.59.0", "", { "os": "linux", "cpu": "x64" }, 
"sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg=="], 108 + 109 + "@rollup/rollup-linux-x64-musl": ["@rollup/rollup-linux-x64-musl@4.59.0", "", { "os": "linux", "cpu": "x64" }, "sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg=="], 110 + 111 + "@rollup/rollup-openbsd-x64": ["@rollup/rollup-openbsd-x64@4.59.0", "", { "os": "openbsd", "cpu": "x64" }, "sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ=="], 112 + 113 + "@rollup/rollup-openharmony-arm64": ["@rollup/rollup-openharmony-arm64@4.59.0", "", { "os": "none", "cpu": "arm64" }, "sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA=="], 114 + 115 + "@rollup/rollup-win32-arm64-msvc": ["@rollup/rollup-win32-arm64-msvc@4.59.0", "", { "os": "win32", "cpu": "arm64" }, "sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A=="], 116 + 117 + "@rollup/rollup-win32-ia32-msvc": ["@rollup/rollup-win32-ia32-msvc@4.59.0", "", { "os": "win32", "cpu": "ia32" }, "sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA=="], 118 + 119 + "@rollup/rollup-win32-x64-gnu": ["@rollup/rollup-win32-x64-gnu@4.59.0", "", { "os": "win32", "cpu": "x64" }, "sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA=="], 120 + 121 + "@rollup/rollup-win32-x64-msvc": ["@rollup/rollup-win32-x64-msvc@4.59.0", "", { "os": "win32", "cpu": "x64" }, "sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA=="], 122 + 123 + "@types/estree": ["@types/estree@1.0.8", "", {}, "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w=="], 124 + 125 + "esbuild": ["esbuild@0.27.3", "", { "optionalDependencies": { "@esbuild/aix-ppc64": "0.27.3", "@esbuild/android-arm": "0.27.3", "@esbuild/android-arm64": 
"0.27.3", "@esbuild/android-x64": "0.27.3", "@esbuild/darwin-arm64": "0.27.3", "@esbuild/darwin-x64": "0.27.3", "@esbuild/freebsd-arm64": "0.27.3", "@esbuild/freebsd-x64": "0.27.3", "@esbuild/linux-arm": "0.27.3", "@esbuild/linux-arm64": "0.27.3", "@esbuild/linux-ia32": "0.27.3", "@esbuild/linux-loong64": "0.27.3", "@esbuild/linux-mips64el": "0.27.3", "@esbuild/linux-ppc64": "0.27.3", "@esbuild/linux-riscv64": "0.27.3", "@esbuild/linux-s390x": "0.27.3", "@esbuild/linux-x64": "0.27.3", "@esbuild/netbsd-arm64": "0.27.3", "@esbuild/netbsd-x64": "0.27.3", "@esbuild/openbsd-arm64": "0.27.3", "@esbuild/openbsd-x64": "0.27.3", "@esbuild/openharmony-arm64": "0.27.3", "@esbuild/sunos-x64": "0.27.3", "@esbuild/win32-arm64": "0.27.3", "@esbuild/win32-ia32": "0.27.3", "@esbuild/win32-x64": "0.27.3" }, "bin": { "esbuild": "bin/esbuild" } }, "sha512-8VwMnyGCONIs6cWue2IdpHxHnAjzxnw2Zr7MkVxB2vjmQ2ivqGFb4LEG3SMnv0Gb2F/G/2yA8zUaiL1gywDCCg=="], 126 + 127 + "fdir": ["fdir@6.5.0", "", { "peerDependencies": { "picomatch": "^3 || ^4" }, "optionalPeers": ["picomatch"] }, "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg=="], 128 + 129 + "fsevents": ["fsevents@2.3.3", "", { "os": "darwin" }, "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw=="], 130 + 131 + "nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="], 132 + 133 + "picocolors": ["picocolors@1.1.1", "", {}, "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA=="], 134 + 135 + "picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="], 136 + 137 + "postcss": ["postcss@8.5.8", "", { "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", "source-map-js": "^1.2.1" } }, 
"sha512-OW/rX8O/jXnm82Ey1k44pObPtdblfiuWnrd8X7GJ7emImCOstunGbXUpp7HdBrFQX6rJzn3sPT397Wp5aCwCHg=="], 138 + 139 + "rollup": ["rollup@4.59.0", "", { "dependencies": { "@types/estree": "1.0.8" }, "optionalDependencies": { "@rollup/rollup-android-arm-eabi": "4.59.0", "@rollup/rollup-android-arm64": "4.59.0", "@rollup/rollup-darwin-arm64": "4.59.0", "@rollup/rollup-darwin-x64": "4.59.0", "@rollup/rollup-freebsd-arm64": "4.59.0", "@rollup/rollup-freebsd-x64": "4.59.0", "@rollup/rollup-linux-arm-gnueabihf": "4.59.0", "@rollup/rollup-linux-arm-musleabihf": "4.59.0", "@rollup/rollup-linux-arm64-gnu": "4.59.0", "@rollup/rollup-linux-arm64-musl": "4.59.0", "@rollup/rollup-linux-loong64-gnu": "4.59.0", "@rollup/rollup-linux-loong64-musl": "4.59.0", "@rollup/rollup-linux-ppc64-gnu": "4.59.0", "@rollup/rollup-linux-ppc64-musl": "4.59.0", "@rollup/rollup-linux-riscv64-gnu": "4.59.0", "@rollup/rollup-linux-riscv64-musl": "4.59.0", "@rollup/rollup-linux-s390x-gnu": "4.59.0", "@rollup/rollup-linux-x64-gnu": "4.59.0", "@rollup/rollup-linux-x64-musl": "4.59.0", "@rollup/rollup-openbsd-x64": "4.59.0", "@rollup/rollup-openharmony-arm64": "4.59.0", "@rollup/rollup-win32-arm64-msvc": "4.59.0", "@rollup/rollup-win32-ia32-msvc": "4.59.0", "@rollup/rollup-win32-x64-gnu": "4.59.0", "@rollup/rollup-win32-x64-msvc": "4.59.0", "fsevents": "~2.3.2" }, "bin": { "rollup": "dist/bin/rollup" } }, "sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg=="], 140 + 141 + "source-map-js": ["source-map-js@1.2.1", "", {}, "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA=="], 142 + 143 + "tinyglobby": ["tinyglobby@0.2.15", "", { "dependencies": { "fdir": "^6.5.0", "picomatch": "^4.0.3" } }, "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ=="], 144 + 145 + "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, 
"sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], 146 + 147 + "vite": ["vite@7.3.1", "", { "dependencies": { "esbuild": "^0.27.0", "fdir": "^6.5.0", "picomatch": "^4.0.3", "postcss": "^8.5.6", "rollup": "^4.43.0", "tinyglobby": "^0.2.15" }, "optionalDependencies": { "fsevents": "~2.3.3" }, "peerDependencies": { "@types/node": "^20.19.0 || >=22.12.0", "jiti": ">=1.21.0", "less": "^4.0.0", "lightningcss": "^1.21.0", "sass": "^1.70.0", "sass-embedded": "^1.70.0", "stylus": ">=0.54.8", "sugarss": "^5.0.0", "terser": "^5.16.0", "tsx": "^4.8.1", "yaml": "^2.4.2" }, "optionalPeers": ["@types/node", "jiti", "less", "lightningcss", "sass", "sass-embedded", "stylus", "sugarss", "terser", "tsx", "yaml"], "bin": { "vite": "bin/vite.js" } }, "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA=="], 148 + 149 + "zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], 150 + 151 + "zustand": ["zustand@5.0.11", "", { "peerDependencies": { "@types/react": ">=18.0.0", "immer": ">=9.0.6", "react": ">=18.0.0", "use-sync-external-store": ">=1.2.0" }, "optionalPeers": ["@types/react", "immer", "react", "use-sync-external-store"] }, "sha512-fdZY+dk7zn/vbWNCYmzZULHRrss0jx5pPFiOuMZ/5HJN6Yv3u+1Wswy/4MpZEkEGhtNH+pwxZB8OKgUBPzYAGg=="], 152 + } 153 + }
web/favicon.svg web/public/favicon.svg
+3 -6
web/index.html
··· 6 6 <title>Fantasma — Write Like No One</title> 7 7 <meta name="description" content="Neutralize your stylometric fingerprint. Same meaning, no authorship signal. Runs entirely in your browser."> 8 8 <meta name="theme-color" content="#1e1e2e"> 9 - <link rel="icon" href="favicon.svg" type="image/svg+xml"> 10 - <link rel="manifest" href="manifest.json"> 11 - <link rel="stylesheet" href="oat.min.css"> 12 - <script src="oat.min.js" defer></script> 13 - <link rel="stylesheet" href="style.css"> 9 + <link rel="icon" href="/favicon.svg" type="image/svg+xml"> 10 + <link rel="manifest" href="/manifest.json"> 14 11 </head> 15 12 <body data-theme="dark"> 16 13 ··· 109 106 <p>made with &#10084;&#65039; by <a href="https://qontact.qstorage.quilibrium.com/?address=QmeGDXJ58AJ9hJCLqFXyosYg6uW3hekYnuPXpLhiLrKjj8&name=meta" target="_blank" rel="noopener">meta</a> &middot; hosted on <a href="https://quilibrium.com" target="_blank" rel="noopener">Quilibrium</a></p> 110 107 </footer> 111 108 112 - <script src="app.js"></script> 109 + <script type="module" src="/src/main.ts"></script> 113 110 </body> 114 111 </html>
web/manifest.json web/public/manifest.json
-1
web/oat.min.css
··· 1 - @layer theme,base,components,animations,utilities;@layer base{*,*:before,*:after{box-sizing:border-box;-webkit-tap-highlight-color:transparent}*{margin:0}html{tab-size:4}body,dialog,[popover]{font-family:var(--font-sans);font-size:var(--text-regular);line-height:var(--leading-normal);color:var(--foreground)}body{background-color:var(--background);color:var(--foreground);-webkit-font-smoothing:antialiased}main{padding-block-start:var(--space-8)}img,picture,video,canvas,svg{max-width:100%}p,h1,h2,h3,h4,h5,h6{overflow-wrap:break-word}h1,h2,h3,h4,h5,h6{font-weight:var(--font-semibold);line-height:1.25;&:first-child{margin-block-start:0}}h1{font-size:var(--text-1);margin:var(--space-10) 0 var(--space-6)}h2{font-size:var(--text-2);margin:var(--space-8) 0 var(--space-5)}h3{font-size:var(--text-3);margin:var(--space-6) 0 var(--space-4)}h4{font-size:var(--text-4);margin:var(--space-5) 0 var(--space-3)}h5{font-size:var(--text-5);margin:var(--space-4) 0 var(--space-2)}h6{font-size:var(--text-regular);margin:var(--space-4) 0 var(--space-2)}p{margin-block-end:var(--space-4);&:last-child{margin-block-end:0}}a{color:var(--primary);text-decoration:underline;text-underline-offset:2px;transition:color var(--transition-fast);&:hover{color:rgb(from var(--primary) r g b / .8)}}strong,b{font-weight:var(--font-semibold)}em,i{font-style:italic}small{font-size:var(--text-7)}code{font-family:var(--font-mono);font-size:.875em;padding:calc(var(--space-1) / 2) var(--space-1);background-color:var(--faint);border-radius:var(--radius-small)}pre{font-family:var(--font-mono);padding:var(--space-4);background-color:var(--faint);border-radius:var(--radius-medium);overflow-x:auto;margin-block-end:var(--space-4);code{padding:0;background:none;border-radius:0}}blockquote{border-inline-start:4px solid var(--border);padding-inline-start:var(--space-4);margin:var(--space-4) 0;color:var(--muted-foreground);font-style:italic}hr{border:none;border-top:1px solid var(--border);margin:var(--space-2) 
0}ul,ol{padding-inline-start:var(--space-6);margin-block-end:var(--space-4)}ul{list-style-type:disc}ol{list-style-type:decimal}li{margin-block-end:var(--space-1)}mark{background-color:rgb(from var(--warning) r g b / .3);padding:calc(var(--space-1) / 2) var(--space-1);border-radius:var(--radius-small)}[hidden]{display:none}:focus-visible{outline:2px solid var(--ring);outline-offset:2px}:disabled{opacity:.5;cursor:not-allowed}}@layer theme{:root{color-scheme:light dark;--background: light-dark(#fff, #09090b);--foreground: light-dark(#09090b, #fafafa);--card: light-dark(#fff, #18181b);--card-foreground: light-dark(#09090b, #fafafa);--primary: light-dark(#574747, #fafafa);--primary-foreground: light-dark(#fafafa, #18181b);--secondary: light-dark(#f4f4f5, #27272a);--secondary-foreground: light-dark(#574747, #fafafa);--muted: light-dark(#f4f4f5, #27272a);--muted-foreground: light-dark(#71717a, #a1a1aa);--faint: light-dark(#fafafa, #1e1e21);--faint-foreground: light-dark(#a1a1aa, #71717a);--accent: light-dark(#f4f4f5, #27272a);--danger: light-dark(#d32f2f, #f4807b);--danger-foreground: light-dark(#fafafa, #18181b);--success: light-dark(#008032, #6cc070);--success-foreground: light-dark(#fafafa, #18181b);--warning: light-dark(#a65b00, #f0a030);--warning-foreground: #09090b;--border: light-dark(#d4d4d8, #52525b);--input: light-dark(#d4d4d8, #52525b);--ring: light-dark(#574747, #d4d4d8);--space-1: .25rem;--space-2: .5rem;--space-3: .75rem;--space-4: 1rem;--space-5: 1.25rem;--space-6: 1.5rem;--space-8: 2rem;--space-10: 2.5rem;--space-12: 3rem;--space-14: 3.5rem;--space-16: 4rem;--space-18: 4.5rem;--radius-small: .125rem;--radius-medium: .375rem;--radius-large: .75rem;--radius-full: 9999px;--bar-height: .5rem;--font-sans: system-ui, sans-serif;--font-mono: ui-monospace, Consolas, monospace;--text-1: clamp(1.75rem, 1.5rem + 1.1vw, 2.25rem);--text-2: clamp(1.5rem, 1.3rem + .8vw, 1.875rem);--text-3: clamp(1.25rem, 1.1rem + .5vw, 1.5rem);--text-4: clamp(1.125rem, 1.05rem + .3vw, 
1.25rem);--text-5: 1.125rem;--text-6: 1rem;--text-7: .875rem;--text-8: .75rem;--text-regular: var(--text-6);--leading-normal: 1.5;--font-normal: 400;--font-medium: 500;--font-semibold: 600;--font-bold: 600;--shadow-small: 0 1px 2px 0 rgb(0 0 0 / .05);--shadow-medium: 0 1px 3px 0 rgb(0 0 0 / .1), 0 1px 2px -1px rgb(0 0 0 / .1);--shadow-large: 0 4px 6px -1px rgb(0 0 0 / .1), 0 2px 4px -2px rgb(0 0 0 / .1);--transition-fast: .12s cubic-bezier(.4, 0, .2, 1);--transition: .2s cubic-bezier(.4, 0, .2, 1);--z-dropdown: 50;--z-modal: 200}}@layer animations{.animate-pop-in{opacity:1;transform:perspective(1000px) rotateX(0) translateZ(0);transition:opacity .15s cubic-bezier(.4,0,.2,1),transform .15s cubic-bezier(.4,0,.2,1),overlay .15s cubic-bezier(.4,0,.2,1) allow-discrete,display .15s cubic-bezier(.4,0,.2,1) allow-discrete;@starting-style{opacity:0;transform:perspective(1000px) rotateX(-15deg) translateZ(-80px)}&[data-state=closing]{opacity:0;transform:perspective(1000px) rotateX(-15deg) translateZ(-80px)}&[data-state=closing]::backdrop{opacity:0}}dialog::backdrop{opacity:1;transition:opacity .15s cubic-bezier(.4,0,.2,1);@starting-style{opacity:0}}.animate-slide-in{opacity:1;transform:translate(0);transition:opacity .15s cubic-bezier(.16,1,.3,1),transform .15s cubic-bezier(.16,1,.3,1);@starting-style{opacity:0;transform:translate(100%)}&[data-state=closing]{opacity:0;transform:translate(100%)}}}@layer base{:is(button,[type=submit],[type=reset],[type=button],a.button),::file-selector-button{--_hov: color-mix(in srgb, var(--primary), white 25%);display:inline-flex;align-items:center;justify-content:center;gap:var(--space-2);padding:var(--space-2) var(--space-4);font-size:var(--text-7);font-weight:var(--font-medium);line-height:var(--leading-normal);white-space:nowrap;text-decoration:none;background-color:var(--primary);color:var(--primary-foreground);border-radius:var(--radius-medium);border:1px solid;border-color:rgb(from #fff r g b / .15) rgb(from #000 r g b / .2) rgb(from 
#000 r g b / .2) rgb(from #fff r g b / .15);transition:background-color var(--transition-fast),opacity var(--transition-fast),transform var(--transition-fast);&:not(:disabled){cursor:pointer}&:hover:not(:disabled){background-color:var(--_hov)}&:active:not(:disabled){transform:translate(1px,1px)}&[data-variant=secondary]{--_hov: color-mix(in srgb, var(--secondary), black 10%);background-color:var(--secondary);color:var(--secondary-foreground);border-color:rgb(from #fff r g b / .5) rgb(from #000 r g b / .1) rgb(from #000 r g b / .1) rgb(from #fff r g b / .5)}&[data-variant=danger]{--_hov: color-mix(in srgb, var(--danger), black 15%);background-color:var(--danger);color:var(--danger-foreground)}&:is(.outline,.ghost){--_hov: var(--accent);background-color:transparent;color:var(--foreground);&[data-variant=danger]{--_hov: color-mix(in srgb, var(--danger), transparent 90%);color:var(--danger)}&[data-variant=secondary]{--_hov: color-mix(in srgb, var(--secondary), transparent 80%);color:var(--secondary-foreground)}}&.outline{border-color:var(--border);&[data-variant=danger]{border-color:var(--danger)}&[data-variant=secondary]{border-color:var(--secondary)}}&.ghost{border-color:transparent}&.small{padding:var(--space-1) var(--space-3);font-size:var(--text-8)}&.large{height:3rem;padding:0 var(--space-6);font-size:var(--text-regular)}&.icon{width:2.5rem;padding:0;&.small{width:2rem}&.large{width:3rem}}}::file-selector-button{background-color:transparent;color:var(--foreground);border:1px solid var(--border)}::file-selector-button:hover{background-color:var(--accent)}}@layer components{menu.buttons{list-style-type:none;padding-inline-start:0;display:inline-flex;>li{&:first-child>*{border-start-start-radius:var(--radius-medium);border-end-start-radius:var(--radius-medium)}&:last-child>*{border-start-end-radius:var(--radius-medium);border-end-end-radius:var(--radius-medium)}>*{border-radius:0}&:not(:last-child)>*{border-inline-end:1px solid rgb(from var(--primary-foreground) r g 
b / .2)}}}}@layer base{label{display:block;font-size:var(--text-7);font-weight:var(--font-medium);&:has(input:where([type=checkbox],[type=radio])){display:inline-flex;align-items:center;gap:var(--space-2);font-weight:var(--font-normal)}}:where(input:not([type=checkbox],[type=radio],[type=range],[type=file],[type=color]),textarea,select){width:100%;margin-block-start:var(--space-1);padding:var(--space-2) var(--space-3);font-size:var(--text-7);line-height:var(--leading-normal);background-color:var(--background);color:var(--foreground);border:1px solid var(--input);border-radius:var(--radius-medium);transition:border-color var(--transition-fast),box-shadow var(--transition-fast);&::placeholder{color:var(--muted-foreground)}&:focus{outline:none;border-color:var(--ring);box-shadow:0 0 0 2px rgb(from var(--ring) r g b / .2);z-index:1}&:disabled{background-color:var(--muted)}&:is([aria-invalid=true],:user-invalid){border-color:var(--danger);&:focus{box-shadow:0 0 0 2px rgb(from var(--danger) r g b / .2)}}}textarea{height:auto;min-height:5rem;padding:var(--space-3);resize:vertical}select{appearance:none;background-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' viewBox='0 0 24 24' fill='none' stroke='%2371717a' stroke-width='2'%3E%3Cpath d='m6 9 6 6 6-6'/%3E%3C/svg%3E");background-repeat:no-repeat;background-position:right var(--space-2) center;padding-inline-end:var(--space-6)}input:where([type=checkbox],[type=radio]){appearance:none;width:1rem;height:1rem;margin:0;position:relative;background-color:var(--background);border:1px solid var(--input);transition:background-color var(--transition-fast),border-color 
var(--transition-fast);&:checked{background-color:var(--primary);border-color:var(--primary);&:after{content:"";position:absolute;inset:0;background-color:var(--primary-foreground);mask-position:center;mask-repeat:no-repeat;mask-size:100%}}}input[type=checkbox]{border-radius:var(--radius-small);&:checked:after{mask-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='currentColor' stroke-width='4'%3E%3Cpolyline points='20 6 9 17 4 12'/%3E%3C/svg%3E")}&[role=switch]{--switch-height: calc(var(--bar-height) * 3);--switch-inset: 2px;--switch-thumb: calc(var(--switch-height) - var(--switch-inset) * 3);width:calc(var(--switch-height) * 2);height:var(--switch-height);border-radius:var(--radius-full);background-color:var(--input);&:before{content:"";position:absolute;top:50%;left:var(--switch-inset);transform:translateY(-50%);width:var(--switch-thumb);height:var(--switch-thumb);background-color:var(--background);border-radius:var(--radius-full);transition:transform var(--transition);box-shadow:var(--shadow-small)}&:checked{background-color:var(--primary);&:after{content:none}&:before{transform:translateY(-50%) translate(var(--switch-height))}}}}input[type=radio]{border-radius:var(--radius-full);&:checked:after{mask-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16'%3E%3Ccircle cx='8' cy='8' r='4' fill='currentColor'/%3E%3C/svg%3E")}}:where(input:where([type=checkbox],[type=radio],[type=range]),select):not(:disabled),label:has(input:where([type=checkbox],[type=radio]):not(:disabled)){cursor:pointer}input[type=range]{width:100%;height:var(--bar-height);appearance:none;background:var(--muted);border-radius:var(--radius-full);&::-webkit-slider-thumb{appearance:none;width:1.25rem;height:1.25rem;background:var(--primary);border-radius:var(--radius-full);transition:transform 
var(--transition-fast);&:hover{transform:scale(1.1)}}&::-moz-range-thumb{width:1.25rem;height:1.25rem;background:var(--primary);border:none;border-radius:var(--radius-full)}}fieldset{border:1px solid var(--border);border-radius:var(--radius-medium);padding:var(--space-4);margin-block-end:var(--space-4)}legend{font-size:var(--text-7);font-weight:var(--font-medium);padding:0 var(--space-2)}}@layer components{fieldset.group{display:flex;align-items:stretch;border:none;padding:0;margin:0;>:is(input,textarea,select){flex:1;margin-block-start:0;&:not(:focus):not(:last-child){border-inline-end-color:transparent}}>:is(input,textarea,select,button){border-radius:0;&:first-child{border-radius:var(--radius-medium) 0 0 var(--radius-medium)}&:last-child{border-radius:0 var(--radius-medium) var(--radius-medium) 0}}>legend{float:inline-start;display:inline-flex;align-items:center;padding:0 var(--space-3);line-height:var(--leading-normal);font-weight:var(--font-normal);color:var(--muted-foreground);background-color:var(--muted);border:1px solid var(--input);border-inline-end:none;border-radius:var(--radius-medium) 0 0 var(--radius-medium)}}[data-field]{margin-block-end:var(--space-4);[data-hint],.error{font-size:var(--text-8);font-weight:var(--font-normal);color:var(--muted-foreground);margin-block-start:var(--space-1)}.error{display:none}&[data-field=error] .error{display:block;color:var(--danger)}}}@layer base{.table{min-width:320px;width:100%;overflow-x:auto}table{border-collapse:collapse;width:100%;font-size:var(--text-7)}thead{border-bottom:1px solid var(--border)}th,td{overflow-wrap:break-word}th{padding:var(--space-3) var(--space-2);text-align:start;font-weight:var(--font-medium);color:var(--muted-foreground)}td{padding:var(--space-3) var(--space-2)}tbody tr{border-bottom:1px solid var(--border);transition:background-color var(--transition-fast);&:last-child{border-bottom:none}&:hover{background-color:rgb(from var(--muted) r g b / .5)}}}@layer 
base{progress{appearance:none;width:100%;height:var(--bar-height);border:none;border-radius:var(--radius-full);overflow:hidden;background-color:var(--muted);&::-webkit-progress-bar{background-color:var(--muted);border-radius:var(--radius-full)}&::-webkit-progress-value{background-color:var(--primary);border-radius:var(--radius-full);transition:width var(--transition)}&::-moz-progress-bar{background-color:var(--primary);border-radius:var(--radius-full)}}meter{appearance:none;width:100%;height:var(--bar-height);border:none;border-radius:var(--radius-full);overflow:hidden;background:var(--muted);&::-webkit-meter-bar{background:var(--muted);border:none;border-radius:var(--radius-full);height:var(--bar-height)}&::-webkit-meter-optimum-value,&::-webkit-meter-suboptimum-value,&::-webkit-meter-even-less-good-value{border-radius:var(--radius-full)}&::-webkit-meter-optimum-value{background:var(--success)}&::-webkit-meter-suboptimum-value{background:var(--warning)}&::-webkit-meter-even-less-good-value{background:var(--danger)}&::-moz-meter-bar{background:var(--success);border-radius:var(--radius-full)}&:-moz-meter-sub-optimum::-moz-meter-bar{background:var(--warning)}&:-moz-meter-sub-sub-optimum::-moz-meter-bar{background:var(--danger)}}}@layer components{[aria-busy=true]{&:before{content:"";display:inline-block;inset:0;margin:auto;width:1.5rem;height:1.5rem;border:2px solid var(--muted);border-top-color:var(--primary);border-radius:var(--radius-full);animation:spin 1s linear infinite;text-align:center}&[data-spinner~=small]:before{width:1rem;height:1rem}&[data-spinner~=large]:before{width:2rem;height:2rem;border-width:3px}&[data-spinner~=overlay]{position:relative;>*{opacity:.3;pointer-events:none}&:before{position:absolute;inset:0;margin:auto;z-index:1}}}@keyframes spin{to{transform:rotate(360deg)}}}@layer components{:root{--grid-cols: 12;--grid-gap: 1.5rem;--container-max: 1280px;--container-pad: 
1rem}.container{width:100%;max-width:var(--container-max);margin-inline:auto;padding-inline:var(--container-pad)}.row{display:grid;grid-template-columns:repeat(var(--grid-cols),1fr);gap:var(--grid-gap);width:100%}.col,[class*=col-]{grid-column-end:span var(--span, var(--grid-cols))}.col-1{--span: 1}.col-2{--span: 2}.col-3{--span: 3}.col-4{--span: 4}.col-5{--span: 5}.col-6{--span: 6}.col-7{--span: 7}.col-8{--span: 8}.col-9{--span: 9}.col-10{--span: 10}.col-11{--span: 11}.col-12{--span: 12}.offset-1{grid-column-start:2}.offset-2{grid-column-start:3}.offset-3{grid-column-start:4}.offset-4{grid-column-start:5}.offset-5{grid-column-start:6}.offset-6{grid-column-start:7}.col-end{grid-column-start:span var(--span, 1);grid-column-end:-1}@media(max-width:768px){.row{--grid-cols: 4;--grid-gap: 1rem}.col,[class*=col-]{--span: 4}[class*=offset-]{grid-column-start:auto}}}@layer components{.card{background-color:var(--card);color:var(--card-foreground);border:1px solid var(--border);border-radius:var(--radius-medium);box-shadow:var(--shadow-small);padding:var(--space-6);overflow:hidden}}@layer components{[role=alert]{position:relative;display:flex;gap:var(--space-3);padding:var(--space-4) var(--space-6);background-color:var(--background);border:1px solid var(--border);border-radius:var(--radius-medium);font-size:var(--text-7);&[data-variant]{border:none}&[data-variant=error],&[data-variant=danger]{color:var(--danger);background-color:light-dark(color-mix(in srgb,var(--danger) 8%,transparent),color-mix(in srgb,var(--danger) 20%,transparent));& a{color:var(--danger)}}&[data-variant=success]{color:var(--success);background-color:light-dark(color-mix(in srgb,var(--success) 8%,transparent),color-mix(in srgb,var(--success) 20%,transparent));& a{color:var(--success)}}&[data-variant=warning]{color:var(--warning);background-color:light-dark(color-mix(in srgb,var(--warning) 8%,transparent),color-mix(in srgb,var(--warning) 20%,transparent));& a{color:var(--warning)}}}}@layer 
components{.badge{display:inline-flex;align-items:center;gap:var(--space-1);padding:var(--space-1) var(--space-4);font-size:var(--text-8);font-weight:var(--font-medium);line-height:var(--leading-normal);background-color:var(--primary);color:var(--primary-foreground);border-radius:var(--radius-full);&.secondary{background-color:var(--secondary);color:var(--secondary-foreground)}&.outline{background-color:transparent;color:var(--foreground);border:1px solid var(--border)}&.success{color:var(--success);background-color:light-dark(color-mix(in srgb,var(--success) 10%,transparent),color-mix(in srgb,var(--success) 30%,transparent))}&.warning{color:var(--warning);background-color:light-dark(color-mix(in srgb,var(--warning) 10%,transparent),color-mix(in srgb,var(--warning) 30%,transparent))}&.danger{color:var(--danger);background-color:light-dark(color-mix(in srgb,var(--danger) 10%,transparent),color-mix(in srgb,var(--danger) 30%,transparent))}}}@layer components{details{border:1px solid var(--border);border-radius:var(--radius-medium);overflow:hidden;+details{margin-top:-1px;border-start-start-radius:0;border-start-end-radius:0}&:has(+details){border-end-start-radius:0;border-end-end-radius:0}&[open] summary{border-bottom:1px solid var(--border)}}summary{display:flex;align-items:center;justify-content:space-between;gap:var(--space-2);padding:var(--space-4);font-weight:var(--font-medium);cursor:pointer;user-select:none;transition:background-color var(--transition-fast);&:hover{background-color:var(--muted)}&::-webkit-details-marker,&::marker{display:none}&:after{content:"";width:1em;height:1em;flex-shrink:0;background-color:currentColor;mask-image:url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='currentColor' stroke-width='2'%3E%3Cpath d='m6 9 6 6 6-6'/%3E%3C/svg%3E");mask-size:contain;mask-repeat:no-repeat;transition:transform var(--transition-fast)}details[open] 
&:after{transform:rotate(180deg)}}details>*:not(summary){margin:var(--space-4)}}@layer components{[role=tablist]{display:inline-flex;align-items:center;gap:var(--space-1);padding:var(--space-1);background-color:var(--muted);border-radius:var(--radius-medium)}[role=tab]{display:inline-flex;align-items:center;justify-content:center;padding:var(--space-2) var(--space-3);font-size:var(--text-7);font-weight:var(--font-medium);white-space:nowrap;background-color:transparent;color:var(--foreground);border:none;border-radius:calc(var(--radius-medium) - 2px);cursor:pointer;transition:background-color var(--transition-fast),color var(--transition-fast);&:hover{color:var(--muted-foreground)}&[aria-selected=true]{background-color:var(--background);box-shadow:var(--shadow-small)}}[role=tabpanel]{padding:var(--space-4) 0;&:focus-visible{outline:none}}}@layer components{dialog{position:fixed;inset:0;z-index:var(--z-modal);width:min(100% - 2rem,32rem);max-height:85vh;margin:auto;padding:0;background-color:var(--card);border:1px solid var(--border);border-radius:var(--radius-large);box-shadow:var(--shadow-large);overflow:hidden;opacity:0;transform:scale(.95);transition:opacity .15s ease,transform .15s ease,overlay .15s ease allow-discrete,display .15s ease allow-discrete;&[open]{opacity:1;transform:scale(1)}@starting-style{&[open]{opacity:0;transform:scale(.95)}}&::backdrop{background-color:#0000;transition:background-color .15s ease,overlay .15s ease allow-discrete,display .15s ease 
allow-discrete}&[open]::backdrop{background-color:#00000080}@starting-style{&[open]::backdrop{background-color:#0000}}>header,>form>header{display:flex;flex-direction:column;gap:var(--space-1);padding:var(--space-6);padding-block-end:0;>h1,>h2,>h3,>h4,>h5,>h6{margin-block-end:0}>p{font-size:var(--text-7);color:var(--muted-foreground);margin-block-end:0}}>p,>div,>section,>form>p,>form>div,>form>section{padding:var(--space-6);overflow-y:auto}>footer,>form>footer{display:flex;justify-content:flex-end;gap:var(--space-2);padding:var(--space-6);padding-block-start:0}}}@layer components{ot-dropdown{[popover]{position:fixed;margin:0;min-width:12rem;background-color:var(--background);border:1px solid var(--border);border-radius:var(--radius-medium);box-shadow:var(--shadow-medium);opacity:0;transform:translateY(-4px);transition:opacity .15s ease-out,transform .15s ease-out,display .15s allow-discrete,overlay .15s allow-discrete;&:popover-open{opacity:1;transform:translateY(0)}@starting-style{&:popover-open{opacity:0;transform:translateY(-4px)}}}[role=menuitem]{display:flex;align-items:center;justify-content:start;gap:var(--space-2);width:100%;padding:var(--space-2) var(--space-3);font-size:var(--text-7);text-align:start;color:var(--foreground);background:none;border:none;border-radius:var(--radius-small);cursor:pointer;&:hover,&:focus{background-color:var(--accent);outline:none}}}}@layer components{.toast-container{position:fixed;display:flex;flex-direction:column;pointer-events:none;margin:0;padding:0;border:none;background:transparent;overflow:visible;&::backdrop{display:none}&[data-placement=top-left]{inset:var(--space-4) auto auto var(--space-4)}&[data-placement=top-center]{inset:var(--space-4) auto auto 50%;transform:translate(-50%)}&[data-placement=top-right]{inset:var(--space-4) var(--space-4) auto auto}&[data-placement=bottom-left]{inset:auto auto var(--space-4) var(--space-4);flex-direction:column-reverse}&[data-placement=bottom-center]{inset:auto auto 
var(--space-4) 50%;transform:translate(-50%);flex-direction:column-reverse}&[data-placement=bottom-right]{inset:auto var(--space-4) var(--space-4) auto;flex-direction:column-reverse}}.toast{--transition: .3s;--transition-in: calc(var(--transition) - 50ms);padding:var(--space-5) var(--space-4);max-width:28rem;min-width:20rem;pointer-events:auto;background-color:var(--card);border:1px solid var(--border);border-inline-start-width:var(--space-1);border-inline-start-style:solid;border-radius:var(--radius-medium);box-shadow:var(--shadow-small);transition:opacity var(--transition-in),transform var(--transition-in),margin var(--transition-in);line-height:1;.toast-title{font-weight:600;margin:0 0 var(--space-3) 0}.toast-message{color:var(--muted-foreground)}&[data-variant=success]{border-inline-start-color:var(--success);.toast-title{color:var(--success)}}&[data-variant=danger]{border-inline-start-color:var(--danger);.toast-title{color:var(--danger)}}&[data-variant=warning]{border-inline-start-color:var(--warning);.toast-title{color:var(--warning)}}>[data-close]{margin-inline-start:auto;background:none;border:none;padding:0;cursor:pointer;opacity:.5;&:hover{opacity:1}}margin:var(--space-2) 0;&[data-entering]{opacity:0;transform:translateY(-1rem)}&[data-exiting]{opacity:0;margin:0;padding-block:0;max-height:0;overflow:hidden;transition:opacity var(--transition),margin var(--transition),padding var(--transition),max-height var(--transition)}}}@layer components{[data-sidebar-layout]{display:grid;grid-template-columns:14rem 1fr;grid-template-rows:auto 1fr;height:100dvh;>main{grid-row:2;min-width:0;overflow-y:auto}>aside[data-sidebar]{grid-row:2;min-height:0;z-index:1;background-color:var(--background);border-inline-end:1px solid var(--border);box-shadow:var(--shadow-medium);display:flex;flex-direction:column;>:is(header,footer){flex-shrink:0;padding:var(--space-3)}>footer{margin-block-start:auto}>nav{flex:1;min-height:0;overflow-y:auto;padding:var(--space-3) 
var(--space-2);font-size:var(--text-7);ul{list-style:none;padding:0;margin:0;display:flex;flex-direction:column;gap:var(--space-1);li{margin:0}}a{display:flex;gap:var(--space-2);padding:var(--space-1) var(--space-3);color:var(--foreground);text-decoration:none;border-radius:var(--radius-small);transition:background-color var(--transition-fast);&:is(:hover,[aria-current]){background-color:var(--accent)}}details{border:none;overflow:visible;+details{margin-top:0}&[open] summary{border-bottom:none}>ul{margin-inline-start:var(--space-4);padding:var(--space-1) 0}}summary{justify-content:flex-start;padding:var(--space-2) var(--space-3);border-radius:var(--radius-small);&:after{width:.75rem;height:.75rem;margin-inline-start:auto}}}}>nav[data-topnav]{grid-column:1 / -1}}nav[data-topnav]{position:sticky;top:0;z-index:5;display:flex;align-items:center;gap:var(--space-3);padding:var(--space-2) var(--space-4);background-color:var(--background);border-bottom:1px solid var(--border);box-shadow:var(--shadow-small);a{text-decoration:none}}:is([data-sidebar-toggle],[data-sidebar-header]){display:none}[data-sidebar-toggle]{padding:0 var(--space-1);background:none;border:1px solid var(--border);border-radius:var(--radius-small)}@media(min-width:769px){[data-sidebar-layout=always]{transition:grid-template-columns var(--transition);[data-sidebar-toggle]{display:inline-block}>aside[data-sidebar]{transform:translate(0);opacity:1;transition:transform var(--transition),opacity var(--transition),visibility var(--transition)}&[data-sidebar-open]{grid-template-columns:0px 1fr;gap:0;>aside[data-sidebar]{overflow:hidden;min-width:0;transform:translate(-100%);opacity:0;visibility:hidden;border-inline-end:none}}}}@media(max-width:768px){[data-sidebar-layout]{grid-template-columns:1fr;>main{grid-column:1}>aside[data-sidebar]{grid-column:1;z-index:2;width:16rem;transform:translate(-100%);transition:transform 
var(--transition);box-shadow:var(--shadow-large)}&[data-sidebar-open]>aside[data-sidebar]{transform:translate(0)}}[data-sidebar-toggle]{display:inline-block}[data-sidebar-header]{display:flex;align-items:center;gap:var(--space-3);padding:var(--space-3) var(--space-4);border-bottom:1px solid var(--border)}}}@layer components{[role=status].skeleton{--_c: light-dark( color-mix(in srgb, var(--muted) 30%, white), color-mix(in srgb, var(--muted) 90%, var(--foreground)) );margin-block-end:var(--space-3);background:var(--muted);border-radius:var(--radius-medium);animation:anim 2s infinite;background-size:200% 100%;background-image:linear-gradient(90deg,var(--muted) 0%,var(--_c) 50%,var(--muted) 100%);&.box{width:4rem;height:4rem}&.line{height:1rem;width:100%}}[role=status].skeleton:last-child{margin-block-end:0}@keyframes anim{0%{background-position:200% 0}to{background-position:-200% 0}}}@layer components{[data-tooltip]{position:relative}[data-tooltip]:before,[data-tooltip]:after{position:absolute;inset-inline-start:50%;opacity:0;visibility:hidden;transition:opacity var(--transition-fast),transform var(--transition-fast),visibility var(--transition-fast);pointer-events:none;z-index:1000}[data-tooltip]:after{content:attr(data-tooltip);inset-block-end:calc(100% + 10px);transform:translate(-50%) translateY(4px);padding:var(--space-2) var(--space-3);font-size:var(--text-7);line-height:1;white-space:nowrap;background:var(--foreground);color:var(--background);border-radius:var(--radius-medium)}[data-tooltip]:before{content:"";inset-block-end:calc(100% - 5px);transform:translate(-50%) translateY(4px);border:8px solid transparent;border-top-color:var(--foreground)}[data-tooltip]:is(:hover,:focus-visible):before,[data-tooltip]:is(:hover,:focus-visible):after{opacity:1;visibility:visible;transition-delay:.7s;transform:translate(-50%) translateY(0)}}@layer 
utilities{.align-left{text-align:start}.align-center{text-align:center}.align-right{text-align:end}.text-light{color:var(--muted-foreground)}.text-lighter{color:var(--faint-foreground)}.flex{display:flex}.flex-col{flex-direction:column}.items-center{align-items:center}.justify-center{justify-content:center}.justify-between{justify-content:space-between}.justify-end{justify-content:flex-end}.hstack{display:flex;align-items:center;gap:var(--space-3);flex-wrap:wrap;align-content:flex-start;height:auto;*{margin:0}}.vstack{display:flex;flex-direction:column;gap:var(--space-3)}.gap-1{gap:var(--space-1)}.gap-2{gap:var(--space-2)}.gap-4{gap:var(--space-4)}.mt-2{margin-block-start:var(--space-2)}.mt-4{margin-block-start:var(--space-4)}.mt-6{margin-block-start:var(--space-6)}.mb-2{margin-block-end:var(--space-2)}.mb-4{margin-block-end:var(--space-4)}.mb-6{margin-block-end:var(--space-6)}.p-4{padding:var(--space-4)}.w-100{width:100%}:is(ul,ol,a).unstyled{list-style:none;text-decoration:none;padding:0}}
-1
web/oat.min.js
··· 1 - (()=>{var l=class extends HTMLElement{#t=!1;connectedCallback(){this.#t||(document.readyState==="loading"?document.addEventListener("DOMContentLoaded",()=>this.#e(),{once:!0}):this.#e())}#e(){this.#t||(this.#t=!0,this.init())}disconnectedCallback(){this.cleanup()}cleanup(){}handleEvent(t){let e=this[`on${t.type}`];e&&e.call(this,t)}keyNav(t,e,n,o,s,a=!1){let{key:r}=t,c=-1;return r===s?c=(e+1)%n:r===o?c=(e-1+n)%n:a&&(r==="Home"?c=0:r==="End"&&(c=n-1)),c>=0&&t.preventDefault(),c}emit(t,e=null){return this.dispatchEvent(new CustomEvent(t,{bubbles:!0,composed:!0,cancelable:!0,detail:e}))}$(t){return this.querySelector(t)}$$(t){return Array.from(this.querySelectorAll(t))}uid(){return Math.random().toString(36).slice(2,10)}};"commandForElement"in HTMLButtonElement.prototype||document.addEventListener("click",i=>{let t=i.target.closest("button[commandfor]");if(!t)return;let e=document.getElementById(t.getAttribute("commandfor"));if(!e)return;let n=t.getAttribute("command")||"toggle";e instanceof HTMLDialogElement&&(n==="show-modal"?e.showModal():n==="close"||e.open?e.close():e.showModal())});var u=class extends l{#t=[];#e=[];init(){let t=this.$(':scope > [role="tablist"]');if(this.#t=t?[...t.querySelectorAll('[role="tab"]')]:[],this.#e=this.$$(':scope > [role="tabpanel"]'),this.#t.length===0||this.#e.length===0){console.warn("ot-tabs: Missing tab or tabpanel elements");return}this.#t.forEach((n,o)=>{let s=this.#e[o];if(!s)return;let a=n.id||`ot-tab-${this.uid()}`,r=s.id||`ot-panel-${this.uid()}`;n.id=a,s.id=r,n.setAttribute("aria-controls",r),s.setAttribute("aria-labelledby",a)}),t.addEventListener("click",this),t.addEventListener("keydown",this);let e=this.#t.findIndex(n=>n.ariaSelected==="true");this.#i(e>=0?e:0)}onclick(t){let e=this.#t.indexOf(t.target.closest('[role="tab"]'));e>=0&&this.#i(e)}onkeydown(t){if(!t.target.closest('[role="tab"]'))return;let 
e=this.keyNav(t,this.activeIndex,this.#t.length,"ArrowLeft","ArrowRight");e>=0&&(this.#i(e),this.#t[e].focus())}#i(t){this.#t.forEach((e,n)=>{let o=n===t;e.ariaSelected=String(o),e.tabIndex=o?0:-1}),this.#e.forEach((e,n)=>{e.hidden=n!==t}),this.emit("ot-tab-change",{index:t,tab:this.#t[t]})}get activeIndex(){return this.#t.findIndex(t=>t.ariaSelected==="true")}set activeIndex(t){t>=0&&t<this.#t.length&&this.#i(t)}};customElements.define("ot-tabs",u);var h=class extends l{#t;#e;#i;#n;init(){this.#t=this.$("[popover]"),this.#e=this.$("[popovertarget]"),!(!this.#t||!this.#e)&&(this.#t.addEventListener("toggle",this),this.#t.addEventListener("keydown",this),this.#i=()=>{let t=this.#e.getBoundingClientRect(),e=this.#t.getBoundingClientRect();this.#t.style.top=`${t.bottom+e.height>window.innerHeight?t.top-e.height:t.bottom}px`,this.#t.style.left=`${t.left+e.width>window.innerWidth?t.right-e.width:t.left}px`})}ontoggle(t){t.newState==="open"?(this.#i(),window.addEventListener("scroll",this.#i,!0),window.addEventListener("resize",this.#i),this.#n=this.$$('[role="menuitem"]'),this.#n[0]?.focus(),this.#e.ariaExpanded="true"):(this.cleanup(),this.#n=null,this.#e.ariaExpanded="false",this.#e.focus())}onkeydown(t){if(!t.target.matches('[role="menuitem"]'))return;let e=this.#n.indexOf(t.target),n=this.keyNav(t,e,this.#n.length,"ArrowUp","ArrowDown",!0);n>=0&&this.#n[n].focus()}cleanup(){window.removeEventListener("scroll",this.#i,!0),window.removeEventListener("resize",this.#i)}};customElements.define("ot-dropdown",h);document.addEventListener("DOMContentLoaded",()=>{let i="title",t="[title]",e=n=>{let o=n.getAttribute(i);o&&(n.setAttribute("data-tooltip",o),n.hasAttribute("aria-label")||n.setAttribute("aria-label",o),n.removeAttribute(i))};document.querySelectorAll(t).forEach(e),new MutationObserver(n=>{for(let o of n){e(o.target);for(let s of 
o.addedNodes)s.nodeType===1&&(e(s),s.querySelectorAll(t).forEach(e))}}).observe(document.body,{childList:!0,subtree:!0,attributes:!0,attributeFilter:[i]})});document.addEventListener("click",i=>{let t=i.target.closest("[data-sidebar-toggle]");if(t){t.closest("[data-sidebar-layout]")?.toggleAttribute("data-sidebar-open");return}if(!i.target.closest("[data-sidebar]")){let e=document.querySelector("[data-sidebar-layout][data-sidebar-open]");e&&window.matchMedia("(max-width: 768px)").matches&&e.removeAttribute("data-sidebar-open")}});var d={};function E(i){if(!d[i]){let t=document.createElement("div");t.className="toast-container",t.setAttribute("popover","manual"),t.setAttribute("data-placement",i),document.body.appendChild(t),d[i]=t}return d[i]}function p(i,t={}){let{placement:e="top-right",duration:n=4e3}=t,o=E(e);i.classList.add("toast");let s;return i.onmouseenter=()=>clearTimeout(s),i.onmouseleave=()=>{n>0&&(s=setTimeout(()=>f(i,o),n))},i.setAttribute("data-entering",""),o.appendChild(i),o.showPopover(),requestAnimationFrame(()=>{requestAnimationFrame(()=>{i.removeAttribute("data-entering")})}),n>0&&(s=setTimeout(()=>f(i,o),n)),i}function f(i,t){if(i.hasAttribute("data-exiting"))return;i.setAttribute("data-exiting","");let e=()=>{i.remove(),t.children.length||t.hidePopover()};i.addEventListener("transitionend",e,{once:!0});let n=getComputedStyle(i).getPropertyValue("--transition").trim(),o=parseFloat(n),s=n.endsWith("ms")?o:o*1e3;setTimeout(e,s)}function b(i,t,e={}){let{variant:n="info",...o}=e,s=document.createElement("output");if(s.setAttribute("data-variant",n),t){let r=document.createElement("h6");r.className="toast-title",r.textContent=t,s.appendChild(r)}let a=document.createElement("div");return a.className="toast-message",a.textContent=i,s.appendChild(a),p(s,o)}function g(i,t={}){let e;if(i instanceof HTMLTemplateElement?e=i.content.firstElementChild?.cloneNode(!0):i&&(e=i.cloneNode(!0)),!!e)return e.removeAttribute("id"),p(e,t)}function 
v(i){i&&d[i]?(d[i].innerHTML="",d[i].hidePopover()):Object.values(d).forEach(t=>{t.innerHTML="",t.hidePopover()})}var m=window.ot||(window.ot={});m.toast=b;m.toast.el=g;m.toast.clear=v;})();
+21
web/package.json
··· 1 + { 2 + "name": "fantasma-web", 3 + "private": true, 4 + "version": "0.1.0", 5 + "type": "module", 6 + "scripts": { 7 + "dev": "vite", 8 + "prebuild": "cp ../zig-out/bin/fantasma.wasm public/", 9 + "build": "tsc && vite build", 10 + "preview": "vite preview" 11 + }, 12 + "dependencies": { 13 + "@knadh/oat": "^0.4.1", 14 + "zod": "^4.3.6", 15 + "zustand": "^5.0.11" 16 + }, 17 + "devDependencies": { 18 + "typescript": "^5.9.3", 19 + "vite": "^7.3.1" 20 + } 21 + }
+52
web/src/db.ts
··· 1 + const DB_NAME = "fantasma"; 2 + const DB_VERSION = 1; 3 + const STORE_NAME = "model"; 4 + export const WEIGHTS_KEY = "weights"; 5 + 6 + function openDB(): Promise<IDBDatabase> { 7 + return new Promise((resolve, reject) => { 8 + const req = indexedDB.open(DB_NAME, DB_VERSION); 9 + req.onupgradeneeded = () => req.result.createObjectStore(STORE_NAME); 10 + req.onsuccess = () => resolve(req.result); 11 + req.onerror = () => reject(req.error); 12 + }); 13 + } 14 + 15 + export async function dbGet(key: string): Promise<ArrayBuffer | undefined> { 16 + const db = await openDB(); 17 + return new Promise((resolve, reject) => { 18 + const tx = db.transaction(STORE_NAME, "readonly"); 19 + const req = tx.objectStore(STORE_NAME).get(key); 20 + req.onsuccess = () => resolve(req.result); 21 + req.onerror = () => reject(req.error); 22 + }); 23 + } 24 + 25 + export async function dbPut(key: string, value: ArrayBuffer): Promise<void> { 26 + const db = await openDB(); 27 + return new Promise((resolve, reject) => { 28 + const tx = db.transaction(STORE_NAME, "readwrite"); 29 + const req = tx.objectStore(STORE_NAME).put(value, key); 30 + req.onsuccess = () => resolve(); 31 + req.onerror = () => reject(req.error); 32 + }); 33 + } 34 + 35 + export async function dbClear(): Promise<void> { 36 + try { 37 + const db = await openDB(); 38 + const tx = db.transaction(STORE_NAME, "readwrite"); 39 + tx.objectStore(STORE_NAME).clear(); 40 + await new Promise<void>((resolve, reject) => { 41 + tx.oncomplete = () => resolve(); 42 + tx.onerror = () => reject(tx.error); 43 + }); 44 + } catch { 45 + // best effort 46 + } 47 + try { 48 + indexedDB.deleteDatabase(DB_NAME); 49 + } catch { 50 + // best effort 51 + } 52 + }
+261
web/src/main.ts
··· 1 + import "@knadh/oat/oat.min.css"; 2 + import "./style.css"; 3 + 4 + import { store } from "./store"; 5 + import { loadWasm, wasmCall, loadModelWeights } from "./wasm"; 6 + import { dbGet, dbPut, dbClear, WEIGHTS_KEY } from "./db"; 7 + import { updateStatus, renderProfile } from "./render"; 8 + 9 + const $ = (sel: string) => document.querySelector<HTMLElement>(sel)!; 10 + 11 + // ============================================================ 12 + // Helpers 13 + // ============================================================ 14 + 15 + function formatBytes(bytes: number): string { 16 + if (bytes < 1024) return bytes + " B"; 17 + if (bytes < 1048576) return (bytes / 1024).toFixed(0) + " KB"; 18 + return (bytes / 1048576).toFixed(1) + " MB"; 19 + } 20 + 21 + async function fetchWithProgress( 22 + url: string, 23 + onProgress: (received: number, total: number) => void, 24 + ): Promise<ArrayBuffer> { 25 + const response = await fetch(url); 26 + if (!response.ok) throw new Error(`HTTP ${response.status}`); 27 + const total = parseInt(response.headers.get("content-length") || "0", 10); 28 + const reader = response.body!.getReader(); 29 + const chunks: Uint8Array[] = []; 30 + let received = 0; 31 + 32 + for (;;) { 33 + const { done, value } = await reader.read(); 34 + if (done) break; 35 + chunks.push(value); 36 + received += value.length; 37 + onProgress(received, total); 38 + } 39 + 40 + const result = new Uint8Array(received); 41 + let offset = 0; 42 + for (const chunk of chunks) { 43 + result.set(chunk, offset); 44 + offset += chunk.length; 45 + } 46 + return result.buffer; 47 + } 48 + 49 + function wordCount(text: string): number { 50 + return text.trim().split(/\s+/).filter(Boolean).length; 51 + } 52 + 53 + // ============================================================ 54 + // Subscribe to store changes 55 + // ============================================================ 56 + 57 + store.subscribe((state, prev) => { 58 + if (state.status !== prev.status || 
state.statusMessage !== prev.statusMessage) { 59 + updateStatus(); 60 + } 61 + }); 62 + 63 + // ============================================================ 64 + // DOM bindings 65 + // ============================================================ 66 + 67 + const inputEl = $("#input-text") as HTMLTextAreaElement; 68 + const btnAnalyze = $("#btn-analyze") as HTMLButtonElement; 69 + const btnRetry = $("#btn-retry") as HTMLButtonElement; 70 + const forensicToggle = $("#forensic-toggle") as HTMLInputElement; 71 + const forensicPanel = $("#forensic-panel") as HTMLElement; 72 + const btnDownload = $("#btn-download-model") as HTMLButtonElement; 73 + const downloadProgress = $("#download-progress") as HTMLElement; 74 + const progressFill = $("#progress-fill") as HTMLElement; 75 + const progressText = $("#progress-text") as HTMLElement; 76 + const btnNeutralize = $("#btn-neutralize") as HTMLButtonElement; 77 + const btnPanic = $("#btn-panic") as HTMLButtonElement; 78 + const wordCountEl = $("#word-count") as HTMLElement; 79 + const resultsSection = $("#results") as HTMLElement; 80 + 81 + // Word count 82 + inputEl.addEventListener("input", () => { 83 + const words = wordCount(inputEl.value); 84 + wordCountEl.textContent = words + " word" + (words !== 1 ? 
"s" : ""); 85 + btnAnalyze.disabled = !store.getState().wasmInstance || words < 3; 86 + }); 87 + 88 + // Forensic toggle 89 + forensicToggle.addEventListener("change", () => { 90 + store.getState().toggleForensic(forensicToggle.checked); 91 + forensicPanel.hidden = !forensicToggle.checked; 92 + }); 93 + 94 + // Analyze 95 + btnAnalyze.addEventListener("click", () => { 96 + const text = inputEl.value.trim(); 97 + const { wasmInstance } = store.getState(); 98 + if (!text || !wasmInstance) return; 99 + 100 + btnAnalyze.disabled = true; 101 + btnAnalyze.classList.add("btn-loading"); 102 + btnAnalyze.textContent = "Analyzing"; 103 + 104 + try { 105 + const result = wasmCall(wasmInstance, "profile", text); 106 + store.getState().setProfileResult(result); 107 + renderProfile(result); 108 + } catch (e) { 109 + console.error("Profile error:", e); 110 + store.getState().setStatus("error", "Analysis failed"); 111 + } finally { 112 + btnAnalyze.disabled = false; 113 + btnAnalyze.classList.remove("btn-loading"); 114 + btnAnalyze.textContent = "Analyze Fingerprint"; 115 + } 116 + }); 117 + 118 + // Download model 119 + btnDownload.addEventListener("click", async () => { 120 + btnDownload.disabled = true; 121 + downloadProgress.hidden = false; 122 + 123 + try { 124 + progressText.textContent = "Downloading model..."; 125 + progressFill.style.width = "0%"; 126 + 127 + const weightsBytes = await fetchWithProgress("/model.bin", (recv, total) => { 128 + if (total > 0) { 129 + const pct = Math.round((recv / total) * 100); 130 + progressFill.style.width = pct + "%"; 131 + progressText.textContent = `${formatBytes(recv)} / ${formatBytes(total)}`; 132 + } else { 133 + progressText.textContent = formatBytes(recv); 134 + } 135 + }); 136 + 137 + await dbPut(WEIGHTS_KEY, weightsBytes); 138 + progressText.textContent = "Initializing model..."; 139 + progressFill.style.width = "100%"; 140 + 141 + const { wasmInstance } = store.getState(); 142 + if (wasmInstance && 
loadModelWeights(wasmInstance, weightsBytes)) { 143 + store.getState().setModelLoaded(true); 144 + downloadProgress.hidden = true; 145 + btnDownload.hidden = true; 146 + btnNeutralize.hidden = false; 147 + store.getState().setStatus("model-ready"); 148 + } else { 149 + progressText.textContent = "Model init failed"; 150 + btnDownload.disabled = false; 151 + } 152 + } catch (e) { 153 + console.error("Download error:", e); 154 + progressText.textContent = "Model not available yet"; 155 + progressFill.style.width = "0%"; 156 + btnDownload.disabled = false; 157 + } 158 + }); 159 + 160 + // Neutralize 161 + btnNeutralize.addEventListener("click", () => { 162 + const text = inputEl.value.trim(); 163 + const { wasmInstance, modelLoaded } = store.getState(); 164 + if (!text || !wasmInstance || !modelLoaded) return; 165 + 166 + btnNeutralize.disabled = true; 167 + btnNeutralize.classList.add("btn-loading"); 168 + btnNeutralize.textContent = "Neutralizing"; 169 + 170 + try { 171 + const result = wasmCall(wasmInstance, "neutralize", text); 172 + store.getState().setProfileResult(result); 173 + renderProfile(result); 174 + } catch (e) { 175 + console.error("Neutralize error:", e); 176 + } finally { 177 + btnNeutralize.disabled = false; 178 + btnNeutralize.classList.remove("btn-loading"); 179 + btnNeutralize.textContent = "Neutralize"; 180 + } 181 + }); 182 + 183 + // Panic 184 + btnPanic.addEventListener("click", async () => { 185 + inputEl.value = ""; 186 + wordCountEl.textContent = "0 words"; 187 + btnAnalyze.disabled = true; 188 + resultsSection.hidden = true; 189 + $("#markers-container").innerHTML = ""; 190 + $("#forensic-table-body").innerHTML = ""; 191 + forensicToggle.checked = false; 192 + forensicPanel.hidden = true; 193 + 194 + await dbClear(); 195 + 196 + if ("serviceWorker" in navigator) { 197 + const regs = await navigator.serviceWorker.getRegistrations(); 198 + for (const reg of regs) reg.unregister(); 199 + } 200 + if ("caches" in window) { 201 + const names = 
await caches.keys(); 202 + for (const name of names) await caches.delete(name); 203 + } 204 + 205 + try { 206 + sessionStorage.clear(); 207 + } catch {} 208 + try { 209 + localStorage.clear(); 210 + } catch {} 211 + 212 + store.getState().reset(); 213 + 214 + document.title = ""; 215 + document.body.innerHTML = ""; 216 + history.replaceState(null, "", "about:blank"); 217 + }); 218 + 219 + // Retry 220 + btnRetry.addEventListener("click", () => { 221 + resultsSection.hidden = true; 222 + inputEl.focus(); 223 + }); 224 + 225 + // ============================================================ 226 + // Init 227 + // ============================================================ 228 + 229 + async function init() { 230 + store.getState().setStatus("loading"); 231 + 232 + try { 233 + const instance = await loadWasm(); 234 + store.getState().setWasmInstance(instance); 235 + store.getState().setStatus("ready"); 236 + 237 + const words = wordCount(inputEl.value); 238 + if (words >= 3) btnAnalyze.disabled = false; 239 + 240 + try { 241 + const storedWeights = await dbGet(WEIGHTS_KEY); 242 + if (storedWeights) { 243 + if (loadModelWeights(instance, storedWeights)) { 244 + store.getState().setModelLoaded(true); 245 + store.getState().setStatus("model-ready"); 246 + } 247 + } 248 + } catch { 249 + // IndexedDB not available 250 + } 251 + } catch (e) { 252 + console.error("WASM load failed:", e); 253 + store.getState().setStatus("error", "Engine unavailable"); 254 + } 255 + } 256 + 257 + init(); 258 + 259 + if ("serviceWorker" in navigator) { 260 + navigator.serviceWorker.register("/sw.js").catch(() => {}); 261 + }
+136
web/src/render.ts
··· 1 + import type { ProfileResult, Marker } from "./schemas"; 2 + import { store } from "./store"; 3 + 4 + const FEATURE_LABELS: Record<string, string> = { 5 + avg_word_length: "Avg word length", 6 + vocabulary_richness: "Vocabulary richness", 7 + hapax_ratio: "Hapax ratio", 8 + avg_sentence_length: "Avg sentence length", 9 + sentence_length_variance: "Sentence length variance", 10 + passive_voice_ratio: "Passive voice ratio", 11 + contraction_rate: "Contraction rate", 12 + semicolon_rate: "Semicolon rate", 13 + comma_rate: "Comma rate", 14 + dash_rate: "Dash rate", 15 + exclamation_rate: "Exclamation rate", 16 + hyphenation_rate: "Hyphenation rate", 17 + }; 18 + 19 + const MARKER_TO_FEATURE: Record<string, string> = { 20 + semicolon_high: "semicolon_rate", 21 + contraction_high: "contraction_rate", 22 + comma_rate: "comma_rate", 23 + colon_high: "colon_rate", 24 + dash_preference: "dash_rate", 25 + exclamation_high: "exclamation_rate", 26 + ellipsis_high: "ellipsis_rate", 27 + parenthetical_high: "parenthetical_rate", 28 + sentence_length_high: "avg_sentence_length", 29 + sentence_length_variance_high: "sentence_length_variance", 30 + passive_voice_high: "passive_voice_ratio", 31 + vocabulary_richness_high: "vocabulary_richness", 32 + hapax_ratio_high: "hapax_ratio", 33 + hyphenation_high: "hyphenation_rate", 34 + avg_word_length: "avg_word_length", 35 + avg_syllables: "avg_syllables_per_word", 36 + }; 37 + 38 + function esc(str: string): string { 39 + const div = document.createElement("div"); 40 + div.textContent = str; 41 + return div.innerHTML; 42 + } 43 + 44 + function formatMarkerName(name: string): string { 45 + return name.replace(/_/g, " ").replace(/\b\w/g, (c) => c.toUpperCase()); 46 + } 47 + 48 + function $(sel: string): HTMLElement { 49 + return document.querySelector(sel)!; 50 + } 51 + 52 + export function updateStatus(): void { 53 + const { status, statusMessage } = store.getState(); 54 + const el = $("#status-indicator"); 55 + el.textContent = 
statusMessage; 56 + el.className = `status status-${status === "model-ready" ? "ready" : status}`; 57 + } 58 + 59 + export function renderProfile(result: ProfileResult): void { 60 + const state = store.getState(); 61 + 62 + // Summary 63 + $("#marker-count").textContent = String(result.markers.length); 64 + const summary = $("#results-summary"); 65 + summary.dataset.variant = result.markers.length === 0 ? "success" : "warning"; 66 + 67 + // Markers list 68 + const container = $("#markers-container"); 69 + container.innerHTML = ""; 70 + for (const m of result.markers) { 71 + const item = document.createElement("div"); 72 + item.className = "change-item"; 73 + const dir = m.z_score > 0 ? "HIGH" : "LOW"; 74 + item.innerHTML = ` 75 + <div class="change-header"> 76 + <span class="change-marker">${esc(formatMarkerName(m.name))}</span> 77 + <span class="change-arrow">${dir}</span> 78 + </div> 79 + <div class="change-reason">Value: ${m.value.toFixed(4)} &middot; Baseline: ${m.baseline.toFixed(4)} &middot; z = ${m.z_score.toFixed(2)}</div> 80 + `; 81 + container.appendChild(item); 82 + } 83 + 84 + // Marker lookup by feature key 85 + const flagged = new Map<string, Marker>(); 86 + for (const m of result.markers) { 87 + const featureKey = MARKER_TO_FEATURE[m.name]; 88 + if (featureKey) flagged.set(featureKey, m); 89 + } 90 + 91 + // Forensic table 92 + const forensicBody = $("#forensic-table-body"); 93 + forensicBody.innerHTML = ""; 94 + const raw = result as Record<string, unknown>; 95 + for (const [key, label] of Object.entries(FEATURE_LABELS)) { 96 + if (raw[key] === undefined) continue; 97 + const tr = document.createElement("tr"); 98 + const m = flagged.get(key); 99 + 100 + let statusCls: string, statusTxt: string; 101 + if (m) { 102 + statusCls = "forensic-warn"; 103 + statusTxt = m.z_score > 0 ? "\u26a0 High" : "\u26a0 Low"; 104 + } else { 105 + statusCls = "forensic-fixed"; 106 + statusTxt = "\u2713 Normal"; 107 + } 108 + 109 + const baseline = m ? 
m.baseline.toFixed(4) : "\u2014"; 110 + const val = raw[key]; 111 + tr.innerHTML = ` 112 + <td>${esc(label)}</td> 113 + <td>${typeof val === "boolean" ? (val ? "Yes" : "No") : (val as number).toFixed(4)}</td> 114 + <td>${baseline}</td> 115 + <td class="${statusCls}">${statusTxt}</td> 116 + `; 117 + forensicBody.appendChild(tr); 118 + } 119 + 120 + // Rewriter section 121 + const rewriterSection = $("#rewriter-section") as HTMLElement; 122 + rewriterSection.hidden = false; 123 + const btnDownload = $("#btn-download-model") as HTMLElement; 124 + const btnNeutralize = $("#btn-neutralize") as HTMLElement; 125 + if (state.modelLoaded) { 126 + btnDownload.hidden = true; 127 + btnNeutralize.hidden = false; 128 + } else { 129 + btnDownload.hidden = false; 130 + btnNeutralize.hidden = true; 131 + } 132 + 133 + const resultsSection = $("#results") as HTMLElement; 134 + resultsSection.hidden = false; 135 + resultsSection.scrollIntoView({ behavior: "smooth", block: "start" }); 136 + }
+17
web/src/schemas.ts
··· 1 + import { z } from "zod"; 2 + 3 + export const MarkerSchema = z.object({ 4 + name: z.string(), 5 + value: z.number(), 6 + baseline: z.number(), 7 + z_score: z.number(), 8 + }); 9 + 10 + export const ProfileResultSchema = z 11 + .object({ 12 + markers: z.array(MarkerSchema), 13 + }) 14 + .passthrough(); 15 + 16 + export type Marker = z.infer<typeof MarkerSchema>; 17 + export type ProfileResult = z.infer<typeof ProfileResultSchema>;
+58
web/src/store.ts
··· 1 + import { createStore } from "zustand/vanilla"; 2 + import type { ProfileResult } from "./schemas"; 3 + 4 + export interface AppState { 5 + status: "loading" | "ready" | "model-ready" | "error"; 6 + statusMessage: string; 7 + wasmInstance: WebAssembly.Instance | null; 8 + modelLoaded: boolean; 9 + profileResult: ProfileResult | null; 10 + forensicVisible: boolean; 11 + } 12 + 13 + export interface AppActions { 14 + setStatus(status: AppState["status"], message?: string): void; 15 + setWasmInstance(instance: WebAssembly.Instance): void; 16 + setModelLoaded(loaded: boolean): void; 17 + setProfileResult(result: ProfileResult | null): void; 18 + toggleForensic(visible: boolean): void; 19 + reset(): void; 20 + } 21 + 22 + const initialState: AppState = { 23 + status: "loading", 24 + statusMessage: "Loading...", 25 + wasmInstance: null, 26 + modelLoaded: false, 27 + profileResult: null, 28 + forensicVisible: false, 29 + }; 30 + 31 + export const store = createStore<AppState & AppActions>()((set) => ({ 32 + ...initialState, 33 + 34 + setStatus: (status, message) => 35 + set({ 36 + status, 37 + statusMessage: 38 + message ?? 39 + { loading: "Loading...", ready: "Ready", "model-ready": "Offline", error: "Error" }[ 40 + status 41 + ], 42 + }), 43 + 44 + setWasmInstance: (instance) => set({ wasmInstance: instance }), 45 + 46 + setModelLoaded: (loaded) => set({ modelLoaded: loaded }), 47 + 48 + setProfileResult: (result) => set({ profileResult: result }), 49 + 50 + toggleForensic: (visible) => set({ forensicVisible: visible }), 51 + 52 + reset: () => 53 + set({ 54 + ...initialState, 55 + status: "loading", 56 + statusMessage: "Loading...", 57 + }), 58 + }));
+70
web/src/wasm.ts
··· 1 + import { ProfileResultSchema, type ProfileResult } from "./schemas"; 2 + 3 + export interface FantasmaExports extends WebAssembly.Exports { 4 + memory: WebAssembly.Memory; 5 + alloc(len: number): number; 6 + dealloc(ptr: number, len: number): void; 7 + profile(ptr: number, len: number): number; 8 + neutralize(ptr: number, len: number): number; 9 + init(ptr: number, len: number): number; 10 + get_output_ptr(): number; 11 + get_output_len(): number; 12 + } 13 + 14 + export async function loadWasm(): Promise<WebAssembly.Instance> { 15 + const wasmBytes = await fetch("/fantasma.wasm").then((r) => { 16 + if (!r.ok) throw new Error(`HTTP ${r.status}`); 17 + return r.arrayBuffer(); 18 + }); 19 + 20 + const { instance } = await WebAssembly.instantiate(wasmBytes, { 21 + env: { 22 + log_message: (ptr: number, len: number) => { 23 + const bytes = new Uint8Array( 24 + (instance.exports as unknown as FantasmaExports).memory.buffer, 25 + ptr, 26 + len, 27 + ); 28 + console.log("[wasm]", new TextDecoder().decode(bytes)); 29 + }, 30 + }, 31 + }); 32 + 33 + return instance; 34 + } 35 + 36 + export function wasmCall( 37 + instance: WebAssembly.Instance, 38 + exportName: "profile" | "neutralize", 39 + text: string, 40 + ): ProfileResult { 41 + const exports = instance.exports as unknown as FantasmaExports; 42 + const encoder = new TextEncoder(); 43 + const bytes = encoder.encode(text); 44 + const ptr = exports.alloc(bytes.length); 45 + new Uint8Array(exports.memory.buffer, ptr, bytes.length).set(bytes); 46 + 47 + const ok = exports[exportName](ptr, bytes.length); 48 + exports.dealloc(ptr, bytes.length); 49 + if (!ok) throw new Error(`WASM ${exportName} failed`); 50 + 51 + const outPtr = exports.get_output_ptr(); 52 + const outLen = exports.get_output_len(); 53 + const outBytes = new Uint8Array(exports.memory.buffer, outPtr, outLen); 54 + const raw = JSON.parse(new TextDecoder().decode(outBytes)); 55 + 56 + return ProfileResultSchema.parse(raw); 57 + } 58 + 59 + export 
function loadModelWeights( 60 + instance: WebAssembly.Instance, 61 + weightsBytes: ArrayBuffer, 62 + ): boolean { 63 + const exports = instance.exports as unknown as FantasmaExports; 64 + const arr = new Uint8Array(weightsBytes); 65 + const ptr = exports.alloc(arr.length); 66 + new Uint8Array(exports.memory.buffer, ptr, arr.length).set(arr); 67 + const ok = exports.init(ptr, arr.length); 68 + exports.dealloc(ptr, arr.length); 69 + return !!ok; 70 + }
web/style.css web/src/style.css
web/sw.js web/public/sw.js
+15
web/tsconfig.json
··· 1 + { 2 + "compilerOptions": { 3 + "target": "ES2020", 4 + "module": "ESNext", 5 + "moduleResolution": "bundler", 6 + "strict": true, 7 + "esModuleInterop": true, 8 + "skipLibCheck": true, 9 + "forceConsistentCasingInFileNames": true, 10 + "resolveJsonModule": true, 11 + "isolatedModules": true, 12 + "noEmit": true 13 + }, 14 + "include": ["src"] 15 + }
+13
web/vite.config.ts
import { defineConfig } from "vite";

// Vite configuration for the Fantasma frontend.
// - publicDir "public" serves assets verbatim (fantasma.wasm and sw.js are
//   copied/placed there, per dist.sh) so the engine is fetchable at a stable
//   root path.
// - outDir "dist" matches the path dist.sh reports and packages.
export default defineConfig({
  root: ".",
  publicDir: "public",
  build: {
    outDir: "dist",
    emptyOutDir: true,
  },
  server: {
    // Fixed dev-server port.
    port: 3000,
  },
});