AppView in a box as a Vite plugin thing hatk.dev

docs: add JSDoc comments to car.ts, cbor.ts, and cid.ts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+110 -8
+34 -2
packages/hatk/src/car.ts
··· 1 - // CAR (Content Addressable aRchive) parser from scratch 2 - // CAR files bundle content-addressed blocks — used in firehose events 1 + /** 2 + * CAR (Content Addressable aRchive) parser. 3 + * 4 + * CAR files bundle content-addressed blocks into a single binary container. 5 + * In the AT Protocol, `com.atproto.sync.getRepo` uses them to deliver entire 6 + * repos, and firehose (`com.atproto.sync.subscribeRepos`) commit events use them to deliver individual changes. 7 + * 8 + * Format: `varint(headerLen) | CBOR(header) | block*` 9 + * Each block: `varint(blockLen) | CID | data` 10 + * 11 + * @see https://ipld.io/specs/transport/car/carv1/ 12 + * @module 13 + */ 3 14 4 15 import { cborDecode } from './cbor.ts' 5 16 import { cidToString, readVarint } from './cid.ts' 6 17 18 + /** 19 + * Parses a CID (Content Identifier) from raw bytes at the given offset. 20 + * 21 + * Handles both CIDv0 (bare SHA-256 multihash, starts with `0x12`) and 22 + * CIDv1 (version + codec + multihash with varint-encoded lengths). 23 + * 24 + * @returns A tuple of `[cidBytes, nextOffset]` 25 + */ 7 26 function parseCidFromBytes(bytes: Uint8Array, offset: number): [Uint8Array, number] { 8 27 const firstByte = bytes[offset] 9 28 ··· 30 49 return [bytes.slice(offset, pos), pos] 31 50 } 32 51 52 + /** 53 + * Parses a CARv1 binary frame into its root CIDs and block map. 54 + * 55 + * @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit) 56 + * @returns `roots` — ordered list of root CID strings; `blocks` — map of CID string → raw block data 57 + * 58 + * @example 59 + * ```ts 60 + * const car = new Uint8Array(await res.arrayBuffer()) 61 + * const { roots, blocks } = parseCarFrame(car) 62 + * const commitData = blocks.get(roots[0]) 63 + * ``` 64 + */ 33 65 export function parseCarFrame(carBytes: Uint8Array): { 34 66 roots: string[] 35 67 blocks: Map<string, Uint8Array>
+38 -3
packages/hatk/src/cbor.ts
··· 1 - // CBOR decoder from scratch (RFC 8949) 2 - // Returns { value, offset } so we can split firehose frames 3 - // (two concatenated CBOR values: header + body) 1 + /** 2 + * Minimal CBOR (RFC 8949) decoder with DAG-CBOR CID support. 3 + * 4 + * Returns `{ value, offset }` so callers can decode concatenated CBOR values — 5 + * the AT Protocol firehose sends frames as two back-to-back CBOR items 6 + * (header + body). 7 + * 8 + * DAG-CBOR tag 42 values (CID links) are decoded as `{ $link: "bafy..." }` objects, 9 + * matching the convention used by the AT Protocol. 10 + * 11 + * @see https://www.rfc-editor.org/rfc/rfc8949 — CBOR spec 12 + * @see https://ipld.io/specs/codecs/dag-cbor/spec/ — DAG-CBOR spec 13 + * @module 14 + */ 4 15 5 16 import { cidToString } from './cid.ts' 6 17 18 + /** CBOR tag number for DAG-CBOR CID links. */ 7 19 const CBOR_TAG_CID = 42 8 20 9 21 interface DecodeResult { 22 + /** The decoded JavaScript value. */ 10 23 value: any 24 + /** Byte offset immediately after the decoded value — use as `startOffset` to decode the next item. */ 11 25 offset: number 12 26 } 13 27 28 + /** 29 + * Decodes a single CBOR value from a byte array. 30 + * 31 + * Supports all major types: unsigned/negative integers, byte/text strings, 32 + * arrays, maps, tags (with special handling for CID tag 42), and simple 33 + * values (true, false, null). 
34 + * 35 + * @param bytes - Raw CBOR bytes 36 + * @param startOffset - Byte position to start decoding from (default `0`) 37 + * @returns The decoded value and the offset of the next byte after it 38 + * 39 + * @example 40 + * ```ts 41 + * // Decode a single value 42 + * const { value } = cborDecode(bytes) 43 + * 44 + * // Decode two concatenated values (firehose frame) 45 + * const { value: header, offset } = cborDecode(frameBytes) 46 + * const { value: body } = cborDecode(frameBytes, offset) 47 + * ``` 48 + */ 14 49 export function cborDecode(bytes: Uint8Array, startOffset = 0): DecodeResult { 15 50 let offset = startOffset 16 51
+38 -3
packages/hatk/src/cid.ts
··· 1 - // CID (Content Identifier) + base32 + varint — from scratch 2 - // CIDs are self-describing content hashes used throughout AT Protocol 1 + /** 2 + * CID (Content Identifier), base32, and varint primitives. 3 + * 4 + * CIDs are self-describing content hashes used throughout the AT Protocol 5 + * to reference blocks in repos and CAR files. This module provides the 6 + * low-level encoding needed to convert raw CID bytes into their string 7 + * representation (base32lower with `b` multibase prefix). 8 + * 9 + * @see https://github.com/multiformats/cid 10 + * @module 11 + */ 3 12 13 + /** RFC 4648 base32 lowercase alphabet (no padding). */ 4 14 const BASE32_ALPHABET = 'abcdefghijklmnopqrstuvwxyz234567' 5 15 16 + /** 17 + * Encodes raw bytes as a base32 lowercase string (RFC 4648, no padding). 18 + * 19 + * @example 20 + * ```ts 21 + * base32Encode(new Uint8Array([0x01, 0x71])) // "afyq" 22 + * ``` 23 + */ 6 24 export function base32Encode(bytes: Uint8Array): string { 7 25 let result = '' 8 26 let bits = 0 ··· 24 42 return result 25 43 } 26 44 45 + /** 46 + * Converts raw CID bytes to their multibase-encoded string form (`b` prefix + base32lower). 47 + * 48 + * @example 49 + * ```ts 50 + * cidToString(cidBytes) // "bafyreig..." 51 + * ``` 52 + */ 27 53 export function cidToString(cidBytes: Uint8Array): string { 28 - // base32lower with 'b' multibase prefix 29 54 return `b${base32Encode(cidBytes)}` 30 55 } 31 56 57 + /** 58 + * Reads an unsigned LEB128 varint from a byte array. 59 + * 60 + * Varints are used extensively in CID encoding and CAR framing to represent 61 + * variable-length integers in a compact form. 62 + * 63 + * @param bytes - Source byte array 64 + * @param offset - Position to start reading from 65 + * @returns A tuple of `[value, nextOffset]` 66 + */ 32 67 export function readVarint(bytes: Uint8Array, offset: number): [number, number] { 33 68 let value = 0 34 69 let shift = 0