commit 4ffb9ac76dee6ba47f94d781c49962d2e0f6cc79 · zat.dev/zat

+2

.gitignore

··· 1 + .zig-cache/ 2 + zig-out/

+18

build.zig

const std = @import("std");

/// Build script entry point.
///
/// Registers the `zat` module (rooted at `src/root.zig`) using the standard
/// target and optimize options, and adds a `test` step that runs the
/// module's unit tests.
pub fn build(b: *std.Build) void {
    // Options are resolved inline, target first and optimize second.
    const zat_module = b.addModule("zat", .{
        .root_source_file = b.path("src/root.zig"),
        .target = b.standardTargetOptions(.{}),
        .optimize = b.standardOptimizeOption(.{}),
    });

    // The test artifact is built from the same module that gets exported.
    const unit_tests = b.addTest(.{ .root_module = zat_module });

    // `zig build test` depends on running the compiled test artifact.
    b.step("test", "run unit tests").dependOn(&b.addRunArtifact(unit_tests).step);
}

+11

build.zig.zon

··· 1 + .{ 2 + .name = .zat, 3 + .version = "0.0.1-alpha", 4 + .fingerprint = 0x8da9db57ee82fbe4, 5 + .minimum_zig_version = "0.15.0", 6 + .paths = .{ 7 + "build.zig", 8 + "build.zig.zon", 9 + "src", 10 + }, 11 + }

+188

docs/plan.md

··· 1 + # zat - zig atproto primitives 2 + 3 + low-level building blocks for atproto applications in zig. not a full sdk - just the pieces that everyone reimplements. 4 + 5 + ## philosophy 6 + 7 + from studying the wishlists: the pain is real, but the suggested solutions often over-engineer. we want: 8 + 9 + 1. **primitives, not frameworks** - types and parsers, not http clients or feed scaffolds 10 + 2. **layered design** - each piece usable independently 11 + 3. **zig idioms** - explicit buffers, comptime validation, no hidden allocations 12 + 4. **minimal scope** - solve the repeated pain, not every possible need 13 + 14 + ## scope 15 + 16 + ### in scope (v0.1) 17 + 18 + **tid** - timestamp identifiers 19 + - parse tid string to timestamp (microseconds) 20 + - generate tid from timestamp 21 + - extract clock id 22 + - comptime validation of format 23 + 24 + **at-uri** - `at://did:plc:xyz/collection/rkey` 25 + - parse to components (did, collection, rkey) 26 + - construct from components 27 + - validation 28 + 29 + **did** - decentralized identifiers 30 + - parse did:plc and did:web 31 + - validate format 32 + - type-safe wrapper (not just `[]const u8`) 33 + 34 + ### maybe v0.2 35 + 36 + **facets** - extract links/mentions/tags from post records 37 + - given a json value with `text` and `facets`, extract urls 38 + - byte-offset handling for utf-8 39 + 40 + **cid** - content identifiers 41 + - parse cid strings 42 + - validate format 43 + 44 + ### out of scope (for now) 45 + 46 + - lexicon codegen (too big, could be its own project) 47 + - xrpc client (std.http.Client is fine) 48 + - session management (app-specific) 49 + - jetstream client (websocket.zig exists, just wire it) 50 + - feed generator framework (each feed is unique) 51 + - did resolution (requires http, out of primitive scope) 52 + 53 + ## design 54 + 55 + ### tid.zig 56 + 57 + ```zig 58 + pub const Tid = struct { 59 + raw: [13]u8, 60 + 61 + /// parse a tid string. returns null if invalid. 
62 + pub fn parse(s: []const u8) ?Tid 63 + 64 + /// timestamp in microseconds since unix epoch 65 + pub fn timestamp(self: Tid) u64 66 + 67 + /// clock identifier (lower 10 bits) 68 + pub fn clockId(self: Tid) u10 69 + 70 + /// generate tid for current time 71 + pub fn now() Tid 72 + 73 + /// generate tid for specific timestamp 74 + pub fn fromTimestamp(ts: u64, clock_id: u10) Tid 75 + 76 + /// format to string 77 + pub fn format(self: Tid, buf: *[13]u8) void 78 + }; 79 + ``` 80 + 81 + encoding: base32-sortable (chars `234567abcdefghijklmnopqrstuvwxyz`), 13 chars, first 11 encode 53-bit timestamp, last 2 encode 10-bit clock id. 82 + 83 + ### at_uri.zig 84 + 85 + ```zig 86 + pub const AtUri = struct { 87 + /// the full uri string (borrowed, not owned) 88 + raw: []const u8, 89 + 90 + /// offsets into raw for each component 91 + did_end: usize, 92 + collection_end: usize, 93 + 94 + pub fn parse(s: []const u8) ?AtUri 95 + 96 + pub fn did(self: AtUri) []const u8 97 + pub fn collection(self: AtUri) []const u8 98 + pub fn rkey(self: AtUri) []const u8 99 + 100 + /// construct a new uri. caller owns the buffer. 
101 + pub fn format( 102 + buf: []u8, 103 + did: []const u8, 104 + collection: []const u8, 105 + rkey: []const u8, 106 + ) ?[]const u8 107 + }; 108 + ``` 109 + 110 + ### did.zig 111 + 112 + ```zig 113 + pub const Did = union(enum) { 114 + plc: [24]u8, // the identifier after "did:plc:" 115 + web: []const u8, // the domain after "did:web:" 116 + 117 + pub fn parse(s: []const u8) ?Did 118 + 119 + /// format to string 120 + pub fn format(self: Did, buf: []u8) ?[]const u8 121 + 122 + /// check if this is a plc did 123 + pub fn isPlc(self: Did) bool 124 + }; 125 + ``` 126 + 127 + ## structure 128 + 129 + ``` 130 + zat/ 131 + ├── build.zig 132 + ├── build.zig.zon 133 + ├── src/ 134 + │ ├── root.zig # public API (stable exports) 135 + │ ├── internal.zig # internal API (experimental) 136 + │ └── internal/ 137 + │ ├── tid.zig 138 + │ ├── at_uri.zig 139 + │ └── did.zig 140 + └── docs/ 141 + └── plan.md 142 + ``` 143 + 144 + ## internal → public promotion 145 + 146 + new features start in `internal` where we can iterate freely. when an API stabilizes: 147 + 148 + ```zig 149 + // in root.zig, uncomment to promote: 150 + pub const Tid = internal.Tid; 151 + ``` 152 + 153 + users who need bleeding-edge access can always use: 154 + 155 + ```zig 156 + const zat = @import("zat"); 157 + const tid = zat.internal.Tid.parse("..."); 158 + ``` 159 + 160 + this pattern exists indefinitely - even after 1.0, new experimental features start in internal. 161 + 162 + ## decisions 163 + 164 + ### why not typed lexicons? 165 + 166 + codegen from lexicon json is a big project on its own. the core pain (json navigation) can be partially addressed by documenting patterns, and the sdk should work regardless of how people parse json. 167 + 168 + ### why not an http client wrapper? 169 + 170 + zig 0.15's `std.http.Client` with `Io.Writer.Allocating` works well. wrapping it doesn't add much value. 
the real pain is around auth token refresh and rate limiting - those are better solved at the application level where retry logic is domain-specific. 171 + 172 + ### why not websocket/jetstream? 173 + 174 + websocket.zig already exists and works well. the jetstream protocol is simple json messages. a thin wrapper doesn't justify a dependency. 175 + 176 + ### borrowing vs owning 177 + 178 + for parse operations, we borrow slices into the input rather than allocating. callers who need owned data can dupe. this matches zig's explicit memory style. 179 + 180 + ## next steps 181 + 182 + 1. ~~implement tid.zig with tests~~ done 183 + 2. ~~implement at_uri.zig with tests~~ done 184 + 3. ~~implement did.zig with tests~~ done 185 + 4. ~~wire up build.zig as a module~~ done 186 + 5. try using it in find-bufo or music-atmosphere-feed to validate the api 187 + 6. iterate on internal APIs based on real usage 188 + 7. promote stable APIs to root.zig

+16

src/internal.zig

//! zat internal module
//!
//! home of the experimental APIs. nothing exported from here is stable:
//! names, signatures and behavior may change without notice.
//!
//! once an API has settled it is promoted to root.zig.

// each implementation file is bound once and reused for both the
// re-export and the test reference below.
const tid_file = @import("internal/tid.zig");
const at_uri_file = @import("internal/at_uri.zig");
const did_file = @import("internal/did.zig");

pub const Tid = tid_file.Tid;
pub const AtUri = at_uri_file.AtUri;
pub const Did = did_file.Did;

test {
    // reference every implementation file from a test block so the tests
    // declared inside them are included.
    _ = tid_file;
    _ = at_uri_file;
    _ = did_file;
}

+136

src/internal/at_uri.zig

//! AT-URI Parser
//!
//! at-uris identify records in the atproto network.
//! format: at://<did>/<collection>/<rkey>
//!
//! examples:
//! - at://did:plc:xyz/app.bsky.feed.post/abc123
//! - at://did:web:example.com/app.bsky.actor.profile/self

const std = @import("std");

pub const AtUri = struct {
    /// the full uri string (borrowed, not owned)
    raw: []const u8,

    /// offset into `raw` of the '/' that terminates the did
    did_end: usize,

    /// offset into `raw` of the '/' that terminates the collection
    collection_end: usize,

    const prefix = "at://";

    /// parse an at-uri of the form `at://<did>/<collection>/<rkey>`.
    ///
    /// returns null if the prefix is missing, if any of the three components
    /// is empty or absent, or if anything follows the rkey as a further path
    /// segment (including a trailing slash).
    ///
    /// the result borrows `s`; the caller must keep `s` alive for as long as
    /// the returned value is used. the contents of each component are not
    /// validated beyond being non-empty.
    pub fn parse(s: []const u8) ?AtUri {
        if (!std.mem.startsWith(u8, s, prefix)) return null;

        // did: everything up to the first slash after the prefix
        const rest = s[prefix.len..];
        const did_len = std.mem.indexOfScalar(u8, rest, '/') orelse return null;
        if (did_len == 0) return null;

        // collection: everything up to the next slash
        const path = rest[did_len + 1 ..];
        const collection_len = std.mem.indexOfScalar(u8, path, '/') orelse return null;
        if (collection_len == 0) return null;

        // rkey: the remainder. it must be non-empty and a single segment;
        // a '/' here would otherwise be swallowed into the rkey silently.
        const record_key = path[collection_len + 1 ..];
        if (record_key.len == 0) return null;
        if (std.mem.indexOfScalar(u8, record_key, '/') != null) return null;

        const did_stop = prefix.len + did_len;
        return .{
            .raw = s,
            .did_end = did_stop,
            .collection_end = did_stop + 1 + collection_len,
        };
    }

    /// the did portion (e.g., "did:plc:xyz")
    pub fn did(self: AtUri) []const u8 {
        return self.raw[prefix.len..self.did_end];
    }

    /// the collection portion (e.g., "app.bsky.feed.post")
    pub fn collection(self: AtUri) []const u8 {
        return self.raw[self.did_end + 1 .. self.collection_end];
    }

    /// the rkey portion (e.g., "abc123")
    pub fn rkey(self: AtUri) []const u8 {
        return self.raw[self.collection_end + 1 ..];
    }

    /// format a new at-uri into the provided buffer.
    ///
    /// returns the slice of `buf` that was written, or null if `buf` is too
    /// small (in which case `buf` is left untouched). the components are
    /// copied verbatim and are not validated.
    pub fn format(
        buf: []u8,
        did_str: []const u8,
        collection_str: []const u8,
        rkey_str: []const u8,
    ) ?[]const u8 {
        // prefix + did + '/' + collection + '/' + rkey
        const total_len = prefix.len + did_str.len + 1 + collection_str.len + 1 + rkey_str.len;
        if (buf.len < total_len) return null;

        var pos: usize = 0;

        @memcpy(buf[pos..][0..prefix.len], prefix);
        pos += prefix.len;

        @memcpy(buf[pos..][0..did_str.len], did_str);
        pos += did_str.len;

        buf[pos] = '/';
        pos += 1;

        @memcpy(buf[pos..][0..collection_str.len], collection_str);
        pos += collection_str.len;

        buf[pos] = '/';
        pos += 1;

        @memcpy(buf[pos..][0..rkey_str.len], rkey_str);
        pos += rkey_str.len;

        return buf[0..pos];
    }
};

test "parse valid at-uri" {
    const uri = AtUri.parse("at://did:plc:xyz/app.bsky.feed.post/abc123") orelse return error.InvalidUri;
    try std.testing.expectEqualStrings("did:plc:xyz", uri.did());
    try std.testing.expectEqualStrings("app.bsky.feed.post", uri.collection());
    try std.testing.expectEqualStrings("abc123", uri.rkey());
}

test "parse did:web uri" {
    const uri = AtUri.parse("at://did:web:example.com/app.bsky.actor.profile/self") orelse return error.InvalidUri;
    try std.testing.expectEqualStrings("did:web:example.com", uri.did());
    try std.testing.expectEqualStrings("app.bsky.actor.profile", uri.collection());
    try std.testing.expectEqualStrings("self", uri.rkey());
}

test "reject invalid uris" {
    // missing prefix
    try std.testing.expect(AtUri.parse("did:plc:xyz/app.bsky.feed.post/abc") == null);

    // wrong prefix
    try std.testing.expect(AtUri.parse("http://did:plc:xyz/app.bsky.feed.post/abc") == null);

    // missing collection
    try std.testing.expect(AtUri.parse("at://did:plc:xyz") == null);

    // missing rkey
    try std.testing.expect(AtUri.parse("at://did:plc:xyz/app.bsky.feed.post") == null);

    // empty rkey
    try std.testing.expect(AtUri.parse("at://did:plc:xyz/app.bsky.feed.post/") == null);

    // empty did
    try std.testing.expect(AtUri.parse("at:///app.bsky.feed.post/abc") == null);

    // empty collection
    try std.testing.expect(AtUri.parse("at://did:plc:xyz//abc") == null);
}

test "reject extra path segments" {
    // trailing slash after the rkey
    try std.testing.expect(AtUri.parse("at://did:plc:xyz/app.bsky.feed.post/abc/") == null);

    // additional segment after the rkey
    try std.testing.expect(AtUri.parse("at://did:plc:xyz/app.bsky.feed.post/abc/def") == null);
}

test "format at-uri" {
    var buf: [256]u8 = undefined;
    const result = AtUri.format(&buf, "did:plc:xyz", "app.bsky.feed.post", "abc123") orelse return error.BufferTooSmall;
    try std.testing.expectEqualStrings("at://did:plc:xyz/app.bsky.feed.post/abc123", result);
}

test "format rejects small buffer" {
    var buf: [8]u8 = undefined;
    try std.testing.expect(AtUri.format(&buf, "did:plc:xyz", "app.bsky.feed.post", "abc123") == null);
}

+117

src/internal/did.zig

//! DID - Decentralized Identifier
//!
//! dids are globally unique identifiers in the atproto network.
//! supports did:plc and did:web methods.
//!
//! examples:
//! - did:plc:z72i7hdynmk6r22z27h6tvur
//! - did:web:example.com

const std = @import("std");

pub const Did = struct {
    /// the full did string (borrowed, not owned)
    raw: []const u8,

    /// the method (plc or web)
    method: Method,

    /// offset where method-specific identifier starts
    id_start: usize,

    pub const Method = enum {
        plc,
        web,
    };

    const plc_prefix = "did:plc:";
    const web_prefix = "did:web:";

    /// number of base32 characters in a did:plc identifier
    const plc_id_len = 24;

    /// parse a did string. returns null if invalid.
    ///
    /// - `did:plc:` must be followed by exactly 24 characters from the
    ///   lowercase base32 alphabet (a-z, 2-7).
    /// - `did:web:` must be followed by a non-empty identifier; its contents
    ///   are not validated further.
    /// - any other method is rejected.
    ///
    /// the result borrows `s`; the caller must keep `s` alive for as long as
    /// the returned value is used.
    pub fn parse(s: []const u8) ?Did {
        if (std.mem.startsWith(u8, s, plc_prefix)) {
            if (!isValidPlcId(s[plc_prefix.len..])) return null;
            return .{
                .raw = s,
                .method = .plc,
                .id_start = plc_prefix.len,
            };
        }

        if (std.mem.startsWith(u8, s, web_prefix)) {
            // only emptiness is checked for did:web identifiers
            if (s.len == web_prefix.len) return null;
            return .{
                .raw = s,
                .method = .web,
                .id_start = web_prefix.len,
            };
        }

        return null;
    }

    /// the method-specific identifier
    /// for plc: the 24-char base32 id
    /// for web: everything after "did:web:" (e.g. the domain)
    pub fn identifier(self: Did) []const u8 {
        return self.raw[self.id_start..];
    }

    /// check if this is a plc did
    pub fn isPlc(self: Did) bool {
        return self.method == .plc;
    }

    /// check if this is a web did
    pub fn isWeb(self: Did) bool {
        return self.method == .web;
    }

    /// get the full did string
    pub fn str(self: Did) []const u8 {
        return self.raw;
    }

    /// true when `id` is exactly 24 characters, all drawn from a-z / 2-7.
    /// an empty `id` fails the length check.
    fn isValidPlcId(id: []const u8) bool {
        if (id.len != plc_id_len) return false;
        for (id) |c| {
            switch (c) {
                'a'...'z', '2'...'7' => {},
                else => return false,
            }
        }
        return true;
    }
};

test "parse did:plc" {
    const did = Did.parse("did:plc:z72i7hdynmk6r22z27h6tvur") orelse return error.InvalidDid;
    try std.testing.expect(did.isPlc());
    try std.testing.expect(!did.isWeb());
    try std.testing.expectEqualStrings("z72i7hdynmk6r22z27h6tvur", did.identifier());
    try std.testing.expectEqualStrings("did:plc:z72i7hdynmk6r22z27h6tvur", did.str());
}

test "parse did:web" {
    const did = Did.parse("did:web:example.com") orelse return error.InvalidDid;
    try std.testing.expect(did.isWeb());
    try std.testing.expect(!did.isPlc());
    try std.testing.expectEqualStrings("example.com", did.identifier());
}

test "reject invalid dids" {
    // missing prefix
    try std.testing.expect(Did.parse("plc:xyz") == null);

    // unknown method
    try std.testing.expect(Did.parse("did:unknown:xyz") == null);

    // empty identifier
    try std.testing.expect(Did.parse("did:plc:") == null);
    try std.testing.expect(Did.parse("did:web:") == null);

    // invalid plc chars
    try std.testing.expect(Did.parse("did:plc:INVALID") == null);
    try std.testing.expect(Did.parse("did:plc:z72i7hdynmk6r22z27h6tvu1") == null);
}

test "reject plc ids of the wrong length" {
    // too short
    try std.testing.expect(Did.parse("did:plc:xyz") == null);
    try std.testing.expect(Did.parse("did:plc:z72i7hdynmk6r22z27h6tvu") == null);

    // too long
    try std.testing.expect(Did.parse("did:plc:z72i7hdynmk6r22z27h6tvurx") == null);
}

+113

src/internal/tid.zig

//! TID - Timestamp Identifier
//!
//! tids encode a timestamp and clock id in a base32-sortable format.
//! format: 13 characters using alphabet "234567abcdefghijklmnopqrstuvwxyz"
//! - first 11 chars: 53-bit timestamp (microseconds since epoch)
//! - last 2 chars: 10-bit clock identifier
//!
//! the first character is restricted to "234567abcdefghij" (value < 16).
//!
//! the encoding is designed to be lexicographically sortable by time.

const std = @import("std");

pub const Tid = struct {
    /// the 13 encoded characters. the accessors assume every byte is a
    /// member of `alphabet`; values produced by `parse` and `fromTimestamp`
    /// always satisfy this.
    raw: [13]u8,

    const alphabet = "234567abcdefghijklmnopqrstuvwxyz";

    /// number of leading characters that hold the timestamp
    const timestamp_chars = 11;

    /// the timestamp occupies 53 bits
    const timestamp_mask: u64 = (1 << 53) - 1;

    /// largest value the first character may decode to ('j')
    const max_first_value: u5 = 15;

    /// parse a tid string. returns null if invalid.
    ///
    /// a valid tid is exactly 13 characters from the base32-sortable
    /// alphabet, and its first character must be one of "234567abcdefghij".
    /// the bytes are copied, so the result does not borrow `s`.
    pub fn parse(s: []const u8) ?Tid {
        if (s.len != 13) return null;

        // first character carries the high bits, which must stay clear
        const first = charToValue(s[0]) orelse return null;
        if (first > max_first_value) return null;

        for (s[1..]) |c| {
            if (charToValue(c) == null) return null;
        }

        return .{ .raw = s[0..13].* };
    }

    /// timestamp in microseconds since unix epoch
    pub fn timestamp(self: Tid) u64 {
        var ts: u64 = 0;
        for (self.raw[0..timestamp_chars]) |c| {
            // relies on the `raw` invariant: every byte is in the alphabet
            const val = charToValue(c) orelse unreachable;
            ts = (ts << 5) | val;
        }
        return ts;
    }

    /// clock identifier (lower 10 bits)
    pub fn clockId(self: Tid) u10 {
        var id: u10 = 0;
        for (self.raw[timestamp_chars..13]) |c| {
            // relies on the `raw` invariant: every byte is in the alphabet
            const val: u10 = charToValue(c) orelse unreachable;
            id = (id << 5) | val;
        }
        return id;
    }

    /// generate tid from timestamp and clock id.
    ///
    /// only the low 53 bits of `ts` are encoded; higher bits are discarded so
    /// that the result is always a tid that `parse` accepts.
    pub fn fromTimestamp(ts: u64, clock_id: u10) Tid {
        var result: Tid = undefined;

        // encode timestamp (53 bits -> 11 chars), least significant char last
        var t: u64 = ts & timestamp_mask;
        var i: usize = timestamp_chars;
        while (i > 0) {
            i -= 1;
            result.raw[i] = alphabet[@as(u5, @truncate(t))];
            t >>= 5;
        }

        // encode clock id (10 bits -> 2 chars)
        var c: u10 = clock_id;
        i = 13;
        while (i > timestamp_chars) {
            i -= 1;
            result.raw[i] = alphabet[@as(u5, @truncate(c))];
            c >>= 5;
        }

        return result;
    }

    /// get the raw string representation.
    /// the returned slice points into `self` and is valid only while `self` is.
    pub fn str(self: *const Tid) []const u8 {
        return &self.raw;
    }

    /// map an alphabet character to its 5-bit value, or null if `c` is not
    /// in the alphabet. '2'..'7' map to 0..5 and 'a'..'z' map to 6..31.
    fn charToValue(c: u8) ?u5 {
        return switch (c) {
            '2'...'7' => @intCast(c - '2'),
            'a'...'z' => @intCast(c - 'a' + 6),
            else => null,
        };
    }
};

test "parse valid tid" {
    // generate a valid tid and parse it back
    const generated = Tid.fromTimestamp(1704067200000000, 42);
    const tid = Tid.parse(generated.str()) orelse return error.InvalidTid;
    try std.testing.expectEqual(@as(u64, 1704067200000000), tid.timestamp());
    try std.testing.expectEqual(@as(u10, 42), tid.clockId());
}

test "reject invalid tid" {
    // wrong length
    try std.testing.expect(Tid.parse("abc") == null);
    try std.testing.expect(Tid.parse("") == null);

    // invalid chars
    try std.testing.expect(Tid.parse("0000000000000") == null);
    try std.testing.expect(Tid.parse("1111111111111") == null);

    // first char outside "234567abcdefghij"
    try std.testing.expect(Tid.parse("zzzzzzzzzzzzz") == null);
    try std.testing.expect(Tid.parse("kjzfcijpj2z2a") == null);
}

test "accept first char boundary" {
    try std.testing.expect(Tid.parse("2222222222222") != null);
    try std.testing.expect(Tid.parse("jzzzzzzzzzzzz") != null);
}

test "roundtrip" {
    const ts: u64 = 1704067200000000; // 2024-01-01 00:00:00 UTC in microseconds
    const clock: u10 = 42;

    const tid = Tid.fromTimestamp(ts, clock);
    try std.testing.expectEqual(ts, tid.timestamp());
    try std.testing.expectEqual(clock, tid.clockId());
}

test "oversized timestamp still yields a parseable tid" {
    const tid = Tid.fromTimestamp(std.math.maxInt(u64), 1023);
    try std.testing.expect(Tid.parse(tid.str()) != null);
    try std.testing.expectEqual(@as(u64, (1 << 53) - 1), tid.timestamp());
    try std.testing.expectEqual(@as(u10, 1023), tid.clockId());
}

+42

src/root.zig

//! zat - zig atproto primitives (alpha)
//!
//! small, low-level building blocks for atproto applications.
//! this is not a full sdk - only the pieces that keep getting reimplemented.
//!
//! ## stability
//!
//! alpha software (0.0.1-alpha). the public API is intentionally minimal:
//! features begin life in `internal` and move here once they are stable.
//!
//! ## public api
//!
//! nothing yet - all functionality still lives in `internal` while it is
//! being iterated on.
//!
//! ## internal api
//!
//! the experimental features are reachable through the `internal` namespace:
//!
//! ```zig
//! const zat = @import("zat");
//!
//! // internal APIs - subject to change
//! const tid = zat.internal.Tid.parse("...") orelse return error.InvalidTid;
//! const uri = zat.internal.AtUri.parse("at://did:plc:xyz/collection/rkey") orelse return error.InvalidUri;
//! const did = zat.internal.Did.parse("did:plc:z72i7hdynmk6r22z27h6tvur") orelse return error.InvalidDid;
//! ```
//!
//! after stabilizing they will be available as `zat.Tid`, `zat.AtUri`, etc.

/// namespace for experimental and in-progress APIs.
/// anything reached through it may change without notice.
pub const internal = @import("internal.zig");

// --- stable public API ---
// (declarations are promoted from internal when ready)
//
// a promotion looks like:
// pub const Tid = internal.Tid;

test {
    // reference the internal module from a test block so its tests are included.
    _ = @import("internal.zig");
}