+18
build.zig
+18
build.zig
···
1
+
const std = @import("std");
2
+
3
+
/// build script entry point: registers the `zat` module and a `test` step.
pub fn build(b: *std.Build) void {
    // the module is rooted at src/root.zig and honors the standard
    // target / optimize options supplied on the command line.
    const zat_module = b.addModule("zat", .{
        .root_source_file = b.path("src/root.zig"),
        .target = b.standardTargetOptions(.{}),
        .optimize = b.standardOptimizeOption(.{}),
    });

    // `zig build test` builds the module's tests and then runs them.
    const unit_tests = b.addTest(.{ .root_module = zat_module });
    const run_unit_tests = b.addRunArtifact(unit_tests);
    b.step("test", "run unit tests").dependOn(&run_unit_tests.step);
}
+11
build.zig.zon
+11
build.zig.zon
+188
docs/plan.md
+188
docs/plan.md
···
1
+
# zat - zig atproto primitives
2
+
3
+
low-level building blocks for atproto applications in zig. not a full sdk - just the pieces that everyone reimplements.
4
+
5
+
## philosophy
6
+
7
+
from studying the wishlists: the pain is real, but the suggested solutions often over-engineer. we want:
8
+
9
+
1. **primitives, not frameworks** - types and parsers, not http clients or feed scaffolds
10
+
2. **layered design** - each piece usable independently
11
+
3. **zig idioms** - explicit buffers, comptime validation, no hidden allocations
12
+
4. **minimal scope** - solve the repeated pain, not every possible need
13
+
14
+
## scope
15
+
16
+
### in scope (v0.1)
17
+
18
+
**tid** - timestamp identifiers
19
+
- parse tid string to timestamp (microseconds)
20
+
- generate tid from timestamp
21
+
- extract clock id
22
+
- comptime validation of format
23
+
24
+
**at-uri** - `at://did:plc:xyz/collection/rkey`
25
+
- parse to components (did, collection, rkey)
26
+
- construct from components
27
+
- validation
28
+
29
+
**did** - decentralized identifiers
30
+
- parse did:plc and did:web
31
+
- validate format
32
+
- type-safe wrapper (not just `[]const u8`)
33
+
34
+
### maybe v0.2
35
+
36
+
**facets** - extract links/mentions/tags from post records
37
+
- given a json value with `text` and `facets`, extract urls
38
+
- byte-offset handling for utf-8
39
+
40
+
**cid** - content identifiers
41
+
- parse cid strings
42
+
- validate format
43
+
44
+
### out of scope (for now)
45
+
46
+
- lexicon codegen (too big, could be its own project)
47
+
- xrpc client (std.http.Client is fine)
48
+
- session management (app-specific)
49
+
- jetstream client (websocket.zig exists, just wire it)
50
+
- feed generator framework (each feed is unique)
51
+
- did resolution (requires http, out of primitive scope)
52
+
53
+
## design
54
+
55
+
### tid.zig
56
+
57
+
```zig
58
+
pub const Tid = struct {
59
+
raw: [13]u8,
60
+
61
+
/// parse a tid string. returns null if invalid.
62
+
pub fn parse(s: []const u8) ?Tid
63
+
64
+
/// timestamp in microseconds since unix epoch
65
+
pub fn timestamp(self: Tid) u64
66
+
67
+
/// clock identifier (lower 10 bits)
68
+
pub fn clockId(self: Tid) u10
69
+
70
+
/// generate tid for current time
71
+
pub fn now() Tid
72
+
73
+
/// generate tid for specific timestamp
74
+
pub fn fromTimestamp(ts: u64, clock_id: u10) Tid
75
+
76
+
/// format to string
77
+
pub fn format(self: Tid, buf: *[13]u8) void
78
+
};
79
+
```
80
+
81
+
encoding: base32-sortable (chars `234567abcdefghijklmnopqrstuvwxyz`), 13 chars. the first 11 chars encode the 53-bit timestamp (11 × 5 = 55 bits, so the top two bits are always zero) and the last 2 chars encode the 10-bit clock id.
82
+
83
+
### at_uri.zig
84
+
85
+
```zig
86
+
pub const AtUri = struct {
87
+
/// the full uri string (borrowed, not owned)
88
+
raw: []const u8,
89
+
90
+
/// offsets into raw for each component
91
+
did_end: usize,
92
+
collection_end: usize,
93
+
94
+
pub fn parse(s: []const u8) ?AtUri
95
+
96
+
pub fn did(self: AtUri) []const u8
97
+
pub fn collection(self: AtUri) []const u8
98
+
pub fn rkey(self: AtUri) []const u8
99
+
100
+
/// construct a new uri. caller owns the buffer.
101
+
pub fn format(
102
+
buf: []u8,
103
+
did: []const u8,
104
+
collection: []const u8,
105
+
rkey: []const u8,
106
+
) ?[]const u8
107
+
};
108
+
```
109
+
110
+
### did.zig
111
+
112
+
```zig
113
+
pub const Did = union(enum) {
114
+
plc: [24]u8, // the identifier after "did:plc:"
115
+
web: []const u8, // the domain after "did:web:"
116
+
117
+
pub fn parse(s: []const u8) ?Did
118
+
119
+
/// format to string
120
+
pub fn format(self: Did, buf: []u8) ?[]const u8
121
+
122
+
/// check if this is a plc did
123
+
pub fn isPlc(self: Did) bool
124
+
};
125
+
```
126
+
127
+
## structure
128
+
129
+
```
130
+
zat/
131
+
├── build.zig
132
+
├── build.zig.zon
133
+
├── src/
134
+
│ ├── root.zig # public API (stable exports)
135
+
│ ├── internal.zig # internal API (experimental)
136
+
│ └── internal/
137
+
│ ├── tid.zig
138
+
│ ├── at_uri.zig
139
+
│ └── did.zig
140
+
└── docs/
141
+
└── plan.md
142
+
```
143
+
144
+
## internal → public promotion
145
+
146
+
new features start in `internal` where we can iterate freely. when an API stabilizes:
147
+
148
+
```zig
149
+
// in root.zig, uncomment to promote:
150
+
pub const Tid = internal.Tid;
151
+
```
152
+
153
+
users who need bleeding-edge access can always use:
154
+
155
+
```zig
156
+
const zat = @import("zat");
157
+
const tid = zat.internal.Tid.parse("...");
158
+
```
159
+
160
+
this pattern exists indefinitely - even after 1.0, new experimental features start in internal.
161
+
162
+
## decisions
163
+
164
+
### why not typed lexicons?
165
+
166
+
codegen from lexicon json is a big project on its own. the core pain (json navigation) can be partially addressed by documenting patterns, and zat should work regardless of how people parse json.
167
+
168
+
### why not an http client wrapper?
169
+
170
+
zig 0.15's `std.http.Client` with `std.Io.Writer.Allocating` works well. wrapping it doesn't add much value. the real pain is around auth token refresh and rate limiting - those are better solved at the application level where retry logic is domain-specific.
171
+
172
+
### why not websocket/jetstream?
173
+
174
+
websocket.zig already exists and works well. the jetstream protocol is simple json messages. a thin wrapper doesn't justify a dependency.
175
+
176
+
### borrowing vs owning
177
+
178
+
for parse operations, we borrow slices into the input rather than allocating. callers who need owned data can dupe. this matches zig's explicit memory style.
179
+
180
+
## next steps
181
+
182
+
1. ~~implement tid.zig with tests~~ done
183
+
2. ~~implement at_uri.zig with tests~~ done
184
+
3. ~~implement did.zig with tests~~ done
185
+
4. ~~wire up build.zig as a module~~ done
186
+
5. try using it in find-bufo or music-atmosphere-feed to validate the api
187
+
6. iterate on internal APIs based on real usage
188
+
7. promote stable APIs to root.zig
+16
src/internal.zig
+16
src/internal.zig
···
1
+
//! zat internal module
2
+
//!
3
+
//! experimental APIs that haven't stabilized yet.
4
+
//! everything here is subject to change without notice.
5
+
//!
6
+
//! when an API stabilizes, it gets promoted to root.zig.
7
+
8
+
pub const Tid = @import("internal/tid.zig").Tid;
9
+
pub const AtUri = @import("internal/at_uri.zig").AtUri;
10
+
pub const Did = @import("internal/did.zig").Did;
11
+
12
+
test {
    // referencing each implementation file here pulls the tests declared
    // inside it into this module's test run.
    inline for (.{
        @import("internal/tid.zig"),
        @import("internal/at_uri.zig"),
        @import("internal/did.zig"),
    }) |file| {
        _ = file;
    }
}
+136
src/internal/at_uri.zig
+136
src/internal/at_uri.zig
···
1
+
//! AT-URI Parser
2
+
//!
3
+
//! at-uris identify records in the atproto network.
4
+
//! format: at://<did>/<collection>/<rkey>
5
+
//!
6
+
//! examples:
7
+
//! - at://did:plc:xyz/app.bsky.feed.post/abc123
8
+
//! - at://did:web:example.com/app.bsky.actor.profile/self
9
+
10
+
const std = @import("std");
11
+
12
+
pub const AtUri = struct {
    /// the full uri string (borrowed, not owned)
    raw: []const u8,

    /// offset into `raw` of the '/' that terminates the did
    did_end: usize,

    /// offset into `raw` of the '/' that terminates the collection
    collection_end: usize,

    const prefix = "at://";

    /// parse an at-uri of the form `at://<did>/<collection>/<rkey>`.
    ///
    /// returns null if the prefix is missing, if any of the three components
    /// is empty, or if the rkey is followed by a further path segment.
    /// the components are not validated beyond that (the did is not checked
    /// to be a well-formed did, for example).
    ///
    /// the result borrows `s`: `s` must stay alive as long as the result.
    pub fn parse(s: []const u8) ?AtUri {
        if (!std.mem.startsWith(u8, s, prefix)) return null;

        // did: everything between the prefix and the first '/'
        const after_prefix = s[prefix.len..];
        const did_len = std.mem.indexOfScalar(u8, after_prefix, '/') orelse return null;
        if (did_len == 0) return null; // empty did
        const did_stop = prefix.len + did_len;

        // collection: everything between the first and second '/'
        const after_did = s[did_stop + 1 ..];
        const collection_len = std.mem.indexOfScalar(u8, after_did, '/') orelse return null;
        if (collection_len == 0) return null; // empty collection
        const collection_stop = did_stop + 1 + collection_len;

        // rkey: the remainder. it must be non-empty and must be the last path
        // segment, otherwise "a/b" would be silently accepted as an rkey.
        const rkey_part = s[collection_stop + 1 ..];
        if (rkey_part.len == 0) return null;
        if (std.mem.indexOfScalar(u8, rkey_part, '/') != null) return null;

        return .{
            .raw = s,
            .did_end = did_stop,
            .collection_end = collection_stop,
        };
    }

    /// the did portion (e.g., "did:plc:xyz")
    pub fn did(self: AtUri) []const u8 {
        return self.raw[prefix.len..self.did_end];
    }

    /// the collection portion (e.g., "app.bsky.feed.post")
    pub fn collection(self: AtUri) []const u8 {
        return self.raw[self.did_end + 1 .. self.collection_end];
    }

    /// the rkey portion (e.g., "abc123")
    pub fn rkey(self: AtUri) []const u8 {
        return self.raw[self.collection_end + 1 ..];
    }

    /// format a new at-uri into the provided buffer.
    ///
    /// returns the slice of `buf` that was written, or null if `buf` is too
    /// small (in which case nothing is written). the inputs are copied as-is
    /// and are not validated. the inputs must not overlap `buf`.
    pub fn format(
        buf: []u8,
        did_str: []const u8,
        collection_str: []const u8,
        rkey_str: []const u8,
    ) ?[]const u8 {
        const parts = [_][]const u8{ prefix, did_str, "/", collection_str, "/", rkey_str };

        // check the total up front so a short buffer is left untouched
        var total_len: usize = 0;
        for (parts) |part| total_len += part.len;
        if (buf.len < total_len) return null;

        var pos: usize = 0;
        for (parts) |part| {
            @memcpy(buf[pos..][0..part.len], part);
            pos += part.len;
        }

        return buf[0..pos];
    }
};
100
+
101
+
test "parse valid at-uri" {
    const testing = std.testing;

    // a well-formed record uri splits into did / collection / rkey
    if (AtUri.parse("at://did:plc:xyz/app.bsky.feed.post/abc123")) |parsed| {
        try testing.expectEqualStrings("did:plc:xyz", parsed.did());
        try testing.expectEqualStrings("app.bsky.feed.post", parsed.collection());
        try testing.expectEqualStrings("abc123", parsed.rkey());
    } else {
        return error.InvalidUri;
    }
}
107
+
108
+
test "parse did:web uri" {
    const testing = std.testing;

    // the colons and dots inside a did:web authority must not confuse the splitter
    if (AtUri.parse("at://did:web:example.com/app.bsky.actor.profile/self")) |parsed| {
        try testing.expectEqualStrings("did:web:example.com", parsed.did());
        try testing.expectEqualStrings("app.bsky.actor.profile", parsed.collection());
        try testing.expectEqualStrings("self", parsed.rkey());
    } else {
        return error.InvalidUri;
    }
}
114
+
115
+
test "reject invalid uris" {
    // every input here must be refused by the parser
    const rejected = [_][]const u8{
        "", // empty input
        "at://", // prefix only
        "did:plc:xyz/app.bsky.feed.post/abc", // missing prefix
        "http://did:plc:xyz/app.bsky.feed.post/abc", // wrong prefix
        "at://did:plc:xyz", // missing collection
        "at://did:plc:xyz/app.bsky.feed.post", // missing rkey
        "at:///app.bsky.feed.post/abc", // empty did
        "at://did:plc:xyz//abc", // empty collection
        "at://did:plc:xyz/app.bsky.feed.post/", // empty rkey
    };

    for (rejected) |input| {
        // name the offending input if the expectation below fails
        errdefer std.debug.print("unexpectedly parsed: \"{s}\"\n", .{input});
        try std.testing.expect(AtUri.parse(input) == null);
    }
}
131
+
132
+
test "format at-uri" {
    const expected = "at://did:plc:xyz/app.bsky.feed.post/abc123";

    // roomy buffer: only the used prefix of the buffer is returned
    var roomy: [256]u8 = undefined;
    const result = AtUri.format(&roomy, "did:plc:xyz", "app.bsky.feed.post", "abc123") orelse return error.BufferTooSmall;
    try std.testing.expectEqualStrings(expected, result);

    // exact-fit buffer is enough
    var exact: [expected.len]u8 = undefined;
    const fitted = AtUri.format(&exact, "did:plc:xyz", "app.bsky.feed.post", "abc123") orelse return error.BufferTooSmall;
    try std.testing.expectEqualStrings(expected, fitted);

    // one byte short: formatting is refused
    var short: [expected.len - 1]u8 = undefined;
    try std.testing.expect(AtUri.format(&short, "did:plc:xyz", "app.bsky.feed.post", "abc123") == null);
}
+117
src/internal/did.zig
+117
src/internal/did.zig
···
1
+
//! DID - Decentralized Identifier
2
+
//!
3
+
//! dids are globally unique identifiers in the atproto network.
4
+
//! supports did:plc and did:web methods.
5
+
//!
6
+
//! examples:
7
+
//! - did:plc:z72i7hdynmk6r22z27h6tvur
8
+
//! - did:web:example.com
9
+
10
+
const std = @import("std");
11
+
12
+
pub const Did = struct {
    /// the full did string (borrowed, not owned)
    raw: []const u8,

    /// the method (plc or web)
    method: Method,

    /// offset into `raw` where the method-specific identifier starts
    id_start: usize,

    pub const Method = enum {
        plc,
        web,
    };

    const plc_prefix = "did:plc:";
    const web_prefix = "did:web:";

    /// number of base32 characters in a did:plc identifier
    const plc_id_len = 24;

    /// parse a did string. returns null if invalid.
    ///
    /// accepted forms:
    /// - `did:plc:<id>` where `<id>` is exactly 24 characters from `a-z2-7`
    /// - `did:web:<domain>` where `<domain>` is non-empty (the domain itself
    ///   is not validated)
    ///
    /// any other method is rejected. the result borrows `s`: `s` must stay
    /// alive as long as the result.
    pub fn parse(s: []const u8) ?Did {
        if (std.mem.startsWith(u8, s, plc_prefix)) {
            if (!isValidPlcId(s[plc_prefix.len..])) return null;
            return .{
                .raw = s,
                .method = .plc,
                .id_start = plc_prefix.len,
            };
        }

        if (std.mem.startsWith(u8, s, web_prefix)) {
            if (s.len == web_prefix.len) return null; // empty domain
            return .{
                .raw = s,
                .method = .web,
                .id_start = web_prefix.len,
            };
        }

        return null;
    }

    /// the method-specific identifier
    /// for plc: the 24-char base32 id
    /// for web: the domain
    pub fn identifier(self: Did) []const u8 {
        return self.raw[self.id_start..];
    }

    /// check if this is a plc did
    pub fn isPlc(self: Did) bool {
        return self.method == .plc;
    }

    /// check if this is a web did
    pub fn isWeb(self: Did) bool {
        return self.method == .web;
    }

    /// get the full did string
    pub fn str(self: Did) []const u8 {
        return self.raw;
    }

    /// true when `id` is exactly `plc_id_len` characters, all drawn from the
    /// lowercase base32 alphabet (a-z, 2-7). an empty id is therefore invalid.
    fn isValidPlcId(id: []const u8) bool {
        if (id.len != plc_id_len) return false;
        for (id) |c| {
            switch (c) {
                'a'...'z', '2'...'7' => {},
                else => return false,
            }
        }
        return true;
    }
};
89
+
90
+
test "parse did:plc" {
    const testing = std.testing;

    if (Did.parse("did:plc:z72i7hdynmk6r22z27h6tvur")) |parsed| {
        // exactly one of the method predicates holds
        try testing.expect(parsed.isPlc());
        try testing.expect(!parsed.isWeb());
        // the identifier is what follows "did:plc:"
        try testing.expectEqualStrings("z72i7hdynmk6r22z27h6tvur", parsed.identifier());
    } else {
        return error.InvalidDid;
    }
}
96
+
97
+
test "parse did:web" {
    const testing = std.testing;

    if (Did.parse("did:web:example.com")) |parsed| {
        // exactly one of the method predicates holds
        try testing.expect(parsed.isWeb());
        try testing.expect(!parsed.isPlc());
        // the identifier is the domain that follows "did:web:"
        try testing.expectEqualStrings("example.com", parsed.identifier());
    } else {
        return error.InvalidDid;
    }
}
103
+
104
+
test "reject invalid dids" {
    // every input here must be refused by the parser
    const rejected = [_][]const u8{
        "", // empty input
        "did:", // scheme only
        "plc:xyz", // missing prefix
        "did:unknown:xyz", // unknown method
        "did:plc:", // empty plc identifier
        "did:web:", // empty web identifier
        "did:plc:INVALID", // uppercase is outside the plc alphabet
        "did:plc:z72i7hdynmk6r22z27h6tvu1", // '1' is outside the plc alphabet
    };

    for (rejected) |input| {
        // name the offending input if the expectation below fails
        errdefer std.debug.print("unexpectedly parsed: \"{s}\"\n", .{input});
        try std.testing.expect(Did.parse(input) == null);
    }
}
+113
src/internal/tid.zig
+113
src/internal/tid.zig
···
1
+
//! TID - Timestamp Identifier
2
+
//!
3
+
//! tids encode a timestamp and clock id in a base32-sortable format.
4
+
//! format: 13 characters using alphabet "234567abcdefghijklmnopqrstuvwxyz"
5
+
//! - first 11 chars: 53-bit timestamp (microseconds since epoch)
6
+
//! - last 2 chars: 10-bit clock identifier
7
+
//!
8
+
//! the encoding is designed to be lexicographically sortable by time.
9
+
10
+
const std = @import("std");
11
+
12
+
pub const Tid = struct {
    /// the 13-character encoded form
    raw: [13]u8,

    const alphabet = "234567abcdefghijklmnopqrstuvwxyz";

    /// the timestamp occupies 53 bits of the underlying 64-bit value
    const timestamp_mask: u64 = (1 << 53) - 1;

    /// parse a tid string. returns null if invalid.
    ///
    /// a valid tid is exactly 13 characters from the base32-sortable
    /// alphabet, and its first character is one of `234567abcdefghij`
    /// (values 0..15): 13 characters could carry 65 bits, but a tid is a
    /// 64-bit value, so the top bit of the first character is never set.
    pub fn parse(s: []const u8) ?Tid {
        if (s.len != 13) return null;

        for (s, 0..) |c, i| {
            const value = charToValue(c) orelse return null;
            if (i == 0 and value > 0x0f) return null;
        }

        return .{ .raw = s[0..13].* };
    }

    /// timestamp in microseconds since unix epoch
    ///
    /// decoded from the first 11 characters. assumes `raw` holds only
    /// alphabet characters, which `parse` and `fromTimestamp` guarantee.
    pub fn timestamp(self: Tid) u64 {
        var ts: u64 = 0;
        for (self.raw[0..11]) |c| {
            ts = (ts << 5) | charToValue(c).?;
        }
        return ts;
    }

    /// clock identifier (lower 10 bits)
    ///
    /// decoded from the last 2 characters. same assumption on `raw` as
    /// `timestamp`.
    pub fn clockId(self: Tid) u10 {
        var id: u10 = 0;
        for (self.raw[11..13]) |c| {
            id = (id << 5) | @as(u10, charToValue(c).?);
        }
        return id;
    }

    /// generate tid from timestamp and clock id
    ///
    /// `ts` is microseconds since the unix epoch. only its low 53 bits are
    /// encoded; higher bits are discarded so that the result is always a
    /// tid that `parse` accepts.
    pub fn fromTimestamp(ts: u64, clock_id: u10) Tid {
        // 64-bit layout: 1 zero bit | 53-bit timestamp | 10-bit clock id
        var bits: u64 = ((ts & timestamp_mask) << 10) | clock_id;

        // emit 5 bits per character, least significant group last
        var raw: [13]u8 = undefined;
        var i: usize = raw.len;
        while (i > 0) {
            i -= 1;
            raw[i] = alphabet[@intCast(bits & 0x1f)];
            bits >>= 5;
        }

        return .{ .raw = raw };
    }

    /// get the raw string representation
    ///
    /// the slice points into `self`, so it is only valid while `self` is.
    pub fn str(self: *const Tid) []const u8 {
        return &self.raw;
    }

    /// map an alphabet character to its 5-bit value, or null if `c` is not
    /// part of the alphabet ('2'..'7' are 0..5, 'a'..'z' are 6..31).
    fn charToValue(c: u8) ?u5 {
        return switch (c) {
            '2'...'7' => @intCast(c - '2'),
            'a'...'z' => @intCast(c - 'a' + 6),
            else => null,
        };
    }
};
87
+
88
+
test "parse valid tid" {
    const testing = std.testing;

    // encode a known timestamp / clock id, then parse the text form back
    const source = Tid.fromTimestamp(1704067200000000, 42);

    if (Tid.parse(source.str())) |parsed| {
        try testing.expectEqual(@as(u64, 1704067200000000), parsed.timestamp());
        try testing.expectEqual(@as(u10, 42), parsed.clockId());
    } else {
        return error.InvalidTid;
    }
}
95
+
96
+
test "reject invalid tid" {
    // every input here must be refused by the parser
    const rejected = [_][]const u8{
        "abc", // too short
        "", // empty
        "3jzfcijpj2z2aa", // too long (14 chars)
        "0000000000000", // '0' is outside the alphabet
        "1111111111111", // '1' is outside the alphabet
        "3JZFCIJPJ2Z2A", // uppercase is outside the alphabet
    };

    for (rejected) |input| {
        // name the offending input if the expectation below fails
        errdefer std.debug.print("unexpectedly parsed: \"{s}\"\n", .{input});
        try std.testing.expect(Tid.parse(input) == null);
    }
}
105
+
106
+
test "roundtrip" {
    const Case = struct { ts: u64, clock: u10 };
    const cases = [_]Case{
        .{ .ts = 1704067200000000, .clock = 42 }, // 2024-01-01 00:00:00 UTC in microseconds
        .{ .ts = 0, .clock = 0 }, // smallest values
        .{ .ts = 1704067200000000, .clock = 1023 }, // largest clock id
    };

    // encoding then decoding must give back both fields unchanged
    for (cases) |case| {
        const tid = Tid.fromTimestamp(case.ts, case.clock);
        try std.testing.expectEqual(case.ts, tid.timestamp());
        try std.testing.expectEqual(case.clock, tid.clockId());
    }
}
+42
src/root.zig
+42
src/root.zig
···
1
+
//! zat - zig atproto primitives (alpha)
2
+
//!
3
+
//! low-level building blocks for atproto applications.
4
+
//! not a full sdk - just the pieces everyone reimplements.
5
+
//!
6
+
//! ## stability
7
+
//!
8
+
//! this is alpha software (0.0.1-alpha). the public API is minimal by design.
9
+
//! new features start in `internal` and get promoted here when stable.
10
+
//!
11
+
//! ## public api
12
+
//!
13
+
//! currently empty - everything is still in internal while we iterate.
14
+
//!
15
+
//! ## internal api
16
+
//!
17
+
//! for bleeding-edge features, use the internal module directly:
18
+
//!
19
+
//! ```zig
20
+
//! const zat = @import("zat");
21
+
//!
22
+
//! // internal APIs - subject to change
23
+
//! const tid = zat.internal.Tid.parse("...") orelse return error.InvalidTid;
24
+
//! const uri = zat.internal.AtUri.parse("at://did:plc:xyz/collection/rkey") orelse return error.InvalidUri;
25
+
//! const did = zat.internal.Did.parse("did:plc:z72i7hdynmk6r22z27h6tvur") orelse return error.InvalidDid;
26
+
//! ```
27
+
//!
28
+
//! when these stabilize, they'll be promoted to `zat.Tid`, `zat.AtUri`, etc.
29
+
30
+
/// experimental and in-progress APIs.
31
+
/// everything here is subject to change without notice.
32
+
pub const internal = @import("internal.zig");
33
+
34
+
// --- stable public API ---
35
+
// (promoted from internal when ready)
36
+
//
37
+
// example of promotion:
38
+
// pub const Tid = internal.Tid;
39
+
40
+
test {
    // referencing the internal module pulls its tests into this test run
    _ = @import("internal.zig");
}