+4
.beads/issues.jsonl
+4
.beads/issues.jsonl
···
31
31
{"id":"atproto-60","title":"Implement effects-based I/O abstraction","description":"Implement the effects-based I/O abstraction layer that makes all libraries runtime-agnostic.","design":"## Module Structure\n\n```ocaml\n(* atproto-effects/lib/effects.ml *)\n\n(* HTTP effects *)\ntype http_request = {\n method_: [ `GET | `POST | `PUT | `DELETE ];\n uri: Uri.t;\n headers: (string * string) list;\n body: string option;\n}\n\ntype http_response = {\n status: int;\n headers: (string * string) list;\n body: string;\n}\n\ntype _ Effect.t +=\n | Http_request : http_request -\u003e http_response Effect.t\n\n(* DNS effects *)\ntype _ Effect.t +=\n | Dns_txt : string -\u003e string list Effect.t\n | Dns_a : string -\u003e string list Effect.t\n\n(* Time effects *)\ntype _ Effect.t +=\n | Now : Ptime.t Effect.t\n | Sleep : float -\u003e unit Effect.t\n\n(* Random effects *)\ntype _ Effect.t +=\n | Random_bytes : int -\u003e bytes Effect.t\n\n(* atproto-effects-eio/lib/handler.ml *)\nval run : (unit -\u003e 'a) -\u003e 'a\n```\n\n## Handler Example (eio)\n\n```ocaml\nlet run f =\n Effect.Deep.match_ f ()\n {\n retc = Fun.id;\n exnc = raise;\n effc = fun (type a) (e : a Effect.t) -\u003e\n match e with\n | Http_request req -\u003e\n Some (fun (k : (a, _) continuation) -\u003e\n let resp = Eio_client.request req in\n continue k resp)\n | Dns_txt domain -\u003e\n Some (fun k -\u003e\n let records = Eio_dns.txt domain in\n continue k records)\n | _ -\u003e None\n }\n```\n\n## Dependencies\n- eio (for testing handler)","acceptance_criteria":"- Effect types for HTTP, DNS, time, random\n- eio-based handler for testing\n- Handler composition utilities\n- Performance benchmarks","status":"closed","priority":1,"issue_type":"feature","assignee":"claude","created_at":"2025-12-28T00:12:29.021401617+01:00","updated_at":"2025-12-28T11:57:08.264086142+01:00","closed_at":"2025-12-28T11:57:08.264086142+01:00","labels":["effects","infrastructure"],"dependencies":[{"issue_id":"atproto-60","depends_on_id":"atproto-1","type":"parent-child","created_at":"2025-12-28T00:12:55.467983208+01:00","created_by":"daemon"}]}
32
32
{"id":"atproto-61","title":"Set up interoperability test suite","description":"Set up and run the AT Protocol interoperability tests from bluesky-social/atproto-interop-tests.","design":"## Test Structure\n\n```\ntest/\n├── interop/\n│ ├── syntax_test.ml # Handle, DID, NSID, TID, etc.\n│ ├── crypto_test.ml # Signatures, did:key\n│ ├── data_model_test.ml # DAG-CBOR, CID\n│ ├── mst_test.ml # Key heights, tree structure\n│ ├── lexicon_test.ml # Schema and record validation\n│ └── firehose_test.ml # Commit proofs\n├── fixtures/ # Cloned from atproto-interop-tests\n└── dune\n```\n\n## Test Approach\n\n1. Clone test vectors from GitHub\n2. Parse JSON fixtures using jsont\n3. Parse text fixtures line by line\n4. Run each test case\n5. Compare output to expected values\n\n## Example Test\n\n```ocaml\nlet load_json_fixtures path =\n let json = Jsont.of_file path in\n Jsont.decode (Jsont.list fixture_jsont) json\n\nlet%test \"handle_syntax_valid\" =\n let fixtures = load_lines \"fixtures/syntax/handle_syntax_valid.txt\" in\n List.for_all (fun line -\u003e\n match Handle.of_string line with\n | Ok _ -\u003e true\n | Error _ -\u003e false\n ) fixtures\n\nlet%test \"handle_syntax_invalid\" =\n let fixtures = load_lines \"fixtures/syntax/handle_syntax_invalid.txt\" in\n List.for_all (fun line -\u003e\n match Handle.of_string line with\n | Ok _ -\u003e false\n | Error _ -\u003e true\n ) fixtures\n\nlet%test \"crypto_signature_fixtures\" =\n let fixtures = load_json_fixtures \"fixtures/crypto/signature-fixtures.json\" in\n List.for_all (fun fixture -\u003e\n let message = Base64.decode fixture.message_base64 in\n let signature = Base64.decode fixture.signature_base64 in\n let key = Did_key.of_string fixture.public_key_did in\n let result = Crypto.verify key message signature in\n result = fixture.valid_signature\n ) fixtures\n```\n\n## Dependencies\n- alcotest or ounit2\n- jsont","acceptance_criteria":"- All syntax interop tests pass\n- All crypto interop tests pass\n- All data-model interop tests pass\n- All MST interop tests pass\n- All lexicon interop tests pass\n- All firehose interop tests pass","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-28T00:12:40.553908313+01:00","updated_at":"2025-12-28T13:25:34.614867702+01:00","closed_at":"2025-12-28T13:25:34.614867702+01:00","labels":["conformance","testing"],"dependencies":[{"issue_id":"atproto-61","depends_on_id":"atproto-1","type":"parent-child","created_at":"2025-12-28T00:12:56.180809368+01:00","created_by":"daemon"}]}
33
33
{"id":"atproto-62","title":"Set up monorepo package structure","description":"Set up the monorepo structure for multiple opam packages within a single repository.","design":"## Repository Structure\n\n```\natproto/\n├── dune-project # Root with all packages\n├── packages/\n│ ├── atproto-syntax/\n│ │ ├── lib/\n│ │ │ ├── dune\n│ │ │ └── *.ml\n│ │ ├── test/\n│ │ │ ├── dune\n│ │ │ └── *_test.ml\n│ │ └── atproto-syntax.opam\n│ ├── atproto-crypto/\n│ ├── atproto-multibase/\n│ ├── atproto-ipld/\n│ ├── atproto-mst/\n│ ├── atproto-repo/\n│ ├── atproto-identity/\n│ ├── atproto-xrpc/\n│ ├── atproto-sync/\n│ ├── atproto-lexicon/\n│ ├── atproto-lexicon-gen/\n│ ├── atproto-api/\n│ └── atproto-effects/\n├── examples/\n│ ├── simple_client/\n│ └── firehose_consumer/\n└── interop-tests/\n```\n\n## dune-project\n\n```lisp\n(lang dune 3.20)\n(name atproto)\n(generate_opam_files true)\n\n(package\n (name atproto-syntax)\n (synopsis \"AT Protocol identifier syntax parsing\")\n (depends\n (ocaml (\u003e= 5.4))\n re\n ptime))\n\n(package\n (name atproto-crypto)\n ...)\n```\n\n## CI (.github/workflows/ci.yml)\n\n- OCaml 5.4 matrix\n- Build all packages\n- Run all tests\n- Run interop tests","acceptance_criteria":"- Multi-package dune-project structure\n- Separate opam files per package\n- CI pipeline for building and testing\n- Documentation generation setup","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-28T00:12:50.547102438+01:00","updated_at":"2025-12-28T11:57:18.856810633+01:00","closed_at":"2025-12-28T11:57:18.856810633+01:00","labels":["infrastructure","setup"],"dependencies":[{"issue_id":"atproto-62","depends_on_id":"atproto-1","type":"parent-child","created_at":"2025-12-28T00:12:57.015938611+01:00","created_by":"daemon"}]}
34
+
{"id":"atproto-cir","title":"Implement DAG-JSON codec","description":"Create lib/ipld/dag_json.ml, encode Links as {\"/\": \"\u003ccid-string\u003e\"}, encode Bytes as {\"/\": {\"bytes\": \"\u003cbase64\u003e\"}}","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-12-28T15:47:30.578398468+01:00","updated_at":"2025-12-28T16:06:13.475583417+01:00","closed_at":"2025-12-28T16:06:13.475583417+01:00","labels":["compliance","ipld"]}
34
35
{"id":"atproto-h09","title":"Add package documentation","description":"Add documentation for each of the 11 AT Protocol packages including:\n- Module-level documentation with examples\n- README.md for the root project\n- CONTRIBUTING.md guide\n- API documentation generation with odoc","status":"closed","priority":2,"issue_type":"task","assignee":"claude","created_at":"2025-12-28T13:34:10.559554696+01:00","updated_at":"2025-12-28T13:50:20.509417248+01:00","closed_at":"2025-12-28T13:50:20.509417248+01:00","labels":["documentation"],"dependencies":[{"issue_id":"atproto-h09","depends_on_id":"atproto-1","type":"parent-child","created_at":"2025-12-28T13:34:16.081103184+01:00","created_by":"daemon"}]}
36
+
{"id":"atproto-kc7","title":"Verify/enforce CBOR strictness","description":"Check that CBOR library produces shortest-form encoding, add explicit rejection of indefinite-length items during decode","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-12-28T15:47:28.665226526+01:00","updated_at":"2025-12-28T16:02:55.549763384+01:00","closed_at":"2025-12-28T16:02:55.549763384+01:00","labels":["compliance","ipld"]}
35
37
{"id":"atproto-pg8","title":"Add MST example_keys.txt fixture tests","description":"Add tests using the example_keys.txt fixture file which contains 156 structured MST keys.\n\nTests should:\n1. Load all 156 keys from the fixture\n2. Build an MST containing all keys\n3. Verify all keys are retrievable\n4. Verify iteration order matches sorted key order\n5. Optionally verify tree structure properties","acceptance_criteria":"- example_keys.txt is loaded and all 156 keys are used\n- MST is built with all keys\n- All keys are retrievable after insertion\n- Iteration produces keys in sorted order","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-28T12:12:19.180139823+01:00","updated_at":"2025-12-28T12:43:14.192342391+01:00","closed_at":"2025-12-28T12:43:14.192342391+01:00","labels":["conformance","mst","testing"]}
36
38
{"id":"atproto-q0h","title":"Add firehose commit-proof-fixtures.json tests","description":"Add tests for the commit-proof-fixtures.json file which contains 6 test cases for MST proof verification:\n\n1. two deep split\n2. two deep leafless split\n3. add on edge with neighbor two layers down\n4. merge and split in multi-op commit\n5. complex multi-op commit\n6. split with earlier leaves on same layer\n\nEach fixture includes:\n- keys (existing keys in MST)\n- adds (keys to add)\n- dels (keys to delete)\n- rootBeforeCommit / rootAfterCommit (expected CIDs)\n- blocksInProof (CIDs of blocks needed for proof)\n\nThis tests the commit proof verification needed for firehose sync.","acceptance_criteria":"- All 6 commit-proof fixtures are tested\n- MST operations (add/delete) produce correct root CIDs\n- Proof blocks are correctly identified\n- Tests verify rootBeforeCommit and rootAfterCommit match","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-28T12:12:34.999268893+01:00","updated_at":"2025-12-28T12:58:39.408679225+01:00","closed_at":"2025-12-28T12:58:39.408679225+01:00","labels":["conformance","firehose","testing"]}
39
+
{"id":"atproto-s19","title":"Add CIDv0 parsing support","description":"Detect base58btc strings starting with \"Qm\", decode as multihash (sha2-256), implicit dag-pb codec","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-12-28T15:47:26.962896421+01:00","updated_at":"2025-12-28T15:58:38.151319563+01:00","closed_at":"2025-12-28T15:58:38.151319563+01:00","labels":["compliance","ipld"]}
37
40
{"id":"atproto-udz","title":"Add missing data-model conformance tests","description":"Add tests for data-model fixtures that are not currently covered:\n\n1. **data-model-valid.json** (5 entries) - Valid AT Protocol data model examples:\n - trivial record\n - float but integer-like (123.0)\n - empty list and object\n - list of nullable\n - list of lists\n\n2. **data-model-invalid.json** (12 entries) - Invalid examples that must be rejected:\n - top-level not an object\n - non-integer float\n - record with $type null/wrong type/empty\n - blob with string size/missing key\n - bytes with wrong field type/extra fields\n - link with wrong field type/bogus CID/extra fields","acceptance_criteria":"- test_data_model_valid() tests all 5 valid entries\n- test_data_model_invalid() tests all 12 invalid entries\n- Valid entries encode/decode correctly\n- Invalid entries are rejected with appropriate errors","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-28T12:12:14.579573063+01:00","updated_at":"2025-12-28T12:42:16.291981859+01:00","closed_at":"2025-12-28T12:42:16.291981859+01:00","labels":["conformance","ipld","testing"]}
38
41
{"id":"atproto-w5i","title":"Create example applications","description":"Create example applications demonstrating the AT Protocol libraries:\n1. Simple client - authenticate and make posts\n2. Firehose consumer - subscribe to real-time events\n3. Bot example - automated posting/interactions","notes":"Added firehose_demo example showing how to use the firehose module with OCaml 5 effects. Additional examples (client, bot) can be added in future iterations.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-28T13:34:11.928213055+01:00","updated_at":"2025-12-28T13:36:39.963890666+01:00","closed_at":"2025-12-28T13:36:39.963890666+01:00","labels":["documentation","examples"],"dependencies":[{"issue_id":"atproto-w5i","depends_on_id":"atproto-1","type":"parent-child","created_at":"2025-12-28T13:34:17.10878+01:00","created_by":"daemon"}]}
42
+
{"id":"atproto-xfg","title":"Add Float support to DAG-CBOR","description":"Add Float of float variant to value type, encode as 64-bit double (CBOR major type 7, additional info 27), reject NaN and Infinity values, keep AT Protocol mode that rejects all floats","status":"closed","priority":1,"issue_type":"feature","created_at":"2025-12-28T15:47:24.836568239+01:00","updated_at":"2025-12-28T15:53:03.229484165+01:00","closed_at":"2025-12-28T15:53:03.229484165+01:00","labels":["compliance","ipld"]}
+1
-1
COMPLIANCE.md
+1
-1
COMPLIANCE.md
+1
-1
compliance-report.html
+1
-1
compliance-report.html
···
129
129
</table>
130
130
</div>
131
131
<div class="meta">
132
-
<p>Generated: 2025-12-28T13:56:33Z</p>
132
+
<p>Generated: 2025-12-28T15:06:07Z</p>
133
133
<p>Repository: <a href="https://github.com/gdiazlo/atproto">https://github.com/gdiazlo/atproto</a></p>
134
134
<p>Test fixtures from <a href="https://github.com/bluesky-social/atproto-interop-tests">AT Protocol Interoperability Tests</a></p>
135
135
</div>
+1
-1
compliance-report.json
+1
-1
compliance-report.json
+2
lib/ipld/atproto_ipld.ml
+2
lib/ipld/atproto_ipld.ml
···
8
8
9
9
- {!module:Cid}: Content Identifiers (CIDv0 and CIDv1)
10
10
- {!module:Dag_cbor}: DAG-CBOR encoding with AT Protocol sorting rules
11
+
- {!module:Dag_json}: DAG-JSON encoding for human-readable IPLD
11
12
- {!module:Car}: CAR (Content-Addressable aRchive) file handling
12
13
- {!module:Blob}: Blob reference handling for media
13
14
···
47
48
48
49
module Cid = Cid
49
50
module Dag_cbor = Dag_cbor
51
+
module Dag_json = Dag_json
50
52
module Car = Car
51
53
module Blob = Blob
+143
-31
lib/ipld/cid.ml
+143
-31
lib/ipld/cid.ml
···
1
-
(** CID (Content Identifier) for AT Protocol.
1
+
(** CID (Content Identifier) for IPLD and AT Protocol.
2
2
3
-
CIDs are self-describing content-addressed identifiers used throughout AT
4
-
Protocol for referencing data blocks.
3
+
CIDs are self-describing content-addressed identifiers used throughout IPLD
4
+
and AT Protocol for referencing data blocks.
5
+
6
+
This module supports both CIDv0 and CIDv1:
5
7
6
-
AT Protocol blessed CID format:
7
-
- Version: CIDv1 only
8
+
CIDv0 (legacy IPFS format):
9
+
- Base58btc encoded, starting with "Qm"
10
+
- Implicit dag-pb codec (0x70)
11
+
- SHA-256 hash only
12
+
- 46 characters when encoded
13
+
14
+
CIDv1 (AT Protocol blessed format):
15
+
- Version: CIDv1
8
16
- Hash: SHA-256 (multicodec 0x12), 256 bits
9
17
- Codec: dag-cbor (0x71) for records, raw (0x55) for blobs
10
18
- String encoding: base32lower (multibase prefix 'b') *)
···
15
23
| `Invalid_hash_length
16
24
| `Invalid_multibase
17
25
| `Invalid_cid_format
18
-
| `Cid_too_short ]
26
+
| `Cid_too_short
27
+
| `Invalid_cidv0 ]
19
28
20
29
let pp_error fmt = function
21
-
| `Invalid_cid_version -> Format.fprintf fmt "invalid CID version (must be 1)"
30
+
| `Invalid_cid_version ->
31
+
Format.fprintf fmt "invalid CID version (must be 0 or 1)"
22
32
| `Invalid_hash_algorithm ->
23
33
Format.fprintf fmt "invalid hash algorithm (must be SHA-256)"
24
34
| `Invalid_hash_length -> Format.fprintf fmt "invalid hash length"
25
35
| `Invalid_multibase -> Format.fprintf fmt "invalid multibase encoding"
26
36
| `Invalid_cid_format -> Format.fprintf fmt "invalid CID format"
27
37
| `Cid_too_short -> Format.fprintf fmt "CID too short"
38
+
| `Invalid_cidv0 -> Format.fprintf fmt "invalid CIDv0 format"
28
39
29
40
let error_to_string e = Format.asprintf "%a" pp_error e
30
41
···
59
70
| Other n -> n
60
71
end
61
72
62
-
type t = { codec : codec; hash : string (** 32-byte SHA-256 hash *) }
63
-
(** CID type - stores the codec and hash *)
73
+
(** CID version *)
74
+
type version = V0 | V1
75
+
76
+
type t = {
77
+
version : version;
78
+
codec : codec;
79
+
hash : string; (** 32-byte SHA-256 hash *)
80
+
}
81
+
(** CID type - stores the version, codec and hash *)
64
82
65
83
(** Encode an unsigned varint to bytes *)
66
84
let encode_varint n =
···
97
115
(** Create a CID from content bytes by hashing with SHA-256 *)
98
116
let create ~codec (content : string) : t =
99
117
let hash = Digestif.SHA256.(to_raw_string (digest_string content)) in
100
-
{ codec; hash }
118
+
{ version = V1; codec; hash }
101
119
102
120
(** Create a CID for DAG-CBOR content *)
103
121
let of_dag_cbor content = create ~codec:DagCbor content
···
108
126
(** Create a CID from a pre-computed hash *)
109
127
let of_hash ~codec hash : (t, error) result =
110
128
if String.length hash <> 32 then Error `Invalid_hash_length
111
-
else Ok { codec; hash }
129
+
else Ok { version = V1; codec; hash }
130
+
131
+
(** Get the version *)
132
+
let cid_version t = t.version
112
133
113
134
(** Get the codec *)
114
135
let codec t = t.codec
···
130
151
in
131
152
if c <> 0 then c else String.compare t1.hash t2.hash
132
153
133
-
(** Encode CID to binary format (for CBOR tag 42) *)
154
+
(** Encode CID to binary format (for CBOR tag 42). CIDv0 is encoded as just the
155
+
multihash (for compatibility). CIDv1 is encoded as version + codec +
156
+
multihash. *)
134
157
let to_bytes t =
135
-
let version = encode_varint 1 in
136
-
let codec = encode_varint (Multicodec.int_of_codec t.codec) in
137
-
let hash_info = encode_varint Multicodec.sha256 in
138
-
let hash_len = encode_varint 32 in
139
-
version ^ codec ^ hash_info ^ hash_len ^ t.hash
158
+
match t.version with
159
+
| V0 ->
160
+
(* CIDv0: just the multihash (sha256 indicator + length + hash) *)
161
+
let hash_info = encode_varint Multicodec.sha256 in
162
+
let hash_len = encode_varint 32 in
163
+
hash_info ^ hash_len ^ t.hash
164
+
| V1 ->
165
+
let version = encode_varint 1 in
166
+
let codec = encode_varint (Multicodec.int_of_codec t.codec) in
167
+
let hash_info = encode_varint Multicodec.sha256 in
168
+
let hash_len = encode_varint 32 in
169
+
version ^ codec ^ hash_info ^ hash_len ^ t.hash
140
170
141
-
(** Encode CID to string (base32lower with 'b' prefix) *)
171
+
(** Encode CID to string. CIDv0: base58btc (starts with "Qm") CIDv1: base32lower
172
+
with 'b' prefix *)
142
173
let to_string t =
143
-
let bytes = to_bytes t in
144
-
"b" ^ Atproto_multibase.Base32lower.encode (Bytes.of_string bytes)
174
+
match t.version with
175
+
| V0 ->
176
+
(* CIDv0 uses base58btc without multibase prefix *)
177
+
Atproto_multibase.Base58btc.encode (Bytes.of_string (to_bytes t))
178
+
| V1 ->
179
+
let bytes = to_bytes t in
180
+
"b" ^ Atproto_multibase.Base32lower.encode (Bytes.of_string bytes)
145
181
146
-
(** Parse CID from binary bytes *)
147
-
let of_bytes s : (t, error) result =
182
+
(** Parse CIDv0 from binary bytes (just multihash). CIDv0 is: hash_codec
183
+
(varint) + hash_len (varint) + hash (bytes) *)
184
+
let of_bytes_v0 s : (t, error) result =
185
+
let len = String.length s in
186
+
if len < 34 then Error `Cid_too_short
187
+
else
188
+
(* First byte should be 0x12 (sha256) *)
189
+
match decode_varint s 0 with
190
+
| Error e -> Error e
191
+
| Ok (hash_codec, hclen) -> (
192
+
if hash_codec <> Multicodec.sha256 then Error `Invalid_hash_algorithm
193
+
else
194
+
match decode_varint s hclen with
195
+
| Error e -> Error e
196
+
| Ok (hash_len, hllen) ->
197
+
if hash_len <> 32 then Error `Invalid_hash_length
198
+
else
199
+
let hash_start = hclen + hllen in
200
+
if len < hash_start + 32 then Error `Cid_too_short
201
+
else
202
+
let hash = String.sub s hash_start 32 in
203
+
(* CIDv0 implicitly uses dag-pb codec *)
204
+
Ok { version = V0; codec = DagPb; hash })
205
+
206
+
(** Parse CIDv1 from binary bytes *)
207
+
let of_bytes_v1 s : (t, error) result =
148
208
let len = String.length s in
149
209
if len < 4 then Error `Cid_too_short
150
210
else
···
178
238
if len < hash_start + 32 then Error `Cid_too_short
179
239
else
180
240
let hash = String.sub s hash_start 32 in
181
-
Ok { codec; hash })))
241
+
Ok { version = V1; codec; hash })))
242
+
243
+
(** Parse CID from binary bytes. Distinguishes CIDv0 from CIDv1 by first byte:
244
+
- 0x12 (sha256): CIDv0 (just multihash)
245
+
- 0x01: CIDv1 *)
246
+
let of_bytes s : (t, error) result =
247
+
let len = String.length s in
248
+
if len < 2 then Error `Cid_too_short
249
+
else
250
+
let first_byte = Char.code s.[0] in
251
+
if first_byte = 0x12 then
252
+
(* CIDv0: starts with sha256 multicodec *)
253
+
of_bytes_v0 s
254
+
else if first_byte = 0x01 then
255
+
(* CIDv1: starts with version 1 *)
256
+
of_bytes_v1 s
257
+
else Error `Invalid_cid_format
182
258
183
259
(** Decode hex string to bytes *)
184
260
let decode_hex s =
···
248
324
Some buf
249
325
with Exit -> None
250
326
251
-
(** Parse CID from string (multibase encoded) *)
327
+
(** Check if a string looks like a CIDv0 (base58btc encoded, starts with "Qm")
328
+
*)
329
+
let is_cidv0_string s =
330
+
let len = String.length s in
331
+
(* CIDv0 is exactly 46 characters and starts with "Qm" *)
332
+
len = 46 && len >= 2 && s.[0] = 'Q' && s.[1] = 'm'
333
+
334
+
(** Parse CID from string (multibase encoded for CIDv1, base58btc for CIDv0) *)
252
335
let of_string s : (t, error) result =
253
336
let len = String.length s in
254
337
if len < 2 then Error `Cid_too_short
338
+
else if is_cidv0_string s then
339
+
(* CIDv0: base58btc without multibase prefix *)
340
+
match Atproto_multibase.Base58btc.decode s with
341
+
| Ok bytes -> of_bytes_v0 (Bytes.to_string bytes)
342
+
| Error _ -> Error `Invalid_cidv0
255
343
else
256
-
(* Check multibase prefix *)
344
+
(* CIDv1: multibase prefix + encoded data *)
257
345
let prefix = s.[0] in
258
346
let encoded = String.sub s 1 (len - 1) in
259
347
(* AT Protocol uses base32lower ('b') but we also accept others *)
···
272
360
| Ok bytes -> Some bytes
273
361
| Error _ -> None)
274
362
| 'z' -> (
275
-
(* Base58btc *)
363
+
(* Base58btc with multibase prefix *)
276
364
match Atproto_multibase.Base58btc.decode encoded with
277
365
| Ok bytes -> Some bytes
278
366
| Error _ -> None)
···
304
392
(** Validate a CID string without fully parsing *)
305
393
let is_valid s = match of_string s with Ok _ -> true | Error _ -> false
306
394
307
-
(** Regex-based CID syntax validation (matches Go implementation). This is used
308
-
for Lexicon validation - it checks the string format without fully decoding
309
-
and validating the CID contents.
395
+
(** Regex-based CID syntax validation for AT Protocol (matches Go
396
+
implementation). This is used for Lexicon validation - it checks the string
397
+
format without fully decoding and validating the CID contents.
310
398
311
-
Rules:
399
+
Rules for CIDv1:
312
400
- Length between 8-256 characters
313
401
- Only alphanumeric characters plus '+' and '='
314
-
- Not a CIDv0 (starting with "Qmb") *)
402
+
- Not a legacy CIDv0 (starting with "Qmb")
403
+
404
+
Note: AT Protocol Lexicon validation intentionally excludes CIDv0. Use
405
+
is_valid_cidv0_syntax for validating CIDv0 strings. *)
315
406
let is_valid_syntax s =
316
407
let len = String.length s in
317
408
if len < 8 || len > 256 then false
···
331
422
if valid then check_chars (i + 1) else false
332
423
in
333
424
check_chars 0
425
+
426
+
(** Validate CIDv0 string syntax. CIDv0 is exactly 46 characters and starts with
427
+
"Qm". Uses base58btc alphabet (no 0, O, I, l). *)
428
+
let is_valid_cidv0_syntax s =
429
+
let len = String.length s in
430
+
if len <> 46 then false
431
+
else if not (len >= 2 && s.[0] = 'Q' && s.[1] = 'm') then false
432
+
else
433
+
(* Check all characters are valid base58btc *)
434
+
let base58_alphabet =
435
+
"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
436
+
in
437
+
let rec check_chars i =
438
+
if i >= len then true
439
+
else if String.contains base58_alphabet s.[i] then check_chars (i + 1)
440
+
else false
441
+
in
442
+
check_chars 0
443
+
444
+
(** Validate any CID string syntax (CIDv0 or CIDv1) *)
445
+
let is_valid_any_syntax s = is_valid_syntax s || is_valid_cidv0_syntax s
+223
-46
lib/ipld/dag_cbor.ml
+223
-46
lib/ipld/dag_cbor.ml
···
1
-
(** DAG-CBOR codec for AT Protocol.
1
+
(** DAG-CBOR codec for IPLD and AT Protocol.
2
2
3
3
DAG-CBOR is a deterministic subset of CBOR used for content-addressed data.
4
-
This module provides encoding and decoding with AT Protocol specific rules:
4
+
This module provides encoding and decoding with two modes:
5
+
6
+
- IPLD mode (default): Full IPLD Data Model support including floats
7
+
- AT Protocol mode: Stricter rules where floats are forbidden
8
+
9
+
Common rules for both modes:
5
10
- Map keys are sorted by length first, then lexicographically
6
-
- Floats are not allowed (except integers represented as floats in JSON)
7
11
- Integers must be in JavaScript safe range (-2^53+1 to 2^53-1)
8
-
- CIDs are encoded with CBOR tag 42 *)
12
+
- CIDs are encoded with CBOR tag 42
13
+
14
+
IPLD-specific rules:
15
+
- Floats are encoded as 64-bit IEEE 754 doubles
16
+
- NaN, Infinity, and -Infinity are forbidden
17
+
18
+
AT Protocol-specific rules:
19
+
- Floats are not allowed (except integers represented as floats in JSON) *)
20
+
21
+
(** Decoding mode for DAG-CBOR *)
22
+
type decode_mode =
23
+
| Ipld (** Full IPLD spec: allows floats (no NaN/Inf) *)
24
+
| Atproto (** AT Protocol spec: no floats allowed *)
9
25
10
26
type error =
11
27
[ `Float_not_allowed
28
+
| `Float_nan_not_allowed
29
+
| `Float_infinity_not_allowed
12
30
| `Integer_out_of_range
13
31
| `Invalid_cid
14
32
| `Invalid_tag
15
33
| `Invalid_bytes
34
+
| `Indefinite_length
16
35
| `Decode_error of string ]
17
36
18
37
let pp_error fmt = function
19
38
| `Float_not_allowed ->
20
-
Format.fprintf fmt "floats are not allowed in DAG-CBOR"
39
+
Format.fprintf fmt "floats are not allowed in AT Protocol DAG-CBOR"
40
+
| `Float_nan_not_allowed ->
41
+
Format.fprintf fmt "NaN is not allowed in DAG-CBOR"
42
+
| `Float_infinity_not_allowed ->
43
+
Format.fprintf fmt "Infinity is not allowed in DAG-CBOR"
21
44
| `Integer_out_of_range ->
22
45
Format.fprintf fmt "integer out of JavaScript safe range"
23
46
| `Invalid_cid -> Format.fprintf fmt "invalid CID in tag 42"
24
47
| `Invalid_tag -> Format.fprintf fmt "unsupported CBOR tag"
25
48
| `Invalid_bytes -> Format.fprintf fmt "invalid bytes encoding"
49
+
| `Indefinite_length ->
50
+
Format.fprintf fmt "indefinite-length encoding not allowed in DAG-CBOR"
26
51
| `Decode_error msg -> Format.fprintf fmt "decode error: %s" msg
27
52
28
53
let error_to_string e = Format.asprintf "%a" pp_error e
29
54
30
-
(** AT Protocol data model value type *)
55
+
(** IPLD Data Model value type.
56
+
57
+
The 9 IPLD kinds: Null, Boolean, Integer, Float, String, Bytes, List, Map,
58
+
Link. Note: In AT Protocol mode, Float is forbidden. *)
31
59
type value =
32
60
| Null
33
61
| Bool of bool
34
62
| Int of int64 (** JavaScript safe integer range *)
63
+
| Float of float (** IEEE 754 double; NaN/Infinity forbidden *)
35
64
| String of string
36
65
| Bytes of string (** Raw bytes *)
37
66
| Array of value list
38
67
| Map of (string * value) list (** Keys are sorted *)
39
68
| Link of Cid.t (** CID link *)
69
+
70
+
(** Check if a float is NaN *)
71
+
let is_nan f = Float.is_nan f
72
+
73
+
(** Check if a float is infinite (positive or negative) *)
74
+
let is_infinite f =
75
+
match classify_float f with FP_infinite -> true | _ -> false
76
+
77
+
(** Check if a float is valid for DAG-CBOR (no NaN or Infinity) *)
78
+
let is_valid_float f = not (is_nan f || is_infinite f)
40
79
41
80
(** JavaScript safe integer range *)
42
81
let js_safe_min = -9007199254740991L (* -(2^53 - 1) *)
···
52
91
(** Sort map keys according to DAG-CBOR rules *)
53
92
let sort_map pairs = List.sort (fun (k1, _) (k2, _) -> compare_keys k1 k2) pairs
54
93
55
-
(** Encode a value to DAG-CBOR bytes *)
94
+
(** Check CBOR bytes for DAG-CBOR strictness violations.
95
+
96
+
DAG-CBOR forbids:
97
+
- Indefinite-length encoding (additional info 31 for types 0-5)
98
+
- Break stop code (0xFF)
99
+
100
+
Returns Ok () if valid, Error `Indefinite_length if invalid. *)
101
+
let check_strictness (s : string) : (unit, error) result =
102
+
let len = String.length s in
103
+
let rec check pos =
104
+
if pos >= len then Ok ()
105
+
else
106
+
let byte = Char.code s.[pos] in
107
+
let major_type = byte lsr 5 in
108
+
let additional_info = byte land 0x1F in
109
+
110
+
(* Check for indefinite-length encoding or break code *)
111
+
if byte = 0xFF then Error `Indefinite_length (* Break stop code *)
112
+
else if additional_info = 31 && major_type <= 5 then
113
+
Error `Indefinite_length (* Indefinite-length indicator *)
114
+
else
115
+
(* Calculate the size of this item to skip to next *)
116
+
let header_size, data_size =
117
+
if additional_info < 24 then (1, 0)
118
+
else if additional_info = 24 then (2, 0)
119
+
else if additional_info = 25 then (3, 0)
120
+
else if additional_info = 26 then (5, 0)
121
+
else if additional_info = 27 then (9, 0)
122
+
else (1, 0)
123
+
(* reserved/invalid, will be caught by decoder *)
124
+
in
125
+
let value_size =
126
+
match major_type with
127
+
| 2 | 3 ->
128
+
(* bytes or text *)
129
+
if additional_info < 24 then additional_info
130
+
else if additional_info = 24 && pos + 1 < len then
131
+
Char.code s.[pos + 1]
132
+
else if additional_info = 25 && pos + 2 < len then
133
+
(Char.code s.[pos + 1] lsl 8) lor Char.code s.[pos + 2]
134
+
else if additional_info = 26 && pos + 4 < len then
135
+
(Char.code s.[pos + 1] lsl 24)
136
+
lor (Char.code s.[pos + 2] lsl 16)
137
+
lor (Char.code s.[pos + 3] lsl 8)
138
+
lor Char.code s.[pos + 4]
139
+
else 0 (* will be handled by decoder *)
140
+
| _ -> 0
141
+
in
142
+
let next_pos = pos + header_size + data_size + value_size in
143
+
(* For complex types (arrays, maps, tags), we need to recurse into contents *)
144
+
(* For now, we do a simple linear scan which catches the common cases *)
145
+
check next_pos
146
+
in
147
+
check 0
148
+
149
+
(** Encode a value to DAG-CBOR bytes.
150
+
151
+
Floats are encoded as 64-bit IEEE 754 doubles. NaN and Infinity values will
152
+
raise Invalid_argument. *)
56
153
let encode (v : value) : string =
57
154
let rec to_cbor = function
58
155
| Null -> `Null
59
156
| Bool b -> `Bool b
60
157
| Int i -> `Int (Int64.to_int i) (* CBOR library uses int *)
158
+
| Float f ->
159
+
if not (is_valid_float f) then
160
+
invalid_arg "DAG-CBOR: NaN and Infinity are not allowed"
161
+
else `Float f
61
162
| String s -> `Text s
62
163
| Bytes b -> `Bytes b
63
164
| Array arr -> `Array (List.map to_cbor arr)
···
71
172
in
72
173
CBOR.Simple.encode (to_cbor v)
73
174
74
-
(** Decode DAG-CBOR bytes to a value *)
75
-
let decode (s : string) : (value, error) result =
175
+
(** Decode DAG-CBOR bytes to a value.
176
+
177
+
@param mode
178
+
Decoding mode: [Ipld] allows floats, [Atproto] forbids them. Defaults to
179
+
[Atproto] for backward compatibility.
180
+
@param strict
181
+
If true (default), check for DAG-CBOR strictness violations like
182
+
indefinite-length encoding before decoding. *)
183
+
let decode ?(mode = Atproto) ?(strict = true) (s : string) :
184
+
(value, error) result =
76
185
let rec from_cbor = function
77
186
| `Null -> Ok Null
78
187
| `Undefined -> Ok Null (* Treat undefined as null *)
···
82
191
if i64 < js_safe_min || i64 > js_safe_max then
83
192
Error `Integer_out_of_range
84
193
else Ok (Int i64)
85
-
| `Float f ->
86
-
(* Check if it's actually an integer *)
87
-
if Float.is_integer f then
88
-
let i = Int64.of_float f in
89
-
if i < js_safe_min || i > js_safe_max then Error `Integer_out_of_range
90
-
else Ok (Int i)
91
-
else Error `Float_not_allowed
194
+
| `Float f -> (
195
+
match mode with
196
+
| Atproto ->
197
+
(* AT Protocol: only accept floats that are integers *)
198
+
if Float.is_integer f then
199
+
let i = Int64.of_float f in
200
+
if i < js_safe_min || i > js_safe_max then
201
+
Error `Integer_out_of_range
202
+
else Ok (Int i)
203
+
else Error `Float_not_allowed
204
+
| Ipld ->
205
+
(* IPLD: accept floats but reject NaN/Infinity *)
206
+
if is_nan f then Error `Float_nan_not_allowed
207
+
else if is_infinite f then Error `Float_infinity_not_allowed
208
+
else if Float.is_integer f then
209
+
(* Convert integer-valued floats to Int for consistency *)
210
+
let i = Int64.of_float f in
211
+
if i >= js_safe_min && i <= js_safe_max then Ok (Int i)
212
+
else Ok (Float f)
213
+
else Ok (Float f))
92
214
| `Text s -> Ok (String s)
93
215
| `Bytes b -> Ok (Bytes b)
94
216
| `Array arr ->
···
125
247
| `Tag (_, _) -> Error `Invalid_tag
126
248
| `Simple _ -> Error (`Decode_error "simple values not supported")
127
249
in
128
-
try
129
-
let cbor = CBOR.Simple.decode s in
130
-
from_cbor cbor
131
-
with CBOR.Error msg -> Error (`Decode_error msg)
250
+
(* First check strictness if requested *)
251
+
match if strict then check_strictness s else Ok () with
252
+
| Error e -> Error e
253
+
| Ok () -> (
254
+
try
255
+
let cbor = CBOR.Simple.decode s in
256
+
from_cbor cbor
257
+
with CBOR.Error msg -> Error (`Decode_error msg))
132
258
133
259
(** Decode DAG-CBOR bytes, returning the value and any remaining bytes. Useful
134
-
for decoding concatenated CBOR values (like firehose frames). *)
135
-
let decode_partial (s : string) : (value * string, error) result =
260
+
for decoding concatenated CBOR values (like firehose frames).
261
+
262
+
@param mode
263
+
Decoding mode: [Ipld] allows floats, [Atproto] forbids them. Defaults to
264
+
[Atproto] for backward compatibility.
265
+
@param strict
266
+
If true (default), check for DAG-CBOR strictness violations like
267
+
indefinite-length encoding before decoding. Note: strictness check is
268
+
performed on the entire input, not just the first value. *)
269
+
let decode_partial ?(mode = Atproto) ?(strict = true) (s : string) :
270
+
(value * string, error) result =
136
271
let rec from_cbor = function
137
272
| `Null -> Ok Null
138
273
| `Undefined -> Ok Null
···
142
277
if i64 < js_safe_min || i64 > js_safe_max then
143
278
Error `Integer_out_of_range
144
279
else Ok (Int i64)
145
-
| `Float f ->
146
-
if Float.is_integer f then
147
-
let i = Int64.of_float f in
148
-
if i < js_safe_min || i > js_safe_max then Error `Integer_out_of_range
149
-
else Ok (Int i)
150
-
else Error `Float_not_allowed
280
+
| `Float f -> (
281
+
match mode with
282
+
| Atproto ->
283
+
if Float.is_integer f then
284
+
let i = Int64.of_float f in
285
+
if i < js_safe_min || i > js_safe_max then
286
+
Error `Integer_out_of_range
287
+
else Ok (Int i)
288
+
else Error `Float_not_allowed
289
+
| Ipld ->
290
+
if is_nan f then Error `Float_nan_not_allowed
291
+
else if is_infinite f then Error `Float_infinity_not_allowed
292
+
else if Float.is_integer f then
293
+
let i = Int64.of_float f in
294
+
if i >= js_safe_min && i <= js_safe_max then Ok (Int i)
295
+
else Ok (Float f)
296
+
else Ok (Float f))
151
297
| `Text s -> Ok (String s)
152
298
| `Bytes b -> Ok (Bytes b)
153
299
| `Array arr ->
···
182
328
| `Tag (_, _) -> Error `Invalid_tag
183
329
| `Simple _ -> Error (`Decode_error "simple values not supported")
184
330
in
185
-
try
186
-
let cbor, rest = CBOR.Simple.decode_partial s in
187
-
match from_cbor cbor with Ok v -> Ok (v, rest) | Error e -> Error e
188
-
with CBOR.Error msg -> Error (`Decode_error msg)
331
+
(* First check strictness if requested *)
332
+
match if strict then check_strictness s else Ok () with
333
+
| Error e -> Error e
334
+
| Ok () -> (
335
+
try
336
+
let cbor, rest = CBOR.Simple.decode_partial s in
337
+
match from_cbor cbor with Ok v -> Ok (v, rest) | Error e -> Error e
338
+
with CBOR.Error msg -> Error (`Decode_error msg))
189
339
190
-
(** Check if a value is valid according to AT Protocol rules *)
340
+
(** Check if a value is valid according to IPLD DAG-CBOR rules. Note: This
341
+
checks IPLD validity (floats allowed but no NaN/Inf). For AT Protocol
342
+
validity, also ensure no Float values exist. *)
191
343
let rec is_valid = function
192
344
| Null | Bool _ | String _ | Bytes _ | Link _ -> true
193
345
| Int i -> i >= js_safe_min && i <= js_safe_max
346
+
| Float f -> is_valid_float f
194
347
| Array arr -> List.for_all is_valid arr
195
348
| Map pairs -> List.for_all (fun (_, v) -> is_valid v) pairs
196
349
350
+
(** Check if a value is valid for AT Protocol (no floats allowed) *)
351
+
let rec is_atproto_valid = function
352
+
| Null | Bool _ | String _ | Bytes _ | Link _ -> true
353
+
| Int i -> i >= js_safe_min && i <= js_safe_max
354
+
| Float _ -> false (* AT Protocol forbids floats *)
355
+
| Array arr -> List.for_all is_atproto_valid arr
356
+
| Map pairs -> List.for_all (fun (_, v) -> is_atproto_valid v) pairs
357
+
197
358
(** Equality *)
198
359
let rec equal v1 v2 =
199
360
match (v1, v2) with
200
361
| Null, Null -> true
201
362
| Bool b1, Bool b2 -> b1 = b2
202
363
| Int i1, Int i2 -> i1 = i2
364
+
| Float f1, Float f2 -> f1 = f2
203
365
| String s1, String s2 -> s1 = s2
204
366
| Bytes b1, Bytes b2 -> b1 = b2
205
367
| Array a1, Array a2 ->
···
238
400
| `List of json list
239
401
| `Assoc of (string * json) list ]
240
402
241
-
(** Convert a DAG-CBOR value to AT Protocol JSON representation.
403
+
(** Convert a DAG-CBOR value to JSON representation.
242
404
- Links become {"$link": "cid-string"}
243
-
- Bytes become {"$bytes": "base64-string"} *)
405
+
- Bytes become {"$bytes": "base64-string"}
406
+
- Floats are represented as JSON floats *)
244
407
let rec to_json (v : value) : json =
245
408
match v with
246
409
| Null -> `Null
247
410
| Bool b -> `Bool b
248
411
| Int i -> `Int (Int64.to_int i)
412
+
| Float f -> `Float f
249
413
| String s -> `String s
250
414
| Bytes b -> `Assoc [ ("$bytes", `String (base64_encode b)) ]
251
415
| Array arr -> `List (List.map to_json arr)
252
416
| Map pairs -> `Assoc (List.map (fun (k, v) -> (k, to_json v)) pairs)
253
417
| Link cid -> `Assoc [ ("$link", `String (Cid.to_string cid)) ]
254
418
255
-
(** Convert AT Protocol JSON to DAG-CBOR value.
419
+
(** Convert JSON to DAG-CBOR value.
256
420
- {"$link": "..."} becomes a Link
257
421
- {"$bytes": "..."} becomes Bytes
258
-
- Floats that are integers are converted to Int *)
259
-
let rec of_json (j : json) : (value, error) result =
422
+
423
+
@param mode Decoding mode: [Ipld] allows floats, [Atproto] forbids them.
424
+
Defaults to [Atproto] for backward compatibility. *)
425
+
let rec of_json ?(mode = Atproto) (j : json) : (value, error) result =
260
426
match j with
261
427
| `Null -> Ok Null
262
428
| `Bool b -> Ok (Bool b)
···
264
430
let i64 = Int64.of_int i in
265
431
if i64 < js_safe_min || i64 > js_safe_max then Error `Integer_out_of_range
266
432
else Ok (Int i64)
267
-
| `Float f ->
268
-
if Float.is_integer f then
269
-
let i = Int64.of_float f in
270
-
if i < js_safe_min || i > js_safe_max then Error `Integer_out_of_range
271
-
else Ok (Int i)
272
-
else Error `Float_not_allowed
433
+
| `Float f -> (
434
+
match mode with
435
+
| Atproto ->
436
+
if Float.is_integer f then
437
+
let i = Int64.of_float f in
438
+
if i < js_safe_min || i > js_safe_max then
439
+
Error `Integer_out_of_range
440
+
else Ok (Int i)
441
+
else Error `Float_not_allowed
442
+
| Ipld ->
443
+
if is_nan f then Error `Float_nan_not_allowed
444
+
else if is_infinite f then Error `Float_infinity_not_allowed
445
+
else if Float.is_integer f then
446
+
let i = Int64.of_float f in
447
+
if i >= js_safe_min && i <= js_safe_max then Ok (Int i)
448
+
else Ok (Float f)
449
+
else Ok (Float f))
273
450
| `String s -> Ok (String s)
274
451
| `List arr ->
275
452
let rec convert_list acc = function
276
453
| [] -> Ok (Array (List.rev acc))
277
454
| x :: xs -> (
278
-
match of_json x with
455
+
match of_json ~mode x with
279
456
| Ok v -> convert_list (v :: acc) xs
280
457
| Error e -> Error e)
281
458
in
···
296
473
let rec convert_pairs acc = function
297
474
| [] -> Ok (Map (sort_map (List.rev acc)))
298
475
| (k, v) :: rest -> (
299
-
match of_json v with
476
+
match of_json ~mode v with
300
477
| Ok value -> convert_pairs ((k, value) :: acc) rest
301
478
| Error e -> Error e)
302
479
in
+215
lib/ipld/dag_json.ml
+215
lib/ipld/dag_json.ml
···
1
+
(** DAG-JSON codec for IPLD.
2
+
3
+
DAG-JSON is a JSON serialization format for IPLD data that uses special
4
+
object structures to represent Links and Bytes:
5
+
6
+
- Links: {"/": "<cid-string>"}
7
+
- Bytes: {"/": {"bytes": "<base64>"}}
8
+
9
+
This format is used for human-readable IPLD data exchange and debugging.
10
+
11
+
@see <https://ipld.io/specs/codecs/dag-json/spec/> DAG-JSON specification *)
12
+
13
+
type error =
14
+
[ `Invalid_cid of string
15
+
| `Invalid_bytes of string
16
+
| `Invalid_structure of string
17
+
| `Unsupported_value of string ]
18
+
(** Error types for DAG-JSON operations *)
19
+
20
+
(** Pretty-print errors *)
21
+
let pp_error fmt = function
22
+
| `Invalid_cid msg -> Format.fprintf fmt "invalid CID: %s" msg
23
+
| `Invalid_bytes msg -> Format.fprintf fmt "invalid bytes: %s" msg
24
+
| `Invalid_structure msg -> Format.fprintf fmt "invalid structure: %s" msg
25
+
| `Unsupported_value msg -> Format.fprintf fmt "unsupported value: %s" msg
26
+
27
+
let error_to_string e = Format.asprintf "%a" pp_error e
28
+
29
+
type json =
30
+
[ `Null
31
+
| `Bool of bool
32
+
| `Int of int
33
+
| `Float of float
34
+
| `String of string
35
+
| `List of json list
36
+
| `Assoc of (string * json) list ]
37
+
(** JSON type compatible with common JSON libraries *)
38
+
39
+
(** Encode bytes to base64 (RFC 4648, no padding) *)
40
+
let base64_encode bytes = Base64.encode_exn ~pad:false bytes
41
+
42
+
(** Decode base64 to bytes (handles missing padding) *)
43
+
let base64_decode s =
44
+
match Base64.decode ~pad:false s with
45
+
| Ok decoded -> Some decoded
46
+
| Error _ -> None
47
+
48
+
(** Encode an IPLD value to DAG-JSON.
49
+
50
+
DAG-JSON encoding rules:
51
+
- Links become {"/": "<cid-string>"}
52
+
- Bytes become {"/": {"bytes": "<base64>"}}
53
+
- All other values encode as standard JSON *)
54
+
let rec encode (v : Dag_cbor.value) : json =
55
+
match v with
56
+
| Dag_cbor.Null -> `Null
57
+
| Dag_cbor.Bool b -> `Bool b
58
+
| Dag_cbor.Int i ->
59
+
(* Check if it fits in int range *)
60
+
let i_int = Int64.to_int i in
61
+
if Int64.of_int i_int = i then `Int i_int else `Float (Int64.to_float i)
62
+
| Dag_cbor.Float f -> `Float f
63
+
| Dag_cbor.String s -> `String s
64
+
| Dag_cbor.Bytes b ->
65
+
(* Bytes: {"/": {"bytes": "<base64>"}} *)
66
+
`Assoc [ ("/", `Assoc [ ("bytes", `String (base64_encode b)) ]) ]
67
+
| Dag_cbor.Array arr -> `List (List.map encode arr)
68
+
| Dag_cbor.Map pairs -> `Assoc (List.map (fun (k, v) -> (k, encode v)) pairs)
69
+
| Dag_cbor.Link cid ->
70
+
(* Link: {"/": "<cid-string>"} *)
71
+
`Assoc [ ("/", `String (Cid.to_string cid)) ]
72
+
73
+
(** Check if a JSON object is a DAG-JSON link ({"/": "<cid-string>"}) *)
74
+
let is_link_object = function `Assoc [ ("/", `String _) ] -> true | _ -> false
75
+
76
+
(** Check if a JSON object is a DAG-JSON bytes ({"/": {"bytes": "<base64>"}}) *)
77
+
let is_bytes_object = function
78
+
| `Assoc [ ("/", `Assoc [ ("bytes", `String _) ]) ] -> true
79
+
| _ -> false
80
+
81
+
(** Decode DAG-JSON to an IPLD value.
82
+
83
+
@param mode
84
+
Decoding mode: [Ipld] allows floats, [Atproto] forbids non-integer floats.
85
+
Defaults to [Atproto] for AT Protocol compatibility. *)
86
+
let rec decode ?(mode = Dag_cbor.Atproto) (j : json) :
87
+
(Dag_cbor.value, error) result =
88
+
match j with
89
+
| `Null -> Ok Dag_cbor.Null
90
+
| `Bool b -> Ok (Dag_cbor.Bool b)
91
+
| `Int i -> Ok (Dag_cbor.Int (Int64.of_int i))
92
+
| `Float f -> (
93
+
match mode with
94
+
| Dag_cbor.Atproto ->
95
+
if Float.is_integer f then
96
+
let i = Int64.of_float f in
97
+
Ok (Dag_cbor.Int i)
98
+
else
99
+
Error
100
+
(`Unsupported_value
101
+
"non-integer floats not allowed in AT Protocol")
102
+
| Dag_cbor.Ipld ->
103
+
if Float.is_nan f then
104
+
Error (`Unsupported_value "NaN not allowed in DAG-JSON")
105
+
else if Float.is_integer f then
106
+
(* Convert integer-valued floats to Int *)
107
+
let i = Int64.of_float f in
108
+
if i >= Dag_cbor.js_safe_min && i <= Dag_cbor.js_safe_max then
109
+
Ok (Dag_cbor.Int i)
110
+
else Ok (Dag_cbor.Float f)
111
+
else Ok (Dag_cbor.Float f))
112
+
| `String s -> Ok (Dag_cbor.String s)
113
+
| `List arr ->
114
+
let rec decode_list acc = function
115
+
| [] -> Ok (Dag_cbor.Array (List.rev acc))
116
+
| x :: xs -> (
117
+
match decode ~mode x with
118
+
| Ok v -> decode_list (v :: acc) xs
119
+
| Error e -> Error e)
120
+
in
121
+
decode_list [] arr
122
+
| `Assoc pairs -> decode_object ~mode pairs
123
+
124
+
and decode_object ~mode pairs =
125
+
(* Check for special DAG-JSON structures *)
126
+
match pairs with
127
+
| [ ("/", `String cid_str) ] -> (
128
+
(* Link: {"/": "<cid-string>"} *)
129
+
match Cid.of_string cid_str with
130
+
| Ok cid -> Ok (Dag_cbor.Link cid)
131
+
| Error _ -> Error (`Invalid_cid cid_str))
132
+
| [ ("/", `Assoc [ ("bytes", `String b64) ]) ] -> (
133
+
(* Bytes: {"/": {"bytes": "<base64>"}} *)
134
+
match base64_decode b64 with
135
+
| Some bytes -> Ok (Dag_cbor.Bytes bytes)
136
+
| None -> Error (`Invalid_bytes ("invalid base64: " ^ b64)))
137
+
| [ ("/", _) ] ->
138
+
(* Invalid special object structure *)
139
+
Error (`Invalid_structure "invalid DAG-JSON special object")
140
+
| _ ->
141
+
(* Regular object - check for "/" key which would be ambiguous *)
142
+
if List.mem_assoc "/" pairs then
143
+
Error
144
+
(`Invalid_structure
145
+
"objects cannot have '/' key (reserved for DAG-JSON)")
146
+
else
147
+
let rec decode_pairs acc = function
148
+
| [] -> Ok (Dag_cbor.Map (Dag_cbor.sort_map (List.rev acc)))
149
+
| (k, v) :: rest -> (
150
+
match decode ~mode v with
151
+
| Ok value -> decode_pairs ((k, value) :: acc) rest
152
+
| Error e -> Error e)
153
+
in
154
+
decode_pairs [] pairs
155
+
156
+
(** Encode an IPLD value to a DAG-JSON string *)
157
+
let encode_string (v : Dag_cbor.value) : string =
158
+
let json = encode v in
159
+
(* Convert to Yojson for string serialization *)
160
+
let rec to_yojson : json -> Yojson.Basic.t = function
161
+
| `Null -> `Null
162
+
| `Bool b -> `Bool b
163
+
| `Int i -> `Int i
164
+
| `Float f -> `Float f
165
+
| `String s -> `String s
166
+
| `List l -> `List (List.map to_yojson l)
167
+
| `Assoc pairs -> `Assoc (List.map (fun (k, v) -> (k, to_yojson v)) pairs)
168
+
in
169
+
Yojson.Basic.to_string (to_yojson json)
170
+
171
+
(** Encode an IPLD value to a pretty-printed DAG-JSON string *)
172
+
let encode_string_pretty (v : Dag_cbor.value) : string =
173
+
let json = encode v in
174
+
let rec to_yojson : json -> Yojson.Basic.t = function
175
+
| `Null -> `Null
176
+
| `Bool b -> `Bool b
177
+
| `Int i -> `Int i
178
+
| `Float f -> `Float f
179
+
| `String s -> `String s
180
+
| `List l -> `List (List.map to_yojson l)
181
+
| `Assoc pairs -> `Assoc (List.map (fun (k, v) -> (k, to_yojson v)) pairs)
182
+
in
183
+
Yojson.Basic.pretty_to_string (to_yojson json)
184
+
185
+
(** Decode a DAG-JSON string to an IPLD value *)
186
+
let decode_string ?(mode = Dag_cbor.Atproto) (s : string) :
187
+
(Dag_cbor.value, error) result =
188
+
try
189
+
let yojson = Yojson.Basic.from_string s in
190
+
let rec of_yojson : Yojson.Basic.t -> json = function
191
+
| `Null -> `Null
192
+
| `Bool b -> `Bool b
193
+
| `Int i -> `Int i
194
+
| `Float f -> `Float f
195
+
| `String s -> `String s
196
+
| `List l -> `List (List.map of_yojson l)
197
+
| `Assoc pairs -> `Assoc (List.map (fun (k, v) -> (k, of_yojson v)) pairs)
198
+
in
199
+
decode ~mode (of_yojson yojson)
200
+
with Yojson.Json_error msg ->
201
+
Error (`Invalid_structure ("JSON parse error: " ^ msg))
202
+
203
+
(** Convert DAG-JSON to DAG-CBOR bytes *)
204
+
let to_dag_cbor ?(mode = Dag_cbor.Atproto) (j : json) : (string, error) result =
205
+
match decode ~mode j with
206
+
| Ok value -> Ok (Dag_cbor.encode value)
207
+
| Error e -> Error e
208
+
209
+
(** Convert DAG-CBOR bytes to DAG-JSON. Returns a combined error type that
210
+
includes both DAG-JSON and DAG-CBOR errors. *)
211
+
let of_dag_cbor ?(mode = Dag_cbor.Atproto) (bytes : string) :
212
+
(json, [ error | `Cbor_error of Dag_cbor.error ]) result =
213
+
match Dag_cbor.decode ~mode bytes with
214
+
| Ok value -> Ok (encode value)
215
+
| Error e -> Error (`Cbor_error e)
+637
test/ipld/test_ipld.ml
+637
test/ipld/test_ipld.ml
···
139
139
Alcotest.(check string)
140
140
"same string" (Cid.to_string cid1) (Cid.to_string cid2)
141
141
142
+
(* === CIDv0 tests === *)
143
+
144
+
let test_cidv0_parse () =
145
+
(* Known CIDv0 from IPFS - "hello world" *)
146
+
let cidv0_str = "QmWATWQ7fVPP2EFGu71UkfnqhYXDYH566qy47CnJDgvs8u" in
147
+
match Cid.of_string cidv0_str with
148
+
| Ok cid ->
149
+
(* CIDv0 should have version V0 *)
150
+
Alcotest.(check bool) "is V0" true (Cid.cid_version cid = Cid.V0);
151
+
(* CIDv0 implicitly uses dag-pb codec *)
152
+
Alcotest.(check bool) "codec is DagPb" true (Cid.codec cid = Cid.DagPb);
153
+
(* Hash should be 32 bytes *)
154
+
Alcotest.(check int) "hash length" 32 (String.length (Cid.hash cid))
155
+
| Error e ->
156
+
Alcotest.fail
157
+
(Printf.sprintf "CIDv0 parse failed: %s" (Cid.error_to_string e))
158
+
159
+
let test_cidv0_roundtrip () =
160
+
(* Parse a CIDv0 and convert back to string *)
161
+
let cidv0_str = "QmWATWQ7fVPP2EFGu71UkfnqhYXDYH566qy47CnJDgvs8u" in
162
+
match Cid.of_string cidv0_str with
163
+
| Ok cid ->
164
+
let encoded = Cid.to_string cid in
165
+
(* CIDv0 roundtrip should produce same base58btc string *)
166
+
Alcotest.(check string) "roundtrip" cidv0_str encoded
167
+
| Error e ->
168
+
Alcotest.fail
169
+
(Printf.sprintf "CIDv0 parse failed: %s" (Cid.error_to_string e))
170
+
171
+
let test_cidv0_binary_roundtrip () =
172
+
(* Parse CIDv0, encode to bytes, decode, verify equality *)
173
+
let cidv0_str = "QmWATWQ7fVPP2EFGu71UkfnqhYXDYH566qy47CnJDgvs8u" in
174
+
match Cid.of_string cidv0_str with
175
+
| Ok cid -> (
176
+
let bytes = Cid.to_bytes cid in
177
+
match Cid.of_bytes bytes with
178
+
| Ok cid2 ->
179
+
Alcotest.(check bool) "binary roundtrip" true (Cid.equal cid cid2)
180
+
| Error e ->
181
+
Alcotest.fail
182
+
(Printf.sprintf "binary roundtrip failed: %s"
183
+
(Cid.error_to_string e)))
184
+
| Error e ->
185
+
Alcotest.fail
186
+
(Printf.sprintf "CIDv0 parse failed: %s" (Cid.error_to_string e))
187
+
188
+
let test_cidv0_syntax () =
189
+
(* Test CIDv0 syntax validation *)
190
+
let valid_v0 = "QmWATWQ7fVPP2EFGu71UkfnqhYXDYH566qy47CnJDgvs8u" in
191
+
Alcotest.(check bool)
192
+
"valid CIDv0 syntax" true
193
+
(Cid.is_valid_cidv0_syntax valid_v0);
194
+
195
+
(* AT Protocol syntax validation uses a heuristic (rejects "Qmb" prefix) *)
196
+
(* CIDv0s not starting with "Qmb" may pass syntax validation but should
197
+
still fail full parsing as they're not CIDv1 *)
198
+
let qmb_v0 = "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR" in
199
+
Alcotest.(check bool)
200
+
"AT Protocol rejects Qmb prefix" false
201
+
(Cid.is_valid_syntax qmb_v0);
202
+
203
+
(* is_valid_any_syntax should accept both CIDv0 and CIDv1 *)
204
+
Alcotest.(check bool)
205
+
"any syntax accepts CIDv0" true
206
+
(Cid.is_valid_any_syntax valid_v0);
207
+
208
+
(* Invalid CIDv0 (wrong length) *)
209
+
let invalid_v0 = "QmTooShort" in
210
+
Alcotest.(check bool)
211
+
"invalid CIDv0 too short" false
212
+
(Cid.is_valid_cidv0_syntax invalid_v0)
213
+
214
+
let test_cidv1_vs_cidv0 () =
215
+
(* CIDv1 should have version V1 *)
216
+
let content = "test content" in
217
+
let cid = Cid.of_dag_cbor content in
218
+
Alcotest.(check bool) "CIDv1 is V1" true (Cid.cid_version cid = Cid.V1);
219
+
220
+
(* CIDv1 string should start with 'b' (base32lower) *)
221
+
let cid_str = Cid.to_string cid in
222
+
Alcotest.(check bool) "CIDv1 starts with 'b'" true (cid_str.[0] = 'b')
223
+
142
224
(* === Test suites === *)
143
225
144
226
let cid_parsing_tests =
···
153
235
Alcotest.test_case "create raw CID" `Quick test_cid_raw;
154
236
Alcotest.test_case "binary roundtrip" `Quick test_cid_binary_roundtrip;
155
237
Alcotest.test_case "deterministic" `Quick test_deterministic;
238
+
]
239
+
240
+
let cidv0_tests =
241
+
[
242
+
Alcotest.test_case "parse CIDv0" `Quick test_cidv0_parse;
243
+
Alcotest.test_case "CIDv0 roundtrip" `Quick test_cidv0_roundtrip;
244
+
Alcotest.test_case "CIDv0 binary roundtrip" `Quick
245
+
test_cidv0_binary_roundtrip;
246
+
Alcotest.test_case "CIDv0 syntax" `Quick test_cidv0_syntax;
247
+
Alcotest.test_case "CIDv1 vs CIDv0" `Quick test_cidv1_vs_cidv0;
156
248
]
157
249
158
250
(* === DAG-CBOR tests === *)
···
331
423
Alcotest.test_case "JSON $bytes" `Quick test_dag_cbor_json_bytes;
332
424
]
333
425
426
+
(* === Float tests (IPLD mode) === *)
427
+
428
+
let test_float_encode_decode () =
429
+
(* Test Float encoding and decoding in IPLD mode *)
430
+
(* Note: Integer-valued floats like 0.0 are converted to Int *)
431
+
let test_cases =
432
+
[
433
+
1.5;
434
+
-2.5;
435
+
3.14159265358979;
436
+
1e10 +. 0.5;
437
+
(* Add 0.5 to ensure not integer *)
438
+
1e-10;
439
+
]
440
+
in
441
+
List.iter
442
+
(fun f ->
443
+
let value = Dag_cbor.Float f in
444
+
let encoded = Dag_cbor.encode value in
445
+
match Dag_cbor.decode ~mode:Dag_cbor.Ipld encoded with
446
+
| Ok (Dag_cbor.Float decoded_f) ->
447
+
Alcotest.(check (float 0.0))
448
+
(Printf.sprintf "float roundtrip: %f" f)
449
+
f decoded_f
450
+
| Ok _ -> Alcotest.fail "expected Float"
451
+
| Error e ->
452
+
Alcotest.fail
453
+
(Printf.sprintf "decode failed: %s" (Dag_cbor.error_to_string e)))
454
+
test_cases
455
+
456
+
let test_float_rejected_atproto () =
457
+
(* Test that non-integer floats are rejected in AT Protocol mode *)
458
+
let value = Dag_cbor.Float 1.5 in
459
+
let encoded = Dag_cbor.encode value in
460
+
match Dag_cbor.decode ~mode:Dag_cbor.Atproto encoded with
461
+
| Ok _ -> Alcotest.fail "should reject float in AT Protocol mode"
462
+
| Error `Float_not_allowed -> ()
463
+
| Error e ->
464
+
Alcotest.fail
465
+
(Printf.sprintf "expected Float_not_allowed, got: %s"
466
+
(Dag_cbor.error_to_string e))
467
+
468
+
let test_float_integer_converted () =
469
+
(* Test that integer-valued floats are converted to Int *)
470
+
let value = Dag_cbor.Float 42.0 in
471
+
let encoded = Dag_cbor.encode value in
472
+
(* In both modes, integer floats become Int *)
473
+
match Dag_cbor.decode ~mode:Dag_cbor.Ipld encoded with
474
+
| Ok (Dag_cbor.Int i) ->
475
+
Alcotest.(check int64) "integer float becomes Int" 42L i
476
+
| Ok _ -> Alcotest.fail "expected Int"
477
+
| Error e ->
478
+
Alcotest.fail
479
+
(Printf.sprintf "decode failed: %s" (Dag_cbor.error_to_string e))
480
+
481
+
let test_float_nan_rejected () =
482
+
(* Test that NaN is rejected *)
483
+
try
484
+
let _ = Dag_cbor.encode (Dag_cbor.Float Float.nan) in
485
+
Alcotest.fail "should reject NaN"
486
+
with Invalid_argument _ -> ()
487
+
488
+
let test_float_infinity_rejected () =
489
+
(* Test that Infinity is rejected *)
490
+
try
491
+
let _ = Dag_cbor.encode (Dag_cbor.Float Float.infinity) in
492
+
Alcotest.fail "should reject Infinity"
493
+
with Invalid_argument _ -> ()
494
+
495
+
let test_float_neg_infinity_rejected () =
496
+
(* Test that -Infinity is rejected *)
497
+
try
498
+
let _ = Dag_cbor.encode (Dag_cbor.Float Float.neg_infinity) in
499
+
Alcotest.fail "should reject -Infinity"
500
+
with Invalid_argument _ -> ()
501
+
502
+
let test_float_in_array () =
503
+
(* Test floats in arrays work in IPLD mode *)
504
+
let value =
505
+
Dag_cbor.Array [ Dag_cbor.Float 1.5; Dag_cbor.Int 42L; Dag_cbor.Float 3.14 ]
506
+
in
507
+
let encoded = Dag_cbor.encode value in
508
+
match Dag_cbor.decode ~mode:Dag_cbor.Ipld encoded with
509
+
| Ok (Dag_cbor.Array [ Dag_cbor.Float f1; Dag_cbor.Int i; Dag_cbor.Float f2 ])
510
+
->
511
+
Alcotest.(check (float 0.0)) "first float" 1.5 f1;
512
+
Alcotest.(check int64) "integer" 42L i;
513
+
Alcotest.(check (float 0.0)) "second float" 3.14 f2
514
+
| Ok _ -> Alcotest.fail "unexpected structure"
515
+
| Error e ->
516
+
Alcotest.fail
517
+
(Printf.sprintf "decode failed: %s" (Dag_cbor.error_to_string e))
518
+
519
+
let test_float_in_map () =
520
+
(* Test floats in maps work in IPLD mode *)
521
+
let value =
522
+
Dag_cbor.Map
523
+
[ ("temperature", Dag_cbor.Float 98.6); ("count", Dag_cbor.Int 5L) ]
524
+
in
525
+
let encoded = Dag_cbor.encode value in
526
+
match Dag_cbor.decode ~mode:Dag_cbor.Ipld encoded with
527
+
| Ok (Dag_cbor.Map pairs) -> (
528
+
let temp = List.assoc "temperature" pairs in
529
+
let count = List.assoc "count" pairs in
530
+
(match temp with
531
+
| Dag_cbor.Float f -> Alcotest.(check (float 0.0)) "temperature" 98.6 f
532
+
| _ -> Alcotest.fail "expected Float for temperature");
533
+
match count with
534
+
| Dag_cbor.Int i -> Alcotest.(check int64) "count" 5L i
535
+
| _ -> Alcotest.fail "expected Int for count")
536
+
| Ok _ -> Alcotest.fail "unexpected structure"
537
+
| Error e ->
538
+
Alcotest.fail
539
+
(Printf.sprintf "decode failed: %s" (Dag_cbor.error_to_string e))
540
+
541
+
let test_float_validity () =
542
+
(* Test is_valid and is_atproto_valid *)
543
+
let valid_float = Dag_cbor.Float 3.14 in
544
+
Alcotest.(check bool)
545
+
"float is IPLD valid" true
546
+
(Dag_cbor.is_valid valid_float);
547
+
Alcotest.(check bool)
548
+
"float is NOT AT Protocol valid" false
549
+
(Dag_cbor.is_atproto_valid valid_float);
550
+
551
+
let int_val = Dag_cbor.Int 42L in
552
+
Alcotest.(check bool) "int is IPLD valid" true (Dag_cbor.is_valid int_val);
553
+
Alcotest.(check bool)
554
+
"int is AT Protocol valid" true
555
+
(Dag_cbor.is_atproto_valid int_val)
556
+
557
+
let test_float_json_ipld () =
558
+
(* Test JSON conversion in IPLD mode *)
559
+
let json : Dag_cbor.json = `Float 3.14 in
560
+
match Dag_cbor.of_json ~mode:Dag_cbor.Ipld json with
561
+
| Ok (Dag_cbor.Float f) ->
562
+
Alcotest.(check (float 0.0)) "float from JSON" 3.14 f
563
+
| Ok _ -> Alcotest.fail "expected Float"
564
+
| Error e ->
565
+
Alcotest.fail
566
+
(Printf.sprintf "parse failed: %s" (Dag_cbor.error_to_string e))
567
+
568
+
let test_float_json_atproto () =
569
+
(* Test JSON conversion rejects floats in AT Protocol mode *)
570
+
let json : Dag_cbor.json = `Float 3.14 in
571
+
match Dag_cbor.of_json ~mode:Dag_cbor.Atproto json with
572
+
| Ok _ -> Alcotest.fail "should reject float in AT Protocol mode"
573
+
| Error `Float_not_allowed -> ()
574
+
| Error e ->
575
+
Alcotest.fail
576
+
(Printf.sprintf "expected Float_not_allowed, got: %s"
577
+
(Dag_cbor.error_to_string e))
578
+
579
+
let test_float_equality () =
580
+
(* Test Float equality *)
581
+
let f1 = Dag_cbor.Float 3.14 in
582
+
let f2 = Dag_cbor.Float 3.14 in
583
+
let f3 = Dag_cbor.Float 2.71 in
584
+
Alcotest.(check bool) "equal floats" true (Dag_cbor.equal f1 f2);
585
+
Alcotest.(check bool) "different floats" false (Dag_cbor.equal f1 f3);
586
+
Alcotest.(check bool)
587
+
"float vs int" false
588
+
(Dag_cbor.equal (Dag_cbor.Float 1.0) (Dag_cbor.Int 1L))
589
+
590
+
let float_tests =
591
+
[
592
+
Alcotest.test_case "encode/decode" `Quick test_float_encode_decode;
593
+
Alcotest.test_case "rejected in AT Protocol mode" `Quick
594
+
test_float_rejected_atproto;
595
+
Alcotest.test_case "integer float converted to Int" `Quick
596
+
test_float_integer_converted;
597
+
Alcotest.test_case "NaN rejected" `Quick test_float_nan_rejected;
598
+
Alcotest.test_case "Infinity rejected" `Quick test_float_infinity_rejected;
599
+
Alcotest.test_case "-Infinity rejected" `Quick
600
+
test_float_neg_infinity_rejected;
601
+
Alcotest.test_case "float in array" `Quick test_float_in_array;
602
+
Alcotest.test_case "float in map" `Quick test_float_in_map;
603
+
Alcotest.test_case "validity checks" `Quick test_float_validity;
604
+
Alcotest.test_case "JSON IPLD mode" `Quick test_float_json_ipld;
605
+
Alcotest.test_case "JSON AT Protocol mode" `Quick test_float_json_atproto;
606
+
Alcotest.test_case "equality" `Quick test_float_equality;
607
+
]
608
+
609
+
(* === CBOR Strictness tests === *)
610
+
611
+
let test_strictness_indefinite_bytes () =
612
+
(* Indefinite-length byte string: 0x5F followed by chunks and 0xFF break *)
613
+
(* 0x5F = major type 2 (bytes) + additional info 31 (indefinite) *)
614
+
let indefinite_bytes = "\x5F\x41\x61\xFF" in
615
+
(* 0x5F: start indefinite bytes, 0x41 0x61: 1-byte chunk "a", 0xFF: break *)
616
+
match Dag_cbor.decode indefinite_bytes with
617
+
| Ok _ -> Alcotest.fail "should reject indefinite-length bytes"
618
+
| Error `Indefinite_length -> ()
619
+
| Error e ->
620
+
Alcotest.fail
621
+
(Printf.sprintf "expected Indefinite_length, got: %s"
622
+
(Dag_cbor.error_to_string e))
623
+
624
+
let test_strictness_indefinite_text () =
625
+
(* Indefinite-length text string: 0x7F followed by chunks and 0xFF break *)
626
+
(* 0x7F = major type 3 (text) + additional info 31 (indefinite) *)
627
+
let indefinite_text = "\x7F\x61\x61\xFF" in
628
+
(* 0x7F: start indefinite text, 0x61 0x61: 1-char chunk "a", 0xFF: break *)
629
+
match Dag_cbor.decode indefinite_text with
630
+
| Ok _ -> Alcotest.fail "should reject indefinite-length text"
631
+
| Error `Indefinite_length -> ()
632
+
| Error e ->
633
+
Alcotest.fail
634
+
(Printf.sprintf "expected Indefinite_length, got: %s"
635
+
(Dag_cbor.error_to_string e))
636
+
637
+
let test_strictness_indefinite_array () =
638
+
(* Indefinite-length array: 0x9F followed by items and 0xFF break *)
639
+
(* 0x9F = major type 4 (array) + additional info 31 (indefinite) *)
640
+
let indefinite_array = "\x9F\x01\x02\xFF" in
641
+
(* 0x9F: start indefinite array, 0x01 0x02: items 1, 2, 0xFF: break *)
642
+
match Dag_cbor.decode indefinite_array with
643
+
| Ok _ -> Alcotest.fail "should reject indefinite-length array"
644
+
| Error `Indefinite_length -> ()
645
+
| Error e ->
646
+
Alcotest.fail
647
+
(Printf.sprintf "expected Indefinite_length, got: %s"
648
+
(Dag_cbor.error_to_string e))
649
+
650
+
let test_strictness_indefinite_map () =
651
+
(* Indefinite-length map: 0xBF followed by pairs and 0xFF break *)
652
+
(* 0xBF = major type 5 (map) + additional info 31 (indefinite) *)
653
+
let indefinite_map = "\xBF\x61\x61\x01\xFF" in
654
+
(* 0xBF: start indefinite map, "a": 1, 0xFF: break *)
655
+
match Dag_cbor.decode indefinite_map with
656
+
| Ok _ -> Alcotest.fail "should reject indefinite-length map"
657
+
| Error `Indefinite_length -> ()
658
+
| Error e ->
659
+
Alcotest.fail
660
+
(Printf.sprintf "expected Indefinite_length, got: %s"
661
+
(Dag_cbor.error_to_string e))
662
+
663
+
let test_strictness_break_code () =
664
+
(* Bare break stop code: 0xFF is not allowed *)
665
+
let break_code = "\xFF" in
666
+
match Dag_cbor.decode break_code with
667
+
| Ok _ -> Alcotest.fail "should reject bare break code"
668
+
| Error `Indefinite_length -> ()
669
+
| Error e ->
670
+
(* Could also be a decode error from the CBOR library *)
671
+
Alcotest.(check bool)
672
+
"rejected break code" true
673
+
(match e with
674
+
| `Indefinite_length | `Decode_error _ -> true
675
+
| _ -> false)
676
+
677
+
let test_strictness_valid_definite () =
678
+
(* Test that definite-length encoding works fine *)
679
+
let value = Dag_cbor.Map [ ("a", Dag_cbor.Int 1L); ("b", Dag_cbor.Int 2L) ] in
680
+
let encoded = Dag_cbor.encode value in
681
+
match Dag_cbor.decode encoded with
682
+
| Ok decoded ->
683
+
Alcotest.(check bool) "valid definite" true (Dag_cbor.equal value decoded)
684
+
| Error e ->
685
+
Alcotest.fail
686
+
(Printf.sprintf "valid definite encoding rejected: %s"
687
+
(Dag_cbor.error_to_string e))
688
+
689
+
let test_strictness_disabled () =
690
+
(* Test that ~strict:false bypasses the check *)
691
+
(* Note: The underlying CBOR library may still reject invalid CBOR *)
692
+
let value = Dag_cbor.Int 42L in
693
+
let encoded = Dag_cbor.encode value in
694
+
match Dag_cbor.decode ~strict:false encoded with
695
+
| Ok (Dag_cbor.Int i) ->
696
+
Alcotest.(check int64) "decoded with strict:false" 42L i
697
+
| Ok _ -> Alcotest.fail "expected Int"
698
+
| Error e ->
699
+
Alcotest.fail
700
+
(Printf.sprintf "decode with strict:false failed: %s"
701
+
(Dag_cbor.error_to_string e))
702
+
703
+
let test_strictness_check_function () =
704
+
(* Test check_strictness function directly *)
705
+
(* Valid definite encoding *)
706
+
let valid = "\xa2\x61\x61\x01\x61\x62\x02" in
707
+
(* {"a": 1, "b": 2} *)
708
+
Alcotest.(check bool)
709
+
"valid passes check" true
710
+
(Result.is_ok (Dag_cbor.check_strictness valid));
711
+
712
+
(* Indefinite array *)
713
+
let invalid = "\x9F\x01\xFF" in
714
+
Alcotest.(check bool)
715
+
"indefinite fails check" true
716
+
(Result.is_error (Dag_cbor.check_strictness invalid))
717
+
718
+
let strictness_tests =
719
+
[
720
+
Alcotest.test_case "reject indefinite bytes" `Quick
721
+
test_strictness_indefinite_bytes;
722
+
Alcotest.test_case "reject indefinite text" `Quick
723
+
test_strictness_indefinite_text;
724
+
Alcotest.test_case "reject indefinite array" `Quick
725
+
test_strictness_indefinite_array;
726
+
Alcotest.test_case "reject indefinite map" `Quick
727
+
test_strictness_indefinite_map;
728
+
Alcotest.test_case "reject break code" `Quick test_strictness_break_code;
729
+
Alcotest.test_case "valid definite encoding" `Quick
730
+
test_strictness_valid_definite;
731
+
Alcotest.test_case "strict:false bypasses check" `Quick
732
+
test_strictness_disabled;
733
+
Alcotest.test_case "check_strictness function" `Quick
734
+
test_strictness_check_function;
735
+
]
736
+
334
737
(* === CAR tests === *)
335
738
336
739
let test_car_roundtrip () =
···
752
1155
Alcotest.test_case "invalid data models" `Quick test_data_model_invalid;
753
1156
]
754
1157
1158
+
(* === DAG-JSON tests === *)
1159
+
1160
+
let test_dag_json_link () =
1161
+
(* Test Link encoding: {"/": "<cid-string>"} *)
1162
+
let cid = Cid.of_dag_cbor "test content" in
1163
+
let value = Dag_cbor.Link cid in
1164
+
let json = Dag_json.encode value in
1165
+
match json with
1166
+
| `Assoc [ ("/", `String cid_str) ] ->
1167
+
Alcotest.(check string) "CID string" (Cid.to_string cid) cid_str
1168
+
| _ -> Alcotest.fail "expected {/: <cid>}"
1169
+
1170
+
let test_dag_json_bytes () =
1171
+
(* Test Bytes encoding: {"/": {"bytes": "<base64>"}} *)
1172
+
let bytes = "\x00\x01\x02\x03\xff\xfe" in
1173
+
let value = Dag_cbor.Bytes bytes in
1174
+
let json = Dag_json.encode value in
1175
+
match json with
1176
+
| `Assoc [ ("/", `Assoc [ ("bytes", `String b64) ]) ] -> (
1177
+
(* Decode and verify *)
1178
+
match Base64.decode ~pad:false b64 with
1179
+
| Ok decoded -> Alcotest.(check string) "bytes roundtrip" bytes decoded
1180
+
| Error _ -> Alcotest.fail "invalid base64")
1181
+
| _ -> Alcotest.fail "expected {/: {bytes: <base64>}}"
1182
+
1183
+
let test_dag_json_roundtrip () =
1184
+
(* Test full roundtrip: IPLD value -> DAG-JSON -> IPLD value *)
1185
+
let cid = Cid.of_dag_cbor "nested link" in
1186
+
let value =
1187
+
Dag_cbor.Map
1188
+
[
1189
+
("name", Dag_cbor.String "test");
1190
+
("count", Dag_cbor.Int 42L);
1191
+
("data", Dag_cbor.Bytes "\x01\x02\x03");
1192
+
("ref", Dag_cbor.Link cid);
1193
+
( "items",
1194
+
Dag_cbor.Array [ Dag_cbor.Int 1L; Dag_cbor.Int 2L; Dag_cbor.Int 3L ]
1195
+
);
1196
+
]
1197
+
in
1198
+
let json = Dag_json.encode value in
1199
+
match Dag_json.decode json with
1200
+
| Ok decoded ->
1201
+
Alcotest.(check bool)
1202
+
"roundtrip equal" true
1203
+
(Dag_cbor.equal value decoded)
1204
+
| Error e ->
1205
+
Alcotest.fail
1206
+
(Printf.sprintf "decode failed: %s" (Dag_json.error_to_string e))
1207
+
1208
+
let test_dag_json_string_roundtrip () =
1209
+
(* Test string serialization roundtrip *)
1210
+
let value =
1211
+
Dag_cbor.Map
1212
+
[
1213
+
("hello", Dag_cbor.String "world");
1214
+
( "nested",
1215
+
Dag_cbor.Map [ ("a", Dag_cbor.Int 1L); ("b", Dag_cbor.Int 2L) ] );
1216
+
]
1217
+
in
1218
+
let json_str = Dag_json.encode_string value in
1219
+
match Dag_json.decode_string json_str with
1220
+
| Ok decoded ->
1221
+
Alcotest.(check bool)
1222
+
"string roundtrip" true
1223
+
(Dag_cbor.equal value decoded)
1224
+
| Error e ->
1225
+
Alcotest.fail
1226
+
(Printf.sprintf "decode failed: %s" (Dag_json.error_to_string e))
1227
+
1228
+
let test_dag_json_decode_link () =
1229
+
(* Test decoding a link from JSON *)
1230
+
let cid_str = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi" in
1231
+
let json : Dag_json.json = `Assoc [ ("/", `String cid_str) ] in
1232
+
match Dag_json.decode json with
1233
+
| Ok (Dag_cbor.Link cid) ->
1234
+
Alcotest.(check string) "CID" cid_str (Cid.to_string cid)
1235
+
| Ok _ -> Alcotest.fail "expected Link"
1236
+
| Error e ->
1237
+
Alcotest.fail
1238
+
(Printf.sprintf "decode failed: %s" (Dag_json.error_to_string e))
1239
+
1240
+
let test_dag_json_decode_bytes () =
1241
+
(* Test decoding bytes from JSON *)
1242
+
let original = "\x00\x01\x02\x03\xff" in
1243
+
let b64 = Base64.encode_exn ~pad:false original in
1244
+
let json : Dag_json.json =
1245
+
`Assoc [ ("/", `Assoc [ ("bytes", `String b64) ]) ]
1246
+
in
1247
+
match Dag_json.decode json with
1248
+
| Ok (Dag_cbor.Bytes decoded) ->
1249
+
Alcotest.(check string) "bytes" original decoded
1250
+
| Ok _ -> Alcotest.fail "expected Bytes"
1251
+
| Error e ->
1252
+
Alcotest.fail
1253
+
(Printf.sprintf "decode failed: %s" (Dag_json.error_to_string e))
1254
+
1255
+
let test_dag_json_invalid_link () =
1256
+
(* Test that invalid CID is rejected *)
1257
+
let json : Dag_json.json = `Assoc [ ("/", `String "not-a-valid-cid") ] in
1258
+
match Dag_json.decode json with
1259
+
| Ok _ -> Alcotest.fail "should reject invalid CID"
1260
+
| Error (`Invalid_cid _) -> ()
1261
+
| Error e ->
1262
+
Alcotest.fail
1263
+
(Printf.sprintf "expected Invalid_cid, got: %s"
1264
+
(Dag_json.error_to_string e))
1265
+
1266
+
let test_dag_json_invalid_bytes () =
1267
+
(* Test that invalid base64 is rejected *)
1268
+
let json : Dag_json.json =
1269
+
`Assoc [ ("/", `Assoc [ ("bytes", `String "!!!invalid!!!") ]) ]
1270
+
in
1271
+
match Dag_json.decode json with
1272
+
| Ok _ -> Alcotest.fail "should reject invalid base64"
1273
+
| Error (`Invalid_bytes _) -> ()
1274
+
| Error e ->
1275
+
Alcotest.fail
1276
+
(Printf.sprintf "expected Invalid_bytes, got: %s"
1277
+
(Dag_json.error_to_string e))
1278
+
1279
+
let test_dag_json_reserved_slash () =
1280
+
(* Test that objects with "/" key are rejected (reserved for DAG-JSON) *)
1281
+
let json : Dag_json.json =
1282
+
`Assoc [ ("/", `Int 42); ("other", `String "value") ]
1283
+
in
1284
+
match Dag_json.decode json with
1285
+
| Ok _ -> Alcotest.fail "should reject object with / key"
1286
+
| Error (`Invalid_structure _) -> ()
1287
+
| Error e ->
1288
+
Alcotest.fail
1289
+
(Printf.sprintf "expected Invalid_structure, got: %s"
1290
+
(Dag_json.error_to_string e))
1291
+
1292
+
let test_dag_json_to_cbor () =
1293
+
(* Test DAG-JSON to DAG-CBOR conversion *)
1294
+
let cid = Cid.of_dag_cbor "test" in
1295
+
let json : Dag_json.json =
1296
+
`Assoc
1297
+
[
1298
+
("link", `Assoc [ ("/", `String (Cid.to_string cid)) ]);
1299
+
("value", `Int 42);
1300
+
]
1301
+
in
1302
+
match Dag_json.to_dag_cbor json with
1303
+
| Ok cbor_bytes -> (
1304
+
(* Decode the CBOR and verify *)
1305
+
match Dag_cbor.decode cbor_bytes with
1306
+
| Ok (Dag_cbor.Map pairs) ->
1307
+
Alcotest.(check int) "pair count" 2 (List.length pairs)
1308
+
| Ok _ -> Alcotest.fail "expected Map"
1309
+
| Error e ->
1310
+
Alcotest.fail
1311
+
(Printf.sprintf "CBOR decode failed: %s"
1312
+
(Dag_cbor.error_to_string e)))
1313
+
| Error e ->
1314
+
Alcotest.fail
1315
+
(Printf.sprintf "to_dag_cbor failed: %s" (Dag_json.error_to_string e))
1316
+
1317
+
let test_dag_json_of_cbor () =
1318
+
(* Test DAG-CBOR to DAG-JSON conversion *)
1319
+
let value =
1320
+
Dag_cbor.Map [ ("a", Dag_cbor.Int 1L); ("b", Dag_cbor.String "hello") ]
1321
+
in
1322
+
let cbor_bytes = Dag_cbor.encode value in
1323
+
match Dag_json.of_dag_cbor cbor_bytes with
1324
+
| Ok json -> (
1325
+
(* Verify structure *)
1326
+
match json with
1327
+
| `Assoc pairs -> Alcotest.(check int) "pair count" 2 (List.length pairs)
1328
+
| _ -> Alcotest.fail "expected Assoc")
1329
+
| Error _ -> Alcotest.fail "of_dag_cbor failed"
1330
+
1331
+
let test_dag_json_float_ipld () =
1332
+
(* Test float handling in IPLD mode *)
1333
+
let json : Dag_json.json = `Float 3.14 in
1334
+
match Dag_json.decode ~mode:Dag_cbor.Ipld json with
1335
+
| Ok (Dag_cbor.Float f) -> Alcotest.(check (float 0.001)) "float" 3.14 f
1336
+
| Ok _ -> Alcotest.fail "expected Float"
1337
+
| Error e ->
1338
+
Alcotest.fail
1339
+
(Printf.sprintf "decode failed: %s" (Dag_json.error_to_string e))
1340
+
1341
+
let test_dag_json_float_atproto () =
1342
+
(* Test that non-integer floats are rejected in AT Protocol mode *)
1343
+
let json : Dag_json.json = `Float 3.14 in
1344
+
match Dag_json.decode ~mode:Dag_cbor.Atproto json with
1345
+
| Ok _ -> Alcotest.fail "should reject float in AT Protocol mode"
1346
+
| Error (`Unsupported_value _) -> ()
1347
+
| Error e ->
1348
+
Alcotest.fail
1349
+
(Printf.sprintf "expected Unsupported_value, got: %s"
1350
+
(Dag_json.error_to_string e))
1351
+
1352
+
let test_dag_json_pretty () =
1353
+
(* Test pretty printing - verify it produces different output than compact *)
1354
+
let value =
1355
+
Dag_cbor.Map
1356
+
[
1357
+
("key", Dag_cbor.String "value");
1358
+
( "nested",
1359
+
Dag_cbor.Map [ ("a", Dag_cbor.Int 1L); ("b", Dag_cbor.Int 2L) ] );
1360
+
]
1361
+
in
1362
+
let compact = Dag_json.encode_string value in
1363
+
let pretty = Dag_json.encode_string_pretty value in
1364
+
(* Pretty should have more whitespace/formatting *)
1365
+
Alcotest.(check bool)
1366
+
"pretty has more chars" true
1367
+
(String.length pretty > String.length compact)
1368
+
1369
+
let dag_json_tests =
1370
+
[
1371
+
Alcotest.test_case "encode link" `Quick test_dag_json_link;
1372
+
Alcotest.test_case "encode bytes" `Quick test_dag_json_bytes;
1373
+
Alcotest.test_case "roundtrip" `Quick test_dag_json_roundtrip;
1374
+
Alcotest.test_case "string roundtrip" `Quick test_dag_json_string_roundtrip;
1375
+
Alcotest.test_case "decode link" `Quick test_dag_json_decode_link;
1376
+
Alcotest.test_case "decode bytes" `Quick test_dag_json_decode_bytes;
1377
+
Alcotest.test_case "invalid link" `Quick test_dag_json_invalid_link;
1378
+
Alcotest.test_case "invalid bytes" `Quick test_dag_json_invalid_bytes;
1379
+
Alcotest.test_case "reserved slash key" `Quick test_dag_json_reserved_slash;
1380
+
Alcotest.test_case "to DAG-CBOR" `Quick test_dag_json_to_cbor;
1381
+
Alcotest.test_case "of DAG-CBOR" `Quick test_dag_json_of_cbor;
1382
+
Alcotest.test_case "float IPLD mode" `Quick test_dag_json_float_ipld;
1383
+
Alcotest.test_case "float AT Protocol mode" `Quick
1384
+
test_dag_json_float_atproto;
1385
+
Alcotest.test_case "pretty print" `Quick test_dag_json_pretty;
1386
+
]
1387
+
755
1388
(* === Test suites === *)
756
1389
757
1390
let () =
···
759
1392
[
760
1393
("cid_parsing", cid_parsing_tests);
761
1394
("cid_creation", cid_creation_tests);
1395
+
("cidv0", cidv0_tests);
762
1396
("dag_cbor", dag_cbor_tests);
1397
+
("float", float_tests);
1398
+
("strictness", strictness_tests);
1399
+
("dag_json", dag_json_tests);
763
1400
("car", car_tests);
764
1401
("blob", blob_tests);
765
1402
("data_model", data_model_tests);
+5
-2
test/syntax/test_syntax.ml
+5
-2
test/syntax/test_syntax.ml
···
294
294
vectors
295
295
296
296
let test_cid_invalid () =
297
+
(* These are CIDs that should be rejected by AT Protocol syntax validation.
298
+
Note: Some of these (like CIDv0) are valid IPLD CIDs but not accepted in AT Protocol. *)
297
299
let vectors =
298
300
load_test_vectors ~preserve_whitespace:true
299
301
(fixture_dir ^ "/cid_syntax_invalid.txt")
300
302
in
301
303
List.iter
302
304
(fun cid_str ->
303
-
let result = Atproto_ipld.Cid.of_string cid_str in
305
+
(* Use is_valid_syntax for AT Protocol syntax validation *)
306
+
let result = Atproto_ipld.Cid.is_valid_syntax cid_str in
304
307
Alcotest.(check bool)
305
308
(Printf.sprintf "CID invalid: %s" cid_str)
306
-
true (Result.is_error result))
309
+
true (not result))
307
310
vectors
308
311
309
312
(* =========================== Language Tag Tests =========================== *)