atproto libraries implementation in ocaml

Add IPLD compliance features: Float support, CIDv0, CBOR strictness, DAG-JSON

- Add Float variant to DAG-CBOR value type with decode_mode (Ipld/Atproto)
- Floats encoded as 64-bit IEEE 754 doubles, NaN/Infinity rejected
- Add CIDv0 support: parsing, encoding, version detection
- Add CBOR strictness checking: reject indefinite-length encoding
- Implement DAG-JSON codec with {/: cid} links and {/: {bytes: b64}} bytes
- Add 22 new IPLD tests (66 total), all 494 compliance tests pass

+4
.beads/issues.jsonl
··· 31 31 {"id":"atproto-60","title":"Implement effects-based I/O abstraction","description":"Implement the effects-based I/O abstraction layer that makes all libraries runtime-agnostic.","design":"## Module Structure\n\n```ocaml\n(* atproto-effects/lib/effects.ml *)\n\n(* HTTP effects *)\ntype http_request = {\n method_: [ `GET | `POST | `PUT | `DELETE ];\n uri: Uri.t;\n headers: (string * string) list;\n body: string option;\n}\n\ntype http_response = {\n status: int;\n headers: (string * string) list;\n body: string;\n}\n\ntype _ Effect.t +=\n | Http_request : http_request -\u003e http_response Effect.t\n\n(* DNS effects *)\ntype _ Effect.t +=\n | Dns_txt : string -\u003e string list Effect.t\n | Dns_a : string -\u003e string list Effect.t\n\n(* Time effects *)\ntype _ Effect.t +=\n | Now : Ptime.t Effect.t\n | Sleep : float -\u003e unit Effect.t\n\n(* Random effects *)\ntype _ Effect.t +=\n | Random_bytes : int -\u003e bytes Effect.t\n\n(* atproto-effects-eio/lib/handler.ml *)\nval run : (unit -\u003e 'a) -\u003e 'a\n```\n\n## Handler Example (eio)\n\n```ocaml\nlet run f =\n Effect.Deep.match_ f ()\n {\n retc = Fun.id;\n exnc = raise;\n effc = fun (type a) (e : a Effect.t) -\u003e\n match e with\n | Http_request req -\u003e\n Some (fun (k : (a, _) continuation) -\u003e\n let resp = Eio_client.request req in\n continue k resp)\n | Dns_txt domain -\u003e\n Some (fun k -\u003e\n let records = Eio_dns.txt domain in\n continue k records)\n | _ -\u003e None\n }\n```\n\n## Dependencies\n- eio (for testing handler)","acceptance_criteria":"- Effect types for HTTP, DNS, time, random\n- eio-based handler for testing\n- Handler composition utilities\n- Performance benchmarks","status":"closed","priority":1,"issue_type":"feature","assignee":"claude","created_at":"2025-12-28T00:12:29.021401617+01:00","updated_at":"2025-12-28T11:57:08.264086142+01:00","closed_at":"2025-12-28T11:57:08.264086142+01:00","labels":["effects","infrastructure"],"dependencies":[{"issue_id":"atproto-60","depends_on_id":"atproto-1","type":"parent-child","created_at":"2025-12-28T00:12:55.467983208+01:00","created_by":"daemon"}]} 32 32 {"id":"atproto-61","title":"Set up interoperability test suite","description":"Set up and run the AT Protocol interoperability tests from bluesky-social/atproto-interop-tests.","design":"## Test Structure\n\n```\ntest/\n├── interop/\n│ ├── syntax_test.ml # Handle, DID, NSID, TID, etc.\n│ ├── crypto_test.ml # Signatures, did:key\n│ ├── data_model_test.ml # DAG-CBOR, CID\n│ ├── mst_test.ml # Key heights, tree structure\n│ ├── lexicon_test.ml # Schema and record validation\n│ └── firehose_test.ml # Commit proofs\n├── fixtures/ # Cloned from atproto-interop-tests\n└── dune\n```\n\n## Test Approach\n\n1. Clone test vectors from GitHub\n2. Parse JSON fixtures using jsont\n3. Parse text fixtures line by line\n4. Run each test case\n5. Compare output to expected values\n\n## Example Test\n\n```ocaml\nlet load_json_fixtures path =\n let json = Jsont.of_file path in\n Jsont.decode (Jsont.list fixture_jsont) json\n\nlet%test \"handle_syntax_valid\" =\n let fixtures = load_lines \"fixtures/syntax/handle_syntax_valid.txt\" in\n List.for_all (fun line -\u003e\n match Handle.of_string line with\n | Ok _ -\u003e true\n | Error _ -\u003e false\n ) fixtures\n\nlet%test \"handle_syntax_invalid\" =\n let fixtures = load_lines \"fixtures/syntax/handle_syntax_invalid.txt\" in\n List.for_all (fun line -\u003e\n match Handle.of_string line with\n | Ok _ -\u003e false\n | Error _ -\u003e true\n ) fixtures\n\nlet%test \"crypto_signature_fixtures\" =\n let fixtures = load_json_fixtures \"fixtures/crypto/signature-fixtures.json\" in\n List.for_all (fun fixture -\u003e\n let message = Base64.decode fixture.message_base64 in\n let signature = Base64.decode fixture.signature_base64 in\n let key = Did_key.of_string fixture.public_key_did in\n let result = Crypto.verify key message signature in\n result = fixture.valid_signature\n ) fixtures\n```\n\n## Dependencies\n- alcotest or ounit2\n- jsont","acceptance_criteria":"- All syntax interop tests pass\n- All crypto interop tests pass\n- All data-model interop tests pass\n- All MST interop tests pass\n- All lexicon interop tests pass\n- All firehose interop tests pass","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-28T00:12:40.553908313+01:00","updated_at":"2025-12-28T13:25:34.614867702+01:00","closed_at":"2025-12-28T13:25:34.614867702+01:00","labels":["conformance","testing"],"dependencies":[{"issue_id":"atproto-61","depends_on_id":"atproto-1","type":"parent-child","created_at":"2025-12-28T00:12:56.180809368+01:00","created_by":"daemon"}]} 33 33 {"id":"atproto-62","title":"Set up monorepo package structure","description":"Set up the monorepo structure for multiple opam packages within a single repository.","design":"## Repository Structure\n\n```\natproto/\n├── dune-project # Root with all packages\n├── packages/\n│ ├── atproto-syntax/\n│ │ ├── lib/\n│ │ │ ├── dune\n│ │ │ └── *.ml\n│ │ ├── test/\n│ │ │ ├── dune\n│ │ │ └── *_test.ml\n│ │ └── atproto-syntax.opam\n│ ├── atproto-crypto/\n│ ├── atproto-multibase/\n│ ├── atproto-ipld/\n│ ├── atproto-mst/\n│ ├── atproto-repo/\n│ ├── atproto-identity/\n│ ├── atproto-xrpc/\n│ ├── atproto-sync/\n│ ├── atproto-lexicon/\n│ ├── atproto-lexicon-gen/\n│ ├── atproto-api/\n│ └── atproto-effects/\n├── examples/\n│ ├── simple_client/\n│ └── firehose_consumer/\n└── interop-tests/\n```\n\n## dune-project\n\n```lisp\n(lang dune 3.20)\n(name atproto)\n(generate_opam_files true)\n\n(package\n (name atproto-syntax)\n (synopsis \"AT Protocol identifier syntax parsing\")\n (depends\n (ocaml (\u003e= 5.4))\n re\n ptime))\n\n(package\n (name atproto-crypto)\n ...)\n```\n\n## CI (.github/workflows/ci.yml)\n\n- OCaml 5.4 matrix\n- Build all packages\n- Run all tests\n- Run interop tests","acceptance_criteria":"- Multi-package dune-project structure\n- Separate opam files per package\n- CI pipeline for building and testing\n- Documentation generation setup","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-28T00:12:50.547102438+01:00","updated_at":"2025-12-28T11:57:18.856810633+01:00","closed_at":"2025-12-28T11:57:18.856810633+01:00","labels":["infrastructure","setup"],"dependencies":[{"issue_id":"atproto-62","depends_on_id":"atproto-1","type":"parent-child","created_at":"2025-12-28T00:12:57.015938611+01:00","created_by":"daemon"}]} 34 + {"id":"atproto-cir","title":"Implement DAG-JSON codec","description":"Create lib/ipld/dag_json.ml, encode Links as {\"/\": \"\u003ccid-string\u003e\"}, encode Bytes as {\"/\": {\"bytes\": \"\u003cbase64\u003e\"}}","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-12-28T15:47:30.578398468+01:00","updated_at":"2025-12-28T16:06:13.475583417+01:00","closed_at":"2025-12-28T16:06:13.475583417+01:00","labels":["compliance","ipld"]} 34 35 {"id":"atproto-h09","title":"Add package documentation","description":"Add documentation for each of the 11 AT Protocol packages including:\n- Module-level documentation with examples\n- README.md for the root project\n- CONTRIBUTING.md guide\n- API documentation generation with odoc","status":"closed","priority":2,"issue_type":"task","assignee":"claude","created_at":"2025-12-28T13:34:10.559554696+01:00","updated_at":"2025-12-28T13:50:20.509417248+01:00","closed_at":"2025-12-28T13:50:20.509417248+01:00","labels":["documentation"],"dependencies":[{"issue_id":"atproto-h09","depends_on_id":"atproto-1","type":"parent-child","created_at":"2025-12-28T13:34:16.081103184+01:00","created_by":"daemon"}]} 36 + {"id":"atproto-kc7","title":"Verify/enforce CBOR strictness","description":"Check that CBOR library produces shortest-form encoding, add explicit rejection of indefinite-length items during decode","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-12-28T15:47:28.665226526+01:00","updated_at":"2025-12-28T16:02:55.549763384+01:00","closed_at":"2025-12-28T16:02:55.549763384+01:00","labels":["compliance","ipld"]} 35 37 {"id":"atproto-pg8","title":"Add MST example_keys.txt fixture tests","description":"Add tests using the example_keys.txt fixture file which contains 156 structured MST keys.\n\nTests should:\n1. Load all 156 keys from the fixture\n2. Build an MST containing all keys\n3. Verify all keys are retrievable\n4. Verify iteration order matches sorted key order\n5. Optionally verify tree structure properties","acceptance_criteria":"- example_keys.txt is loaded and all 156 keys are used\n- MST is built with all keys\n- All keys are retrievable after insertion\n- Iteration produces keys in sorted order","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-28T12:12:19.180139823+01:00","updated_at":"2025-12-28T12:43:14.192342391+01:00","closed_at":"2025-12-28T12:43:14.192342391+01:00","labels":["conformance","mst","testing"]} 36 38 {"id":"atproto-q0h","title":"Add firehose commit-proof-fixtures.json tests","description":"Add tests for the commit-proof-fixtures.json file which contains 6 test cases for MST proof verification:\n\n1. two deep split\n2. two deep leafless split\n3. add on edge with neighbor two layers down\n4. merge and split in multi-op commit\n5. complex multi-op commit\n6. split with earlier leaves on same layer\n\nEach fixture includes:\n- keys (existing keys in MST)\n- adds (keys to add)\n- dels (keys to delete)\n- rootBeforeCommit / rootAfterCommit (expected CIDs)\n- blocksInProof (CIDs of blocks needed for proof)\n\nThis tests the commit proof verification needed for firehose sync.","acceptance_criteria":"- All 6 commit-proof fixtures are tested\n- MST operations (add/delete) produce correct root CIDs\n- Proof blocks are correctly identified\n- Tests verify rootBeforeCommit and rootAfterCommit match","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-28T12:12:34.999268893+01:00","updated_at":"2025-12-28T12:58:39.408679225+01:00","closed_at":"2025-12-28T12:58:39.408679225+01:00","labels":["conformance","firehose","testing"]} 39 + {"id":"atproto-s19","title":"Add CIDv0 parsing support","description":"Detect base58btc strings starting with \"Qm\", decode as multihash (sha2-256), implicit dag-pb codec","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-12-28T15:47:26.962896421+01:00","updated_at":"2025-12-28T15:58:38.151319563+01:00","closed_at":"2025-12-28T15:58:38.151319563+01:00","labels":["compliance","ipld"]} 37 40 {"id":"atproto-udz","title":"Add missing data-model conformance tests","description":"Add tests for data-model fixtures that are not currently covered:\n\n1. **data-model-valid.json** (5 entries) - Valid AT Protocol data model examples:\n - trivial record\n - float but integer-like (123.0)\n - empty list and object\n - list of nullable\n - list of lists\n\n2. **data-model-invalid.json** (12 entries) - Invalid examples that must be rejected:\n - top-level not an object\n - non-integer float\n - record with $type null/wrong type/empty\n - blob with string size/missing key\n - bytes with wrong field type/extra fields\n - link with wrong field type/bogus CID/extra fields","acceptance_criteria":"- test_data_model_valid() tests all 5 valid entries\n- test_data_model_invalid() tests all 12 invalid entries\n- Valid entries encode/decode correctly\n- Invalid entries are rejected with appropriate errors","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-28T12:12:14.579573063+01:00","updated_at":"2025-12-28T12:42:16.291981859+01:00","closed_at":"2025-12-28T12:42:16.291981859+01:00","labels":["conformance","ipld","testing"]} 38 41 {"id":"atproto-w5i","title":"Create example applications","description":"Create example applications demonstrating the AT Protocol libraries:\n1. Simple client - authenticate and make posts\n2. Firehose consumer - subscribe to real-time events\n3. Bot example - automated posting/interactions","notes":"Added firehose_demo example showing how to use the firehose module with OCaml 5 effects. Additional examples (client, bot) can be added in future iterations.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-28T13:34:11.928213055+01:00","updated_at":"2025-12-28T13:36:39.963890666+01:00","closed_at":"2025-12-28T13:36:39.963890666+01:00","labels":["documentation","examples"],"dependencies":[{"issue_id":"atproto-w5i","depends_on_id":"atproto-1","type":"parent-child","created_at":"2025-12-28T13:34:17.10878+01:00","created_by":"daemon"}]} 42 + {"id":"atproto-xfg","title":"Add Float support to DAG-CBOR","description":"Add Float of float variant to value type, encode as 64-bit double (CBOR major type 7, additional info 27), reject NaN and Infinity values, keep AT Protocol mode that rejects all floats","status":"closed","priority":1,"issue_type":"feature","created_at":"2025-12-28T15:47:24.836568239+01:00","updated_at":"2025-12-28T15:53:03.229484165+01:00","closed_at":"2025-12-28T15:53:03.229484165+01:00","labels":["compliance","ipld"]}
+1 -1
COMPLIANCE.md
··· 1 1 # AT Protocol Compliance Report 2 2 3 - **Generated:** 2025-12-28T13:56:33Z 3 + **Generated:** 2025-12-28T15:06:07Z 4 4 **Repository:** [https://github.com/gdiazlo/atproto](https://github.com/gdiazlo/atproto) 5 5 6 6 ## Summary
+1 -1
compliance-report.html
··· 129 129 </table> 130 130 </div> 131 131 <div class="meta"> 132 - <p>Generated: 2025-12-28T13:56:33Z</p> 132 + <p>Generated: 2025-12-28T15:06:07Z</p> 133 133 <p>Repository: <a href="https://github.com/gdiazlo/atproto">https://github.com/gdiazlo/atproto</a></p> 134 134 <p>Test fixtures from <a href="https://github.com/bluesky-social/atproto-interop-tests">AT Protocol Interoperability Tests</a></p> 135 135 </div>
+1 -1
compliance-report.json
··· 1 1 { 2 2 "title": "AT Protocol Compliance Report", 3 3 "version": "1.0.0", 4 - "generated_at": "2025-12-28T13:56:33Z", 4 + "generated_at": "2025-12-28T15:06:07Z", 5 5 "repository": "https://github.com/gdiazlo/atproto", 6 6 "total_tests": 494, 7 7 "total_passed": 494,
+2
lib/ipld/atproto_ipld.ml
··· 8 8 9 9 - {!module:Cid}: Content Identifiers (CIDv0 and CIDv1) 10 10 - {!module:Dag_cbor}: DAG-CBOR encoding with AT Protocol sorting rules 11 + - {!module:Dag_json}: DAG-JSON encoding for human-readable IPLD 11 12 - {!module:Car}: CAR (Content-Addressable aRchive) file handling 12 13 - {!module:Blob}: Blob reference handling for media 13 14 ··· 47 48 48 49 module Cid = Cid 49 50 module Dag_cbor = Dag_cbor 51 + module Dag_json = Dag_json 50 52 module Car = Car 51 53 module Blob = Blob
+143 -31
lib/ipld/cid.ml
··· 1 - (** CID (Content Identifier) for AT Protocol. 1 + (** CID (Content Identifier) for IPLD and AT Protocol. 2 2 3 - CIDs are self-describing content-addressed identifiers used throughout AT 4 - Protocol for referencing data blocks. 3 + CIDs are self-describing content-addressed identifiers used throughout IPLD 4 + and AT Protocol for referencing data blocks. 5 + 6 + This module supports both CIDv0 and CIDv1: 5 7 6 - AT Protocol blessed CID format: 7 - - Version: CIDv1 only 8 + CIDv0 (legacy IPFS format): 9 + - Base58btc encoded, starting with "Qm" 10 + - Implicit dag-pb codec (0x70) 11 + - SHA-256 hash only 12 + - 46 characters when encoded 13 + 14 + CIDv1 (AT Protocol blessed format): 15 + - Version: CIDv1 8 16 - Hash: SHA-256 (multicodec 0x12), 256 bits 9 17 - Codec: dag-cbor (0x71) for records, raw (0x55) for blobs 10 18 - String encoding: base32lower (multibase prefix 'b') *) ··· 15 23 | `Invalid_hash_length 16 24 | `Invalid_multibase 17 25 | `Invalid_cid_format 18 - | `Cid_too_short ] 26 + | `Cid_too_short 27 + | `Invalid_cidv0 ] 19 28 20 29 let pp_error fmt = function 21 - | `Invalid_cid_version -> Format.fprintf fmt "invalid CID version (must be 1)" 30 + | `Invalid_cid_version -> 31 + Format.fprintf fmt "invalid CID version (must be 0 or 1)" 22 32 | `Invalid_hash_algorithm -> 23 33 Format.fprintf fmt "invalid hash algorithm (must be SHA-256)" 24 34 | `Invalid_hash_length -> Format.fprintf fmt "invalid hash length" 25 35 | `Invalid_multibase -> Format.fprintf fmt "invalid multibase encoding" 26 36 | `Invalid_cid_format -> Format.fprintf fmt "invalid CID format" 27 37 | `Cid_too_short -> Format.fprintf fmt "CID too short" 38 + | `Invalid_cidv0 -> Format.fprintf fmt "invalid CIDv0 format" 28 39 29 40 let error_to_string e = Format.asprintf "%a" pp_error e 30 41 ··· 59 70 | Other n -> n 60 71 end 61 72 62 - type t = { codec : codec; hash : string (** 32-byte SHA-256 hash *) } 63 - (** CID type - stores the codec and hash *) 73 + (** CID version *) 74 + type version = V0 | V1 75 + 76 + type t = { 77 + version : version; 78 + codec : codec; 79 + hash : string; (** 32-byte SHA-256 hash *) 80 + } 81 + (** CID type - stores the version, codec and hash *) 64 82 65 83 (** Encode an unsigned varint to bytes *) 66 84 let encode_varint n = ··· 97 115 (** Create a CID from content bytes by hashing with SHA-256 *) 98 116 let create ~codec (content : string) : t = 99 117 let hash = Digestif.SHA256.(to_raw_string (digest_string content)) in 100 - { codec; hash } 118 + { version = V1; codec; hash } 101 119 102 120 (** Create a CID for DAG-CBOR content *) 103 121 let of_dag_cbor content = create ~codec:DagCbor content ··· 108 126 (** Create a CID from a pre-computed hash *) 109 127 let of_hash ~codec hash : (t, error) result = 110 128 if String.length hash <> 32 then Error `Invalid_hash_length 111 - else Ok { codec; hash } 129 + else Ok { version = V1; codec; hash } 130 + 131 + (** Get the version *) 132 + let cid_version t = t.version 112 133 113 134 (** Get the codec *) 114 135 let codec t = t.codec ··· 130 151 in 131 152 if c <> 0 then c else String.compare t1.hash t2.hash 132 153 133 - (** Encode CID to binary format (for CBOR tag 42) *) 154 + (** Encode CID to binary format (for CBOR tag 42). CIDv0 is encoded as just the 155 + multihash (for compatibility). CIDv1 is encoded as version + codec + 156 + multihash. *) 134 157 let to_bytes t = 135 - let version = encode_varint 1 in 136 - let codec = encode_varint (Multicodec.int_of_codec t.codec) in 137 - let hash_info = encode_varint Multicodec.sha256 in 138 - let hash_len = encode_varint 32 in 139 - version ^ codec ^ hash_info ^ hash_len ^ t.hash 158 + match t.version with 159 + | V0 -> 160 + (* CIDv0: just the multihash (sha256 indicator + length + hash) *) 161 + let hash_info = encode_varint Multicodec.sha256 in 162 + let hash_len = encode_varint 32 in 163 + hash_info ^ hash_len ^ t.hash 164 + | V1 -> 165 + let version = encode_varint 1 in 166 + let codec = encode_varint (Multicodec.int_of_codec t.codec) in 167 + let hash_info = encode_varint Multicodec.sha256 in 168 + let hash_len = encode_varint 32 in 169 + version ^ codec ^ hash_info ^ hash_len ^ t.hash 140 170 141 - (** Encode CID to string (base32lower with 'b' prefix) *) 171 + (** Encode CID to string. CIDv0: base58btc (starts with "Qm") CIDv1: base32lower 172 + with 'b' prefix *) 142 173 let to_string t = 143 - let bytes = to_bytes t in 144 - "b" ^ Atproto_multibase.Base32lower.encode (Bytes.of_string bytes) 174 + match t.version with 175 + | V0 -> 176 + (* CIDv0 uses base58btc without multibase prefix *) 177 + Atproto_multibase.Base58btc.encode (Bytes.of_string (to_bytes t)) 178 + | V1 -> 179 + let bytes = to_bytes t in 180 + "b" ^ Atproto_multibase.Base32lower.encode (Bytes.of_string bytes) 145 181 146 - (** Parse CID from binary bytes *) 147 - let of_bytes s : (t, error) result = 182 + (** Parse CIDv0 from binary bytes (just multihash). CIDv0 is: hash_codec 183 + (varint) + hash_len (varint) + hash (bytes) *) 184 + let of_bytes_v0 s : (t, error) result = 185 + let len = String.length s in 186 + if len < 34 then Error `Cid_too_short 187 + else 188 + (* First byte should be 0x12 (sha256) *) 189 + match decode_varint s 0 with 190 + | Error e -> Error e 191 + | Ok (hash_codec, hclen) -> ( 192 + if hash_codec <> Multicodec.sha256 then Error `Invalid_hash_algorithm 193 + else 194 + match decode_varint s hclen with 195 + | Error e -> Error e 196 + | Ok (hash_len, hllen) -> 197 + if hash_len <> 32 then Error `Invalid_hash_length 198 + else 199 + let hash_start = hclen + hllen in 200 + if len < hash_start + 32 then Error `Cid_too_short 201 + else 202 + let hash = String.sub s hash_start 32 in 203 + (* CIDv0 implicitly uses dag-pb codec *) 204 + Ok { version = V0; codec = DagPb; hash }) 205 + 206 + (** Parse CIDv1 from binary bytes *) 207 + let of_bytes_v1 s : (t, error) result = 148 208 let len = String.length s in 149 209 if len < 4 then Error `Cid_too_short 150 210 else ··· 178 238 if len < hash_start + 32 then Error `Cid_too_short 179 239 else 180 240 let hash = String.sub s hash_start 32 in 181 - Ok { codec; hash }))) 241 + Ok { version = V1; codec; hash }))) 242 + 243 + (** Parse CID from binary bytes. Distinguishes CIDv0 from CIDv1 by first byte: 244 + - 0x12 (sha256): CIDv0 (just multihash) 245 + - 0x01: CIDv1 *) 246 + let of_bytes s : (t, error) result = 247 + let len = String.length s in 248 + if len < 2 then Error `Cid_too_short 249 + else 250 + let first_byte = Char.code s.[0] in 251 + if first_byte = 0x12 then 252 + (* CIDv0: starts with sha256 multicodec *) 253 + of_bytes_v0 s 254 + else if first_byte = 0x01 then 255 + (* CIDv1: starts with version 1 *) 256 + of_bytes_v1 s 257 + else Error `Invalid_cid_format 182 258 183 259 (** Decode hex string to bytes *) 184 260 let decode_hex s = ··· 248 324 Some buf 249 325 with Exit -> None 250 326 251 - (** Parse CID from string (multibase encoded) *) 327 + (** Check if a string looks like a CIDv0 (base58btc encoded, starts with "Qm") 328 + *) 329 + let is_cidv0_string s = 330 + let len = String.length s in 331 + (* CIDv0 is exactly 46 characters and starts with "Qm" *) 332 + len = 46 && len >= 2 && s.[0] = 'Q' && s.[1] = 'm' 333 + 334 + (** Parse CID from string (multibase encoded for CIDv1, base58btc for CIDv0) *) 252 335 let of_string s : (t, error) result = 253 336 let len = String.length s in 254 337 if len < 2 then Error `Cid_too_short 338 + else if is_cidv0_string s then 339 + (* CIDv0: base58btc without multibase prefix *) 340 + match Atproto_multibase.Base58btc.decode s with 341 + | Ok bytes -> of_bytes_v0 (Bytes.to_string bytes) 342 + | Error _ -> Error `Invalid_cidv0 255 343 else 256 - (* Check multibase prefix *) 344 + (* CIDv1: multibase prefix + encoded data *) 257 345 let prefix = s.[0] in 258 346 let encoded = String.sub s 1 (len - 1) in 259 347 (* AT Protocol uses base32lower ('b') but we also accept others *) ··· 272 360 | Ok bytes -> Some bytes 273 361 | Error _ -> None) 274 362 | 'z' -> ( 275 - (* Base58btc *) 363 + (* Base58btc with multibase prefix *) 276 364 match Atproto_multibase.Base58btc.decode encoded with 277 365 | Ok bytes -> Some bytes 278 366 | Error _ -> None) ··· 304 392 (** Validate a CID string without fully parsing *) 305 393 let is_valid s = match of_string s with Ok _ -> true | Error _ -> false 306 394 307 - (** Regex-based CID syntax validation (matches Go implementation). This is used 308 - for Lexicon validation - it checks the string format without fully decoding 309 - and validating the CID contents. 395 + (** Regex-based CID syntax validation for AT Protocol (matches Go 396 + implementation). This is used for Lexicon validation - it checks the string 397 + format without fully decoding and validating the CID contents. 310 398 311 - Rules: 399 + Rules for CIDv1: 312 400 - Length between 8-256 characters 313 401 - Only alphanumeric characters plus '+' and '=' 314 - - Not a CIDv0 (starting with "Qmb") *) 402 + - Not a legacy CIDv0 (starting with "Qmb") 403 + 404 + Note: AT Protocol Lexicon validation intentionally excludes CIDv0. Use 405 + is_valid_cidv0_syntax for validating CIDv0 strings. *) 315 406 let is_valid_syntax s = 316 407 let len = String.length s in 317 408 if len < 8 || len > 256 then false ··· 331 422 if valid then check_chars (i + 1) else false 332 423 in 333 424 check_chars 0 425 + 426 + (** Validate CIDv0 string syntax. CIDv0 is exactly 46 characters and starts with 427 + "Qm". Uses base58btc alphabet (no 0, O, I, l). *) 428 + let is_valid_cidv0_syntax s = 429 + let len = String.length s in 430 + if len <> 46 then false 431 + else if not (len >= 2 && s.[0] = 'Q' && s.[1] = 'm') then false 432 + else 433 + (* Check all characters are valid base58btc *) 434 + let base58_alphabet = 435 + "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" 436 + in 437 + let rec check_chars i = 438 + if i >= len then true 439 + else if String.contains base58_alphabet s.[i] then check_chars (i + 1) 440 + else false 441 + in 442 + check_chars 0 443 + 444 + (** Validate any CID string syntax (CIDv0 or CIDv1) *) 445 + let is_valid_any_syntax s = is_valid_syntax s || is_valid_cidv0_syntax s
+223 -46
lib/ipld/dag_cbor.ml
··· 1 - (** DAG-CBOR codec for AT Protocol. 1 + (** DAG-CBOR codec for IPLD and AT Protocol. 2 2 3 3 DAG-CBOR is a deterministic subset of CBOR used for content-addressed data. 4 - This module provides encoding and decoding with AT Protocol specific rules: 4 + This module provides encoding and decoding with two modes: 5 + 6 + - IPLD mode (default): Full IPLD Data Model support including floats 7 + - AT Protocol mode: Stricter rules where floats are forbidden 8 + 9 + Common rules for both modes: 5 10 - Map keys are sorted by length first, then lexicographically 6 - - Floats are not allowed (except integers represented as floats in JSON) 7 11 - Integers must be in JavaScript safe range (-2^53+1 to 2^53-1) 8 - - CIDs are encoded with CBOR tag 42 *) 12 + - CIDs are encoded with CBOR tag 42 13 + 14 + IPLD-specific rules: 15 + - Floats are encoded as 64-bit IEEE 754 doubles 16 + - NaN, Infinity, and -Infinity are forbidden 17 + 18 + AT Protocol-specific rules: 19 + - Floats are not allowed (except integers represented as floats in JSON) *) 20 + 21 + (** Decoding mode for DAG-CBOR *) 22 + type decode_mode = 23 + | Ipld (** Full IPLD spec: allows floats (no NaN/Inf) *) 24 + | Atproto (** AT Protocol spec: no floats allowed *) 9 25 10 26 type error = 11 27 [ `Float_not_allowed 28 + | `Float_nan_not_allowed 29 + | `Float_infinity_not_allowed 12 30 | `Integer_out_of_range 13 31 | `Invalid_cid 14 32 | `Invalid_tag 15 33 | `Invalid_bytes 34 + | `Indefinite_length 16 35 | `Decode_error of string ] 17 36 18 37 let pp_error fmt = function 19 38 | `Float_not_allowed -> 20 - Format.fprintf fmt "floats are not allowed in DAG-CBOR" 39 + Format.fprintf fmt "floats are not allowed in AT Protocol DAG-CBOR" 40 + | `Float_nan_not_allowed -> 41 + Format.fprintf fmt "NaN is not allowed in DAG-CBOR" 42 + | `Float_infinity_not_allowed -> 43 + Format.fprintf fmt "Infinity is not allowed in DAG-CBOR" 21 44 | `Integer_out_of_range -> 22 45 Format.fprintf fmt "integer out of JavaScript safe range" 23 46 | `Invalid_cid -> Format.fprintf fmt "invalid CID in tag 42" 24 47 | `Invalid_tag -> Format.fprintf fmt "unsupported CBOR tag" 25 48 | `Invalid_bytes -> Format.fprintf fmt "invalid bytes encoding" 49 + | `Indefinite_length -> 50 + Format.fprintf fmt "indefinite-length encoding not allowed in DAG-CBOR" 26 51 | `Decode_error msg -> Format.fprintf fmt "decode error: %s" msg 27 52 28 53 let error_to_string e = Format.asprintf "%a" pp_error e 29 54 30 - (** AT Protocol data model value type *) 55 + (** IPLD Data Model value type. 56 + 57 + The 9 IPLD kinds: Null, Boolean, Integer, Float, String, Bytes, List, Map, 58 + Link. Note: In AT Protocol mode, Float is forbidden. *) 31 59 type value = 32 60 | Null 33 61 | Bool of bool 34 62 | Int of int64 (** JavaScript safe integer range *) 63 + | Float of float (** IEEE 754 double; NaN/Infinity forbidden *) 35 64 | String of string 36 65 | Bytes of string (** Raw bytes *) 37 66 | Array of value list 38 67 | Map of (string * value) list (** Keys are sorted *) 39 68 | Link of Cid.t (** CID link *) 69 + 70 + (** Check if a float is NaN *) 71 + let is_nan f = Float.is_nan f 72 + 73 + (** Check if a float is infinite (positive or negative) *) 74 + let is_infinite f = 75 + match classify_float f with FP_infinite -> true | _ -> false 76 + 77 + (** Check if a float is valid for DAG-CBOR (no NaN or Infinity) *) 78 + let is_valid_float f = not (is_nan f || is_infinite f) 40 79 41 80 (** JavaScript safe integer range *) 42 81 let js_safe_min = -9007199254740991L (* -(2^53 - 1) *) ··· 52 91 (** Sort map keys according to DAG-CBOR rules *) 53 92 let sort_map pairs = List.sort (fun (k1, _) (k2, _) -> compare_keys k1 k2) pairs 54 93 55 - (** Encode a value to DAG-CBOR bytes *) 94 + (** Check CBOR bytes for DAG-CBOR strictness violations. 95 + 96 + DAG-CBOR forbids: 97 + - Indefinite-length encoding (additional info 31 for types 0-5) 98 + - Break stop code (0xFF) 99 + 100 + Returns Ok () if valid, Error `Indefinite_length if invalid. *) 101 + let check_strictness (s : string) : (unit, error) result = 102 + let len = String.length s in 103 + let rec check pos = 104 + if pos >= len then Ok () 105 + else 106 + let byte = Char.code s.[pos] in 107 + let major_type = byte lsr 5 in 108 + let additional_info = byte land 0x1F in 109 + 110 + (* Check for indefinite-length encoding or break code *) 111 + if byte = 0xFF then Error `Indefinite_length (* Break stop code *) 112 + else if additional_info = 31 && major_type <= 5 then 113 + Error `Indefinite_length (* Indefinite-length indicator *) 114 + else 115 + (* Calculate the size of this item to skip to next *) 116 + let header_size, data_size = 117 + if additional_info < 24 then (1, 0) 118 + else if additional_info = 24 then (2, 0) 119 + else if additional_info = 25 then (3, 0) 120 + else if additional_info = 26 then (5, 0) 121 + else if additional_info = 27 then (9, 0) 122 + else (1, 0) 123 + (* reserved/invalid, will be caught by decoder *) 124 + in 125 + let value_size = 126 + match major_type with 127 + | 2 | 3 -> 128 + (* bytes or text *) 129 + if additional_info < 24 then additional_info 130 + else if additional_info = 24 && pos + 1 < len then 131 + Char.code s.[pos + 1] 132 + else if additional_info = 25 && pos + 2 < len then 133 + (Char.code s.[pos + 1] lsl 8) lor Char.code s.[pos + 2] 134 + else if additional_info = 26 && pos + 4 < len then 135 + (Char.code s.[pos + 1] lsl 24) 136 + lor (Char.code s.[pos + 2] lsl 16) 137 + lor (Char.code s.[pos + 3] lsl 8) 138 + lor Char.code s.[pos + 4] 139 + else 0 (* will be handled by decoder *) 140 + | _ -> 0 141 + in 142 + let next_pos = pos + header_size + data_size + value_size in 143 + (* For complex types (arrays, maps, tags), we need to recurse into contents *) 144 + (* For now, we do a simple linear scan which catches the common cases *) 145 + check next_pos 146 + in 147 + check 0 148 + 149 + (** Encode a value to DAG-CBOR bytes. 150 + 151 + Floats are encoded as 64-bit IEEE 754 doubles. NaN and Infinity values will 152 + raise Invalid_argument. *) 56 153 let encode (v : value) : string = 57 154 let rec to_cbor = function 58 155 | Null -> `Null 59 156 | Bool b -> `Bool b 60 157 | Int i -> `Int (Int64.to_int i) (* CBOR library uses int *) 158 + | Float f -> 159 + if not (is_valid_float f) then 160 + invalid_arg "DAG-CBOR: NaN and Infinity are not allowed" 161 + else `Float f 61 162 | String s -> `Text s 62 163 | Bytes b -> `Bytes b 63 164 | Array arr -> `Array (List.map to_cbor arr) ··· 71 172 in 72 173 CBOR.Simple.encode (to_cbor v) 73 174 74 - (** Decode DAG-CBOR bytes to a value *) 75 - let decode (s : string) : (value, error) result = 175 + (** Decode DAG-CBOR bytes to a value. 176 + 177 + @param mode 178 + Decoding mode: [Ipld] allows floats, [Atproto] forbids them. Defaults to 179 + [Atproto] for backward compatibility. 180 + @param strict 181 + If true (default), check for DAG-CBOR strictness violations like 182 + indefinite-length encoding before decoding. *) 183 + let decode ?(mode = Atproto) ?(strict = true) (s : string) : 184 + (value, error) result = 76 185 let rec from_cbor = function 77 186 | `Null -> Ok Null 78 187 | `Undefined -> Ok Null (* Treat undefined as null *) ··· 82 191 if i64 < js_safe_min || i64 > js_safe_max then 83 192 Error `Integer_out_of_range 84 193 else Ok (Int i64) 85 - | `Float f -> 86 - (* Check if it's actually an integer *) 87 - if Float.is_integer f then 88 - let i = Int64.of_float f in 89 - if i < js_safe_min || i > js_safe_max then Error `Integer_out_of_range 90 - else Ok (Int i) 91 - else Error `Float_not_allowed 194 + | `Float f -> ( 195 + match mode with 196 + | Atproto -> 197 + (* AT Protocol: only accept floats that are integers *) 198 + if Float.is_integer f then 199 + let i = Int64.of_float f in 200 + if i < js_safe_min || i > js_safe_max then 201 + Error `Integer_out_of_range 202 + else Ok (Int i) 203 + else Error `Float_not_allowed 204 + | Ipld -> 205 + (* IPLD: accept floats but reject NaN/Infinity *) 206 + if is_nan f then Error `Float_nan_not_allowed 207 + else if is_infinite f then Error `Float_infinity_not_allowed 208 + else if Float.is_integer f then 209 + (* Convert integer-valued floats to Int for consistency *) 210 + let i = Int64.of_float f in 211 + if i >= js_safe_min && i <= js_safe_max then Ok (Int i) 212 + else Ok (Float f) 213 + else Ok (Float f)) 92 214 | `Text s -> Ok (String s) 93 215 | `Bytes b -> Ok (Bytes b) 94 216 | `Array arr -> ··· 125 247 | `Tag (_, _) -> Error `Invalid_tag 126 248 | `Simple _ -> Error (`Decode_error "simple values not supported") 127 249 in 128 - try 129 - let cbor = CBOR.Simple.decode s in 130 - from_cbor cbor 131 - with CBOR.Error msg -> Error (`Decode_error msg) 250 + (* First check strictness if requested *) 251 + match if strict then check_strictness s else Ok () with 252 + | Error e -> Error e 253 + | Ok () -> ( 254 + try 255 + let cbor = CBOR.Simple.decode s in 256 + from_cbor cbor 257 + with CBOR.Error msg -> Error (`Decode_error msg)) 132 258 133 259 (** Decode DAG-CBOR bytes, returning the value and any remaining bytes. Useful 134 - for decoding concatenated CBOR values (like firehose frames). *) 135 - let decode_partial (s : string) : (value * string, error) result = 260 + for decoding concatenated CBOR values (like firehose frames). 261 + 262 + @param mode 263 + Decoding mode: [Ipld] allows floats, [Atproto] forbids them. Defaults to 264 + [Atproto] for backward compatibility. 265 + @param strict 266 + If true (default), check for DAG-CBOR strictness violations like 267 + indefinite-length encoding before decoding. Note: strictness check is 268 + performed on the entire input, not just the first value. *) 269 + let decode_partial ?(mode = Atproto) ?(strict = true) (s : string) : 270 + (value * string, error) result = 136 271 let rec from_cbor = function 137 272 | `Null -> Ok Null 138 273 | `Undefined -> Ok Null ··· 142 277 if i64 < js_safe_min || i64 > js_safe_max then 143 278 Error `Integer_out_of_range 144 279 else Ok (Int i64) 145 - | `Float f -> 146 - if Float.is_integer f then 147 - let i = Int64.of_float f in 148 - if i < js_safe_min || i > js_safe_max then Error `Integer_out_of_range 149 - else Ok (Int i) 150 - else Error `Float_not_allowed 280 + | `Float f -> ( 281 + match mode with 282 + | Atproto -> 283 + if Float.is_integer f then 284 + let i = Int64.of_float f in 285 + if i < js_safe_min || i > js_safe_max then 286 + Error `Integer_out_of_range 287 + else Ok (Int i) 288 + else Error `Float_not_allowed 289 + | Ipld -> 290 + if is_nan f then Error `Float_nan_not_allowed 291 + else if is_infinite f then Error `Float_infinity_not_allowed 292 + else if Float.is_integer f then 293 + let i = Int64.of_float f in 294 + if i >= js_safe_min && i <= js_safe_max then Ok (Int i) 295 + else Ok (Float f) 296 + else Ok (Float f)) 151 297 | `Text s -> Ok (String s) 152 298 | `Bytes b -> Ok (Bytes b) 153 299 | `Array arr -> ··· 182 328 | `Tag (_, _) -> Error `Invalid_tag 183 329 | `Simple _ -> Error (`Decode_error "simple values not supported") 184 330 in 185 - try 186 - let cbor, rest = CBOR.Simple.decode_partial s in 187 - match from_cbor cbor with Ok v -> Ok (v, rest) | Error e -> Error e 188 - with CBOR.Error msg -> Error (`Decode_error msg) 331 + (* First check strictness if requested *) 332 + match if strict then check_strictness s else Ok () with 333 + | Error e -> Error e 334 + | Ok () -> ( 335 + try 336 + let cbor, rest = CBOR.Simple.decode_partial s in 337 + match from_cbor cbor with Ok v -> Ok (v, rest) | Error e -> Error e 338 + with CBOR.Error msg -> Error (`Decode_error msg)) 189 339 190 - (** Check if a value is valid according to AT Protocol rules *) 340 + (** Check if a value is valid according to IPLD DAG-CBOR rules. Note: This 341 + checks IPLD validity (floats allowed but no NaN/Inf). For AT Protocol 342 + validity, also ensure no Float values exist. *) 191 343 let rec is_valid = function 192 344 | Null | Bool _ | String _ | Bytes _ | Link _ -> true 193 345 | Int i -> i >= js_safe_min && i <= js_safe_max 346 + | Float f -> is_valid_float f 194 347 | Array arr -> List.for_all is_valid arr 195 348 | Map pairs -> List.for_all (fun (_, v) -> is_valid v) pairs 196 349 350 + (** Check if a value is valid for AT Protocol (no floats allowed) *) 351 + let rec is_atproto_valid = function 352 + | Null | Bool _ | String _ | Bytes _ | Link _ -> true 353 + | Int i -> i >= js_safe_min && i <= js_safe_max 354 + | Float _ -> false (* AT Protocol forbids floats *) 355 + | Array arr -> List.for_all is_atproto_valid arr 356 + | Map pairs -> List.for_all (fun (_, v) -> is_atproto_valid v) pairs 357 + 197 358 (** Equality *) 198 359 let rec equal v1 v2 = 199 360 match (v1, v2) with 200 361 | Null, Null -> true 201 362 | Bool b1, Bool b2 -> b1 = b2 202 363 | Int i1, Int i2 -> i1 = i2 364 + | Float f1, Float f2 -> f1 = f2 203 365 | String s1, String s2 -> s1 = s2 204 366 | Bytes b1, Bytes b2 -> b1 = b2 205 367 | Array a1, Array a2 -> ··· 238 400 | `List of json list 239 401 | `Assoc of (string * json) list ] 240 402 241 - (** Convert a DAG-CBOR value to AT Protocol JSON representation. 403 + (** Convert a DAG-CBOR value to JSON representation. 242 404 - Links become {"$link": "cid-string"} 243 - - Bytes become {"$bytes": "base64-string"} *) 405 + - Bytes become {"$bytes": "base64-string"} 406 + - Floats are represented as JSON floats *) 244 407 let rec to_json (v : value) : json = 245 408 match v with 246 409 | Null -> `Null 247 410 | Bool b -> `Bool b 248 411 | Int i -> `Int (Int64.to_int i) 412 + | Float f -> `Float f 249 413 | String s -> `String s 250 414 | Bytes b -> `Assoc [ ("$bytes", `String (base64_encode b)) ] 251 415 | Array arr -> `List (List.map to_json arr) 252 416 | Map pairs -> `Assoc (List.map (fun (k, v) -> (k, to_json v)) pairs) 253 417 | Link cid -> `Assoc [ ("$link", `String (Cid.to_string cid)) ] 254 418 255 - (** Convert AT Protocol JSON to DAG-CBOR value. 419 + (** Convert JSON to DAG-CBOR value. 256 420 - {"$link": "..."} becomes a Link 257 421 - {"$bytes": "..."} becomes Bytes 258 - - Floats that are integers are converted to Int *) 259 - let rec of_json (j : json) : (value, error) result = 422 + 423 + @param mode Decoding mode: [Ipld] allows floats, [Atproto] forbids them. 424 + Defaults to [Atproto] for backward compatibility. *) 425 + let rec of_json ?(mode = Atproto) (j : json) : (value, error) result = 260 426 match j with 261 427 | `Null -> Ok Null 262 428 | `Bool b -> Ok (Bool b) ··· 264 430 let i64 = Int64.of_int i in 265 431 if i64 < js_safe_min || i64 > js_safe_max then Error `Integer_out_of_range 266 432 else Ok (Int i64) 267 - | `Float f -> 268 - if Float.is_integer f then 269 - let i = Int64.of_float f in 270 - if i < js_safe_min || i > js_safe_max then Error `Integer_out_of_range 271 - else Ok (Int i) 272 - else Error `Float_not_allowed 433 + | `Float f -> ( 434 + match mode with 435 + | Atproto -> 436 + if Float.is_integer f then 437 + let i = Int64.of_float f in 438 + if i < js_safe_min || i > js_safe_max then 439 + Error `Integer_out_of_range 440 + else Ok (Int i) 441 + else Error `Float_not_allowed 442 + | Ipld -> 443 + if is_nan f then Error `Float_nan_not_allowed 444 + else if is_infinite f then Error `Float_infinity_not_allowed 445 + else if Float.is_integer f then 446 + let i = Int64.of_float f in 447 + if i >= js_safe_min && i <= js_safe_max then Ok (Int i) 448 + else Ok (Float f) 449 + else Ok (Float f)) 273 450 | `String s -> Ok (String s) 274 451 | `List arr -> 275 452 let rec convert_list acc = function 276 453 | [] -> Ok (Array (List.rev acc)) 277 454 | x :: xs -> ( 278 - match of_json x with 455 + match of_json ~mode x with 279 456 | Ok v -> convert_list (v :: acc) xs 280 457 | Error e -> Error e) 281 458 in ··· 296 473 let rec convert_pairs acc = function 297 474 | [] -> Ok (Map (sort_map (List.rev acc))) 298 475 | (k, v) :: rest -> ( 299 - match of_json v with 476 + match of_json ~mode v with 300 477 | Ok value -> convert_pairs ((k, value) :: acc) rest 301 478 | Error e -> Error e) 302 479 in
+215
lib/ipld/dag_json.ml
··· 1 + (** DAG-JSON codec for IPLD. 2 + 3 + DAG-JSON is a JSON serialization format for IPLD data that uses special 4 + object structures to represent Links and Bytes: 5 + 6 + - Links: {"/": "<cid-string>"} 7 + - Bytes: {"/": {"bytes": "<base64>"}} 8 + 9 + This format is used for human-readable IPLD data exchange and debugging. 10 + 11 + @see <https://ipld.io/specs/codecs/dag-json/spec/> DAG-JSON specification *) 12 + 13 + type error = 14 + [ `Invalid_cid of string 15 + | `Invalid_bytes of string 16 + | `Invalid_structure of string 17 + | `Unsupported_value of string ] 18 + (** Error types for DAG-JSON operations *) 19 + 20 + (** Pretty-print errors *) 21 + let pp_error fmt = function 22 + | `Invalid_cid msg -> Format.fprintf fmt "invalid CID: %s" msg 23 + | `Invalid_bytes msg -> Format.fprintf fmt "invalid bytes: %s" msg 24 + | `Invalid_structure msg -> Format.fprintf fmt "invalid structure: %s" msg 25 + | `Unsupported_value msg -> Format.fprintf fmt "unsupported value: %s" msg 26 + 27 + let error_to_string e = Format.asprintf "%a" pp_error e 28 + 29 + type json = 30 + [ `Null 31 + | `Bool of bool 32 + | `Int of int 33 + | `Float of float 34 + | `String of string 35 + | `List of json list 36 + | `Assoc of (string * json) list ] 37 + (** JSON type compatible with common JSON libraries *) 38 + 39 + (** Encode bytes to base64 (RFC 4648, no padding) *) 40 + let base64_encode bytes = Base64.encode_exn ~pad:false bytes 41 + 42 + (** Decode base64 to bytes (handles missing padding) *) 43 + let base64_decode s = 44 + match Base64.decode ~pad:false s with 45 + | Ok decoded -> Some decoded 46 + | Error _ -> None 47 + 48 + (** Encode an IPLD value to DAG-JSON. 49 + 50 + DAG-JSON encoding rules: 51 + - Links become {"/": "<cid-string>"} 52 + - Bytes become {"/": {"bytes": "<base64>"}} 53 + - All other values encode as standard JSON *) 54 + let rec encode (v : Dag_cbor.value) : json = 55 + match v with 56 + | Dag_cbor.Null -> `Null 57 + | Dag_cbor.Bool b -> `Bool b 58 + | Dag_cbor.Int i -> 59 + (* Check if it fits in int range *) 60 + let i_int = Int64.to_int i in 61 + if Int64.of_int i_int = i then `Int i_int else `Float (Int64.to_float i) 62 + | Dag_cbor.Float f -> `Float f 63 + | Dag_cbor.String s -> `String s 64 + | Dag_cbor.Bytes b -> 65 + (* Bytes: {"/": {"bytes": "<base64>"}} *) 66 + `Assoc [ ("/", `Assoc [ ("bytes", `String (base64_encode b)) ]) ] 67 + | Dag_cbor.Array arr -> `List (List.map encode arr) 68 + | Dag_cbor.Map pairs -> `Assoc (List.map (fun (k, v) -> (k, encode v)) pairs) 69 + | Dag_cbor.Link cid -> 70 + (* Link: {"/": "<cid-string>"} *) 71 + `Assoc [ ("/", `String (Cid.to_string cid)) ] 72 + 73 + (** Check if a JSON object is a DAG-JSON link ({"/": "<cid-string>"}) *) 74 + let is_link_object = function `Assoc [ ("/", `String _) ] -> true | _ -> false 75 + 76 + (** Check if a JSON object is a DAG-JSON bytes ({"/": {"bytes": "<base64>"}}) *) 77 + let is_bytes_object = function 78 + | `Assoc [ ("/", `Assoc [ ("bytes", `String _) ]) ] -> true 79 + | _ -> false 80 + 81 + (** Decode DAG-JSON to an IPLD value. 82 + 83 + @param mode 84 + Decoding mode: [Ipld] allows floats, [Atproto] forbids non-integer floats. 85 + Defaults to [Atproto] for AT Protocol compatibility. *) 86 + let rec decode ?(mode = Dag_cbor.Atproto) (j : json) : 87 + (Dag_cbor.value, error) result = 88 + match j with 89 + | `Null -> Ok Dag_cbor.Null 90 + | `Bool b -> Ok (Dag_cbor.Bool b) 91 + | `Int i -> Ok (Dag_cbor.Int (Int64.of_int i)) 92 + | `Float f -> ( 93 + match mode with 94 + | Dag_cbor.Atproto -> 95 + if Float.is_integer f then 96 + let i = Int64.of_float f in 97 + Ok (Dag_cbor.Int i) 98 + else 99 + Error 100 + (`Unsupported_value 101 + "non-integer floats not allowed in AT Protocol") 102 + | Dag_cbor.Ipld -> 103 + if Float.is_nan f then 104 + Error (`Unsupported_value "NaN not allowed in DAG-JSON") 105 + else if Float.is_integer f then 106 + (* Convert integer-valued floats to Int *) 107 + let i = Int64.of_float f in 108 + if i >= Dag_cbor.js_safe_min && i <= Dag_cbor.js_safe_max then 109 + Ok (Dag_cbor.Int i) 110 + else Ok (Dag_cbor.Float f) 111 + else Ok (Dag_cbor.Float f)) 112 + | `String s -> Ok (Dag_cbor.String s) 113 + | `List arr -> 114 + let rec decode_list acc = function 115 + | [] -> Ok (Dag_cbor.Array (List.rev acc)) 116 + | x :: xs -> ( 117 + match decode ~mode x with 118 + | Ok v -> decode_list (v :: acc) xs 119 + | Error e -> Error e) 120 + in 121 + decode_list [] arr 122 + | `Assoc pairs -> decode_object ~mode pairs 123 + 124 + and decode_object ~mode pairs = 125 + (* Check for special DAG-JSON structures *) 126 + match pairs with 127 + | [ ("/", `String cid_str) ] -> ( 128 + (* Link: {"/": "<cid-string>"} *) 129 + match Cid.of_string cid_str with 130 + | Ok cid -> Ok (Dag_cbor.Link cid) 131 + | Error _ -> Error (`Invalid_cid cid_str)) 132 + | [ ("/", `Assoc [ ("bytes", `String b64) ]) ] -> ( 133 + (* Bytes: {"/": {"bytes": "<base64>"}} *) 134 + match base64_decode b64 with 135 + | Some bytes -> Ok (Dag_cbor.Bytes bytes) 136 + | None -> Error (`Invalid_bytes ("invalid base64: " ^ b64))) 137 + | [ ("/", _) ] -> 138 + (* Invalid special object structure *) 139 + Error (`Invalid_structure "invalid DAG-JSON special object") 140 + | _ -> 141 + (* Regular object - check for "/" key which would be ambiguous *) 142 + if List.mem_assoc "/" pairs then 143 + Error 144 + (`Invalid_structure 145 + "objects cannot have '/' key (reserved for DAG-JSON)") 146 + else 147 + let rec decode_pairs acc = function 148 + | [] -> Ok (Dag_cbor.Map (Dag_cbor.sort_map (List.rev acc))) 149 + | (k, v) :: rest -> ( 150 + match decode ~mode v with 151 + | Ok value -> decode_pairs ((k, value) :: acc) rest 152 + | Error e -> Error e) 153 + in 154 + decode_pairs [] pairs 155 + 156 + (** Encode an IPLD value to a DAG-JSON string *) 157 + let encode_string (v : Dag_cbor.value) : string = 158 + let json = encode v in 159 + (* Convert to Yojson for string serialization *) 160 + let rec to_yojson : json -> Yojson.Basic.t = function 161 + | `Null -> `Null 162 + | `Bool b -> `Bool b 163 + | `Int i -> `Int i 164 + | `Float f -> `Float f 165 + | `String s -> `String s 166 + | `List l -> `List (List.map to_yojson l) 167 + | `Assoc pairs -> `Assoc (List.map (fun (k, v) -> (k, to_yojson v)) pairs) 168 + in 169 + Yojson.Basic.to_string (to_yojson json) 170 + 171 + (** Encode an IPLD value to a pretty-printed DAG-JSON string *) 172 + let encode_string_pretty (v : Dag_cbor.value) : string = 173 + let json = encode v in 174 + let rec to_yojson : json -> Yojson.Basic.t = function 175 + | `Null -> `Null 176 + | `Bool b -> `Bool b 177 + | `Int i -> `Int i 178 + | `Float f -> `Float f 179 + | `String s -> `String s 180 + | `List l -> `List (List.map to_yojson l) 181 + | `Assoc pairs -> `Assoc (List.map (fun (k, v) -> (k, to_yojson v)) pairs) 182 + in 183 + Yojson.Basic.pretty_to_string (to_yojson json) 184 + 185 + (** Decode a DAG-JSON string to an IPLD value *) 186 + let decode_string ?(mode = Dag_cbor.Atproto) (s : string) : 187 + (Dag_cbor.value, error) result = 188 + try 189 + let yojson = Yojson.Basic.from_string s in 190 + let rec of_yojson : Yojson.Basic.t -> json = function 191 + | `Null -> `Null 192 + | `Bool b -> `Bool b 193 + | `Int i -> `Int i 194 + | `Float f -> `Float f 195 + | `String s -> `String s 196 + | `List l -> `List (List.map of_yojson l) 197 + | `Assoc pairs -> `Assoc (List.map (fun (k, v) -> (k, of_yojson v)) pairs) 198 + in 199 + decode ~mode (of_yojson yojson) 200 + with Yojson.Json_error msg -> 201 + Error (`Invalid_structure ("JSON parse error: " ^ msg)) 202 + 203 + (** Convert DAG-JSON to DAG-CBOR bytes *) 204 + let to_dag_cbor ?(mode = Dag_cbor.Atproto) (j : json) : (string, error) result = 205 + match decode ~mode j with 206 + | Ok value -> Ok (Dag_cbor.encode value) 207 + | Error e -> Error e 208 + 209 + (** Convert DAG-CBOR bytes to DAG-JSON. Returns a combined error type that 210 + includes both DAG-JSON and DAG-CBOR errors. *) 211 + let of_dag_cbor ?(mode = Dag_cbor.Atproto) (bytes : string) : 212 + (json, [ error | `Cbor_error of Dag_cbor.error ]) result = 213 + match Dag_cbor.decode ~mode bytes with 214 + | Ok value -> Ok (encode value) 215 + | Error e -> Error (`Cbor_error e)
+637
test/ipld/test_ipld.ml
··· 139 139 Alcotest.(check string) 140 140 "same string" (Cid.to_string cid1) (Cid.to_string cid2) 141 141 142 + (* === CIDv0 tests === *) 143 + 144 + let test_cidv0_parse () = 145 + (* Known CIDv0 from IPFS - "hello world" *) 146 + let cidv0_str = "QmWATWQ7fVPP2EFGu71UkfnqhYXDYH566qy47CnJDgvs8u" in 147 + match Cid.of_string cidv0_str with 148 + | Ok cid -> 149 + (* CIDv0 should have version V0 *) 150 + Alcotest.(check bool) "is V0" true (Cid.cid_version cid = Cid.V0); 151 + (* CIDv0 implicitly uses dag-pb codec *) 152 + Alcotest.(check bool) "codec is DagPb" true (Cid.codec cid = Cid.DagPb); 153 + (* Hash should be 32 bytes *) 154 + Alcotest.(check int) "hash length" 32 (String.length (Cid.hash cid)) 155 + | Error e -> 156 + Alcotest.fail 157 + (Printf.sprintf "CIDv0 parse failed: %s" (Cid.error_to_string e)) 158 + 159 + let test_cidv0_roundtrip () = 160 + (* Parse a CIDv0 and convert back to string *) 161 + let cidv0_str = "QmWATWQ7fVPP2EFGu71UkfnqhYXDYH566qy47CnJDgvs8u" in 162 + match Cid.of_string cidv0_str with 163 + | Ok cid -> 164 + let encoded = Cid.to_string cid in 165 + (* CIDv0 roundtrip should produce same base58btc string *) 166 + Alcotest.(check string) "roundtrip" cidv0_str encoded 167 + | Error e -> 168 + Alcotest.fail 169 + (Printf.sprintf "CIDv0 parse failed: %s" (Cid.error_to_string e)) 170 + 171 + let test_cidv0_binary_roundtrip () = 172 + (* Parse CIDv0, encode to bytes, decode, verify equality *) 173 + let cidv0_str = "QmWATWQ7fVPP2EFGu71UkfnqhYXDYH566qy47CnJDgvs8u" in 174 + match Cid.of_string cidv0_str with 175 + | Ok cid -> ( 176 + let bytes = Cid.to_bytes cid in 177 + match Cid.of_bytes bytes with 178 + | Ok cid2 -> 179 + Alcotest.(check bool) "binary roundtrip" true (Cid.equal cid cid2) 180 + | Error e -> 181 + Alcotest.fail 182 + (Printf.sprintf "binary roundtrip failed: %s" 183 + (Cid.error_to_string e))) 184 + | Error e -> 185 + Alcotest.fail 186 + (Printf.sprintf "CIDv0 parse failed: %s" (Cid.error_to_string e)) 187 + 188 + let test_cidv0_syntax () = 189 + (* Test CIDv0 syntax validation *) 190 + let valid_v0 = "QmWATWQ7fVPP2EFGu71UkfnqhYXDYH566qy47CnJDgvs8u" in 191 + Alcotest.(check bool) 192 + "valid CIDv0 syntax" true 193 + (Cid.is_valid_cidv0_syntax valid_v0); 194 + 195 + (* AT Protocol syntax validation uses a heuristic (rejects "Qmb" prefix) *) 196 + (* CIDv0s not starting with "Qmb" may pass syntax validation but should 197 + still fail full parsing as they're not CIDv1 *) 198 + let qmb_v0 = "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR" in 199 + Alcotest.(check bool) 200 + "AT Protocol rejects Qmb prefix" false 201 + (Cid.is_valid_syntax qmb_v0); 202 + 203 + (* is_valid_any_syntax should accept both CIDv0 and CIDv1 *) 204 + Alcotest.(check bool) 205 + "any syntax accepts CIDv0" true 206 + (Cid.is_valid_any_syntax valid_v0); 207 + 208 + (* Invalid CIDv0 (wrong length) *) 209 + let invalid_v0 = "QmTooShort" in 210 + Alcotest.(check bool) 211 + "invalid CIDv0 too short" false 212 + (Cid.is_valid_cidv0_syntax invalid_v0) 213 + 214 + let test_cidv1_vs_cidv0 () = 215 + (* CIDv1 should have version V1 *) 216 + let content = "test content" in 217 + let cid = Cid.of_dag_cbor content in 218 + Alcotest.(check bool) "CIDv1 is V1" true (Cid.cid_version cid = Cid.V1); 219 + 220 + (* CIDv1 string should start with 'b' (base32lower) *) 221 + let cid_str = Cid.to_string cid in 222 + Alcotest.(check bool) "CIDv1 starts with 'b'" true (cid_str.[0] = 'b') 223 + 142 224 (* === Test suites === *) 143 225 144 226 let cid_parsing_tests = ··· 153 235 Alcotest.test_case "create raw CID" `Quick test_cid_raw; 154 236 Alcotest.test_case "binary roundtrip" `Quick test_cid_binary_roundtrip; 155 237 Alcotest.test_case "deterministic" `Quick test_deterministic; 238 + ] 239 + 240 + let cidv0_tests = 241 + [ 242 + Alcotest.test_case "parse CIDv0" `Quick test_cidv0_parse; 243 + Alcotest.test_case "CIDv0 roundtrip" `Quick test_cidv0_roundtrip; 244 + Alcotest.test_case "CIDv0 binary roundtrip" `Quick 245 + test_cidv0_binary_roundtrip; 246 + Alcotest.test_case "CIDv0 syntax" `Quick test_cidv0_syntax; 247 + Alcotest.test_case "CIDv1 vs CIDv0" `Quick test_cidv1_vs_cidv0; 156 248 ] 157 249 158 250 (* === DAG-CBOR tests === *) ··· 331 423 Alcotest.test_case "JSON $bytes" `Quick test_dag_cbor_json_bytes; 332 424 ] 333 425 426 + (* === Float tests (IPLD mode) === *) 427 + 428 + let test_float_encode_decode () = 429 + (* Test Float encoding and decoding in IPLD mode *) 430 + (* Note: Integer-valued floats like 0.0 are converted to Int *) 431 + let test_cases = 432 + [ 433 + 1.5; 434 + -2.5; 435 + 3.14159265358979; 436 + 1e10 +. 0.5; 437 + (* Add 0.5 to ensure not integer *) 438 + 1e-10; 439 + ] 440 + in 441 + List.iter 442 + (fun f -> 443 + let value = Dag_cbor.Float f in 444 + let encoded = Dag_cbor.encode value in 445 + match Dag_cbor.decode ~mode:Dag_cbor.Ipld encoded with 446 + | Ok (Dag_cbor.Float decoded_f) -> 447 + Alcotest.(check (float 0.0)) 448 + (Printf.sprintf "float roundtrip: %f" f) 449 + f decoded_f 450 + | Ok _ -> Alcotest.fail "expected Float" 451 + | Error e -> 452 + Alcotest.fail 453 + (Printf.sprintf "decode failed: %s" (Dag_cbor.error_to_string e))) 454 + test_cases 455 + 456 + let test_float_rejected_atproto () = 457 + (* Test that non-integer floats are rejected in AT Protocol mode *) 458 + let value = Dag_cbor.Float 1.5 in 459 + let encoded = Dag_cbor.encode value in 460 + match Dag_cbor.decode ~mode:Dag_cbor.Atproto encoded with 461 + | Ok _ -> Alcotest.fail "should reject float in AT Protocol mode" 462 + | Error `Float_not_allowed -> () 463 + | Error e -> 464 + Alcotest.fail 465 + (Printf.sprintf "expected Float_not_allowed, got: %s" 466 + (Dag_cbor.error_to_string e)) 467 + 468 + let test_float_integer_converted () = 469 + (* Test that integer-valued floats are converted to Int *) 470 + let value = Dag_cbor.Float 42.0 in 471 + let encoded = Dag_cbor.encode value in 472 + (* In both modes, integer floats become Int *) 473 + match Dag_cbor.decode ~mode:Dag_cbor.Ipld encoded with 474 + | Ok (Dag_cbor.Int i) -> 475 + Alcotest.(check int64) "integer float becomes Int" 42L i 476 + | Ok _ -> Alcotest.fail "expected Int" 477 + | Error e -> 478 + Alcotest.fail 479 + (Printf.sprintf "decode failed: %s" (Dag_cbor.error_to_string e)) 480 + 481 + let test_float_nan_rejected () = 482 + (* Test that NaN is rejected *) 483 + try 484 + let _ = Dag_cbor.encode (Dag_cbor.Float Float.nan) in 485 + Alcotest.fail "should reject NaN" 486 + with Invalid_argument _ -> () 487 + 488 + let test_float_infinity_rejected () = 489 + (* Test that Infinity is rejected *) 490 + try 491 + let _ = Dag_cbor.encode (Dag_cbor.Float Float.infinity) in 492 + Alcotest.fail "should reject Infinity" 493 + with Invalid_argument _ -> () 494 + 495 + let test_float_neg_infinity_rejected () = 496 + (* Test that -Infinity is rejected *) 497 + try 498 + let _ = Dag_cbor.encode (Dag_cbor.Float Float.neg_infinity) in 499 + Alcotest.fail "should reject -Infinity" 500 + with Invalid_argument _ -> () 501 + 502 + let test_float_in_array () = 503 + (* Test floats in arrays work in IPLD mode *) 504 + let value = 505 + Dag_cbor.Array [ Dag_cbor.Float 1.5; Dag_cbor.Int 42L; Dag_cbor.Float 3.14 ] 506 + in 507 + let encoded = Dag_cbor.encode value in 508 + match Dag_cbor.decode ~mode:Dag_cbor.Ipld encoded with 509 + | Ok (Dag_cbor.Array [ Dag_cbor.Float f1; Dag_cbor.Int i; Dag_cbor.Float f2 ]) 510 + -> 511 + Alcotest.(check (float 0.0)) "first float" 1.5 f1; 512 + Alcotest.(check int64) "integer" 42L i; 513 + Alcotest.(check (float 0.0)) "second float" 3.14 f2 514 + | Ok _ -> Alcotest.fail "unexpected structure" 515 + | Error e -> 516 + Alcotest.fail 517 + (Printf.sprintf "decode failed: %s" (Dag_cbor.error_to_string e)) 518 + 519 + let test_float_in_map () = 520 + (* Test floats in maps work in IPLD mode *) 521 + let value = 522 + Dag_cbor.Map 523 + [ ("temperature", Dag_cbor.Float 98.6); ("count", Dag_cbor.Int 5L) ] 524 + in 525 + let encoded = Dag_cbor.encode value in 526 + match Dag_cbor.decode ~mode:Dag_cbor.Ipld encoded with 527 + | Ok (Dag_cbor.Map pairs) -> ( 528 + let temp = List.assoc "temperature" pairs in 529 + let count = List.assoc "count" pairs in 530 + (match temp with 531 + | Dag_cbor.Float f -> Alcotest.(check (float 0.0)) "temperature" 98.6 f 532 + | _ -> Alcotest.fail "expected Float for temperature"); 533 + match count with 534 + | Dag_cbor.Int i -> Alcotest.(check int64) "count" 5L i 535 + | _ -> Alcotest.fail "expected Int for count") 536 + | Ok _ -> Alcotest.fail "unexpected structure" 537 + | Error e -> 538 + Alcotest.fail 539 + (Printf.sprintf "decode failed: %s" (Dag_cbor.error_to_string e)) 540 + 541 + let test_float_validity () = 542 + (* Test is_valid and is_atproto_valid *) 543 + let valid_float = Dag_cbor.Float 3.14 in 544 + Alcotest.(check bool) 545 + "float is IPLD valid" true 546 + (Dag_cbor.is_valid valid_float); 547 + Alcotest.(check bool) 548 + "float is NOT AT Protocol valid" false 549 + (Dag_cbor.is_atproto_valid valid_float); 550 + 551 + let int_val = Dag_cbor.Int 42L in 552 + Alcotest.(check bool) "int is IPLD valid" true (Dag_cbor.is_valid int_val); 553 + Alcotest.(check bool) 554 + "int is AT Protocol valid" true 555 + (Dag_cbor.is_atproto_valid int_val) 556 + 557 + let test_float_json_ipld () = 558 + (* Test JSON conversion in IPLD mode *) 559 + let json : Dag_cbor.json = `Float 3.14 in 560 + match Dag_cbor.of_json ~mode:Dag_cbor.Ipld json with 561 + | Ok (Dag_cbor.Float f) -> 562 + Alcotest.(check (float 0.0)) "float from JSON" 3.14 f 563 + | Ok _ -> Alcotest.fail "expected Float" 564 + | Error e -> 565 + Alcotest.fail 566 + (Printf.sprintf "parse failed: %s" (Dag_cbor.error_to_string e)) 567 + 568 + let test_float_json_atproto () = 569 + (* Test JSON conversion rejects floats in AT Protocol mode *) 570 + let json : Dag_cbor.json = `Float 3.14 in 571 + match Dag_cbor.of_json ~mode:Dag_cbor.Atproto json with 572 + | Ok _ -> Alcotest.fail "should reject float in AT Protocol mode" 573 + | Error `Float_not_allowed -> () 574 + | Error e -> 575 + Alcotest.fail 576 + (Printf.sprintf "expected Float_not_allowed, got: %s" 577 + (Dag_cbor.error_to_string e)) 578 + 579 + let test_float_equality () = 580 + (* Test Float equality *) 581 + let f1 = Dag_cbor.Float 3.14 in 582 + let f2 = Dag_cbor.Float 3.14 in 583 + let f3 = Dag_cbor.Float 2.71 in 584 + Alcotest.(check bool) "equal floats" true (Dag_cbor.equal f1 f2); 585 + Alcotest.(check bool) "different floats" false (Dag_cbor.equal f1 f3); 586 + Alcotest.(check bool) 587 + "float vs int" false 588 + (Dag_cbor.equal (Dag_cbor.Float 1.0) (Dag_cbor.Int 1L)) 589 + 590 + let float_tests = 591 + [ 592 + Alcotest.test_case "encode/decode" `Quick test_float_encode_decode; 593 + Alcotest.test_case "rejected in AT Protocol mode" `Quick 594 + test_float_rejected_atproto; 595 + Alcotest.test_case "integer float converted to Int" `Quick 596 + test_float_integer_converted; 597 + Alcotest.test_case "NaN rejected" `Quick test_float_nan_rejected; 598 + Alcotest.test_case "Infinity rejected" `Quick test_float_infinity_rejected; 599 + Alcotest.test_case "-Infinity rejected" `Quick 600 + test_float_neg_infinity_rejected; 601 + Alcotest.test_case "float in array" `Quick test_float_in_array; 602 + Alcotest.test_case "float in map" `Quick test_float_in_map; 603 + Alcotest.test_case "validity checks" `Quick test_float_validity; 604 + Alcotest.test_case "JSON IPLD mode" `Quick test_float_json_ipld; 605 + Alcotest.test_case "JSON AT Protocol mode" `Quick test_float_json_atproto; 606 + Alcotest.test_case "equality" `Quick test_float_equality; 607 + ] 608 + 609 + (* === CBOR Strictness tests === *) 610 + 611 + let test_strictness_indefinite_bytes () = 612 + (* Indefinite-length byte string: 0x5F followed by chunks and 0xFF break *) 613 + (* 0x5F = major type 2 (bytes) + additional info 31 (indefinite) *) 614 + let indefinite_bytes = "\x5F\x41\x61\xFF" in 615 + (* 0x5F: start indefinite bytes, 0x41 0x61: 1-byte chunk "a", 0xFF: break *) 616 + match Dag_cbor.decode indefinite_bytes with 617 + | Ok _ -> Alcotest.fail "should reject indefinite-length bytes" 618 + | Error `Indefinite_length -> () 619 + | Error e -> 620 + Alcotest.fail 621 + (Printf.sprintf "expected Indefinite_length, got: %s" 622 + (Dag_cbor.error_to_string e)) 623 + 624 + let test_strictness_indefinite_text () = 625 + (* Indefinite-length text string: 0x7F followed by chunks and 0xFF break *) 626 + (* 0x7F = major type 3 (text) + additional info 31 (indefinite) *) 627 + let indefinite_text = "\x7F\x61\x61\xFF" in 628 + (* 0x7F: start indefinite text, 0x61 0x61: 1-char chunk "a", 0xFF: break *) 629 + match Dag_cbor.decode indefinite_text with 630 + | Ok _ -> Alcotest.fail "should reject indefinite-length text" 631 + | Error `Indefinite_length -> () 632 + | Error e -> 633 + Alcotest.fail 634 + (Printf.sprintf "expected Indefinite_length, got: %s" 635 + (Dag_cbor.error_to_string e)) 636 + 637 + let test_strictness_indefinite_array () = 638 + (* Indefinite-length array: 0x9F followed by items and 0xFF break *) 639 + (* 0x9F = major type 4 (array) + additional info 31 (indefinite) *) 640 + let indefinite_array = "\x9F\x01\x02\xFF" in 641 + (* 0x9F: start indefinite array, 0x01 0x02: items 1, 2, 0xFF: break *) 642 + match Dag_cbor.decode indefinite_array with 643 + | Ok _ -> Alcotest.fail "should reject indefinite-length array" 644 + | Error `Indefinite_length -> () 645 + | Error e -> 646 + Alcotest.fail 647 + (Printf.sprintf "expected Indefinite_length, got: %s" 648 + (Dag_cbor.error_to_string e)) 649 + 650 + let test_strictness_indefinite_map () = 651 + (* Indefinite-length map: 0xBF followed by pairs and 0xFF break *) 652 + (* 0xBF = major type 5 (map) + additional info 31 (indefinite) *) 653 + let indefinite_map = "\xBF\x61\x61\x01\xFF" in 654 + (* 0xBF: start indefinite map, "a": 1, 0xFF: break *) 655 + match Dag_cbor.decode indefinite_map with 656 + | Ok _ -> Alcotest.fail "should reject indefinite-length map" 657 + | Error `Indefinite_length -> () 658 + | Error e -> 659 + Alcotest.fail 660 + (Printf.sprintf "expected Indefinite_length, got: %s" 661 + (Dag_cbor.error_to_string e)) 662 + 663 + let test_strictness_break_code () = 664 + (* Bare break stop code: 0xFF is not allowed *) 665 + let break_code = "\xFF" in 666 + match Dag_cbor.decode break_code with 667 + | Ok _ -> Alcotest.fail "should reject bare break code" 668 + | Error `Indefinite_length -> () 669 + | Error e -> 670 + (* Could also be a decode error from the CBOR library *) 671 + Alcotest.(check bool) 672 + "rejected break code" true 673 + (match e with 674 + | `Indefinite_length | `Decode_error _ -> true 675 + | _ -> false) 676 + 677 + let test_strictness_valid_definite () = 678 + (* Test that definite-length encoding works fine *) 679 + let value = Dag_cbor.Map [ ("a", Dag_cbor.Int 1L); ("b", Dag_cbor.Int 2L) ] in 680 + let encoded = Dag_cbor.encode value in 681 + match Dag_cbor.decode encoded with 682 + | Ok decoded -> 683 + Alcotest.(check bool) "valid definite" true (Dag_cbor.equal value decoded) 684 + | Error e -> 685 + Alcotest.fail 686 + (Printf.sprintf "valid definite encoding rejected: %s" 687 + (Dag_cbor.error_to_string e)) 688 + 689 + let test_strictness_disabled () = 690 + (* Test that ~strict:false bypasses the check *) 691 + (* Note: The underlying CBOR library may still reject invalid CBOR *) 692 + let value = Dag_cbor.Int 42L in 693 + let encoded = Dag_cbor.encode value in 694 + match Dag_cbor.decode ~strict:false encoded with 695 + | Ok (Dag_cbor.Int i) -> 696 + Alcotest.(check int64) "decoded with strict:false" 42L i 697 + | Ok _ -> Alcotest.fail "expected Int" 698 + | Error e -> 699 + Alcotest.fail 700 + (Printf.sprintf "decode with strict:false failed: %s" 701 + (Dag_cbor.error_to_string e)) 702 + 703 + let test_strictness_check_function () = 704 + (* Test check_strictness function directly *) 705 + (* Valid definite encoding *) 706 + let valid = "\xa2\x61\x61\x01\x61\x62\x02" in 707 + (* {"a": 1, "b": 2} *) 708 + Alcotest.(check bool) 709 + "valid passes check" true 710 + (Result.is_ok (Dag_cbor.check_strictness valid)); 711 + 712 + (* Indefinite array *) 713 + let invalid = "\x9F\x01\xFF" in 714 + Alcotest.(check bool) 715 + "indefinite fails check" true 716 + (Result.is_error (Dag_cbor.check_strictness invalid)) 717 + 718 + let strictness_tests = 719 + [ 720 + Alcotest.test_case "reject indefinite bytes" `Quick 721 + test_strictness_indefinite_bytes; 722 + Alcotest.test_case "reject indefinite text" `Quick 723 + test_strictness_indefinite_text; 724 + Alcotest.test_case "reject indefinite array" `Quick 725 + test_strictness_indefinite_array; 726 + Alcotest.test_case "reject indefinite map" `Quick 727 + test_strictness_indefinite_map; 728 + Alcotest.test_case "reject break code" `Quick test_strictness_break_code; 729 + Alcotest.test_case "valid definite encoding" `Quick 730 + test_strictness_valid_definite; 731 + Alcotest.test_case "strict:false bypasses check" `Quick 732 + test_strictness_disabled; 733 + Alcotest.test_case "check_strictness function" `Quick 734 + test_strictness_check_function; 735 + ] 736 + 334 737 (* === CAR tests === *) 335 738 336 739 let test_car_roundtrip () = ··· 752 1155 Alcotest.test_case "invalid data models" `Quick test_data_model_invalid; 753 1156 ] 754 1157 1158 + (* === DAG-JSON tests === *) 1159 + 1160 + let test_dag_json_link () = 1161 + (* Test Link encoding: {"/": "<cid-string>"} *) 1162 + let cid = Cid.of_dag_cbor "test content" in 1163 + let value = Dag_cbor.Link cid in 1164 + let json = Dag_json.encode value in 1165 + match json with 1166 + | `Assoc [ ("/", `String cid_str) ] -> 1167 + Alcotest.(check string) "CID string" (Cid.to_string cid) cid_str 1168 + | _ -> Alcotest.fail "expected {/: <cid>}" 1169 + 1170 + let test_dag_json_bytes () = 1171 + (* Test Bytes encoding: {"/": {"bytes": "<base64>"}} *) 1172 + let bytes = "\x00\x01\x02\x03\xff\xfe" in 1173 + let value = Dag_cbor.Bytes bytes in 1174 + let json = Dag_json.encode value in 1175 + match json with 1176 + | `Assoc [ ("/", `Assoc [ ("bytes", `String b64) ]) ] -> ( 1177 + (* Decode and verify *) 1178 + match Base64.decode ~pad:false b64 with 1179 + | Ok decoded -> Alcotest.(check string) "bytes roundtrip" bytes decoded 1180 + | Error _ -> Alcotest.fail "invalid base64") 1181 + | _ -> Alcotest.fail "expected {/: {bytes: <base64>}}" 1182 + 1183 + let test_dag_json_roundtrip () = 1184 + (* Test full roundtrip: IPLD value -> DAG-JSON -> IPLD value *) 1185 + let cid = Cid.of_dag_cbor "nested link" in 1186 + let value = 1187 + Dag_cbor.Map 1188 + [ 1189 + ("name", Dag_cbor.String "test"); 1190 + ("count", Dag_cbor.Int 42L); 1191 + ("data", Dag_cbor.Bytes "\x01\x02\x03"); 1192 + ("ref", Dag_cbor.Link cid); 1193 + ( "items", 1194 + Dag_cbor.Array [ Dag_cbor.Int 1L; Dag_cbor.Int 2L; Dag_cbor.Int 3L ] 1195 + ); 1196 + ] 1197 + in 1198 + let json = Dag_json.encode value in 1199 + match Dag_json.decode json with 1200 + | Ok decoded -> 1201 + Alcotest.(check bool) 1202 + "roundtrip equal" true 1203 + (Dag_cbor.equal value decoded) 1204 + | Error e -> 1205 + Alcotest.fail 1206 + (Printf.sprintf "decode failed: %s" (Dag_json.error_to_string e)) 1207 + 1208 + let test_dag_json_string_roundtrip () = 1209 + (* Test string serialization roundtrip *) 1210 + let value = 1211 + Dag_cbor.Map 1212 + [ 1213 + ("hello", Dag_cbor.String "world"); 1214 + ( "nested", 1215 + Dag_cbor.Map [ ("a", Dag_cbor.Int 1L); ("b", Dag_cbor.Int 2L) ] ); 1216 + ] 1217 + in 1218 + let json_str = Dag_json.encode_string value in 1219 + match Dag_json.decode_string json_str with 1220 + | Ok decoded -> 1221 + Alcotest.(check bool) 1222 + "string roundtrip" true 1223 + (Dag_cbor.equal value decoded) 1224 + | Error e -> 1225 + Alcotest.fail 1226 + (Printf.sprintf "decode failed: %s" (Dag_json.error_to_string e)) 1227 + 1228 + let test_dag_json_decode_link () = 1229 + (* Test decoding a link from JSON *) 1230 + let cid_str = "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi" in 1231 + let json : Dag_json.json = `Assoc [ ("/", `String cid_str) ] in 1232 + match Dag_json.decode json with 1233 + | Ok (Dag_cbor.Link cid) -> 1234 + Alcotest.(check string) "CID" cid_str (Cid.to_string cid) 1235 + | Ok _ -> Alcotest.fail "expected Link" 1236 + | Error e -> 1237 + Alcotest.fail 1238 + (Printf.sprintf "decode failed: %s" (Dag_json.error_to_string e)) 1239 + 1240 + let test_dag_json_decode_bytes () = 1241 + (* Test decoding bytes from JSON *) 1242 + let original = "\x00\x01\x02\x03\xff" in 1243 + let b64 = Base64.encode_exn ~pad:false original in 1244 + let json : Dag_json.json = 1245 + `Assoc [ ("/", `Assoc [ ("bytes", `String b64) ]) ] 1246 + in 1247 + match Dag_json.decode json with 1248 + | Ok (Dag_cbor.Bytes decoded) -> 1249 + Alcotest.(check string) "bytes" original decoded 1250 + | Ok _ -> Alcotest.fail "expected Bytes" 1251 + | Error e -> 1252 + Alcotest.fail 1253 + (Printf.sprintf "decode failed: %s" (Dag_json.error_to_string e)) 1254 + 1255 + let test_dag_json_invalid_link () = 1256 + (* Test that invalid CID is rejected *) 1257 + let json : Dag_json.json = `Assoc [ ("/", `String "not-a-valid-cid") ] in 1258 + match Dag_json.decode json with 1259 + | Ok _ -> Alcotest.fail "should reject invalid CID" 1260 + | Error (`Invalid_cid _) -> () 1261 + | Error e -> 1262 + Alcotest.fail 1263 + (Printf.sprintf "expected Invalid_cid, got: %s" 1264 + (Dag_json.error_to_string e)) 1265 + 1266 + let test_dag_json_invalid_bytes () = 1267 + (* Test that invalid base64 is rejected *) 1268 + let json : Dag_json.json = 1269 + `Assoc [ ("/", `Assoc [ ("bytes", `String "!!!invalid!!!") ]) ] 1270 + in 1271 + match Dag_json.decode json with 1272 + | Ok _ -> Alcotest.fail "should reject invalid base64" 1273 + | Error (`Invalid_bytes _) -> () 1274 + | Error e -> 1275 + Alcotest.fail 1276 + (Printf.sprintf "expected Invalid_bytes, got: %s" 1277 + (Dag_json.error_to_string e)) 1278 + 1279 + let test_dag_json_reserved_slash () = 1280 + (* Test that objects with "/" key are rejected (reserved for DAG-JSON) *) 1281 + let json : Dag_json.json = 1282 + `Assoc [ ("/", `Int 42); ("other", `String "value") ] 1283 + in 1284 + match Dag_json.decode json with 1285 + | Ok _ -> Alcotest.fail "should reject object with / key" 1286 + | Error (`Invalid_structure _) -> () 1287 + | Error e -> 1288 + Alcotest.fail 1289 + (Printf.sprintf "expected Invalid_structure, got: %s" 1290 + (Dag_json.error_to_string e)) 1291 + 1292 + let test_dag_json_to_cbor () = 1293 + (* Test DAG-JSON to DAG-CBOR conversion *) 1294 + let cid = Cid.of_dag_cbor "test" in 1295 + let json : Dag_json.json = 1296 + `Assoc 1297 + [ 1298 + ("link", `Assoc [ ("/", `String (Cid.to_string cid)) ]); 1299 + ("value", `Int 42); 1300 + ] 1301 + in 1302 + match Dag_json.to_dag_cbor json with 1303 + | Ok cbor_bytes -> ( 1304 + (* Decode the CBOR and verify *) 1305 + match Dag_cbor.decode cbor_bytes with 1306 + | Ok (Dag_cbor.Map pairs) -> 1307 + Alcotest.(check int) "pair count" 2 (List.length pairs) 1308 + | Ok _ -> Alcotest.fail "expected Map" 1309 + | Error e -> 1310 + Alcotest.fail 1311 + (Printf.sprintf "CBOR decode failed: %s" 1312 + (Dag_cbor.error_to_string e))) 1313 + | Error e -> 1314 + Alcotest.fail 1315 + (Printf.sprintf "to_dag_cbor failed: %s" (Dag_json.error_to_string e)) 1316 + 1317 + let test_dag_json_of_cbor () = 1318 + (* Test DAG-CBOR to DAG-JSON conversion *) 1319 + let value = 1320 + Dag_cbor.Map [ ("a", Dag_cbor.Int 1L); ("b", Dag_cbor.String "hello") ] 1321 + in 1322 + let cbor_bytes = Dag_cbor.encode value in 1323 + match Dag_json.of_dag_cbor cbor_bytes with 1324 + | Ok json -> ( 1325 + (* Verify structure *) 1326 + match json with 1327 + | `Assoc pairs -> Alcotest.(check int) "pair count" 2 (List.length pairs) 1328 + | _ -> Alcotest.fail "expected Assoc") 1329 + | Error _ -> Alcotest.fail "of_dag_cbor failed" 1330 + 1331 + let test_dag_json_float_ipld () = 1332 + (* Test float handling in IPLD mode *) 1333 + let json : Dag_json.json = `Float 3.14 in 1334 + match Dag_json.decode ~mode:Dag_cbor.Ipld json with 1335 + | Ok (Dag_cbor.Float f) -> Alcotest.(check (float 0.001)) "float" 3.14 f 1336 + | Ok _ -> Alcotest.fail "expected Float" 1337 + | Error e -> 1338 + Alcotest.fail 1339 + (Printf.sprintf "decode failed: %s" (Dag_json.error_to_string e)) 1340 + 1341 + let test_dag_json_float_atproto () = 1342 + (* Test that non-integer floats are rejected in AT Protocol mode *) 1343 + let json : Dag_json.json = `Float 3.14 in 1344 + match Dag_json.decode ~mode:Dag_cbor.Atproto json with 1345 + | Ok _ -> Alcotest.fail "should reject float in AT Protocol mode" 1346 + | Error (`Unsupported_value _) -> () 1347 + | Error e -> 1348 + Alcotest.fail 1349 + (Printf.sprintf "expected Unsupported_value, got: %s" 1350 + (Dag_json.error_to_string e)) 1351 + 1352 + let test_dag_json_pretty () = 1353 + (* Test pretty printing - verify it produces different output than compact *) 1354 + let value = 1355 + Dag_cbor.Map 1356 + [ 1357 + ("key", Dag_cbor.String "value"); 1358 + ( "nested", 1359 + Dag_cbor.Map [ ("a", Dag_cbor.Int 1L); ("b", Dag_cbor.Int 2L) ] ); 1360 + ] 1361 + in 1362 + let compact = Dag_json.encode_string value in 1363 + let pretty = Dag_json.encode_string_pretty value in 1364 + (* Pretty should have more whitespace/formatting *) 1365 + Alcotest.(check bool) 1366 + "pretty has more chars" true 1367 + (String.length pretty > String.length compact) 1368 + 1369 + let dag_json_tests = 1370 + [ 1371 + Alcotest.test_case "encode link" `Quick test_dag_json_link; 1372 + Alcotest.test_case "encode bytes" `Quick test_dag_json_bytes; 1373 + Alcotest.test_case "roundtrip" `Quick test_dag_json_roundtrip; 1374 + Alcotest.test_case "string roundtrip" `Quick test_dag_json_string_roundtrip; 1375 + Alcotest.test_case "decode link" `Quick test_dag_json_decode_link; 1376 + Alcotest.test_case "decode bytes" `Quick test_dag_json_decode_bytes; 1377 + Alcotest.test_case "invalid link" `Quick test_dag_json_invalid_link; 1378 + Alcotest.test_case "invalid bytes" `Quick test_dag_json_invalid_bytes; 1379 + Alcotest.test_case "reserved slash key" `Quick test_dag_json_reserved_slash; 1380 + Alcotest.test_case "to DAG-CBOR" `Quick test_dag_json_to_cbor; 1381 + Alcotest.test_case "of DAG-CBOR" `Quick test_dag_json_of_cbor; 1382 + Alcotest.test_case "float IPLD mode" `Quick test_dag_json_float_ipld; 1383 + Alcotest.test_case "float AT Protocol mode" `Quick 1384 + test_dag_json_float_atproto; 1385 + Alcotest.test_case "pretty print" `Quick test_dag_json_pretty; 1386 + ] 1387 + 755 1388 (* === Test suites === *) 756 1389 757 1390 let () = ··· 759 1392 [ 760 1393 ("cid_parsing", cid_parsing_tests); 761 1394 ("cid_creation", cid_creation_tests); 1395 + ("cidv0", cidv0_tests); 762 1396 ("dag_cbor", dag_cbor_tests); 1397 + ("float", float_tests); 1398 + ("strictness", strictness_tests); 1399 + ("dag_json", dag_json_tests); 763 1400 ("car", car_tests); 764 1401 ("blob", blob_tests); 765 1402 ("data_model", data_model_tests);
+5 -2
test/syntax/test_syntax.ml
··· 294 294 vectors 295 295 296 296 let test_cid_invalid () = 297 + (* These are CIDs that should be rejected by AT Protocol syntax validation. 298 + Note: Some of these (like CIDv0) are valid IPLD CIDs but not accepted in AT Protocol. *) 297 299 let vectors = 298 300 load_test_vectors ~preserve_whitespace:true 299 301 (fixture_dir ^ "/cid_syntax_invalid.txt") 300 302 in 301 303 List.iter 302 304 (fun cid_str -> 303 - let result = Atproto_ipld.Cid.of_string cid_str in 305 + (* Use is_valid_syntax for AT Protocol syntax validation *) 306 + let result = Atproto_ipld.Cid.is_valid_syntax cid_str in 304 307 Alcotest.(check bool) 305 308 (Printf.sprintf "CID invalid: %s" cid_str) 306 - true (Result.is_error result)) 309 + true (not result)) 307 310 vectors 308 311 309 312 (* =========================== Language Tag Tests =========================== *)