atproto libraries implementation in ocaml

lexicon: switch JSON from yojson to simdjsont

Changed files
+193 -127
.beads
lib
test
lexicon
+1 -1
.beads/issues.jsonl
··· 28 28 {"id":"atproto-52","title":"Implement Lexicon validation","description":"Implement Lexicon-based validation for AT Protocol data. This validates records and API payloads against schemas.","design":"## Module Structure\n\n```ocaml\n(* atproto-lexicon/lib/validator.ml *)\ntype validation_error = {\n path: string list;\n message: string;\n}\n\nval validate_record :\n registry:Registry.t -\u003e\n nsid:Nsid.t -\u003e\n value:Dag_cbor.value -\u003e\n (unit, validation_error list) result\n\nval validate_xrpc_params :\n registry:Registry.t -\u003e\n nsid:Nsid.t -\u003e\n params:(string * string) list -\u003e\n (unit, validation_error list) result\n\nval validate_xrpc_input :\n registry:Registry.t -\u003e\n nsid:Nsid.t -\u003e\n input:Jsont.json -\u003e\n (unit, validation_error list) result\n\nval validate_xrpc_output :\n registry:Registry.t -\u003e\n nsid:Nsid.t -\u003e\n output:Jsont.json -\u003e\n (unit, validation_error list) result\n```\n\n## Constraint Types\n\n- **String**: minLength, maxLength, minGraphemes, maxGraphemes, format, enum, const\n- **Integer**: minimum, maximum, enum, const\n- **Bytes**: minLength, maxLength\n- **Array**: minLength, maxLength, items type\n- **Blob**: maxSize, accept (MIME types)\n- **Union**: open/closed, refs\n\n## Format Validators (Parser-based, NO REGEX)\n\nEach format has a dedicated parser module:\n\n```ocaml\n(* atproto-lexicon/lib/formats.ml *)\n\nlet validate_did s = Did.of_string s |\u003e Result.is_ok\nlet validate_handle s = Handle.of_string s |\u003e Result.is_ok\nlet validate_nsid s = Nsid.of_string s |\u003e Result.is_ok\nlet validate_tid s = Tid.of_string s |\u003e Result.is_ok\nlet validate_cid s = Cid.of_string s |\u003e Result.is_ok\nlet validate_at_uri s = At_uri.of_string s |\u003e Result.is_ok\nlet validate_at_identifier s = \n Did.of_string s |\u003e Result.is_ok || Handle.of_string s |\u003e Result.is_ok\nlet validate_record_key s = Record_key.of_string s |\u003e Result.is_ok\n\nlet validate_datetime s =\n (* Hand-written RFC-3339 parser *)\n parse_datetime s |\u003e Result.is_ok\n\nlet validate_language s =\n (* BCP-47 language tag parser *)\n parse_language_tag s |\u003e Result.is_ok\n\nlet validate_uri s =\n (* RFC-3986 URI parser *)\n Uri.of_string s |\u003e Option.is_some\n```\n\n## Dependencies\n- atproto-lexicon (schema)\n- atproto-syntax (format validators)\n- jsont","acceptance_criteria":"- Validate records against schemas\n- Validate XRPC params, input, output\n- Proper error messages with paths\n- All constraint types supported\n- All record-data interop tests pass","status":"closed","priority":1,"issue_type":"feature","assignee":"claude","created_at":"2025-12-28T00:11:39.125440686+01:00","updated_at":"2025-12-28T10:25:46.671434007+01:00","closed_at":"2025-12-28T10:25:46.671434007+01:00","labels":["application","lexicon"],"dependencies":[{"issue_id":"atproto-52","depends_on_id":"atproto-50","type":"parent-child","created_at":"2025-12-28T00:12:05.375287273+01:00","created_by":"daemon"},{"issue_id":"atproto-52","depends_on_id":"atproto-51","type":"blocks","created_at":"2025-12-28T00:12:09.479940241+01:00","created_by":"daemon"}]} 29 29 {"id":"atproto-53","title":"Implement Lexicon code generation","description":"Implement code generation from Lexicon schemas to OCaml types and API bindings.","design":"## Module Structure\n\n```ocaml\n(* atproto-lexicon-gen/lib/codegen.ml *)\ntype config = {\n output_dir: string;\n module_prefix: string;\n}\n\nval generate_types : config:config -\u003e lexicon:Lexicon.t -\u003e unit\nval generate_client : config:config -\u003e lexicons:Lexicon.t list -\u003e unit\n```\n\n## Generated Code Example\n\nInput Lexicon:\n```json\n{\n \"id\": \"app.bsky.feed.post\",\n \"defs\": {\n \"main\": {\n \"type\": \"record\",\n \"record\": {\n \"type\": \"object\",\n \"properties\": {\n \"text\": { \"type\": \"string\", \"maxGraphemes\": 300 },\n \"createdAt\": { \"type\": \"string\", \"format\": \"datetime\" }\n }\n }\n }\n }\n}\n```\n\nGenerated OCaml:\n```ocaml\nmodule App_bsky_feed_post = struct\n type t = {\n text: string;\n created_at: Ptime.t;\n }\n \n let jsont : t Jsont.t =\n Jsont.obj \"app.bsky.feed.post\" @@ fun o -\u003e\n let text = Jsont.obj_mem o \"text\" Jsont.string in\n let created_at = Jsont.obj_mem o \"createdAt\" Datetime.jsont in\n Jsont.obj_finish o { text; created_at }\n \n val to_dag_cbor : t -\u003e Dag_cbor.value\n val of_dag_cbor : Dag_cbor.value -\u003e (t, error) result\nend\n```\n\n## CLI Tool\n\n```bash\natproto-lexicon-gen --input lexicons/ --output lib/generated/\n```\n\n## Dependencies\n- atproto-lexicon\n- jsont","acceptance_criteria":"- Generate OCaml types from Lexicon schemas\n- Generate encoders/decoders\n- Type-safe API bindings\n- CLI tool for code generation","status":"closed","priority":2,"issue_type":"feature","assignee":"claude","created_at":"2025-12-28T00:11:47.861552784+01:00","updated_at":"2025-12-28T11:28:03.226633204+01:00","closed_at":"2025-12-28T11:28:03.226633204+01:00","labels":["application","codegen"],"dependencies":[{"issue_id":"atproto-53","depends_on_id":"atproto-50","type":"parent-child","created_at":"2025-12-28T00:12:06.539440409+01:00","created_by":"daemon"},{"issue_id":"atproto-53","depends_on_id":"atproto-51","type":"blocks","created_at":"2025-12-28T00:12:11.189125052+01:00","created_by":"daemon"}]} 30 30 {"id":"atproto-54","title":"Implement high-level API client","description":"Implement high-level API client for AT Protocol / Bluesky. This provides a user-friendly interface for common operations.","design":"## Module Structure\n\n```ocaml\n(* atproto-api/lib/agent.ml *)\ntype t\n\nval create : pds:Uri.t -\u003e t\n\n(* Authentication *)\nval login : t -\u003e identifier:string -\u003e password:string -\u003e (t, error) result\nval login_oauth : t -\u003e tokens:Oauth.tokens -\u003e t\nval refresh_session : t -\u003e (t, error) result\n\n(* Profile *)\nval get_profile : t -\u003e actor:string -\u003e (profile, error) result\nval update_profile : t -\u003e display_name:string option -\u003e ... -\u003e (unit, error) result\n\n(* Posts *)\nval create_post : t -\u003e text:string -\u003e ?reply:reply_ref -\u003e ... -\u003e (post_ref, error) result\nval delete_post : t -\u003e uri:At_uri.t -\u003e (unit, error) result\n\n(* Social *)\nval like : t -\u003e uri:At_uri.t -\u003e cid:Cid.t -\u003e (like_ref, error) result\nval follow : t -\u003e did:Did.t -\u003e (follow_ref, error) result\nval unfollow : t -\u003e uri:At_uri.t -\u003e (unit, error) result\n\n(* Feed *)\nval get_timeline : t -\u003e ?cursor:string -\u003e ?limit:int -\u003e (timeline, error) result\nval get_author_feed : t -\u003e actor:string -\u003e ... -\u003e (feed, error) result\n\n(* atproto-api/lib/richtext.ml *)\ntype t\n\nval create : string -\u003e t\nval detect_facets : t -\u003e t (* auto-detect mentions, links *)\nval add_mention : t -\u003e start:int -\u003e end_:int -\u003e did:Did.t -\u003e t\nval add_link : t -\u003e start:int -\u003e end_:int -\u003e uri:Uri.t -\u003e t\nval to_post_record : t -\u003e Dag_cbor.value\n```\n\n## Jsont Codecs for API Types\n\n```ocaml\nlet profile_jsont : profile Jsont.t =\n Jsont.obj \"profile\" @@ fun o -\u003e\n let did = Jsont.obj_mem o \"did\" did_jsont in\n let handle = Jsont.obj_mem o \"handle\" handle_jsont in\n let display_name = Jsont.obj_mem o \"displayName\" ~opt:true Jsont.string in\n let description = Jsont.obj_mem o \"description\" ~opt:true Jsont.string in\n let avatar = Jsont.obj_mem o \"avatar\" ~opt:true Jsont.string in\n let followers_count = Jsont.obj_mem o \"followersCount\" ~opt:true Jsont.int in\n let follows_count = Jsont.obj_mem o \"followsCount\" ~opt:true Jsont.int in\n let posts_count = Jsont.obj_mem o \"postsCount\" ~opt:true Jsont.int in\n Jsont.obj_finish o { did; handle; display_name; description; avatar; \n followers_count; follows_count; posts_count }\n\nlet facet_jsont : facet Jsont.t =\n Jsont.obj \"facet\" @@ fun o -\u003e\n let index = Jsont.obj_mem o \"index\" byte_slice_jsont in\n let features = Jsont.obj_mem o \"features\" (Jsont.list facet_feature_jsont) in\n Jsont.obj_finish o { index; features }\n```\n\n## RichText Facets\n\n```json\n{\n \"text\": \"Hello @alice.bsky.social!\",\n \"facets\": [\n {\n \"index\": { \"byteStart\": 6, \"byteEnd\": 25 },\n \"features\": [\n { \"$type\": \"app.bsky.richtext.facet#mention\", \"did\": \"did:plc:...\" }\n ]\n }\n ]\n}\n```\n\n## Dependencies\n- atproto-xrpc\n- atproto-identity\n- atproto-repo\n- jsont","acceptance_criteria":"- Session management (login, logout, refresh)\n- Common operations (post, like, follow, etc.)\n- RichText handling (mentions, links, facets)\n- Timeline and feed fetching\n- Profile operations","status":"closed","priority":2,"issue_type":"feature","assignee":"claude","created_at":"2025-12-28T00:12:00.736309435+01:00","updated_at":"2025-12-28T11:47:47.071271001+01:00","closed_at":"2025-12-28T11:47:47.071271001+01:00","labels":["api","application"],"dependencies":[{"issue_id":"atproto-54","depends_on_id":"atproto-50","type":"parent-child","created_at":"2025-12-28T00:12:07.636789403+01:00","created_by":"daemon"},{"issue_id":"atproto-54","depends_on_id":"atproto-41","type":"blocks","created_at":"2025-12-28T00:12:12.376875324+01:00","created_by":"daemon"},{"issue_id":"atproto-54","depends_on_id":"atproto-33","type":"blocks","created_at":"2025-12-28T00:12:13.060557136+01:00","created_by":"daemon"},{"issue_id":"atproto-54","depends_on_id":"atproto-25","type":"blocks","created_at":"2025-12-28T00:12:13.934360048+01:00","created_by":"daemon"}]} 31 - {"id":"atproto-5l1","title":"Refactor JSON to simdjsont (replace yojson/jsont)","description":"","notes":"Next steps: migrate lib/crypto/jwt.ml off Yojson.Safe to Atproto_json/Simdjsont.Json (header_to_json/claims_to_json + *_of_json + create/decode_unverified); add atproto-json to lib/crypto/dune; run dune runtest test/crypto + full runtest; then continue with other Yojson hotspots (xrpc client/server, identity did_resolver, sync firehose, ipld blob).","status":"in_progress","priority":1,"issue_type":"feature","created_at":"2026-01-01T18:06:10.17746938+01:00","updated_at":"2026-01-01T19:32:47.437650903+01:00","dependencies":[{"issue_id":"atproto-5l1","depends_on_id":"atproto-dqs","type":"blocks","created_at":"2026-01-01T18:06:39.648046004+01:00","created_by":"daemon"}]} 31 + {"id":"atproto-5l1","title":"Refactor JSON to simdjsont (replace yojson/jsont)","description":"","notes":"Migrated atproto-lexicon off Yojson onto simdjsont.Json.t + Simdjsont.decode Codec.value. Updated lib/lexicon/{parser,validator,atproto_lexicon,codegen} and lib/lexicon/dune. Updated test/lexicon/test_lexicon.ml fixtures loader + patterns. Verified: dune runtest test/lexicon OK; dune build @install OK.","status":"in_progress","priority":1,"issue_type":"feature","created_at":"2026-01-01T18:06:10.17746938+01:00","updated_at":"2026-01-01T20:40:51.684953681+01:00","dependencies":[{"issue_id":"atproto-5l1","depends_on_id":"atproto-dqs","type":"blocks","created_at":"2026-01-01T18:06:39.648046004+01:00","created_by":"daemon"}]} 32 32 {"id":"atproto-60","title":"Implement effects-based I/O abstraction","description":"Implement the effects-based I/O abstraction layer that makes all libraries runtime-agnostic.","design":"## Module Structure\n\n```ocaml\n(* atproto-effects/lib/effects.ml *)\n\n(* HTTP effects *)\ntype http_request = {\n method_: [ `GET | `POST | `PUT | `DELETE ];\n uri: Uri.t;\n headers: (string * string) list;\n body: string option;\n}\n\ntype http_response = {\n status: int;\n headers: (string * string) list;\n body: string;\n}\n\ntype _ Effect.t +=\n | Http_request : http_request -\u003e http_response Effect.t\n\n(* DNS effects *)\ntype _ Effect.t +=\n | Dns_txt : string -\u003e string list Effect.t\n | Dns_a : string -\u003e string list Effect.t\n\n(* Time effects *)\ntype _ Effect.t +=\n | Now : Ptime.t Effect.t\n | Sleep : float -\u003e unit Effect.t\n\n(* Random effects *)\ntype _ Effect.t +=\n | Random_bytes : int -\u003e bytes Effect.t\n\n(* atproto-effects-eio/lib/handler.ml *)\nval run : (unit -\u003e 'a) -\u003e 'a\n```\n\n## Handler Example (eio)\n\n```ocaml\nlet run f =\n Effect.Deep.match_ f ()\n {\n retc = Fun.id;\n exnc = raise;\n effc = fun (type a) (e : a Effect.t) -\u003e\n match e with\n | Http_request req -\u003e\n Some (fun (k : (a, _) continuation) -\u003e\n let resp = Eio_client.request req in\n continue k resp)\n | Dns_txt domain -\u003e\n Some (fun k -\u003e\n let records = Eio_dns.txt domain in\n continue k records)\n | _ -\u003e None\n }\n```\n\n## Dependencies\n- eio (for testing handler)","acceptance_criteria":"- Effect types for HTTP, DNS, time, random\n- eio-based handler for testing\n- Handler composition utilities\n- Performance benchmarks","status":"closed","priority":1,"issue_type":"feature","assignee":"claude","created_at":"2025-12-28T00:12:29.021401617+01:00","updated_at":"2025-12-28T11:57:08.264086142+01:00","closed_at":"2025-12-28T11:57:08.264086142+01:00","labels":["effects","infrastructure"],"dependencies":[{"issue_id":"atproto-60","depends_on_id":"atproto-1","type":"parent-child","created_at":"2025-12-28T00:12:55.467983208+01:00","created_by":"daemon"}]} 33 33 {"id":"atproto-61","title":"Set up interoperability test suite","description":"Set up and run the AT Protocol interoperability tests from bluesky-social/atproto-interop-tests.","design":"## Test Structure\n\n```\ntest/\n├── interop/\n│ ├── syntax_test.ml # Handle, DID, NSID, TID, etc.\n│ ├── crypto_test.ml # Signatures, did:key\n│ ├── data_model_test.ml # DAG-CBOR, CID\n│ ├── mst_test.ml # Key heights, tree structure\n│ ├── lexicon_test.ml # Schema and record validation\n│ └── firehose_test.ml # Commit proofs\n├── fixtures/ # Cloned from atproto-interop-tests\n└── dune\n```\n\n## Test Approach\n\n1. Clone test vectors from GitHub\n2. Parse JSON fixtures using jsont\n3. Parse text fixtures line by line\n4. Run each test case\n5. Compare output to expected values\n\n## Example Test\n\n```ocaml\nlet load_json_fixtures path =\n let json = Jsont.of_file path in\n Jsont.decode (Jsont.list fixture_jsont) json\n\nlet%test \"handle_syntax_valid\" =\n let fixtures = load_lines \"fixtures/syntax/handle_syntax_valid.txt\" in\n List.for_all (fun line -\u003e\n match Handle.of_string line with\n | Ok _ -\u003e true\n | Error _ -\u003e false\n ) fixtures\n\nlet%test \"handle_syntax_invalid\" =\n let fixtures = load_lines \"fixtures/syntax/handle_syntax_invalid.txt\" in\n List.for_all (fun line -\u003e\n match Handle.of_string line with\n | Ok _ -\u003e false\n | Error _ -\u003e true\n ) fixtures\n\nlet%test \"crypto_signature_fixtures\" =\n let fixtures = load_json_fixtures \"fixtures/crypto/signature-fixtures.json\" in\n List.for_all (fun fixture -\u003e\n let message = Base64.decode fixture.message_base64 in\n let signature = Base64.decode fixture.signature_base64 in\n let key = Did_key.of_string fixture.public_key_did in\n let result = Crypto.verify key message signature in\n result = fixture.valid_signature\n ) fixtures\n```\n\n## Dependencies\n- alcotest or ounit2\n- jsont","acceptance_criteria":"- All syntax interop tests pass\n- All crypto interop tests pass\n- All data-model interop tests pass\n- All MST interop tests pass\n- All lexicon interop tests pass\n- All firehose interop tests pass","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-28T00:12:40.553908313+01:00","updated_at":"2025-12-28T13:25:34.614867702+01:00","closed_at":"2025-12-28T13:25:34.614867702+01:00","labels":["conformance","testing"],"dependencies":[{"issue_id":"atproto-61","depends_on_id":"atproto-1","type":"parent-child","created_at":"2025-12-28T00:12:56.180809368+01:00","created_by":"daemon"}]} 34 34 {"id":"atproto-62","title":"Set up monorepo package structure","description":"Set up the monorepo structure for multiple opam packages within a single repository.","design":"## Repository Structure\n\n```\natproto/\n├── dune-project # Root with all packages\n├── packages/\n│ ├── atproto-syntax/\n│ │ ├── lib/\n│ │ │ ├── dune\n│ │ │ └── *.ml\n│ │ ├── test/\n│ │ │ ├── dune\n│ │ │ └── *_test.ml\n│ │ └── atproto-syntax.opam\n│ ├── atproto-crypto/\n│ ├── atproto-multibase/\n│ ├── atproto-ipld/\n│ ├── atproto-mst/\n│ ├── atproto-repo/\n│ ├── atproto-identity/\n│ ├── atproto-xrpc/\n│ ├── atproto-sync/\n│ ├── atproto-lexicon/\n│ ├── atproto-lexicon-gen/\n│ ├── atproto-api/\n│ └── atproto-effects/\n├── examples/\n│ ├── simple_client/\n│ └── firehose_consumer/\n└── interop-tests/\n```\n\n## dune-project\n\n```lisp\n(lang dune 3.20)\n(name atproto)\n(generate_opam_files true)\n\n(package\n (name atproto-syntax)\n (synopsis \"AT Protocol identifier syntax parsing\")\n (depends\n (ocaml (\u003e= 5.4))\n re\n ptime))\n\n(package\n (name atproto-crypto)\n ...)\n```\n\n## CI (.github/workflows/ci.yml)\n\n- OCaml 5.4 matrix\n- Build all packages\n- Run all tests\n- Run interop tests","acceptance_criteria":"- Multi-package dune-project structure\n- Separate opam files per package\n- CI pipeline for building and testing\n- Documentation generation setup","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-28T00:12:50.547102438+01:00","updated_at":"2025-12-28T11:57:18.856810633+01:00","closed_at":"2025-12-28T11:57:18.856810633+01:00","labels":["infrastructure","setup"],"dependencies":[{"issue_id":"atproto-62","depends_on_id":"atproto-1","type":"parent-child","created_at":"2025-12-28T00:12:57.015938611+01:00","created_by":"daemon"}]}
+9 -5
lib/lexicon/atproto_lexicon.ml
··· 15 15 16 16 {[ 17 17 (* Parse a Lexicon schema *) 18 - let schema_json = Yojson.Safe.from_file "app.bsky.feed.post.json" in 18 + let schema_json = 19 + match Parser.of_file "app.bsky.feed.post.json" with 20 + | Ok lex -> lex 21 + | Error msg -> failwith (Parser.error_to_string msg) 22 + in 19 23 let schema = Parser.parse_schema schema_json in 20 24 21 25 (* Validate a record against a schema *) 22 26 let record = 23 - `Assoc 27 + Simdjsont.Json.Object 24 28 [ 25 - ("$type", `String "app.bsky.feed.post"); 26 - ("text", `String "Hello!"); 27 - ("createdAt", `String "2024-01-01T00:00:00Z"); 29 + ("$type", Simdjsont.Json.String "app.bsky.feed.post"); 30 + ("text", Simdjsont.Json.String "Hello!"); 31 + ("createdAt", Simdjsont.Json.String "2024-01-01T00:00:00Z"); 28 32 ] 29 33 in 30 34 match Validator.validate_record schema record with
+9 -9
lib/lexicon/codegen.ml
··· 5 5 6 6 Generated code includes: 7 7 - OCaml record types matching Lexicon schemas 8 - - JSON encoders/decoders using Yojson 8 + - JSON encoders/decoders using simdjsont 9 9 - DAG-CBOR encoders/decoders 10 10 - Type-safe XRPC client methods *) 11 11 ··· 103 103 | Schema.Array arr -> 104 104 Printf.sprintf "%s list" (field_type_to_ocaml arr.items) 105 105 | Schema.Object _ -> 106 - "Yojson.Basic.t" (* Inline objects become raw JSON for now *) 106 + "Simdjsont.Json.t" (* Inline objects become raw JSON for now *) 107 107 | Schema.Ref r -> ref_to_ocaml r.ref_ 108 108 | Schema.Union u -> union_to_ocaml u 109 109 in ··· 117 117 | Schema.String _ -> "string" 118 118 | Schema.Bytes _ -> "string" 119 119 | Schema.Cid_link _ -> "Cid.t" 120 - | Schema.Unknown _ -> "Yojson.Basic.t" 120 + | Schema.Unknown _ -> "Simdjsont.Json.t" 121 121 122 122 and format_to_ocaml (fmt : Schema.string_format) : string = 123 123 match fmt with ··· 140 140 let name = String.sub ref_str 1 (String.length ref_str - 1) in 141 141 Printf.sprintf "%s.t" (String.capitalize_ascii (camel_to_snake name)) 142 142 else 143 - (* External ref like "app.bsky.actor.defs#basicView" -> Yojson.Basic.t for now *) 144 - "Yojson.Basic.t" 143 + (* External ref like "app.bsky.actor.defs#basicView" -> Simdjsont.Json.t for now *) 144 + "Simdjsont.Json.t" 145 145 146 146 and union_to_ocaml (_u : Schema.union_type) : string = 147 147 (* Unions become raw JSON for now - full union support would require variant types *) 148 - "Yojson.Basic.t" 148 + "Simdjsont.Json.t" 149 149 150 150 (** {1 Code Generation} *) 151 151 ··· 296 296 297 297 (* Generate output type if present *) 298 298 emit_blank e; 299 - emit e "type output = Yojson.Basic.t"; 299 + emit e "type output = Simdjsont.Json.t"; 300 300 301 301 dedent e; 302 302 emit e "end"; ··· 320 320 emit e (Printf.sprintf "let nsid = \"%s\"" lexicon.id); 321 321 322 322 emit_blank e; 323 - emit e "type input = Yojson.Basic.t"; 323 + emit e "type input = Simdjsont.Json.t"; 324 324 emit_blank e; 325 - emit e "type output = Yojson.Basic.t"; 325 + emit e "type output = Simdjsont.Json.t"; 326 326 327 327 dedent e; 328 328 emit e "end";
+1 -1
lib/lexicon/dune
··· 1 1 (library 2 2 (name atproto_lexicon) 3 3 (public_name atproto-lexicon) 4 - (libraries atproto_syntax yojson) 4 + (libraries atproto_syntax simdjsont) 5 5 (preprocess no_preprocessing))
+55 -33
lib/lexicon/parser.ml
··· 17 17 18 18 let error_to_string e = Format.asprintf "%a" pp_error e 19 19 20 + type json = Simdjsont.Json.t 21 + 22 + let int_of_int64 (i : int64) : int option = 23 + if 24 + Int64.compare i (Int64.of_int max_int) > 0 25 + || Int64.compare i (Int64.of_int min_int) < 0 26 + then None 27 + else Some (Int64.to_int i) 28 + 20 29 (** Helper to get string from JSON *) 21 30 let get_string key json = 22 31 match json with 23 - | `Assoc pairs -> ( 32 + | Simdjsont.Json.Object pairs -> ( 24 33 match List.assoc_opt key pairs with 25 - | Some (`String s) -> Some s 34 + | Some (Simdjsont.Json.String s) -> Some s 26 35 | _ -> None) 27 36 | _ -> None 28 37 ··· 32 41 (** Helper to get int from JSON *) 33 42 let get_int key json = 34 43 match json with 35 - | `Assoc pairs -> ( 36 - match List.assoc_opt key pairs with Some (`Int i) -> Some i | _ -> None) 44 + | Simdjsont.Json.Object pairs -> ( 45 + match List.assoc_opt key pairs with 46 + | Some (Simdjsont.Json.Int i) -> int_of_int64 i 47 + | _ -> None) 37 48 | _ -> None 38 49 39 50 (** Helper to get bool from JSON *) 40 51 let get_bool key json = 41 52 match json with 42 - | `Assoc pairs -> ( 43 - match List.assoc_opt key pairs with Some (`Bool b) -> Some b | _ -> None) 53 + | Simdjsont.Json.Object pairs -> ( 54 + match List.assoc_opt key pairs with 55 + | Some (Simdjsont.Json.Bool b) -> Some b 56 + | _ -> None) 44 57 | _ -> None 45 58 46 59 (** Helper to get list from JSON *) 47 60 let get_list key json = 48 61 match json with 49 - | `Assoc pairs -> ( 50 - match List.assoc_opt key pairs with Some (`List l) -> Some l | _ -> None) 62 + | Simdjsont.Json.Object pairs -> ( 63 + match List.assoc_opt key pairs with 64 + | Some (Simdjsont.Json.Array l) -> Some l 65 + | _ -> None) 51 66 | _ -> None 52 67 53 68 (** Helper to get assoc from JSON *) 54 69 let get_assoc key json = 55 70 match json with 56 - | `Assoc pairs -> ( 71 + | Simdjsont.Json.Object pairs -> ( 57 72 match List.assoc_opt key pairs with 58 - | Some (`Assoc a) -> Some a 73 + | Some (Simdjsont.Json.Object a) -> Some a 59 74 | _ -> None) 60 75 | _ -> None 61 76 ··· 63 78 let get_string_list key json = 64 79 match get_list key json with 65 80 | Some l -> 66 - Some (List.filter_map (function `String s -> Some s | _ -> None) l) 81 + Some 82 + (List.filter_map 83 + (function Simdjsont.Json.String s -> Some s | _ -> None) 84 + l) 67 85 | None -> None 68 86 69 87 (** Helper to get int list *) 70 88 let get_int_list key json = 71 89 match get_list key json with 72 - | Some l -> Some (List.filter_map (function `Int i -> Some i | _ -> None) l) 90 + | Some l -> 91 + Some 92 + (List.filter_map 93 + (function Simdjsont.Json.Int i -> int_of_int64 i | _ -> None) 94 + l) 73 95 | None -> None 74 96 75 97 (** Parse a field type from JSON *) ··· 149 171 }) 150 172 | "array" -> ( 151 173 match json with 152 - | `Assoc pairs -> ( 174 + | Simdjsont.Json.Object pairs -> ( 153 175 match List.assoc_opt "items" pairs with 154 176 | Some items_json -> ( 155 177 match parse_field_type items_json with ··· 263 285 | Some encoding -> 264 286 let schema = 265 287 match json with 266 - | `Assoc pairs -> ( 288 + | Simdjsont.Json.Object pairs -> ( 267 289 match List.assoc_opt "schema" pairs with 268 290 | Some schema_json -> ( 269 291 match parse_field_type schema_json with ··· 292 314 (** Parse message from JSON *) 293 315 let parse_message json : (Schema.message, error) result = 294 316 match json with 295 - | `Assoc pairs -> ( 317 + | Simdjsont.Json.Object pairs -> ( 296 318 match List.assoc_opt "schema" pairs with 297 319 | Some schema_json -> ( 298 320 match parse_field_type schema_json with ··· 333 355 | None -> Schema.Any 334 356 in 335 357 match json with 336 - | `Assoc pairs -> ( 358 + | Simdjsont.Json.Object pairs -> ( 337 359 match List.assoc_opt "record" pairs with 338 360 | Some record_json -> ( 339 361 match parse_object_type record_json with ··· 352 374 | "query" -> 353 375 let parameters = 354 376 match json with 355 - | `Assoc pairs -> ( 377 + | Simdjsont.Json.Object pairs -> ( 356 378 match List.assoc_opt "parameters" pairs with 357 379 | Some p -> ( 358 380 match parse_params p with ··· 363 385 in 364 386 let output = 365 387 match json with 366 - | `Assoc pairs -> ( 388 + | Simdjsont.Json.Object pairs -> ( 367 389 match List.assoc_opt "output" pairs with 368 390 | Some o -> ( 369 391 match parse_body o with ··· 383 405 | "procedure" -> 384 406 let parameters = 385 407 match json with 386 - | `Assoc pairs -> ( 408 + | Simdjsont.Json.Object pairs -> ( 387 409 match List.assoc_opt "parameters" pairs with 388 410 | Some p -> ( 389 411 match parse_params p with ··· 394 416 in 395 417 let input = 396 418 match json with 397 - | `Assoc pairs -> ( 419 + | Simdjsont.Json.Object pairs -> ( 398 420 match List.assoc_opt "input" pairs with 399 421 | Some i -> ( 400 422 match parse_body i with ··· 405 427 in 406 428 let output = 407 429 match json with 408 - | `Assoc pairs -> ( 430 + | Simdjsont.Json.Object pairs -> ( 409 431 match List.assoc_opt "output" pairs with 410 432 | Some o -> ( 411 433 match parse_body o with ··· 426 448 | "subscription" -> 427 449 let parameters = 428 450 match json with 429 - | `Assoc pairs -> ( 451 + | Simdjsont.Json.Object pairs -> ( 430 452 match List.assoc_opt "parameters" pairs with 431 453 | Some p -> ( 432 454 match parse_params p with ··· 437 459 in 438 460 let message = 439 461 match json with 440 - | `Assoc pairs -> ( 462 + | Simdjsont.Json.Object pairs -> ( 441 463 match List.assoc_opt "message" pairs with 442 464 | Some m -> ( 443 465 match parse_message m with ··· 565 587 (** Parse a complete lexicon from JSON *) 566 588 let parse_lexicon json : (Schema.lexicon, error) result = 567 589 match json with 568 - | `Assoc _ -> ( 590 + | Simdjsont.Json.Object _ -> ( 569 591 match get_int "lexicon" json with 570 592 | None -> Error (`Missing_field "lexicon") 571 593 | Some version -> ( ··· 593 615 594 616 (** Parse a lexicon from a JSON string *) 595 617 let of_string s : (Schema.lexicon, error) result = 596 - try 597 - let json = Yojson.Basic.from_string s in 598 - parse_lexicon json 599 - with Yojson.Json_error msg -> Error (`Parse_error msg) 618 + match Simdjsont.decode Simdjsont.Codec.value s with 619 + | Ok json -> parse_lexicon json 620 + | Error msg -> Error (`Parse_error msg) 600 621 601 622 (** Parse a lexicon from a file *) 602 623 let of_file path : (Schema.lexicon, error) result = 603 624 try 604 - let json = Yojson.Basic.from_file path in 605 - parse_lexicon json 606 - with 607 - | Yojson.Json_error msg -> Error (`Parse_error msg) 608 - | Sys_error msg -> Error (`Parse_error msg) 625 + let ic = open_in path in 626 + let len = in_channel_length ic in 627 + let s = really_input_string ic len in 628 + close_in ic; 629 + of_string s 630 + with Sys_error msg -> Error (`Parse_error msg)
+84 -51
lib/lexicon/validator.ml
··· 202 202 203 203 (* === JSON value helpers === *) 204 204 205 - type json = Yojson.Basic.t 205 + type json = Simdjsont.Json.t 206 206 207 207 let get_string key = function 208 - | `Assoc pairs -> ( 208 + | Simdjsont.Json.Object pairs -> ( 209 209 match List.assoc_opt key pairs with 210 - | Some (`String s) -> Some s 210 + | Some (Simdjsont.Json.String s) -> Some s 211 211 | _ -> None) 212 212 | _ -> None 213 213 214 214 let get_int key = function 215 - | `Assoc pairs -> ( 216 - match List.assoc_opt key pairs with Some (`Int i) -> Some i | _ -> None) 215 + | Simdjsont.Json.Object pairs -> ( 216 + match List.assoc_opt key pairs with 217 + | Some (Simdjsont.Json.Int i) -> 218 + if 219 + Int64.compare i (Int64.of_int max_int) > 0 220 + || Int64.compare i (Int64.of_int min_int) < 0 221 + then None 222 + else Some (Int64.to_int i) 223 + | _ -> None) 217 224 | _ -> None 218 225 219 226 let get_bool key = function 220 - | `Assoc pairs -> ( 221 - match List.assoc_opt key pairs with Some (`Bool b) -> Some b | _ -> None) 227 + | Simdjsont.Json.Object pairs -> ( 228 + match List.assoc_opt key pairs with 229 + | Some (Simdjsont.Json.Bool b) -> Some b 230 + | _ -> None) 222 231 | _ -> None 223 232 224 - let is_null = function `Null -> true | _ -> false 233 + let is_null = function Simdjsont.Json.Null -> true | _ -> false 225 234 226 235 (* === Field validators === *) 227 236 228 237 (** Validate a boolean value *) 229 238 let validate_boolean ~path json = 230 - match json with `Bool _ -> [] | _ -> [ error ~path "expected boolean" ] 239 + match json with 240 + | Simdjsont.Json.Bool _ -> [] 241 + | _ -> [ error ~path "expected boolean" ] 231 242 232 243 (** Validate an integer value with constraints *) 233 244 let validate_integer ~path ?minimum ?maximum ?enum ?const json = 234 245 match json with 235 - | `Int i -> 236 - let errs = ref [] in 237 - (match const with 238 - | Some c when i <> c -> 239 - errs := error ~path (Printf.sprintf "must be %d" c) :: !errs 240 - | _ -> ()); 241 - (match enum with 242 - | Some values when not (List.mem i values) -> 243 - errs := error ~path "value not in enum" :: !errs 244 - | _ -> ()); 245 - (match minimum with 246 - | Some min when i < min -> 247 - errs := error ~path (Printf.sprintf "must be >= %d" min) :: !errs 248 - | _ -> ()); 249 - (match maximum with 250 - | Some max when i > max -> 251 - errs := error ~path (Printf.sprintf "must be <= %d" max) :: !errs 252 - | _ -> ()); 253 - !errs 246 + | Simdjsont.Json.Int i64 -> ( 247 + let i_opt = 248 + if 249 + Int64.compare i64 (Int64.of_int max_int) > 0 250 + || Int64.compare i64 (Int64.of_int min_int) < 0 251 + then None 252 + else Some (Int64.to_int i64) 253 + in 254 + match i_opt with 255 + | None -> [ error ~path "integer out of int range" ] 256 + | Some i -> 257 + let errs = ref [] in 258 + (match const with 259 + | Some c when i <> c -> 260 + errs := error ~path (Printf.sprintf "must be %d" c) :: !errs 261 + | _ -> ()); 262 + (match enum with 263 + | Some values when not (List.mem i values) -> 264 + errs := error ~path "value not in enum" :: !errs 265 + | _ -> ()); 266 + (match minimum with 267 + | Some min when i < min -> 268 + errs := error ~path (Printf.sprintf "must be >= %d" min) :: !errs 269 + | _ -> ()); 270 + (match maximum with 271 + | Some max when i > max -> 272 + errs := error ~path (Printf.sprintf "must be <= %d" max) :: !errs 273 + | _ -> ()); 274 + !errs) 254 275 | _ -> [ error ~path "expected integer" ] 255 276 256 277 (** Validate a string value with constraints *) 257 278 let validate_string ~path ?format ?min_length ?max_length ?min_graphemes 258 279 ?max_graphemes ?enum ?const ?known_values:_ json = 259 280 match json with 260 - | `String s -> 281 + | Simdjsont.Json.String s -> 261 282 let errs = ref [] in 262 283 (match const with 263 284 | Some c when s <> c -> ··· 301 322 (** Validate a bytes value (expects $bytes object) *) 302 323 let validate_bytes ~path ?min_length ?max_length json = 303 324 match json with 304 - | `Assoc pairs -> ( 325 + | Simdjsont.Json.Object pairs -> ( 305 326 match List.assoc_opt "$bytes" pairs with 306 - | Some (`String b64) -> 327 + | Some (Simdjsont.Json.String b64) -> 307 328 (* Decode base64 to get actual length *) 308 329 let len = String.length b64 * 3 / 4 in 309 330 (* approximate *) ··· 327 348 (** Validate a CID link (expects $link object) *) 328 349 let validate_cid_link ~path json = 329 350 match json with 330 - | `Assoc pairs -> ( 351 + | Simdjsont.Json.Object pairs -> ( 331 352 match List.assoc_opt "$link" pairs with 332 - | Some (`String _cid) -> [] 353 + | Some (Simdjsont.Json.String _cid) -> [] 333 354 | _ -> [ error ~path "expected $link object" ]) 334 355 | _ -> [ error ~path "expected $link object" ] 335 356 336 357 (** Validate a blob value *) 337 358 let validate_blob ~path ?max_size ?accept json = 338 359 match json with 339 - | `Assoc pairs -> ( 360 + | Simdjsont.Json.Object pairs -> ( 340 361 match List.assoc_opt "$type" pairs with 341 - | Some (`String "blob") -> 362 + | Some (Simdjsont.Json.String "blob") -> 342 363 let errs = ref [] in 343 364 (* Check mimeType *) 344 365 (match List.assoc_opt "mimeType" pairs with 345 - | Some (`String mime) -> ( 366 + | Some (Simdjsont.Json.String mime) -> ( 346 367 match accept with 347 368 | Some patterns -> 348 369 let matches = ··· 365 386 | None -> errs := error ~path "missing mimeType" :: !errs); 366 387 (* Check size *) 367 388 (match List.assoc_opt "size" pairs with 368 - | Some (`Int size) -> ( 369 - match max_size with 370 - | Some max when size > max -> 371 - errs := 372 - error ~path (Printf.sprintf "blob size must be <= %d" max) 373 - :: !errs 374 - | _ -> ()) 389 + | Some (Simdjsont.Json.Int size64) -> ( 390 + let size = 391 + if 392 + Int64.compare size64 (Int64.of_int max_int) > 0 393 + || Int64.compare size64 (Int64.of_int min_int) < 0 394 + then None 395 + else Some (Int64.to_int size64) 396 + in 397 + match size with 398 + | None -> errs := error ~path "size out of int range" :: !errs 399 + | Some size -> ( 400 + match max_size with 401 + | Some max when size > max -> 402 + errs := 403 + error ~path 404 + (Printf.sprintf "blob size must be <= %d" max) 405 + :: !errs 406 + | _ -> ())) 375 407 | Some _ -> errs := error ~path "size must be integer" :: !errs 376 408 | None -> errs := error ~path "missing size" :: !errs); 377 409 (* Check ref *) ··· 393 425 This is part of the data model restrictions. *) 394 426 let validate_unknown ~path json = 395 427 match json with 396 - | `Bool _ -> [ error ~path "unknown type cannot contain boolean" ] 397 - | `Assoc pairs -> ( 428 + | Simdjsont.Json.Bool _ -> 429 + [ error ~path "unknown type cannot contain boolean" ] 430 + | Simdjsont.Json.Object pairs -> ( 398 431 (* Check for $bytes - not allowed in unknown *) 399 432 match List.assoc_opt "$bytes" pairs with 400 433 | Some _ -> [ error ~path "unknown type cannot contain bytes ($bytes)" ] 401 434 | None -> ( 402 435 (* Check for blob ($type: "blob") - not allowed in unknown *) 403 436 match List.assoc_opt "$type" pairs with 404 - | Some (`String "blob") -> 437 + | Some (Simdjsont.Json.String "blob") -> 405 438 [ error ~path "unknown type cannot contain blob" ] 406 439 | _ -> [])) 407 440 | _ -> [] ··· 455 488 456 489 and validate_array ~resolver ~path (arr : Schema.array_type) json = 457 490 match json with 458 - | `List items -> 491 + | Simdjsont.Json.Array items -> 459 492 let errs = ref [] in 460 493 (* Check length constraints *) 461 494 let len = List.length items in ··· 484 517 485 518 and validate_object ~resolver ~path (obj : Schema.object_type) json = 486 519 match json with 487 - | `Assoc pairs -> 520 + | Simdjsont.Json.Object pairs -> 488 521 let errs = ref [] in 489 522 (* Check required fields *) 490 523 List.iter ··· 520 553 | None -> ( 521 554 (* Fallback: require an object for unresolved refs *) 522 555 match json with 523 - | `Assoc _ -> [] 556 + | Simdjsont.Json.Object _ -> [] 524 557 | _ -> [ error ~path "expected object for ref" ]) 525 558 526 559 and validate_union ~resolver ~path (union : Schema.union_type) json = 527 560 match json with 528 - | `Assoc pairs -> ( 561 + | Simdjsont.Json.Object pairs -> ( 529 562 match List.assoc_opt "$type" pairs with 530 - | Some (`String type_ref) -> 563 + | Some (Simdjsont.Json.String type_ref) -> 531 564 let errs = ref [] in 532 565 (* Check if type is in allowed refs for closed unions *) 533 566 (if union.closed then
+34 -27
test/lexicon/test_lexicon.ml
··· 11 11 let ic = open_in path in 12 12 let content = really_input_string ic (in_channel_length ic) in 13 13 close_in ic; 14 - Yojson.Basic.from_string content 14 + match Simdjsont.decode Simdjsont.Codec.value content with 15 + | Ok json -> json 16 + | Error msg -> failwith msg 15 17 16 18 (** Read catalog lexicon file *) 17 19 let read_catalog_file filename = ··· 23 25 let test_valid_lexicons () = 24 26 let fixtures = read_fixture_json "lexicon-valid.json" in 25 27 match fixtures with 26 - | `List items -> 28 + | Simdjsont.Json.Array items -> 27 29 List.iter 28 30 (fun item -> 29 31 match item with 30 - | `Assoc pairs -> ( 32 + | Simdjsont.Json.Object pairs -> ( 31 33 let name = 32 34 match List.assoc_opt "name" pairs with 33 - | Some (`String s) -> s 35 + | Some (Simdjsont.Json.String s) -> s 34 36 | _ -> "unknown" 35 37 in 36 38 match List.assoc_opt "lexicon" pairs with 37 39 | Some lexicon_json -> ( 38 - let lexicon_str = Yojson.Basic.to_string lexicon_json in 40 + let lexicon_str = Simdjsont.Json.to_string lexicon_json in 39 41 match Parser.of_string lexicon_str with 40 42 | Ok lexicon -> 41 43 Alcotest.(check bool) ··· 54 56 let test_invalid_lexicons () = 55 57 let fixtures = read_fixture_json "lexicon-invalid.json" in 56 58 match fixtures with 57 - | `List items -> 59 + | Simdjsont.Json.Array items -> 58 60 List.iter 59 61 (fun item -> 60 62 match item with 61 - | `Assoc pairs -> ( 63 + | Simdjsont.Json.Object pairs -> ( 62 64 let name = 63 65 match List.assoc_opt "name" pairs with 64 - | Some (`String s) -> s 66 + | Some (Simdjsont.Json.String s) -> s 65 67 | _ -> "unknown" 66 68 in 67 69 match List.assoc_opt "lexicon" pairs with 68 70 | Some lexicon_json -> ( 69 - let lexicon_str = Yojson.Basic.to_string lexicon_json in 71 + let lexicon_str = Simdjsont.Json.to_string lexicon_json in 70 72 match Parser.of_string lexicon_str with 71 73 | Ok _ -> 72 74 (* Some "invalid" lexicons may be parseable but semantically invalid *) ··· 313 315 let resolver = make_resolver lexicon in 314 316 let fixtures = read_fixture_json "record-data-valid.json" in 315 317 match fixtures with 316 - | `List items -> 318 + | Simdjsont.Json.Array items -> 317 319 List.iter 318 320 (fun item -> 319 321 match item with 320 - | `Assoc pairs -> ( 322 + | Simdjsont.Json.Object pairs -> ( 321 323 let name = 322 324 match List.assoc_opt "name" pairs with 323 - | Some (`String s) -> s 325 + | Some (Simdjsont.Json.String s) -> s 324 326 | _ -> "unknown" 325 327 in 326 328 match List.assoc_opt "data" pairs with ··· 352 354 let resolver = make_resolver lexicon in 353 355 let fixtures = read_fixture_json "record-data-invalid.json" in 354 356 match fixtures with 355 - | `List items -> 357 + | Simdjsont.Json.Array items -> 356 358 List.iter 357 359 (fun item -> 358 360 match item with 359 - | `Assoc pairs -> ( 361 + | Simdjsont.Json.Object pairs -> ( 360 362 let name = 361 363 match List.assoc_opt "name" pairs with 362 - | Some (`String s) -> s 364 + | Some (Simdjsont.Json.String s) -> s 363 365 | _ -> "unknown" 364 366 in 365 367 match List.assoc_opt "data" pairs with ··· 386 388 | None -> Alcotest.fail "could not load record schema" 387 389 | Some schema -> 388 390 (* Missing required 'integer' field *) 389 - let data = `Assoc [ ("$type", `String "example.lexicon.record") ] in 391 + let data = 392 + Simdjsont.Json.Object 393 + [ ("$type", Simdjsont.Json.String "example.lexicon.record") ] 394 + in 390 395 let errors = Validator.validate_record ~path:[] schema data in 391 396 Alcotest.(check bool) "has errors" true (errors <> []); 392 397 let has_required_error = ··· 403 408 | Some schema -> 404 409 (* Wrong type for integer field *) 405 410 let data = 406 - `Assoc 411 + Simdjsont.Json.Object 407 412 [ 408 - ("$type", `String "example.lexicon.record"); 409 - ("integer", `String "not-an-integer"); 413 + ("$type", Simdjsont.Json.String "example.lexicon.record"); 414 + ("integer", Simdjsont.Json.String "not-an-integer"); 410 415 ] 411 416 in 412 417 let errors = Validator.validate_record ~path:[] schema data in ··· 425 430 | Some schema -> 426 431 (* Invalid DID format in nested formats object *) 427 432 let data = 428 - `Assoc 433 + Simdjsont.Json.Object 429 434 [ 430 - ("$type", `String "example.lexicon.record"); 431 - ("integer", `Int 1); 432 - ("formats", `Assoc [ ("did", `String "invalid-did") ]); 435 + ("$type", Simdjsont.Json.String "example.lexicon.record"); 436 + ("integer", Simdjsont.Json.Int 1L); 437 + ( "formats", 438 + Simdjsont.Json.Object 439 + [ ("did", Simdjsont.Json.String "invalid-did") ] ); 433 440 ] 434 441 in 435 442 let _errors = Validator.validate_record ~path:[] schema data in ··· 443 450 | Some schema -> 444 451 (* Integer out of range *) 445 452 let data = 446 - `Assoc 453 + Simdjsont.Json.Object 447 454 [ 448 - ("$type", `String "example.lexicon.record"); 449 - ("integer", `Int 1); 450 - ("rangeInteger", `Int 9000); 455 + ("$type", Simdjsont.Json.String "example.lexicon.record"); 456 + ("integer", Simdjsont.Json.Int 1L); 457 + ("rangeInteger", Simdjsont.Json.Int 9000L); 451 458 ] 452 459 in 453 460 let errors = Validator.validate_record ~path:[] schema data in