An ATProto Lexicon validator for Gleam.
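A minimal usage sketch; `check_status`, its string inputs, and the `xyz.statusphere.status` collection NSID are illustrative placeholders, while the `honk.*` calls are the public API defined in the source below:

```gleam
import gleam/io
import gleam/result
import gleam/string
import honk

// Parse one lexicon document and one record (both JSON strings), then
// validate the record against the lexicon's `main` definition.
pub fn check_status(lexicon_json: String, record_json: String) -> Nil {
  let outcome = {
    use lexicon <- result.try(honk.parse_json_string(lexicon_json))
    use record <- result.try(honk.parse_json_string(record_json))
    // Build the validation context once; reuse it for every record.
    use ctx <- result.try(honk.build_validation_context([lexicon]))
    honk.validate_record_with_context(ctx, "xyz.statusphere.status", record)
  }
  case outcome {
    Ok(Nil) -> io.println("record is valid")
    Error(e) -> io.println_error("validation failed: " <> string.inspect(e))
  }
}
```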
// Main public API for the ATProtocol lexicon validator

@target(erlang)
import argv
import gleam/dict.{type Dict}
import gleam/dynamic
import gleam/dynamic/decode
@target(erlang)
import gleam/int
@target(erlang)
import gleam/io
import gleam/json.{type Json}
import gleam/list
import gleam/option.{None, Some}
import gleam/result
import gleam/string
import honk/errors
import honk/internal/json_helpers
import honk/types
import honk/validation/context
import honk/validation/formats
@target(erlang)
import simplifile

// Import validators
import honk/validation/field as validation_field
import honk/validation/field/reference as validation_field_reference
import honk/validation/field/union as validation_field_union
import honk/validation/meta/token as validation_meta_token
import honk/validation/meta/unknown as validation_meta_unknown
import honk/validation/primary/params as validation_primary_params
import honk/validation/primary/procedure as validation_primary_procedure
import honk/validation/primary/query as validation_primary_query
import honk/validation/primary/record as validation_primary_record
import honk/validation/primary/subscription as validation_primary_subscription
import honk/validation/primitive/blob as validation_primitive_blob
import honk/validation/primitive/boolean as validation_primitive_boolean
import honk/validation/primitive/bytes as validation_primitive_bytes
import honk/validation/primitive/cid_link as validation_primitive_cid_link
import honk/validation/primitive/integer as validation_primitive_integer
import honk/validation/primitive/null as validation_primitive_null
import honk/validation/primitive/string as validation_primitive_string

// Re-export error type for public API error handling
pub type ValidationError =
  errors.ValidationError

/// Validates lexicon documents
///
/// Validates lexicon structure (id, defs) and ALL definitions within each lexicon.
/// Each definition in the defs object is validated according to its type.
///
/// Returns Ok(Nil) if all lexicons and their definitions are valid.
/// Returns Error with a map of lexicon ID to list of error messages.
/// Error messages include the definition name (e.g., "lex.id#defName: error").
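///
/// ## Example
/// A minimal sketch; assumes `lexicons` is a `List(Json)` of parsed lexicon
/// documents (for example from `parse_json_strings`):
/// ```gleam
/// case honk.validate(lexicons) {
///   Ok(Nil) -> io.println("All lexicons are valid")
///   Error(error_map) ->
///     dict.each(error_map, fn(lex_id, messages) {
///       io.println_error(lex_id <> ": " <> string.join(messages, "; "))
///     })
/// }
/// ```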
pub fn validate(lexicons: List(Json)) -> Result(Nil, Dict(String, List(String))) {
  // Build validation context
  let builder_result =
    context.builder()
    |> context.with_lexicons(lexicons)

  case builder_result {
    Ok(builder) ->
      case context.build(builder) {
        Ok(ctx) -> {
          // Validate ALL definitions in each lexicon
          let error_map =
            dict.fold(ctx.lexicons, dict.new(), fn(errors, lex_id, lexicon) {
              // Get all definition names from the defs object
              let def_keys = json_helpers.get_keys(lexicon.defs)
              let lex_ctx = context.with_current_lexicon(ctx, lex_id)

              // Validate each definition
              list.fold(def_keys, errors, fn(errors_acc, def_name) {
                case json_helpers.get_field(lexicon.defs, def_name) {
                  Some(def) -> {
                    case validate_definition(def, lex_ctx) {
                      Ok(_) -> errors_acc
                      Error(e) -> {
                        // Include def name in error for better context
                        // Extract just the message without wrapper text
                        let message = case e {
                          errors.InvalidSchema(msg) -> msg
                          errors.DataValidation(msg) -> msg
                          errors.LexiconNotFound(msg) ->
                            "Lexicon not found: " <> msg
                        }
                        // Clean up leading ": " if present
                        let clean_message = case
                          string.starts_with(message, ": ")
                        {
                          True -> string.drop_start(message, 2)
                          False -> message
                        }
                        let error_msg =
                          lex_id <> "#" <> def_name <> ": " <> clean_message
                        case dict.get(errors_acc, lex_id) {
                          Ok(existing_errors) ->
                            dict.insert(errors_acc, lex_id, [
                              error_msg,
                              ..existing_errors
                            ])
                          Error(_) ->
                            dict.insert(errors_acc, lex_id, [error_msg])
                        }
                      }
                    }
                  }
                  None -> errors_acc
                }
              })
            })

          case dict.is_empty(error_map) {
            True -> Ok(Nil)
            False -> Error(error_map)
          }
        }
        Error(e) -> Error(dict.from_list([#("builder", [errors.to_string(e)])]))
      }
    Error(e) -> Error(dict.from_list([#("builder", [errors.to_string(e)])]))
  }
}

/// Validates a single definition based on its type
fn validate_definition(
  def: Json,
  ctx: context.ValidationContext,
) -> Result(Nil, errors.ValidationError) {
  case json_helpers.get_string(def, "type") {
    Some("record") -> validation_primary_record.validate_schema(def, ctx)
    Some("query") -> validation_primary_query.validate_schema(def, ctx)
    Some("procedure") -> validation_primary_procedure.validate_schema(def, ctx)
    Some("subscription") ->
      validation_primary_subscription.validate_schema(def, ctx)
    Some("params") -> validation_primary_params.validate_schema(def, ctx)
    Some("object") -> validation_field.validate_object_schema(def, ctx)
    Some("array") -> validation_field.validate_array_schema(def, ctx)
    Some("union") -> validation_field_union.validate_schema(def, ctx)
    Some("string") -> validation_primitive_string.validate_schema(def, ctx)
    Some("integer") -> validation_primitive_integer.validate_schema(def, ctx)
    Some("boolean") -> validation_primitive_boolean.validate_schema(def, ctx)
    Some("bytes") -> validation_primitive_bytes.validate_schema(def, ctx)
    Some("blob") -> validation_primitive_blob.validate_schema(def, ctx)
    Some("cid-link") -> validation_primitive_cid_link.validate_schema(def, ctx)
    Some("null") -> validation_primitive_null.validate_schema(def, ctx)
    Some("ref") -> validation_field_reference.validate_schema(def, ctx)
    Some("token") -> validation_meta_token.validate_schema(def, ctx)
    Some("unknown") -> validation_meta_unknown.validate_schema(def, ctx)
    Some(unknown_type) ->
      Error(errors.invalid_schema("Unknown type: " <> unknown_type))
    None -> Error(errors.invalid_schema("Definition missing type field"))
  }
}

/// Validation context type (re-exported for external use)
pub type ValidationContext =
  context.ValidationContext

/// Build a reusable validation context from lexicons
/// Call this once, then use `validate_record_with_context` for each record
pub fn build_validation_context(
  lexicons: List(Json),
) -> Result(ValidationContext, ValidationError) {
  let builder_result =
    context.builder()
    |> context.with_lexicons(lexicons)

  use builder <- result.try(builder_result)
  context.build(builder)
}

/// Validates a single data record against a collection schema using a pre-built context
/// This is much faster when validating many records: build the context once
/// with `build_validation_context`, then call this for each record
pub fn validate_record_with_context(
  ctx: ValidationContext,
  collection: String,
  record: Json,
) -> Result(Nil, ValidationError) {
  // Get the lexicon for this collection
  case context.get_lexicon(ctx, collection) {
    Some(lexicon) -> {
      // Get the main definition (should be a record type)
      case json_helpers.get_field(lexicon.defs, "main") {
        Some(main_def) -> {
          let lex_ctx = context.with_current_lexicon(ctx, collection)
          // Set the path to include the definition name
          let def_ctx = context.with_path(lex_ctx, "defs.main")
          // Validate the record data against the main definition
          validation_primary_record.validate_data(record, main_def, def_ctx)
        }
        None ->
          Error(errors.invalid_schema(
            "Lexicon '" <> collection <> "' has no main definition",
          ))
      }
    }
    None -> Error(errors.lexicon_not_found(collection))
  }
}

/// Validates a single data record against a collection schema
pub fn validate_record(
  lexicons: List(Json),
  collection: String,
  record: Json,
) -> Result(Nil, ValidationError) {
  // Build validation context
  use ctx <- result.try(build_validation_context(lexicons))
  validate_record_with_context(ctx, collection, record)
}

/// Validates NSID format
pub fn is_valid_nsid(nsid: String) -> Bool {
  formats.is_valid_nsid(nsid)
}

/// Validates a string value against a specific format
pub fn validate_string_format(
  value: String,
  format: types.StringFormat,
) -> Result(Nil, String) {
  case formats.validate_format(value, format) {
    True -> Ok(Nil)
    False -> {
      let format_name = types.format_to_string(format)
      Error("Value does not match format: " <> format_name)
    }
  }
}

/// Convert a Dynamic value to Json
///
/// This is useful when parsing JSON strings with `json.parse(str, decode.dynamic)`
/// and then needing to convert to Json for validation.
///
/// ## Example
/// ```gleam
/// use dyn <- result.try(json.parse(json_str, decode.dynamic))
/// use json_val <- result.try(honk.dynamic_to_json(dyn))
/// honk.validate([json_val])
/// ```
pub fn dynamic_to_json(dyn: dynamic.Dynamic) -> Result(Json, ValidationError) {
  json_helpers.dynamic_to_json(dyn)
}

/// Parse a JSON string and convert to Json for validation
///
/// This is a convenience function that combines `json.parse()` and `dynamic_to_json()`.
/// It's useful when you have JSON stored as strings (e.g., in a database) and want
/// to validate it with honk.
///
/// ## Example
/// ```gleam
/// use json_val <- result.try(honk.parse_json_string(stored_json))
/// honk.validate([json_val])
/// ```
pub fn parse_json_string(json_str: String) -> Result(Json, ValidationError) {
  use dyn <- result.try(
    json.parse(json_str, decode.dynamic)
    |> result.map_error(fn(_) {
      errors.invalid_schema("Failed to parse JSON string")
    }),
  )
  dynamic_to_json(dyn)
}

/// Parse multiple JSON strings and convert to Json for validation
///
/// This is a convenience function for batch parsing JSON strings.
///
/// ## Example
/// ```gleam
/// use json_vals <- result.try(honk.parse_json_strings(stored_jsons))
/// honk.validate(json_vals)
/// ```
pub fn parse_json_strings(
  json_strs: List(String),
) -> Result(List(Json), ValidationError) {
  json_strs
  |> list.try_map(parse_json_string)
  |> result.map_error(fn(_) {
    errors.invalid_schema("Failed to parse JSON strings")
  })
}

@target(erlang)
/// CLI entry point for the honk lexicon validator
///
/// Usage:
///   gleam run -m honk check <path>
///   gleam run -m honk help
pub fn main() -> Nil {
  case argv.load().arguments {
    ["check", path] -> validate_path(path)
    ["help"] | [] -> show_help()
    _ -> {
      io.println_error("Unknown command. Use 'help' for usage information.")
      Nil
    }
  }
}

@target(erlang)
/// Validate a path (auto-detects file or directory)
fn validate_path(path: String) -> Nil {
  case simplifile.is_file(path) {
    Ok(True) -> validate_file(path)
    Ok(False) ->
      case simplifile.is_directory(path) {
        Ok(True) -> validate_directory(path)
        Ok(False) -> {
          io.println_error(
            "Error: Path is neither a file nor a directory: " <> path,
          )
          Nil
        }
        Error(_) -> {
          io.println_error("Error: Cannot access path: " <> path)
          Nil
        }
      }
    Error(_) -> {
      io.println_error("Error: Cannot access path: " <> path)
      Nil
    }
  }
}

@target(erlang)
/// Validate a single lexicon file
fn validate_file(file_path: String) -> Nil {
  case read_and_validate_file(file_path) {
    Ok(_) -> {
      io.println("" <> file_path <> " - valid")
      Nil
    }
    Error(msg) -> {
      io.println_error("" <> file_path)
      io.println_error(" " <> msg)
      Nil
    }
  }
}

@target(erlang)
/// Validate all .json files in a directory
fn validate_directory(dir_path: String) -> Nil {
  case simplifile.get_files(dir_path) {
    Error(_) -> {
      io.println_error("Error: Cannot read directory: " <> dir_path)
      Nil
    }
    Ok(all_files) -> {
      // Filter for .json files
      let json_files =
        all_files
        |> list.filter(fn(path) { string.ends_with(path, ".json") })

      case json_files {
        [] -> {
          io.println("No .json files found in " <> dir_path)
          Nil
        }
        files -> {
          // Read and parse all files
          let file_results =
            files
            |> list.map(fn(file) {
              case read_json_file(file) {
                Ok(json_value) -> #(file, Ok(json_value))
                Error(msg) -> #(file, Error(msg))
              }
            })

          // Separate successful parses from failures
          let #(parse_errors, parsed_files) =
            list.partition(file_results, fn(result) {
              case result {
                #(_, Error(_)) -> True
                #(_, Ok(_)) -> False
              }
            })

          // Display parse errors
          parse_errors
          |> list.each(fn(result) {
            case result {
              #(file, Error(msg)) -> {
                io.println_error("" <> file)
                io.println_error(" " <> msg)
              }
              _ -> Nil
            }
          })

          // Get all successfully parsed lexicons
          let lexicons =
            parsed_files
            |> list.filter_map(fn(result) {
              case result {
                #(_, Ok(json)) -> Ok(json)
                _ -> Error(Nil)
              }
            })

          // Validate all lexicons together (allows cross-lexicon references)
          case validate(lexicons) {
            Ok(_) -> {
              // All lexicons are valid
              parsed_files
              |> list.each(fn(result) {
                case result {
                  #(file, Ok(_)) -> io.println("" <> file)
                  _ -> Nil
                }
              })
            }
            Error(error_map) -> {
              // Some lexicons have errors - map errors back to files
              parsed_files
              |> list.each(fn(result) {
                case result {
                  #(file, Ok(json)) -> {
                    // Get the lexicon ID for this file
                    case json_helpers.get_string(json, "id") {
                      Some(lex_id) -> {
                        case dict.get(error_map, lex_id) {
                          Ok(errors) -> {
                            io.println_error("" <> file)
                            errors
                            |> list.each(fn(err) {
                              io.println_error(" " <> err)
                            })
                          }
                          Error(_) -> io.println("" <> file)
                        }
                      }
                      None -> {
                        io.println_error("" <> file)
                        io.println_error(" Missing lexicon id")
                      }
                    }
                  }
                  _ -> Nil
                }
              })
            }
          }

          // Summary
          let total = list.length(files)
          let parse_error_count = list.length(parse_errors)
          let validation_error_count = case validate(lexicons) {
            Ok(_) -> 0
            Error(error_map) -> dict.size(error_map)
          }
          let total_errors = parse_error_count + validation_error_count

          case total_errors {
            0 ->
              io.println(
                "\nAll "
                <> int.to_string(total)
                <> " schemas validated successfully.",
              )
            _ ->
              io.println_error(
                "\n"
                <> int.to_string(total_errors)
                <> " of "
                <> int.to_string(total)
                <> " schemas failed validation.",
              )
          }

          Nil
        }
      }
    }
  }
}

@target(erlang)
/// Read and parse a JSON file (without validation)
fn read_json_file(file_path: String) -> Result(Json, String) {
  use content <- result.try(
    simplifile.read(file_path)
    |> result.map_error(fn(_) { "Cannot read file" }),
  )

  use json_dynamic <- result.try(
    json.parse(content, decode.dynamic)
    |> result.map_error(fn(_) { "Invalid JSON" }),
  )

  json_helpers.dynamic_to_json(json_dynamic)
  |> result.map_error(fn(_) { "Failed to convert JSON" })
}

@target(erlang)
/// Read a file and validate it as a lexicon
fn read_and_validate_file(file_path: String) -> Result(Nil, String) {
  use content <- result.try(
    simplifile.read(file_path)
    |> result.map_error(fn(_) { "Cannot read file" }),
  )

  use json_dynamic <- result.try(
    json.parse(content, decode.dynamic)
    |> result.map_error(fn(_) { "Invalid JSON" }),
  )

  use json_value <- result.try(
    json_helpers.dynamic_to_json(json_dynamic)
    |> result.map_error(fn(_) { "Failed to convert JSON" }),
  )

  use _ <- result.try(
    validate([json_value])
    |> result.map_error(fn(error_map) { format_validation_errors(error_map) }),
  )

  Ok(Nil)
}

@target(erlang)
/// Format validation errors from the error map
fn format_validation_errors(error_map: Dict(String, List(String))) -> String {
  error_map
  |> dict.to_list
  |> list.map(fn(entry) {
    let #(_key, errors) = entry
    string.join(errors, "\n ")
  })
  |> string.join("\n ")
}

@target(erlang)
/// Show help text
fn show_help() -> Nil {
  io.println(
    "
honk - ATProtocol Lexicon Validator

USAGE:
  gleam run -m honk check <path>
  gleam run -m honk help

COMMANDS:
  check <path>   Check a lexicon file or directory
                 - If <path> is a file: validates that single lexicon
                 - If <path> is a directory: recursively validates all .json files

  help           Show this help message

EXAMPLES:
  gleam run -m honk check ./lexicons/xyz/statusphere/status.json
  gleam run -m honk check ./lexicons

VALIDATION:
  - Validates lexicon structure (id, defs)
  - Validates ALL definitions in each lexicon
  - Checks types, constraints, and references
  - Reports errors with definition context (lex.id#defName)
",
  )
}