An ATProto Lexicon validator for Gleam.
honk v1.0.1
// Main public API for the ATProtocol lexicon validator

import argv
import gleam/dict.{type Dict}
import gleam/dynamic
import gleam/dynamic/decode
import gleam/int
import gleam/io
import gleam/json.{type Json}
import gleam/list
import gleam/option.{None, Some}
import gleam/result
import gleam/string
import honk/errors
import honk/internal/json_helpers
import honk/types
import honk/validation/context
import honk/validation/formats
import simplifile

// Import validators
import honk/validation/field as validation_field
import honk/validation/field/reference as validation_field_reference
import honk/validation/field/union as validation_field_union
import honk/validation/meta/token as validation_meta_token
import honk/validation/meta/unknown as validation_meta_unknown
import honk/validation/primary/params as validation_primary_params
import honk/validation/primary/procedure as validation_primary_procedure
import honk/validation/primary/query as validation_primary_query
import honk/validation/primary/record as validation_primary_record
import honk/validation/primary/subscription as validation_primary_subscription
import honk/validation/primitive/blob as validation_primitive_blob
import honk/validation/primitive/boolean as validation_primitive_boolean
import honk/validation/primitive/bytes as validation_primitive_bytes
import honk/validation/primitive/cid_link as validation_primitive_cid_link
import honk/validation/primitive/integer as validation_primitive_integer
import honk/validation/primitive/null as validation_primitive_null
import honk/validation/primitive/string as validation_primitive_string

// Re-export error type for public API error handling
pub type ValidationError =
  errors.ValidationError

/// Validates lexicon documents
///
/// Validates lexicon structure (id, defs) and ALL definitions within each lexicon.
/// Each definition in the defs object is validated according to its type.
///
/// Returns Ok(Nil) if all lexicons and their definitions are valid.
/// Returns Error with a map of lexicon ID to list of error messages.
/// Error messages include the definition name (e.g., "lex.id#defName: error").
pub fn validate(lexicons: List(Json)) -> Result(Nil, Dict(String, List(String))) {
  // Build validation context (registers every lexicon so that
  // cross-lexicon references can resolve during definition validation)
  let builder_result =
    context.builder()
    |> context.with_lexicons(lexicons)

  case builder_result {
    Ok(builder) ->
      case context.build(builder) {
        Ok(ctx) -> {
          // Validate ALL definitions in each lexicon, accumulating
          // errors keyed by lexicon ID
          let error_map =
            dict.fold(ctx.lexicons, dict.new(), fn(errors, lex_id, lexicon) {
              // Get all definition names from the defs object
              let def_keys = json_helpers.get_keys(lexicon.defs)
              let lex_ctx = context.with_current_lexicon(ctx, lex_id)

              // Validate each definition
              list.fold(def_keys, errors, fn(errors_acc, def_name) {
                case json_helpers.get_field(lexicon.defs, def_name) {
                  Some(def) ->
                    case validate_definition(def, lex_ctx) {
                      Ok(_) -> errors_acc
                      Error(e) -> {
                        // Include def name in error for better context
                        let error_msg =
                          lex_id <> "#" <> def_name <> ": " <> error_message(e)
                        // Prepend to any errors already recorded for this
                        // lexicon (so the resulting list is newest-first,
                        // matching the original accumulation order)
                        dict.upsert(errors_acc, lex_id, fn(existing) {
                          case existing {
                            Some(existing_errors) -> [
                              error_msg,
                              ..existing_errors
                            ]
                            None -> [error_msg]
                          }
                        })
                      }
                    }
                  None -> errors_acc
                }
              })
            })

          case dict.is_empty(error_map) {
            True -> Ok(Nil)
            False -> Error(error_map)
          }
        }
        Error(e) -> Error(dict.from_list([#("builder", [errors.to_string(e)])]))
      }
    Error(e) -> Error(dict.from_list([#("builder", [errors.to_string(e)])]))
  }
}

/// Extracts a human-readable message from a validation error.
///
/// Strips the error-variant wrapper and cleans up a leading ": " that
/// some error constructors leave on the message text.
fn error_message(e: errors.ValidationError) -> String {
  let message = case e {
    errors.InvalidSchema(msg) -> msg
    errors.DataValidation(msg) -> msg
    errors.LexiconNotFound(msg) -> "Lexicon not found: " <> msg
  }
  // Clean up leading ": " if present
  case string.starts_with(message, ": ") {
    True -> string.drop_start(message, 2)
    False -> message
  }
}

/// Validates a single definition based on its type
///
/// Dispatches to the dedicated validator for each lexicon definition type.
/// Returns an InvalidSchema error for unknown types or a missing type field.
fn validate_definition(
  def: Json,
  ctx: context.ValidationContext,
) -> Result(Nil, errors.ValidationError) {
  case json_helpers.get_string(def, "type") {
    Some("record") -> validation_primary_record.validate_schema(def, ctx)
    Some("query") -> validation_primary_query.validate_schema(def, ctx)
    Some("procedure") -> validation_primary_procedure.validate_schema(def, ctx)
    Some("subscription") ->
      validation_primary_subscription.validate_schema(def, ctx)
    Some("params") -> validation_primary_params.validate_schema(def, ctx)
    Some("object") -> validation_field.validate_object_schema(def, ctx)
    Some("array") -> validation_field.validate_array_schema(def, ctx)
    Some("union") -> validation_field_union.validate_schema(def, ctx)
    Some("string") -> validation_primitive_string.validate_schema(def, ctx)
    Some("integer") -> validation_primitive_integer.validate_schema(def, ctx)
    Some("boolean") -> validation_primitive_boolean.validate_schema(def, ctx)
    Some("bytes") -> validation_primitive_bytes.validate_schema(def, ctx)
    Some("blob") -> validation_primitive_blob.validate_schema(def, ctx)
    Some("cid-link") -> validation_primitive_cid_link.validate_schema(def, ctx)
    Some("null") -> validation_primitive_null.validate_schema(def, ctx)
    Some("ref") -> validation_field_reference.validate_schema(def, ctx)
    Some("token") -> validation_meta_token.validate_schema(def, ctx)
    Some("unknown") -> validation_meta_unknown.validate_schema(def, ctx)
    Some(unknown_type) ->
      Error(errors.invalid_schema("Unknown type: " <> unknown_type))
    None -> Error(errors.invalid_schema("Definition missing type field"))
  }
}

/// Validates a single data record against a collection schema
///
/// Looks up the lexicon for `collection`, finds its "main" definition
/// (expected to be a record type), and validates `record` against it.
pub fn validate_record(
  lexicons: List(Json),
  collection: String,
  record: Json,
) -> Result(Nil, ValidationError) {
  // Build validation context
  let builder_result =
    context.builder()
    |> context.with_lexicons(lexicons)

  use builder <- result.try(builder_result)
  use ctx <- result.try(context.build(builder))

  // Get the lexicon for this collection
  case context.get_lexicon(ctx, collection) {
    Some(lexicon) -> {
      // Get the main definition (should be a record type)
      case json_helpers.get_field(lexicon.defs, "main") {
        Some(main_def) -> {
          let lex_ctx = context.with_current_lexicon(ctx, collection)
          // Set the path to include the definition name
          let def_ctx = context.with_path(lex_ctx, "defs.main")
          // Validate the record data against the main definition
          validation_primary_record.validate_data(record, main_def, def_ctx)
        }
        None ->
          Error(errors.invalid_schema(
            "Lexicon '" <> collection <> "' has no main definition",
          ))
      }
    }
    None -> Error(errors.lexicon_not_found(collection))
  }
}

/// Validates NSID format
pub fn is_valid_nsid(nsid: String) -> Bool {
  formats.is_valid_nsid(nsid)
}

/// Validates a string value against a specific format
///
/// Returns Ok(Nil) when the value matches, or a descriptive error string.
pub fn validate_string_format(
  value: String,
  format: types.StringFormat,
) -> Result(Nil, String) {
  case formats.validate_format(value, format) {
    True -> Ok(Nil)
    False -> {
      let format_name = types.format_to_string(format)
      Error("Value does not match format: " <> format_name)
    }
  }
}

/// Convert a Dynamic value to Json
///
/// This is useful when parsing JSON strings with `json.parse(str, decode.dynamic)`
/// and then needing to convert to Json for validation.
///
/// ## Example
/// ```gleam
/// use dyn <- result.try(json.parse(json_str, decode.dynamic))
/// use json_val <- result.try(honk.dynamic_to_json(dyn))
/// honk.validate([json_val])
/// ```
pub fn dynamic_to_json(dyn: dynamic.Dynamic) -> Result(Json, ValidationError) {
  json_helpers.dynamic_to_json(dyn)
}

/// Parse a JSON string and convert to Json for validation
///
/// This is a convenience function that combines `json.parse()` and `dynamic_to_json()`.
/// It's useful when you have JSON stored as strings (e.g., in a database) and want
/// to validate it with honk.
///
/// ## Example
/// ```gleam
/// use json_val <- result.try(honk.parse_json_string(stored_json))
/// honk.validate([json_val])
/// ```
pub fn parse_json_string(json_str: String) -> Result(Json, ValidationError) {
  use dyn <- result.try(
    json.parse(json_str, decode.dynamic)
    |> result.map_error(fn(_) {
      errors.invalid_schema("Failed to parse JSON string")
    }),
  )
  dynamic_to_json(dyn)
}

/// Parse multiple JSON strings and convert to Json for validation
///
/// This is a convenience function for batch parsing JSON strings.
///
/// ## Example
/// ```gleam
/// use json_vals <- result.try(honk.parse_json_strings(stored_jsons))
/// honk.validate(json_vals)
/// ```
pub fn parse_json_strings(
  json_strs: List(String),
) -> Result(List(Json), ValidationError) {
  json_strs
  |> list.try_map(parse_json_string)
  |> result.map_error(fn(_) {
    errors.invalid_schema("Failed to parse JSON strings")
  })
}

/// CLI entry point for the honk lexicon validator
///
/// Usage:
///   gleam run -m honk check <path>
///   gleam run -m honk help
pub fn main() -> Nil {
  case argv.load().arguments {
    ["check", path] -> validate_path(path)
    ["help"] | [] -> show_help()
    _ -> {
      io.println_error("Unknown command. Use 'help' for usage information.")
      Nil
    }
  }
}

/// Validate a path (auto-detects file or directory)
fn validate_path(path: String) -> Nil {
  case simplifile.is_file(path) {
    Ok(True) -> validate_file(path)
    Ok(False) ->
      case simplifile.is_directory(path) {
        Ok(True) -> validate_directory(path)
        Ok(False) -> {
          io.println_error(
            "Error: Path is neither a file nor a directory: " <> path,
          )
          Nil
        }
        Error(_) -> {
          io.println_error("Error: Cannot access path: " <> path)
          Nil
        }
      }
    Error(_) -> {
      io.println_error("Error: Cannot access path: " <> path)
      Nil
    }
  }
}

/// Validate a single lexicon file
fn validate_file(file_path: String) -> Nil {
  case read_and_validate_file(file_path) {
    Ok(_) -> {
      // NOTE(review): the leading "" looks like a status glyph lost in
      // transit — confirm against the original source
      io.println("" <> file_path <> " - valid")
      Nil
    }
    Error(msg) -> {
      io.println_error("" <> file_path)
      io.println_error("  " <> msg)
      Nil
    }
  }
}

/// Validate all .json files in a directory
///
/// Parses every .json file, reports parse failures, validates all
/// successfully-parsed lexicons together (so cross-lexicon references
/// resolve), reports per-file validation errors, then prints a summary.
fn validate_directory(dir_path: String) -> Nil {
  case simplifile.get_files(dir_path) {
    Error(_) -> {
      io.println_error("Error: Cannot read directory: " <> dir_path)
      Nil
    }
    Ok(all_files) -> {
      // Filter for .json files
      let json_files =
        all_files
        |> list.filter(fn(path) { string.ends_with(path, ".json") })

      case json_files {
        [] -> {
          io.println("No .json files found in " <> dir_path)
          Nil
        }
        files -> {
          // Read and parse all files
          let file_results =
            files
            |> list.map(fn(file) {
              case read_json_file(file) {
                Ok(json_value) -> #(file, Ok(json_value))
                Error(msg) -> #(file, Error(msg))
              }
            })

          // Separate successful parses from failures
          let #(parse_errors, parsed_files) =
            list.partition(file_results, fn(result) {
              case result {
                #(_, Error(_)) -> True
                #(_, Ok(_)) -> False
              }
            })

          // Display parse errors
          parse_errors
          |> list.each(fn(result) {
            case result {
              #(file, Error(msg)) -> {
                io.println_error("" <> file)
                io.println_error("  " <> msg)
              }
              _ -> Nil
            }
          })

          // Get all successfully parsed lexicons
          let lexicons =
            parsed_files
            |> list.filter_map(fn(result) {
              case result {
                #(_, Ok(json)) -> Ok(json)
                _ -> Error(Nil)
              }
            })

          // Validate all lexicons together (allows cross-lexicon references).
          // Validate ONCE and reuse the result below — the original called
          // validate(lexicons) a second time just to compute the summary,
          // re-running full schema validation over every lexicon.
          let validation_result = validate(lexicons)

          case validation_result {
            Ok(_) -> {
              // All lexicons are valid
              parsed_files
              |> list.each(fn(result) {
                case result {
                  #(file, Ok(_)) -> io.println("" <> file)
                  _ -> Nil
                }
              })
            }
            Error(error_map) -> {
              // Some lexicons have errors - map errors back to files
              parsed_files
              |> list.each(fn(result) {
                case result {
                  #(file, Ok(json)) -> {
                    // Get the lexicon ID for this file
                    case json_helpers.get_string(json, "id") {
                      Some(lex_id) -> {
                        case dict.get(error_map, lex_id) {
                          Ok(errors) -> {
                            io.println_error("" <> file)
                            errors
                            |> list.each(fn(err) {
                              io.println_error("  " <> err)
                            })
                          }
                          Error(_) -> io.println("" <> file)
                        }
                      }
                      None -> {
                        io.println_error("" <> file)
                        io.println_error("  Missing lexicon id")
                      }
                    }
                  }
                  _ -> Nil
                }
              })
            }
          }

          // Summary (reuses the validation result computed above)
          let total = list.length(files)
          let parse_error_count = list.length(parse_errors)
          let validation_error_count = case validation_result {
            Ok(_) -> 0
            Error(error_map) -> dict.size(error_map)
          }
          let total_errors = parse_error_count + validation_error_count

          case total_errors {
            0 ->
              io.println(
                "\nAll "
                <> int.to_string(total)
                <> " schemas validated successfully.",
              )
            _ ->
              io.println_error(
                "\n"
                <> int.to_string(total_errors)
                <> " of "
                <> int.to_string(total)
                <> " schemas failed validation.",
              )
          }

          Nil
        }
      }
    }
  }
}

/// Read and parse a JSON file (without validation)
fn read_json_file(file_path: String) -> Result(Json, String) {
  use content <- result.try(
    simplifile.read(file_path)
    |> result.map_error(fn(_) { "Cannot read file" }),
  )

  use json_dynamic <- result.try(
    json.parse(content, decode.dynamic)
    |> result.map_error(fn(_) { "Invalid JSON" }),
  )

  json_helpers.dynamic_to_json(json_dynamic)
  |> result.map_error(fn(_) { "Failed to convert JSON" })
}

/// Read a file and validate it as a lexicon
fn read_and_validate_file(file_path: String) -> Result(Nil, String) {
  use content <- result.try(
    simplifile.read(file_path)
    |> result.map_error(fn(_) { "Cannot read file" }),
  )

  use json_dynamic <- result.try(
    json.parse(content, decode.dynamic)
    |> result.map_error(fn(_) { "Invalid JSON" }),
  )

  use json_value <- result.try(
    json_helpers.dynamic_to_json(json_dynamic)
    |> result.map_error(fn(_) { "Failed to convert JSON" }),
  )

  use _ <- result.try(
    validate([json_value])
    |> result.map_error(fn(error_map) { format_validation_errors(error_map) }),
  )

  Ok(Nil)
}

/// Format validation errors from the error map
///
/// Flattens all error lists into a single indented, newline-joined string.
fn format_validation_errors(error_map: Dict(String, List(String))) -> String {
  error_map
  |> dict.to_list
  |> list.map(fn(entry) {
    let #(_key, errors) = entry
    string.join(errors, "\n  ")
  })
  |> string.join("\n  ")
}

/// Show help text
fn show_help() -> Nil {
  io.println(
    "
honk - ATProtocol Lexicon Validator

USAGE:
  gleam run -m honk check <path>
  gleam run -m honk help

COMMANDS:
  check <path>   Check a lexicon file or directory
                 - If <path> is a file: validates that single lexicon
                 - If <path> is a directory: recursively validates all .json files

  help           Show this help message

EXAMPLES:
  gleam run -m honk check ./lexicons/xyz/statusphere/status.json
  gleam run -m honk check ./lexicons

VALIDATION:
  - Validates lexicon structure (id, defs)
  - Validates ALL definitions in each lexicon
  - Checks types, constraints, and references
  - Reports errors with definition context (lex.id#defName)
",
  )
}