An ATProto Lexicon validator for Gleam.

Add CLI check command

+17
README.md
··· 86 86 - ✅ **Circular Dependency Detection**: prevents infinite reference loops 87 87 - ✅ **Detailed Error Messages**: validation errors with path information 88 88 89 + ## CLI Usage 90 + 91 + Validate lexicon files from the command line: 92 + 93 + ```sh 94 + # Validate a single file 95 + gleam run -m honk check ./lexicons/xyz/statusphere/status.json 96 + 97 + # Validate all .json files in a directory 98 + gleam run -m honk check ./lexicons/ 99 + 100 + # Show help 101 + gleam run -m honk help 102 + ``` 103 + 104 + When validating a directory, all lexicons are loaded together to resolve cross-lexicon references. 105 + 89 106 ## API Overview 90 107 91 108 ### Main Functions
+2
gleam.toml
··· 8 8 gleam_json = ">= 3.0.0 and < 4.0.0" 9 9 gleam_regexp = ">= 1.0.0 and < 2.0.0" 10 10 gleam_time = ">= 1.5.0 and < 2.0.0" 11 + simplifile = ">= 2.3.1 and < 3.0.0" 12 + argv = ">= 1.0.2 and < 2.0.0" 11 13 12 14 [dev-dependencies] 13 15 gleeunit = ">= 1.0.0 and < 2.0.0"
+5
manifest.toml
··· 2 2 # You typically do not need to edit this file 3 3 4 4 packages = [ 5 + { name = "argv", version = "1.0.2", build_tools = ["gleam"], requirements = [], otp_app = "argv", source = "hex", outer_checksum = "BA1FF0929525DEBA1CE67256E5ADF77A7CDDFE729E3E3F57A5BDCAA031DED09D" }, 6 + { name = "filepath", version = "1.1.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "filepath", source = "hex", outer_checksum = "B06A9AF0BF10E51401D64B98E4B627F1D2E48C154967DA7AF4D0914780A6D40A" }, 5 7 { name = "gleam_json", version = "3.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_json", source = "hex", outer_checksum = "44FDAA8847BE8FC48CA7A1C089706BD54BADCC4C45B237A992EDDF9F2CDB2836" }, 6 8 { name = "gleam_regexp", version = "1.1.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_regexp", source = "hex", outer_checksum = "9C215C6CA84A5B35BB934A9B61A9A306EC743153BE2B0425A0D032E477B062A9" }, 7 9 { name = "gleam_stdlib", version = "0.65.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "7C69C71D8C493AE11A5184828A77110EB05A7786EBF8B25B36A72F879C3EE107" }, 8 10 { name = "gleam_time", version = "1.5.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_time", source = "hex", outer_checksum = "D560E672C7279C89908981E068DF07FD16D0C859DCA266F908B18F04DF0EB8E6" }, 9 11 { name = "gleeunit", version = "1.9.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "DA9553CE58B67924B3C631F96FE3370C49EB6D6DC6B384EC4862CC4AAA718F3C" }, 12 + { name = "simplifile", version = "2.3.1", build_tools = ["gleam"], requirements = ["filepath", "gleam_stdlib"], otp_app = "simplifile", source = "hex", outer_checksum = "957E0E5B75927659F1D2A1B7B75D7B9BA96FAA8D0C53EA71C4AD9CD0C6B848F6" }, 10 13 ] 11 14 12 15 [requirements] 16 + argv = { version = ">= 1.0.2 and < 2.0.0" } 13 17 gleam_json 
= { version = ">= 3.0.0 and < 4.0.0" } 14 18 gleam_regexp = { version = ">= 1.0.0 and < 2.0.0" } 15 19 gleam_stdlib = { version = ">= 0.44.0 and < 2.0.0" } 16 20 gleam_time = { version = ">= 1.5.0 and < 2.0.0" } 17 21 gleeunit = { version = ">= 1.0.0 and < 2.0.0" } 22 + simplifile = { version = ">= 2.3.1 and < 3.0.0" }
+285 -23
src/honk.gleam
··· 1 1 // Main public API for the ATProtocol lexicon validator 2 2 3 + import argv 3 4 import gleam/dict.{type Dict} 5 + import gleam/dynamic/decode 6 + import gleam/int 7 + import gleam/io 4 8 import gleam/json.{type Json} 5 9 import gleam/list 6 10 import gleam/option.{None, Some} 7 11 import gleam/result 12 + import gleam/string 8 13 import honk/errors 9 14 import honk/internal/json_helpers 10 15 import honk/types 11 16 import honk/validation/context 12 17 import honk/validation/formats 18 + import simplifile 13 19 14 20 // Import validators 15 21 import honk/validation/field as validation_field ··· 67 73 Ok(_) -> errors_acc 68 74 Error(e) -> { 69 75 // Include def name in error for better context 70 - let error_msg = 71 - lex_id 72 - <> "#" 73 - <> def_name 74 - <> ": " 75 - <> errors.to_string(e) 76 + // Extract just the message without wrapper text 77 + let message = case e { 78 + errors.InvalidSchema(msg) -> msg 79 + errors.DataValidation(msg) -> msg 80 + errors.LexiconNotFound(msg) -> "Lexicon not found: " <> msg 81 + } 82 + // Clean up leading ": " if present 83 + let clean_message = case string.starts_with(message, ": ") { 84 + True -> string.drop_start(message, 2) 85 + False -> message 86 + } 87 + let error_msg = lex_id <> "#" <> def_name <> ": " <> clean_message 76 88 case dict.get(errors_acc, lex_id) { 77 89 Ok(existing_errors) -> 78 90 dict.insert(errors_acc, lex_id, [ ··· 185 197 } 186 198 } 187 199 188 - /// Entry point for the honk lexicon validator. 200 + /// CLI entry point for the honk lexicon validator 189 201 /// 190 - /// This function serves as an example entry point and can be used 191 - /// for basic CLI or testing purposes. For actual validation, 192 - /// use the `validate()` or `validate_record()` functions. 
193 - /// 194 - /// ## Example 195 - /// 196 - /// ```gleam 197 - /// import honk 198 - /// 199 - /// pub fn main() { 200 - /// honk.main() 201 - /// } 202 - /// ``` 202 + /// Usage: 203 + /// gleam run -m honk check <path> 204 + /// gleam run -m honk help 203 205 pub fn main() -> Nil { 204 - // This would typically be called from tests or CLI 205 - let _example_result = is_valid_nsid("com.example.record") 206 - Nil 206 + case argv.load().arguments { 207 + ["check", path] -> validate_path(path) 208 + ["help"] | [] -> show_help() 209 + _ -> { 210 + io.println_error("Unknown command. Use 'help' for usage information.") 211 + Nil 212 + } 213 + } 214 + } 215 + 216 + /// Validate a path (auto-detects file or directory) 217 + fn validate_path(path: String) -> Nil { 218 + case simplifile.is_file(path) { 219 + Ok(True) -> validate_file(path) 220 + Ok(False) -> 221 + case simplifile.is_directory(path) { 222 + Ok(True) -> validate_directory(path) 223 + Ok(False) -> { 224 + io.println_error("Error: Path is neither a file nor a directory: " <> path) 225 + Nil 226 + } 227 + Error(_) -> { 228 + io.println_error("Error: Cannot access path: " <> path) 229 + Nil 230 + } 231 + } 232 + Error(_) -> { 233 + io.println_error("Error: Cannot access path: " <> path) 234 + Nil 235 + } 236 + } 237 + } 238 + 239 + /// Validate a single lexicon file 240 + fn validate_file(file_path: String) -> Nil { 241 + case read_and_validate_file(file_path) { 242 + Ok(_) -> { 243 + io.println("✓ " <> file_path <> " - valid") 244 + Nil 245 + } 246 + Error(msg) -> { 247 + io.println_error("✗ " <> file_path) 248 + io.println_error(" " <> msg) 249 + Nil 250 + } 251 + } 252 + } 253 + 254 + /// Validate all .json files in a directory 255 + fn validate_directory(dir_path: String) -> Nil { 256 + case simplifile.get_files(dir_path) { 257 + Error(_) -> { 258 + io.println_error("Error: Cannot read directory: " <> dir_path) 259 + Nil 260 + } 261 + Ok(all_files) -> { 262 + // Filter for .json files 263 + let json_files 
= 264 + all_files 265 + |> list.filter(fn(path) { string.ends_with(path, ".json") }) 266 + 267 + case json_files { 268 + [] -> { 269 + io.println("No .json files found in " <> dir_path) 270 + Nil 271 + } 272 + files -> { 273 + // Read and parse all files 274 + let file_results = 275 + files 276 + |> list.map(fn(file) { 277 + case read_json_file(file) { 278 + Ok(json_value) -> #(file, Ok(json_value)) 279 + Error(msg) -> #(file, Error(msg)) 280 + } 281 + }) 282 + 283 + // Separate successful parses from failures 284 + let #(parse_errors, parsed_files) = 285 + list.partition(file_results, fn(result) { 286 + case result { 287 + #(_, Error(_)) -> True 288 + #(_, Ok(_)) -> False 289 + } 290 + }) 291 + 292 + // Display parse errors 293 + parse_errors 294 + |> list.each(fn(result) { 295 + case result { 296 + #(file, Error(msg)) -> { 297 + io.println_error("✗ " <> file) 298 + io.println_error(" " <> msg) 299 + } 300 + _ -> Nil 301 + } 302 + }) 303 + 304 + // Get all successfully parsed lexicons 305 + let lexicons = 306 + parsed_files 307 + |> list.filter_map(fn(result) { 308 + case result { 309 + #(_, Ok(json)) -> Ok(json) 310 + _ -> Error(Nil) 311 + } 312 + }) 313 + 314 + // Validate all lexicons together (allows cross-lexicon references) 315 + case validate(lexicons) { 316 + Ok(_) -> { 317 + // All lexicons are valid 318 + parsed_files 319 + |> list.each(fn(result) { 320 + case result { 321 + #(file, Ok(_)) -> io.println("✓ " <> file) 322 + _ -> Nil 323 + } 324 + }) 325 + } 326 + Error(error_map) -> { 327 + // Some lexicons have errors - map errors back to files 328 + parsed_files 329 + |> list.each(fn(result) { 330 + case result { 331 + #(file, Ok(json)) -> { 332 + // Get the lexicon ID for this file 333 + case json_helpers.get_string(json, "id") { 334 + Some(lex_id) -> { 335 + case dict.get(error_map, lex_id) { 336 + Ok(errors) -> { 337 + io.println_error("✗ " <> file) 338 + errors 339 + |> list.each(fn(err) { 340 + io.println_error(" " <> err) 341 + }) 342 + } 343 + 
Error(_) -> io.println("✓ " <> file) 344 + } 345 + } 346 + None -> { 347 + io.println_error("✗ " <> file) 348 + io.println_error(" Missing lexicon id") 349 + } 350 + } 351 + } 352 + _ -> Nil 353 + } 354 + }) 355 + } 356 + } 357 + 358 + // Summary 359 + let total = list.length(files) 360 + let parse_error_count = list.length(parse_errors) 361 + let validation_error_count = case validate(lexicons) { 362 + Ok(_) -> 0 363 + Error(error_map) -> dict.size(error_map) 364 + } 365 + let total_errors = parse_error_count + validation_error_count 366 + 367 + case total_errors { 368 + 0 -> 369 + io.println( 370 + "\nAll " <> int.to_string(total) <> " schemas validated successfully.", 371 + ) 372 + _ -> 373 + io.println_error( 374 + "\n" 375 + <> int.to_string(total_errors) 376 + <> " of " 377 + <> int.to_string(total) 378 + <> " schemas failed validation.", 379 + ) 380 + } 381 + 382 + Nil 383 + } 384 + } 385 + } 386 + } 387 + } 388 + 389 + /// Read and parse a JSON file (without validation) 390 + fn read_json_file(file_path: String) -> Result(Json, String) { 391 + use content <- result.try( 392 + simplifile.read(file_path) 393 + |> result.map_error(fn(_) { "Cannot read file" }), 394 + ) 395 + 396 + use json_dynamic <- result.try( 397 + json.parse(content, decode.dynamic) 398 + |> result.map_error(fn(_) { "Invalid JSON" }), 399 + ) 400 + 401 + json_helpers.dynamic_to_json(json_dynamic) 402 + |> result.map_error(fn(_) { "Failed to convert JSON" }) 403 + } 404 + 405 + /// Read a file and validate it as a lexicon 406 + fn read_and_validate_file(file_path: String) -> Result(Nil, String) { 407 + use content <- result.try( 408 + simplifile.read(file_path) 409 + |> result.map_error(fn(_) { "Cannot read file" }), 410 + ) 411 + 412 + use json_dynamic <- result.try( 413 + json.parse(content, decode.dynamic) 414 + |> result.map_error(fn(_) { "Invalid JSON" }), 415 + ) 416 + 417 + use json_value <- result.try( 418 + json_helpers.dynamic_to_json(json_dynamic) 419 + |> result.map_error(fn(_) 
{ "Failed to convert JSON" }), 420 + ) 421 + 422 + use _ <- result.try( 423 + validate([json_value]) 424 + |> result.map_error(fn(error_map) { format_validation_errors(error_map) }), 425 + ) 426 + 427 + Ok(Nil) 428 + } 429 + 430 + /// Format validation errors from the error map 431 + fn format_validation_errors(error_map: Dict(String, List(String))) -> String { 432 + error_map 433 + |> dict.to_list 434 + |> list.map(fn(entry) { 435 + let #(_key, errors) = entry 436 + string.join(errors, "\n ") 437 + }) 438 + |> string.join("\n ") 439 + } 440 + 441 + /// Show help text 442 + fn show_help() -> Nil { 443 + io.println( 444 + " 445 + honk - ATProtocol Lexicon Validator 446 + 447 + USAGE: 448 + gleam run -m honk check <path> 449 + gleam run -m honk help 450 + 451 + COMMANDS: 452 + check <path> Check a lexicon file or directory 453 + - If <path> is a file: validates that single lexicon 454 + - If <path> is a directory: recursively validates all .json files 455 + 456 + help Show this help message 457 + 458 + EXAMPLES: 459 + gleam run -m honk check ./lexicons/xyz/statusphere/status.json 460 + gleam run -m honk check ./lexicons 461 + 462 + VALIDATION: 463 + - Validates lexicon structure (id, defs) 464 + - Validates ALL definitions in each lexicon 465 + - Checks types, constraints, and references 466 + - Reports errors with definition context (lex.id#defName) 467 + ", 468 + ) 207 469 }
+32 -1
src/honk/validation/field/reference.gleam
··· 40 40 use ref_str <- result.try(ref_value) 41 41 42 42 // Validate reference syntax 43 - validate_ref_syntax(ref_str, def_name) 43 + use _ <- result.try(validate_ref_syntax(ref_str, def_name)) 44 + 45 + // Validate that the reference can be resolved (only for global refs with full context) 46 + case string.starts_with(ref_str, "#") { 47 + True -> Ok(Nil) // Local ref - will be validated in same lexicon 48 + False -> { 49 + // Global ref - check it exists in catalog if we have a current lexicon 50 + case context.current_lexicon_id(ctx) { 51 + Some(lex_id) -> { 52 + // We have a full validation context, so validate reference resolution 53 + use resolved <- result.try(resolution.resolve_reference( 54 + ref_str, 55 + ctx, 56 + lex_id, 57 + )) 58 + 59 + case resolved { 60 + Some(_) -> Ok(Nil) 61 + None -> 62 + Error(errors.invalid_schema( 63 + def_name <> ": reference not found: " <> ref_str, 64 + )) 65 + } 66 + } 67 + None -> { 68 + // No current lexicon (e.g., unit test context) 69 + // Just validate syntax, can't check if reference exists 70 + Ok(Nil) 71 + } 72 + } 73 + } 74 + } 44 75 } 45 76 46 77 /// Validates data against the referenced schema
+20
test/fixtures/com.atproto.repo.strongRef.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "com.atproto.repo.strongRef", 4 + "defs": { 5 + "main": { 6 + "type": "object", 7 + "required": ["uri", "cid"], 8 + "properties": { 9 + "uri": { 10 + "type": "string", 11 + "format": "at-uri" 12 + }, 13 + "cid": { 14 + "type": "string", 15 + "format": "cid" 16 + } 17 + } 18 + } 19 + } 20 + }
+46
test/fixtures/com.example.post.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "com.example.post", 4 + "defs": { 5 + "main": { 6 + "type": "record", 7 + "key": "tid", 8 + "record": { 9 + "type": "object", 10 + "required": ["text", "createdAt"], 11 + "properties": { 12 + "text": { 13 + "type": "string", 14 + "maxLength": 300 15 + }, 16 + "author": { 17 + "type": "ref", 18 + "ref": "com.example.user#profile" 19 + }, 20 + "createdAt": { 21 + "type": "string", 22 + "format": "datetime" 23 + }, 24 + "reply": { 25 + "type": "ref", 26 + "ref": "#replyRef" 27 + } 28 + } 29 + } 30 + }, 31 + "replyRef": { 32 + "type": "object", 33 + "required": ["parent", "root"], 34 + "properties": { 35 + "parent": { 36 + "type": "ref", 37 + "ref": "com.atproto.repo.strongRef" 38 + }, 39 + "root": { 40 + "type": "ref", 41 + "ref": "com.atproto.repo.strongRef" 42 + } 43 + } 44 + } 45 + } 46 + }
+41
test/fixtures/com.example.user.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "com.example.user", 4 + "defs": { 5 + "main": { 6 + "type": "record", 7 + "key": "tid", 8 + "record": { 9 + "type": "object", 10 + "required": ["handle", "displayName"], 11 + "properties": { 12 + "handle": { 13 + "type": "string", 14 + "format": "handle" 15 + }, 16 + "displayName": { 17 + "type": "string", 18 + "maxLength": 64 19 + }, 20 + "bio": { 21 + "type": "string", 22 + "maxLength": 256 23 + } 24 + } 25 + } 26 + }, 27 + "profile": { 28 + "type": "object", 29 + "required": ["handle"], 30 + "properties": { 31 + "handle": { 32 + "type": "string", 33 + "format": "handle" 34 + }, 35 + "displayName": { 36 + "type": "string" 37 + } 38 + } 39 + } 40 + } 41 + }
+19
test/fixtures/invalid-ref.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "com.example.invalidref", 4 + "defs": { 5 + "main": { 6 + "type": "record", 7 + "key": "tid", 8 + "record": { 9 + "type": "object", 10 + "properties": { 11 + "brokenRef": { 12 + "type": "ref", 13 + "ref": "com.example.nonexistent#thing" 14 + } 15 + } 16 + } 17 + } 18 + } 19 + }
+19
test/fixtures/invalid.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "com.example.invalid", 4 + "defs": { 5 + "main": { 6 + "type": "record", 7 + "key": "tid", 8 + "record": { 9 + "type": "object", 10 + "properties": {} 11 + } 12 + }, 13 + "badDef": { 14 + "type": "string", 15 + "minLength": 10, 16 + "maxLength": 5 17 + } 18 + } 19 + }
+26
test/fixtures/valid.json
··· 1 + { 2 + "lexicon": 1, 3 + "id": "xyz.statusphere.status", 4 + "defs": { 5 + "main": { 6 + "type": "record", 7 + "key": "tid", 8 + "record": { 9 + "type": "object", 10 + "required": ["status", "createdAt"], 11 + "properties": { 12 + "status": { 13 + "type": "string", 14 + "minLength": 1, 15 + "maxGraphemes": 1, 16 + "maxLength": 32 17 + }, 18 + "createdAt": { 19 + "type": "string", 20 + "format": "datetime" 21 + } 22 + } 23 + } 24 + } 25 + } 26 + }