//// An ATProto Lexicon validator for Gleam.
1// Main public API for the ATProtocol lexicon validator
2
3import argv
4import gleam/dict.{type Dict}
5import gleam/dynamic
6import gleam/dynamic/decode
7import gleam/int
8import gleam/io
9import gleam/json.{type Json}
10import gleam/list
11import gleam/option.{None, Some}
12import gleam/result
13import gleam/string
14import honk/errors
15import honk/internal/json_helpers
16import honk/types
17import honk/validation/context
18import honk/validation/formats
19import simplifile
20
21// Import validators
22import honk/validation/field as validation_field
23import honk/validation/field/reference as validation_field_reference
24import honk/validation/field/union as validation_field_union
25import honk/validation/meta/token as validation_meta_token
26import honk/validation/meta/unknown as validation_meta_unknown
27import honk/validation/primary/params as validation_primary_params
28import honk/validation/primary/procedure as validation_primary_procedure
29import honk/validation/primary/query as validation_primary_query
30import honk/validation/primary/record as validation_primary_record
31import honk/validation/primary/subscription as validation_primary_subscription
32import honk/validation/primitive/blob as validation_primitive_blob
33import honk/validation/primitive/boolean as validation_primitive_boolean
34import honk/validation/primitive/bytes as validation_primitive_bytes
35import honk/validation/primitive/cid_link as validation_primitive_cid_link
36import honk/validation/primitive/integer as validation_primitive_integer
37import honk/validation/primitive/null as validation_primitive_null
38import honk/validation/primitive/string as validation_primitive_string
39
/// Re-exported alias of `errors.ValidationError` so library users can handle
/// validation failures without importing `honk/errors` directly.
pub type ValidationError =
  errors.ValidationError
43
/// Validates lexicon documents
///
/// Validates lexicon structure (id, defs) and ALL definitions within each lexicon.
/// Each definition in the defs object is validated according to its type.
///
/// Returns Ok(Nil) if all lexicons and their definitions are valid.
/// Returns Error with a map of lexicon ID to list of error messages.
/// Error messages include the definition name (e.g., "lex.id#defName: error").
pub fn validate(lexicons: List(Json)) -> Result(Nil, Dict(String, List(String))) {
  // Build the validation context. Any failure (builder or build step) is
  // reported under the synthetic "builder" key so callers always receive a
  // uniform error map. This replaces two previously duplicated Error branches.
  let ctx_result =
    context.builder()
    |> context.with_lexicons(lexicons)
    |> result.try(context.build)
    |> result.map_error(fn(e) {
      dict.from_list([#("builder", [errors.to_string(e)])])
    })
  use ctx <- result.try(ctx_result)

  // Validate ALL definitions in each lexicon, accumulating errors per lexicon
  let error_map =
    dict.fold(ctx.lexicons, dict.new(), fn(errors, lex_id, lexicon) {
      validate_lexicon_defs(ctx, lex_id, lexicon, errors)
    })

  case dict.is_empty(error_map) {
    True -> Ok(Nil)
    False -> Error(error_map)
  }
}

/// Validates every definition in one lexicon's defs object, folding any
/// error messages into the accumulator keyed by lexicon ID.
fn validate_lexicon_defs(ctx, lex_id, lexicon, acc) {
  // Get all definition names from the defs object
  let def_keys = json_helpers.get_keys(lexicon.defs)
  let lex_ctx = context.with_current_lexicon(ctx, lex_id)

  // Validate each definition
  list.fold(def_keys, acc, fn(errors_acc, def_name) {
    case json_helpers.get_field(lexicon.defs, def_name) {
      Some(def) ->
        case validate_definition(def, lex_ctx) {
          Ok(_) -> errors_acc
          Error(e) -> add_definition_error(errors_acc, lex_id, def_name, e)
        }
      // No field under this key: nothing to validate
      None -> errors_acc
    }
  })
}

/// Formats one definition-level validation error as "lex.id#defName: message"
/// and prepends it to the lexicon's error list in the accumulator.
fn add_definition_error(errors_acc, lex_id, def_name, e) {
  // Extract just the message without wrapper text
  let message = case e {
    errors.InvalidSchema(msg) -> msg
    errors.DataValidation(msg) -> msg
    errors.LexiconNotFound(msg) -> "Lexicon not found: " <> msg
  }
  // Clean up leading ": " if present
  let clean_message = case string.starts_with(message, ": ") {
    True -> string.drop_start(message, 2)
    False -> message
  }
  let error_msg = lex_id <> "#" <> def_name <> ": " <> clean_message
  case dict.get(errors_acc, lex_id) {
    Ok(existing_errors) ->
      dict.insert(errors_acc, lex_id, [error_msg, ..existing_errors])
    Error(_) -> dict.insert(errors_acc, lex_id, [error_msg])
  }
}
120
/// Validates a single definition based on its type
///
/// Dispatches on the definition's "type" string field to the matching
/// schema validator. Returns an InvalidSchema error when the type field is
/// missing or names a type with no validator.
fn validate_definition(
  def: Json,
  ctx: context.ValidationContext,
) -> Result(Nil, errors.ValidationError) {
  case json_helpers.get_string(def, "type") {
    // Primary definition types (top-level lexicon shapes)
    Some("record") -> validation_primary_record.validate_schema(def, ctx)
    Some("query") -> validation_primary_query.validate_schema(def, ctx)
    Some("procedure") -> validation_primary_procedure.validate_schema(def, ctx)
    Some("subscription") ->
      validation_primary_subscription.validate_schema(def, ctx)
    Some("params") -> validation_primary_params.validate_schema(def, ctx)
    // Field / container types
    Some("object") -> validation_field.validate_object_schema(def, ctx)
    Some("array") -> validation_field.validate_array_schema(def, ctx)
    Some("union") -> validation_field_union.validate_schema(def, ctx)
    // Primitive types
    Some("string") -> validation_primitive_string.validate_schema(def, ctx)
    Some("integer") -> validation_primitive_integer.validate_schema(def, ctx)
    Some("boolean") -> validation_primitive_boolean.validate_schema(def, ctx)
    Some("bytes") -> validation_primitive_bytes.validate_schema(def, ctx)
    Some("blob") -> validation_primitive_blob.validate_schema(def, ctx)
    Some("cid-link") -> validation_primitive_cid_link.validate_schema(def, ctx)
    Some("null") -> validation_primitive_null.validate_schema(def, ctx)
    // Reference and meta types
    Some("ref") -> validation_field_reference.validate_schema(def, ctx)
    Some("token") -> validation_meta_token.validate_schema(def, ctx)
    Some("unknown") -> validation_meta_unknown.validate_schema(def, ctx)
    // Anything else is a schema error, reported with the offending type name
    Some(unknown_type) ->
      Error(errors.invalid_schema("Unknown type: " <> unknown_type))
    None -> Error(errors.invalid_schema("Definition missing type field"))
  }
}
151
/// Validates a single data record against a collection schema
///
/// Looks up the lexicon named by `collection`, requires it to carry a
/// "main" definition, and validates `record` against that definition.
pub fn validate_record(
  lexicons: List(Json),
  collection: String,
  record: Json,
) -> Result(Nil, ValidationError) {
  // Build the validation context from the supplied lexicons
  use builder <- result.try(
    context.builder()
    |> context.with_lexicons(lexicons),
  )
  use ctx <- result.try(context.build(builder))

  // The collection must resolve to a known lexicon
  use lexicon <- result.try(
    context.get_lexicon(ctx, collection)
    |> option.to_result(errors.lexicon_not_found(collection)),
  )

  // Records are always described by the "main" definition
  use main_def <- result.try(
    json_helpers.get_field(lexicon.defs, "main")
    |> option.to_result(errors.invalid_schema(
      "Lexicon '" <> collection <> "' has no main definition",
    )),
  )

  // Point the context at this lexicon and definition for error reporting
  let record_ctx =
    ctx
    |> context.with_current_lexicon(collection)
    |> context.with_path("defs.main")

  // Validate the record data against the main definition
  validation_primary_record.validate_data(record, main_def, record_ctx)
}
187
/// Validates NSID format
///
/// Thin public wrapper over `formats.is_valid_nsid`; returns True when the
/// given string is a syntactically valid NSID according to that check.
pub fn is_valid_nsid(nsid: String) -> Bool {
  formats.is_valid_nsid(nsid)
}
192
/// Validates a string value against a specific format
///
/// Returns Ok(Nil) when the value matches the format, otherwise an Error
/// message naming the expected format.
pub fn validate_string_format(
  value: String,
  format: types.StringFormat,
) -> Result(Nil, String) {
  let matches = formats.validate_format(value, format)
  case matches {
    False ->
      Error("Value does not match format: " <> types.format_to_string(format))
    True -> Ok(Nil)
  }
}
206
/// Convert a Dynamic value to Json
///
/// This is useful when parsing JSON strings with `json.parse(str, decode.dynamic)`
/// and then needing to convert to Json for validation.
///
/// Errors are propagated unchanged from `json_helpers.dynamic_to_json`.
///
/// ## Example
/// ```gleam
/// use dyn <- result.try(json.parse(json_str, decode.dynamic))
/// use json_val <- result.try(honk.dynamic_to_json(dyn))
/// honk.validate([json_val])
/// ```
pub fn dynamic_to_json(dyn: dynamic.Dynamic) -> Result(Json, ValidationError) {
  json_helpers.dynamic_to_json(dyn)
}
221
/// Parse a JSON string and convert to Json for validation
///
/// This is a convenience function that combines `json.parse()` and
/// `dynamic_to_json()`. It's useful when you have JSON stored as strings
/// (e.g., in a database) and want to validate it with honk.
///
/// ## Example
/// ```gleam
/// use json_val <- result.try(honk.parse_json_string(stored_json))
/// honk.validate([json_val])
/// ```
pub fn parse_json_string(json_str: String) -> Result(Json, ValidationError) {
  case json.parse(json_str, decode.dynamic) {
    Ok(dyn) -> dynamic_to_json(dyn)
    Error(_) -> Error(errors.invalid_schema("Failed to parse JSON string"))
  }
}
242
/// Parse multiple JSON strings and convert to Json for validation
///
/// This is a convenience function for batch parsing JSON strings. Any
/// individual parse failure collapses into a single generic error.
///
/// ## Example
/// ```gleam
/// use json_vals <- result.try(honk.parse_json_strings(stored_jsons))
/// honk.validate(json_vals)
/// ```
pub fn parse_json_strings(
  json_strs: List(String),
) -> Result(List(Json), ValidationError) {
  list.try_map(json_strs, parse_json_string)
  |> result.replace_error(errors.invalid_schema("Failed to parse JSON strings"))
}
261
/// CLI entry point for the honk lexicon validator
///
/// Usage:
///   gleam run -m honk check <path>
///   gleam run -m honk help
pub fn main() -> Nil {
  let arguments = argv.load().arguments
  case arguments {
    // No arguments behaves the same as an explicit "help"
    [] | ["help"] -> show_help()
    ["check", path] -> validate_path(path)
    _ -> {
      io.println_error("Unknown command. Use 'help' for usage information.")
      Nil
    }
  }
}
277
/// Validate a path (auto-detects file or directory)
///
/// Dispatches to validate_file or validate_directory; anything that is
/// neither, or cannot be inspected at all, is reported on stderr.
fn validate_path(path: String) -> Nil {
  case simplifile.is_file(path) {
    Ok(True) -> validate_file(path)
    Ok(False) ->
      case simplifile.is_directory(path) {
        Ok(True) -> validate_directory(path)
        Ok(False) -> {
          io.println_error(
            "Error: Path is neither a file nor a directory: " <> path,
          )
          Nil
        }
        Error(_) -> print_access_error(path)
      }
    Error(_) -> print_access_error(path)
  }
}

/// Report a path that could not be accessed (shared by both filesystem
/// probes above; previously this message was duplicated inline).
fn print_access_error(path: String) -> Nil {
  io.println_error("Error: Cannot access path: " <> path)
  Nil
}
302
/// Validate a single lexicon file and print a ✓/✗ line for it
fn validate_file(file_path: String) -> Nil {
  let outcome = read_and_validate_file(file_path)
  case outcome {
    Error(msg) -> {
      io.println_error("✗ " <> file_path)
      io.println_error("  " <> msg)
      Nil
    }
    Ok(_) -> {
      io.println("✓ " <> file_path <> " - valid")
      Nil
    }
  }
}
317
/// Validate all .json files in a directory
///
/// Recursively lists the directory, parses every .json file, validates all
/// parsed lexicons together (so cross-lexicon references resolve), prints a
/// per-file result, and finishes with a summary line.
fn validate_directory(dir_path: String) -> Nil {
  case simplifile.get_files(dir_path) {
    Error(_) -> {
      io.println_error("Error: Cannot read directory: " <> dir_path)
      Nil
    }
    Ok(all_files) -> {
      // Only lexicon documents (.json) are considered
      let json_files =
        list.filter(all_files, fn(path) { string.ends_with(path, ".json") })

      case json_files {
        [] -> {
          io.println("No .json files found in " <> dir_path)
          Nil
        }
        files -> check_directory_files(files)
      }
    }
  }
}

/// Parse, validate, and report a non-empty list of .json files
fn check_directory_files(files: List(String)) -> Nil {
  // Read and parse each file exactly once
  let file_results = list.map(files, fn(file) { #(file, read_json_file(file)) })

  // Separate successful parses from failures
  let #(parse_errors, parsed_files) =
    list.partition(file_results, fn(result) {
      case result {
        #(_, Error(_)) -> True
        #(_, Ok(_)) -> False
      }
    })

  // Display parse errors
  list.each(parse_errors, fn(result) {
    case result {
      #(file, Error(msg)) -> {
        io.println_error("✗ " <> file)
        io.println_error("  " <> msg)
      }
      _ -> Nil
    }
  })

  // Collect all successfully parsed lexicons
  let lexicons =
    list.filter_map(parsed_files, fn(result) {
      case result {
        #(_, Ok(json)) -> Ok(json)
        _ -> Error(Nil)
      }
    })

  // Validate all lexicons together exactly ONCE (allows cross-lexicon
  // references) and reuse the result for both display and the summary;
  // the previous implementation re-ran the whole validation for the count.
  let validation_result = validate(lexicons)

  case validation_result {
    Ok(_) ->
      // All lexicons are valid
      list.each(parsed_files, fn(result) {
        case result {
          #(file, Ok(_)) -> io.println("✓ " <> file)
          _ -> Nil
        }
      })
    Error(error_map) ->
      // Some lexicons have errors - map errors back to files
      list.each(parsed_files, fn(result) {
        report_validated_file(result, error_map)
      })
  }

  // Summary
  let validation_error_count = case validation_result {
    Ok(_) -> 0
    Error(error_map) -> dict.size(error_map)
  }
  print_summary(
    list.length(files),
    list.length(parse_errors) + validation_error_count,
  )
}

/// Print the validation outcome for one successfully parsed file.
/// Validation errors are keyed by lexicon ID, so the file's "id" field is
/// used to find its errors; a parsed file with no "id" is reported as such.
fn report_validated_file(
  result: #(String, Result(Json, String)),
  error_map: Dict(String, List(String)),
) -> Nil {
  case result {
    #(file, Ok(json)) ->
      case json_helpers.get_string(json, "id") {
        Some(lex_id) ->
          case dict.get(error_map, lex_id) {
            Ok(errors) -> {
              io.println_error("✗ " <> file)
              list.each(errors, fn(err) { io.println_error("  " <> err) })
            }
            // No entry in the error map means this lexicon passed
            Error(_) -> io.println("✓ " <> file)
          }
        None -> {
          io.println_error("✗ " <> file)
          io.println_error("  Missing lexicon id")
        }
      }
    _ -> Nil
  }
}

/// Print the final pass/fail summary line for a directory run
fn print_summary(total: Int, total_errors: Int) -> Nil {
  case total_errors {
    0 ->
      io.println(
        "\nAll " <> int.to_string(total) <> " schemas validated successfully.",
      )
    _ ->
      io.println_error(
        "\n"
        <> int.to_string(total_errors)
        <> " of "
        <> int.to_string(total)
        <> " schemas failed validation.",
      )
  }
}
454
/// Read and parse a JSON file (without validation)
///
/// Error strings distinguish an unreadable file, malformed JSON, and a
/// failed Dynamic-to-Json conversion.
fn read_json_file(file_path: String) -> Result(Json, String) {
  case simplifile.read(file_path) {
    Error(_) -> Error("Cannot read file")
    Ok(content) ->
      case json.parse(content, decode.dynamic) {
        Error(_) -> Error("Invalid JSON")
        Ok(json_dynamic) ->
          json_helpers.dynamic_to_json(json_dynamic)
          |> result.map_error(fn(_) { "Failed to convert JSON" })
      }
  }
}
470
/// Read a file and validate it as a lexicon
///
/// Pipeline: read file -> parse JSON -> convert to Json -> validate as a
/// single-lexicon batch. Each stage maps its failure to a short message.
fn read_and_validate_file(file_path: String) -> Result(Nil, String) {
  use content <- result.try(
    simplifile.read(file_path)
    |> result.map_error(fn(_) { "Cannot read file" }),
  )

  use json_value <- result.try(
    json.parse(content, decode.dynamic)
    |> result.map_error(fn(_) { "Invalid JSON" })
    |> result.try(fn(json_dynamic) {
      json_helpers.dynamic_to_json(json_dynamic)
      |> result.map_error(fn(_) { "Failed to convert JSON" })
    }),
  )

  // Flatten the per-lexicon error map into one printable string
  validate([json_value])
  |> result.map_error(format_validation_errors)
}
495
/// Format validation errors from the error map
///
/// Joins every lexicon's error messages into one string, separating all
/// entries with a newline plus two-space indent.
fn format_validation_errors(error_map: Dict(String, List(String))) -> String {
  error_map
  |> dict.values
  |> list.map(fn(errors) { string.join(errors, "\n  ") })
  |> string.join("\n  ")
}
506
/// Show help text
///
/// Prints the CLI usage banner to stdout. The text is a single literal and
/// must stay in sync with the commands handled in `main`.
fn show_help() -> Nil {
  io.println(
    "
honk - ATProtocol Lexicon Validator

USAGE:
  gleam run -m honk check <path>
  gleam run -m honk help

COMMANDS:
  check <path>  Check a lexicon file or directory
                - If <path> is a file: validates that single lexicon
                - If <path> is a directory: recursively validates all .json files

  help          Show this help message

EXAMPLES:
  gleam run -m honk check ./lexicons/xyz/statusphere/status.json
  gleam run -m honk check ./lexicons

VALIDATION:
  - Validates lexicon structure (id, defs)
  - Validates ALL definitions in each lexicon
  - Checks types, constraints, and references
  - Reports errors with definition context (lex.id#defName)
",
  )
}
535}