An ATProto Lexicon validator for Gleam.
// Main public API for the ATProtocol lexicon validator

@target(erlang)
import argv
import gleam/dict.{type Dict}
import gleam/dynamic
import gleam/dynamic/decode
@target(erlang)
import gleam/int
@target(erlang)
import gleam/io
import gleam/json.{type Json}
import gleam/list
import gleam/option.{None, Some}
import gleam/result
import gleam/string
import honk/errors
import honk/internal/json_helpers
import honk/types
import honk/validation/context
import honk/validation/formats
@target(erlang)
import simplifile

// Import validators
import honk/validation/field as validation_field
import honk/validation/field/reference as validation_field_reference
import honk/validation/field/union as validation_field_union
import honk/validation/meta/token as validation_meta_token
import honk/validation/meta/unknown as validation_meta_unknown
import honk/validation/primary/params as validation_primary_params
import honk/validation/primary/procedure as validation_primary_procedure
import honk/validation/primary/query as validation_primary_query
import honk/validation/primary/record as validation_primary_record
import honk/validation/primary/subscription as validation_primary_subscription
import honk/validation/primitive/blob as validation_primitive_blob
import honk/validation/primitive/boolean as validation_primitive_boolean
import honk/validation/primitive/bytes as validation_primitive_bytes
import honk/validation/primitive/cid_link as validation_primitive_cid_link
import honk/validation/primitive/integer as validation_primitive_integer
import honk/validation/primitive/null as validation_primitive_null
import honk/validation/primitive/string as validation_primitive_string

// Re-export error type for public API error handling
pub type ValidationError =
  errors.ValidationError

/// Validates lexicon documents
///
/// Validates the lexicon structure (id, defs) and every definition within each
/// lexicon. Each definition in the defs object is validated according to its type.
///
/// Returns Ok(Nil) if all lexicons and their definitions are valid.
/// Returns an Error containing a map from lexicon ID to a list of error messages.
/// Error messages include the definition name (e.g. "lex.id#defName: error").
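///
/// ## Example
/// A minimal sketch, assuming `lexicon_json` holds one lexicon document as a
/// JSON string (the names here are illustrative):
/// ```gleam
/// let assert Ok(lexicon) = honk.parse_json_string(lexicon_json)
/// case honk.validate([lexicon]) {
///   Ok(Nil) -> io.println("lexicon is valid")
///   Error(by_lexicon) ->
///     by_lexicon
///     |> dict.to_list
///     |> list.each(fn(entry) {
///       let #(lex_id, messages) = entry
///       io.println_error(lex_id <> ": " <> string.join(messages, "; "))
///     })
/// }
/// ```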
pub fn validate(lexicons: List(Json)) -> Result(Nil, Dict(String, List(String))) {
  // Build validation context
  let builder_result =
    context.builder()
    |> context.with_lexicons(lexicons)

  case builder_result {
    Ok(builder) ->
      case context.build(builder) {
        Ok(ctx) -> {
          // Validate ALL definitions in each lexicon
          let error_map =
            dict.fold(ctx.lexicons, dict.new(), fn(errors, lex_id, lexicon) {
              // Get all definition names from the defs object
              let def_keys = json_helpers.get_keys(lexicon.defs)
              let lex_ctx = context.with_current_lexicon(ctx, lex_id)

              // Validate each definition
              list.fold(def_keys, errors, fn(errors_acc, def_name) {
                case json_helpers.get_field(lexicon.defs, def_name) {
                  Some(def) -> {
                    case validate_definition(def, lex_ctx) {
                      Ok(_) -> errors_acc
                      Error(e) -> {
                        // Include def name in error for better context
                        // Extract just the message without wrapper text
                        let message = case e {
                          errors.InvalidSchema(msg) -> msg
                          errors.DataValidation(msg) -> msg
                          errors.LexiconNotFound(msg) ->
                            "Lexicon not found: " <> msg
                        }
                        // Clean up leading ": " if present
                        let clean_message = case
                          string.starts_with(message, ": ")
                        {
                          True -> string.drop_start(message, 2)
                          False -> message
                        }
                        let error_msg =
                          lex_id <> "#" <> def_name <> ": " <> clean_message
                        case dict.get(errors_acc, lex_id) {
                          Ok(existing_errors) ->
                            dict.insert(errors_acc, lex_id, [
                              error_msg,
                              ..existing_errors
                            ])
                          Error(_) ->
                            dict.insert(errors_acc, lex_id, [error_msg])
                        }
                      }
                    }
                  }
                  None -> errors_acc
                }
              })
            })

          case dict.is_empty(error_map) {
            True -> Ok(Nil)
            False -> Error(error_map)
          }
        }
        Error(e) -> Error(dict.from_list([#("builder", [errors.to_string(e)])]))
      }
    Error(e) -> Error(dict.from_list([#("builder", [errors.to_string(e)])]))
  }
}

/// Validates a single definition based on its type
fn validate_definition(
  def: Json,
  ctx: context.ValidationContext,
) -> Result(Nil, errors.ValidationError) {
  case json_helpers.get_string(def, "type") {
    Some("record") -> validation_primary_record.validate_schema(def, ctx)
    Some("query") -> validation_primary_query.validate_schema(def, ctx)
    Some("procedure") -> validation_primary_procedure.validate_schema(def, ctx)
    Some("subscription") ->
      validation_primary_subscription.validate_schema(def, ctx)
    Some("params") -> validation_primary_params.validate_schema(def, ctx)
    Some("object") -> validation_field.validate_object_schema(def, ctx)
    Some("array") -> validation_field.validate_array_schema(def, ctx)
    Some("union") -> validation_field_union.validate_schema(def, ctx)
    Some("string") -> validation_primitive_string.validate_schema(def, ctx)
    Some("integer") -> validation_primitive_integer.validate_schema(def, ctx)
    Some("boolean") -> validation_primitive_boolean.validate_schema(def, ctx)
    Some("bytes") -> validation_primitive_bytes.validate_schema(def, ctx)
    Some("blob") -> validation_primitive_blob.validate_schema(def, ctx)
    Some("cid-link") -> validation_primitive_cid_link.validate_schema(def, ctx)
    Some("null") -> validation_primitive_null.validate_schema(def, ctx)
    Some("ref") -> validation_field_reference.validate_schema(def, ctx)
    Some("token") -> validation_meta_token.validate_schema(def, ctx)
    Some("unknown") -> validation_meta_unknown.validate_schema(def, ctx)
    Some(unknown_type) ->
      Error(errors.invalid_schema("Unknown type: " <> unknown_type))
    None -> Error(errors.invalid_schema("Definition missing type field"))
  }
}

/// Validation context type (re-exported for external use)
pub type ValidationContext =
  context.ValidationContext

/// Build a reusable validation context from lexicons.
/// Call this once, then use validate_record_with_context for each record.
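///
/// ## Example
/// A minimal sketch, assuming `lexicons` is a List(Json) of parsed lexicon
/// documents (e.g. from parse_json_strings) and `collection` and `record` are
/// illustrative placeholders:
/// ```gleam
/// use ctx <- result.try(honk.build_validation_context(lexicons))
/// honk.validate_record_with_context(ctx, collection, record)
/// ```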
pub fn build_validation_context(
  lexicons: List(Json),
) -> Result(ValidationContext, ValidationError) {
  let builder_result =
    context.builder()
    |> context.with_lexicons(lexicons)

  use builder <- result.try(builder_result)
  context.build(builder)
}

/// Validates a single data record against a collection schema using a pre-built context.
/// This is much faster when validating many records: build the context once with
/// build_validation_context, then call this for each record.
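///
/// ## Example
/// A minimal sketch, assuming `ctx` came from build_validation_context and
/// `records` is a List(Json) of records from the same collection (the NSID is
/// illustrative):
/// ```gleam
/// records
/// |> list.try_each(fn(record) {
///   honk.validate_record_with_context(ctx, "xyz.statusphere.status", record)
/// })
/// ```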
pub fn validate_record_with_context(
  ctx: ValidationContext,
  collection: String,
  record: Json,
) -> Result(Nil, ValidationError) {
  // Get the lexicon for this collection
  case context.get_lexicon(ctx, collection) {
    Some(lexicon) -> {
      // Get the main definition (should be a record type)
      case json_helpers.get_field(lexicon.defs, "main") {
        Some(main_def) -> {
          let lex_ctx = context.with_current_lexicon(ctx, collection)
          // Set the path to include the definition name
          let def_ctx = context.with_path(lex_ctx, "defs.main")
          // Validate the record data against the main definition
          validation_primary_record.validate_data(record, main_def, def_ctx)
        }
        None ->
          Error(errors.invalid_schema(
            "Lexicon '" <> collection <> "' has no main definition",
          ))
      }
    }
    None -> Error(errors.lexicon_not_found(collection))
  }
}

/// Validates a single data record against a collection schema
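///
/// ## Example
/// A minimal sketch, assuming `lexicons` holds the parsed lexicon documents and
/// `record` is the parsed record data (the collection NSID is illustrative):
/// ```gleam
/// honk.validate_record(lexicons, "xyz.statusphere.status", record)
/// ```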
pub fn validate_record(
  lexicons: List(Json),
  collection: String,
  record: Json,
) -> Result(Nil, ValidationError) {
  // Build validation context
  use ctx <- result.try(build_validation_context(lexicons))
  validate_record_with_context(ctx, collection, record)
}

/// Validates NSID format
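///
/// ## Example
/// A quick check on a well-formed and a malformed identifier:
/// ```gleam
/// honk.is_valid_nsid("xyz.statusphere.status")
/// // -> True
/// honk.is_valid_nsid("not an nsid")
/// // -> False
/// ```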
pub fn is_valid_nsid(nsid: String) -> Bool {
  formats.is_valid_nsid(nsid)
}

/// Validates a string value against a specific format
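///
/// ## Example
/// A minimal sketch; `format` stands for whichever types.StringFormat variant
/// your schema requires (see honk/types for the available constructors):
/// ```gleam
/// // `format` is a placeholder for a types.StringFormat value
/// case honk.validate_string_format("2024-01-01T00:00:00Z", format) {
///   Ok(Nil) -> io.println("value matches the format")
///   Error(reason) -> io.println_error(reason)
/// }
/// ```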
pub fn validate_string_format(
  value: String,
  format: types.StringFormat,
) -> Result(Nil, String) {
  case formats.validate_format(value, format) {
    True -> Ok(Nil)
    False -> {
      let format_name = types.format_to_string(format)
      Error("Value does not match format: " <> format_name)
    }
  }
}

/// Convert a Dynamic value to Json
///
/// This is useful when parsing JSON strings with `json.parse(str, decode.dynamic)`
/// and then needing to convert to Json for validation.
///
/// ## Example
/// ```gleam
/// use dyn <- result.try(json.parse(json_str, decode.dynamic))
/// use json_val <- result.try(honk.dynamic_to_json(dyn))
/// honk.validate([json_val])
/// ```
pub fn dynamic_to_json(dyn: dynamic.Dynamic) -> Result(Json, ValidationError) {
  json_helpers.dynamic_to_json(dyn)
}

/// Parse a JSON string and convert to Json for validation
///
/// This is a convenience function that combines `json.parse()` and `dynamic_to_json()`.
/// It's useful when you have JSON stored as strings (e.g. in a database) and want
/// to validate it with honk.
///
/// ## Example
/// ```gleam
/// use json_val <- result.try(honk.parse_json_string(stored_json))
/// honk.validate([json_val])
/// ```
pub fn parse_json_string(json_str: String) -> Result(Json, ValidationError) {
  use dyn <- result.try(
    json.parse(json_str, decode.dynamic)
    |> result.map_error(fn(_) {
      errors.invalid_schema("Failed to parse JSON string")
    }),
  )
  dynamic_to_json(dyn)
}

/// Parse multiple JSON strings and convert to Json for validation
///
/// This is a convenience function for batch parsing JSON strings.
///
/// ## Example
/// ```gleam
/// use json_vals <- result.try(honk.parse_json_strings(stored_jsons))
/// honk.validate(json_vals)
/// ```
pub fn parse_json_strings(
  json_strs: List(String),
) -> Result(List(Json), ValidationError) {
  json_strs
  |> list.try_map(parse_json_string)
  |> result.map_error(fn(_) {
    errors.invalid_schema("Failed to parse JSON strings")
  })
}

@target(erlang)
/// CLI entry point for the honk lexicon validator
///
/// Usage:
///   gleam run -m honk check <path>
///   gleam run -m honk help
pub fn main() -> Nil {
  case argv.load().arguments {
    ["check", path] -> validate_path(path)
    ["help"] | [] -> show_help()
    _ -> {
      io.println_error("Unknown command. Use 'help' for usage information.")
      Nil
    }
  }
}

@target(erlang)
/// Validate a path (auto-detects file or directory)
fn validate_path(path: String) -> Nil {
  case simplifile.is_file(path) {
    Ok(True) -> validate_file(path)
    Ok(False) ->
      case simplifile.is_directory(path) {
        Ok(True) -> validate_directory(path)
        Ok(False) -> {
          io.println_error(
            "Error: Path is neither a file nor a directory: " <> path,
          )
          Nil
        }
        Error(_) -> {
          io.println_error("Error: Cannot access path: " <> path)
          Nil
        }
      }
    Error(_) -> {
      io.println_error("Error: Cannot access path: " <> path)
      Nil
    }
  }
}

@target(erlang)
/// Validate a single lexicon file
fn validate_file(file_path: String) -> Nil {
  case read_and_validate_file(file_path) {
    Ok(_) -> {
      io.println("✓ " <> file_path <> " - valid")
      Nil
    }
    Error(msg) -> {
      io.println_error("✗ " <> file_path)
      io.println_error("  " <> msg)
      Nil
    }
  }
}

@target(erlang)
/// Validate all .json files in a directory
fn validate_directory(dir_path: String) -> Nil {
  case simplifile.get_files(dir_path) {
    Error(_) -> {
      io.println_error("Error: Cannot read directory: " <> dir_path)
      Nil
    }
    Ok(all_files) -> {
      // Filter for .json files
      let json_files =
        all_files
        |> list.filter(fn(path) { string.ends_with(path, ".json") })

      case json_files {
        [] -> {
          io.println("No .json files found in " <> dir_path)
          Nil
        }
        files -> {
          // Read and parse all files
          let file_results =
            files
            |> list.map(fn(file) {
              case read_json_file(file) {
                Ok(json_value) -> #(file, Ok(json_value))
                Error(msg) -> #(file, Error(msg))
              }
            })

          // Separate successful parses from failures
          let #(parse_errors, parsed_files) =
            list.partition(file_results, fn(result) {
              case result {
                #(_, Error(_)) -> True
                #(_, Ok(_)) -> False
              }
            })

          // Display parse errors
          parse_errors
          |> list.each(fn(result) {
            case result {
              #(file, Error(msg)) -> {
                io.println_error("✗ " <> file)
                io.println_error("  " <> msg)
              }
              _ -> Nil
            }
          })

          // Get all successfully parsed lexicons
          let lexicons =
            parsed_files
            |> list.filter_map(fn(result) {
              case result {
                #(_, Ok(json)) -> Ok(json)
                _ -> Error(Nil)
              }
            })

          // Validate all lexicons together (allows cross-lexicon references).
          // Run validation once and reuse the result for both the per-file
          // output and the summary below.
          let validation_result = validate(lexicons)
          case validation_result {
            Ok(_) -> {
              // All lexicons are valid
              parsed_files
              |> list.each(fn(result) {
                case result {
                  #(file, Ok(_)) -> io.println("✓ " <> file)
                  _ -> Nil
                }
              })
            }
            Error(error_map) -> {
              // Some lexicons have errors - map errors back to files
              parsed_files
              |> list.each(fn(result) {
                case result {
                  #(file, Ok(json)) -> {
                    // Get the lexicon ID for this file
                    case json_helpers.get_string(json, "id") {
                      Some(lex_id) -> {
                        case dict.get(error_map, lex_id) {
                          Ok(errors) -> {
                            io.println_error("✗ " <> file)
                            errors
                            |> list.each(fn(err) {
                              io.println_error("  " <> err)
                            })
                          }
                          Error(_) -> io.println("✓ " <> file)
                        }
                      }
                      None -> {
                        io.println_error("✗ " <> file)
                        io.println_error("  Missing lexicon id")
                      }
                    }
                  }
                  _ -> Nil
                }
              })
            }
          }

          // Summary
          let total = list.length(files)
          let parse_error_count = list.length(parse_errors)
          let validation_error_count = case validation_result {
            Ok(_) -> 0
            Error(error_map) -> dict.size(error_map)
          }
          let total_errors = parse_error_count + validation_error_count

          case total_errors {
            0 ->
              io.println(
                "\nAll "
                <> int.to_string(total)
                <> " schemas validated successfully.",
              )
            _ ->
              io.println_error(
                "\n"
                <> int.to_string(total_errors)
                <> " of "
                <> int.to_string(total)
                <> " schemas failed validation.",
              )
          }

          Nil
        }
      }
    }
  }
}

@target(erlang)
/// Read and parse a JSON file (without validation)
fn read_json_file(file_path: String) -> Result(Json, String) {
  use content <- result.try(
    simplifile.read(file_path)
    |> result.map_error(fn(_) { "Cannot read file" }),
  )

  use json_dynamic <- result.try(
    json.parse(content, decode.dynamic)
    |> result.map_error(fn(_) { "Invalid JSON" }),
  )

  json_helpers.dynamic_to_json(json_dynamic)
  |> result.map_error(fn(_) { "Failed to convert JSON" })
}

@target(erlang)
/// Read a file and validate it as a lexicon
fn read_and_validate_file(file_path: String) -> Result(Nil, String) {
  use content <- result.try(
    simplifile.read(file_path)
    |> result.map_error(fn(_) { "Cannot read file" }),
  )

  use json_dynamic <- result.try(
    json.parse(content, decode.dynamic)
    |> result.map_error(fn(_) { "Invalid JSON" }),
  )

  use json_value <- result.try(
    json_helpers.dynamic_to_json(json_dynamic)
    |> result.map_error(fn(_) { "Failed to convert JSON" }),
  )

  use _ <- result.try(
    validate([json_value])
    |> result.map_error(fn(error_map) { format_validation_errors(error_map) }),
  )

  Ok(Nil)
}

@target(erlang)
/// Format validation errors from the error map
fn format_validation_errors(error_map: Dict(String, List(String))) -> String {
  error_map
  |> dict.to_list
  |> list.map(fn(entry) {
    let #(_key, errors) = entry
    string.join(errors, "\n  ")
  })
  |> string.join("\n  ")
}

@target(erlang)
/// Show help text
fn show_help() -> Nil {
  io.println(
    "
honk - ATProtocol Lexicon Validator

USAGE:
  gleam run -m honk check <path>
  gleam run -m honk help

COMMANDS:
  check <path>   Check a lexicon file or directory
                 - If <path> is a file: validates that single lexicon
                 - If <path> is a directory: recursively validates all .json files

  help           Show this help message

EXAMPLES:
  gleam run -m honk check ./lexicons/xyz/statusphere/status.json
  gleam run -m honk check ./lexicons

VALIDATION:
  - Validates lexicon structure (id, defs)
  - Validates ALL definitions in each lexicon
  - Checks types, constraints, and references
  - Reports errors with definition context (lex.id#defName)
",
  )
}