An ATProto Lexicon validator for Gleam.
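A minimal usage sketch; `check_status`, its string inputs, and the `xyz.statusphere.status` collection NSID are illustrative placeholders, while the `honk.*` calls are the public API defined in the source below:

```gleam
import gleam/io
import gleam/result
import gleam/string
import honk

// Parse one lexicon document and one record (both JSON strings), then
// validate the record against the lexicon's `main` definition.
pub fn check_status(lexicon_json: String, record_json: String) -> Nil {
  let outcome = {
    use lexicon <- result.try(honk.parse_json_string(lexicon_json))
    use record <- result.try(honk.parse_json_string(record_json))
    // Build the validation context once; reuse it for every record.
    use ctx <- result.try(honk.build_validation_context([lexicon]))
    honk.validate_record_with_context(ctx, "xyz.statusphere.status", record)
  }
  case outcome {
    Ok(Nil) -> io.println("record is valid")
    Error(e) -> io.println_error("validation failed: " <> string.inspect(e))
  }
}
```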
// Main public API for the ATProtocol lexicon validator

@target(erlang)
import argv
import gleam/dict.{type Dict}
import gleam/dynamic
import gleam/dynamic/decode
@target(erlang)
import gleam/int
@target(erlang)
import gleam/io
import gleam/json.{type Json}
import gleam/list
import gleam/option.{None, Some}
import gleam/result
import gleam/string
import honk/errors
import honk/internal/json_helpers
import honk/types
import honk/validation/context
import honk/validation/formats
@target(erlang)
import simplifile

// Import validators
import honk/validation/field as validation_field
import honk/validation/field/reference as validation_field_reference
import honk/validation/field/union as validation_field_union
import honk/validation/meta/token as validation_meta_token
import honk/validation/meta/unknown as validation_meta_unknown
import honk/validation/primary/params as validation_primary_params
import honk/validation/primary/procedure as validation_primary_procedure
import honk/validation/primary/query as validation_primary_query
import honk/validation/primary/record as validation_primary_record
import honk/validation/primary/subscription as validation_primary_subscription
import honk/validation/primitive/blob as validation_primitive_blob
import honk/validation/primitive/boolean as validation_primitive_boolean
import honk/validation/primitive/bytes as validation_primitive_bytes
import honk/validation/primitive/cid_link as validation_primitive_cid_link
import honk/validation/primitive/integer as validation_primitive_integer
import honk/validation/primitive/null as validation_primitive_null
import honk/validation/primitive/string as validation_primitive_string

// Re-export error type for public API error handling
pub type ValidationError =
  errors.ValidationError

/// Validates lexicon documents
///
/// Validates lexicon structure (id, defs) and ALL definitions within each lexicon.
/// Each definition in the defs object is validated according to its type.
///
/// Returns Ok(Nil) if all lexicons and their definitions are valid.
/// Returns Error with a map of lexicon ID to list of error messages.
/// Error messages include the definition name (e.g., "lex.id#defName: error").
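///
/// ## Example
/// A minimal sketch; assumes `lexicons` is a `List(Json)` of parsed lexicon
/// documents (for example from `parse_json_strings`):
/// ```gleam
/// case honk.validate(lexicons) {
///   Ok(Nil) -> io.println("All lexicons are valid")
///   Error(error_map) ->
///     dict.each(error_map, fn(lex_id, messages) {
///       io.println_error(lex_id <> ": " <> string.join(messages, "; "))
///     })
/// }
/// ```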
pub fn validate(lexicons: List(Json)) -> Result(Nil, Dict(String, List(String))) {
  // Build validation context
  let builder_result =
    context.builder()
    |> context.with_lexicons(lexicons)

  case builder_result {
    Ok(builder) ->
      case context.build(builder) {
        Ok(ctx) -> {
          // Validate ALL definitions in each lexicon
          let error_map =
            dict.fold(ctx.lexicons, dict.new(), fn(errors, lex_id, lexicon) {
              // Get all definition names from the defs object
              let def_keys = json_helpers.get_keys(lexicon.defs)
              let lex_ctx = context.with_current_lexicon(ctx, lex_id)

              // Validate each definition
              list.fold(def_keys, errors, fn(errors_acc, def_name) {
                case json_helpers.get_field(lexicon.defs, def_name) {
                  Some(def) -> {
                    case validate_definition(def, lex_ctx) {
                      Ok(_) -> errors_acc
                      Error(e) -> {
                        // Include def name in error for better context
                        // Extract just the message without wrapper text
                        let message = case e {
                          errors.InvalidSchema(msg) -> msg
                          errors.DataValidation(msg) -> msg
                          errors.LexiconNotFound(msg) ->
                            "Lexicon not found: " <> msg
                        }
                        // Clean up leading ": " if present
                        let clean_message = case
                          string.starts_with(message, ": ")
                        {
                          True -> string.drop_start(message, 2)
                          False -> message
                        }
                        let error_msg =
                          lex_id <> "#" <> def_name <> ": " <> clean_message
                        case dict.get(errors_acc, lex_id) {
                          Ok(existing_errors) ->
                            dict.insert(errors_acc, lex_id, [
                              error_msg,
                              ..existing_errors
                            ])
                          Error(_) ->
                            dict.insert(errors_acc, lex_id, [error_msg])
                        }
                      }
                    }
                  }
                  None -> errors_acc
                }
              })
            })

          case dict.is_empty(error_map) {
            True -> Ok(Nil)
            False -> Error(error_map)
          }
        }
        Error(e) -> Error(dict.from_list([#("builder", [errors.to_string(e)])]))
      }
    Error(e) -> Error(dict.from_list([#("builder", [errors.to_string(e)])]))
  }
}

/// Validates a single definition based on its type
fn validate_definition(
  def: Json,
  ctx: context.ValidationContext,
) -> Result(Nil, errors.ValidationError) {
  case json_helpers.get_string(def, "type") {
    Some("record") -> validation_primary_record.validate_schema(def, ctx)
    Some("query") -> validation_primary_query.validate_schema(def, ctx)
    Some("procedure") -> validation_primary_procedure.validate_schema(def, ctx)
    Some("subscription") ->
      validation_primary_subscription.validate_schema(def, ctx)
    Some("params") -> validation_primary_params.validate_schema(def, ctx)
    Some("object") -> validation_field.validate_object_schema(def, ctx)
    Some("array") -> validation_field.validate_array_schema(def, ctx)
    Some("union") -> validation_field_union.validate_schema(def, ctx)
    Some("string") -> validation_primitive_string.validate_schema(def, ctx)
    Some("integer") -> validation_primitive_integer.validate_schema(def, ctx)
    Some("boolean") -> validation_primitive_boolean.validate_schema(def, ctx)
    Some("bytes") -> validation_primitive_bytes.validate_schema(def, ctx)
    Some("blob") -> validation_primitive_blob.validate_schema(def, ctx)
    Some("cid-link") -> validation_primitive_cid_link.validate_schema(def, ctx)
    Some("null") -> validation_primitive_null.validate_schema(def, ctx)
    Some("ref") -> validation_field_reference.validate_schema(def, ctx)
    Some("token") -> validation_meta_token.validate_schema(def, ctx)
    Some("unknown") -> validation_meta_unknown.validate_schema(def, ctx)
    Some(unknown_type) ->
      Error(errors.invalid_schema("Unknown type: " <> unknown_type))
    None -> Error(errors.invalid_schema("Definition missing type field"))
  }
}

/// Validation context type (re-exported for external use)
pub type ValidationContext =
  context.ValidationContext

/// Build a reusable validation context from lexicons
/// Call this once, then use `validate_record_with_context` for each record
pub fn build_validation_context(
  lexicons: List(Json),
) -> Result(ValidationContext, ValidationError) {
  let builder_result =
    context.builder()
    |> context.with_lexicons(lexicons)

  use builder <- result.try(builder_result)
  context.build(builder)
}

/// Validates a single data record against a collection schema using a pre-built context
/// This is much faster when validating many records: build the context once
/// with `build_validation_context`, then call this for each record
pub fn validate_record_with_context(
  ctx: ValidationContext,
  collection: String,
  record: Json,
) -> Result(Nil, ValidationError) {
  // Get the lexicon for this collection
  case context.get_lexicon(ctx, collection) {
    Some(lexicon) -> {
      // Get the main definition (should be a record type)
      case json_helpers.get_field(lexicon.defs, "main") {
        Some(main_def) -> {
          let lex_ctx = context.with_current_lexicon(ctx, collection)
          // Set the path to include the definition name
          let def_ctx = context.with_path(lex_ctx, "defs.main")
          // Validate the record data against the main definition
          validation_primary_record.validate_data(record, main_def, def_ctx)
        }
        None ->
          Error(errors.invalid_schema(
            "Lexicon '" <> collection <> "' has no main definition",
          ))
      }
    }
    None -> Error(errors.lexicon_not_found(collection))
  }
}

/// Validates a single data record against a collection schema
pub fn validate_record(
  lexicons: List(Json),
  collection: String,
  record: Json,
) -> Result(Nil, ValidationError) {
  // Build validation context
  use ctx <- result.try(build_validation_context(lexicons))
  validate_record_with_context(ctx, collection, record)
}

/// Validates NSID format
pub fn is_valid_nsid(nsid: String) -> Bool {
  formats.is_valid_nsid(nsid)
}

/// Validates a string value against a specific format
pub fn validate_string_format(
  value: String,
  format: types.StringFormat,
) -> Result(Nil, String) {
  case formats.validate_format(value, format) {
    True -> Ok(Nil)
    False -> {
      let format_name = types.format_to_string(format)
      Error("Value does not match format: " <> format_name)
    }
  }
}

/// Convert a Dynamic value to Json
///
/// This is useful when parsing JSON strings with `json.parse(str, decode.dynamic)`
/// and then needing to convert to Json for validation.
///
/// ## Example
/// ```gleam
/// use dyn <- result.try(json.parse(json_str, decode.dynamic))
/// use json_val <- result.try(honk.dynamic_to_json(dyn))
/// honk.validate([json_val])
/// ```
pub fn dynamic_to_json(dyn: dynamic.Dynamic) -> Result(Json, ValidationError) {
  json_helpers.dynamic_to_json(dyn)
}

/// Parse a JSON string and convert to Json for validation
///
/// This is a convenience function that combines `json.parse()` and `dynamic_to_json()`.
/// It's useful when you have JSON stored as strings (e.g., in a database) and want
/// to validate it with honk.
///
/// ## Example
/// ```gleam
/// use json_val <- result.try(honk.parse_json_string(stored_json))
/// honk.validate([json_val])
/// ```
pub fn parse_json_string(json_str: String) -> Result(Json, ValidationError) {
  use dyn <- result.try(
    json.parse(json_str, decode.dynamic)
    |> result.map_error(fn(_) {
      errors.invalid_schema("Failed to parse JSON string")
    }),
  )
  dynamic_to_json(dyn)
}

/// Parse multiple JSON strings and convert to Json for validation
///
/// This is a convenience function for batch parsing JSON strings.
///
/// ## Example
/// ```gleam
/// use json_vals <- result.try(honk.parse_json_strings(stored_jsons))
/// honk.validate(json_vals)
/// ```
pub fn parse_json_strings(
  json_strs: List(String),
) -> Result(List(Json), ValidationError) {
  json_strs
  |> list.try_map(parse_json_string)
  |> result.map_error(fn(_) {
    errors.invalid_schema("Failed to parse JSON strings")
  })
}

@target(erlang)
/// CLI entry point for the honk lexicon validator
///
/// Usage:
///   gleam run -m honk check <path>
///   gleam run -m honk help
pub fn main() -> Nil {
  case argv.load().arguments {
    ["check", path] -> validate_path(path)
    ["help"] | [] -> show_help()
    _ -> {
      io.println_error("Unknown command. Use 'help' for usage information.")
      Nil
    }
  }
}

@target(erlang)
/// Validate a path (auto-detects file or directory)
fn validate_path(path: String) -> Nil {
  case simplifile.is_file(path) {
    Ok(True) -> validate_file(path)
    Ok(False) ->
      case simplifile.is_directory(path) {
        Ok(True) -> validate_directory(path)
        Ok(False) -> {
          io.println_error(
            "Error: Path is neither a file nor a directory: " <> path,
          )
          Nil
        }
        Error(_) -> {
          io.println_error("Error: Cannot access path: " <> path)
          Nil
        }
      }
    Error(_) -> {
      io.println_error("Error: Cannot access path: " <> path)
      Nil
    }
  }
}

@target(erlang)
/// Validate a single lexicon file
fn validate_file(file_path: String) -> Nil {
  case read_and_validate_file(file_path) {
    Ok(_) -> {
      io.println("" <> file_path <> " - valid")
      Nil
    }
    Error(msg) -> {
      io.println_error("" <> file_path)
      io.println_error(" " <> msg)
      Nil
    }
  }
}

@target(erlang)
/// Validate all .json files in a directory
fn validate_directory(dir_path: String) -> Nil {
  case simplifile.get_files(dir_path) {
    Error(_) -> {
      io.println_error("Error: Cannot read directory: " <> dir_path)
      Nil
    }
    Ok(all_files) -> {
      // Filter for .json files
      let json_files =
        all_files
        |> list.filter(fn(path) { string.ends_with(path, ".json") })

      case json_files {
        [] -> {
          io.println("No .json files found in " <> dir_path)
          Nil
        }
        files -> {
          // Read and parse all files
          let file_results =
            files
            |> list.map(fn(file) {
              case read_json_file(file) {
                Ok(json_value) -> #(file, Ok(json_value))
                Error(msg) -> #(file, Error(msg))
              }
            })

          // Separate successful parses from failures
          let #(parse_errors, parsed_files) =
            list.partition(file_results, fn(result) {
              case result {
                #(_, Error(_)) -> True
                #(_, Ok(_)) -> False
              }
            })

          // Display parse errors
          parse_errors
          |> list.each(fn(result) {
            case result {
              #(file, Error(msg)) -> {
                io.println_error("" <> file)
                io.println_error(" " <> msg)
              }
              _ -> Nil
            }
          })

          // Get all successfully parsed lexicons
          let lexicons =
            parsed_files
            |> list.filter_map(fn(result) {
              case result {
                #(_, Ok(json)) -> Ok(json)
                _ -> Error(Nil)
              }
            })

          // Validate all lexicons together (allows cross-lexicon references)
          case validate(lexicons) {
            Ok(_) -> {
              // All lexicons are valid
              parsed_files
              |> list.each(fn(result) {
                case result {
                  #(file, Ok(_)) -> io.println("" <> file)
                  _ -> Nil
                }
              })
            }
            Error(error_map) -> {
              // Some lexicons have errors - map errors back to files
              parsed_files
              |> list.each(fn(result) {
                case result {
                  #(file, Ok(json)) -> {
                    // Get the lexicon ID for this file
                    case json_helpers.get_string(json, "id") {
                      Some(lex_id) -> {
                        case dict.get(error_map, lex_id) {
                          Ok(errors) -> {
                            io.println_error("" <> file)
                            errors
                            |> list.each(fn(err) {
                              io.println_error(" " <> err)
                            })
                          }
                          Error(_) -> io.println("" <> file)
                        }
                      }
                      None -> {
                        io.println_error("" <> file)
                        io.println_error(" Missing lexicon id")
                      }
                    }
                  }
                  _ -> Nil
                }
              })
            }
          }

          // Summary
          let total = list.length(files)
          let parse_error_count = list.length(parse_errors)
          let validation_error_count = case validate(lexicons) {
            Ok(_) -> 0
            Error(error_map) -> dict.size(error_map)
          }
          let total_errors = parse_error_count + validation_error_count

          case total_errors {
            0 ->
              io.println(
                "\nAll "
                <> int.to_string(total)
                <> " schemas validated successfully.",
              )
            _ ->
              io.println_error(
                "\n"
                <> int.to_string(total_errors)
                <> " of "
                <> int.to_string(total)
                <> " schemas failed validation.",
              )
          }

          Nil
        }
      }
    }
  }
}

@target(erlang)
/// Read and parse a JSON file (without validation)
fn read_json_file(file_path: String) -> Result(Json, String) {
  use content <- result.try(
    simplifile.read(file_path)
    |> result.map_error(fn(_) { "Cannot read file" }),
  )

  use json_dynamic <- result.try(
    json.parse(content, decode.dynamic)
    |> result.map_error(fn(_) { "Invalid JSON" }),
  )

  json_helpers.dynamic_to_json(json_dynamic)
  |> result.map_error(fn(_) { "Failed to convert JSON" })
}

@target(erlang)
/// Read a file and validate it as a lexicon
fn read_and_validate_file(file_path: String) -> Result(Nil, String) {
  use content <- result.try(
    simplifile.read(file_path)
    |> result.map_error(fn(_) { "Cannot read file" }),
  )

  use json_dynamic <- result.try(
    json.parse(content, decode.dynamic)
    |> result.map_error(fn(_) { "Invalid JSON" }),
  )

  use json_value <- result.try(
    json_helpers.dynamic_to_json(json_dynamic)
    |> result.map_error(fn(_) { "Failed to convert JSON" }),
  )

  use _ <- result.try(
    validate([json_value])
    |> result.map_error(fn(error_map) { format_validation_errors(error_map) }),
  )

  Ok(Nil)
}

@target(erlang)
/// Format validation errors from the error map
fn format_validation_errors(error_map: Dict(String, List(String))) -> String {
  error_map
  |> dict.to_list
  |> list.map(fn(entry) {
    let #(_key, errors) = entry
    string.join(errors, "\n ")
  })
  |> string.join("\n ")
}

@target(erlang)
/// Show help text
fn show_help() -> Nil {
  io.println(
    "
honk - ATProtocol Lexicon Validator

USAGE:
  gleam run -m honk check <path>
  gleam run -m honk help

COMMANDS:
  check <path>   Check a lexicon file or directory
                 - If <path> is a file: validates that single lexicon
                 - If <path> is a directory: recursively validates all .json files

  help           Show this help message

EXAMPLES:
  gleam run -m honk check ./lexicons/xyz/statusphere/status.json
  gleam run -m honk check ./lexicons

VALIDATION:
  - Validates lexicon structure (id, defs)
  - Validates ALL definitions in each lexicon
  - Checks types, constraints, and references
  - Reports errors with definition context (lex.id#defName)
",
  )
}