(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy . All rights reserved.
   SPDX-License-Identifier: MIT
  ---------------------------------------------------------------------------*)

(** html5check - HTML5 conformance checker CLI

    Command line interface for validating HTML5 documents. *)

open Cmdliner

let version = "0.1.0"

(** Process exit codes returned by {!run}. *)
module Exit_code = struct
  let ok = Cmd.Exit.ok
  let validation_errors = 1
  let io_error = 2
end

(** [plural_suffix n] is [""] when [n = 1] and ["s"] otherwise. *)
let plural_suffix n = if n = 1 then "" else "s"

(** [count_messages result] is [(errors, warnings)]: the lengths of
    [Htmlrw_check.errors result] and [Htmlrw_check.warnings result]. *)
let count_messages result =
  ( List.length (Htmlrw_check.errors result),
    List.length (Htmlrw_check.warnings result) )

(** [read_input file] opens [file] for reading, or uses [stdin] when [file]
    is ["-"].

    Returns [Ok (reader, ic, file)] where [reader] is a byte reader over the
    channel [ic], or [Error (`Io_error msg)] if a [Sys_error] is raised while
    opening. The caller is responsible for closing [ic] (unless it is
    [stdin]). *)
let read_input file =
  try
    let ic =
      if file = "-" then begin
        (* Binary mode so the reader sees the input bytes untranslated on
           platforms where text mode rewrites line endings. *)
        set_binary_mode_in stdin true;
        stdin
      end
      else open_in_bin file
    in
    let reader = Bytesrw.Bytes.Reader.of_in_channel ic in
    Ok (reader, ic, file)
  with Sys_error msg ->
    Error (`Io_error (Printf.sprintf "Cannot read file '%s': %s" file msg))

(** [format_output format result] renders [result] with the [Htmlrw_check]
    formatter selected by [format] ([`Text], [`Json] or [`Gnu]). *)
let format_output format result =
  match format with
  | `Text -> Htmlrw_check.to_text result
  | `Json -> Htmlrw_check.to_json result
  | `Gnu -> Htmlrw_check.to_gnu result

(** [run format errors_only exit_zero quiet verbose file] validates [file]
    (["-"] for stdin), prints the report and returns the process exit code.

    - Returns [Exit_code.io_error] if the input cannot be opened; the error
      message goes to stderr unless [quiet] is set.
    - With [quiet], only counts are printed to stdout (errors only when
      [errors_only] is set).
    - Otherwise the formatted report is printed to stdout and, with
      [verbose], a summary line is written to stderr when there are messages.
    - Returns [Exit_code.ok] when [exit_zero] is set or the result has no
      errors, [Exit_code.validation_errors] otherwise. *)
let run format errors_only exit_zero quiet verbose file =
  match read_input file with
  | Error (`Io_error msg) ->
      if not quiet then Printf.eprintf "Error: %s\n" msg;
      Exit_code.io_error
  | Ok (reader, ic, system_id) ->
      (* Run validation. The channel is released even if the checker raises;
         stdin is never closed. [close_in_noerr] keeps a failure during
         cleanup from masking an exception raised by the checker. *)
      let result =
        Fun.protect
          ~finally:(fun () -> if file <> "-" then close_in_noerr ic)
          (fun () -> Htmlrw_check.check ~system_id reader)
      in
      (* Messages selected by the filtering flag; below they only decide
         whether the verbose summary is shown. *)
      let messages =
        if errors_only then Htmlrw_check.errors result
        else Htmlrw_check.messages result
      in
      if quiet then begin
        (* Only show counts *)
        let error_count, warning_count = count_messages result in
        if errors_only then
          Printf.printf "%d error%s\n" error_count (plural_suffix error_count)
        else
          Printf.printf "%d error%s, %d warning%s\n" error_count
            (plural_suffix error_count) warning_count
            (plural_suffix warning_count)
      end
      else begin
        (* NOTE(review): the formatter receives the unfiltered [result], so
           [errors_only] does not restrict what is printed here. Confirm
           whether Htmlrw_check offers a way to format errors only. *)
        let output = format_output format result in
        if output <> "" then print_string output;
        (* Show summary if verbose *)
        if verbose && messages <> [] then begin
          let error_count, warning_count = count_messages result in
          Printf.eprintf "\nSummary: %d error%s, %d warning%s\n" error_count
            (plural_suffix error_count) warning_count
            (plural_suffix warning_count)
        end
      end;
      (* Determine exit code *)
      if exit_zero || not (Htmlrw_check.has_errors result) then Exit_code.ok
      else Exit_code.validation_errors

(** Command line argument definitions *)

(** [--format=FORMAT]: output format, one of [text] (default), [json],
    [gnu]. *)
let format_arg =
  let formats = [("text", `Text); ("json", `Json); ("gnu", `Gnu)] in
  let doc =
    "Output format. $(docv) must be one of $(b,text) (human-readable, default), \
     $(b,json) (Nu validator compatible JSON), or $(b,gnu) (GNU-style for IDE integration)."
  in
  Arg.(value & opt (enum formats) `Text & info ["format"] ~docv:"FORMAT" ~doc)

(** [--errors-only] flag. *)
let errors_only_arg =
  let doc = "Only show errors (suppress warnings)." in
  Arg.(value & flag & info ["errors-only"] ~doc)

(** [--exit-zero] flag. *)
let exit_zero_arg =
  let doc =
    "Always exit with status code 0, even if validation errors are found. \
     Useful for CI pipelines where you want to collect validation results \
     but not fail the build."
  in
  Arg.(value & flag & info ["exit-zero"] ~doc)

(** [-q] / [--quiet] flag. *)
let quiet_arg =
  let doc = "Quiet mode - only show error and warning counts, no details." in
  Arg.(value & flag & info ["q"; "quiet"] ~doc)

(** [-v] / [--verbose] flag. *)
let verbose_arg =
  let doc = "Verbose mode - show additional information including summary." in
  Arg.(value & flag & info ["v"; "verbose"] ~doc)

(** Optional positional [FILE] argument; defaults to ["-"] (stdin). *)
let file_arg =
  let doc =
    "HTML file to validate. Use $(b,-) to read from standard input. \
     If no file is specified, reads from stdin."
  in
  Arg.(value & pos 0 string "-" & info [] ~docv:"FILE" ~doc)

(** The [html5check] command: manual page plus the {!run} term applied to the
    arguments above. *)
let cmd =
  let doc = "validate HTML5 documents for conformance" in
  let man = [
    `S Manpage.s_description;
    `P "$(tname) validates HTML5 documents against the WHATWG HTML5 specification. \
        It reports parse errors, structural validation issues, and conformance problems.";
    `P "The validator checks for:";
    `I ("Parse errors", "Malformed HTML syntax according to the WHATWG specification");
    `I ("Content model violations", "Elements in invalid parent/child relationships");
    `I ("Attribute errors", "Invalid or missing required attributes");
    `I ("Structural issues", "Other conformance problems");
    `S Manpage.s_options;
    `S "OUTPUT FORMATS";
    `P "The validator supports three output formats:";
    `I ("$(b,text)", "Human-readable format showing file:line:col: severity: message");
    `I ("$(b,json)", "JSON format compatible with the Nu Html Checker (v.Nu)");
    `I ("$(b,gnu)", "GNU-style format for IDE integration (file:line:column: message)");
    `S "EXIT STATUS";
    `P "The validator exits with one of the following status codes:";
    `I ("0", "No validation errors found (or --exit-zero was specified)");
    `I ("1", "Validation errors were found");
    `I ("2", "File not found or I/O error");
    `S Manpage.s_examples;
    `P "Validate a file:";
    `Pre " $(mname) index.html";
    `P "Validate from stdin:";
    `Pre " cat page.html | $(mname) -";
    `P "Show only errors in JSON format:";
    `Pre " $(mname) --format=json --errors-only page.html";
    `P "Quiet mode for CI:";
    `Pre " $(mname) --quiet --exit-zero index.html";
    `S Manpage.s_bugs;
    `P "Report bugs at https://tangled.org/@anil.recoil.org/ocaml-html5rw/issues";
  ] in
  let info = Cmd.info "html5check" ~version ~doc ~man in
  Cmd.v info
    Term.(const run $ format_arg $ errors_only_arg $ exit_zero_arg
          $ quiet_arg $ verbose_arg $ file_arg)

(** [main ()] evaluates {!cmd} and returns the resulting exit code. *)
let main () = Cmd.eval' cmd

let () = Stdlib.exit (main ())