(* OCaml HTML5 parser/serialiser based on Python's JustHTML *)
(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: MIT
  ---------------------------------------------------------------------------*)

(** HTML5 conformance checker.

    Validates HTML5 documents by combining the parse errors reported by the
    parser with structural validation rules. *)

(** {1 Re-exported modules} *)

module Message = Message
(** Validation message types and their constructors. *)

module Message_collector = Message_collector
(** Utilities for collecting messages. *)

module Message_format = Message_format
(** Output formatters for messages. *)

module Parse_error_bridge = Parse_error_bridge
(** Bridge for parse errors. *)

(** {2 Content Model Framework} *)

module Content_category = Content_category
(** Content categories defined by HTML5. *)

module Content_model = Content_model
(** Content models of HTML5 elements. *)

module Attr_spec = Attr_spec
(** Specifications of HTML5 attributes. *)

module Element_spec = Element_spec
(** Specifications of HTML5 elements. *)

(** {1 Core Types} *)

type t
(** The outcome of checking an HTML document. *)

(** {1 Checking Functions} *)

val check :
  ?collect_parse_errors:bool -> ?system_id:string -> Bytesrw.Bytes.Reader.t -> t
(** [check ?collect_parse_errors ?system_id reader] parses the HTML supplied
    by [reader] and validates it, optionally gathering the parse errors
    encountered along the way. Running conformance checkers over the
    resulting DOM is planned for a future version.

    @param collect_parse_errors
      When [true], parse errors are collected and included in the result.
      Default: [true].
    @param system_id File path or URL used when reporting errors.
    @param reader Bytesrw reader holding the HTML input. *)

val check_dom :
  ?collect_parse_errors:bool -> ?system_id:string -> Html5rw.t -> t
(** [check_dom ?collect_parse_errors ?system_id result] validates [result],
    an existing [Html5rw.t] parse result, without parsing any input itself.

    @param collect_parse_errors
      When [true], the parse errors carried by [result] are collected and
      included. Default: [true].
    @param system_id File path or URL used when reporting errors.
    @param result The already-parsed HTML document. *)

(** {1 Result Accessors} *)

val messages : t -> Message.t list
(** [messages t] is every validation message held by [t]. *)

val errors : t -> Message.t list
(** [errors t] is the error messages of [t] only. *)

val warnings : t -> Message.t list
(** [warnings t] is the warning messages of [t] only. *)

val infos : t -> Message.t list
(** [infos t] is the info messages of [t] only. *)

val has_errors : t -> bool
(** [has_errors t] tells whether [t] contains any errors. *)

val document : t -> Html5rw.t
(** [document t] is the parsed document underlying [t]. *)

val system_id : t -> string option
(** [system_id t] is the system identifier of [t], if one was set. *)

(** {1 Formatting} *)

val format_text : t -> string
(** [format_text t] renders the messages of [t] as human-readable text. *)

val format_json : t -> string
(** [format_json t] renders the messages of [t] as JSON. *)

val format_gnu : t -> string
(** [format_gnu t] renders the messages of [t] in GNU style. *)