OCaml HTML5 parser/serialiser based on Python's JustHTML
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: MIT
4 ---------------------------------------------------------------------------*)
5
6(** HTML5 conformance checker.
7
8 This module provides HTML5 validation and conformance checking,
9 combining parse error detection with structural validation rules. *)
10
11(** {1 Re-exported modules} *)
12
13(** Validation message types and constructors.

    Alias of the [Message] module, re-exported so that it can be reached
    through this interface. The accessors {!messages}, {!errors},
    {!warnings} and {!infos} all return lists of [Message.t]. *)
14module Message = Message
15
16(** Message collection utilities.

    Alias of the [Message_collector] module, re-exported so that it can be
    reached through this interface. *)
17module Message_collector = Message_collector
18
19(** Message output formatters.

    Alias of the [Message_format] module, re-exported so that it can be
    reached through this interface.
    NOTE(review): presumably the implementation behind {!format_text},
    {!format_json} and {!format_gnu} — confirm. *)
20module Message_format = Message_format
21
22(** Parse error bridge.

    Alias of the [Parse_error_bridge] module, re-exported so that it can be
    reached through this interface.
    NOTE(review): presumably converts errors reported by the HTML parser
    into [Message.t] values — confirm against that module. *)
23module Parse_error_bridge = Parse_error_bridge
24
25(** {2 Content Model Framework} *)
26
27(** HTML5 content categories.

    Alias of the [Content_category] module, re-exported so that it can be
    reached through this interface. *)
28module Content_category = Content_category
29
30(** HTML5 element content models.

    Alias of the [Content_model] module, re-exported so that it can be
    reached through this interface. *)
31module Content_model = Content_model
32
33(** HTML5 attribute specifications.

    Alias of the [Attr_spec] module, re-exported so that it can be reached
    through this interface. *)
34module Attr_spec = Attr_spec
35
36(** HTML5 element specifications.

    Alias of the [Element_spec] module, re-exported so that it can be
    reached through this interface.
    NOTE(review): presumably built from {!Content_model} and {!Attr_spec}
    values — confirm against that module. *)
37module Element_spec = Element_spec
38
39(** {1 Core Types} *)
40
41(** Result of checking an HTML document.

    The type is abstract. Values are produced by {!check} and {!check_dom}
    and inspected with the accessors {!messages}, {!errors}, {!warnings},
    {!infos}, {!has_errors}, {!document} and {!system_id}, or rendered with
    {!format_text}, {!format_json} and {!format_gnu}. *)
42type t
43
44(** {1 Checking Functions} *)
45
46(** Parse and validate HTML from a reader.
47
48 [check reader] parses the HTML input and optionally collects parse errors.
49 Future versions will also run conformance checkers on the resulting DOM.
    The parsed document and the collected messages are retrieved from the
    returned value with {!document} and {!messages}. Use {!check_dom}
    when the document has already been parsed.
50
51 @param collect_parse_errors If [true], collect and include parse errors. Default: [true].
52 @param system_id Optional file path or URL for error reporting; see {!system_id}.
53 @param reader Bytesrw reader containing HTML input (the positional argument).
    @return the result of the check, as a value of type {!t}. *)
54val check :
55 ?collect_parse_errors:bool ->
56 ?system_id:string ->
57 Bytesrw.Bytes.Reader.t ->
58 t
59
60(** Validate an already-parsed HTML document.
61
62 [check_dom result] takes an existing [Html5rw.t] parse result and validates it.
    It is the counterpart of {!check} for callers that already hold a
    parsed document rather than a reader, and accepts the same optional
    arguments.
63
64 @param collect_parse_errors If [true], collect and include parse errors from the result. Default: [true].
65 @param system_id Optional file path or URL for error reporting; see {!system_id}.
66 @param result Already-parsed HTML document (the positional argument).
    @return the result of the check, as a value of type {!t}. *)
67val check_dom :
68 ?collect_parse_errors:bool ->
69 ?system_id:string ->
70 Html5rw.t ->
71 t
72
73(** {1 Result Accessors} *)
74
75(** Get all validation messages.

    [messages t] is every message recorded in [t], whatever its severity;
    {!errors}, {!warnings} and {!infos} give the severity-specific lists.
    NOTE(review): the ordering of the returned list is not visible from
    this interface — confirm before relying on it. *)
76val messages : t -> Message.t list
77
78(** Get only error messages.

    [errors t] is the list of messages of [t] that are errors.
    NOTE(review): presumably a filtered view of {!messages} — confirm. *)
79val errors : t -> Message.t list
80
81(** Get only warning messages.

    [warnings t] is the list of messages of [t] that are warnings.
    NOTE(review): presumably a filtered view of {!messages} — confirm. *)
82val warnings : t -> Message.t list
83
84(** Get only info messages.

    [infos t] is the list of messages of [t] that are informational.
    NOTE(review): presumably a filtered view of {!messages} — confirm. *)
85val infos : t -> Message.t list
86
87(** Check if there are any errors.

    [has_errors t] is [true] when [t] contains at least one error message
    and [false] otherwise. Warnings and info messages are reported by
    {!warnings} and {!infos} and are not what this predicate tests.
    NOTE(review): presumably equivalent to [errors t <> []] — confirm. *)
88val has_errors : t -> bool
89
90(** Get the underlying parsed document.

    [document t] is the [Html5rw.t] parse result that [t] refers to.
    NOTE(review): for results of {!check_dom} this is presumably the very
    value that was passed in — confirm. *)
91val document : t -> Html5rw.t
92
93(** Get the system identifier if set.

    [system_id t] is [Some id] when a system identifier is set on [t] and
    [None] otherwise.
    NOTE(review): presumably [id] is the [?system_id] argument given to
    {!check} or {!check_dom} — confirm. *)
94val system_id : t -> string option
95
96(** {1 Formatting} *)
97
98(** Format messages as human-readable text.

    [format_text t] renders the messages of [t] as a single string.
    NOTE(review): the exact layout is not visible from this interface;
    presumably delegated to {!Message_format} — confirm. *)
99val format_text : t -> string
100
101(** Format messages as JSON.

    [format_json t] renders the messages of [t] as a single string holding
    JSON text.
    NOTE(review): the JSON schema is not visible from this interface;
    presumably delegated to {!Message_format} — confirm. *)
102val format_json : t -> string
103
104(** Format messages in GNU style.

    [format_gnu t] renders the messages of [t] as a single string.
    NOTE(review): presumably this follows the error message format of the
    GNU Coding Standards (source name, line and column, then the message),
    with {!system_id} as the source name — confirm against
    {!Message_format}. *)
105val format_gnu : t -> string