(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy . All rights reserved.
   SPDX-License-Identifier: MIT
  ---------------------------------------------------------------------------*)

(** Parse error codes as defined by the WHATWG HTML5 specification.

    The HTML5 parser never fails - it always produces a DOM tree. However,
    the specification defines these error codes for conformance checkers to
    report issues in HTML documents.

    Each error code corresponds to a specific condition in the WHATWG
    specification's parsing algorithm.

    @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-errors>
      WHATWG: Parse errors *)

(** A parse error code. Every constructor except [Tree_construction_error]
    names an error code defined by the WHATWG specification. *)
type t =
  | Abrupt_closing_of_empty_comment
      (** Parser encounters [<!-->] or [<!--->]; comment is treated as
          correctly closed.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-abrupt-closing-of-empty-comment> *)
  | Abrupt_doctype_public_identifier
      (** [>] found in DOCTYPE public identifier before closing quote; sets
          document to quirks mode.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-abrupt-doctype-public-identifier> *)
  | Abrupt_doctype_system_identifier
      (** [>] found in DOCTYPE system identifier before closing quote; sets
          document to quirks mode.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-abrupt-doctype-system-identifier> *)
  | Absence_of_digits_in_numeric_character_reference
      (** Numeric character reference has no digits (e.g., [&#qux;]); the
          reference is not resolved.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-absence-of-digits-in-numeric-character-reference> *)
  | Cdata_in_html_content
      (** CDATA section found outside SVG or MathML foreign content; treated
          as a bogus comment.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-cdata-in-html-content> *)
  | Character_reference_outside_unicode_range
      (** Numeric reference exceeds U+10FFFF; resolves to U+FFFD REPLACEMENT
          CHARACTER.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-character-reference-outside-unicode-range> *)
  | Control_character_in_input_stream
      (** Control code point (other than ASCII whitespace or NULL) appears in
          the input; parsed as-is.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-control-character-in-input-stream> *)
  | Control_character_reference
      (** Numeric reference to a control character; handled per specification
          replacement rules.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-control-character-reference> *)
  | Duplicate_attribute
      (** Tag contains duplicate attribute names; later duplicates are
          removed.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-duplicate-attribute> *)
  | End_tag_with_attributes
      (** End tag includes attributes; attributes are ignored.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-end-tag-with-attributes> *)
  | End_tag_with_trailing_solidus
      (** End tag has [/] before [>] (like [</div/>]); treated as regular end
          tag.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-end-tag-with-trailing-solidus> *)
  | Eof_before_tag_name
      (** End of input where tag name expected; [<] or [</] is treated as
          text.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-eof-before-tag-name> *)
  | Eof_in_cdata
      (** End of input within CDATA section; treated as immediately closed.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-eof-in-cdata> *)
  | Eof_in_comment
      (** End of input within comment; comment is treated as immediately
          closed.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-eof-in-comment> *)
  | Eof_in_doctype
      (** End of input within DOCTYPE; sets document to quirks mode.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-eof-in-doctype> *)
  | Eof_in_script_html_comment_like_text
      (** End of input within HTML-like comment syntax inside a script
          element.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-eof-in-script-html-comment-like-text> *)
  | Eof_in_tag
      (** End of input within a start or end tag; the tag is ignored.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-eof-in-tag> *)
  | Incorrectly_closed_comment
      (** Comment closed by [--!>] instead of [-->]; treated as correctly
          closed.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment> *)
  | Incorrectly_opened_comment
      (** [<!] not followed by [--], [DOCTYPE], or the start of a CDATA
          section; content up to the next [>] (or end of input) is treated
          as a bogus comment.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-opened-comment> *)
  | Invalid_character_sequence_after_doctype_name
      (** Neither "PUBLIC" nor "SYSTEM" after DOCTYPE name; sets document to
          quirks mode.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-invalid-character-sequence-after-doctype-name> *)
  | Invalid_first_character_of_tag_name
      (** Non-ASCII-alpha character where tag name start expected; [<] is
          treated as text.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-invalid-first-character-of-tag-name> *)
  | Missing_attribute_value
      (** [>] where attribute value expected (e.g., [<div id=>]); attribute
          gets empty string value.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-missing-attribute-value> *)
  | Missing_doctype_name
      (** DOCTYPE has no name; sets document to quirks mode.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-missing-doctype-name> *)
  | Missing_doctype_public_identifier
      (** [>] where public identifier expected; sets quirks mode.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-missing-doctype-public-identifier> *)
  | Missing_doctype_system_identifier
      (** [>] where system identifier expected; sets quirks mode.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-missing-doctype-system-identifier> *)
  | Missing_end_tag_name
      (** [>] where end tag name expected ([</>]); sequence is ignored.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-missing-end-tag-name> *)
  | Missing_quote_before_doctype_public_identifier
      (** Public identifier lacks preceding quote; sets quirks mode.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-missing-quote-before-doctype-public-identifier> *)
  | Missing_quote_before_doctype_system_identifier
      (** System identifier lacks preceding quote; sets quirks mode.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-missing-quote-before-doctype-system-identifier> *)
  | Missing_semicolon_after_character_reference
      (** Character reference lacks terminating [;]; behaves as if semicolon
          were present.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-missing-semicolon-after-character-reference> *)
  | Missing_whitespace_after_doctype_public_keyword
      (** No whitespace between "PUBLIC" and identifier; treated as if
          whitespace were present.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-missing-whitespace-after-doctype-public-keyword> *)
  | Missing_whitespace_after_doctype_system_keyword
      (** No whitespace between "SYSTEM" and identifier; treated as if
          whitespace were present.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-missing-whitespace-after-doctype-system-keyword> *)
  | Missing_whitespace_before_doctype_name
      (** No whitespace between "DOCTYPE" and name; treated as if whitespace
          were present.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-missing-whitespace-before-doctype-name> *)
  | Missing_whitespace_between_attributes
      (** Adjacent attributes lack separating whitespace; treated as if
          whitespace were present.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-missing-whitespace-between-attributes> *)
  | Missing_whitespace_between_doctype_public_and_system_identifiers
      (** Public and system identifiers not separated by whitespace; treated
          as if whitespace were present.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-missing-whitespace-between-doctype-public-and-system-identifiers> *)
  | Nested_comment
      (** Nested [<!--] found inside a comment; the comment is still closed
          by the first [-->].
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-nested-comment> *)
  | Noncharacter_character_reference
      (** Numeric reference to a Unicode noncharacter; resolved as-is (not
          replaced).
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-noncharacter-character-reference> *)
  | Noncharacter_in_input_stream
      (** Unicode noncharacter code point in input; parsed as-is.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-noncharacter-in-input-stream> *)
  | Non_void_html_element_start_tag_with_trailing_solidus
      (** Non-void element start tag has [/] before [>] (like [<div/>]); the
          [/] is ignored.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-non-void-html-element-start-tag-with-trailing-solidus> *)
  | Null_character_reference
      (** Numeric reference to U+0000 (NULL); resolves to U+FFFD REPLACEMENT
          CHARACTER.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-null-character-reference> *)
  | Surrogate_character_reference
      (** Numeric reference to a surrogate code point (U+D800-U+DFFF);
          resolves to U+FFFD REPLACEMENT CHARACTER.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-surrogate-character-reference> *)
  | Surrogate_in_input_stream
      (** Surrogate code point in input stream; parsed as-is.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-surrogate-in-input-stream> *)
  | Unexpected_character_after_doctype_system_identifier
      (** Non-whitespace/non-[>] character after system identifier; the
          character is ignored.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-unexpected-character-after-doctype-system-identifier> *)
  | Unexpected_character_in_attribute_name
      (** Double quote, single quote, or less-than sign in attribute name;
          included in the attribute name.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-unexpected-character-in-attribute-name> *)
  | Unexpected_character_in_unquoted_attribute_value
      (** Double quote, equals sign, backtick, or less-than sign in unquoted
          attribute value; included in the value.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-unexpected-character-in-unquoted-attribute-value> *)
  | Unexpected_equals_sign_before_attribute_name
      (** [=] where attribute name expected; treated as first character of
          attribute name.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-unexpected-equals-sign-before-attribute-name> *)
  | Unexpected_null_character
      (** U+0000 (NULL) in various positions; ignored or replaced with U+FFFD
          depending on context.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-unexpected-null-character> *)
  | Unexpected_question_mark_instead_of_tag_name
      (** [?] where tag name expected (like [<?xml-stylesheet ...?>]);
          treated as a bogus comment.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-unexpected-question-mark-instead-of-tag-name> *)
  | Unexpected_solidus_in_tag
      (** [/] in tag not immediately before [>]; treated as whitespace.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-unexpected-solidus-in-tag> *)
  | Unknown_named_character_reference
      (** Ambiguous ampersand: [&] followed by characters that don't match
          any named reference; not resolved as reference.
          @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-error-unknown-named-character-reference> *)
  | Tree_construction_error of string
      (** Tree construction error not defined in the WHATWG specification.
          These are informative errors produced during tree construction to
          indicate various issues like unexpected tags, missing closing
          tags, etc. The string contains a descriptive error code. *)

val to_string : t -> string
(** [to_string code] converts an error code to its WHATWG specification
    string representation.

    The returned string is lowercase with hyphens, matching the WHATWG
    specification naming convention.

    For example:
    - [Abrupt_closing_of_empty_comment] becomes
      ["abrupt-closing-of-empty-comment"]
    - [Eof_in_tag] becomes ["eof-in-tag"] *)

val of_string : string -> t
(** [of_string s] parses an error code from its WHATWG specification string
    representation.

    If [s] matches a known WHATWG error code, returns that variant.
    Otherwise, returns [Tree_construction_error s]. *)

val of_string_opt : string -> t option
(** [of_string_opt s] parses an error code from its WHATWG specification
    string representation.

    Always returns [Some code]. For unrecognized strings, returns
    [Some (Tree_construction_error s)]. *)
(* NOTE(review): as documented, this function never returns [None], so the
   [option] wrapper carries no information for callers. Confirm against the
   implementation whether that is intentional. *)

val is_whatwg_standard : t -> bool
(** [is_whatwg_standard code] checks if an error code is defined in the
    WHATWG specification.

    Returns [false] for [Tree_construction_error _], [true] for all
    others. *)

val pp : Format.formatter -> t -> unit
(** [pp ppf code] pretty-prints an error code using the WHATWG specification
    string format. *)