OCaml HTML5 parser/serialiser based on Python's JustHTML
(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: MIT
  ---------------------------------------------------------------------------*)

(** Parse error codes as defined by the WHATWG HTML5 specification.

    @see <https://html.spec.whatwg.org/multipage/parsing.html#parse-errors> *)

(** A parse error code.

    Every constant constructor has a fixed kebab-case name (see
    {!to_string}); [Tree_construction_error] carries an arbitrary string. *)
type t =
  | Abrupt_closing_of_empty_comment
  | Abrupt_doctype_public_identifier
  | Abrupt_doctype_system_identifier
  | Absence_of_digits_in_numeric_character_reference
  | Cdata_in_html_content
  | Character_reference_outside_unicode_range
  | Control_character_in_input_stream
  | Control_character_reference
  | Duplicate_attribute
  | End_tag_with_attributes
  | End_tag_with_trailing_solidus
  | Eof_before_tag_name
  | Eof_in_cdata
  | Eof_in_comment
  | Eof_in_doctype
  | Eof_in_script_html_comment_like_text
  | Eof_in_tag
  | Incorrectly_closed_comment
  | Incorrectly_opened_comment
  | Invalid_character_sequence_after_doctype_name
  | Invalid_first_character_of_tag_name
  | Missing_attribute_value
  | Missing_doctype_name
  | Missing_doctype_public_identifier
  | Missing_doctype_system_identifier
  | Missing_end_tag_name
  | Missing_quote_before_doctype_public_identifier
  | Missing_quote_before_doctype_system_identifier
  | Missing_semicolon_after_character_reference
  | Missing_whitespace_after_doctype_public_keyword
  | Missing_whitespace_after_doctype_system_keyword
  | Missing_whitespace_before_doctype_name
  | Missing_whitespace_between_attributes
  | Missing_whitespace_between_doctype_public_and_system_identifiers
  | Nested_comment
  | Noncharacter_character_reference
  | Noncharacter_in_input_stream
  | Non_void_html_element_start_tag_with_trailing_solidus
  | Null_character_reference
  | Surrogate_character_reference
  | Surrogate_in_input_stream
  | Unexpected_character_after_doctype_system_identifier
  | Unexpected_character_in_attribute_name
  | Unexpected_character_in_unquoted_attribute_value
  | Unexpected_equals_sign_before_attribute_name
  | Unexpected_null_character
  | Unexpected_question_mark_instead_of_tag_name
  | Unexpected_solidus_in_tag
  | Unknown_named_character_reference
  | Tree_construction_error of string
      (** Any code without a dedicated constructor; the payload is the code
          text itself. Presumably produced by the tree-construction stage,
          judging by the name (not visible in this file). *)

(** [to_string code] is the kebab-case name of [code].

    For [Tree_construction_error s] the payload [s] is returned unchanged. *)
let to_string code =
  match code with
  | Abrupt_closing_of_empty_comment -> "abrupt-closing-of-empty-comment"
  | Abrupt_doctype_public_identifier -> "abrupt-doctype-public-identifier"
  | Abrupt_doctype_system_identifier -> "abrupt-doctype-system-identifier"
  | Absence_of_digits_in_numeric_character_reference ->
    "absence-of-digits-in-numeric-character-reference"
  | Cdata_in_html_content -> "cdata-in-html-content"
  | Character_reference_outside_unicode_range ->
    "character-reference-outside-unicode-range"
  | Control_character_in_input_stream -> "control-character-in-input-stream"
  | Control_character_reference -> "control-character-reference"
  | Duplicate_attribute -> "duplicate-attribute"
  | End_tag_with_attributes -> "end-tag-with-attributes"
  | End_tag_with_trailing_solidus -> "end-tag-with-trailing-solidus"
  | Eof_before_tag_name -> "eof-before-tag-name"
  | Eof_in_cdata -> "eof-in-cdata"
  | Eof_in_comment -> "eof-in-comment"
  | Eof_in_doctype -> "eof-in-doctype"
  | Eof_in_script_html_comment_like_text ->
    "eof-in-script-html-comment-like-text"
  | Eof_in_tag -> "eof-in-tag"
  | Incorrectly_closed_comment -> "incorrectly-closed-comment"
  | Incorrectly_opened_comment -> "incorrectly-opened-comment"
  | Invalid_character_sequence_after_doctype_name ->
    "invalid-character-sequence-after-doctype-name"
  | Invalid_first_character_of_tag_name ->
    "invalid-first-character-of-tag-name"
  | Missing_attribute_value -> "missing-attribute-value"
  | Missing_doctype_name -> "missing-doctype-name"
  | Missing_doctype_public_identifier -> "missing-doctype-public-identifier"
  | Missing_doctype_system_identifier -> "missing-doctype-system-identifier"
  | Missing_end_tag_name -> "missing-end-tag-name"
  | Missing_quote_before_doctype_public_identifier ->
    "missing-quote-before-doctype-public-identifier"
  | Missing_quote_before_doctype_system_identifier ->
    "missing-quote-before-doctype-system-identifier"
  | Missing_semicolon_after_character_reference ->
    "missing-semicolon-after-character-reference"
  | Missing_whitespace_after_doctype_public_keyword ->
    "missing-whitespace-after-doctype-public-keyword"
  | Missing_whitespace_after_doctype_system_keyword ->
    "missing-whitespace-after-doctype-system-keyword"
  | Missing_whitespace_before_doctype_name ->
    "missing-whitespace-before-doctype-name"
  | Missing_whitespace_between_attributes ->
    "missing-whitespace-between-attributes"
  | Missing_whitespace_between_doctype_public_and_system_identifiers ->
    "missing-whitespace-between-doctype-public-and-system-identifiers"
  | Nested_comment -> "nested-comment"
  | Noncharacter_character_reference -> "noncharacter-character-reference"
  | Noncharacter_in_input_stream -> "noncharacter-in-input-stream"
  | Non_void_html_element_start_tag_with_trailing_solidus ->
    "non-void-html-element-start-tag-with-trailing-solidus"
  | Null_character_reference -> "null-character-reference"
  | Surrogate_character_reference -> "surrogate-character-reference"
  | Surrogate_in_input_stream -> "surrogate-in-input-stream"
  | Unexpected_character_after_doctype_system_identifier ->
    "unexpected-character-after-doctype-system-identifier"
  | Unexpected_character_in_attribute_name ->
    "unexpected-character-in-attribute-name"
  | Unexpected_character_in_unquoted_attribute_value ->
    "unexpected-character-in-unquoted-attribute-value"
  | Unexpected_equals_sign_before_attribute_name ->
    "unexpected-equals-sign-before-attribute-name"
  | Unexpected_null_character -> "unexpected-null-character"
  | Unexpected_question_mark_instead_of_tag_name ->
    "unexpected-question-mark-instead-of-tag-name"
  | Unexpected_solidus_in_tag -> "unexpected-solidus-in-tag"
  | Unknown_named_character_reference -> "unknown-named-character-reference"
  | Tree_construction_error custom -> custom

(** [of_string name] is the error code whose kebab-case name is [name].

    Matching is exact (case-sensitive). A string that is not one of the
    names produced by {!to_string} for a constant constructor is wrapped as
    [Tree_construction_error name], so this function is total. *)
let of_string name =
  match name with
  | "abrupt-closing-of-empty-comment" -> Abrupt_closing_of_empty_comment
  | "abrupt-doctype-public-identifier" -> Abrupt_doctype_public_identifier
  | "abrupt-doctype-system-identifier" -> Abrupt_doctype_system_identifier
  | "absence-of-digits-in-numeric-character-reference" ->
    Absence_of_digits_in_numeric_character_reference
  | "cdata-in-html-content" -> Cdata_in_html_content
  | "character-reference-outside-unicode-range" ->
    Character_reference_outside_unicode_range
  | "control-character-in-input-stream" -> Control_character_in_input_stream
  | "control-character-reference" -> Control_character_reference
  | "duplicate-attribute" -> Duplicate_attribute
  | "end-tag-with-attributes" -> End_tag_with_attributes
  | "end-tag-with-trailing-solidus" -> End_tag_with_trailing_solidus
  | "eof-before-tag-name" -> Eof_before_tag_name
  | "eof-in-cdata" -> Eof_in_cdata
  | "eof-in-comment" -> Eof_in_comment
  | "eof-in-doctype" -> Eof_in_doctype
  | "eof-in-script-html-comment-like-text" ->
    Eof_in_script_html_comment_like_text
  | "eof-in-tag" -> Eof_in_tag
  | "incorrectly-closed-comment" -> Incorrectly_closed_comment
  | "incorrectly-opened-comment" -> Incorrectly_opened_comment
  | "invalid-character-sequence-after-doctype-name" ->
    Invalid_character_sequence_after_doctype_name
  | "invalid-first-character-of-tag-name" ->
    Invalid_first_character_of_tag_name
  | "missing-attribute-value" -> Missing_attribute_value
  | "missing-doctype-name" -> Missing_doctype_name
  | "missing-doctype-public-identifier" -> Missing_doctype_public_identifier
  | "missing-doctype-system-identifier" -> Missing_doctype_system_identifier
  | "missing-end-tag-name" -> Missing_end_tag_name
  | "missing-quote-before-doctype-public-identifier" ->
    Missing_quote_before_doctype_public_identifier
  | "missing-quote-before-doctype-system-identifier" ->
    Missing_quote_before_doctype_system_identifier
  | "missing-semicolon-after-character-reference" ->
    Missing_semicolon_after_character_reference
  | "missing-whitespace-after-doctype-public-keyword" ->
    Missing_whitespace_after_doctype_public_keyword
  | "missing-whitespace-after-doctype-system-keyword" ->
    Missing_whitespace_after_doctype_system_keyword
  | "missing-whitespace-before-doctype-name" ->
    Missing_whitespace_before_doctype_name
  | "missing-whitespace-between-attributes" ->
    Missing_whitespace_between_attributes
  | "missing-whitespace-between-doctype-public-and-system-identifiers" ->
    Missing_whitespace_between_doctype_public_and_system_identifiers
  | "nested-comment" -> Nested_comment
  | "noncharacter-character-reference" -> Noncharacter_character_reference
  | "noncharacter-in-input-stream" -> Noncharacter_in_input_stream
  | "non-void-html-element-start-tag-with-trailing-solidus" ->
    Non_void_html_element_start_tag_with_trailing_solidus
  | "null-character-reference" -> Null_character_reference
  | "surrogate-character-reference" -> Surrogate_character_reference
  | "surrogate-in-input-stream" -> Surrogate_in_input_stream
  | "unexpected-character-after-doctype-system-identifier" ->
    Unexpected_character_after_doctype_system_identifier
  | "unexpected-character-in-attribute-name" ->
    Unexpected_character_in_attribute_name
  | "unexpected-character-in-unquoted-attribute-value" ->
    Unexpected_character_in_unquoted_attribute_value
  | "unexpected-equals-sign-before-attribute-name" ->
    Unexpected_equals_sign_before_attribute_name
  | "unexpected-null-character" -> Unexpected_null_character
  | "unexpected-question-mark-instead-of-tag-name" ->
    Unexpected_question_mark_instead_of_tag_name
  | "unexpected-solidus-in-tag" -> Unexpected_solidus_in_tag
  | "unknown-named-character-reference" -> Unknown_named_character_reference
  | unrecognised -> Tree_construction_error unrecognised

(** [of_string_opt s] is [Some (of_string s)].

    NOTE(review): because {!of_string} accepts every string, this never
    returns [None]; unrecognised names come back as
    [Some (Tree_construction_error s)]. Confirm against the interface and
    callers whether [None] was intended for unrecognised names. *)
let of_string_opt s =
  let code = of_string s in
  Some code

(** [is_whatwg_standard code] is [false] for [Tree_construction_error _] and
    [true] for every other constructor. *)
let is_whatwg_standard code =
  match code with
  | Tree_construction_error _ -> false
  | _ -> true

(** [pp fmt t] prints [to_string t] on the formatter [fmt]. *)
let pp fmt t =
  let name = to_string t in
  Format.pp_print_string fmt name