(*--------------------------------------------------------------------------- Copyright (c) 2025 Anil Madhavapeddy . All rights reserved. SPDX-License-Identifier: MIT ---------------------------------------------------------------------------*) let q = Error_code.q (** Generate human-readable message for a parse error code *) let message_of_parse_error code = let code_str = Html5rw.Parse_error_code.to_string code in match code with | Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus -> "Self-closing syntax (\"/>\") used on a non-void HTML element. Ignoring the slash and treating as a start tag." | Html5rw.Parse_error_code.Null_character_reference -> "Character reference expands to zero." | Html5rw.Parse_error_code.Tree_construction_error s -> (* Check for control-character/noncharacter/surrogate with codepoint info *) (try if String.starts_with ~prefix:"control-character-in-input-s" s then let colon_pos = String.index s ':' in let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in let cp = int_of_string ("0x" ^ cp_str) in Printf.sprintf "Forbidden code point U+%04x." cp else if String.starts_with ~prefix:"noncharacter-in-input-str" s then let colon_pos = String.index s ':' in let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in let cp = int_of_string ("0x" ^ cp_str) in Printf.sprintf "Forbidden code point U+%04x." cp else if String.starts_with ~prefix:"surrogate-in-input-str" s then let colon_pos = String.index s ':' in let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in let cp = int_of_string ("0x" ^ cp_str) in Printf.sprintf "Forbidden code point U+%04x." cp (* Character reference errors *) else if String.starts_with ~prefix:"control-character-reference:" s then let cp_str = String.sub s 28 (String.length s - 28) in let cp = int_of_string ("0x" ^ cp_str) in if cp = 0x0D then "A numeric character reference expanded to carriage return." else Printf.sprintf "Character reference expands to a control character (U+%04x)." cp else if String.starts_with ~prefix:"noncharacter-character-referenc" s then let colon_pos = String.index s ':' in let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in let cp = int_of_string ("0x" ^ cp_str) in (* U+FDD0-U+FDEF are "permanently unassigned" *) if cp >= 0xFDD0 && cp <= 0xFDEF then "Character reference expands to a permanently unassigned code point." (* Astral noncharacters (planes 1-16) *) else if cp >= 0x10000 then Printf.sprintf "Character reference expands to an astral non-character (U+%05x)." cp else Printf.sprintf "Character reference expands to a non-character (U+%04x)." cp else if String.starts_with ~prefix:"character-reference-outside-unicode-" s then "Character reference outside the permissible Unicode range." else if String.starts_with ~prefix:"surrogate-character-referen" s then let colon_pos = String.index s ':' in let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in let cp = int_of_string ("0x" ^ cp_str) in Printf.sprintf "Character reference expands to a surrogate (U+%04x)." cp else if s = "no-p-element-in-scope" then Printf.sprintf "No %s element in scope but a %s end tag seen." (q "p") (q "p") else if s = "end-tag-p-implied-but-open-elements" then Printf.sprintf "End tag %s implied, but there were open elements." (q "p") else if s = "end-tag-br" then Printf.sprintf "End tag %s." (q "br") else if s = "expected-closing-tag-but-got-eof" then "End of file seen and there were open elements." else if String.starts_with ~prefix:"bad-start-tag-in-head-noscri" s then let colon_pos = String.index s ':' in let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in Printf.sprintf "Bad start tag in %s in %s in %s." (q element) (q "noscript") (q "head") else if String.starts_with ~prefix:"unexpected-end-tag:" s then let element = String.sub s 19 (String.length s - 19) in Printf.sprintf "Stray end tag %s." (q element) else if String.starts_with ~prefix:"start-tag-in-table:" s then let tag = String.sub s 19 (String.length s - 19) in Printf.sprintf "Start tag %s seen in %s." (q tag) (q "table") else Printf.sprintf "Parse error: %s" s with _ -> Printf.sprintf "Parse error: %s" s) | _ -> Printf.sprintf "Parse error: %s" code_str let of_parse_error ?system_id err = let code = Html5rw.error_code err in let line = Html5rw.error_line err in let column = Html5rw.error_column err in let location = Message.make_location ~line ~column ?system_id () in let message = message_of_parse_error code in Message.of_parse_error ~location ~message code let collect_parse_errors ?system_id result = let errors = Html5rw.errors result in let is_xhtml = match system_id with | Some s -> String.length s > 6 && String.sub s (String.length s - 6) 6 = ".xhtml" | None -> false in let filtered_errors = if is_xhtml then (* XHTML has different requirements than HTML: - No DOCTYPE required - Self-closing syntax is valid for all elements *) List.filter (fun err -> match Html5rw.error_code err with | Html5rw.Parse_error_code.Tree_construction_error "expected-doctype-but-got-other" -> false | Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus -> false | _ -> true ) errors else errors in List.map (of_parse_error ?system_id) filtered_errors