OCaml HTML5 parser/serialiser based on Python's JustHTML

Compare changes

Choose any two refs to compare.

+71 -3
README.md
··· 1 - # html5rw - Pure OCaml HTML5 Parser 1 + # html5rw - Pure OCaml HTML5 Parser and Conformance Checker 2 2 3 - A pure OCaml HTML5 parser implementing the WHATWG HTML5 parsing specification. 3 + A pure OCaml HTML5 parser and validator implementing the WHATWG HTML5 specification. 4 4 This library passes the html5lib-tests suite and provides full support for 5 - tokenization, tree construction, encoding detection, and CSS selector queries. 5 + tokenization, tree construction, encoding detection, CSS selector queries, and 6 + conformance checking. 6 7 This library was ported from [JustHTML](https://github.com/EmilStenstrom/justhtml/). 7 8 8 9 ## Key Features 9 10 10 11 - **WHATWG Compliant**: Implements the full HTML5 parsing algorithm with proper error recovery 12 + - **Conformance Checker**: Validates HTML5 documents against the WHATWG specification 11 13 - **CSS Selectors**: Query the DOM using standard CSS selector syntax 12 14 - **Streaming I/O**: Uses bytesrw for efficient streaming input/output 13 15 - **Encoding Detection**: Automatic character encoding detection following the WHATWG algorithm 14 16 - **Entity Decoding**: Complete HTML5 named character reference support 17 + - **Multiple Output Formats**: Text, JSON (Nu validator compatible), and GNU-style output 18 + 19 + ## Libraries 20 + 21 + - `html5rw` - Core HTML5 parser 22 + - `html5rw.check` - Conformance checker library 23 + 24 + ## Command Line Tool 25 + 26 + The `html5check` CLI validates HTML5 documents: 27 + 28 + ```bash 29 + # Validate a file 30 + html5check index.html 31 + 32 + # Validate from stdin 33 + cat page.html | html5check - 34 + 35 + # JSON output (Nu validator compatible) 36 + html5check --format=json page.html 37 + 38 + # GNU-style output for IDE integration 39 + html5check --format=gnu page.html 40 + 41 + # Show only errors (suppress warnings) 42 + html5check --errors-only page.html 43 + 44 + # Quiet mode - show only counts 45 + html5check --quiet page.html 46 + ``` 47 + 48 + Exit codes: 0 = valid, 1 = validation errors, 2 = I/O error. 15 49 16 50 ## Usage 51 + 52 + ### Parsing HTML 17 53 18 54 ```ocaml 19 55 open Bytesrw ··· 41 77 let reader = Bytes.Reader.of_string "<p>Fragment content</p>" 42 78 let doc = Html5rw.parse ~fragment_context:ctx reader 43 79 ``` 80 + 81 + ### Validating HTML 82 + 83 + ```ocaml 84 + open Bytesrw 85 + 86 + (* Check HTML from a string *) 87 + let html = "<html><body><p>Hello</p></body></html>" 88 + let reader = Bytes.Reader.of_string html 89 + let result = Htmlrw_check.check reader 90 + 91 + (* Check for errors *) 92 + if Htmlrw_check.has_errors result then 93 + print_endline "Document has errors"; 94 + 95 + (* Get all messages *) 96 + let messages = Htmlrw_check.messages result in 97 + List.iter (fun msg -> 98 + Format.printf "%a@." Htmlrw_check.pp_message msg 99 + ) messages; 100 + 101 + (* Get formatted output *) 102 + let text_output = Htmlrw_check.to_text result in 103 + let json_output = Htmlrw_check.to_json result in 104 + let gnu_output = Htmlrw_check.to_gnu result 105 + ``` 106 + 107 + The checker validates: 108 + - Parse errors (malformed HTML syntax) 109 + - Content model violations (invalid element nesting) 110 + - Attribute errors (invalid or missing required attributes) 111 + - Structural issues (other conformance problems) 44 112 45 113 ## Installation 46 114
+2 -2
lib/check/attr_utils.ml
··· 3 3 type attrs = (string * string) list 4 4 5 5 let has_attr name attrs = 6 - List.exists (fun (n, _) -> String.lowercase_ascii n = name) attrs 6 + List.exists (fun (n, _) -> Astring.String.Ascii.lowercase n = name) attrs 7 7 8 8 let get_attr name attrs = 9 9 List.find_map (fun (n, v) -> 10 - if String.lowercase_ascii n = name then Some v else None 10 + if Astring.String.Ascii.lowercase n = name then Some v else None 11 11 ) attrs 12 12 13 13 let get_attr_or name ~default attrs =
+2 -1
lib/check/checker_registry.ml
··· 22 22 Hashtbl.replace reg "source" Source_checker.checker; 23 23 Hashtbl.replace reg "label" Label_checker.checker; 24 24 Hashtbl.replace reg "ruby" Ruby_checker.checker; 25 - Hashtbl.replace reg "h1" H1_checker.checker; 25 + Hashtbl.replace reg "heading" Heading_checker.checker; 26 26 Hashtbl.replace reg "srcset-sizes" Srcset_sizes_checker.checker; 27 27 Hashtbl.replace reg "autofocus" Autofocus_checker.checker; 28 28 Hashtbl.replace reg "option" Option_checker.checker; ··· 36 36 Hashtbl.replace reg "xhtml-content" Xhtml_content_checker.checker; 37 37 Hashtbl.replace reg "lang-detecting" Lang_detecting_checker.checker; 38 38 Hashtbl.replace reg "unknown-element" Unknown_element_checker.checker; 39 + Hashtbl.replace reg "content" Content_checker.checker; 39 40 reg 40 41 41 42 let register registry name checker = Hashtbl.replace registry name checker
+64 -7
lib/check/content_model/content_checker.ml
··· 30 30 | Some spec -> 31 31 List.exists (fun cat -> Element_spec.has_category spec cat) cats) 32 32 | Content_model.Elements names -> 33 - List.mem (String.lowercase_ascii element_name) 34 - (List.map String.lowercase_ascii names) 33 + List.mem (Astring.String.Ascii.lowercase element_name) 34 + (List.map Astring.String.Ascii.lowercase names) 35 35 | Content_model.Mixed cats -> ( 36 36 match Element_registry.get registry element_name with 37 37 | None -> false ··· 79 79 (`Element (`Not_allowed_as_child (`Child name, `Parent prohibited)))) 80 80 spec.Element_spec.prohibited_ancestors 81 81 82 + (* Check if element is allowed via permitted_parents *) 83 + let is_permitted_parent registry child_name parent_name = 84 + match Element_registry.get registry child_name with 85 + | None -> false 86 + | Some spec -> 87 + match spec.Element_spec.permitted_parents with 88 + | None -> false 89 + | Some parents -> 90 + List.mem (Astring.String.Ascii.lowercase parent_name) 91 + (List.map Astring.String.Ascii.lowercase parents) 92 + 93 + (* Check if a specific element is in the ancestor stack *) 94 + let has_ancestor state ancestor_name = 95 + List.exists (fun ctx -> 96 + String.equal (Astring.String.Ascii.lowercase ctx.name) 97 + (Astring.String.Ascii.lowercase ancestor_name) 98 + ) state.ancestor_stack 99 + 100 + (* Check if an attribute exists in raw attrs list *) 101 + let has_raw_attr name attrs = 102 + List.exists (fun (n, _) -> 103 + Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name 104 + ) attrs 105 + 106 + (* Special cases for content model validation: 107 + - dt/dd inside div is only valid when dl is an ancestor (div as grouping in dl) 108 + - meta with property/itemprop/name attribute in body is valid (RDFa/microdata) 109 + - link with itemprop in body is valid (microdata) *) 110 + let is_special_case_allowed state child_name parent_name raw_attrs = 111 + let child_lower = Astring.String.Ascii.lowercase child_name in 112 + let parent_lower = Astring.String.Ascii.lowercase parent_name in 113 + (* dt/dd inside div is allowed when dl is an ancestor *) 114 + if (child_lower = "dt" || child_lower = "dd") && parent_lower = "div" then 115 + has_ancestor state "dl" 116 + (* meta in body is allowed with property (RDFa), itemprop (microdata), or name+content (meta tags) *) 117 + else if child_lower = "meta" && parent_lower <> "head" then 118 + has_raw_attr "property" raw_attrs || 119 + has_raw_attr "itemprop" raw_attrs || 120 + (has_raw_attr "name" raw_attrs && has_raw_attr "content" raw_attrs) 121 + (* link in body is allowed with itemprop (microdata) or property (RDFa) *) 122 + else if child_lower = "link" && parent_lower <> "head" then 123 + has_raw_attr "itemprop" raw_attrs || has_raw_attr "property" raw_attrs 124 + (* Custom elements (with hyphen) are valid HTML5 and are flow content *) 125 + else if String.contains child_lower '-' then 126 + true 127 + else 128 + false 129 + 82 130 (* Validate that a child element is allowed *) 83 - let validate_child_element state child_name collector = 131 + let validate_child_element state child_name raw_attrs collector = 84 132 match state.ancestor_stack with 85 133 | [] -> 86 134 (* Root level - only html allowed *) 87 - if not (String.equal (String.lowercase_ascii child_name) "html") then 135 + if not (String.equal (Astring.String.Ascii.lowercase child_name) "html") then 88 136 Message_collector.add_typed collector 89 137 (`Generic (Printf.sprintf "Element '%s' not allowed at document root (only 'html' allowed)" child_name)) 90 138 | parent :: _ -> 91 139 let content_model = parent.spec.Element_spec.content_model in 92 - if not (matches_content_model state.registry child_name content_model) then 140 + (* Check content model, permitted_parents, or special cases *) 141 + let allowed_by_content_model = matches_content_model state.registry child_name content_model in 142 + let allowed_by_permitted_parents = is_permitted_parent state.registry child_name parent.name in 143 + let allowed_by_special_case = is_special_case_allowed state child_name parent.name raw_attrs in 144 + if not (allowed_by_content_model || allowed_by_permitted_parents || allowed_by_special_case) then 93 145 Message_collector.add_typed collector 94 146 (`Element (`Not_allowed_as_child (`Child child_name, `Parent parent.name))) 95 147 96 148 let start_element state ~element collector = 97 149 let name = Tag.tag_to_string element.Element.tag in 150 + let raw_attrs = element.Element.raw_attrs in 98 151 99 152 (* Check if we're inside a foreign (SVG/MathML) context *) 100 153 let in_foreign_context = match state.ancestor_stack with ··· 127 180 match spec_opt with 128 181 | None -> 129 182 (* Unknown element - first check if it's allowed in current context *) 130 - validate_child_element state name collector 183 + validate_child_element state name raw_attrs collector; 184 + (* Push unknown element onto stack with default flow content model *) 185 + let spec = Element_spec.make ~name ~content_model:(Content_model.Categories [Content_category.Flow]) () in 186 + let context = { name; spec; children_count = 0; is_foreign = false } in 187 + state.ancestor_stack <- context :: state.ancestor_stack 131 188 | Some spec -> 132 189 (* Check prohibited ancestors *) 133 190 check_prohibited_ancestors state name spec collector; 134 191 135 192 (* Validate this element is allowed as child of parent *) 136 - validate_child_element state name collector; 193 + validate_child_element state name raw_attrs collector; 137 194 138 195 (* Push element context onto stack *) 139 196 let context = { name; spec; children_count = 0; is_foreign = false } in
+2 -2
lib/check/content_model/element_registry.ml
··· 3 3 let create () = Hashtbl.create 128 4 4 5 5 let register registry spec = 6 - let name = String.lowercase_ascii spec.Element_spec.name in 6 + let name = Astring.String.Ascii.lowercase spec.Element_spec.name in 7 7 Hashtbl.replace registry name spec 8 8 9 9 let get registry name = 10 - let name = String.lowercase_ascii name in 10 + let name = Astring.String.Ascii.lowercase name in 11 11 Hashtbl.find_opt registry name 12 12 13 13 let list_names registry =
+3 -1
lib/check/content_model/elements_embedded.ml
··· 31 31 () 32 32 33 33 let img = 34 + (* Note: img is only Interactive when it has usemap attribute; 35 + we omit Interactive from static categories since usemap is rare *) 34 36 Element_spec.make ~name:"img" ~void:true 35 - ~categories:[ Flow; Phrasing; Embedded; Palpable; Interactive ] 37 + ~categories:[ Flow; Phrasing; Embedded; Palpable ] 36 38 ~content_model:Nothing 37 39 ~attrs: 38 40 [
+1 -1
lib/check/content_model/elements_form.ml
··· 97 97 let select = 98 98 Element_spec.make ~name:"select" 99 99 ~categories:[Flow; Phrasing; Interactive; Palpable] 100 - ~content_model:(Elements ["option"; "optgroup"; "script"; "template"]) 100 + ~content_model:(Elements ["option"; "optgroup"; "hr"; "script"; "template"]) 101 101 ~attrs:[ 102 102 Attr_spec.make "autocomplete" ~datatype:"autocomplete" (); 103 103 Attr_spec.make "disabled" ~datatype:"boolean" ();
-1
lib/check/content_model/elements_table.ml
··· 34 34 ~categories:[] 35 35 ~content_model:(Categories [ Flow ]) 36 36 ~permitted_parents:[ "table" ] 37 - ~prohibited_ancestors:[ "table" ] 38 37 ~attrs:[] () 39 38 40 39 let colgroup =
+28 -6
lib/check/datatype/datatype.ml
··· 12 12 13 13 (* Helper utilities *) 14 14 15 + (** Character predicates *) 16 + 15 17 let is_whitespace = function 16 18 | ' ' | '\t' | '\n' | '\r' | '\012' (* FF *) -> true 17 19 | _ -> false 18 20 19 21 let is_ascii_digit = function '0' .. '9' -> true | _ -> false 20 22 21 - let to_ascii_lowercase c = 22 - match c with 'A' .. 'Z' -> Char.chr (Char.code c + 32) | _ -> c 23 + let is_lower_alpha = function 'a' .. 'z' -> true | _ -> false 24 + 25 + let is_upper_alpha = function 'A' .. 'Z' -> true | _ -> false 26 + 27 + let is_alpha c = is_lower_alpha c || is_upper_alpha c 28 + 29 + let is_alphanumeric c = is_alpha c || is_ascii_digit c 23 30 24 - let string_to_ascii_lowercase s = 25 - String.map to_ascii_lowercase s 31 + let is_hex_digit = function 32 + | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true 33 + | _ -> false 34 + 35 + (** Case conversion - delegated to Astring *) 36 + 37 + (* Removed to_ascii_lowercase and string_to_ascii_lowercase - use Astring.String.Ascii.lowercase instead *) 38 + 39 + (** String predicates *) 40 + 41 + let is_non_empty s = String.trim s <> "" 42 + 43 + let is_all_digits s = String.length s > 0 && String.for_all is_ascii_digit s 44 + 45 + let is_all_alpha s = String.length s > 0 && String.for_all is_alpha s 46 + 47 + let is_all_alphanumeric s = String.length s > 0 && String.for_all is_alphanumeric s 26 48 27 49 let trim_html_spaces s = 28 50 let len = String.length s in ··· 52 74 let make_enum ~name ~values ?(allow_empty = true) () : t = 53 75 (* Pre-compute hashtable for O(1) membership *) 54 76 let values_tbl = Hashtbl.create (List.length values) in 55 - List.iter (fun v -> Hashtbl.add values_tbl (String.lowercase_ascii v) ()) values; 77 + List.iter (fun v -> Hashtbl.add values_tbl (Astring.String.Ascii.lowercase v) ()) values; 56 78 let values_str = String.concat ", " (List.map (Printf.sprintf "'%s'") values) in 57 79 (module struct 58 80 let name = name 59 81 let validate s = 60 - let s_lower = string_to_ascii_lowercase s in 82 + let s_lower = Astring.String.Ascii.lowercase s in 61 83 if (allow_empty && s = "") || Hashtbl.mem values_tbl s_lower then Ok () 62 84 else Error (Printf.sprintf "The value '%s' is not a valid %s value. Expected %s%s." 63 85 s name (if allow_empty then "empty string, " else "") values_str)
+36 -5
lib/check/datatype/datatype.mli
··· 27 27 (** Check if a value is valid *) 28 28 val is_valid : t -> string -> bool 29 29 30 - (** Helper utilities for implementing datatype validators. *) 30 + (** {1 Helper utilities for implementing datatype validators} *) 31 + 32 + (** {2 Character predicates} *) 31 33 32 34 (** Check if a character is HTML5 whitespace (space, tab, LF, FF, or CR). *) 33 35 val is_whitespace : char -> bool ··· 35 37 (** Check if a character is an ASCII digit (0-9). *) 36 38 val is_ascii_digit : char -> bool 37 39 38 - (** Convert an ASCII character to lowercase. *) 39 - val to_ascii_lowercase : char -> char 40 + (** Check if a character is a lowercase ASCII letter (a-z). *) 41 + val is_lower_alpha : char -> bool 42 + 43 + (** Check if a character is an uppercase ASCII letter (A-Z). *) 44 + val is_upper_alpha : char -> bool 45 + 46 + (** Check if a character is an ASCII letter (a-z or A-Z). *) 47 + val is_alpha : char -> bool 48 + 49 + (** Check if a character is an ASCII letter or digit. *) 50 + val is_alphanumeric : char -> bool 51 + 52 + (** Check if a character is a hexadecimal digit (0-9, a-f, A-F). *) 53 + val is_hex_digit : char -> bool 54 + 55 + (** {2 Case conversion} *) 56 + 57 + (** Case conversion functions removed - use Astring.String.Ascii.lowercase instead *) 40 58 41 - (** Convert an ASCII string to lowercase. *) 42 - val string_to_ascii_lowercase : string -> string 59 + (** {2 String predicates} *) 60 + 61 + (** Check if a string has non-whitespace content after trimming. *) 62 + val is_non_empty : string -> bool 63 + 64 + (** Check if all characters in a non-empty string are ASCII digits. *) 65 + val is_all_digits : string -> bool 66 + 67 + (** Check if all characters in a non-empty string are ASCII letters. *) 68 + val is_all_alpha : string -> bool 69 + 70 + (** Check if all characters in a non-empty string are ASCII letters or digits. *) 71 + val is_all_alphanumeric : string -> bool 72 + 73 + (** {2 String manipulation} *) 43 74 44 75 (** Trim HTML5 whitespace from both ends of a string. *) 45 76 val trim_html_spaces : string -> string
+8 -9
lib/check/datatype/dt_autocomplete.ml
··· 2 2 3 3 (* Use shared utilities from Datatype *) 4 4 let is_whitespace = Datatype.is_whitespace 5 - let to_ascii_lowercase = Datatype.to_ascii_lowercase 5 + let to_ascii_lowercase c = 6 + match c with 'A' .. 'Z' -> Char.chr (Char.code c + 32) | _ -> c 7 + 8 + (* Use Astring for string operations *) 9 + let is_prefix = Astring.String.is_prefix 6 10 7 11 (** Trim whitespace from string and collapse internal whitespace *) 8 12 let trim_whitespace s = ··· 104 108 (** Split string on whitespace - uses shared utility *) 105 109 let split_on_whitespace = Datatype.split_on_whitespace 106 110 107 - (** Check if string starts with prefix *) 108 - let starts_with s prefix = 109 - String.length s >= String.length prefix 110 - && String.sub s 0 (String.length prefix) = prefix 111 - 112 111 (** Validate detail tokens *) 113 112 let check_tokens tokens = 114 113 let tokens = ref tokens in ··· 116 115 117 116 (* Check for section-* *) 118 117 (match !tokens with 119 - | token :: rest when starts_with token "section-" -> 118 + | token :: rest when is_prefix ~affix:"section-" token -> 120 119 tokens := rest 121 120 | _ -> ()); 122 121 ··· 145 144 146 145 (* Check if any token in the list is a section-* indicator *) 147 146 let find_section tokens = 148 - List.find_opt (fun t -> starts_with t "section-") tokens 147 + List.find_opt (fun t -> is_prefix ~affix:"section-" t) tokens 149 148 in 150 149 151 150 (* Check if webauthn appears anywhere except as the very last token *) ··· 207 206 (Printf.sprintf 208 207 "The token \"%s\" must only appear before any autofill field names." 209 208 token) 210 - | token :: _ when starts_with token "section-" -> 209 + | token :: _ when is_prefix ~affix:"section-" token -> 211 210 Error 212 211 "A \"section-*\" indicator must only appear as the first token in a \ 213 212 list of autofill detail tokens."
+2 -2
lib/check/datatype/dt_boolean.ml
··· 22 22 match s with 23 23 | "" | "true" | "false" -> Ok () 24 24 | _ -> 25 - let s_lower = Datatype.string_to_ascii_lowercase s in 26 - let attr_lower = Datatype.string_to_ascii_lowercase attr_name in 25 + let s_lower = Astring.String.Ascii.lowercase s in 26 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 27 27 if s_lower = attr_lower then Ok () 28 28 else 29 29 Error
+1 -1
lib/check/datatype/dt_button_type.ml
··· 7 7 let name = "button-type" 8 8 9 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 11 if List.mem s_lower valid_types then Ok () 12 12 else 13 13 Error
+3 -6
lib/check/datatype/dt_charset.ml
··· 1 1 (** Helper functions for charset validation *) 2 2 3 3 let is_valid_charset_char c = 4 - (c >= '0' && c <= '9') || 5 - (c >= 'a' && c <= 'z') || 6 - (c >= 'A' && c <= 'Z') || 4 + Datatype.is_alphanumeric c || 7 5 c = '-' || c = '!' || c = '#' || c = '$' || c = '%' || c = '&' || 8 6 c = '\'' || c = '+' || c = '_' || c = '`' || c = '{' || c = '}' || 9 7 c = '~' || c = '^' 10 8 11 - let to_lower s = String.lowercase_ascii s 9 + let to_lower = Astring.String.Ascii.lowercase 12 10 13 11 (** Common encoding labels recognized by WHATWG Encoding Standard. 14 12 This is a subset of the full list. *) ··· 74 72 module Meta_charset = struct 75 73 let name = "legacy character encoding declaration" 76 74 77 - let is_whitespace c = 78 - c = ' ' || c = '\t' || c = '\n' || c = '\012' || c = '\r' 75 + let is_whitespace = Datatype.is_whitespace 79 76 80 77 let validate s = 81 78 let lower = to_lower s in
+2 -3
lib/check/datatype/dt_color.ml
··· 154 154 ] 155 155 156 156 (** Check if character is hex digit *) 157 - let is_hex_digit c = 158 - (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') 157 + let is_hex_digit = Datatype.is_hex_digit 159 158 160 159 (** Validate hex color (#RGB or #RRGGBB) *) 161 160 let validate_hex_color s = ··· 209 208 let name = "color" 210 209 211 210 let validate s = 212 - let s = String.trim s |> String.lowercase_ascii in 211 + let s = String.trim s |> Astring.String.Ascii.lowercase in 213 212 if String.length s = 0 then Error "Color value must not be empty" 214 213 else if List.mem s named_colors then Ok () 215 214 else if String.length s > 0 && s.[0] = '#' then validate_hex_color s
+1 -1
lib/check/datatype/dt_contenteditable.ml
··· 4 4 let name = "contenteditable" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "true" | "false" | "plaintext-only" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_crossorigin.ml
··· 4 4 let name = "crossorigin" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "anonymous" | "use-credentials" -> Ok () 10 10 | _ ->
+1 -4
lib/check/datatype/dt_datetime.ml
··· 1 1 (** Helper functions for datetime validation *) 2 2 3 - let is_digit c = c >= '0' && c <= '9' 4 - 5 - let is_all_digits s = 6 - String.for_all is_digit s 3 + let is_all_digits = Datatype.is_all_digits 7 4 8 5 let parse_int s = 9 6 try Some (int_of_string s)
+1 -1
lib/check/datatype/dt_decoding.ml
··· 4 4 let name = "decoding" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "sync" | "async" | "auto" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_dir.ml
··· 4 4 let name = "dir" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "ltr" | "rtl" | "auto" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_draggable.ml
··· 4 4 let name = "draggable" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "true" | "false" -> Ok () 10 10 | _ ->
+1 -3
lib/check/datatype/dt_email.ml
··· 2 2 3 3 (** Helper to check if a character is valid in email local/domain parts *) 4 4 let is_email_char c = 5 - (c >= 'a' && c <= 'z') 6 - || (c >= 'A' && c <= 'Z') 7 - || (c >= '0' && c <= '9') 5 + Datatype.is_alphanumeric c 8 6 || c = '.' || c = '-' || c = '_' || c = '+' || c = '=' 9 7 10 8 (** Validate a single email address using simplified rules *)
+1 -1
lib/check/datatype/dt_enterkeyhint.ml
··· 4 4 let name = "enterkeyhint" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "enter" | "done" | "go" | "next" | "previous" | "search" | "send" -> 10 10 Ok ()
+1 -1
lib/check/datatype/dt_fetchpriority.ml
··· 4 4 let name = "fetchpriority" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "high" | "low" | "auto" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_form_enctype.ml
··· 12 12 let name = "form-enctype" 13 13 14 14 let validate s = 15 - let s_lower = Datatype.string_to_ascii_lowercase s in 15 + let s_lower = Astring.String.Ascii.lowercase s in 16 16 if List.mem s_lower valid_enctypes then Ok () 17 17 else 18 18 Error
+1 -1
lib/check/datatype/dt_form_method.ml
··· 7 7 let name = "form-method" 8 8 9 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 11 if List.mem s_lower valid_methods then Ok () 12 12 else 13 13 Error
+1 -1
lib/check/datatype/dt_hidden.ml
··· 4 4 let name = "hidden" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "hidden" | "until-found" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_input_type.ml
··· 31 31 let name = "input-type" 32 32 33 33 let validate s = 34 - let s_lower = Datatype.string_to_ascii_lowercase s in 34 + let s_lower = Astring.String.Ascii.lowercase s in 35 35 if List.mem s_lower valid_types then Ok () 36 36 else 37 37 Error
+1 -1
lib/check/datatype/dt_inputmode.ml
··· 4 4 let name = "inputmode" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "none" | "text" | "decimal" | "numeric" | "tel" | "search" 10 10 | "email" | "url" ->
+1 -1
lib/check/datatype/dt_integrity.ml
··· 49 49 "Hash value '%s' must be in format 'algorithm-base64hash'" trimmed) 50 50 | Some dash_pos -> 51 51 let algorithm = String.sub trimmed 0 dash_pos in 52 - let algorithm_lower = Datatype.string_to_ascii_lowercase algorithm in 52 + let algorithm_lower = Astring.String.Ascii.lowercase algorithm in 53 53 if not (List.mem algorithm_lower valid_algorithms) then 54 54 Error 55 55 (Printf.sprintf
+1 -1
lib/check/datatype/dt_kind.ml
··· 4 4 let name = "kind" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "subtitles" | "captions" | "descriptions" | "chapters" | "metadata" -> Ok () 10 10 | _ ->
+4 -17
lib/check/datatype/dt_language.ml
··· 2 2 3 3 let q = Error_code.q 4 4 5 - let is_lower_alpha c = c >= 'a' && c <= 'z' 6 - let is_upper_alpha c = c >= 'A' && c <= 'Z' 7 - let is_alpha c = is_lower_alpha c || is_upper_alpha c 8 - let is_digit c = c >= '0' && c <= '9' 9 - let is_alphanumeric c = is_alpha c || is_digit c 10 - 11 - let is_all_alpha s = 12 - String.for_all is_alpha s 13 - 14 - let _is_all_digits s = 15 - String.for_all is_digit s 16 - 17 - let is_all_alphanumeric s = 18 - String.for_all is_alphanumeric s 19 - 20 - let to_lower s = 21 - String.lowercase_ascii s 5 + (* Use shared character predicates from Datatype *) 6 + let is_all_alpha = Datatype.is_all_alpha 7 + let is_all_alphanumeric = Datatype.is_all_alphanumeric 8 + let to_lower = Astring.String.Ascii.lowercase 22 9 23 10 (** Valid extlang subtags per IANA language-subtag-registry. 24 11 Extlangs are 3-letter subtags that follow the primary language.
+1 -1
lib/check/datatype/dt_list_type.ml
··· 26 26 let name = "ul-type" 27 27 28 28 let validate s = 29 - let s_lower = Datatype.string_to_ascii_lowercase s in 29 + let s_lower = Astring.String.Ascii.lowercase s in 30 30 if List.mem s_lower valid_ul_types then Ok () 31 31 else 32 32 Error
+1 -1
lib/check/datatype/dt_loading.ml
··· 4 4 let name = "loading" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "lazy" | "eager" -> Ok () 10 10 | _ ->
+44 -25
lib/check/datatype/dt_media_query.ml
··· 70 70 (** Media query keywords (unused but kept for documentation) *) 71 71 let _media_keywords = [ "and"; "not"; "only" ] 72 72 73 - (** Check if character is whitespace *) 74 - let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r' 73 + let is_whitespace = Datatype.is_whitespace 75 74 76 75 (** Check if character can start an identifier *) 77 76 let is_ident_start c = 78 - (c >= 'a' && c <= 'z') 79 - || (c >= 'A' && c <= 'Z') 80 - || c = '_' || c = '-' || Char.code c >= 128 77 + Datatype.is_alpha c || c = '_' || c = '-' || Char.code c >= 128 81 78 82 79 (** Check if character can be in an identifier *) 83 80 let is_ident_char c = 84 - is_ident_start c || (c >= '0' && c <= '9') 81 + is_ident_start c || Datatype.is_ascii_digit c 82 + 83 + (** Unicode case folding for case-insensitive comparison. 84 + 85 + WORKAROUND: This is a temporary domain-specific implementation because 86 + the uucp library fails to compile with wasm_of_ocaml due to "too many 87 + locals" errors. Once uucp supports WASM, restore the proper implementation: 88 + 89 + {[ 90 + (* Proper uucp-based case folding: *) 91 + let case_fold s = 92 + let buf = Buffer.create (String.length s) in 93 + let add_uchar u = Uutf.Buffer.add_utf_8 buf u in 94 + let fold_char () _pos = function 95 + | `Malformed _ -> () 96 + | `Uchar u -> 97 + match Uucp.Case.Fold.fold u with 98 + | `Self -> add_uchar u 99 + | `Uchars us -> List.iter add_uchar us 100 + in 101 + Uutf.String.fold_utf_8 fold_char () s; 102 + Buffer.contents buf 103 + ]} 85 104 86 - (** Unicode case-fold for Turkish dotted-I etc *) 87 - let lowercase_unicode s = 88 - (* Handle special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE -> i *) 105 + This workaround handles the Turkish dotted-I (U+0130 -> 'i' + U+0307) 106 + which is the main non-ASCII case relevant for CSS media query identifiers. *) 107 + let case_fold s = 89 108 let buf = Buffer.create (String.length s) in 109 + let len = String.length s in 90 110 let i = ref 0 in 91 - while !i < String.length s do 111 + while !i < len do 92 112 let c = s.[!i] in 93 - if c = '\xc4' && !i + 1 < String.length s && s.[!i + 1] = '\xb0' then begin 94 - (* U+0130 -> 'i' + U+0307 (combining dot above), but for simplicity just 'i' followed by U+0307 *) 113 + (* U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE encoded as UTF-8: 0xC4 0xB0 *) 114 + if c = '\xc4' && !i + 1 < len && s.[!i + 1] = '\xb0' then begin 115 + (* Case fold to 'i' + U+0307 (combining dot above) = 0x69 0xCC 0x87 *) 95 116 Buffer.add_string buf "i\xcc\x87"; 96 117 i := !i + 2 97 118 end else begin ··· 151 172 let trimmed = String.trim s in 152 173 if String.length trimmed >= 3 then begin 153 174 let suffix = String.sub trimmed (String.length trimmed - 3) 3 in 154 - if String.lowercase_ascii suffix = "and" then 175 + if Astring.String.Ascii.lowercase suffix = "and" then 155 176 Error "Parse Error." 156 177 else if String.length trimmed >= 4 then begin 157 178 let suffix4 = String.sub trimmed (String.length trimmed - 4) 4 in 158 - if String.lowercase_ascii suffix4 = "and(" then 179 + if Astring.String.Ascii.lowercase suffix4 = "and(" then 159 180 Error "Parse Error." 160 181 else 161 182 validate_media_query_content trimmed ··· 201 222 let has_not = ref false in 202 223 (match read_ident () with 203 224 | Some w -> 204 - let w_lower = String.lowercase_ascii w in 225 + let w_lower = Astring.String.Ascii.lowercase w in 205 226 if w_lower = "only" then (has_only := true; skip_ws ()) 206 227 else if w_lower = "not" then (has_not := true; skip_ws ()) 207 228 else i := !i - String.length w (* put back *) ··· 222 243 match read_ident () with 223 244 | None -> Error "Parse Error." 224 245 | Some media_type -> 225 - let mt_lower = lowercase_unicode media_type in 246 + let mt_lower = case_fold media_type in 226 247 (* Check for deprecated media type *) 227 248 if List.mem mt_lower deprecated_media_types then 228 249 Error (Printf.sprintf "The media \"%s\" has been deprecated" mt_lower) ··· 238 259 match read_ident () with 239 260 | None -> Error "Parse Error." 240 261 | Some kw -> 241 - let kw_lower = String.lowercase_ascii kw in 262 + let kw_lower = Astring.String.Ascii.lowercase kw in 242 263 if kw_lower <> "and" then Error "Parse Error." 243 264 else begin 244 265 (* Check that there was whitespace before 'and' *) ··· 267 288 match read_ident () with 268 289 | None -> Error "Parse Error." 269 290 | Some kw2 -> 270 - let kw2_lower = String.lowercase_ascii kw2 in 291 + let kw2_lower = Astring.String.Ascii.lowercase kw2 in 271 292 if kw2_lower <> "and" then Error "Parse Error." 272 293 else begin 273 294 skip_ws (); ··· 295 316 match String.index_opt content ':' with 296 317 | None -> 297 318 (* Just feature name - boolean feature or range syntax *) 298 - let feature_lower = String.lowercase_ascii content in 319 + let feature_lower = Astring.String.Ascii.lowercase content in 299 320 if List.mem feature_lower deprecated_media_features then 300 321 Error (Printf.sprintf "Deprecated media feature \"%s\". For guidance, see the Deprecated Media Features section in the current Media Queries specification." feature_lower) 301 322 else if List.mem feature_lower valid_media_features then ··· 305 326 | Some colon_pos -> 306 327 let feature = String.trim (String.sub content 0 colon_pos) in 307 328 let value = String.trim (String.sub content (colon_pos + 1) (String.length content - colon_pos - 1)) in 308 - let feature_lower = String.lowercase_ascii feature in 329 + let feature_lower = Astring.String.Ascii.lowercase feature in 309 330 310 331 (* Check for deprecated features *) 311 332 if List.mem feature_lower deprecated_media_features then ··· 341 362 if List.mem feature length_features then begin 342 363 (* Must be a valid length: number followed by unit *) 343 364 let value = String.trim value in 344 - let is_digit c = c >= '0' && c <= '9' in 345 365 346 366 (* Parse number - includes sign, digits, and decimal point *) 347 367 let i = ref 0 in 348 368 let len = String.length value in 349 - while !i < len && (is_digit value.[!i] || value.[!i] = '.' || value.[!i] = '-' || value.[!i] = '+') do 369 + while !i < len && (Datatype.is_ascii_digit value.[!i] || value.[!i] = '.' || value.[!i] = '-' || value.[!i] = '+') do 350 370 incr i 351 371 done; 352 372 let num_part = String.sub value 0 !i in ··· 367 387 else if unit_part = "" then 368 388 Error "only \"0\" can be a \"unit\". You must put a unit after your number" 369 389 else begin 370 - let unit_lower = String.lowercase_ascii unit_part in 390 + let unit_lower = Astring.String.Ascii.lowercase unit_part in 371 391 if List.mem unit_lower valid_length_units then Ok () 372 392 else if List.mem unit_lower valid_resolution_units then 373 393 Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature) ··· 377 397 end else if List.mem feature color_features then begin 378 398 (* Must be an integer *) 379 399 let value = String.trim value in 380 - let is_digit c = c >= '0' && c <= '9' in 381 - if String.length value > 0 && String.for_all is_digit value then Ok () 400 + if Datatype.is_all_digits value then Ok () 382 401 else 383 402 Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature) 384 403 end else
+2 -3
lib/check/datatype/dt_mime.ml
··· 1 1 (** MIME type validation based on RFC 2045 and HTML5 spec *) 2 2 3 - (** Check if character is whitespace *) 4 - let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r' 3 + let is_whitespace = Datatype.is_whitespace 5 4 6 5 (** Check if character is a token character (RFC 2045) *) 7 6 let is_token_char c = ··· 92 91 if is_token_char c then parse In_subtype (i + 1) 93 92 else if c = ';' then 94 93 (* Check if this is a JavaScript MIME type *) 95 - let mime_type = String.sub s 0 i |> String.lowercase_ascii in 94 + let mime_type = String.sub s 0 i |> Astring.String.Ascii.lowercase in 96 95 if List.mem mime_type javascript_mime_types then 97 96 Error 98 97 "A JavaScript MIME type must not contain any characters after \
+1 -1
lib/check/datatype/dt_popover.ml
··· 4 4 let name = "popover" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "auto" | "manual" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_preload.ml
··· 7 7 let name = "preload" 8 8 9 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 11 if List.mem s_lower valid_preloads then Ok () 12 12 else 13 13 Error
+1 -1
lib/check/datatype/dt_referrer.ml
··· 4 4 let name = "referrerpolicy" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" 10 10 | "no-referrer"
+1 -1
lib/check/datatype/dt_scope.ml
··· 7 7 let name = "scope" 8 8 9 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 11 if List.mem s_lower valid_scopes then Ok () 12 12 else 13 13 Error
+1 -1
lib/check/datatype/dt_shape.ml
··· 4 4 let name = "shape" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "default" | "rect" | "circle" | "poly" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_spellcheck.ml
··· 4 4 let name = "spellcheck" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "true" | "false" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_target.ml
··· 8 8 if String.length s = 0 then Error "Browsing context name must not be empty" 9 9 else if s.[0] = '_' then 10 10 (* If starts with underscore, must be a special keyword *) 11 - let lower = Datatype.string_to_ascii_lowercase s in 11 + let lower = Astring.String.Ascii.lowercase s in 12 12 if List.mem lower special_keywords then Ok () 13 13 else 14 14 Error
+1 -1
lib/check/datatype/dt_translate.ml
··· 4 4 let name = "translate" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "yes" | "no" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_url.ml
··· 30 30 match s.[i] with 31 31 | ':' -> 32 32 let scheme = 33 - String.sub s start (i - start) |> Datatype.string_to_ascii_lowercase 33 + String.sub s start (i - start) |> Astring.String.Ascii.lowercase 34 34 in 35 35 let rest = String.sub s (i + 1) (len - i - 1) in 36 36 Some (scheme, rest)
+1 -1
lib/check/datatype/dt_wrap.ml
··· 7 7 let name = "wrap" 8 8 9 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 11 if List.mem s_lower valid_wraps then Ok () 12 12 else 13 13 Error
+3 -3
lib/check/element/attr.ml
··· 571 571 572 572 (** Parse a single attribute name-value pair to typed attribute *) 573 573 let parse_attr name value : t = 574 - let name_lower = String.lowercase_ascii name in 575 - let value_lower = String.lowercase_ascii value in 574 + let name_lower = Astring.String.Ascii.lowercase name in 575 + let value_lower = Astring.String.Ascii.lowercase value in 576 576 match name_lower with 577 577 (* Global attributes *) 578 578 | "id" -> `Id value ··· 875 875 (** Get rel attribute as list of link types (space-separated, lowercased per HTML5 spec) *) 876 876 let get_rel_list attrs = 877 877 match get_rel attrs with 878 - | Some s -> List.map String.lowercase_ascii (Datatype.split_on_whitespace s) 878 + | Some s -> List.map Astring.String.Ascii.lowercase (Datatype.split_on_whitespace s) 879 879 | None -> [] 880 880 881 881 (** Get headers attribute as raw string *)
+4 -4
lib/check/element/element.ml
··· 21 21 22 22 (** Parse element-specific type attribute based on tag *) 23 23 let parse_type_attr (tag : Tag.html_tag) value : Attr.t = 24 - let value_lower = String.lowercase_ascii value in 24 + let value_lower = Astring.String.Ascii.lowercase value in 25 25 match tag with 26 26 | `Input -> 27 27 (match Attr.parse_input_type value_lower with ··· 42 42 (** Parse attributes with element context for type attribute *) 43 43 let parse_attrs_for_tag (tag : Tag.element_tag) (raw_attrs : (string * string) list) : Attr.t list = 44 44 List.map (fun (name, value) -> 45 - let name_lower = String.lowercase_ascii name in 45 + let name_lower = Astring.String.Ascii.lowercase name in 46 46 if name_lower = "type" then 47 47 match tag with 48 48 | Tag.Html html_tag -> parse_type_attr html_tag value ··· 274 274 (** Get raw attribute value (from original attrs) *) 275 275 let get_raw_attr name elem = 276 276 List.find_map (fun (n, v) -> 277 - if String.lowercase_ascii n = String.lowercase_ascii name then Some v else None 277 + if Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name then Some v else None 278 278 ) elem.raw_attrs 279 279 280 280 (** Check if raw attribute exists *) 281 281 let has_raw_attr name elem = 282 282 List.exists (fun (n, _) -> 283 - String.lowercase_ascii n = String.lowercase_ascii name 283 + Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name 284 284 ) elem.raw_attrs 285 285 286 286 (** {1 Pattern Matching Helpers} *)
+3 -3
lib/check/element/tag.ml
··· 234 234 (** Check if a name is a valid custom element name (contains hyphen, not reserved) *) 235 235 let is_custom_element_name name = 236 236 String.contains name '-' && 237 - not (String.starts_with ~prefix:"xml" (String.lowercase_ascii name)) && 238 - not (String.equal (String.lowercase_ascii name) "annotation-xml") 237 + not (String.starts_with ~prefix:"xml" (Astring.String.Ascii.lowercase name)) && 238 + not (String.equal (Astring.String.Ascii.lowercase name) "annotation-xml") 239 239 240 240 (** SVG namespace URI *) 241 241 let svg_namespace = "http://www.w3.org/2000/svg" ··· 255 255 256 256 (** Convert tag name and optional namespace to element_tag *) 257 257 let tag_of_string ?namespace name = 258 - let name_lower = String.lowercase_ascii name in 258 + let name_lower = Astring.String.Ascii.lowercase name in 259 259 match namespace with 260 260 | Some ns when is_svg_namespace ns -> Svg name (* Preserve original case for SVG *) 261 261 | Some ns when is_mathml_namespace ns -> MathML name (* Preserve original case for MathML *)
+6 -3
lib/check/message_format.ml
··· 90 90 91 91 Object (with_extract, Meta.none) 92 92 93 - let format_json ?system_id messages = 93 + let messages_to_json ?system_id messages = 94 94 let open Jsont in 95 95 let msg_array = Array (List.map (message_to_json ?system_id) messages, Meta.none) in 96 - let obj = Object ([ (("messages", Meta.none), msg_array) ], Meta.none) in 97 - match Jsont_bytesrw.encode_string ~format:Minify json obj with 96 + Object ([ (("messages", Meta.none), msg_array) ], Meta.none) 97 + 98 + let format_json ?system_id messages = 99 + let obj = messages_to_json ?system_id messages in 100 + match Jsont_bytesrw.encode_string ~format:Minify Jsont.json obj with 98 101 | Ok s -> s 99 102 | Error e -> failwith ("JSON encoding error: " ^ e)
+33
lib/check/message_format.mli
··· 26 26 27 27 @param system_id Optional default system identifier for messages without location. *) 28 28 val format_gnu : ?system_id:string -> Message.t list -> string 29 + 30 + (** {1 JSON Value Builders} 31 + 32 + These functions return [Jsont.json] values that can be reused 33 + for custom JSON encoding scenarios. *) 34 + 35 + (** Convert a single message to JSON AST. 36 + 37 + Produces JSON compatible with the Nu HTML Validator format: 38 + {[ 39 + { 40 + "type": "error", 41 + "message": "...", 42 + "subType": "error-code", 43 + "url": "...", 44 + "firstLine": 1, 45 + "firstColumn": 1, 46 + ... 47 + } 48 + ]} 49 + 50 + @param system_id Default system identifier for messages without location.system_id. *) 51 + val message_to_json : ?system_id:string -> Message.t -> Jsont.json 52 + 53 + (** Convert a message list to JSON AST with wrapper object. 54 + 55 + Produces JSON with a "messages" array: 56 + {[ 57 + { "messages": [...] } 58 + ]} 59 + 60 + @param system_id Default system identifier for messages without location.system_id. *) 61 + val messages_to_json : ?system_id:string -> Message.t list -> Jsont.json
+1 -1
lib/check/semantic/form_checker.ml
··· 12 12 13 13 (** Check if autocomplete value contains webauthn token *) 14 14 let contains_webauthn value = 15 - let lower = String.lowercase_ascii value in 15 + let lower = Astring.String.Ascii.lowercase value in 16 16 let tokens = String.split_on_char ' ' lower |> List.filter (fun s -> String.length s > 0) in 17 17 List.mem "webauthn" tokens 18 18
+2 -2
lib/check/semantic/lang_detecting_checker.ml
··· 54 54 let get_lang_code lang = 55 55 (* Extract primary language subtag *) 56 56 match String.split_on_char '-' lang with 57 - | code :: _ -> String.lowercase_ascii code 57 + | code :: _ -> Astring.String.Ascii.lowercase code 58 58 | [] -> "" 59 59 60 60 (* Create detector lazily with deterministic seed *) ··· 324 324 | None -> 325 325 Message_collector.add_typed collector 326 326 (`I18n (`Missing_dir_rtl (`Language detected_name))) 327 - | Some dir when String.lowercase_ascii dir <> "rtl" -> 327 + | Some dir when Astring.String.Ascii.lowercase dir <> "rtl" -> 328 328 Message_collector.add_typed collector 329 329 (`I18n (`Wrong_dir (`Language detected_name, `Declared dir))) 330 330 | _ -> ()
+11 -13
lib/check/semantic/nesting_checker.ml
··· 190 190 state.ancestor_flags <- empty_flags () 191 191 192 192 (** Get attribute value by name from attribute list. *) 193 - let get_attr attrs name = 194 - List.assoc_opt name attrs 193 + let get_attr = Attr_utils.get_attr 195 194 196 195 (** Check if an attribute exists. *) 197 - let has_attr attrs name = 198 - get_attr attrs name <> None 196 + let has_attr = Attr_utils.has_attr 199 197 200 198 (** Check if element is interactive based on its attributes. *) 201 199 let is_interactive_element name attrs = 202 200 match name with 203 - | "a" -> has_attr attrs "href" 204 - | "audio" | "video" -> has_attr attrs "controls" 205 - | "img" | "object" -> has_attr attrs "usemap" 201 + | "a" -> has_attr "href" attrs 202 + | "audio" | "video" -> has_attr "controls" attrs 203 + | "img" | "object" -> has_attr "usemap" attrs 206 204 | "input" -> 207 - (match get_attr attrs "type" with 205 + (match get_attr "type" attrs with 208 206 | Some "hidden" -> false 209 207 | _ -> true) 210 208 | "button" | "details" | "embed" | "iframe" | "label" | "select" ··· 239 237 (* Determine attribute to mention in error messages *) 240 238 let attr = 241 239 match name with 242 - | "a" when has_attr attrs "href" -> Some "href" 243 - | "audio" when has_attr attrs "controls" -> Some "controls" 244 - | "video" when has_attr attrs "controls" -> Some "controls" 245 - | "img" when has_attr attrs "usemap" -> Some "usemap" 246 - | "object" when has_attr attrs "usemap" -> Some "usemap" 240 + | "a" when has_attr "href" attrs -> Some "href" 241 + | "audio" when has_attr "controls" attrs -> Some "controls" 242 + | "video" when has_attr "controls" attrs -> Some "controls" 243 + | "img" when has_attr "usemap" attrs -> Some "usemap" 244 + | "object" when has_attr "usemap" attrs -> Some "usemap" 247 245 | _ -> None 248 246 in 249 247
+2 -2
lib/check/semantic/obsolete_checker.ml
··· 260 260 match element.Element.tag with 261 261 | Tag.Html _ -> 262 262 let name = Tag.tag_to_string element.tag in 263 - let name_lower = String.lowercase_ascii name in 263 + let name_lower = Astring.String.Ascii.lowercase name in 264 264 let attrs = element.raw_attrs in 265 265 266 266 (* Track head context *) ··· 275 275 276 276 (* Check for obsolete attributes *) 277 277 List.iter (fun (attr_name, _attr_value) -> 278 - let attr_lower = String.lowercase_ascii attr_name in 278 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 279 279 280 280 (* Special handling for scoped attribute on style *) 281 281 if attr_lower = "scoped" && name_lower = "style" then begin
+1 -1
lib/check/semantic/required_attr_checker.ml
··· 120 120 (* popover attribute must have valid value *) 121 121 match Attr_utils.get_attr "popover" attrs with 122 122 | Some value -> 123 - let value_lower = String.lowercase_ascii value in 123 + let value_lower = Astring.String.Ascii.lowercase value in 124 124 (* Valid values: empty string, auto, manual, hint *) 125 125 if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then 126 126 Message_collector.add_typed collector
+34 -34
lib/check/specialized/aria_checker.ml
··· 309 309 else 310 310 String.split_on_char ' ' trimmed 311 311 |> List.filter (fun s -> String.trim s <> "") 312 - |> List.map String.lowercase_ascii 312 + |> List.map Astring.String.Ascii.lowercase 313 313 314 314 (** Get the implicit role for an HTML element. *) 315 315 let get_implicit_role element_name attrs = 316 316 (* Check for input element with type attribute *) 317 317 if element_name = "input" then begin 318 - match List.assoc_opt "type" attrs with 318 + match Attr_utils.get_attr "type" attrs with 319 319 | Some input_type -> 320 - let input_type = String.lowercase_ascii input_type in 320 + let input_type = Astring.String.Ascii.lowercase input_type in 321 321 begin match Hashtbl.find_opt input_types_with_implicit_role input_type with 322 322 | Some role -> Some role 323 323 | None -> ··· 332 332 end 333 333 (* Check for area element - implicit role depends on href attribute *) 334 334 else if element_name = "area" then begin 335 - match List.assoc_opt "href" attrs with 335 + match Attr_utils.get_attr "href" attrs with 336 336 | Some _ -> Some "link" (* area with href has implicit role "link" *) 337 337 | None -> Some "generic" (* area without href has no corresponding role, treated as generic *) 338 338 end 339 339 (* Check for a element - implicit role depends on href attribute *) 340 340 else if element_name = "a" then begin 341 - match List.assoc_opt "href" attrs with 341 + match Attr_utils.get_attr "href" attrs with 342 342 | Some _ -> Some "link" (* a with href has implicit role "link" *) 343 343 | None -> Some "generic" (* a without href has no corresponding role, treated as generic *) 344 344 end ··· 430 430 match element.Element.tag with 431 431 | Tag.Html _ -> 432 432 let name = Tag.tag_to_string element.tag in 433 - let name_lower = String.lowercase_ascii name in 433 + let name_lower = Astring.String.Ascii.lowercase name in 434 434 let attrs = element.raw_attrs in 435 - let role_attr = List.assoc_opt "role" attrs in 436 - let aria_label = List.assoc_opt "aria-label" attrs in 437 - let aria_labelledby = List.assoc_opt "aria-labelledby" attrs in 438 - let aria_braillelabel = List.assoc_opt "aria-braillelabel" attrs in 435 + let role_attr = Attr_utils.get_attr "role" attrs in 436 + let aria_label = Attr_utils.get_attr "aria-label" attrs in 437 + let aria_labelledby = Attr_utils.get_attr "aria-labelledby" attrs in 438 + let aria_braillelabel = Attr_utils.get_attr "aria-braillelabel" attrs in 439 439 let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in 440 440 let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in 441 441 let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in ··· 459 459 460 460 (* Track active tabs and tabpanel roles for end_document validation *) 461 461 if List.mem "tab" explicit_roles then begin 462 - let aria_selected = List.assoc_opt "aria-selected" attrs in 462 + let aria_selected = Attr_utils.get_attr "aria-selected" attrs in 463 463 if aria_selected = Some "true" then state.has_active_tab <- true 464 464 end; 465 465 if List.mem "tabpanel" explicit_roles then state.has_tabpanel <- true; 466 466 467 467 (* Track visible main elements (explicit role=main or implicit main role) *) 468 468 let is_hidden = 469 - let aria_hidden = List.assoc_opt "aria-hidden" attrs in 469 + let aria_hidden = Attr_utils.get_attr "aria-hidden" attrs in 470 470 aria_hidden = Some "true" 471 471 in 472 472 if not is_hidden then begin ··· 489 489 (* Check br/wbr aria-* attribute restrictions - not allowed *) 490 490 if name_lower = "br" || name_lower = "wbr" then begin 491 491 List.iter (fun (attr_name, _) -> 492 - let attr_lower = String.lowercase_ascii attr_name in 492 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 493 493 if String.starts_with ~prefix:"aria-" attr_lower && 494 494 attr_lower <> "aria-hidden" then 495 495 Message_collector.add_typed collector ··· 515 515 516 516 (* Check for img with empty alt having role attribute *) 517 517 if name_lower = "img" then begin 518 - let alt_value = List.assoc_opt "alt" attrs in 518 + let alt_value = Attr_utils.get_attr "alt" attrs in 519 519 match alt_value with 520 520 | Some alt when String.trim alt = "" -> 521 521 (* img with empty alt must not have role attribute *) ··· 526 526 527 527 (* Check for input[type=checkbox][role=button] requires aria-pressed *) 528 528 if name_lower = "input" then begin 529 - let input_type = match List.assoc_opt "type" attrs with 530 - | Some t -> String.lowercase_ascii t 529 + let input_type = match Attr_utils.get_attr "type" attrs with 530 + | Some t -> Astring.String.Ascii.lowercase t 531 531 | None -> "text" 532 532 in 533 533 if input_type = "checkbox" && List.mem "button" explicit_roles then begin 534 - let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in 534 + let has_aria_pressed = Attr_utils.has_attr "aria-pressed" attrs in 535 535 if not has_aria_pressed then 536 536 Message_collector.add_typed collector (`Input `Checkbox_needs_aria_pressed) 537 537 end ··· 566 566 567 567 (* Check for aria-hidden="true" on body element *) 568 568 if name_lower = "body" then begin 569 - let aria_hidden = List.assoc_opt "aria-hidden" attrs in 569 + let aria_hidden = Attr_utils.get_attr "aria-hidden" attrs in 570 570 match aria_hidden with 571 571 | Some "true" -> 572 572 Message_collector.add_typed collector (`Aria `Hidden_on_body) ··· 574 574 end; 575 575 576 576 (* Check for aria-checked on input[type=checkbox] *) 577 - let aria_checked = List.assoc_opt "aria-checked" attrs in 577 + let aria_checked = Attr_utils.get_attr "aria-checked" attrs in 578 578 if name_lower = "input" then begin 579 - match List.assoc_opt "type" attrs with 580 - | Some input_type when String.lowercase_ascii input_type = "checkbox" -> 579 + match Attr_utils.get_attr "type" attrs with 580 + | Some input_type when Astring.String.Ascii.lowercase input_type = "checkbox" -> 581 581 if aria_checked <> None then 582 582 Message_collector.add_typed collector 583 583 (`Aria (`Must_not_use (`Attr "aria-checked", `Elem "input", ··· 586 586 end; 587 587 588 588 (* Check for aria-expanded on roles that don't support it *) 589 - let aria_expanded = List.assoc_opt "aria-expanded" attrs in 589 + let aria_expanded = Attr_utils.get_attr "aria-expanded" attrs in 590 590 if aria_expanded <> None then begin 591 591 let role_to_check = match explicit_roles with 592 592 | first :: _ -> Some first ··· 605 605 (* Special message for input[type=text] with role="textbox" *) 606 606 let reason = 607 607 if name_lower = "input" && first_role = "textbox" then begin 608 - let has_list = List.exists (fun (k, _) -> String.lowercase_ascii k = "list") attrs in 609 - let input_type = match List.assoc_opt "type" attrs with 610 - | Some t -> String.lowercase_ascii t 608 + let has_list = Attr_utils.has_attr "list" attrs in 609 + let input_type = match Attr_utils.get_attr "type" attrs with 610 + | Some t -> Astring.String.Ascii.lowercase t 611 611 | None -> "text" 612 612 in 613 613 if not has_list && input_type = "text" then ··· 671 671 672 672 (* Check for redundant default ARIA attribute values *) 673 673 List.iter (fun (attr_name, attr_value) -> 674 - let attr_lower = String.lowercase_ascii attr_name in 674 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 675 675 if String.starts_with ~prefix:"aria-" attr_lower then 676 676 match Hashtbl.find_opt aria_default_values attr_lower with 677 677 | Some default_value -> 678 - let value_lower = String.lowercase_ascii (String.trim attr_value) in 678 + let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in 679 679 if value_lower = default_value then 680 680 Message_collector.add_typed collector 681 681 (`Generic (Printf.sprintf ··· 688 688 if name_lower = "summary" then begin 689 689 let parent = get_parent_element state in 690 690 let is_in_details = parent = Some "details" in 691 - let has_role_attr = List.exists (fun (k, _) -> String.lowercase_ascii k = "role") attrs in 692 - let has_aria_expanded = List.assoc_opt "aria-expanded" attrs <> None in 693 - let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in 691 + let has_role_attr = Attr_utils.has_attr "role" attrs in 692 + let has_aria_expanded = Attr_utils.has_attr "aria-expanded" attrs in 693 + let has_aria_pressed = Attr_utils.has_attr "aria-pressed" attrs in 694 694 if is_in_details then begin 695 695 (* summary that is the first child of details *) 696 696 if has_role_attr then ··· 726 726 (* Custom elements (autonomous custom elements) have generic role by default 727 727 and cannot have accessible names unless they have an explicit role *) 728 728 let attrs = element.raw_attrs in 729 - let role_attr = List.assoc_opt "role" attrs in 730 - let aria_label = List.assoc_opt "aria-label" attrs in 731 - let aria_labelledby = List.assoc_opt "aria-labelledby" attrs in 732 - let aria_braillelabel = List.assoc_opt "aria-braillelabel" attrs in 729 + let role_attr = Attr_utils.get_attr "role" attrs in 730 + let aria_label = Attr_utils.get_attr "aria-label" attrs in 731 + let aria_labelledby = Attr_utils.get_attr "aria-labelledby" attrs in 732 + let aria_braillelabel = Attr_utils.get_attr "aria-braillelabel" attrs in 733 733 let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in 734 734 let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in 735 735 let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in
+21 -21
lib/check/specialized/attr_restrictions_checker.ml
··· 58 58 match element.Element.tag with 59 59 | Tag.Html _ -> 60 60 let name = Tag.tag_to_string element.tag in 61 - let name_lower = String.lowercase_ascii name in 61 + let name_lower = Astring.String.Ascii.lowercase name in 62 62 let attrs = element.raw_attrs in 63 63 64 64 (* Detect XHTML mode from xmlns attribute on html element *) ··· 86 86 (* Check for xmlns:* prefixed attributes - not allowed in HTML *) 87 87 (* Standard xmlns declarations are allowed but custom prefixes are not *) 88 88 List.iter (fun (attr_name, _) -> 89 - let attr_lower = String.lowercase_ascii attr_name in 89 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 90 90 if String.starts_with ~prefix:"xmlns:" attr_lower then begin 91 91 let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in 92 92 (* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *) ··· 113 113 (* Validate style type attribute - must be "text/css" or omitted *) 114 114 if name_lower = "style" then begin 115 115 List.iter (fun (attr_name, attr_value) -> 116 - let attr_lower = String.lowercase_ascii attr_name in 116 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 117 117 if attr_lower = "type" then begin 118 - let value_lower = String.lowercase_ascii (String.trim attr_value) in 118 + let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in 119 119 if value_lower <> "text/css" then 120 120 Message_collector.add_typed collector (`Misc `Style_type_invalid) 121 121 end ··· 144 144 (* imagesrcset requires as="image" *) 145 145 if has_imagesrcset then begin 146 146 let as_is_image = match as_value with 147 - | Some v -> String.lowercase_ascii (String.trim v) = "image" 147 + | Some v -> Astring.String.Ascii.lowercase (String.trim v) = "image" 148 148 | None -> false 149 149 in 150 150 if not as_is_image then ··· 164 164 (* Validate img usemap attribute - must be hash-name reference with content *) 165 165 if name_lower = "img" then begin 166 166 List.iter (fun (attr_name, attr_value) -> 167 - let attr_lower = String.lowercase_ascii attr_name in 167 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 168 168 if attr_lower = "usemap" then begin 169 169 if attr_value = "#" then 170 170 Message_collector.add_typed collector ··· 178 178 (* Validate embed type attribute - must be valid MIME type *) 179 179 if name_lower = "embed" then begin 180 180 List.iter (fun (attr_name, attr_value) -> 181 - let attr_lower = String.lowercase_ascii attr_name in 181 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 182 182 if attr_lower = "type" then begin 183 183 match Dt_mime.validate_mime_type attr_value with 184 184 | Ok () -> () ··· 197 197 name_lower = "iframe" || name_lower = "source" in 198 198 if is_dimension_element then begin 199 199 List.iter (fun (attr_name, attr_value) -> 200 - let attr_lower = String.lowercase_ascii attr_name in 200 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 201 201 if attr_lower = "width" || attr_lower = "height" then begin 202 202 (* Check for non-negative integer only *) 203 203 let is_valid = ··· 245 245 (* Validate area[shape=default] cannot have coords *) 246 246 if name_lower = "area" then begin 247 247 match Attr_utils.get_attr "shape" attrs with 248 - | Some s when String.lowercase_ascii (String.trim s) = "default" -> 248 + | Some s when Astring.String.Ascii.lowercase (String.trim s) = "default" -> 249 249 if Attr_utils.has_attr "coords" attrs then 250 250 Message_collector.add_typed collector 251 251 (`Attr (`Not_allowed (`Attr "coords", `Elem "area"))) ··· 257 257 match Attr_utils.get_attr "dir" attrs with 258 258 | None -> 259 259 Message_collector.add_typed collector (`Misc `Bdo_missing_dir) 260 - | Some v when String.lowercase_ascii (String.trim v) = "auto" -> 260 + | Some v when Astring.String.Ascii.lowercase (String.trim v) = "auto" -> 261 261 Message_collector.add_typed collector (`Misc `Bdo_dir_auto) 262 262 | _ -> () 263 263 end; ··· 266 266 if name_lower = "input" then begin 267 267 if Attr_utils.has_attr "list" attrs then begin 268 268 let input_type = Attr_utils.get_attr_or "type" ~default:"text" attrs 269 - |> String.trim |> String.lowercase_ascii in 269 + |> String.trim |> Astring.String.Ascii.lowercase in 270 270 if not (List.mem input_type input_types_allowing_list) then 271 271 Message_collector.add_typed collector (`Input `List_not_allowed) 272 272 end ··· 274 274 275 275 (* Validate data-* attributes *) 276 276 List.iter (fun (attr_name, _) -> 277 - let attr_lower = String.lowercase_ascii attr_name in 277 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 278 278 (* Check if it starts with "data-" *) 279 279 if String.starts_with ~prefix:"data-" attr_lower then begin 280 280 let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in ··· 297 297 (match lang_value with 298 298 | None -> 299 299 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang) 300 - | Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang -> 300 + | Some lang when Astring.String.Ascii.lowercase lang <> Astring.String.Ascii.lowercase xmllang -> 301 301 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang) 302 302 | _ -> ()) 303 303 | None -> () ··· 305 305 306 306 (* Validate spellcheck attribute - must be "true" or "false" or empty *) 307 307 List.iter (fun (attr_name, attr_value) -> 308 - let attr_lower = String.lowercase_ascii attr_name in 308 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 309 309 if attr_lower = "spellcheck" then begin 310 - let value_lower = String.lowercase_ascii (String.trim attr_value) in 310 + let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in 311 311 if value_lower <> "" && value_lower <> "true" && value_lower <> "false" then 312 312 Message_collector.add_typed collector 313 313 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason ""))) ··· 317 317 (* Validate enterkeyhint attribute - must be one of specific values *) 318 318 let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in 319 319 List.iter (fun (attr_name, attr_value) -> 320 - let attr_lower = String.lowercase_ascii attr_name in 320 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 321 321 if attr_lower = "enterkeyhint" then begin 322 - let value_lower = String.lowercase_ascii (String.trim attr_value) in 322 + let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in 323 323 if not (List.mem value_lower valid_enterkeyhint) then 324 324 Message_collector.add_typed collector 325 325 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason ""))) ··· 328 328 329 329 (* Validate headingoffset attribute - must be a number between 0 and 8 *) 330 330 List.iter (fun (attr_name, attr_value) -> 331 - let attr_lower = String.lowercase_ascii attr_name in 331 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 332 332 if attr_lower = "headingoffset" then begin 333 333 let trimmed = String.trim attr_value in 334 334 let is_valid = ··· 346 346 347 347 (* Validate accesskey attribute - each key label must be a single code point *) 348 348 List.iter (fun (attr_name, attr_value) -> 349 - let attr_lower = String.lowercase_ascii attr_name in 349 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 350 350 if attr_lower = "accesskey" then begin 351 351 (* Split by whitespace to get key labels *) 352 352 let keys = String.split_on_char ' ' attr_value |> ··· 418 418 let is_media_element = name_lower = "link" || name_lower = "style" || name_lower = "source" in 419 419 if is_media_element then begin 420 420 List.iter (fun (attr_name, attr_value) -> 421 - let attr_lower = String.lowercase_ascii attr_name in 421 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 422 422 if attr_lower = "media" then begin 423 423 let trimmed = String.trim attr_value in 424 424 if trimmed <> "" then begin ··· 436 436 437 437 (* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *) 438 438 List.iter (fun (attr_name, attr_value) -> 439 - let attr_lower = String.lowercase_ascii attr_name in 439 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 440 440 if attr_lower = "prefix" then begin 441 441 (* Parse prefix attribute value - format: "prefix1: iri1 prefix2: iri2 ..." *) 442 442 let trimmed = String.trim attr_value in
+1 -1
lib/check/specialized/datetime_checker.ml
··· 451 451 if List.mem name datetime_elements then begin 452 452 (* Check for datetime attribute *) 453 453 let datetime_attr = List.find_map (fun (k, v) -> 454 - if String.lowercase_ascii k = "datetime" then Some v else None 454 + if Astring.String.Ascii.lowercase k = "datetime" then Some v else None 455 455 ) element.raw_attrs in 456 456 match datetime_attr with 457 457 | None -> ()
+1 -1
lib/check/specialized/dl_checker.ml
··· 106 106 (`Element (`Not_allowed_as_child (`Child "div", `Parent "dl"))); 107 107 (match Attr.get_role element.attrs with 108 108 | Some role_value -> 109 - let role_lower = String.lowercase_ascii (String.trim role_value) in 109 + let role_lower = Astring.String.Ascii.lowercase (String.trim role_value) in 110 110 if role_lower <> "presentation" && role_lower <> "none" then 111 111 Message_collector.add_typed collector (`Li_role `Div_in_dl_bad_role) 112 112 | None -> ());
-37
lib/check/specialized/h1_checker.ml
··· 1 - (** H1 element counter - warns about multiple h1 elements in a document. *) 2 - 3 - type state = { 4 - mutable h1_count : int; 5 - mutable svg_depth : int; (* Track depth inside SVG *) 6 - } 7 - 8 - let create () = { 9 - h1_count = 0; 10 - svg_depth = 0; 11 - } 12 - 13 - let reset state = 14 - state.h1_count <- 0; 15 - state.svg_depth <- 0 16 - 17 - let start_element state ~element collector = 18 - (* Track SVG depth - h1 inside SVG (foreignObject, desc) shouldn't count *) 19 - match element.Element.tag with 20 - | Tag.Svg _ -> 21 - state.svg_depth <- state.svg_depth + 1 22 - | Tag.Html `H1 when state.svg_depth = 0 -> 23 - state.h1_count <- state.h1_count + 1; 24 - if state.h1_count > 1 then 25 - Message_collector.add_typed collector (`Misc `Multiple_h1) 26 - | Tag.Html _ when state.svg_depth = 0 -> 27 - () (* Other HTML elements outside SVG *) 28 - | _ -> 29 - () (* Non-HTML or inside SVG *) 30 - 31 - let end_element state ~tag _collector = 32 - match tag with 33 - | Tag.Svg _ when state.svg_depth > 0 -> 34 - state.svg_depth <- state.svg_depth - 1 35 - | _ -> () 36 - 37 - let checker = Checker.make ~create ~reset ~start_element ~end_element ()
-16
lib/check/specialized/h1_checker.mli
··· 1 - (** H1 element counter checker. 2 - 3 - This checker validates that documents don't have multiple h1 elements, 4 - which can confuse document structure and accessibility tools. 5 - 6 - {2 Validation Rules} 7 - 8 - - Documents should have at most one [<h1>] element 9 - - [<h1>] elements inside SVG content (foreignObject, desc) are not counted 10 - 11 - {2 Error Messages} 12 - 13 - - [Multiple_h1]: Document contains more than one h1 element *) 14 - 15 - val checker : Checker.t 16 - (** The H1 checker instance. *)
+19 -107
lib/check/specialized/heading_checker.ml
··· 1 1 (** Heading structure validation checker. 2 2 3 3 This checker validates that: 4 - - Heading levels don't skip (e.g., h1 to h3) 5 - - Documents have at least one heading 6 - - Multiple h1 usage is noted 7 - - Headings are not empty *) 4 + - Multiple h1 usage is reported as an error 5 + 6 + Note: Additional accessibility checks (first heading should be h1, skipped 7 + levels, empty headings) are intentionally not included as errors since they 8 + are recommendations rather than HTML5 spec requirements. *) 8 9 9 10 (** Checker state tracking heading structure. *) 10 11 type state = { 11 - mutable current_level : int option; 12 12 mutable h1_count : int; 13 - mutable has_any_heading : bool; 14 - mutable first_heading_checked : bool; 15 - mutable in_heading : Tag.html_tag option; 16 - mutable heading_has_text : bool; 17 13 mutable svg_depth : int; (* Track depth inside SVG - headings in SVG don't count *) 18 14 } 19 15 20 - let create () = 21 - { 22 - current_level = None; 23 - h1_count = 0; 24 - has_any_heading = false; 25 - first_heading_checked = false; 26 - in_heading = None; 27 - heading_has_text = false; 28 - svg_depth = 0; 29 - } 16 + let create () = { 17 + h1_count = 0; 18 + svg_depth = 0; 19 + } 30 20 31 21 let reset state = 32 - state.current_level <- None; 33 22 state.h1_count <- 0; 34 - state.has_any_heading <- false; 35 - state.first_heading_checked <- false; 36 - state.in_heading <- None; 37 - state.heading_has_text <- false; 38 23 state.svg_depth <- 0 39 24 40 - (** Check if text is effectively empty (only whitespace). *) 41 - let is_empty_text text = 42 - let rec check i = 43 - if i >= String.length text then 44 - true 45 - else 46 - match text.[i] with 47 - | ' ' | '\t' | '\n' | '\r' -> check (i + 1) 48 - | _ -> false 49 - in 50 - check 0 51 - 52 25 let start_element state ~element collector = 53 26 match element.Element.tag with 54 27 | Tag.Svg _ -> 55 - (* Track SVG depth - headings inside SVG (foreignObject, desc) don't count *) 28 + (* Track SVG depth - h1 inside SVG (foreignObject, desc) shouldn't count *) 56 29 state.svg_depth <- state.svg_depth + 1 57 - | Tag.Html (#Tag.heading_tag as h) when state.svg_depth = 0 -> 58 - let level = match Tag.heading_level h with Some l -> l | None -> 0 in 59 - let name = Tag.html_tag_to_string h in 60 - state.has_any_heading <- true; 61 - 62 - (* Check if this is the first heading *) 63 - if not state.first_heading_checked then begin 64 - state.first_heading_checked <- true; 65 - if level <> 1 then 66 - Message_collector.add_typed collector 67 - (`Generic (Printf.sprintf 68 - "First heading in document is <%s>, should typically be <h1>" name)) 69 - end; 70 - 71 - (* Track h1 count *) 72 - if level = 1 then begin 73 - state.h1_count <- state.h1_count + 1; 74 - if state.h1_count > 1 then 75 - Message_collector.add_typed collector (`Misc `Multiple_h1) 76 - end; 77 - 78 - (* Check for skipped levels *) 79 - begin match state.current_level with 80 - | None -> 81 - state.current_level <- Some level 82 - | Some prev_level -> 83 - let diff = level - prev_level in 84 - if diff > 1 then 85 - Message_collector.add_typed collector 86 - (`Generic (Printf.sprintf 87 - "Heading level skipped: <%s> follows <h%d>, skipping %d level%s. This can confuse screen reader users" 88 - name prev_level (diff - 1) (if diff > 2 then "s" else ""))); 89 - state.current_level <- Some level 90 - end; 91 - 92 - (* Track that we're in a heading to check for empty content *) 93 - state.in_heading <- Some h; 94 - state.heading_has_text <- false 30 + | Tag.Html `H1 when state.svg_depth = 0 -> 31 + state.h1_count <- state.h1_count + 1; 32 + if state.h1_count > 1 then 33 + Message_collector.add_typed collector (`Misc `Multiple_h1) 95 34 | _ -> () 96 35 97 - let end_element state ~tag collector = 98 - (* Track SVG depth *) 99 - (match tag with 100 - | Tag.Svg _ when state.svg_depth > 0 -> 101 - state.svg_depth <- state.svg_depth - 1 102 - | _ -> ()); 103 - (* Check for empty headings *) 104 - match state.in_heading, tag with 105 - | Some h, Tag.Html h2 when h = h2 -> 106 - if not state.heading_has_text then 107 - Message_collector.add_typed collector 108 - (`Generic (Printf.sprintf 109 - "Heading <%s> is empty or contains only whitespace. Empty headings are problematic for screen readers" 110 - (Tag.html_tag_to_string h))); 111 - state.in_heading <- None; 112 - state.heading_has_text <- false 36 + let end_element state ~tag _collector = 37 + match tag with 38 + | Tag.Svg _ when state.svg_depth > 0 -> 39 + state.svg_depth <- state.svg_depth - 1 113 40 | _ -> () 114 41 115 - let characters state text _collector = 116 - (* If we're inside a heading, check if this text is non-whitespace *) 117 - match state.in_heading with 118 - | Some _ -> 119 - if not (is_empty_text text) then 120 - state.heading_has_text <- true 121 - | None -> 122 - () 123 - 124 - let end_document state collector = 125 - if not state.has_any_heading then 126 - Message_collector.add_typed collector 127 - (`Generic "Document contains no heading elements (h1-h6). Headings provide important document structure for accessibility") 128 - 129 - let checker = Checker.make ~create ~reset ~start_element ~end_element 130 - ~characters ~end_document () 42 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+2 -2
lib/check/specialized/importmap_checker.ml
··· 270 270 | Tag.Html `Script -> 271 271 (* Check if type="importmap" *) 272 272 let type_attr = List.find_opt (fun (n, _) -> 273 - String.lowercase_ascii n = "type" 273 + Astring.String.Ascii.lowercase n = "type" 274 274 ) element.raw_attrs in 275 275 (match type_attr with 276 - | Some (_, v) when String.lowercase_ascii v = "importmap" -> 276 + | Some (_, v) when Astring.String.Ascii.lowercase v = "importmap" -> 277 277 state.in_importmap <- true; 278 278 Buffer.clear state.content 279 279 | _ -> ())
+1 -1
lib/check/specialized/label_checker.ml
··· 65 65 | _ -> ()) 66 66 67 67 | Tag.Html tag -> 68 - let name_lower = String.lowercase_ascii (Tag.tag_to_string (Tag.Html tag)) in 68 + let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string (Tag.Html tag)) in 69 69 70 70 (* Track labelable element IDs *) 71 71 (if is_labelable name_lower then
+1 -1
lib/check/specialized/language_checker.ml
··· 27 27 28 28 (** Check if a language tag contains deprecated subtags. *) 29 29 let check_deprecated_tag value = 30 - let lower = String.lowercase_ascii value in 30 + let lower = Astring.String.Ascii.lowercase value in 31 31 let subtags = String.split_on_char '-' lower in 32 32 match subtags with 33 33 | [] -> None
+3 -6
lib/check/specialized/mime_type_checker.ml
··· 153 153 let create () = () 154 154 let reset _state = () 155 155 156 - let get_attr_value name attrs = 157 - List.find_map (fun (k, v) -> 158 - if String.lowercase_ascii k = String.lowercase_ascii name then Some v else None 159 - ) attrs 156 + let get_attr_value = Attr_utils.get_attr 160 157 161 158 let start_element _state ~element collector = 162 159 match element.Element.tag with 163 160 | Tag.Html tag -> 164 161 let name = Tag.html_tag_to_string tag in 165 - let name_lower = String.lowercase_ascii name in 162 + let name_lower = Astring.String.Ascii.lowercase name in 166 163 (match List.assoc_opt name_lower mime_type_attrs with 167 164 | None -> () 168 165 | Some type_attrs -> ··· 174 171 if value = "" then () 175 172 else if name_lower = "script" then 176 173 (* script type can be module, importmap, etc. - skip validation for non-MIME types *) 177 - let value_lower = String.lowercase_ascii value in 174 + let value_lower = Astring.String.Ascii.lowercase value in 178 175 if value_lower = "module" || value_lower = "importmap" || 179 176 not (String.contains value '/') then () 180 177 else
+2 -2
lib/check/specialized/picture_checker.ml
··· 133 133 let media_value = Attr_utils.get_attr "media" attrs in 134 134 let has_type = Attr_utils.has_attr "type" attrs in 135 135 let is_media_all = match media_value with 136 - | Some v -> String.lowercase_ascii (String.trim v) = "all" 136 + | Some v -> Astring.String.Ascii.lowercase (String.trim v) = "all" 137 137 | None -> false in 138 138 let is_media_empty = match media_value with 139 139 | Some v -> String.trim v = "" ··· 142 142 | None -> not has_type 143 143 | Some v -> 144 144 let trimmed = String.trim v in 145 - trimmed = "" || String.lowercase_ascii trimmed = "all" 145 + trimmed = "" || Astring.String.Ascii.lowercase trimmed = "all" 146 146 in 147 147 if is_always_matching then begin 148 148 state.has_always_matching_source <- true;
+12 -12
lib/check/specialized/srcset_sizes_checker.ml
··· 153 153 154 154 (** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *) 155 155 let has_invalid_scientific_notation s = 156 - let lower = String.lowercase_ascii s in 156 + let lower = Astring.String.Ascii.lowercase s in 157 157 (* Find 'e' for scientific notation *) 158 158 match String.index_opt lower 'e' with 159 159 | None -> false ··· 176 176 (* Check for % at the end *) 177 177 else if trimmed.[len - 1] = '%' then "%" 178 178 else begin 179 - let lower = String.lowercase_ascii trimmed in 179 + let lower = Astring.String.Ascii.lowercase trimmed in 180 180 (* Try to find a unit at the end (letters only) *) 181 181 let rec find_unit_length i = 182 182 if i < 0 then 0 ··· 205 205 if has_invalid_scientific_notation value_no_comments then BadScientificNotation 206 206 (* "auto" is only valid with lazy loading, which requires checking the element context. 207 207 For general validation, treat "auto" alone as invalid in sizes. *) 208 - else if String.lowercase_ascii value_no_comments = "auto" then 208 + else if Astring.String.Ascii.lowercase value_no_comments = "auto" then 209 209 BadCssNumber (value_no_comments.[0], trimmed) 210 210 else if value_no_comments = "" then InvalidUnit ("", trimmed) 211 211 else begin 212 - let lower = String.lowercase_ascii value_no_comments in 212 + let lower = Astring.String.Ascii.lowercase value_no_comments in 213 213 (* Check for calc() or other CSS functions first - these are always valid *) 214 214 if String.contains value_no_comments '(' then Valid 215 215 else begin ··· 310 310 Some "Bad media condition: Parse Error" 311 311 end else begin 312 312 (* Check for bare "all" which is invalid *) 313 - let lower = String.lowercase_ascii trimmed in 313 + let lower = Astring.String.Ascii.lowercase trimmed in 314 314 let parts = String.split_on_char ' ' lower |> List.filter (fun s -> s <> "") in 315 315 match parts with 316 316 | keyword :: _ when keyword = "all" -> ··· 358 358 end 359 359 else begin 360 360 (* Check if remaining starts with "and", "or", "not" followed by space or paren *) 361 - let lower_remaining = String.lowercase_ascii remaining in 361 + let lower_remaining = Astring.String.Ascii.lowercase remaining in 362 362 if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and " then 363 363 skip_media_condition (i + (len - i) - remaining_len + 4) 364 364 else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or " then ··· 577 577 578 578 (** Validate srcset descriptor *) 579 579 let validate_srcset_descriptor desc element_name srcset_value has_sizes collector = 580 - let desc_lower = String.lowercase_ascii (String.trim desc) in 580 + let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in 581 581 if String.length desc_lower = 0 then true 582 582 else begin 583 583 let last_char = desc_lower.[String.length desc_lower - 1] in ··· 723 723 724 724 (** Normalize descriptor for duplicate detection (e.g., 1x = 1.0x) *) 725 725 let normalize_descriptor desc = 726 - let desc_lower = String.lowercase_ascii (String.trim desc) in 726 + let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in 727 727 if String.length desc_lower = 0 then desc_lower 728 728 else 729 729 let last_char = desc_lower.[String.length desc_lower - 1] in ··· 793 793 (* Special schemes that require host/content after :// *) 794 794 let special_schemes = ["http"; "https"; "ftp"; "ws"; "wss"] in 795 795 (* Check for scheme-only URL like "http:" *) 796 - let url_lower = String.lowercase_ascii url in 796 + let url_lower = Astring.String.Ascii.lowercase url in 797 797 List.iter (fun scheme -> 798 798 let scheme_colon = scheme ^ ":" in 799 799 if url_lower = scheme_colon then ··· 824 824 (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected single descriptor but found extraneous descriptor %s at %s." (q value) (q "srcset") (q element_name) (q extra_desc) (q value))))) 825 825 end; 826 826 827 - let desc_lower = String.lowercase_ascii (String.trim desc) in 827 + let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in 828 828 if String.length desc_lower > 0 then begin 829 829 let last_char = desc_lower.[String.length desc_lower - 1] in 830 830 if last_char = 'w' then has_w_descriptor := true ··· 872 872 begin match Hashtbl.find_opt seen_descriptors normalized with 873 873 | Some first_url -> 874 874 Message_collector.add_typed collector 875 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (String.lowercase_ascii dup_type) (q first_url))))) 875 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (Astring.String.Ascii.lowercase dup_type) (q first_url))))) 876 876 | None -> 877 877 begin match (if is_1x then Hashtbl.find_opt seen_descriptors "implicit-1x" else None) with 878 878 | Some first_url -> 879 879 (* Explicit 1x conflicts with implicit 1x *) 880 880 Message_collector.add_typed collector 881 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (String.lowercase_ascii dup_type) (q first_url))))) 881 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (Astring.String.Ascii.lowercase dup_type) (q first_url))))) 882 882 | None -> 883 883 Hashtbl.add seen_descriptors normalized url; 884 884 if is_1x then Hashtbl.add seen_descriptors "explicit-1x" url
+10 -10
lib/check/specialized/svg_checker.ml
··· 260 260 261 261 (* Check if attribute name matches a pattern like "data-*" or "aria-*" - case insensitive *) 262 262 let matches_pattern attr pattern = 263 - let attr_lower = String.lowercase_ascii attr in 264 - let pattern_lower = String.lowercase_ascii pattern in 263 + let attr_lower = Astring.String.Ascii.lowercase attr in 264 + let pattern_lower = Astring.String.Ascii.lowercase pattern in 265 265 if String.ends_with ~suffix:"-*" pattern_lower then 266 266 let prefix = String.sub pattern_lower 0 (String.length pattern_lower - 1) in 267 267 String.starts_with ~prefix attr_lower ··· 361 361 state.in_svg <- true; 362 362 363 363 if is_svg_element || state.in_svg then begin 364 - let name_lower = String.lowercase_ascii name in 364 + let name_lower = Astring.String.Ascii.lowercase name in 365 365 366 366 (* Check SVG content model rules *) 367 367 (* 1. Check if child is allowed in SVG <a> *) 368 368 (match state.element_stack with 369 - | parent :: _ when String.lowercase_ascii parent = "a" -> 369 + | parent :: _ when Astring.String.Ascii.lowercase parent = "a" -> 370 370 if List.mem name_lower a_disallowed_children then 371 371 Message_collector.add_typed collector 372 372 (`Element (`Not_allowed_as_child (`Child name, `Parent "a"))) ··· 382 382 (* 2.5 Check stop element is only in linearGradient or radialGradient *) 383 383 if name_lower = "stop" then begin 384 384 match state.element_stack with 385 - | parent :: _ when (let p = String.lowercase_ascii parent in 385 + | parent :: _ when (let p = Astring.String.Ascii.lowercase parent in 386 386 p = "lineargradient" || p = "radialgradient") -> () 387 387 | parent :: _ -> 388 388 Message_collector.add_typed collector ··· 393 393 (* 2.6 Check use element is not nested inside another use element *) 394 394 if name_lower = "use" then begin 395 395 match state.element_stack with 396 - | parent :: _ when String.lowercase_ascii parent = "use" -> 396 + | parent :: _ when Astring.String.Ascii.lowercase parent = "use" -> 397 397 Message_collector.add_typed collector 398 398 (`Element (`Not_allowed_as_child (`Child name, `Parent parent))) 399 399 | _ -> () ··· 401 401 402 402 (* 3. Check duplicate feFunc* in feComponentTransfer *) 403 403 (match state.element_stack with 404 - | parent :: _ when String.lowercase_ascii parent = "fecomponenttransfer" -> 404 + | parent :: _ when Astring.String.Ascii.lowercase parent = "fecomponenttransfer" -> 405 405 if List.mem name_lower ["fefuncr"; "fefuncg"; "fefuncb"; "fefunca"] then begin 406 406 match state.fecomponenttransfer_stack with 407 407 | fect :: _ -> ··· 435 435 436 436 (* Check each attribute *) 437 437 List.iter (fun (attr, value) -> 438 - let attr_lower = String.lowercase_ascii attr in 438 + let attr_lower = Astring.String.Ascii.lowercase attr in 439 439 440 440 (* Validate xmlns attributes *) 441 441 if String.starts_with ~prefix:"xmlns" attr_lower then ··· 457 457 (match List.assoc_opt name_lower required_attrs with 458 458 | Some req_attrs -> 459 459 List.iter (fun req_attr -> 460 - if not (List.exists (fun (a, _) -> String.lowercase_ascii a = req_attr) attrs) then 460 + if not (Attr_utils.has_attr req_attr attrs) then 461 461 Message_collector.add_typed collector 462 462 (`Svg (`Missing_attr (`Elem name_lower, `Attr req_attr))) 463 463 ) req_attrs ··· 469 469 let name = Tag.tag_to_string tag in 470 470 471 471 if is_svg_element || state.in_svg then begin 472 - let name_lower = String.lowercase_ascii name in 472 + let name_lower = Astring.String.Ascii.lowercase name in 473 473 474 474 (* Check required children when closing font element *) 475 475 if name_lower = "font" then begin
+5 -5
lib/check/specialized/table_checker.ml
··· 354 354 355 355 (** Parse a non-negative integer attribute, returning 1 if absent or invalid *) 356 356 let parse_non_negative_int attrs name = 357 - match List.assoc_opt name attrs with 357 + match Attr_utils.get_attr name attrs with 358 358 | None -> 1 359 359 | Some v -> ( 360 360 try ··· 364 364 365 365 (** Parse a positive integer attribute, returning 1 if absent or invalid *) 366 366 let parse_positive_int attrs name = 367 - match List.assoc_opt name attrs with 367 + match Attr_utils.get_attr name attrs with 368 368 | None -> 1 369 369 | Some v -> ( 370 370 try ··· 374 374 375 375 (** Parse the headers attribute into a list of IDs *) 376 376 let parse_headers attrs = 377 - match List.assoc_opt "headers" attrs with 377 + match Attr_utils.get_attr "headers" attrs with 378 378 | None -> [] 379 379 | Some v -> 380 380 let parts = String.split_on_char ' ' v in ··· 523 523 table.state <- InCellInRowGroup; 524 524 (* Record header ID if present *) 525 525 if is_header then ( 526 - match List.assoc_opt "id" attrs with 526 + match Attr_utils.get_attr "id" attrs with 527 527 | Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id () 528 528 | _ -> ()); 529 529 (* Parse cell attributes *) ··· 541 541 table.state <- InCellInImplicitRowGroup; 542 542 (* Same logic as above *) 543 543 if is_header then ( 544 - match List.assoc_opt "id" attrs with 544 + match Attr_utils.get_attr "id" attrs with 545 545 | Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id () 546 546 | _ -> ()); 547 547 let colspan = abs (parse_positive_int attrs "colspan") in
+1 -1
lib/check/specialized/unknown_element_checker.ml
··· 31 31 state.stack <- name :: state.stack 32 32 33 33 | Tag.Html tag -> 34 - let name_lower = String.lowercase_ascii (Tag.tag_to_string (Tag.Html tag)) in 34 + let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string (Tag.Html tag)) in 35 35 state.stack <- name_lower :: state.stack 36 36 37 37 | _ -> () (* SVG, MathML, Custom elements are allowed *)
+24 -25
lib/check/specialized/url_checker.ml
··· 67 67 68 68 (** Check if pipe is allowed in this host context. *) 69 69 let is_pipe_allowed_in_host url host = 70 - let scheme = try String.lowercase_ascii (String.sub url 0 (String.index url ':')) with _ -> "" in 70 + let scheme = try Astring.String.Ascii.lowercase (String.sub url 0 (String.index url ':')) with _ -> "" in 71 71 scheme = "file" && is_valid_windows_drive host 72 72 73 73 (** Special schemes that require double slash (//). ··· 95 95 (c >= '0' && c <= '9') || c = '+' || c = '-' || c = '.' 96 96 ) potential_scheme in 97 97 if is_valid_scheme then 98 - Some (String.lowercase_ascii potential_scheme) 98 + Some (Astring.String.Ascii.lowercase potential_scheme) 99 99 else 100 100 None 101 101 with Not_found -> None ··· 104 104 let extract_host_and_port url = 105 105 try 106 106 let double_slash = 107 - try Some (Str.search_forward (Str.regexp "://") url 0 + 3) 108 - with Not_found -> None 107 + match Astring.String.find_sub ~sub:"://" url with 108 + | Some pos -> Some (pos + 3) 109 + | None -> None 109 110 in 110 111 match double_slash with 111 112 | None -> (None, None) ··· 250 251 (* Check for ASCII percent *) 251 252 String.contains s '%' || 252 253 (* Check for fullwidth percent (U+FF05 = 0xEF 0xBC 0x85 in UTF-8) *) 253 - try 254 - let _ = Str.search_forward (Str.regexp "\xef\xbc\x85") s 0 in 255 - true 256 - with Not_found -> false 254 + Astring.String.is_infix ~affix:"\xef\xbc\x85" s 257 255 258 256 (** Check if decoded host contains forbidden characters. 259 257 Some URLs have percent-encoded fullwidth characters that decode to forbidden chars. *) ··· 424 422 let check_path_segment url attr_name element_name = 425 423 (* Extract path: everything after authority (or after scheme: for non-authority URLs) *) 426 424 let raw_path = 427 - try 428 - let double_slash = Str.search_forward (Str.regexp "://") url 0 in 425 + match Astring.String.find_sub ~sub:"://" url with 426 + | Some double_slash -> 429 427 let after_auth_start = double_slash + 3 in 430 428 let rest = String.sub url after_auth_start (String.length url - after_auth_start) in 431 429 (* Find end of authority *) ··· 437 435 String.sub rest path_start (String.length rest - path_start) 438 436 else 439 437 "" 440 - with Not_found -> 438 + | None -> 441 439 (* No double slash - check for single slash path *) 442 - match extract_scheme url with 440 + (match extract_scheme url with 443 441 | Some _ -> 444 - let colon_pos = String.index url ':' in 445 - let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in 446 - after_colon 442 + (try 443 + let colon_pos = String.index url ':' in 444 + String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) 445 + with Not_found -> url) 447 446 | None -> 448 447 (* Relative URL - the whole thing is the path *) 449 - url 448 + url) 450 449 in 451 450 (* Remove query and fragment for path-specific checks *) 452 451 let path = remove_query_fragment raw_path in ··· 546 545 547 546 (** Check for illegal characters in userinfo (user:password). *) 548 547 let check_userinfo url attr_name element_name = 548 + match Astring.String.find_sub ~sub:"://" url with 549 + | None -> None 550 + | Some pos -> 549 551 try 550 552 (* Look for :// then find the LAST @ before the next / or end *) 551 - let double_slash = Str.search_forward (Str.regexp "://") url 0 + 3 in 553 + let double_slash = pos + 3 in 552 554 let rest = String.sub url double_slash (String.length url - double_slash) in 553 555 (* Find first / or ? or # to limit authority section *) 554 556 let auth_end = ··· 633 635 let url = String.trim url in 634 636 (* Empty URL check for certain attributes *) 635 637 if url = "" then begin 636 - let name_lower = String.lowercase_ascii element_name in 637 - let attr_lower = String.lowercase_ascii attr_name in 638 + let name_lower = Astring.String.Ascii.lowercase element_name in 639 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 638 640 if List.mem attr_lower must_be_non_empty || 639 641 List.mem (name_lower, attr_lower) must_be_non_empty_combinations then 640 642 Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Must be non-empty." ··· 739 741 let reset _state = () 740 742 741 743 (** Get attribute value by name. *) 742 - let get_attr_value name attrs = 743 - List.find_map (fun (k, v) -> 744 - if String.lowercase_ascii k = String.lowercase_ascii name then Some v else None 745 - ) attrs 744 + let get_attr_value = Attr_utils.get_attr 746 745 747 746 let start_element _state ~element collector = 748 747 match element.Element.tag with 749 748 | Tag.Html _ -> 750 749 let name = Tag.tag_to_string element.tag in 751 - let name_lower = String.lowercase_ascii name in 750 + let name_lower = Astring.String.Ascii.lowercase name in 752 751 let attrs = element.raw_attrs in 753 752 (* Check URL attributes for elements that have them *) 754 753 (match List.assoc_opt name_lower url_attributes with ··· 794 793 match validate_url url name "value" with 795 794 | None -> () 796 795 | Some error_msg -> 797 - let error_msg = Str.global_replace (Str.regexp "Bad URL:") "Bad absolute URL:" error_msg in 796 + let error_msg = Astring.String.concat ~sep:"Bad absolute URL:" (Astring.String.cuts ~sep:"Bad URL:" error_msg) in 798 797 Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message error_msg))) 799 798 end 800 799 end
+5 -5
lib/check/specialized/xhtml_content_checker.ml
··· 54 54 55 55 let start_element state ~element collector = 56 56 let name = Tag.tag_to_string element.Element.tag in 57 - let name_lower = String.lowercase_ascii name in 57 + let name_lower = Astring.String.Ascii.lowercase name in 58 58 let attrs = element.raw_attrs in 59 59 60 60 (* Check data-* attributes for uppercase *) ··· 63 63 (* Check if this element is allowed as child of parent *) 64 64 (match state.element_stack with 65 65 | parent :: _ -> 66 - let parent_lower = String.lowercase_ascii parent in 66 + let parent_lower = Astring.String.Ascii.lowercase parent in 67 67 if not (is_child_allowed ~parent:parent_lower ~child:name_lower) then 68 68 Message_collector.add_typed collector 69 69 (`Element (`Not_allowed_as_child (`Child name_lower, `Parent parent_lower))) ··· 71 71 72 72 (* Handle figure content model *) 73 73 (match state.element_stack with 74 - | parent :: _ when String.lowercase_ascii parent = "figure" -> 74 + | parent :: _ when Astring.String.Ascii.lowercase parent = "figure" -> 75 75 (* We're inside a figure, check content model *) 76 76 (match state.figure_stack with 77 77 | fig :: _ -> ··· 99 99 state.element_stack <- name :: state.element_stack 100 100 101 101 let end_element state ~tag _collector = 102 - let name_lower = String.lowercase_ascii (Tag.tag_to_string tag) in 102 + let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string tag) in 103 103 (* Pop figure state if leaving a figure *) 104 104 if name_lower = "figure" then begin 105 105 match state.figure_stack with ··· 115 115 match state.element_stack with 116 116 | [] -> () 117 117 | parent :: _ -> 118 - let parent_lower = String.lowercase_ascii parent in 118 + let parent_lower = Astring.String.Ascii.lowercase parent in 119 119 let trimmed = String.trim text in 120 120 if trimmed <> "" then begin 121 121 if parent_lower = "figure" then begin
+46 -13
lib/js/htmlrw_js_ui.ml
··· 6 6 open Brr 7 7 open Htmlrw_js_types 8 8 9 + let console_log msg = 10 + ignore (Jv.call (Jv.get Jv.global "console") "log" [| Jv.of_string ("[html5rw-ui] " ^ msg) |]) 11 + 9 12 module Css_class = struct 10 13 let panel = Jstr.v "html5rw-panel" 11 14 let panel_header = Jstr.v "html5rw-panel-header" ··· 71 74 let highlighted_element t = t.highlighted 72 75 73 76 let clear_highlight t = 77 + console_log (Printf.sprintf "clear_highlight: highlighted is %s" 78 + (if t.highlighted = None then "None" else "Some")); 74 79 match t.highlighted with 75 80 | Some el -> 81 + console_log "clear_highlight: unhighlighting element"; 76 82 Htmlrw_js_annotate.unhighlight_element el; 77 - t.highlighted <- None 78 - | None -> () 83 + t.highlighted <- None; 84 + console_log "clear_highlight: done" 85 + | None -> 86 + console_log "clear_highlight: nothing to clear" 79 87 80 88 let navigate_to_element t bm = 81 89 clear_highlight t; ··· 190 198 El.set_inline_style (Jstr.v "display") (Jstr.v "none") t.root 191 199 192 200 let destroy t = 201 + console_log "destroy: starting"; 202 + clear_highlight t; 203 + console_log "destroy: cleared highlight"; 204 + (* Clear _current_panel before removing element to avoid comparison issues *) 205 + (match !_current_panel with 206 + | Some p when p.root == t.root -> _current_panel := None 207 + | _ -> ()); 208 + console_log "destroy: cleared current_panel ref"; 193 209 El.remove t.root; 194 - if !_current_panel = Some t then _current_panel := None 210 + console_log "destroy: removed root element, done" 195 211 196 212 let hide_current () = 197 - match !_current_panel with Some t -> destroy t | None -> () 213 + console_log (Printf.sprintf "hide_current: current_panel is %s" 214 + (if !_current_panel = None then "None" else "Some")); 215 + match !_current_panel with 216 + | Some t -> 217 + console_log "hide_current: destroying existing panel"; 218 + destroy t 219 + | None -> 220 + console_log "hide_current: no panel to destroy" 198 221 199 222 let create ~config result = 223 + console_log (Printf.sprintf "create: starting with %d messages" (List.length result.messages)); 200 224 hide_current (); 225 + console_log "create: hide_current done"; 201 226 202 227 let _doc = G.document in 203 228 204 229 let title = El.v (Jstr.v "span") [El.txt' "HTML5 Validation"] in 205 230 206 - let collapse_btn = El.v (Jstr.v "button") ~at:[At.class' Css_class.collapse_btn] [ 207 - El.txt' "_" 208 - ] in 209 - 210 231 let close_btn = El.v (Jstr.v "button") ~at:[At.class' Css_class.close_btn] [ 211 232 El.txt' "x" 212 233 ] in 213 234 214 235 let header = El.v (Jstr.v "div") ~at:[At.class' Css_class.panel_header] [ 215 - title; collapse_btn; close_btn 236 + title; close_btn 216 237 ] in 217 238 218 239 let error_count = List.length (List.filter (fun bm -> ··· 268 289 269 290 update t result; 270 291 271 - ignore (Ev.listen Ev.click (fun _ -> toggle_collapsed t) (El.as_target collapse_btn)); 292 + (* Stop mousedown from bubbling to header (prevents drag interference) *) 293 + ignore (Ev.listen Ev.mousedown (fun ev -> 294 + console_log "close_btn: mousedown, stopping propagation"; 295 + Ev.stop_propagation ev 296 + ) (El.as_target close_btn)); 272 297 273 - ignore (Ev.listen Ev.click (fun _ -> 298 + ignore (Ev.listen Ev.click (fun ev -> 299 + console_log "close_btn: click handler starting"; 300 + Ev.stop_propagation ev; 301 + console_log "close_btn: stopped propagation, calling destroy"; 274 302 destroy t; 275 - match t.on_close with Some f -> f () | None -> () 303 + console_log "close_btn: destroy done, checking on_close callback"; 304 + (match t.on_close with Some f -> f () | None -> ()); 305 + console_log "close_btn: click handler done" 276 306 ) (El.as_target close_btn)); 277 307 278 308 if config.draggable then begin ··· 307 337 if config.start_collapsed then 308 338 El.set_class Css_class.panel_collapsed true root; 309 339 340 + console_log "create: appending panel to document body"; 310 341 El.append_children (Document.body G.document) [root]; 311 342 312 343 _current_panel := Some t; 344 + console_log "create: panel creation complete"; 313 345 t 314 346 315 347 let on_warning_click t f = t.on_warning_click <- Some f ··· 381 413 width: 24px; height: 24px; margin-left: 8px; 382 414 border: none; border-radius: 4px; 383 415 background: transparent; color: var(--html5rw-panel-text); 384 - cursor: pointer; font-size: 14px; line-height: 1; 416 + cursor: pointer; font-size: 14px; 417 + display: flex; align-items: center; justify-content: center; 385 418 } 386 419 387 420 .html5rw-panel-header button:hover { background: rgba(0, 0, 0, 0.1); }