OCaml HTML5 parser/serialiser based on Python's JustHTML

refactors

+96
lib/check/ancestor_tracker.ml
···
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Generic ancestor tracking for DOM traversal. *) 7 + 8 + type 'data context = { 9 + name : string; 10 + data : 'data; 11 + } 12 + 13 + type 'data t = { 14 + mutable stack : 'data context list; 15 + } 16 + 17 + let create () = { stack = [] } 18 + 19 + let reset tracker = tracker.stack <- [] 20 + 21 + let push tracker name data = 22 + let name_lower = Astring.String.Ascii.lowercase name in 23 + let context = { name = name_lower; data } in 24 + tracker.stack <- context :: tracker.stack 25 + 26 + let pop tracker = 27 + match tracker.stack with 28 + | [] -> () (* Gracefully handle underflow *) 29 + | _ :: rest -> tracker.stack <- rest 30 + 31 + let peek tracker = 32 + match tracker.stack with 33 + | [] -> None 34 + | hd :: _ -> Some hd 35 + 36 + let depth tracker = List.length tracker.stack 37 + 38 + let has_ancestor tracker name = 39 + let name_lower = Astring.String.Ascii.lowercase name in 40 + List.exists (fun ctx -> String.equal ctx.name name_lower) tracker.stack 41 + 42 + let has_ancestor_with tracker predicate = 43 + List.exists (fun ctx -> predicate ctx.name ctx.data) tracker.stack 44 + 45 + let find_ancestor tracker name = 46 + let name_lower = Astring.String.Ascii.lowercase name in 47 + List.find_opt (fun ctx -> String.equal ctx.name name_lower) tracker.stack 48 + 49 + let find_ancestor_with tracker predicate = 50 + List.find_opt (fun ctx -> predicate ctx.name ctx.data) tracker.stack 51 + 52 + let get_all_ancestors tracker = tracker.stack 53 + 54 + let filter_ancestors tracker predicate = 55 + List.filter (fun ctx -> predicate ctx.name ctx.data) tracker.stack 56 + 57 + let exists = has_ancestor_with 58 + 59 + let for_all tracker predicate = 60 + List.for_all (fun ctx -> predicate ctx.name ctx.data) tracker.stack 61 + 62 + let iter tracker f = 63 + List.iter (fun ctx -> f ctx.name ctx.data) tracker.stack 64 + 65 + let fold tracker f init = 66 + List.fold_left (fun acc ctx -> f acc ctx.name ctx.data) init tracker.stack 67 + 68 + let get_parent = peek 69 + 70 + let get_parent_name tracker = 71 + match peek tracker with 72 + | Some ctx -> Some ctx.name 73 + | None -> None 74 + 75 + let get_parent_data tracker = 76 + match peek tracker with 77 + | Some ctx -> Some ctx.data 78 + | None -> None 79 + 80 + let has_any_ancestor tracker names = 81 + let names_lower = List.map Astring.String.Ascii.lowercase names in 82 + List.exists (fun ctx -> List.mem ctx.name names_lower) tracker.stack 83 + 84 + let find_first_matching tracker names = 85 + let names_lower = List.map Astring.String.Ascii.lowercase names in 86 + List.find_map (fun ctx -> 87 + if List.mem ctx.name names_lower then Some (ctx.name, ctx) 88 + else None 89 + ) tracker.stack 90 + 91 + let is_empty tracker = tracker.stack = [] 92 + 93 + let to_list = get_all_ancestors 94 + 95 + let to_name_list tracker = 96 + List.map (fun ctx -> ctx.name) tracker.stack
+236
lib/check/ancestor_tracker.mli
···
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Generic ancestor tracking for DOM traversal. 7 + 8 + This module provides a generic stack-based ancestor tracker that can be 9 + used by various HTML checkers to track element nesting during DOM 10 + traversal. It is parameterized by the type of data stored per element. 11 + 12 + {2 Design} 13 + 14 + The tracker maintains a stack of element contexts, where each context 15 + contains: 16 + - The element name (string) 17 + - Custom data of type ['data] (checker-specific) 18 + 19 + The stack is automatically managed through [push] and [pop] operations 20 + that should be called in response to start_element and end_element events. 21 + 22 + {2 Usage Example} 23 + 24 + {[ 25 + (* Define custom data type *) 26 + type role_data = { 27 + explicit_roles : string list; 28 + implicit_role : string option; 29 + } 30 + 31 + (* Create tracker *) 32 + let tracker = Ancestor_tracker.create () 33 + 34 + (* Push element context *) 35 + let data = { explicit_roles = ["button"]; implicit_role = None } in 36 + Ancestor_tracker.push tracker "div" data; 37 + 38 + (* Query ancestors *) 39 + let has_button = Ancestor_tracker.exists tracker 40 + (fun name data -> List.mem "button" data.explicit_roles) 41 + in 42 + 43 + (* Pop when element closes *) 44 + Ancestor_tracker.pop tracker 45 + ]} 46 + *) 47 + 48 + (** {1 Types} *) 49 + 50 + (** Ancestor context containing element name and custom data. *) 51 + type 'data context = { 52 + name : string; 53 + (** Element name (lowercase). *) 54 + data : 'data; 55 + (** Checker-specific data. *) 56 + } 57 + 58 + (** Ancestor tracker state. *) 59 + type 'data t 60 + 61 + (** {1 Creation} *) 62 + 63 + val create : unit -> 'data t 64 + (** [create ()] creates a new empty ancestor tracker. *) 65 + 66 + val reset : 'data t -> unit 67 + (** [reset tracker] clears all ancestor contexts from the tracker. *) 68 + 69 + (** {1 Stack Operations} *) 70 + 71 + val push : 'data t -> string -> 'data -> unit 72 + (** [push tracker name data] pushes a new element context onto the stack. 73 + 74 + This should be called in [start_element] event handlers. 75 + 76 + @param tracker The ancestor tracker 77 + @param name The element name (will be lowercased) 78 + @param data Checker-specific data to associate with this element *) 79 + 80 + val pop : 'data t -> unit 81 + (** [pop tracker] pops the most recent element context from the stack. 82 + 83 + This should be called in [end_element] event handlers. 84 + 85 + @param tracker The ancestor tracker *) 86 + 87 + val peek : 'data t -> 'data context option 88 + (** [peek tracker] returns the most recent element context without removing it. 89 + 90 + @param tracker The ancestor tracker 91 + @return [Some context] if the stack is non-empty, [None] otherwise *) 92 + 93 + val depth : 'data t -> int 94 + (** [depth tracker] returns the current stack depth (number of ancestors). 95 + 96 + @param tracker The ancestor tracker 97 + @return The number of elements on the stack *) 98 + 99 + (** {1 Ancestor Queries} *) 100 + 101 + val has_ancestor : 'data t -> string -> bool 102 + (** [has_ancestor tracker name] checks if an element with the given name 103 + exists anywhere in the ancestor chain. 104 + 105 + @param tracker The ancestor tracker 106 + @param name The element name to search for (case-insensitive) 107 + @return [true] if an ancestor with this name exists *) 108 + 109 + val has_ancestor_with : 'data t -> (string -> 'data -> bool) -> bool 110 + (** [has_ancestor_with tracker predicate] checks if any ancestor satisfies 111 + the given predicate. 112 + 113 + @param tracker The ancestor tracker 114 + @param predicate Function that tests element name and data 115 + @return [true] if any ancestor satisfies the predicate *) 116 + 117 + val find_ancestor : 'data t -> string -> 'data context option 118 + (** [find_ancestor tracker name] finds the nearest ancestor with the given name. 119 + 120 + @param tracker The ancestor tracker 121 + @param name The element name to search for (case-insensitive) 122 + @return [Some context] for the nearest matching ancestor, [None] if not found *) 123 + 124 + val find_ancestor_with : 'data t -> (string -> 'data -> bool) -> 'data context option 125 + (** [find_ancestor_with tracker predicate] finds the nearest ancestor that 126 + satisfies the predicate. 127 + 128 + @param tracker The ancestor tracker 129 + @param predicate Function that tests element name and data 130 + @return [Some context] for the nearest matching ancestor, [None] if not found *) 131 + 132 + val get_all_ancestors : 'data t -> 'data context list 133 + (** [get_all_ancestors tracker] returns all ancestor contexts from nearest to root. 134 + 135 + @param tracker The ancestor tracker 136 + @return List of contexts, with the most recent first *) 137 + 138 + val filter_ancestors : 'data t -> (string -> 'data -> bool) -> 'data context list 139 + (** [filter_ancestors tracker predicate] returns all ancestors that satisfy 140 + the predicate. 141 + 142 + @param tracker The ancestor tracker 143 + @param predicate Function that tests element name and data 144 + @return List of matching contexts, from nearest to root *) 145 + 146 + val exists : 'data t -> (string -> 'data -> bool) -> bool 147 + (** [exists tracker predicate] checks if any ancestor satisfies the predicate. 148 + 149 + This is an alias for {!has_ancestor_with}. 150 + 151 + @param tracker The ancestor tracker 152 + @param predicate Function that tests element name and data 153 + @return [true] if any ancestor satisfies the predicate *) 154 + 155 + val for_all : 'data t -> (string -> 'data -> bool) -> bool 156 + (** [for_all tracker predicate] checks if all ancestors satisfy the predicate. 157 + 158 + @param tracker The ancestor tracker 159 + @param predicate Function that tests element name and data 160 + @return [true] if all ancestors satisfy the predicate (vacuously true for empty stack) *) 161 + 162 + val iter : 'data t -> (string -> 'data -> unit) -> unit 163 + (** [iter tracker f] applies function [f] to each ancestor from nearest to root. 164 + 165 + @param tracker The ancestor tracker 166 + @param f Function to apply to each ancestor *) 167 + 168 + val fold : 'data t -> ('acc -> string -> 'data -> 'acc) -> 'acc -> 'acc 169 + (** [fold tracker f init] folds over ancestors from nearest to root. 170 + 171 + @param tracker The ancestor tracker 172 + @param f Folding function 173 + @param init Initial accumulator value 174 + @return Final accumulator value *) 175 + 176 + (** {1 Parent Access} *) 177 + 178 + val get_parent : 'data t -> 'data context option 179 + (** [get_parent tracker] returns the immediate parent element context. 180 + 181 + This is equivalent to {!peek}. 182 + 183 + @param tracker The ancestor tracker 184 + @return [Some context] for the parent, [None] if at root *) 185 + 186 + val get_parent_name : 'data t -> string option 187 + (** [get_parent_name tracker] returns the immediate parent element name. 188 + 189 + @param tracker The ancestor tracker 190 + @return [Some name] for the parent, [None] if at root *) 191 + 192 + val get_parent_data : 'data t -> 'data option 193 + (** [get_parent_data tracker] returns the immediate parent's custom data. 194 + 195 + @param tracker The ancestor tracker 196 + @return [Some data] for the parent, [None] if at root *) 197 + 198 + (** {1 Multiple Ancestor Queries} *) 199 + 200 + val has_any_ancestor : 'data t -> string list -> bool 201 + (** [has_any_ancestor tracker names] checks if any of the given element names 202 + exists in the ancestor chain. 203 + 204 + @param tracker The ancestor tracker 205 + @param names List of element names to search for 206 + @return [true] if any name matches an ancestor *) 207 + 208 + val find_first_matching : 'data t -> string list -> (string * 'data context) option 209 + (** [find_first_matching tracker names] finds the nearest ancestor that matches 210 + any of the given names. 211 + 212 + @param tracker The ancestor tracker 213 + @param names List of element names to search for 214 + @return [Some (matched_name, context)] for the first match, [None] if no match *) 215 + 216 + (** {1 Stack Inspection} *) 217 + 218 + val is_empty : 'data t -> bool 219 + (** [is_empty tracker] checks if the stack is empty (at document root). 220 + 221 + @param tracker The ancestor tracker 222 + @return [true] if no elements are on the stack *) 223 + 224 + val to_list : 'data t -> 'data context list 225 + (** [to_list tracker] converts the stack to a list of contexts. 226 + 227 + This is an alias for {!get_all_ancestors}. 228 + 229 + @param tracker The ancestor tracker 230 + @return List of contexts from nearest to root *) 231 + 232 + val to_name_list : 'data t -> string list 233 + (** [to_name_list tracker] returns just the element names in the stack. 234 + 235 + @param tracker The ancestor tracker 236 + @return List of element names from nearest to root *)
+2 -2
lib/check/attr_utils.ml
··· 3 type attrs = (string * string) list 4 5 let has_attr name attrs = 6 - List.exists (fun (n, _) -> String.lowercase_ascii n = name) attrs 7 8 let get_attr name attrs = 9 List.find_map (fun (n, v) -> 10 - if String.lowercase_ascii n = name then Some v else None 11 ) attrs 12 13 let get_attr_or name ~default attrs =
··· 3 type attrs = (string * string) list 4 5 let has_attr name attrs = 6 + List.exists (fun (n, _) -> Astring.String.Ascii.lowercase n = name) attrs 7 8 let get_attr name attrs = 9 List.find_map (fun (n, v) -> 10 + if Astring.String.Ascii.lowercase n = name then Some v else None 11 ) attrs 12 13 let get_attr_or name ~default attrs =
+1
lib/check/checker_registry.ml
··· 36 Hashtbl.replace reg "xhtml-content" Xhtml_content_checker.checker; 37 Hashtbl.replace reg "lang-detecting" Lang_detecting_checker.checker; 38 Hashtbl.replace reg "unknown-element" Unknown_element_checker.checker; 39 reg 40 41 let register registry name checker = Hashtbl.replace registry name checker
··· 36 Hashtbl.replace reg "xhtml-content" Xhtml_content_checker.checker; 37 Hashtbl.replace reg "lang-detecting" Lang_detecting_checker.checker; 38 Hashtbl.replace reg "unknown-element" Unknown_element_checker.checker; 39 + Hashtbl.replace reg "content" Content_checker.checker; 40 reg 41 42 let register registry name checker = Hashtbl.replace registry name checker
+64 -7
lib/check/content_model/content_checker.ml
··· 30 | Some spec -> 31 List.exists (fun cat -> Element_spec.has_category spec cat) cats) 32 | Content_model.Elements names -> 33 - List.mem (String.lowercase_ascii element_name) 34 - (List.map String.lowercase_ascii names) 35 | Content_model.Mixed cats -> ( 36 match Element_registry.get registry element_name with 37 | None -> false ··· 79 (`Element (`Not_allowed_as_child (`Child name, `Parent prohibited)))) 80 spec.Element_spec.prohibited_ancestors 81 82 (* Validate that a child element is allowed *) 83 - let validate_child_element state child_name collector = 84 match state.ancestor_stack with 85 | [] -> 86 (* Root level - only html allowed *) 87 - if not (String.equal (String.lowercase_ascii child_name) "html") then 88 Message_collector.add_typed collector 89 (`Generic (Printf.sprintf "Element '%s' not allowed at document root (only 'html' allowed)" child_name)) 90 | parent :: _ -> 91 let content_model = parent.spec.Element_spec.content_model in 92 - if not (matches_content_model state.registry child_name content_model) then 93 Message_collector.add_typed collector 94 (`Element (`Not_allowed_as_child (`Child child_name, `Parent parent.name))) 95 96 let start_element state ~element collector = 97 let name = Tag.tag_to_string element.Element.tag in 98 99 (* Check if we're inside a foreign (SVG/MathML) context *) 100 let in_foreign_context = match state.ancestor_stack with ··· 127 match spec_opt with 128 | None -> 129 (* Unknown element - first check if it's allowed in current context *) 130 - validate_child_element state name collector 131 | Some spec -> 132 (* Check prohibited ancestors *) 133 check_prohibited_ancestors state name spec collector; 134 135 (* Validate this element is allowed as child of parent *) 136 - validate_child_element state name collector; 137 138 (* Push element context onto stack *) 139 let context = { name; spec; children_count = 0; is_foreign = false } in
··· 30 | Some spec -> 31 List.exists (fun cat -> Element_spec.has_category spec cat) cats) 32 | Content_model.Elements names -> 33 + List.mem (Astring.String.Ascii.lowercase element_name) 34 + (List.map Astring.String.Ascii.lowercase names) 35 | Content_model.Mixed cats -> ( 36 match Element_registry.get registry element_name with 37 | None -> false ··· 79 (`Element (`Not_allowed_as_child (`Child name, `Parent prohibited)))) 80 spec.Element_spec.prohibited_ancestors 81 82 + (* Check if element is allowed via permitted_parents *) 83 + let is_permitted_parent registry child_name parent_name = 84 + match Element_registry.get registry child_name with 85 + | None -> false 86 + | Some spec -> 87 + match spec.Element_spec.permitted_parents with 88 + | None -> false 89 + | Some parents -> 90 + List.mem (Astring.String.Ascii.lowercase parent_name) 91 + (List.map Astring.String.Ascii.lowercase parents) 92 + 93 + (* Check if a specific element is in the ancestor stack *) 94 + let has_ancestor state ancestor_name = 95 + List.exists (fun ctx -> 96 + String.equal (Astring.String.Ascii.lowercase ctx.name) 97 + (Astring.String.Ascii.lowercase ancestor_name) 98 + ) state.ancestor_stack 99 + 100 + (* Check if an attribute exists in raw attrs list *) 101 + let has_raw_attr name attrs = 102 + List.exists (fun (n, _) -> 103 + Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name 104 + ) attrs 105 + 106 + (* Special cases for content model validation: 107 + - dt/dd inside div is only valid when dl is an ancestor (div as grouping in dl) 108 + - meta with property/itemprop/name attribute in body is valid (RDFa/microdata) 109 + - link with itemprop in body is valid (microdata) *) 110 + let is_special_case_allowed state child_name parent_name raw_attrs = 111 + let child_lower = Astring.String.Ascii.lowercase child_name in 112 + let parent_lower = Astring.String.Ascii.lowercase parent_name in 113 + (* dt/dd inside div is allowed when dl is an ancestor *) 114 + if (child_lower = "dt" || child_lower = "dd") && parent_lower = "div" then 115 + has_ancestor state "dl" 116 + (* meta in body is allowed with property (RDFa), itemprop (microdata), or name+content (meta tags) *) 117 + else if child_lower = "meta" && parent_lower <> "head" then 118 + has_raw_attr "property" raw_attrs || 119 + has_raw_attr "itemprop" raw_attrs || 120 + (has_raw_attr "name" raw_attrs && has_raw_attr "content" raw_attrs) 121 + (* link in body is allowed with itemprop (microdata) or property (RDFa) *) 122 + else if child_lower = "link" && parent_lower <> "head" then 123 + has_raw_attr "itemprop" raw_attrs || has_raw_attr "property" raw_attrs 124 + (* Custom elements (with hyphen) are valid HTML5 and are flow content *) 125 + else if String.contains child_lower '-' then 126 + true 127 + else 128 + false 129 + 130 (* Validate that a child element is allowed *) 131 + let validate_child_element state child_name raw_attrs collector = 132 match state.ancestor_stack with 133 | [] -> 134 (* Root level - only html allowed *) 135 + if not (String.equal (Astring.String.Ascii.lowercase child_name) "html") then 136 Message_collector.add_typed collector 137 (`Generic (Printf.sprintf "Element '%s' not allowed at document root (only 'html' allowed)" child_name)) 138 | parent :: _ -> 139 let content_model = parent.spec.Element_spec.content_model in 140 + (* Check content model, permitted_parents, or special cases *) 141 + let allowed_by_content_model = matches_content_model state.registry child_name content_model in 142 + let allowed_by_permitted_parents = is_permitted_parent state.registry child_name parent.name in 143 + let allowed_by_special_case = is_special_case_allowed state child_name parent.name raw_attrs in 144 + if not (allowed_by_content_model || allowed_by_permitted_parents || allowed_by_special_case) then 145 Message_collector.add_typed collector 146 (`Element (`Not_allowed_as_child (`Child child_name, `Parent parent.name))) 147 148 let start_element state ~element collector = 149 let name = Tag.tag_to_string element.Element.tag in 150 + let raw_attrs = element.Element.raw_attrs in 151 152 (* Check if we're inside a foreign (SVG/MathML) context *) 153 let in_foreign_context = match state.ancestor_stack with ··· 180 match spec_opt with 181 | None -> 182 (* Unknown element - first check if it's allowed in current context *) 183 + validate_child_element state name raw_attrs collector; 184 + (* Push unknown element onto stack with default flow content model *) 185 + let spec = Element_spec.make ~name ~content_model:(Content_model.Categories [Content_category.Flow]) () in 186 + let context = { name; spec; children_count = 0; is_foreign = false } in 187 + state.ancestor_stack <- context :: state.ancestor_stack 188 | Some spec -> 189 (* Check prohibited ancestors *) 190 check_prohibited_ancestors state name spec collector; 191 192 (* Validate this element is allowed as child of parent *) 193 + validate_child_element state name raw_attrs collector; 194 195 (* Push element context onto stack *) 196 let context = { name; spec; children_count = 0; is_foreign = false } in
+2 -2
lib/check/content_model/element_registry.ml
··· 3 let create () = Hashtbl.create 128 4 5 let register registry spec = 6 - let name = String.lowercase_ascii spec.Element_spec.name in 7 Hashtbl.replace registry name spec 8 9 let get registry name = 10 - let name = String.lowercase_ascii name in 11 Hashtbl.find_opt registry name 12 13 let list_names registry =
··· 3 let create () = Hashtbl.create 128 4 5 let register registry spec = 6 + let name = Astring.String.Ascii.lowercase spec.Element_spec.name in 7 Hashtbl.replace registry name spec 8 9 let get registry name = 10 + let name = Astring.String.Ascii.lowercase name in 11 Hashtbl.find_opt registry name 12 13 let list_names registry =
+3 -1
lib/check/content_model/elements_embedded.ml
··· 31 () 32 33 let img = 34 Element_spec.make ~name:"img" ~void:true 35 - ~categories:[ Flow; Phrasing; Embedded; Palpable; Interactive ] 36 ~content_model:Nothing 37 ~attrs: 38 [
··· 31 () 32 33 let img = 34 + (* Note: img is only Interactive when it has usemap attribute; 35 + we omit Interactive from static categories since usemap is rare *) 36 Element_spec.make ~name:"img" ~void:true 37 + ~categories:[ Flow; Phrasing; Embedded; Palpable ] 38 ~content_model:Nothing 39 ~attrs: 40 [
+1 -1
lib/check/content_model/elements_form.ml
··· 97 let select = 98 Element_spec.make ~name:"select" 99 ~categories:[Flow; Phrasing; Interactive; Palpable] 100 - ~content_model:(Elements ["option"; "optgroup"; "script"; "template"]) 101 ~attrs:[ 102 Attr_spec.make "autocomplete" ~datatype:"autocomplete" (); 103 Attr_spec.make "disabled" ~datatype:"boolean" ();
··· 97 let select = 98 Element_spec.make ~name:"select" 99 ~categories:[Flow; Phrasing; Interactive; Palpable] 100 + ~content_model:(Elements ["option"; "optgroup"; "hr"; "script"; "template"]) 101 ~attrs:[ 102 Attr_spec.make "autocomplete" ~datatype:"autocomplete" (); 103 Attr_spec.make "disabled" ~datatype:"boolean" ();
-1
lib/check/content_model/elements_table.ml
··· 34 ~categories:[] 35 ~content_model:(Categories [ Flow ]) 36 ~permitted_parents:[ "table" ] 37 - ~prohibited_ancestors:[ "table" ] 38 ~attrs:[] () 39 40 let colgroup =
··· 34 ~categories:[] 35 ~content_model:(Categories [ Flow ]) 36 ~permitted_parents:[ "table" ] 37 ~attrs:[] () 38 39 let colgroup =
+4 -8
lib/check/datatype/datatype.ml
··· 32 | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true 33 | _ -> false 34 35 - (** Case conversion *) 36 - 37 - let to_ascii_lowercase c = 38 - match c with 'A' .. 'Z' -> Char.chr (Char.code c + 32) | _ -> c 39 40 - let string_to_ascii_lowercase s = 41 - String.map to_ascii_lowercase s 42 43 (** String predicates *) 44 ··· 78 let make_enum ~name ~values ?(allow_empty = true) () : t = 79 (* Pre-compute hashtable for O(1) membership *) 80 let values_tbl = Hashtbl.create (List.length values) in 81 - List.iter (fun v -> Hashtbl.add values_tbl (String.lowercase_ascii v) ()) values; 82 let values_str = String.concat ", " (List.map (Printf.sprintf "'%s'") values) in 83 (module struct 84 let name = name 85 let validate s = 86 - let s_lower = string_to_ascii_lowercase s in 87 if (allow_empty && s = "") || Hashtbl.mem values_tbl s_lower then Ok () 88 else Error (Printf.sprintf "The value '%s' is not a valid %s value. Expected %s%s." 89 s name (if allow_empty then "empty string, " else "") values_str)
··· 32 | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true 33 | _ -> false 34 35 + (** Case conversion - delegated to Astring *) 36 37 + (* Removed to_ascii_lowercase and string_to_ascii_lowercase - use Astring.String.Ascii.lowercase instead *) 38 39 (** String predicates *) 40 ··· 74 let make_enum ~name ~values ?(allow_empty = true) () : t = 75 (* Pre-compute hashtable for O(1) membership *) 76 let values_tbl = Hashtbl.create (List.length values) in 77 + List.iter (fun v -> Hashtbl.add values_tbl (Astring.String.Ascii.lowercase v) ()) values; 78 let values_str = String.concat ", " (List.map (Printf.sprintf "'%s'") values) in 79 (module struct 80 let name = name 81 let validate s = 82 + let s_lower = Astring.String.Ascii.lowercase s in 83 if (allow_empty && s = "") || Hashtbl.mem values_tbl s_lower then Ok () 84 else Error (Printf.sprintf "The value '%s' is not a valid %s value. Expected %s%s." 85 s name (if allow_empty then "empty string, " else "") values_str)
+1 -5
lib/check/datatype/datatype.mli
··· 54 55 (** {2 Case conversion} *) 56 57 - (** Convert an ASCII character to lowercase. *) 58 - val to_ascii_lowercase : char -> char 59 - 60 - (** Convert an ASCII string to lowercase. *) 61 - val string_to_ascii_lowercase : string -> string 62 63 (** {2 String predicates} *) 64
··· 54 55 (** {2 Case conversion} *) 56 57 + (** Case conversion functions removed - use Astring.String.Ascii.lowercase instead *) 58 59 (** {2 String predicates} *) 60
+2 -1
lib/check/datatype/dt_autocomplete.ml
··· 2 3 (* Use shared utilities from Datatype *) 4 let is_whitespace = Datatype.is_whitespace 5 - let to_ascii_lowercase = Datatype.to_ascii_lowercase 6 7 (* Use Astring for string operations *) 8 let is_prefix = Astring.String.is_prefix
··· 2 3 (* Use shared utilities from Datatype *) 4 let is_whitespace = Datatype.is_whitespace 5 + let to_ascii_lowercase c = 6 + match c with 'A' .. 'Z' -> Char.chr (Char.code c + 32) | _ -> c 7 8 (* Use Astring for string operations *) 9 let is_prefix = Astring.String.is_prefix
+2 -2
lib/check/datatype/dt_boolean.ml
··· 22 match s with 23 | "" | "true" | "false" -> Ok () 24 | _ -> 25 - let s_lower = Datatype.string_to_ascii_lowercase s in 26 - let attr_lower = Datatype.string_to_ascii_lowercase attr_name in 27 if s_lower = attr_lower then Ok () 28 else 29 Error
··· 22 match s with 23 | "" | "true" | "false" -> Ok () 24 | _ -> 25 + let s_lower = Astring.String.Ascii.lowercase s in 26 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 27 if s_lower = attr_lower then Ok () 28 else 29 Error
+1 -1
lib/check/datatype/dt_button_type.ml
··· 7 let name = "button-type" 8 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 11 if List.mem s_lower valid_types then Ok () 12 else 13 Error
··· 7 let name = "button-type" 8 9 let validate s = 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 if List.mem s_lower valid_types then Ok () 12 else 13 Error
+1 -1
lib/check/datatype/dt_charset.ml
··· 6 c = '\'' || c = '+' || c = '_' || c = '`' || c = '{' || c = '}' || 7 c = '~' || c = '^' 8 9 - let to_lower = Datatype.string_to_ascii_lowercase 10 11 (** Common encoding labels recognized by WHATWG Encoding Standard. 12 This is a subset of the full list. *)
··· 6 c = '\'' || c = '+' || c = '_' || c = '`' || c = '{' || c = '}' || 7 c = '~' || c = '^' 8 9 + let to_lower = Astring.String.Ascii.lowercase 10 11 (** Common encoding labels recognized by WHATWG Encoding Standard. 12 This is a subset of the full list. *)
+1 -1
lib/check/datatype/dt_color.ml
··· 208 let name = "color" 209 210 let validate s = 211 - let s = String.trim s |> String.lowercase_ascii in 212 if String.length s = 0 then Error "Color value must not be empty" 213 else if List.mem s named_colors then Ok () 214 else if String.length s > 0 && s.[0] = '#' then validate_hex_color s
··· 208 let name = "color" 209 210 let validate s = 211 + let s = String.trim s |> Astring.String.Ascii.lowercase in 212 if String.length s = 0 then Error "Color value must not be empty" 213 else if List.mem s named_colors then Ok () 214 else if String.length s > 0 && s.[0] = '#' then validate_hex_color s
+1 -1
lib/check/datatype/dt_contenteditable.ml
··· 4 let name = "contenteditable" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "" | "true" | "false" | "plaintext-only" -> Ok () 10 | _ ->
··· 4 let name = "contenteditable" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "" | "true" | "false" | "plaintext-only" -> Ok () 10 | _ ->
+1 -1
lib/check/datatype/dt_crossorigin.ml
··· 4 let name = "crossorigin" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "" | "anonymous" | "use-credentials" -> Ok () 10 | _ ->
··· 4 let name = "crossorigin" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "" | "anonymous" | "use-credentials" -> Ok () 10 | _ ->
+1 -1
lib/check/datatype/dt_decoding.ml
··· 4 let name = "decoding" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "" | "sync" | "async" | "auto" -> Ok () 10 | _ ->
··· 4 let name = "decoding" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "" | "sync" | "async" | "auto" -> Ok () 10 | _ ->
+1 -1
lib/check/datatype/dt_dir.ml
··· 4 let name = "dir" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "" | "ltr" | "rtl" | "auto" -> Ok () 10 | _ ->
··· 4 let name = "dir" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "" | "ltr" | "rtl" | "auto" -> Ok () 10 | _ ->
+1 -1
lib/check/datatype/dt_draggable.ml
··· 4 let name = "draggable" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "true" | "false" -> Ok () 10 | _ ->
··· 4 let name = "draggable" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "true" | "false" -> Ok () 10 | _ ->
+1 -1
lib/check/datatype/dt_enterkeyhint.ml
··· 4 let name = "enterkeyhint" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "" | "enter" | "done" | "go" | "next" | "previous" | "search" | "send" -> 10 Ok ()
··· 4 let name = "enterkeyhint" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "" | "enter" | "done" | "go" | "next" | "previous" | "search" | "send" -> 10 Ok ()
+1 -1
lib/check/datatype/dt_fetchpriority.ml
··· 4 let name = "fetchpriority" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "" | "high" | "low" | "auto" -> Ok () 10 | _ ->
··· 4 let name = "fetchpriority" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "" | "high" | "low" | "auto" -> Ok () 10 | _ ->
+1 -1
lib/check/datatype/dt_form_enctype.ml
··· 12 let name = "form-enctype" 13 14 let validate s = 15 - let s_lower = Datatype.string_to_ascii_lowercase s in 16 if List.mem s_lower valid_enctypes then Ok () 17 else 18 Error
··· 12 let name = "form-enctype" 13 14 let validate s = 15 + let s_lower = Astring.String.Ascii.lowercase s in 16 if List.mem s_lower valid_enctypes then Ok () 17 else 18 Error
+1 -1
lib/check/datatype/dt_form_method.ml
··· 7 let name = "form-method" 8 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 11 if List.mem s_lower valid_methods then Ok () 12 else 13 Error
··· 7 let name = "form-method" 8 9 let validate s = 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 if List.mem s_lower valid_methods then Ok () 12 else 13 Error
+1 -1
lib/check/datatype/dt_hidden.ml
··· 4 let name = "hidden" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "" | "hidden" | "until-found" -> Ok () 10 | _ ->
··· 4 let name = "hidden" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "" | "hidden" | "until-found" -> Ok () 10 | _ ->
+1 -1
lib/check/datatype/dt_input_type.ml
··· 31 let name = "input-type" 32 33 let validate s = 34 - let s_lower = Datatype.string_to_ascii_lowercase s in 35 if List.mem s_lower valid_types then Ok () 36 else 37 Error
··· 31 let name = "input-type" 32 33 let validate s = 34 + let s_lower = Astring.String.Ascii.lowercase s in 35 if List.mem s_lower valid_types then Ok () 36 else 37 Error
+1 -1
lib/check/datatype/dt_inputmode.ml
··· 4 let name = "inputmode" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "" | "none" | "text" | "decimal" | "numeric" | "tel" | "search" 10 | "email" | "url" ->
··· 4 let name = "inputmode" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "" | "none" | "text" | "decimal" | "numeric" | "tel" | "search" 10 | "email" | "url" ->
+1 -1
lib/check/datatype/dt_integrity.ml
··· 49 "Hash value '%s' must be in format 'algorithm-base64hash'" trimmed) 50 | Some dash_pos -> 51 let algorithm = String.sub trimmed 0 dash_pos in 52 - let algorithm_lower = Datatype.string_to_ascii_lowercase algorithm in 53 if not (List.mem algorithm_lower valid_algorithms) then 54 Error 55 (Printf.sprintf
··· 49 "Hash value '%s' must be in format 'algorithm-base64hash'" trimmed) 50 | Some dash_pos -> 51 let algorithm = String.sub trimmed 0 dash_pos in 52 + let algorithm_lower = Astring.String.Ascii.lowercase algorithm in 53 if not (List.mem algorithm_lower valid_algorithms) then 54 Error 55 (Printf.sprintf
+1 -1
lib/check/datatype/dt_kind.ml
··· 4 let name = "kind" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "subtitles" | "captions" | "descriptions" | "chapters" | "metadata" -> Ok () 10 | _ ->
··· 4 let name = "kind" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "subtitles" | "captions" | "descriptions" | "chapters" | "metadata" -> Ok () 10 | _ ->
+1 -1
lib/check/datatype/dt_language.ml
··· 5 (* Use shared character predicates from Datatype *) 6 let is_all_alpha = Datatype.is_all_alpha 7 let is_all_alphanumeric = Datatype.is_all_alphanumeric 8 - let to_lower = Datatype.string_to_ascii_lowercase 9 10 (** Valid extlang subtags per IANA language-subtag-registry. 11 Extlangs are 3-letter subtags that follow the primary language.
··· 5 (* Use shared character predicates from Datatype *) 6 let is_all_alpha = Datatype.is_all_alpha 7 let is_all_alphanumeric = Datatype.is_all_alphanumeric 8 + let to_lower = Astring.String.Ascii.lowercase 9 10 (** Valid extlang subtags per IANA language-subtag-registry. 11 Extlangs are 3-letter subtags that follow the primary language.
+1 -1
lib/check/datatype/dt_list_type.ml
··· 26 let name = "ul-type" 27 28 let validate s = 29 - let s_lower = Datatype.string_to_ascii_lowercase s in 30 if List.mem s_lower valid_ul_types then Ok () 31 else 32 Error
··· 26 let name = "ul-type" 27 28 let validate s = 29 + let s_lower = Astring.String.Ascii.lowercase s in 30 if List.mem s_lower valid_ul_types then Ok () 31 else 32 Error
+1 -1
lib/check/datatype/dt_loading.ml
··· 4 let name = "loading" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "" | "lazy" | "eager" -> Ok () 10 | _ ->
··· 4 let name = "loading" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "" | "lazy" | "eager" -> Ok () 10 | _ ->
+8 -8
lib/check/datatype/dt_media_query.ml
··· 147 let trimmed = String.trim s in 148 if String.length trimmed >= 3 then begin 149 let suffix = String.sub trimmed (String.length trimmed - 3) 3 in 150 - if String.lowercase_ascii suffix = "and" then 151 Error "Parse Error." 152 else if String.length trimmed >= 4 then begin 153 let suffix4 = String.sub trimmed (String.length trimmed - 4) 4 in 154 - if String.lowercase_ascii suffix4 = "and(" then 155 Error "Parse Error." 156 else 157 validate_media_query_content trimmed ··· 197 let has_not = ref false in 198 (match read_ident () with 199 | Some w -> 200 - let w_lower = String.lowercase_ascii w in 201 if w_lower = "only" then (has_only := true; skip_ws ()) 202 else if w_lower = "not" then (has_not := true; skip_ws ()) 203 else i := !i - String.length w (* put back *) ··· 234 match read_ident () with 235 | None -> Error "Parse Error." 236 | Some kw -> 237 - let kw_lower = String.lowercase_ascii kw in 238 if kw_lower <> "and" then Error "Parse Error." 239 else begin 240 (* Check that there was whitespace before 'and' *) ··· 263 match read_ident () with 264 | None -> Error "Parse Error." 265 | Some kw2 -> 266 - let kw2_lower = String.lowercase_ascii kw2 in 267 if kw2_lower <> "and" then Error "Parse Error." 268 else begin 269 skip_ws (); ··· 291 match String.index_opt content ':' with 292 | None -> 293 (* Just feature name - boolean feature or range syntax *) 294 - let feature_lower = String.lowercase_ascii content in 295 if List.mem feature_lower deprecated_media_features then 296 Error (Printf.sprintf "Deprecated media feature \"%s\". For guidance, see the Deprecated Media Features section in the current Media Queries specification." feature_lower) 297 else if List.mem feature_lower valid_media_features then ··· 301 | Some colon_pos -> 302 let feature = String.trim (String.sub content 0 colon_pos) in 303 let value = String.trim (String.sub content (colon_pos + 1) (String.length content - colon_pos - 1)) in 304 - let feature_lower = String.lowercase_ascii feature in 305 306 (* Check for deprecated features *) 307 if List.mem feature_lower deprecated_media_features then ··· 362 else if unit_part = "" then 363 Error "only \"0\" can be a \"unit\". You must put a unit after your number" 364 else begin 365 - let unit_lower = String.lowercase_ascii unit_part in 366 if List.mem unit_lower valid_length_units then Ok () 367 else if List.mem unit_lower valid_resolution_units then 368 Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature)
··· 147 let trimmed = String.trim s in 148 if String.length trimmed >= 3 then begin 149 let suffix = String.sub trimmed (String.length trimmed - 3) 3 in 150 + if Astring.String.Ascii.lowercase suffix = "and" then 151 Error "Parse Error." 152 else if String.length trimmed >= 4 then begin 153 let suffix4 = String.sub trimmed (String.length trimmed - 4) 4 in 154 + if Astring.String.Ascii.lowercase suffix4 = "and(" then 155 Error "Parse Error." 156 else 157 validate_media_query_content trimmed ··· 197 let has_not = ref false in 198 (match read_ident () with 199 | Some w -> 200 + let w_lower = Astring.String.Ascii.lowercase w in 201 if w_lower = "only" then (has_only := true; skip_ws ()) 202 else if w_lower = "not" then (has_not := true; skip_ws ()) 203 else i := !i - String.length w (* put back *) ··· 234 match read_ident () with 235 | None -> Error "Parse Error." 236 | Some kw -> 237 + let kw_lower = Astring.String.Ascii.lowercase kw in 238 if kw_lower <> "and" then Error "Parse Error." 239 else begin 240 (* Check that there was whitespace before 'and' *) ··· 263 match read_ident () with 264 | None -> Error "Parse Error." 265 | Some kw2 -> 266 + let kw2_lower = Astring.String.Ascii.lowercase kw2 in 267 if kw2_lower <> "and" then Error "Parse Error." 268 else begin 269 skip_ws (); ··· 291 match String.index_opt content ':' with 292 | None -> 293 (* Just feature name - boolean feature or range syntax *) 294 + let feature_lower = Astring.String.Ascii.lowercase content in 295 if List.mem feature_lower deprecated_media_features then 296 Error (Printf.sprintf "Deprecated media feature \"%s\". For guidance, see the Deprecated Media Features section in the current Media Queries specification." feature_lower) 297 else if List.mem feature_lower valid_media_features then ··· 301 | Some colon_pos -> 302 let feature = String.trim (String.sub content 0 colon_pos) in 303 let value = String.trim (String.sub content (colon_pos + 1) (String.length content - colon_pos - 1)) in 304 + let feature_lower = Astring.String.Ascii.lowercase feature in 305 306 (* Check for deprecated features *) 307 if List.mem feature_lower deprecated_media_features then ··· 362 else if unit_part = "" then 363 Error "only \"0\" can be a \"unit\". You must put a unit after your number" 364 else begin 365 + let unit_lower = Astring.String.Ascii.lowercase unit_part in 366 if List.mem unit_lower valid_length_units then Ok () 367 else if List.mem unit_lower valid_resolution_units then 368 Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature)
+1 -1
lib/check/datatype/dt_mime.ml
··· 91 if is_token_char c then parse In_subtype (i + 1) 92 else if c = ';' then 93 (* Check if this is a JavaScript MIME type *) 94 - let mime_type = String.sub s 0 i |> String.lowercase_ascii in 95 if List.mem mime_type javascript_mime_types then 96 Error 97 "A JavaScript MIME type must not contain any characters after \
··· 91 if is_token_char c then parse In_subtype (i + 1) 92 else if c = ';' then 93 (* Check if this is a JavaScript MIME type *) 94 + let mime_type = String.sub s 0 i |> Astring.String.Ascii.lowercase in 95 if List.mem mime_type javascript_mime_types then 96 Error 97 "A JavaScript MIME type must not contain any characters after \
+1 -1
lib/check/datatype/dt_popover.ml
··· 4 let name = "popover" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "" | "auto" | "manual" -> Ok () 10 | _ ->
··· 4 let name = "popover" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "" | "auto" | "manual" -> Ok () 10 | _ ->
+1 -1
lib/check/datatype/dt_preload.ml
··· 7 let name = "preload" 8 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 11 if List.mem s_lower valid_preloads then Ok () 12 else 13 Error
··· 7 let name = "preload" 8 9 let validate s = 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 if List.mem s_lower valid_preloads then Ok () 12 else 13 Error
+1 -1
lib/check/datatype/dt_referrer.ml
··· 4 let name = "referrerpolicy" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "" 10 | "no-referrer"
··· 4 let name = "referrerpolicy" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "" 10 | "no-referrer"
+1 -1
lib/check/datatype/dt_scope.ml
··· 7 let name = "scope" 8 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 11 if List.mem s_lower valid_scopes then Ok () 12 else 13 Error
··· 7 let name = "scope" 8 9 let validate s = 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 if List.mem s_lower valid_scopes then Ok () 12 else 13 Error
+1 -1
lib/check/datatype/dt_shape.ml
··· 4 let name = "shape" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "default" | "rect" | "circle" | "poly" -> Ok () 10 | _ ->
··· 4 let name = "shape" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "default" | "rect" | "circle" | "poly" -> Ok () 10 | _ ->
+1 -1
lib/check/datatype/dt_spellcheck.ml
··· 4 let name = "spellcheck" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "" | "true" | "false" -> Ok () 10 | _ ->
··· 4 let name = "spellcheck" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "" | "true" | "false" -> Ok () 10 | _ ->
+1 -1
lib/check/datatype/dt_target.ml
··· 8 if String.length s = 0 then Error "Browsing context name must not be empty" 9 else if s.[0] = '_' then 10 (* If starts with underscore, must be a special keyword *) 11 - let lower = Datatype.string_to_ascii_lowercase s in 12 if List.mem lower special_keywords then Ok () 13 else 14 Error
··· 8 if String.length s = 0 then Error "Browsing context name must not be empty" 9 else if s.[0] = '_' then 10 (* If starts with underscore, must be a special keyword *) 11 + let lower = Astring.String.Ascii.lowercase s in 12 if List.mem lower special_keywords then Ok () 13 else 14 Error
+1 -1
lib/check/datatype/dt_translate.ml
··· 4 let name = "translate" 5 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 8 match s_lower with 9 | "" | "yes" | "no" -> Ok () 10 | _ ->
··· 4 let name = "translate" 5 6 let validate s = 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 match s_lower with 9 | "" | "yes" | "no" -> Ok () 10 | _ ->
+1 -1
lib/check/datatype/dt_url.ml
··· 30 match s.[i] with 31 | ':' -> 32 let scheme = 33 - String.sub s start (i - start) |> Datatype.string_to_ascii_lowercase 34 in 35 let rest = String.sub s (i + 1) (len - i - 1) in 36 Some (scheme, rest)
··· 30 match s.[i] with 31 | ':' -> 32 let scheme = 33 + String.sub s start (i - start) |> Astring.String.Ascii.lowercase 34 in 35 let rest = String.sub s (i + 1) (len - i - 1) in 36 Some (scheme, rest)
+1 -1
lib/check/datatype/dt_wrap.ml
··· 7 let name = "wrap" 8 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 11 if List.mem s_lower valid_wraps then Ok () 12 else 13 Error
··· 7 let name = "wrap" 8 9 let validate s = 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 if List.mem s_lower valid_wraps then Ok () 12 else 13 Error
+3 -3
lib/check/element/attr.ml
··· 571 572 (** Parse a single attribute name-value pair to typed attribute *) 573 let parse_attr name value : t = 574 - let name_lower = String.lowercase_ascii name in 575 - let value_lower = String.lowercase_ascii value in 576 match name_lower with 577 (* Global attributes *) 578 | "id" -> `Id value ··· 875 (** Get rel attribute as list of link types (space-separated, lowercased per HTML5 spec) *) 876 let get_rel_list attrs = 877 match get_rel attrs with 878 - | Some s -> List.map String.lowercase_ascii (Datatype.split_on_whitespace s) 879 | None -> [] 880 881 (** Get headers attribute as raw string *)
··· 571 572 (** Parse a single attribute name-value pair to typed attribute *) 573 let parse_attr name value : t = 574 + let name_lower = Astring.String.Ascii.lowercase name in 575 + let value_lower = Astring.String.Ascii.lowercase value in 576 match name_lower with 577 (* Global attributes *) 578 | "id" -> `Id value ··· 875 (** Get rel attribute as list of link types (space-separated, lowercased per HTML5 spec) *) 876 let get_rel_list attrs = 877 match get_rel attrs with 878 + | Some s -> List.map Astring.String.Ascii.lowercase (Datatype.split_on_whitespace s) 879 | None -> [] 880 881 (** Get headers attribute as raw string *)
+4 -4
lib/check/element/element.ml
··· 21 22 (** Parse element-specific type attribute based on tag *) 23 let parse_type_attr (tag : Tag.html_tag) value : Attr.t = 24 - let value_lower = String.lowercase_ascii value in 25 match tag with 26 | `Input -> 27 (match Attr.parse_input_type value_lower with ··· 42 (** Parse attributes with element context for type attribute *) 43 let parse_attrs_for_tag (tag : Tag.element_tag) (raw_attrs : (string * string) list) : Attr.t list = 44 List.map (fun (name, value) -> 45 - let name_lower = String.lowercase_ascii name in 46 if name_lower = "type" then 47 match tag with 48 | Tag.Html html_tag -> parse_type_attr html_tag value ··· 274 (** Get raw attribute value (from original attrs) *) 275 let get_raw_attr name elem = 276 List.find_map (fun (n, v) -> 277 - if String.lowercase_ascii n = String.lowercase_ascii name then Some v else None 278 ) elem.raw_attrs 279 280 (** Check if raw attribute exists *) 281 let has_raw_attr name elem = 282 List.exists (fun (n, _) -> 283 - String.lowercase_ascii n = String.lowercase_ascii name 284 ) elem.raw_attrs 285 286 (** {1 Pattern Matching Helpers} *)
··· 21 22 (** Parse element-specific type attribute based on tag *) 23 let parse_type_attr (tag : Tag.html_tag) value : Attr.t = 24 + let value_lower = Astring.String.Ascii.lowercase value in 25 match tag with 26 | `Input -> 27 (match Attr.parse_input_type value_lower with ··· 42 (** Parse attributes with element context for type attribute *) 43 let parse_attrs_for_tag (tag : Tag.element_tag) (raw_attrs : (string * string) list) : Attr.t list = 44 List.map (fun (name, value) -> 45 + let name_lower = Astring.String.Ascii.lowercase name in 46 if name_lower = "type" then 47 match tag with 48 | Tag.Html html_tag -> parse_type_attr html_tag value ··· 274 (** Get raw attribute value (from original attrs) *) 275 let get_raw_attr name elem = 276 List.find_map (fun (n, v) -> 277 + if Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name then Some v else None 278 ) elem.raw_attrs 279 280 (** Check if raw attribute exists *) 281 let has_raw_attr name elem = 282 List.exists (fun (n, _) -> 283 + Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name 284 ) elem.raw_attrs 285 286 (** {1 Pattern Matching Helpers} *)
+3 -3
lib/check/element/tag.ml
··· 234 (** Check if a name is a valid custom element name (contains hyphen, not reserved) *) 235 let is_custom_element_name name = 236 String.contains name '-' && 237 - not (String.starts_with ~prefix:"xml" (String.lowercase_ascii name)) && 238 - not (String.equal (String.lowercase_ascii name) "annotation-xml") 239 240 (** SVG namespace URI *) 241 let svg_namespace = "http://www.w3.org/2000/svg" ··· 255 256 (** Convert tag name and optional namespace to element_tag *) 257 let tag_of_string ?namespace name = 258 - let name_lower = String.lowercase_ascii name in 259 match namespace with 260 | Some ns when is_svg_namespace ns -> Svg name (* Preserve original case for SVG *) 261 | Some ns when is_mathml_namespace ns -> MathML name (* Preserve original case for MathML *)
··· 234 (** Check if a name is a valid custom element name (contains hyphen, not reserved) *) 235 let is_custom_element_name name = 236 String.contains name '-' && 237 + not (String.starts_with ~prefix:"xml" (Astring.String.Ascii.lowercase name)) && 238 + not (String.equal (Astring.String.Ascii.lowercase name) "annotation-xml") 239 240 (** SVG namespace URI *) 241 let svg_namespace = "http://www.w3.org/2000/svg" ··· 255 256 (** Convert tag name and optional namespace to element_tag *) 257 let tag_of_string ?namespace name = 258 + let name_lower = Astring.String.Ascii.lowercase name in 259 match namespace with 260 | Some ns when is_svg_namespace ns -> Svg name (* Preserve original case for SVG *) 261 | Some ns when is_mathml_namespace ns -> MathML name (* Preserve original case for MathML *)
+1 -1
lib/check/semantic/form_checker.ml
··· 12 13 (** Check if autocomplete value contains webauthn token *) 14 let contains_webauthn value = 15 - let lower = String.lowercase_ascii value in 16 let tokens = String.split_on_char ' ' lower |> List.filter (fun s -> String.length s > 0) in 17 List.mem "webauthn" tokens 18
··· 12 13 (** Check if autocomplete value contains webauthn token *) 14 let contains_webauthn value = 15 + let lower = Astring.String.Ascii.lowercase value in 16 let tokens = String.split_on_char ' ' lower |> List.filter (fun s -> String.length s > 0) in 17 List.mem "webauthn" tokens 18
+2 -2
lib/check/semantic/lang_detecting_checker.ml
··· 54 let get_lang_code lang = 55 (* Extract primary language subtag *) 56 match String.split_on_char '-' lang with 57 - | code :: _ -> String.lowercase_ascii code 58 | [] -> "" 59 60 (* Create detector lazily with deterministic seed *) ··· 324 | None -> 325 Message_collector.add_typed collector 326 (`I18n (`Missing_dir_rtl (`Language detected_name))) 327 - | Some dir when String.lowercase_ascii dir <> "rtl" -> 328 Message_collector.add_typed collector 329 (`I18n (`Wrong_dir (`Language detected_name, `Declared dir))) 330 | _ -> ()
··· 54 let get_lang_code lang = 55 (* Extract primary language subtag *) 56 match String.split_on_char '-' lang with 57 + | code :: _ -> Astring.String.Ascii.lowercase code 58 | [] -> "" 59 60 (* Create detector lazily with deterministic seed *) ··· 324 | None -> 325 Message_collector.add_typed collector 326 (`I18n (`Missing_dir_rtl (`Language detected_name))) 327 + | Some dir when Astring.String.Ascii.lowercase dir <> "rtl" -> 328 Message_collector.add_typed collector 329 (`I18n (`Wrong_dir (`Language detected_name, `Declared dir))) 330 | _ -> ()
+11 -13
lib/check/semantic/nesting_checker.ml
··· 190 state.ancestor_flags <- empty_flags () 191 192 (** Get attribute value by name from attribute list. *) 193 - let get_attr attrs name = 194 - List.assoc_opt name attrs 195 196 (** Check if an attribute exists. *) 197 - let has_attr attrs name = 198 - get_attr attrs name <> None 199 200 (** Check if element is interactive based on its attributes. *) 201 let is_interactive_element name attrs = 202 match name with 203 - | "a" -> has_attr attrs "href" 204 - | "audio" | "video" -> has_attr attrs "controls" 205 - | "img" | "object" -> has_attr attrs "usemap" 206 | "input" -> 207 - (match get_attr attrs "type" with 208 | Some "hidden" -> false 209 | _ -> true) 210 | "button" | "details" | "embed" | "iframe" | "label" | "select" ··· 239 (* Determine attribute to mention in error messages *) 240 let attr = 241 match name with 242 - | "a" when has_attr attrs "href" -> Some "href" 243 - | "audio" when has_attr attrs "controls" -> Some "controls" 244 - | "video" when has_attr attrs "controls" -> Some "controls" 245 - | "img" when has_attr attrs "usemap" -> Some "usemap" 246 - | "object" when has_attr attrs "usemap" -> Some "usemap" 247 | _ -> None 248 in 249
··· 190 state.ancestor_flags <- empty_flags () 191 192 (** Get attribute value by name from attribute list. *) 193 + let get_attr = Attr_utils.get_attr 194 195 (** Check if an attribute exists. *) 196 + let has_attr = Attr_utils.has_attr 197 198 (** Check if element is interactive based on its attributes. *) 199 let is_interactive_element name attrs = 200 match name with 201 + | "a" -> has_attr "href" attrs 202 + | "audio" | "video" -> has_attr "controls" attrs 203 + | "img" | "object" -> has_attr "usemap" attrs 204 | "input" -> 205 + (match get_attr "type" attrs with 206 | Some "hidden" -> false 207 | _ -> true) 208 | "button" | "details" | "embed" | "iframe" | "label" | "select" ··· 237 (* Determine attribute to mention in error messages *) 238 let attr = 239 match name with 240 + | "a" when has_attr "href" attrs -> Some "href" 241 + | "audio" when has_attr "controls" attrs -> Some "controls" 242 + | "video" when has_attr "controls" attrs -> Some "controls" 243 + | "img" when has_attr "usemap" attrs -> Some "usemap" 244 + | "object" when has_attr "usemap" attrs -> Some "usemap" 245 | _ -> None 246 in 247
+2 -2
lib/check/semantic/obsolete_checker.ml
··· 260 match element.Element.tag with 261 | Tag.Html _ -> 262 let name = Tag.tag_to_string element.tag in 263 - let name_lower = String.lowercase_ascii name in 264 let attrs = element.raw_attrs in 265 266 (* Track head context *) ··· 275 276 (* Check for obsolete attributes *) 277 List.iter (fun (attr_name, _attr_value) -> 278 - let attr_lower = String.lowercase_ascii attr_name in 279 280 (* Special handling for scoped attribute on style *) 281 if attr_lower = "scoped" && name_lower = "style" then begin
··· 260 match element.Element.tag with 261 | Tag.Html _ -> 262 let name = Tag.tag_to_string element.tag in 263 + let name_lower = Astring.String.Ascii.lowercase name in 264 let attrs = element.raw_attrs in 265 266 (* Track head context *) ··· 275 276 (* Check for obsolete attributes *) 277 List.iter (fun (attr_name, _attr_value) -> 278 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 279 280 (* Special handling for scoped attribute on style *) 281 if attr_lower = "scoped" && name_lower = "style" then begin
+1 -1
lib/check/semantic/required_attr_checker.ml
··· 120 (* popover attribute must have valid value *) 121 match Attr_utils.get_attr "popover" attrs with 122 | Some value -> 123 - let value_lower = String.lowercase_ascii value in 124 (* Valid values: empty string, auto, manual, hint *) 125 if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then 126 Message_collector.add_typed collector
··· 120 (* popover attribute must have valid value *) 121 match Attr_utils.get_attr "popover" attrs with 122 | Some value -> 123 + let value_lower = Astring.String.Ascii.lowercase value in 124 (* Valid values: empty string, auto, manual, hint *) 125 if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then 126 Message_collector.add_typed collector
+34 -34
lib/check/specialized/aria_checker.ml
··· 309 else 310 String.split_on_char ' ' trimmed 311 |> List.filter (fun s -> String.trim s <> "") 312 - |> List.map String.lowercase_ascii 313 314 (** Get the implicit role for an HTML element. *) 315 let get_implicit_role element_name attrs = 316 (* Check for input element with type attribute *) 317 if element_name = "input" then begin 318 - match List.assoc_opt "type" attrs with 319 | Some input_type -> 320 - let input_type = String.lowercase_ascii input_type in 321 begin match Hashtbl.find_opt input_types_with_implicit_role input_type with 322 | Some role -> Some role 323 | None -> ··· 332 end 333 (* Check for area element - implicit role depends on href attribute *) 334 else if element_name = "area" then begin 335 - match List.assoc_opt "href" attrs with 336 | Some _ -> Some "link" (* area with href has implicit role "link" *) 337 | None -> Some "generic" (* area without href has no corresponding role, treated as generic *) 338 end 339 (* Check for a element - implicit role depends on href attribute *) 340 else if element_name = "a" then begin 341 - match List.assoc_opt "href" attrs with 342 | Some _ -> Some "link" (* a with href has implicit role "link" *) 343 | None -> Some "generic" (* a without href has no corresponding role, treated as generic *) 344 end ··· 430 match element.Element.tag with 431 | Tag.Html _ -> 432 let name = Tag.tag_to_string element.tag in 433 - let name_lower = String.lowercase_ascii name in 434 let attrs = element.raw_attrs in 435 - let role_attr = List.assoc_opt "role" attrs in 436 - let aria_label = List.assoc_opt "aria-label" attrs in 437 - let aria_labelledby = List.assoc_opt "aria-labelledby" attrs in 438 - let aria_braillelabel = List.assoc_opt "aria-braillelabel" attrs in 439 let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in 440 let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in 441 let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in ··· 459 460 (* Track active tabs and tabpanel roles for end_document validation *) 461 if List.mem "tab" explicit_roles then begin 462 - let aria_selected = List.assoc_opt "aria-selected" attrs in 463 if aria_selected = Some "true" then state.has_active_tab <- true 464 end; 465 if List.mem "tabpanel" explicit_roles then state.has_tabpanel <- true; 466 467 (* Track visible main elements (explicit role=main or implicit main role) *) 468 let is_hidden = 469 - let aria_hidden = List.assoc_opt "aria-hidden" attrs in 470 aria_hidden = Some "true" 471 in 472 if not is_hidden then begin ··· 489 (* Check br/wbr aria-* attribute restrictions - not allowed *) 490 if name_lower = "br" || name_lower = "wbr" then begin 491 List.iter (fun (attr_name, _) -> 492 - let attr_lower = String.lowercase_ascii attr_name in 493 if String.starts_with ~prefix:"aria-" attr_lower && 494 attr_lower <> "aria-hidden" then 495 Message_collector.add_typed collector ··· 515 516 (* Check for img with empty alt having role attribute *) 517 if name_lower = "img" then begin 518 - let alt_value = List.assoc_opt "alt" attrs in 519 match alt_value with 520 | Some alt when String.trim alt = "" -> 521 (* img with empty alt must not have role attribute *) ··· 526 527 (* Check for input[type=checkbox][role=button] requires aria-pressed *) 528 if name_lower = "input" then begin 529 - let input_type = match List.assoc_opt "type" attrs with 530 - | Some t -> String.lowercase_ascii t 531 | None -> "text" 532 in 533 if input_type = "checkbox" && List.mem "button" explicit_roles then begin 534 - let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in 535 if not has_aria_pressed then 536 Message_collector.add_typed collector (`Input `Checkbox_needs_aria_pressed) 537 end ··· 566 567 (* Check for aria-hidden="true" on body element *) 568 if name_lower = "body" then begin 569 - let aria_hidden = List.assoc_opt "aria-hidden" attrs in 570 match aria_hidden with 571 | Some "true" -> 572 Message_collector.add_typed collector (`Aria `Hidden_on_body) ··· 574 end; 575 576 (* Check for aria-checked on input[type=checkbox] *) 577 - let aria_checked = List.assoc_opt "aria-checked" attrs in 578 if name_lower = "input" then begin 579 - match List.assoc_opt "type" attrs with 580 - | Some input_type when String.lowercase_ascii input_type = "checkbox" -> 581 if aria_checked <> None then 582 Message_collector.add_typed collector 583 (`Aria (`Must_not_use (`Attr "aria-checked", `Elem "input", ··· 586 end; 587 588 (* Check for aria-expanded on roles that don't support it *) 589 - let aria_expanded = List.assoc_opt "aria-expanded" attrs in 590 if aria_expanded <> None then begin 591 let role_to_check = match explicit_roles with 592 | first :: _ -> Some first ··· 605 (* Special message for input[type=text] with role="textbox" *) 606 let reason = 607 if name_lower = "input" && first_role = "textbox" then begin 608 - let has_list = List.exists (fun (k, _) -> String.lowercase_ascii k = "list") attrs in 609 - let input_type = match List.assoc_opt "type" attrs with 610 - | Some t -> String.lowercase_ascii t 611 | None -> "text" 612 in 613 if not has_list && input_type = "text" then ··· 671 672 (* Check for redundant default ARIA attribute values *) 673 List.iter (fun (attr_name, attr_value) -> 674 - let attr_lower = String.lowercase_ascii attr_name in 675 if String.starts_with ~prefix:"aria-" attr_lower then 676 match Hashtbl.find_opt aria_default_values attr_lower with 677 | Some default_value -> 678 - let value_lower = String.lowercase_ascii (String.trim attr_value) in 679 if value_lower = default_value then 680 Message_collector.add_typed collector 681 (`Generic (Printf.sprintf ··· 688 if name_lower = "summary" then begin 689 let parent = get_parent_element state in 690 let is_in_details = parent = Some "details" in 691 - let has_role_attr = List.exists (fun (k, _) -> String.lowercase_ascii k = "role") attrs in 692 - let has_aria_expanded = List.assoc_opt "aria-expanded" attrs <> None in 693 - let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in 694 if is_in_details then begin 695 (* summary that is the first child of details *) 696 if has_role_attr then ··· 726 (* Custom elements (autonomous custom elements) have generic role by default 727 and cannot have accessible names unless they have an explicit role *) 728 let attrs = element.raw_attrs in 729 - let role_attr = List.assoc_opt "role" attrs in 730 - let aria_label = List.assoc_opt "aria-label" attrs in 731 - let aria_labelledby = List.assoc_opt "aria-labelledby" attrs in 732 - let aria_braillelabel = List.assoc_opt "aria-braillelabel" attrs in 733 let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in 734 let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in 735 let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in
··· 309 else 310 String.split_on_char ' ' trimmed 311 |> List.filter (fun s -> String.trim s <> "") 312 + |> List.map Astring.String.Ascii.lowercase 313 314 (** Get the implicit role for an HTML element. *) 315 let get_implicit_role element_name attrs = 316 (* Check for input element with type attribute *) 317 if element_name = "input" then begin 318 + match Attr_utils.get_attr "type" attrs with 319 | Some input_type -> 320 + let input_type = Astring.String.Ascii.lowercase input_type in 321 begin match Hashtbl.find_opt input_types_with_implicit_role input_type with 322 | Some role -> Some role 323 | None -> ··· 332 end 333 (* Check for area element - implicit role depends on href attribute *) 334 else if element_name = "area" then begin 335 + match Attr_utils.get_attr "href" attrs with 336 | Some _ -> Some "link" (* area with href has implicit role "link" *) 337 | None -> Some "generic" (* area without href has no corresponding role, treated as generic *) 338 end 339 (* Check for a element - implicit role depends on href attribute *) 340 else if element_name = "a" then begin 341 + match Attr_utils.get_attr "href" attrs with 342 | Some _ -> Some "link" (* a with href has implicit role "link" *) 343 | None -> Some "generic" (* a without href has no corresponding role, treated as generic *) 344 end ··· 430 match element.Element.tag with 431 | Tag.Html _ -> 432 let name = Tag.tag_to_string element.tag in 433 + let name_lower = Astring.String.Ascii.lowercase name in 434 let attrs = element.raw_attrs in 435 + let role_attr = Attr_utils.get_attr "role" attrs in 436 + let aria_label = Attr_utils.get_attr "aria-label" attrs in 437 + let aria_labelledby = Attr_utils.get_attr "aria-labelledby" attrs in 438 + let aria_braillelabel = Attr_utils.get_attr "aria-braillelabel" attrs in 439 let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in 440 let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in 441 let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in ··· 459 460 (* Track active tabs and tabpanel roles for end_document validation *) 461 if List.mem "tab" explicit_roles then begin 462 + let aria_selected = Attr_utils.get_attr "aria-selected" attrs in 463 if aria_selected = Some "true" then state.has_active_tab <- true 464 end; 465 if List.mem "tabpanel" explicit_roles then state.has_tabpanel <- true; 466 467 (* Track visible main elements (explicit role=main or implicit main role) *) 468 let is_hidden = 469 + let aria_hidden = Attr_utils.get_attr "aria-hidden" attrs in 470 aria_hidden = Some "true" 471 in 472 if not is_hidden then begin ··· 489 (* Check br/wbr aria-* attribute restrictions - not allowed *) 490 if name_lower = "br" || name_lower = "wbr" then begin 491 List.iter (fun (attr_name, _) -> 492 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 493 if String.starts_with ~prefix:"aria-" attr_lower && 494 attr_lower <> "aria-hidden" then 495 Message_collector.add_typed collector ··· 515 516 (* Check for img with empty alt having role attribute *) 517 if name_lower = "img" then begin 518 + let alt_value = Attr_utils.get_attr "alt" attrs in 519 match alt_value with 520 | Some alt when String.trim alt = "" -> 521 (* img with empty alt must not have role attribute *) ··· 526 527 (* Check for input[type=checkbox][role=button] requires aria-pressed *) 528 if name_lower = "input" then begin 529 + let input_type = match Attr_utils.get_attr "type" attrs with 530 + | Some t -> Astring.String.Ascii.lowercase t 531 | None -> "text" 532 in 533 if input_type = "checkbox" && List.mem "button" explicit_roles then begin 534 + let has_aria_pressed = Attr_utils.has_attr "aria-pressed" attrs in 535 if not has_aria_pressed then 536 Message_collector.add_typed collector (`Input `Checkbox_needs_aria_pressed) 537 end ··· 566 567 (* Check for aria-hidden="true" on body element *) 568 if name_lower = "body" then begin 569 + let aria_hidden = Attr_utils.get_attr "aria-hidden" attrs in 570 match aria_hidden with 571 | Some "true" -> 572 Message_collector.add_typed collector (`Aria `Hidden_on_body) ··· 574 end; 575 576 (* Check for aria-checked on input[type=checkbox] *) 577 + let aria_checked = Attr_utils.get_attr "aria-checked" attrs in 578 if name_lower = "input" then begin 579 + match Attr_utils.get_attr "type" attrs with 580 + | Some input_type when Astring.String.Ascii.lowercase input_type = "checkbox" -> 581 if aria_checked <> None then 582 Message_collector.add_typed collector 583 (`Aria (`Must_not_use (`Attr "aria-checked", `Elem "input", ··· 586 end; 587 588 (* Check for aria-expanded on roles that don't support it *) 589 + let aria_expanded = Attr_utils.get_attr "aria-expanded" attrs in 590 if aria_expanded <> None then begin 591 let role_to_check = match explicit_roles with 592 | first :: _ -> Some first ··· 605 (* Special message for input[type=text] with role="textbox" *) 606 let reason = 607 if name_lower = "input" && first_role = "textbox" then begin 608 + let has_list = Attr_utils.has_attr "list" attrs in 609 + let input_type = match Attr_utils.get_attr "type" attrs with 610 + | Some t -> Astring.String.Ascii.lowercase t 611 | None -> "text" 612 in 613 if not has_list && input_type = "text" then ··· 671 672 (* Check for redundant default ARIA attribute values *) 673 List.iter (fun (attr_name, attr_value) -> 674 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 675 if String.starts_with ~prefix:"aria-" attr_lower then 676 match Hashtbl.find_opt aria_default_values attr_lower with 677 | Some default_value -> 678 + let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in 679 if value_lower = default_value then 680 Message_collector.add_typed collector 681 (`Generic (Printf.sprintf ··· 688 if name_lower = "summary" then begin 689 let parent = get_parent_element state in 690 let is_in_details = parent = Some "details" in 691 + let has_role_attr = Attr_utils.has_attr "role" attrs in 692 + let has_aria_expanded = Attr_utils.has_attr "aria-expanded" attrs in 693 + let has_aria_pressed = Attr_utils.has_attr "aria-pressed" attrs in 694 if is_in_details then begin 695 (* summary that is the first child of details *) 696 if has_role_attr then ··· 726 (* Custom elements (autonomous custom elements) have generic role by default 727 and cannot have accessible names unless they have an explicit role *) 728 let attrs = element.raw_attrs in 729 + let role_attr = Attr_utils.get_attr "role" attrs in 730 + let aria_label = Attr_utils.get_attr "aria-label" attrs in 731 + let aria_labelledby = Attr_utils.get_attr "aria-labelledby" attrs in 732 + let aria_braillelabel = Attr_utils.get_attr "aria-braillelabel" attrs in 733 let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in 734 let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in 735 let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in
+21 -21
lib/check/specialized/attr_restrictions_checker.ml
··· 58 match element.Element.tag with 59 | Tag.Html _ -> 60 let name = Tag.tag_to_string element.tag in 61 - let name_lower = String.lowercase_ascii name in 62 let attrs = element.raw_attrs in 63 64 (* Detect XHTML mode from xmlns attribute on html element *) ··· 86 (* Check for xmlns:* prefixed attributes - not allowed in HTML *) 87 (* Standard xmlns declarations are allowed but custom prefixes are not *) 88 List.iter (fun (attr_name, _) -> 89 - let attr_lower = String.lowercase_ascii attr_name in 90 if String.starts_with ~prefix:"xmlns:" attr_lower then begin 91 let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in 92 (* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *) ··· 113 (* Validate style type attribute - must be "text/css" or omitted *) 114 if name_lower = "style" then begin 115 List.iter (fun (attr_name, attr_value) -> 116 - let attr_lower = String.lowercase_ascii attr_name in 117 if attr_lower = "type" then begin 118 - let value_lower = String.lowercase_ascii (String.trim attr_value) in 119 if value_lower <> "text/css" then 120 Message_collector.add_typed collector (`Misc `Style_type_invalid) 121 end ··· 144 (* imagesrcset requires as="image" *) 145 if has_imagesrcset then begin 146 let as_is_image = match as_value with 147 - | Some v -> String.lowercase_ascii (String.trim v) = "image" 148 | None -> false 149 in 150 if not as_is_image then ··· 164 (* Validate img usemap attribute - must be hash-name reference with content *) 165 if name_lower = "img" then begin 166 List.iter (fun (attr_name, attr_value) -> 167 - let attr_lower = String.lowercase_ascii attr_name in 168 if attr_lower = "usemap" then begin 169 if attr_value = "#" then 170 Message_collector.add_typed collector ··· 178 (* Validate embed type attribute - must be valid MIME type *) 179 if name_lower = "embed" then begin 180 List.iter (fun (attr_name, attr_value) -> 181 - let attr_lower = String.lowercase_ascii attr_name in 182 if attr_lower = "type" then begin 183 match Dt_mime.validate_mime_type attr_value with 184 | Ok () -> () ··· 197 name_lower = "iframe" || name_lower = "source" in 198 if is_dimension_element then begin 199 List.iter (fun (attr_name, attr_value) -> 200 - let attr_lower = String.lowercase_ascii attr_name in 201 if attr_lower = "width" || attr_lower = "height" then begin 202 (* Check for non-negative integer only *) 203 let is_valid = ··· 245 (* Validate area[shape=default] cannot have coords *) 246 if name_lower = "area" then begin 247 match Attr_utils.get_attr "shape" attrs with 248 - | Some s when String.lowercase_ascii (String.trim s) = "default" -> 249 if Attr_utils.has_attr "coords" attrs then 250 Message_collector.add_typed collector 251 (`Attr (`Not_allowed (`Attr "coords", `Elem "area"))) ··· 257 match Attr_utils.get_attr "dir" attrs with 258 | None -> 259 Message_collector.add_typed collector (`Misc `Bdo_missing_dir) 260 - | Some v when String.lowercase_ascii (String.trim v) = "auto" -> 261 Message_collector.add_typed collector (`Misc `Bdo_dir_auto) 262 | _ -> () 263 end; ··· 266 if name_lower = "input" then begin 267 if Attr_utils.has_attr "list" attrs then begin 268 let input_type = Attr_utils.get_attr_or "type" ~default:"text" attrs 269 - |> String.trim |> String.lowercase_ascii in 270 if not (List.mem input_type input_types_allowing_list) then 271 Message_collector.add_typed collector (`Input `List_not_allowed) 272 end ··· 274 275 (* Validate data-* attributes *) 276 List.iter (fun (attr_name, _) -> 277 - let attr_lower = String.lowercase_ascii attr_name in 278 (* Check if it starts with "data-" *) 279 if String.starts_with ~prefix:"data-" attr_lower then begin 280 let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in ··· 297 (match lang_value with 298 | None -> 299 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang) 300 - | Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang -> 301 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang) 302 | _ -> ()) 303 | None -> () ··· 305 306 (* Validate spellcheck attribute - must be "true" or "false" or empty *) 307 List.iter (fun (attr_name, attr_value) -> 308 - let attr_lower = String.lowercase_ascii attr_name in 309 if attr_lower = "spellcheck" then begin 310 - let value_lower = String.lowercase_ascii (String.trim attr_value) in 311 if value_lower <> "" && value_lower <> "true" && value_lower <> "false" then 312 Message_collector.add_typed collector 313 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason ""))) ··· 317 (* Validate enterkeyhint attribute - must be one of specific values *) 318 let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in 319 List.iter (fun (attr_name, attr_value) -> 320 - let attr_lower = String.lowercase_ascii attr_name in 321 if attr_lower = "enterkeyhint" then begin 322 - let value_lower = String.lowercase_ascii (String.trim attr_value) in 323 if not (List.mem value_lower valid_enterkeyhint) then 324 Message_collector.add_typed collector 325 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason ""))) ··· 328 329 (* Validate headingoffset attribute - must be a number between 0 and 8 *) 330 List.iter (fun (attr_name, attr_value) -> 331 - let attr_lower = String.lowercase_ascii attr_name in 332 if attr_lower = "headingoffset" then begin 333 let trimmed = String.trim attr_value in 334 let is_valid = ··· 346 347 (* Validate accesskey attribute - each key label must be a single code point *) 348 List.iter (fun (attr_name, attr_value) -> 349 - let attr_lower = String.lowercase_ascii attr_name in 350 if attr_lower = "accesskey" then begin 351 (* Split by whitespace to get key labels *) 352 let keys = String.split_on_char ' ' attr_value |> ··· 418 let is_media_element = name_lower = "link" || name_lower = "style" || name_lower = "source" in 419 if is_media_element then begin 420 List.iter (fun (attr_name, attr_value) -> 421 - let attr_lower = String.lowercase_ascii attr_name in 422 if attr_lower = "media" then begin 423 let trimmed = String.trim attr_value in 424 if trimmed <> "" then begin ··· 436 437 (* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *) 438 List.iter (fun (attr_name, attr_value) -> 439 - let attr_lower = String.lowercase_ascii attr_name in 440 if attr_lower = "prefix" then begin 441 (* Parse prefix attribute value - format: "prefix1: iri1 prefix2: iri2 ..." *) 442 let trimmed = String.trim attr_value in
··· 58 match element.Element.tag with 59 | Tag.Html _ -> 60 let name = Tag.tag_to_string element.tag in 61 + let name_lower = Astring.String.Ascii.lowercase name in 62 let attrs = element.raw_attrs in 63 64 (* Detect XHTML mode from xmlns attribute on html element *) ··· 86 (* Check for xmlns:* prefixed attributes - not allowed in HTML *) 87 (* Standard xmlns declarations are allowed but custom prefixes are not *) 88 List.iter (fun (attr_name, _) -> 89 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 90 if String.starts_with ~prefix:"xmlns:" attr_lower then begin 91 let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in 92 (* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *) ··· 113 (* Validate style type attribute - must be "text/css" or omitted *) 114 if name_lower = "style" then begin 115 List.iter (fun (attr_name, attr_value) -> 116 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 117 if attr_lower = "type" then begin 118 + let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in 119 if value_lower <> "text/css" then 120 Message_collector.add_typed collector (`Misc `Style_type_invalid) 121 end ··· 144 (* imagesrcset requires as="image" *) 145 if has_imagesrcset then begin 146 let as_is_image = match as_value with 147 + | Some v -> Astring.String.Ascii.lowercase (String.trim v) = "image" 148 | None -> false 149 in 150 if not as_is_image then ··· 164 (* Validate img usemap attribute - must be hash-name reference with content *) 165 if name_lower = "img" then begin 166 List.iter (fun (attr_name, attr_value) -> 167 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 168 if attr_lower = "usemap" then begin 169 if attr_value = "#" then 170 Message_collector.add_typed collector ··· 178 (* Validate embed type attribute - must be valid MIME type *) 179 if name_lower = "embed" then begin 180 List.iter (fun (attr_name, attr_value) -> 181 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 182 if attr_lower = "type" then begin 183 match Dt_mime.validate_mime_type attr_value with 184 | Ok () -> () ··· 197 name_lower = "iframe" || name_lower = "source" in 198 if is_dimension_element then begin 199 List.iter (fun (attr_name, attr_value) -> 200 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 201 if attr_lower = "width" || attr_lower = "height" then begin 202 (* Check for non-negative integer only *) 203 let is_valid = ··· 245 (* Validate area[shape=default] cannot have coords *) 246 if name_lower = "area" then begin 247 match Attr_utils.get_attr "shape" attrs with 248 + | Some s when Astring.String.Ascii.lowercase (String.trim s) = "default" -> 249 if Attr_utils.has_attr "coords" attrs then 250 Message_collector.add_typed collector 251 (`Attr (`Not_allowed (`Attr "coords", `Elem "area"))) ··· 257 match Attr_utils.get_attr "dir" attrs with 258 | None -> 259 Message_collector.add_typed collector (`Misc `Bdo_missing_dir) 260 + | Some v when Astring.String.Ascii.lowercase (String.trim v) = "auto" -> 261 Message_collector.add_typed collector (`Misc `Bdo_dir_auto) 262 | _ -> () 263 end; ··· 266 if name_lower = "input" then begin 267 if Attr_utils.has_attr "list" attrs then begin 268 let input_type = Attr_utils.get_attr_or "type" ~default:"text" attrs 269 + |> String.trim |> Astring.String.Ascii.lowercase in 270 if not (List.mem input_type input_types_allowing_list) then 271 Message_collector.add_typed collector (`Input `List_not_allowed) 272 end ··· 274 275 (* Validate data-* attributes *) 276 List.iter (fun (attr_name, _) -> 277 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 278 (* Check if it starts with "data-" *) 279 if String.starts_with ~prefix:"data-" attr_lower then begin 280 let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in ··· 297 (match lang_value with 298 | None -> 299 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang) 300 + | Some lang when Astring.String.Ascii.lowercase lang <> Astring.String.Ascii.lowercase xmllang -> 301 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang) 302 | _ -> ()) 303 | None -> () ··· 305 306 (* Validate spellcheck attribute - must be "true" or "false" or empty *) 307 List.iter (fun (attr_name, attr_value) -> 308 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 309 if attr_lower = "spellcheck" then begin 310 + let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in 311 if value_lower <> "" && value_lower <> "true" && value_lower <> "false" then 312 Message_collector.add_typed collector 313 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason ""))) ··· 317 (* Validate enterkeyhint attribute - must be one of specific values *) 318 let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in 319 List.iter (fun (attr_name, attr_value) -> 320 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 321 if attr_lower = "enterkeyhint" then begin 322 + let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in 323 if not (List.mem value_lower valid_enterkeyhint) then 324 Message_collector.add_typed collector 325 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason ""))) ··· 328 329 (* Validate headingoffset attribute - must be a number between 0 and 8 *) 330 List.iter (fun (attr_name, attr_value) -> 331 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 332 if attr_lower = "headingoffset" then begin 333 let trimmed = String.trim attr_value in 334 let is_valid = ··· 346 347 (* Validate accesskey attribute - each key label must be a single code point *) 348 List.iter (fun (attr_name, attr_value) -> 349 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 350 if attr_lower = "accesskey" then begin 351 (* Split by whitespace to get key labels *) 352 let keys = String.split_on_char ' ' attr_value |> ··· 418 let is_media_element = name_lower = "link" || name_lower = "style" || name_lower = "source" in 419 if is_media_element then begin 420 List.iter (fun (attr_name, attr_value) -> 421 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 422 if attr_lower = "media" then begin 423 let trimmed = String.trim attr_value in 424 if trimmed <> "" then begin ··· 436 437 (* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *) 438 List.iter (fun (attr_name, attr_value) -> 439 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 440 if attr_lower = "prefix" then begin 441 (* Parse prefix attribute value - format: "prefix1: iri1 prefix2: iri2 ..." *) 442 let trimmed = String.trim attr_value in
+1 -1
lib/check/specialized/datetime_checker.ml
··· 451 if List.mem name datetime_elements then begin 452 (* Check for datetime attribute *) 453 let datetime_attr = List.find_map (fun (k, v) -> 454 - if String.lowercase_ascii k = "datetime" then Some v else None 455 ) element.raw_attrs in 456 match datetime_attr with 457 | None -> ()
··· 451 if List.mem name datetime_elements then begin 452 (* Check for datetime attribute *) 453 let datetime_attr = List.find_map (fun (k, v) -> 454 + if Astring.String.Ascii.lowercase k = "datetime" then Some v else None 455 ) element.raw_attrs in 456 match datetime_attr with 457 | None -> ()
+1 -1
lib/check/specialized/dl_checker.ml
··· 106 (`Element (`Not_allowed_as_child (`Child "div", `Parent "dl"))); 107 (match Attr.get_role element.attrs with 108 | Some role_value -> 109 - let role_lower = String.lowercase_ascii (String.trim role_value) in 110 if role_lower <> "presentation" && role_lower <> "none" then 111 Message_collector.add_typed collector (`Li_role `Div_in_dl_bad_role) 112 | None -> ());
··· 106 (`Element (`Not_allowed_as_child (`Child "div", `Parent "dl"))); 107 (match Attr.get_role element.attrs with 108 | Some role_value -> 109 + let role_lower = Astring.String.Ascii.lowercase (String.trim role_value) in 110 if role_lower <> "presentation" && role_lower <> "none" then 111 Message_collector.add_typed collector (`Li_role `Div_in_dl_bad_role) 112 | None -> ());
+2 -2
lib/check/specialized/importmap_checker.ml
··· 270 | Tag.Html `Script -> 271 (* Check if type="importmap" *) 272 let type_attr = List.find_opt (fun (n, _) -> 273 - String.lowercase_ascii n = "type" 274 ) element.raw_attrs in 275 (match type_attr with 276 - | Some (_, v) when String.lowercase_ascii v = "importmap" -> 277 state.in_importmap <- true; 278 Buffer.clear state.content 279 | _ -> ())
··· 270 | Tag.Html `Script -> 271 (* Check if type="importmap" *) 272 let type_attr = List.find_opt (fun (n, _) -> 273 + Astring.String.Ascii.lowercase n = "type" 274 ) element.raw_attrs in 275 (match type_attr with 276 + | Some (_, v) when Astring.String.Ascii.lowercase v = "importmap" -> 277 state.in_importmap <- true; 278 Buffer.clear state.content 279 | _ -> ())
+1 -1
lib/check/specialized/label_checker.ml
··· 65 | _ -> ()) 66 67 | Tag.Html tag -> 68 - let name_lower = String.lowercase_ascii (Tag.tag_to_string (Tag.Html tag)) in 69 70 (* Track labelable element IDs *) 71 (if is_labelable name_lower then
··· 65 | _ -> ()) 66 67 | Tag.Html tag -> 68 + let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string (Tag.Html tag)) in 69 70 (* Track labelable element IDs *) 71 (if is_labelable name_lower then
+1 -1
lib/check/specialized/language_checker.ml
··· 27 28 (** Check if a language tag contains deprecated subtags. *) 29 let check_deprecated_tag value = 30 - let lower = String.lowercase_ascii value in 31 let subtags = String.split_on_char '-' lower in 32 match subtags with 33 | [] -> None
··· 27 28 (** Check if a language tag contains deprecated subtags. *) 29 let check_deprecated_tag value = 30 + let lower = Astring.String.Ascii.lowercase value in 31 let subtags = String.split_on_char '-' lower in 32 match subtags with 33 | [] -> None
+3 -6
lib/check/specialized/mime_type_checker.ml
··· 153 let create () = () 154 let reset _state = () 155 156 - let get_attr_value name attrs = 157 - List.find_map (fun (k, v) -> 158 - if String.lowercase_ascii k = String.lowercase_ascii name then Some v else None 159 - ) attrs 160 161 let start_element _state ~element collector = 162 match element.Element.tag with 163 | Tag.Html tag -> 164 let name = Tag.html_tag_to_string tag in 165 - let name_lower = String.lowercase_ascii name in 166 (match List.assoc_opt name_lower mime_type_attrs with 167 | None -> () 168 | Some type_attrs -> ··· 174 if value = "" then () 175 else if name_lower = "script" then 176 (* script type can be module, importmap, etc. - skip validation for non-MIME types *) 177 - let value_lower = String.lowercase_ascii value in 178 if value_lower = "module" || value_lower = "importmap" || 179 not (String.contains value '/') then () 180 else
··· 153 let create () = () 154 let reset _state = () 155 156 + let get_attr_value = Attr_utils.get_attr 157 158 let start_element _state ~element collector = 159 match element.Element.tag with 160 | Tag.Html tag -> 161 let name = Tag.html_tag_to_string tag in 162 + let name_lower = Astring.String.Ascii.lowercase name in 163 (match List.assoc_opt name_lower mime_type_attrs with 164 | None -> () 165 | Some type_attrs -> ··· 171 if value = "" then () 172 else if name_lower = "script" then 173 (* script type can be module, importmap, etc. - skip validation for non-MIME types *) 174 + let value_lower = Astring.String.Ascii.lowercase value in 175 if value_lower = "module" || value_lower = "importmap" || 176 not (String.contains value '/') then () 177 else
+2 -2
lib/check/specialized/picture_checker.ml
··· 133 let media_value = Attr_utils.get_attr "media" attrs in 134 let has_type = Attr_utils.has_attr "type" attrs in 135 let is_media_all = match media_value with 136 - | Some v -> String.lowercase_ascii (String.trim v) = "all" 137 | None -> false in 138 let is_media_empty = match media_value with 139 | Some v -> String.trim v = "" ··· 142 | None -> not has_type 143 | Some v -> 144 let trimmed = String.trim v in 145 - trimmed = "" || String.lowercase_ascii trimmed = "all" 146 in 147 if is_always_matching then begin 148 state.has_always_matching_source <- true;
··· 133 let media_value = Attr_utils.get_attr "media" attrs in 134 let has_type = Attr_utils.has_attr "type" attrs in 135 let is_media_all = match media_value with 136 + | Some v -> Astring.String.Ascii.lowercase (String.trim v) = "all" 137 | None -> false in 138 let is_media_empty = match media_value with 139 | Some v -> String.trim v = "" ··· 142 | None -> not has_type 143 | Some v -> 144 let trimmed = String.trim v in 145 + trimmed = "" || Astring.String.Ascii.lowercase trimmed = "all" 146 in 147 if is_always_matching then begin 148 state.has_always_matching_source <- true;
+12 -12
lib/check/specialized/srcset_sizes_checker.ml
··· 153 154 (** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *) 155 let has_invalid_scientific_notation s = 156 - let lower = String.lowercase_ascii s in 157 (* Find 'e' for scientific notation *) 158 match String.index_opt lower 'e' with 159 | None -> false ··· 176 (* Check for % at the end *) 177 else if trimmed.[len - 1] = '%' then "%" 178 else begin 179 - let lower = String.lowercase_ascii trimmed in 180 (* Try to find a unit at the end (letters only) *) 181 let rec find_unit_length i = 182 if i < 0 then 0 ··· 205 if has_invalid_scientific_notation value_no_comments then BadScientificNotation 206 (* "auto" is only valid with lazy loading, which requires checking the element context. 207 For general validation, treat "auto" alone as invalid in sizes. *) 208 - else if String.lowercase_ascii value_no_comments = "auto" then 209 BadCssNumber (value_no_comments.[0], trimmed) 210 else if value_no_comments = "" then InvalidUnit ("", trimmed) 211 else begin 212 - let lower = String.lowercase_ascii value_no_comments in 213 (* Check for calc() or other CSS functions first - these are always valid *) 214 if String.contains value_no_comments '(' then Valid 215 else begin ··· 310 Some "Bad media condition: Parse Error" 311 end else begin 312 (* Check for bare "all" which is invalid *) 313 - let lower = String.lowercase_ascii trimmed in 314 let parts = String.split_on_char ' ' lower |> List.filter (fun s -> s <> "") in 315 match parts with 316 | keyword :: _ when keyword = "all" -> ··· 358 end 359 else begin 360 (* Check if remaining starts with "and", "or", "not" followed by space or paren *) 361 - let lower_remaining = String.lowercase_ascii remaining in 362 if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and " then 363 skip_media_condition (i + (len - i) - remaining_len + 4) 364 else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or " then ··· 577 578 (** Validate srcset descriptor *) 579 let validate_srcset_descriptor desc element_name srcset_value has_sizes collector = 580 - let desc_lower = String.lowercase_ascii (String.trim desc) in 581 if String.length desc_lower = 0 then true 582 else begin 583 let last_char = desc_lower.[String.length desc_lower - 1] in ··· 723 724 (** Normalize descriptor for duplicate detection (e.g., 1x = 1.0x) *) 725 let normalize_descriptor desc = 726 - let desc_lower = String.lowercase_ascii (String.trim desc) in 727 if String.length desc_lower = 0 then desc_lower 728 else 729 let last_char = desc_lower.[String.length desc_lower - 1] in ··· 793 (* Special schemes that require host/content after :// *) 794 let special_schemes = ["http"; "https"; "ftp"; "ws"; "wss"] in 795 (* Check for scheme-only URL like "http:" *) 796 - let url_lower = String.lowercase_ascii url in 797 List.iter (fun scheme -> 798 let scheme_colon = scheme ^ ":" in 799 if url_lower = scheme_colon then ··· 824 (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected single descriptor but found extraneous descriptor %s at %s." (q value) (q "srcset") (q element_name) (q extra_desc) (q value))))) 825 end; 826 827 - let desc_lower = String.lowercase_ascii (String.trim desc) in 828 if String.length desc_lower > 0 then begin 829 let last_char = desc_lower.[String.length desc_lower - 1] in 830 if last_char = 'w' then has_w_descriptor := true ··· 872 begin match Hashtbl.find_opt seen_descriptors normalized with 873 | Some first_url -> 874 Message_collector.add_typed collector 875 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (String.lowercase_ascii dup_type) (q first_url))))) 876 | None -> 877 begin match (if is_1x then Hashtbl.find_opt seen_descriptors "implicit-1x" else None) with 878 | Some first_url -> 879 (* Explicit 1x conflicts with implicit 1x *) 880 Message_collector.add_typed collector 881 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (String.lowercase_ascii dup_type) (q first_url))))) 882 | None -> 883 Hashtbl.add seen_descriptors normalized url; 884 if is_1x then Hashtbl.add seen_descriptors "explicit-1x" url
··· 153 154 (** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *) 155 let has_invalid_scientific_notation s = 156 + let lower = Astring.String.Ascii.lowercase s in 157 (* Find 'e' for scientific notation *) 158 match String.index_opt lower 'e' with 159 | None -> false ··· 176 (* Check for % at the end *) 177 else if trimmed.[len - 1] = '%' then "%" 178 else begin 179 + let lower = Astring.String.Ascii.lowercase trimmed in 180 (* Try to find a unit at the end (letters only) *) 181 let rec find_unit_length i = 182 if i < 0 then 0 ··· 205 if has_invalid_scientific_notation value_no_comments then BadScientificNotation 206 (* "auto" is only valid with lazy loading, which requires checking the element context. 207 For general validation, treat "auto" alone as invalid in sizes. *) 208 + else if Astring.String.Ascii.lowercase value_no_comments = "auto" then 209 BadCssNumber (value_no_comments.[0], trimmed) 210 else if value_no_comments = "" then InvalidUnit ("", trimmed) 211 else begin 212 + let lower = Astring.String.Ascii.lowercase value_no_comments in 213 (* Check for calc() or other CSS functions first - these are always valid *) 214 if String.contains value_no_comments '(' then Valid 215 else begin ··· 310 Some "Bad media condition: Parse Error" 311 end else begin 312 (* Check for bare "all" which is invalid *) 313 + let lower = Astring.String.Ascii.lowercase trimmed in 314 let parts = String.split_on_char ' ' lower |> List.filter (fun s -> s <> "") in 315 match parts with 316 | keyword :: _ when keyword = "all" -> ··· 358 end 359 else begin 360 (* Check if remaining starts with "and", "or", "not" followed by space or paren *) 361 + let lower_remaining = Astring.String.Ascii.lowercase remaining in 362 if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and " then 363 skip_media_condition (i + (len - i) - remaining_len + 4) 364 else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or " then ··· 577 578 (** Validate srcset descriptor *) 579 let validate_srcset_descriptor desc element_name srcset_value has_sizes collector = 580 + let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in 581 if String.length desc_lower = 0 then true 582 else begin 583 let last_char = desc_lower.[String.length desc_lower - 1] in ··· 723 724 (** Normalize descriptor for duplicate detection (e.g., 1x = 1.0x) *) 725 let normalize_descriptor desc = 726 + let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in 727 if String.length desc_lower = 0 then desc_lower 728 else 729 let last_char = desc_lower.[String.length desc_lower - 1] in ··· 793 (* Special schemes that require host/content after :// *) 794 let special_schemes = ["http"; "https"; "ftp"; "ws"; "wss"] in 795 (* Check for scheme-only URL like "http:" *) 796 + let url_lower = Astring.String.Ascii.lowercase url in 797 List.iter (fun scheme -> 798 let scheme_colon = scheme ^ ":" in 799 if url_lower = scheme_colon then ··· 824 (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected single descriptor but found extraneous descriptor %s at %s." (q value) (q "srcset") (q element_name) (q extra_desc) (q value))))) 825 end; 826 827 + let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in 828 if String.length desc_lower > 0 then begin 829 let last_char = desc_lower.[String.length desc_lower - 1] in 830 if last_char = 'w' then has_w_descriptor := true ··· 872 begin match Hashtbl.find_opt seen_descriptors normalized with 873 | Some first_url -> 874 Message_collector.add_typed collector 875 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (Astring.String.Ascii.lowercase dup_type) (q first_url))))) 876 | None -> 877 begin match (if is_1x then Hashtbl.find_opt seen_descriptors "implicit-1x" else None) with 878 | Some first_url -> 879 (* Explicit 1x conflicts with implicit 1x *) 880 Message_collector.add_typed collector 881 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (Astring.String.Ascii.lowercase dup_type) (q first_url))))) 882 | None -> 883 Hashtbl.add seen_descriptors normalized url; 884 if is_1x then Hashtbl.add seen_descriptors "explicit-1x" url
+10 -10
lib/check/specialized/svg_checker.ml
··· 260 261 (* Check if attribute name matches a pattern like "data-*" or "aria-*" - case insensitive *) 262 let matches_pattern attr pattern = 263 - let attr_lower = String.lowercase_ascii attr in 264 - let pattern_lower = String.lowercase_ascii pattern in 265 if String.ends_with ~suffix:"-*" pattern_lower then 266 let prefix = String.sub pattern_lower 0 (String.length pattern_lower - 1) in 267 String.starts_with ~prefix attr_lower ··· 361 state.in_svg <- true; 362 363 if is_svg_element || state.in_svg then begin 364 - let name_lower = String.lowercase_ascii name in 365 366 (* Check SVG content model rules *) 367 (* 1. Check if child is allowed in SVG <a> *) 368 (match state.element_stack with 369 - | parent :: _ when String.lowercase_ascii parent = "a" -> 370 if List.mem name_lower a_disallowed_children then 371 Message_collector.add_typed collector 372 (`Element (`Not_allowed_as_child (`Child name, `Parent "a"))) ··· 382 (* 2.5 Check stop element is only in linearGradient or radialGradient *) 383 if name_lower = "stop" then begin 384 match state.element_stack with 385 - | parent :: _ when (let p = String.lowercase_ascii parent in 386 p = "lineargradient" || p = "radialgradient") -> () 387 | parent :: _ -> 388 Message_collector.add_typed collector ··· 393 (* 2.6 Check use element is not nested inside another use element *) 394 if name_lower = "use" then begin 395 match state.element_stack with 396 - | parent :: _ when String.lowercase_ascii parent = "use" -> 397 Message_collector.add_typed collector 398 (`Element (`Not_allowed_as_child (`Child name, `Parent parent))) 399 | _ -> () ··· 401 402 (* 3. Check duplicate feFunc* in feComponentTransfer *) 403 (match state.element_stack with 404 - | parent :: _ when String.lowercase_ascii parent = "fecomponenttransfer" -> 405 if List.mem name_lower ["fefuncr"; "fefuncg"; "fefuncb"; "fefunca"] then begin 406 match state.fecomponenttransfer_stack with 407 | fect :: _ -> ··· 435 436 (* Check each attribute *) 437 List.iter (fun (attr, value) -> 438 - let attr_lower = String.lowercase_ascii attr in 439 440 (* Validate xmlns attributes *) 441 if String.starts_with ~prefix:"xmlns" attr_lower then ··· 457 (match List.assoc_opt name_lower required_attrs with 458 | Some req_attrs -> 459 List.iter (fun req_attr -> 460 - if not (List.exists (fun (a, _) -> String.lowercase_ascii a = req_attr) attrs) then 461 Message_collector.add_typed collector 462 (`Svg (`Missing_attr (`Elem name_lower, `Attr req_attr))) 463 ) req_attrs ··· 469 let name = Tag.tag_to_string tag in 470 471 if is_svg_element || state.in_svg then begin 472 - let name_lower = String.lowercase_ascii name in 473 474 (* Check required children when closing font element *) 475 if name_lower = "font" then begin
··· 260 261 (* Check if attribute name matches a pattern like "data-*" or "aria-*" - case insensitive *) 262 let matches_pattern attr pattern = 263 + let attr_lower = Astring.String.Ascii.lowercase attr in 264 + let pattern_lower = Astring.String.Ascii.lowercase pattern in 265 if String.ends_with ~suffix:"-*" pattern_lower then 266 let prefix = String.sub pattern_lower 0 (String.length pattern_lower - 1) in 267 String.starts_with ~prefix attr_lower ··· 361 state.in_svg <- true; 362 363 if is_svg_element || state.in_svg then begin 364 + let name_lower = Astring.String.Ascii.lowercase name in 365 366 (* Check SVG content model rules *) 367 (* 1. Check if child is allowed in SVG <a> *) 368 (match state.element_stack with 369 + | parent :: _ when Astring.String.Ascii.lowercase parent = "a" -> 370 if List.mem name_lower a_disallowed_children then 371 Message_collector.add_typed collector 372 (`Element (`Not_allowed_as_child (`Child name, `Parent "a"))) ··· 382 (* 2.5 Check stop element is only in linearGradient or radialGradient *) 383 if name_lower = "stop" then begin 384 match state.element_stack with 385 + | parent :: _ when (let p = Astring.String.Ascii.lowercase parent in 386 p = "lineargradient" || p = "radialgradient") -> () 387 | parent :: _ -> 388 Message_collector.add_typed collector ··· 393 (* 2.6 Check use element is not nested inside another use element *) 394 if name_lower = "use" then begin 395 match state.element_stack with 396 + | parent :: _ when Astring.String.Ascii.lowercase parent = "use" -> 397 Message_collector.add_typed collector 398 (`Element (`Not_allowed_as_child (`Child name, `Parent parent))) 399 | _ -> () ··· 401 402 (* 3. Check duplicate feFunc* in feComponentTransfer *) 403 (match state.element_stack with 404 + | parent :: _ when Astring.String.Ascii.lowercase parent = "fecomponenttransfer" -> 405 if List.mem name_lower ["fefuncr"; "fefuncg"; "fefuncb"; "fefunca"] then begin 406 match state.fecomponenttransfer_stack with 407 | fect :: _ -> ··· 435 436 (* Check each attribute *) 437 List.iter (fun (attr, value) -> 438 + let attr_lower = Astring.String.Ascii.lowercase attr in 439 440 (* Validate xmlns attributes *) 441 if String.starts_with ~prefix:"xmlns" attr_lower then ··· 457 (match List.assoc_opt name_lower required_attrs with 458 | Some req_attrs -> 459 List.iter (fun req_attr -> 460 + if not (Attr_utils.has_attr req_attr attrs) then 461 Message_collector.add_typed collector 462 (`Svg (`Missing_attr (`Elem name_lower, `Attr req_attr))) 463 ) req_attrs ··· 469 let name = Tag.tag_to_string tag in 470 471 if is_svg_element || state.in_svg then begin 472 + let name_lower = Astring.String.Ascii.lowercase name in 473 474 (* Check required children when closing font element *) 475 if name_lower = "font" then begin
+5 -5
lib/check/specialized/table_checker.ml
··· 354 355 (** Parse a non-negative integer attribute, returning 1 if absent or invalid *) 356 let parse_non_negative_int attrs name = 357 - match List.assoc_opt name attrs with 358 | None -> 1 359 | Some v -> ( 360 try ··· 364 365 (** Parse a positive integer attribute, returning 1 if absent or invalid *) 366 let parse_positive_int attrs name = 367 - match List.assoc_opt name attrs with 368 | None -> 1 369 | Some v -> ( 370 try ··· 374 375 (** Parse the headers attribute into a list of IDs *) 376 let parse_headers attrs = 377 - match List.assoc_opt "headers" attrs with 378 | None -> [] 379 | Some v -> 380 let parts = String.split_on_char ' ' v in ··· 523 table.state <- InCellInRowGroup; 524 (* Record header ID if present *) 525 if is_header then ( 526 - match List.assoc_opt "id" attrs with 527 | Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id () 528 | _ -> ()); 529 (* Parse cell attributes *) ··· 541 table.state <- InCellInImplicitRowGroup; 542 (* Same logic as above *) 543 if is_header then ( 544 - match List.assoc_opt "id" attrs with 545 | Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id () 546 | _ -> ()); 547 let colspan = abs (parse_positive_int attrs "colspan") in
··· 354 355 (** Parse a non-negative integer attribute, returning 1 if absent or invalid *) 356 let parse_non_negative_int attrs name = 357 + match Attr_utils.get_attr name attrs with 358 | None -> 1 359 | Some v -> ( 360 try ··· 364 365 (** Parse a positive integer attribute, returning 1 if absent or invalid *) 366 let parse_positive_int attrs name = 367 + match Attr_utils.get_attr name attrs with 368 | None -> 1 369 | Some v -> ( 370 try ··· 374 375 (** Parse the headers attribute into a list of IDs *) 376 let parse_headers attrs = 377 + match Attr_utils.get_attr "headers" attrs with 378 | None -> [] 379 | Some v -> 380 let parts = String.split_on_char ' ' v in ··· 523 table.state <- InCellInRowGroup; 524 (* Record header ID if present *) 525 if is_header then ( 526 + match Attr_utils.get_attr "id" attrs with 527 | Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id () 528 | _ -> ()); 529 (* Parse cell attributes *) ··· 541 table.state <- InCellInImplicitRowGroup; 542 (* Same logic as above *) 543 if is_header then ( 544 + match Attr_utils.get_attr "id" attrs with 545 | Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id () 546 | _ -> ()); 547 let colspan = abs (parse_positive_int attrs "colspan") in
+1 -1
lib/check/specialized/unknown_element_checker.ml
··· 31 state.stack <- name :: state.stack 32 33 | Tag.Html tag -> 34 - let name_lower = String.lowercase_ascii (Tag.tag_to_string (Tag.Html tag)) in 35 state.stack <- name_lower :: state.stack 36 37 | _ -> () (* SVG, MathML, Custom elements are allowed *)
··· 31 state.stack <- name :: state.stack 32 33 | Tag.Html tag -> 34 + let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string (Tag.Html tag)) in 35 state.stack <- name_lower :: state.stack 36 37 | _ -> () (* SVG, MathML, Custom elements are allowed *)
+24 -25
lib/check/specialized/url_checker.ml
··· 67 68 (** Check if pipe is allowed in this host context. *) 69 let is_pipe_allowed_in_host url host = 70 - let scheme = try String.lowercase_ascii (String.sub url 0 (String.index url ':')) with _ -> "" in 71 scheme = "file" && is_valid_windows_drive host 72 73 (** Special schemes that require double slash (//). ··· 95 (c >= '0' && c <= '9') || c = '+' || c = '-' || c = '.' 96 ) potential_scheme in 97 if is_valid_scheme then 98 - Some (String.lowercase_ascii potential_scheme) 99 else 100 None 101 with Not_found -> None ··· 104 let extract_host_and_port url = 105 try 106 let double_slash = 107 - try Some (Str.search_forward (Str.regexp "://") url 0 + 3) 108 - with Not_found -> None 109 in 110 match double_slash with 111 | None -> (None, None) ··· 250 (* Check for ASCII percent *) 251 String.contains s '%' || 252 (* Check for fullwidth percent (U+FF05 = 0xEF 0xBC 0x85 in UTF-8) *) 253 - try 254 - let _ = Str.search_forward (Str.regexp "\xef\xbc\x85") s 0 in 255 - true 256 - with Not_found -> false 257 258 (** Check if decoded host contains forbidden characters. 259 Some URLs have percent-encoded fullwidth characters that decode to forbidden chars. *) ··· 424 let check_path_segment url attr_name element_name = 425 (* Extract path: everything after authority (or after scheme: for non-authority URLs) *) 426 let raw_path = 427 - try 428 - let double_slash = Str.search_forward (Str.regexp "://") url 0 in 429 let after_auth_start = double_slash + 3 in 430 let rest = String.sub url after_auth_start (String.length url - after_auth_start) in 431 (* Find end of authority *) ··· 437 String.sub rest path_start (String.length rest - path_start) 438 else 439 "" 440 - with Not_found -> 441 (* No double slash - check for single slash path *) 442 - match extract_scheme url with 443 | Some _ -> 444 - let colon_pos = String.index url ':' in 445 - let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in 446 - after_colon 447 | None -> 448 (* Relative URL - the whole thing is the path *) 449 - url 450 in 451 (* Remove query and fragment for path-specific checks *) 452 let path = remove_query_fragment raw_path in ··· 546 547 (** Check for illegal characters in userinfo (user:password). *) 548 let check_userinfo url attr_name element_name = 549 try 550 (* Look for :// then find the LAST @ before the next / or end *) 551 - let double_slash = Str.search_forward (Str.regexp "://") url 0 + 3 in 552 let rest = String.sub url double_slash (String.length url - double_slash) in 553 (* Find first / or ? or # to limit authority section *) 554 let auth_end = ··· 633 let url = String.trim url in 634 (* Empty URL check for certain attributes *) 635 if url = "" then begin 636 - let name_lower = String.lowercase_ascii element_name in 637 - let attr_lower = String.lowercase_ascii attr_name in 638 if List.mem attr_lower must_be_non_empty || 639 List.mem (name_lower, attr_lower) must_be_non_empty_combinations then 640 Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Must be non-empty." ··· 739 let reset _state = () 740 741 (** Get attribute value by name. *) 742 - let get_attr_value name attrs = 743 - List.find_map (fun (k, v) -> 744 - if String.lowercase_ascii k = String.lowercase_ascii name then Some v else None 745 - ) attrs 746 747 let start_element _state ~element collector = 748 match element.Element.tag with 749 | Tag.Html _ -> 750 let name = Tag.tag_to_string element.tag in 751 - let name_lower = String.lowercase_ascii name in 752 let attrs = element.raw_attrs in 753 (* Check URL attributes for elements that have them *) 754 (match List.assoc_opt name_lower url_attributes with ··· 794 match validate_url url name "value" with 795 | None -> () 796 | Some error_msg -> 797 - let error_msg = Str.global_replace (Str.regexp "Bad URL:") "Bad absolute URL:" error_msg in 798 Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message error_msg))) 799 end 800 end
··· 67 68 (** Check if pipe is allowed in this host context. *) 69 let is_pipe_allowed_in_host url host = 70 + let scheme = try Astring.String.Ascii.lowercase (String.sub url 0 (String.index url ':')) with _ -> "" in 71 scheme = "file" && is_valid_windows_drive host 72 73 (** Special schemes that require double slash (//). ··· 95 (c >= '0' && c <= '9') || c = '+' || c = '-' || c = '.' 96 ) potential_scheme in 97 if is_valid_scheme then 98 + Some (Astring.String.Ascii.lowercase potential_scheme) 99 else 100 None 101 with Not_found -> None ··· 104 let extract_host_and_port url = 105 try 106 let double_slash = 107 + match Astring.String.find_sub ~sub:"://" url with 108 + | Some pos -> Some (pos + 3) 109 + | None -> None 110 in 111 match double_slash with 112 | None -> (None, None) ··· 251 (* Check for ASCII percent *) 252 String.contains s '%' || 253 (* Check for fullwidth percent (U+FF05 = 0xEF 0xBC 0x85 in UTF-8) *) 254 + Astring.String.is_infix ~affix:"\xef\xbc\x85" s 255 256 (** Check if decoded host contains forbidden characters. 257 Some URLs have percent-encoded fullwidth characters that decode to forbidden chars. *) ··· 422 let check_path_segment url attr_name element_name = 423 (* Extract path: everything after authority (or after scheme: for non-authority URLs) *) 424 let raw_path = 425 + match Astring.String.find_sub ~sub:"://" url with 426 + | Some double_slash -> 427 let after_auth_start = double_slash + 3 in 428 let rest = String.sub url after_auth_start (String.length url - after_auth_start) in 429 (* Find end of authority *) ··· 435 String.sub rest path_start (String.length rest - path_start) 436 else 437 "" 438 + | None -> 439 (* No double slash - check for single slash path *) 440 + (match extract_scheme url with 441 | Some _ -> 442 + (try 443 + let colon_pos = String.index url ':' in 444 + String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) 445 + with Not_found -> url) 446 | None -> 447 (* Relative URL - the whole thing is the path *) 448 + url) 449 in 450 (* Remove query and fragment for path-specific checks *) 451 let path = remove_query_fragment raw_path in ··· 545 546 (** Check for illegal characters in userinfo (user:password). *) 547 let check_userinfo url attr_name element_name = 548 + match Astring.String.find_sub ~sub:"://" url with 549 + | None -> None 550 + | Some pos -> 551 try 552 (* Look for :// then find the LAST @ before the next / or end *) 553 + let double_slash = pos + 3 in 554 let rest = String.sub url double_slash (String.length url - double_slash) in 555 (* Find first / or ? or # to limit authority section *) 556 let auth_end = ··· 635 let url = String.trim url in 636 (* Empty URL check for certain attributes *) 637 if url = "" then begin 638 + let name_lower = Astring.String.Ascii.lowercase element_name in 639 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 640 if List.mem attr_lower must_be_non_empty || 641 List.mem (name_lower, attr_lower) must_be_non_empty_combinations then 642 Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Must be non-empty." ··· 741 let reset _state = () 742 743 (** Get attribute value by name. *) 744 + let get_attr_value = Attr_utils.get_attr 745 746 let start_element _state ~element collector = 747 match element.Element.tag with 748 | Tag.Html _ -> 749 let name = Tag.tag_to_string element.tag in 750 + let name_lower = Astring.String.Ascii.lowercase name in 751 let attrs = element.raw_attrs in 752 (* Check URL attributes for elements that have them *) 753 (match List.assoc_opt name_lower url_attributes with ··· 793 match validate_url url name "value" with 794 | None -> () 795 | Some error_msg -> 796 + let error_msg = Astring.String.concat ~sep:"Bad absolute URL:" (Astring.String.cuts ~sep:"Bad URL:" error_msg) in 797 Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message error_msg))) 798 end 799 end
+5 -5
lib/check/specialized/xhtml_content_checker.ml
··· 54 55 let start_element state ~element collector = 56 let name = Tag.tag_to_string element.Element.tag in 57 - let name_lower = String.lowercase_ascii name in 58 let attrs = element.raw_attrs in 59 60 (* Check data-* attributes for uppercase *) ··· 63 (* Check if this element is allowed as child of parent *) 64 (match state.element_stack with 65 | parent :: _ -> 66 - let parent_lower = String.lowercase_ascii parent in 67 if not (is_child_allowed ~parent:parent_lower ~child:name_lower) then 68 Message_collector.add_typed collector 69 (`Element (`Not_allowed_as_child (`Child name_lower, `Parent parent_lower))) ··· 71 72 (* Handle figure content model *) 73 (match state.element_stack with 74 - | parent :: _ when String.lowercase_ascii parent = "figure" -> 75 (* We're inside a figure, check content model *) 76 (match state.figure_stack with 77 | fig :: _ -> ··· 99 state.element_stack <- name :: state.element_stack 100 101 let end_element state ~tag _collector = 102 - let name_lower = String.lowercase_ascii (Tag.tag_to_string tag) in 103 (* Pop figure state if leaving a figure *) 104 if name_lower = "figure" then begin 105 match state.figure_stack with ··· 115 match state.element_stack with 116 | [] -> () 117 | parent :: _ -> 118 - let parent_lower = String.lowercase_ascii parent in 119 let trimmed = String.trim text in 120 if trimmed <> "" then begin 121 if parent_lower = "figure" then begin
··· 54 55 let start_element state ~element collector = 56 let name = Tag.tag_to_string element.Element.tag in 57 + let name_lower = Astring.String.Ascii.lowercase name in 58 let attrs = element.raw_attrs in 59 60 (* Check data-* attributes for uppercase *) ··· 63 (* Check if this element is allowed as child of parent *) 64 (match state.element_stack with 65 | parent :: _ -> 66 + let parent_lower = Astring.String.Ascii.lowercase parent in 67 if not (is_child_allowed ~parent:parent_lower ~child:name_lower) then 68 Message_collector.add_typed collector 69 (`Element (`Not_allowed_as_child (`Child name_lower, `Parent parent_lower))) ··· 71 72 (* Handle figure content model *) 73 (match state.element_stack with 74 + | parent :: _ when Astring.String.Ascii.lowercase parent = "figure" -> 75 (* We're inside a figure, check content model *) 76 (match state.figure_stack with 77 | fig :: _ -> ··· 99 state.element_stack <- name :: state.element_stack 100 101 let end_element state ~tag _collector = 102 + let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string tag) in 103 (* Pop figure state if leaving a figure *) 104 if name_lower = "figure" then begin 105 match state.figure_stack with ··· 115 match state.element_stack with 116 | [] -> () 117 | parent :: _ -> 118 + let parent_lower = Astring.String.Ascii.lowercase parent in 119 let trimmed = String.trim text in 120 if trimmed <> "" then begin 121 if parent_lower = "figure" then begin