+96
lib/check/ancestor_tracker.ml
+96
lib/check/ancestor_tracker.ml
···
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** Generic ancestor tracking for DOM traversal. *)
7
+
8
+
type 'data context = {
9
+
name : string;
10
+
data : 'data;
11
+
}
12
+
13
+
type 'data t = {
14
+
mutable stack : 'data context list;
15
+
}
16
+
17
+
let create () = { stack = [] }
18
+
19
+
let reset tracker = tracker.stack <- []
20
+
21
+
let push tracker name data =
22
+
let name_lower = Astring.String.Ascii.lowercase name in
23
+
let context = { name = name_lower; data } in
24
+
tracker.stack <- context :: tracker.stack
25
+
26
+
let pop tracker =
27
+
match tracker.stack with
28
+
| [] -> () (* Gracefully handle underflow *)
29
+
| _ :: rest -> tracker.stack <- rest
30
+
31
+
let peek tracker =
32
+
match tracker.stack with
33
+
| [] -> None
34
+
| hd :: _ -> Some hd
35
+
36
+
let depth tracker = List.length tracker.stack
37
+
38
+
let has_ancestor tracker name =
39
+
let name_lower = Astring.String.Ascii.lowercase name in
40
+
List.exists (fun ctx -> String.equal ctx.name name_lower) tracker.stack
41
+
42
+
let has_ancestor_with tracker predicate =
43
+
List.exists (fun ctx -> predicate ctx.name ctx.data) tracker.stack
44
+
45
+
let find_ancestor tracker name =
46
+
let name_lower = Astring.String.Ascii.lowercase name in
47
+
List.find_opt (fun ctx -> String.equal ctx.name name_lower) tracker.stack
48
+
49
+
let find_ancestor_with tracker predicate =
50
+
List.find_opt (fun ctx -> predicate ctx.name ctx.data) tracker.stack
51
+
52
+
let get_all_ancestors tracker = tracker.stack
53
+
54
+
let filter_ancestors tracker predicate =
55
+
List.filter (fun ctx -> predicate ctx.name ctx.data) tracker.stack
56
+
57
+
let exists = has_ancestor_with
58
+
59
+
let for_all tracker predicate =
60
+
List.for_all (fun ctx -> predicate ctx.name ctx.data) tracker.stack
61
+
62
+
let iter tracker f =
63
+
List.iter (fun ctx -> f ctx.name ctx.data) tracker.stack
64
+
65
+
let fold tracker f init =
66
+
List.fold_left (fun acc ctx -> f acc ctx.name ctx.data) init tracker.stack
67
+
68
+
let get_parent = peek
69
+
70
+
let get_parent_name tracker =
71
+
match peek tracker with
72
+
| Some ctx -> Some ctx.name
73
+
| None -> None
74
+
75
+
let get_parent_data tracker =
76
+
match peek tracker with
77
+
| Some ctx -> Some ctx.data
78
+
| None -> None
79
+
80
+
let has_any_ancestor tracker names =
81
+
let names_lower = List.map Astring.String.Ascii.lowercase names in
82
+
List.exists (fun ctx -> List.mem ctx.name names_lower) tracker.stack
83
+
84
+
let find_first_matching tracker names =
85
+
let names_lower = List.map Astring.String.Ascii.lowercase names in
86
+
List.find_map (fun ctx ->
87
+
if List.mem ctx.name names_lower then Some (ctx.name, ctx)
88
+
else None
89
+
) tracker.stack
90
+
91
+
let is_empty tracker = tracker.stack = []
92
+
93
+
let to_list = get_all_ancestors
94
+
95
+
let to_name_list tracker =
96
+
List.map (fun ctx -> ctx.name) tracker.stack
+236
lib/check/ancestor_tracker.mli
+236
lib/check/ancestor_tracker.mli
···
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** Generic ancestor tracking for DOM traversal.
7
+
8
+
This module provides a generic stack-based ancestor tracker that can be
9
+
used by various HTML checkers to track element nesting during DOM
10
+
traversal. It is parameterized by the type of data stored per element.
11
+
12
+
{2 Design}
13
+
14
+
The tracker maintains a stack of element contexts, where each context
15
+
contains:
16
+
- The element name (string)
17
+
- Custom data of type ['data] (checker-specific)
18
+
19
+
The stack is automatically managed through [push] and [pop] operations
20
+
that should be called in response to start_element and end_element events.
21
+
22
+
{2 Usage Example}
23
+
24
+
{[
25
+
(* Define custom data type *)
26
+
type role_data = {
27
+
explicit_roles : string list;
28
+
implicit_role : string option;
29
+
}
30
+
31
+
(* Create tracker *)
32
+
let tracker = Ancestor_tracker.create ()
33
+
34
+
(* Push element context *)
35
+
let data = { explicit_roles = ["button"]; implicit_role = None } in
36
+
Ancestor_tracker.push tracker "div" data;
37
+
38
+
(* Query ancestors *)
39
+
let has_button = Ancestor_tracker.exists tracker
40
+
(fun name data -> List.mem "button" data.explicit_roles)
41
+
in
42
+
43
+
(* Pop when element closes *)
44
+
Ancestor_tracker.pop tracker
45
+
]}
46
+
*)
47
+
48
+
(** {1 Types} *)
49
+
50
+
(** Ancestor context containing element name and custom data. *)
51
+
type 'data context = {
52
+
name : string;
53
+
(** Element name (lowercase). *)
54
+
data : 'data;
55
+
(** Checker-specific data. *)
56
+
}
57
+
58
+
(** Ancestor tracker state. *)
59
+
type 'data t
60
+
61
+
(** {1 Creation} *)
62
+
63
+
val create : unit -> 'data t
64
+
(** [create ()] creates a new empty ancestor tracker. *)
65
+
66
+
val reset : 'data t -> unit
67
+
(** [reset tracker] clears all ancestor contexts from the tracker. *)
68
+
69
+
(** {1 Stack Operations} *)
70
+
71
+
val push : 'data t -> string -> 'data -> unit
72
+
(** [push tracker name data] pushes a new element context onto the stack.
73
+
74
+
This should be called in [start_element] event handlers.
75
+
76
+
@param tracker The ancestor tracker
77
+
@param name The element name (will be lowercased)
78
+
@param data Checker-specific data to associate with this element *)
79
+
80
+
val pop : 'data t -> unit
81
+
(** [pop tracker] pops the most recent element context from the stack.
82
+
83
+
This should be called in [end_element] event handlers.
84
+
85
+
@param tracker The ancestor tracker *)
86
+
87
+
val peek : 'data t -> 'data context option
88
+
(** [peek tracker] returns the most recent element context without removing it.
89
+
90
+
@param tracker The ancestor tracker
91
+
@return [Some context] if the stack is non-empty, [None] otherwise *)
92
+
93
+
val depth : 'data t -> int
94
+
(** [depth tracker] returns the current stack depth (number of ancestors).
95
+
96
+
@param tracker The ancestor tracker
97
+
@return The number of elements on the stack *)
98
+
99
+
(** {1 Ancestor Queries} *)
100
+
101
+
val has_ancestor : 'data t -> string -> bool
102
+
(** [has_ancestor tracker name] checks if an element with the given name
103
+
exists anywhere in the ancestor chain.
104
+
105
+
@param tracker The ancestor tracker
106
+
@param name The element name to search for (case-insensitive)
107
+
@return [true] if an ancestor with this name exists *)
108
+
109
+
val has_ancestor_with : 'data t -> (string -> 'data -> bool) -> bool
110
+
(** [has_ancestor_with tracker predicate] checks if any ancestor satisfies
111
+
the given predicate.
112
+
113
+
@param tracker The ancestor tracker
114
+
@param predicate Function that tests element name and data
115
+
@return [true] if any ancestor satisfies the predicate *)
116
+
117
+
val find_ancestor : 'data t -> string -> 'data context option
118
+
(** [find_ancestor tracker name] finds the nearest ancestor with the given name.
119
+
120
+
@param tracker The ancestor tracker
121
+
@param name The element name to search for (case-insensitive)
122
+
@return [Some context] for the nearest matching ancestor, [None] if not found *)
123
+
124
+
val find_ancestor_with : 'data t -> (string -> 'data -> bool) -> 'data context option
125
+
(** [find_ancestor_with tracker predicate] finds the nearest ancestor that
126
+
satisfies the predicate.
127
+
128
+
@param tracker The ancestor tracker
129
+
@param predicate Function that tests element name and data
130
+
@return [Some context] for the nearest matching ancestor, [None] if not found *)
131
+
132
+
val get_all_ancestors : 'data t -> 'data context list
133
+
(** [get_all_ancestors tracker] returns all ancestor contexts from nearest to root.
134
+
135
+
@param tracker The ancestor tracker
136
+
@return List of contexts, with the most recent first *)
137
+
138
+
val filter_ancestors : 'data t -> (string -> 'data -> bool) -> 'data context list
139
+
(** [filter_ancestors tracker predicate] returns all ancestors that satisfy
140
+
the predicate.
141
+
142
+
@param tracker The ancestor tracker
143
+
@param predicate Function that tests element name and data
144
+
@return List of matching contexts, from nearest to root *)
145
+
146
+
val exists : 'data t -> (string -> 'data -> bool) -> bool
147
+
(** [exists tracker predicate] checks if any ancestor satisfies the predicate.
148
+
149
+
This is an alias for {!has_ancestor_with}.
150
+
151
+
@param tracker The ancestor tracker
152
+
@param predicate Function that tests element name and data
153
+
@return [true] if any ancestor satisfies the predicate *)
154
+
155
+
val for_all : 'data t -> (string -> 'data -> bool) -> bool
156
+
(** [for_all tracker predicate] checks if all ancestors satisfy the predicate.
157
+
158
+
@param tracker The ancestor tracker
159
+
@param predicate Function that tests element name and data
160
+
@return [true] if all ancestors satisfy the predicate (vacuously true for empty stack) *)
161
+
162
+
val iter : 'data t -> (string -> 'data -> unit) -> unit
163
+
(** [iter tracker f] applies function [f] to each ancestor from nearest to root.
164
+
165
+
@param tracker The ancestor tracker
166
+
@param f Function to apply to each ancestor *)
167
+
168
+
val fold : 'data t -> ('acc -> string -> 'data -> 'acc) -> 'acc -> 'acc
169
+
(** [fold tracker f init] folds over ancestors from nearest to root.
170
+
171
+
@param tracker The ancestor tracker
172
+
@param f Folding function
173
+
@param init Initial accumulator value
174
+
@return Final accumulator value *)
175
+
176
+
(** {1 Parent Access} *)
177
+
178
+
val get_parent : 'data t -> 'data context option
179
+
(** [get_parent tracker] returns the immediate parent element context.
180
+
181
+
This is equivalent to {!peek}.
182
+
183
+
@param tracker The ancestor tracker
184
+
@return [Some context] for the parent, [None] if at root *)
185
+
186
+
val get_parent_name : 'data t -> string option
187
+
(** [get_parent_name tracker] returns the immediate parent element name.
188
+
189
+
@param tracker The ancestor tracker
190
+
@return [Some name] for the parent, [None] if at root *)
191
+
192
+
val get_parent_data : 'data t -> 'data option
193
+
(** [get_parent_data tracker] returns the immediate parent's custom data.
194
+
195
+
@param tracker The ancestor tracker
196
+
@return [Some data] for the parent, [None] if at root *)
197
+
198
+
(** {1 Multiple Ancestor Queries} *)
199
+
200
+
val has_any_ancestor : 'data t -> string list -> bool
201
+
(** [has_any_ancestor tracker names] checks if any of the given element names
202
+
exists in the ancestor chain.
203
+
204
+
@param tracker The ancestor tracker
205
+
@param names List of element names to search for
206
+
@return [true] if any name matches an ancestor *)
207
+
208
+
val find_first_matching : 'data t -> string list -> (string * 'data context) option
209
+
(** [find_first_matching tracker names] finds the nearest ancestor that matches
210
+
any of the given names.
211
+
212
+
@param tracker The ancestor tracker
213
+
@param names List of element names to search for
214
+
@return [Some (matched_name, context)] for the first match, [None] if no match *)
215
+
216
+
(** {1 Stack Inspection} *)
217
+
218
+
val is_empty : 'data t -> bool
219
+
(** [is_empty tracker] checks if the stack is empty (at document root).
220
+
221
+
@param tracker The ancestor tracker
222
+
@return [true] if no elements are on the stack *)
223
+
224
+
val to_list : 'data t -> 'data context list
225
+
(** [to_list tracker] converts the stack to a list of contexts.
226
+
227
+
This is an alias for {!get_all_ancestors}.
228
+
229
+
@param tracker The ancestor tracker
230
+
@return List of contexts from nearest to root *)
231
+
232
+
val to_name_list : 'data t -> string list
233
+
(** [to_name_list tracker] returns just the element names in the stack.
234
+
235
+
@param tracker The ancestor tracker
236
+
@return List of element names from nearest to root *)
+2
-2
lib/check/attr_utils.ml
+2
-2
lib/check/attr_utils.ml
···
3
type attrs = (string * string) list
4
5
let has_attr name attrs =
6
-
List.exists (fun (n, _) -> String.lowercase_ascii n = name) attrs
7
8
let get_attr name attrs =
9
List.find_map (fun (n, v) ->
10
-
if String.lowercase_ascii n = name then Some v else None
11
) attrs
12
13
let get_attr_or name ~default attrs =
···
3
type attrs = (string * string) list
4
5
let has_attr name attrs =
6
+
List.exists (fun (n, _) -> Astring.String.Ascii.lowercase n = name) attrs
7
8
let get_attr name attrs =
9
List.find_map (fun (n, v) ->
10
+
if Astring.String.Ascii.lowercase n = name then Some v else None
11
) attrs
12
13
let get_attr_or name ~default attrs =
+1
lib/check/checker_registry.ml
+1
lib/check/checker_registry.ml
···
36
Hashtbl.replace reg "xhtml-content" Xhtml_content_checker.checker;
37
Hashtbl.replace reg "lang-detecting" Lang_detecting_checker.checker;
38
Hashtbl.replace reg "unknown-element" Unknown_element_checker.checker;
39
reg
40
41
let register registry name checker = Hashtbl.replace registry name checker
···
36
Hashtbl.replace reg "xhtml-content" Xhtml_content_checker.checker;
37
Hashtbl.replace reg "lang-detecting" Lang_detecting_checker.checker;
38
Hashtbl.replace reg "unknown-element" Unknown_element_checker.checker;
39
+
Hashtbl.replace reg "content" Content_checker.checker;
40
reg
41
42
let register registry name checker = Hashtbl.replace registry name checker
+64
-7
lib/check/content_model/content_checker.ml
+64
-7
lib/check/content_model/content_checker.ml
···
30
| Some spec ->
31
List.exists (fun cat -> Element_spec.has_category spec cat) cats)
32
| Content_model.Elements names ->
33
-
List.mem (String.lowercase_ascii element_name)
34
-
(List.map String.lowercase_ascii names)
35
| Content_model.Mixed cats -> (
36
match Element_registry.get registry element_name with
37
| None -> false
···
79
(`Element (`Not_allowed_as_child (`Child name, `Parent prohibited))))
80
spec.Element_spec.prohibited_ancestors
81
82
(* Validate that a child element is allowed *)
83
-
let validate_child_element state child_name collector =
84
match state.ancestor_stack with
85
| [] ->
86
(* Root level - only html allowed *)
87
-
if not (String.equal (String.lowercase_ascii child_name) "html") then
88
Message_collector.add_typed collector
89
(`Generic (Printf.sprintf "Element '%s' not allowed at document root (only 'html' allowed)" child_name))
90
| parent :: _ ->
91
let content_model = parent.spec.Element_spec.content_model in
92
-
if not (matches_content_model state.registry child_name content_model) then
93
Message_collector.add_typed collector
94
(`Element (`Not_allowed_as_child (`Child child_name, `Parent parent.name)))
95
96
let start_element state ~element collector =
97
let name = Tag.tag_to_string element.Element.tag in
98
99
(* Check if we're inside a foreign (SVG/MathML) context *)
100
let in_foreign_context = match state.ancestor_stack with
···
127
match spec_opt with
128
| None ->
129
(* Unknown element - first check if it's allowed in current context *)
130
-
validate_child_element state name collector
131
| Some spec ->
132
(* Check prohibited ancestors *)
133
check_prohibited_ancestors state name spec collector;
134
135
(* Validate this element is allowed as child of parent *)
136
-
validate_child_element state name collector;
137
138
(* Push element context onto stack *)
139
let context = { name; spec; children_count = 0; is_foreign = false } in
···
30
| Some spec ->
31
List.exists (fun cat -> Element_spec.has_category spec cat) cats)
32
| Content_model.Elements names ->
33
+
List.mem (Astring.String.Ascii.lowercase element_name)
34
+
(List.map Astring.String.Ascii.lowercase names)
35
| Content_model.Mixed cats -> (
36
match Element_registry.get registry element_name with
37
| None -> false
···
79
(`Element (`Not_allowed_as_child (`Child name, `Parent prohibited))))
80
spec.Element_spec.prohibited_ancestors
81
82
+
(* Check if element is allowed via permitted_parents *)
83
+
let is_permitted_parent registry child_name parent_name =
84
+
match Element_registry.get registry child_name with
85
+
| None -> false
86
+
| Some spec ->
87
+
match spec.Element_spec.permitted_parents with
88
+
| None -> false
89
+
| Some parents ->
90
+
List.mem (Astring.String.Ascii.lowercase parent_name)
91
+
(List.map Astring.String.Ascii.lowercase parents)
92
+
93
+
(* Check if a specific element is in the ancestor stack *)
94
+
let has_ancestor state ancestor_name =
95
+
List.exists (fun ctx ->
96
+
String.equal (Astring.String.Ascii.lowercase ctx.name)
97
+
(Astring.String.Ascii.lowercase ancestor_name)
98
+
) state.ancestor_stack
99
+
100
+
(* Check if an attribute exists in raw attrs list *)
101
+
let has_raw_attr name attrs =
102
+
List.exists (fun (n, _) ->
103
+
Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name
104
+
) attrs
105
+
106
+
(* Special cases for content model validation:
107
+
- dt/dd inside div is only valid when dl is an ancestor (div as grouping in dl)
108
+
- meta with property/itemprop/name attribute in body is valid (RDFa/microdata)
109
+
- link with itemprop in body is valid (microdata) *)
110
+
let is_special_case_allowed state child_name parent_name raw_attrs =
111
+
let child_lower = Astring.String.Ascii.lowercase child_name in
112
+
let parent_lower = Astring.String.Ascii.lowercase parent_name in
113
+
(* dt/dd inside div is allowed when dl is an ancestor *)
114
+
if (child_lower = "dt" || child_lower = "dd") && parent_lower = "div" then
115
+
has_ancestor state "dl"
116
+
(* meta in body is allowed with property (RDFa), itemprop (microdata), or name+content (meta tags) *)
117
+
else if child_lower = "meta" && parent_lower <> "head" then
118
+
has_raw_attr "property" raw_attrs ||
119
+
has_raw_attr "itemprop" raw_attrs ||
120
+
(has_raw_attr "name" raw_attrs && has_raw_attr "content" raw_attrs)
121
+
(* link in body is allowed with itemprop (microdata) or property (RDFa) *)
122
+
else if child_lower = "link" && parent_lower <> "head" then
123
+
has_raw_attr "itemprop" raw_attrs || has_raw_attr "property" raw_attrs
124
+
(* Custom elements (with hyphen) are valid HTML5 and are flow content *)
125
+
else if String.contains child_lower '-' then
126
+
true
127
+
else
128
+
false
129
+
130
(* Validate that a child element is allowed *)
131
+
let validate_child_element state child_name raw_attrs collector =
132
match state.ancestor_stack with
133
| [] ->
134
(* Root level - only html allowed *)
135
+
if not (String.equal (Astring.String.Ascii.lowercase child_name) "html") then
136
Message_collector.add_typed collector
137
(`Generic (Printf.sprintf "Element '%s' not allowed at document root (only 'html' allowed)" child_name))
138
| parent :: _ ->
139
let content_model = parent.spec.Element_spec.content_model in
140
+
(* Check content model, permitted_parents, or special cases *)
141
+
let allowed_by_content_model = matches_content_model state.registry child_name content_model in
142
+
let allowed_by_permitted_parents = is_permitted_parent state.registry child_name parent.name in
143
+
let allowed_by_special_case = is_special_case_allowed state child_name parent.name raw_attrs in
144
+
if not (allowed_by_content_model || allowed_by_permitted_parents || allowed_by_special_case) then
145
Message_collector.add_typed collector
146
(`Element (`Not_allowed_as_child (`Child child_name, `Parent parent.name)))
147
148
let start_element state ~element collector =
149
let name = Tag.tag_to_string element.Element.tag in
150
+
let raw_attrs = element.Element.raw_attrs in
151
152
(* Check if we're inside a foreign (SVG/MathML) context *)
153
let in_foreign_context = match state.ancestor_stack with
···
180
match spec_opt with
181
| None ->
182
(* Unknown element - first check if it's allowed in current context *)
183
+
validate_child_element state name raw_attrs collector;
184
+
(* Push unknown element onto stack with default flow content model *)
185
+
let spec = Element_spec.make ~name ~content_model:(Content_model.Categories [Content_category.Flow]) () in
186
+
let context = { name; spec; children_count = 0; is_foreign = false } in
187
+
state.ancestor_stack <- context :: state.ancestor_stack
188
| Some spec ->
189
(* Check prohibited ancestors *)
190
check_prohibited_ancestors state name spec collector;
191
192
(* Validate this element is allowed as child of parent *)
193
+
validate_child_element state name raw_attrs collector;
194
195
(* Push element context onto stack *)
196
let context = { name; spec; children_count = 0; is_foreign = false } in
+2
-2
lib/check/content_model/element_registry.ml
+2
-2
lib/check/content_model/element_registry.ml
···
3
let create () = Hashtbl.create 128
4
5
let register registry spec =
6
-
let name = String.lowercase_ascii spec.Element_spec.name in
7
Hashtbl.replace registry name spec
8
9
let get registry name =
10
-
let name = String.lowercase_ascii name in
11
Hashtbl.find_opt registry name
12
13
let list_names registry =
···
3
let create () = Hashtbl.create 128
4
5
let register registry spec =
6
+
let name = Astring.String.Ascii.lowercase spec.Element_spec.name in
7
Hashtbl.replace registry name spec
8
9
let get registry name =
10
+
let name = Astring.String.Ascii.lowercase name in
11
Hashtbl.find_opt registry name
12
13
let list_names registry =
+3
-1
lib/check/content_model/elements_embedded.ml
+3
-1
lib/check/content_model/elements_embedded.ml
···
31
()
32
33
let img =
34
+
(* Note: img is only Interactive when it has usemap attribute;
35
+
we omit Interactive from static categories since usemap is rare *)
36
Element_spec.make ~name:"img" ~void:true
37
+
~categories:[ Flow; Phrasing; Embedded; Palpable ]
38
~content_model:Nothing
39
~attrs:
40
[
+1
-1
lib/check/content_model/elements_form.ml
+1
-1
lib/check/content_model/elements_form.ml
···
97
let select =
98
Element_spec.make ~name:"select"
99
~categories:[Flow; Phrasing; Interactive; Palpable]
100
-
~content_model:(Elements ["option"; "optgroup"; "script"; "template"])
101
~attrs:[
102
Attr_spec.make "autocomplete" ~datatype:"autocomplete" ();
103
Attr_spec.make "disabled" ~datatype:"boolean" ();
···
97
let select =
98
Element_spec.make ~name:"select"
99
~categories:[Flow; Phrasing; Interactive; Palpable]
100
+
~content_model:(Elements ["option"; "optgroup"; "hr"; "script"; "template"])
101
~attrs:[
102
Attr_spec.make "autocomplete" ~datatype:"autocomplete" ();
103
Attr_spec.make "disabled" ~datatype:"boolean" ();
-1
lib/check/content_model/elements_table.ml
-1
lib/check/content_model/elements_table.ml
+4
-8
lib/check/datatype/datatype.ml
+4
-8
lib/check/datatype/datatype.ml
···
32
| '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
33
| _ -> false
34
35
-
(** Case conversion *)
36
-
37
-
let to_ascii_lowercase c =
38
-
match c with 'A' .. 'Z' -> Char.chr (Char.code c + 32) | _ -> c
39
40
-
let string_to_ascii_lowercase s =
41
-
String.map to_ascii_lowercase s
42
43
(** String predicates *)
44
···
78
let make_enum ~name ~values ?(allow_empty = true) () : t =
79
(* Pre-compute hashtable for O(1) membership *)
80
let values_tbl = Hashtbl.create (List.length values) in
81
-
List.iter (fun v -> Hashtbl.add values_tbl (String.lowercase_ascii v) ()) values;
82
let values_str = String.concat ", " (List.map (Printf.sprintf "'%s'") values) in
83
(module struct
84
let name = name
85
let validate s =
86
-
let s_lower = string_to_ascii_lowercase s in
87
if (allow_empty && s = "") || Hashtbl.mem values_tbl s_lower then Ok ()
88
else Error (Printf.sprintf "The value '%s' is not a valid %s value. Expected %s%s."
89
s name (if allow_empty then "empty string, " else "") values_str)
···
32
| '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
33
| _ -> false
34
35
+
(** Case conversion - delegated to Astring *)
36
37
+
(* Removed to_ascii_lowercase and string_to_ascii_lowercase - use Astring.String.Ascii.lowercase instead *)
38
39
(** String predicates *)
40
···
74
let make_enum ~name ~values ?(allow_empty = true) () : t =
75
(* Pre-compute hashtable for O(1) membership *)
76
let values_tbl = Hashtbl.create (List.length values) in
77
+
List.iter (fun v -> Hashtbl.add values_tbl (Astring.String.Ascii.lowercase v) ()) values;
78
let values_str = String.concat ", " (List.map (Printf.sprintf "'%s'") values) in
79
(module struct
80
let name = name
81
let validate s =
82
+
let s_lower = Astring.String.Ascii.lowercase s in
83
if (allow_empty && s = "") || Hashtbl.mem values_tbl s_lower then Ok ()
84
else Error (Printf.sprintf "The value '%s' is not a valid %s value. Expected %s%s."
85
s name (if allow_empty then "empty string, " else "") values_str)
+1
-5
lib/check/datatype/datatype.mli
+1
-5
lib/check/datatype/datatype.mli
+2
-1
lib/check/datatype/dt_autocomplete.ml
+2
-1
lib/check/datatype/dt_autocomplete.ml
+2
-2
lib/check/datatype/dt_boolean.ml
+2
-2
lib/check/datatype/dt_boolean.ml
+1
-1
lib/check/datatype/dt_charset.ml
+1
-1
lib/check/datatype/dt_charset.ml
+1
-1
lib/check/datatype/dt_color.ml
+1
-1
lib/check/datatype/dt_color.ml
···
208
let name = "color"
209
210
let validate s =
211
+
let s = String.trim s |> Astring.String.Ascii.lowercase in
212
if String.length s = 0 then Error "Color value must not be empty"
213
else if List.mem s named_colors then Ok ()
214
else if String.length s > 0 && s.[0] = '#' then validate_hex_color s
+1
-1
lib/check/datatype/dt_contenteditable.ml
+1
-1
lib/check/datatype/dt_contenteditable.ml
+1
-1
lib/check/datatype/dt_crossorigin.ml
+1
-1
lib/check/datatype/dt_crossorigin.ml
+1
-1
lib/check/datatype/dt_decoding.ml
+1
-1
lib/check/datatype/dt_decoding.ml
+1
-1
lib/check/datatype/dt_dir.ml
+1
-1
lib/check/datatype/dt_dir.ml
+1
-1
lib/check/datatype/dt_draggable.ml
+1
-1
lib/check/datatype/dt_draggable.ml
+1
-1
lib/check/datatype/dt_enterkeyhint.ml
+1
-1
lib/check/datatype/dt_enterkeyhint.ml
+1
-1
lib/check/datatype/dt_fetchpriority.ml
+1
-1
lib/check/datatype/dt_fetchpriority.ml
+1
-1
lib/check/datatype/dt_form_enctype.ml
+1
-1
lib/check/datatype/dt_form_enctype.ml
+1
-1
lib/check/datatype/dt_form_method.ml
+1
-1
lib/check/datatype/dt_form_method.ml
+1
-1
lib/check/datatype/dt_input_type.ml
+1
-1
lib/check/datatype/dt_input_type.ml
+1
-1
lib/check/datatype/dt_inputmode.ml
+1
-1
lib/check/datatype/dt_inputmode.ml
+1
-1
lib/check/datatype/dt_integrity.ml
+1
-1
lib/check/datatype/dt_integrity.ml
···
49
"Hash value '%s' must be in format 'algorithm-base64hash'" trimmed)
50
| Some dash_pos ->
51
let algorithm = String.sub trimmed 0 dash_pos in
52
-
let algorithm_lower = Datatype.string_to_ascii_lowercase algorithm in
53
if not (List.mem algorithm_lower valid_algorithms) then
54
Error
55
(Printf.sprintf
···
49
"Hash value '%s' must be in format 'algorithm-base64hash'" trimmed)
50
| Some dash_pos ->
51
let algorithm = String.sub trimmed 0 dash_pos in
52
+
let algorithm_lower = Astring.String.Ascii.lowercase algorithm in
53
if not (List.mem algorithm_lower valid_algorithms) then
54
Error
55
(Printf.sprintf
+1
-1
lib/check/datatype/dt_kind.ml
+1
-1
lib/check/datatype/dt_kind.ml
+1
-1
lib/check/datatype/dt_language.ml
+1
-1
lib/check/datatype/dt_language.ml
···
5
(* Use shared character predicates from Datatype *)
6
let is_all_alpha = Datatype.is_all_alpha
7
let is_all_alphanumeric = Datatype.is_all_alphanumeric
8
-
let to_lower = Datatype.string_to_ascii_lowercase
9
10
(** Valid extlang subtags per IANA language-subtag-registry.
11
Extlangs are 3-letter subtags that follow the primary language.
···
5
(* Use shared character predicates from Datatype *)
6
let is_all_alpha = Datatype.is_all_alpha
7
let is_all_alphanumeric = Datatype.is_all_alphanumeric
8
+
let to_lower = Astring.String.Ascii.lowercase
9
10
(** Valid extlang subtags per IANA language-subtag-registry.
11
Extlangs are 3-letter subtags that follow the primary language.
+1
-1
lib/check/datatype/dt_link_type.ml
+1
-1
lib/check/datatype/dt_link_type.ml
+1
-1
lib/check/datatype/dt_list_type.ml
+1
-1
lib/check/datatype/dt_list_type.ml
+1
-1
lib/check/datatype/dt_loading.ml
+1
-1
lib/check/datatype/dt_loading.ml
+8
-8
lib/check/datatype/dt_media_query.ml
+8
-8
lib/check/datatype/dt_media_query.ml
···
147
let trimmed = String.trim s in
148
if String.length trimmed >= 3 then begin
149
let suffix = String.sub trimmed (String.length trimmed - 3) 3 in
150
-
if String.lowercase_ascii suffix = "and" then
151
Error "Parse Error."
152
else if String.length trimmed >= 4 then begin
153
let suffix4 = String.sub trimmed (String.length trimmed - 4) 4 in
154
-
if String.lowercase_ascii suffix4 = "and(" then
155
Error "Parse Error."
156
else
157
validate_media_query_content trimmed
···
197
let has_not = ref false in
198
(match read_ident () with
199
| Some w ->
200
-
let w_lower = String.lowercase_ascii w in
201
if w_lower = "only" then (has_only := true; skip_ws ())
202
else if w_lower = "not" then (has_not := true; skip_ws ())
203
else i := !i - String.length w (* put back *)
···
234
match read_ident () with
235
| None -> Error "Parse Error."
236
| Some kw ->
237
-
let kw_lower = String.lowercase_ascii kw in
238
if kw_lower <> "and" then Error "Parse Error."
239
else begin
240
(* Check that there was whitespace before 'and' *)
···
263
match read_ident () with
264
| None -> Error "Parse Error."
265
| Some kw2 ->
266
-
let kw2_lower = String.lowercase_ascii kw2 in
267
if kw2_lower <> "and" then Error "Parse Error."
268
else begin
269
skip_ws ();
···
291
match String.index_opt content ':' with
292
| None ->
293
(* Just feature name - boolean feature or range syntax *)
294
-
let feature_lower = String.lowercase_ascii content in
295
if List.mem feature_lower deprecated_media_features then
296
Error (Printf.sprintf "Deprecated media feature \"%s\". For guidance, see the Deprecated Media Features section in the current Media Queries specification." feature_lower)
297
else if List.mem feature_lower valid_media_features then
···
301
| Some colon_pos ->
302
let feature = String.trim (String.sub content 0 colon_pos) in
303
let value = String.trim (String.sub content (colon_pos + 1) (String.length content - colon_pos - 1)) in
304
-
let feature_lower = String.lowercase_ascii feature in
305
306
(* Check for deprecated features *)
307
if List.mem feature_lower deprecated_media_features then
···
362
else if unit_part = "" then
363
Error "only \"0\" can be a \"unit\". You must put a unit after your number"
364
else begin
365
-
let unit_lower = String.lowercase_ascii unit_part in
366
if List.mem unit_lower valid_length_units then Ok ()
367
else if List.mem unit_lower valid_resolution_units then
368
Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature)
···
147
let trimmed = String.trim s in
148
if String.length trimmed >= 3 then begin
149
let suffix = String.sub trimmed (String.length trimmed - 3) 3 in
150
+
if Astring.String.Ascii.lowercase suffix = "and" then
151
Error "Parse Error."
152
else if String.length trimmed >= 4 then begin
153
let suffix4 = String.sub trimmed (String.length trimmed - 4) 4 in
154
+
if Astring.String.Ascii.lowercase suffix4 = "and(" then
155
Error "Parse Error."
156
else
157
validate_media_query_content trimmed
···
197
let has_not = ref false in
198
(match read_ident () with
199
| Some w ->
200
+
let w_lower = Astring.String.Ascii.lowercase w in
201
if w_lower = "only" then (has_only := true; skip_ws ())
202
else if w_lower = "not" then (has_not := true; skip_ws ())
203
else i := !i - String.length w (* put back *)
···
234
match read_ident () with
235
| None -> Error "Parse Error."
236
| Some kw ->
237
+
let kw_lower = Astring.String.Ascii.lowercase kw in
238
if kw_lower <> "and" then Error "Parse Error."
239
else begin
240
(* Check that there was whitespace before 'and' *)
···
263
match read_ident () with
264
| None -> Error "Parse Error."
265
| Some kw2 ->
266
+
let kw2_lower = Astring.String.Ascii.lowercase kw2 in
267
if kw2_lower <> "and" then Error "Parse Error."
268
else begin
269
skip_ws ();
···
291
match String.index_opt content ':' with
292
| None ->
293
(* Just feature name - boolean feature or range syntax *)
294
+
let feature_lower = Astring.String.Ascii.lowercase content in
295
if List.mem feature_lower deprecated_media_features then
296
Error (Printf.sprintf "Deprecated media feature \"%s\". For guidance, see the Deprecated Media Features section in the current Media Queries specification." feature_lower)
297
else if List.mem feature_lower valid_media_features then
···
301
| Some colon_pos ->
302
let feature = String.trim (String.sub content 0 colon_pos) in
303
let value = String.trim (String.sub content (colon_pos + 1) (String.length content - colon_pos - 1)) in
304
+
let feature_lower = Astring.String.Ascii.lowercase feature in
305
306
(* Check for deprecated features *)
307
if List.mem feature_lower deprecated_media_features then
···
362
else if unit_part = "" then
363
Error "only \"0\" can be a \"unit\". You must put a unit after your number"
364
else begin
365
+
let unit_lower = Astring.String.Ascii.lowercase unit_part in
366
if List.mem unit_lower valid_length_units then Ok ()
367
else if List.mem unit_lower valid_resolution_units then
368
Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature)
+1
-1
lib/check/datatype/dt_mime.ml
+1
-1
lib/check/datatype/dt_mime.ml
···
91
if is_token_char c then parse In_subtype (i + 1)
92
else if c = ';' then
93
(* Check if this is a JavaScript MIME type *)
94
-
let mime_type = String.sub s 0 i |> String.lowercase_ascii in
95
if List.mem mime_type javascript_mime_types then
96
Error
97
"A JavaScript MIME type must not contain any characters after \
···
91
if is_token_char c then parse In_subtype (i + 1)
92
else if c = ';' then
93
(* Check if this is a JavaScript MIME type *)
94
+
let mime_type = String.sub s 0 i |> Astring.String.Ascii.lowercase in
95
if List.mem mime_type javascript_mime_types then
96
Error
97
"A JavaScript MIME type must not contain any characters after \
+1
-1
lib/check/datatype/dt_popover.ml
+1
-1
lib/check/datatype/dt_popover.ml
+1
-1
lib/check/datatype/dt_preload.ml
+1
-1
lib/check/datatype/dt_preload.ml
+1
-1
lib/check/datatype/dt_referrer.ml
+1
-1
lib/check/datatype/dt_referrer.ml
+1
-1
lib/check/datatype/dt_scope.ml
+1
-1
lib/check/datatype/dt_scope.ml
+1
-1
lib/check/datatype/dt_shape.ml
+1
-1
lib/check/datatype/dt_shape.ml
+1
-1
lib/check/datatype/dt_spellcheck.ml
+1
-1
lib/check/datatype/dt_spellcheck.ml
+1
-1
lib/check/datatype/dt_target.ml
+1
-1
lib/check/datatype/dt_target.ml
+1
-1
lib/check/datatype/dt_translate.ml
+1
-1
lib/check/datatype/dt_translate.ml
+1
-1
lib/check/datatype/dt_url.ml
+1
-1
lib/check/datatype/dt_url.ml
+1
-1
lib/check/datatype/dt_wrap.ml
+1
-1
lib/check/datatype/dt_wrap.ml
+3
-3
lib/check/element/attr.ml
+3
-3
lib/check/element/attr.ml
···
571
572
(** Parse a single attribute name-value pair to typed attribute *)
573
let parse_attr name value : t =
574
-
let name_lower = String.lowercase_ascii name in
575
-
let value_lower = String.lowercase_ascii value in
576
match name_lower with
577
(* Global attributes *)
578
| "id" -> `Id value
···
875
(** Get rel attribute as list of link types (space-separated, lowercased per HTML5 spec) *)
876
let get_rel_list attrs =
877
match get_rel attrs with
878
-
| Some s -> List.map String.lowercase_ascii (Datatype.split_on_whitespace s)
879
| None -> []
880
881
(** Get headers attribute as raw string *)
···
571
572
(** Parse a single attribute name-value pair to typed attribute *)
573
let parse_attr name value : t =
574
+
let name_lower = Astring.String.Ascii.lowercase name in
575
+
let value_lower = Astring.String.Ascii.lowercase value in
576
match name_lower with
577
(* Global attributes *)
578
| "id" -> `Id value
···
875
(** Get rel attribute as list of link types (space-separated, lowercased per HTML5 spec) *)
876
let get_rel_list attrs =
877
match get_rel attrs with
878
+
| Some s -> List.map Astring.String.Ascii.lowercase (Datatype.split_on_whitespace s)
879
| None -> []
880
881
(** Get headers attribute as raw string *)
+4
-4
lib/check/element/element.ml
+4
-4
lib/check/element/element.ml
···
21
22
(** Parse element-specific type attribute based on tag *)
23
let parse_type_attr (tag : Tag.html_tag) value : Attr.t =
24
-
let value_lower = String.lowercase_ascii value in
25
match tag with
26
| `Input ->
27
(match Attr.parse_input_type value_lower with
···
42
(** Parse attributes with element context for type attribute *)
43
let parse_attrs_for_tag (tag : Tag.element_tag) (raw_attrs : (string * string) list) : Attr.t list =
44
List.map (fun (name, value) ->
45
-
let name_lower = String.lowercase_ascii name in
46
if name_lower = "type" then
47
match tag with
48
| Tag.Html html_tag -> parse_type_attr html_tag value
···
274
(** Get raw attribute value (from original attrs) *)
275
let get_raw_attr name elem =
276
List.find_map (fun (n, v) ->
277
-
if String.lowercase_ascii n = String.lowercase_ascii name then Some v else None
278
) elem.raw_attrs
279
280
(** Check if raw attribute exists *)
281
let has_raw_attr name elem =
282
List.exists (fun (n, _) ->
283
-
String.lowercase_ascii n = String.lowercase_ascii name
284
) elem.raw_attrs
285
286
(** {1 Pattern Matching Helpers} *)
···
21
22
(** Parse element-specific type attribute based on tag *)
23
let parse_type_attr (tag : Tag.html_tag) value : Attr.t =
24
+
let value_lower = Astring.String.Ascii.lowercase value in
25
match tag with
26
| `Input ->
27
(match Attr.parse_input_type value_lower with
···
42
(** Parse attributes with element context for type attribute *)
43
let parse_attrs_for_tag (tag : Tag.element_tag) (raw_attrs : (string * string) list) : Attr.t list =
44
List.map (fun (name, value) ->
45
+
let name_lower = Astring.String.Ascii.lowercase name in
46
if name_lower = "type" then
47
match tag with
48
| Tag.Html html_tag -> parse_type_attr html_tag value
···
274
(** Get raw attribute value (from original attrs) *)
275
let get_raw_attr name elem =
276
List.find_map (fun (n, v) ->
277
+
if Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name then Some v else None
278
) elem.raw_attrs
279
280
(** Check if raw attribute exists *)
281
let has_raw_attr name elem =
282
List.exists (fun (n, _) ->
283
+
Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name
284
) elem.raw_attrs
285
286
(** {1 Pattern Matching Helpers} *)
+3
-3
lib/check/element/tag.ml
+3
-3
lib/check/element/tag.ml
···
234
(** Check if a name is a valid custom element name (contains hyphen, not reserved) *)
235
let is_custom_element_name name =
236
String.contains name '-' &&
237
-
not (String.starts_with ~prefix:"xml" (String.lowercase_ascii name)) &&
238
-
not (String.equal (String.lowercase_ascii name) "annotation-xml")
239
240
(** SVG namespace URI *)
241
let svg_namespace = "http://www.w3.org/2000/svg"
···
255
256
(** Convert tag name and optional namespace to element_tag *)
257
let tag_of_string ?namespace name =
258
-
let name_lower = String.lowercase_ascii name in
259
match namespace with
260
| Some ns when is_svg_namespace ns -> Svg name (* Preserve original case for SVG *)
261
| Some ns when is_mathml_namespace ns -> MathML name (* Preserve original case for MathML *)
···
234
(** Check if a name is a valid custom element name (contains hyphen, not reserved) *)
235
let is_custom_element_name name =
236
String.contains name '-' &&
237
+
not (String.starts_with ~prefix:"xml" (Astring.String.Ascii.lowercase name)) &&
238
+
not (String.equal (Astring.String.Ascii.lowercase name) "annotation-xml")
239
240
(** SVG namespace URI *)
241
let svg_namespace = "http://www.w3.org/2000/svg"
···
255
256
(** Convert tag name and optional namespace to element_tag *)
257
let tag_of_string ?namespace name =
258
+
let name_lower = Astring.String.Ascii.lowercase name in
259
match namespace with
260
| Some ns when is_svg_namespace ns -> Svg name (* Preserve original case for SVG *)
261
| Some ns when is_mathml_namespace ns -> MathML name (* Preserve original case for MathML *)
+1
-1
lib/check/semantic/form_checker.ml
+1
-1
lib/check/semantic/form_checker.ml
+2
-2
lib/check/semantic/lang_detecting_checker.ml
+2
-2
lib/check/semantic/lang_detecting_checker.ml
···
54
let get_lang_code lang =
55
(* Extract primary language subtag *)
56
match String.split_on_char '-' lang with
57
-
| code :: _ -> String.lowercase_ascii code
58
| [] -> ""
59
60
(* Create detector lazily with deterministic seed *)
···
324
| None ->
325
Message_collector.add_typed collector
326
(`I18n (`Missing_dir_rtl (`Language detected_name)))
327
-
| Some dir when String.lowercase_ascii dir <> "rtl" ->
328
Message_collector.add_typed collector
329
(`I18n (`Wrong_dir (`Language detected_name, `Declared dir)))
330
| _ -> ()
···
54
let get_lang_code lang =
55
(* Extract primary language subtag *)
56
match String.split_on_char '-' lang with
57
+
| code :: _ -> Astring.String.Ascii.lowercase code
58
| [] -> ""
59
60
(* Create detector lazily with deterministic seed *)
···
324
| None ->
325
Message_collector.add_typed collector
326
(`I18n (`Missing_dir_rtl (`Language detected_name)))
327
+
| Some dir when Astring.String.Ascii.lowercase dir <> "rtl" ->
328
Message_collector.add_typed collector
329
(`I18n (`Wrong_dir (`Language detected_name, `Declared dir)))
330
| _ -> ()
+11
-13
lib/check/semantic/nesting_checker.ml
+11
-13
lib/check/semantic/nesting_checker.ml
···
190
state.ancestor_flags <- empty_flags ()
191
192
(** Get attribute value by name from attribute list. *)
193
-
let get_attr attrs name =
194
-
List.assoc_opt name attrs
195
196
(** Check if an attribute exists. *)
197
-
let has_attr attrs name =
198
-
get_attr attrs name <> None
199
200
(** Check if element is interactive based on its attributes. *)
201
let is_interactive_element name attrs =
202
match name with
203
-
| "a" -> has_attr attrs "href"
204
-
| "audio" | "video" -> has_attr attrs "controls"
205
-
| "img" | "object" -> has_attr attrs "usemap"
206
| "input" ->
207
-
(match get_attr attrs "type" with
208
| Some "hidden" -> false
209
| _ -> true)
210
| "button" | "details" | "embed" | "iframe" | "label" | "select"
···
239
(* Determine attribute to mention in error messages *)
240
let attr =
241
match name with
242
-
| "a" when has_attr attrs "href" -> Some "href"
243
-
| "audio" when has_attr attrs "controls" -> Some "controls"
244
-
| "video" when has_attr attrs "controls" -> Some "controls"
245
-
| "img" when has_attr attrs "usemap" -> Some "usemap"
246
-
| "object" when has_attr attrs "usemap" -> Some "usemap"
247
| _ -> None
248
in
249
···
190
state.ancestor_flags <- empty_flags ()
191
192
(** Get attribute value by name from attribute list. *)
193
+
let get_attr = Attr_utils.get_attr
194
195
(** Check if an attribute exists. *)
196
+
let has_attr = Attr_utils.has_attr
197
198
(** Check if element is interactive based on its attributes. *)
199
let is_interactive_element name attrs =
200
match name with
201
+
| "a" -> has_attr "href" attrs
202
+
| "audio" | "video" -> has_attr "controls" attrs
203
+
| "img" | "object" -> has_attr "usemap" attrs
204
| "input" ->
205
+
(match get_attr "type" attrs with
206
| Some "hidden" -> false
207
| _ -> true)
208
| "button" | "details" | "embed" | "iframe" | "label" | "select"
···
237
(* Determine attribute to mention in error messages *)
238
let attr =
239
match name with
240
+
| "a" when has_attr "href" attrs -> Some "href"
241
+
| "audio" when has_attr "controls" attrs -> Some "controls"
242
+
| "video" when has_attr "controls" attrs -> Some "controls"
243
+
| "img" when has_attr "usemap" attrs -> Some "usemap"
244
+
| "object" when has_attr "usemap" attrs -> Some "usemap"
245
| _ -> None
246
in
247
+2
-2
lib/check/semantic/obsolete_checker.ml
+2
-2
lib/check/semantic/obsolete_checker.ml
···
260
match element.Element.tag with
261
| Tag.Html _ ->
262
let name = Tag.tag_to_string element.tag in
263
-
let name_lower = String.lowercase_ascii name in
264
let attrs = element.raw_attrs in
265
266
(* Track head context *)
···
275
276
(* Check for obsolete attributes *)
277
List.iter (fun (attr_name, _attr_value) ->
278
-
let attr_lower = String.lowercase_ascii attr_name in
279
280
(* Special handling for scoped attribute on style *)
281
if attr_lower = "scoped" && name_lower = "style" then begin
···
260
match element.Element.tag with
261
| Tag.Html _ ->
262
let name = Tag.tag_to_string element.tag in
263
+
let name_lower = Astring.String.Ascii.lowercase name in
264
let attrs = element.raw_attrs in
265
266
(* Track head context *)
···
275
276
(* Check for obsolete attributes *)
277
List.iter (fun (attr_name, _attr_value) ->
278
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
279
280
(* Special handling for scoped attribute on style *)
281
if attr_lower = "scoped" && name_lower = "style" then begin
+1
-1
lib/check/semantic/required_attr_checker.ml
+1
-1
lib/check/semantic/required_attr_checker.ml
···
120
(* popover attribute must have valid value *)
121
match Attr_utils.get_attr "popover" attrs with
122
| Some value ->
123
-
let value_lower = String.lowercase_ascii value in
124
(* Valid values: empty string, auto, manual, hint *)
125
if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then
126
Message_collector.add_typed collector
···
120
(* popover attribute must have valid value *)
121
match Attr_utils.get_attr "popover" attrs with
122
| Some value ->
123
+
let value_lower = Astring.String.Ascii.lowercase value in
124
(* Valid values: empty string, auto, manual, hint *)
125
if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then
126
Message_collector.add_typed collector
+34
-34
lib/check/specialized/aria_checker.ml
+34
-34
lib/check/specialized/aria_checker.ml
···
309
else
310
String.split_on_char ' ' trimmed
311
|> List.filter (fun s -> String.trim s <> "")
312
-
|> List.map String.lowercase_ascii
313
314
(** Get the implicit role for an HTML element. *)
315
let get_implicit_role element_name attrs =
316
(* Check for input element with type attribute *)
317
if element_name = "input" then begin
318
-
match List.assoc_opt "type" attrs with
319
| Some input_type ->
320
-
let input_type = String.lowercase_ascii input_type in
321
begin match Hashtbl.find_opt input_types_with_implicit_role input_type with
322
| Some role -> Some role
323
| None ->
···
332
end
333
(* Check for area element - implicit role depends on href attribute *)
334
else if element_name = "area" then begin
335
-
match List.assoc_opt "href" attrs with
336
| Some _ -> Some "link" (* area with href has implicit role "link" *)
337
| None -> Some "generic" (* area without href has no corresponding role, treated as generic *)
338
end
339
(* Check for a element - implicit role depends on href attribute *)
340
else if element_name = "a" then begin
341
-
match List.assoc_opt "href" attrs with
342
| Some _ -> Some "link" (* a with href has implicit role "link" *)
343
| None -> Some "generic" (* a without href has no corresponding role, treated as generic *)
344
end
···
430
match element.Element.tag with
431
| Tag.Html _ ->
432
let name = Tag.tag_to_string element.tag in
433
-
let name_lower = String.lowercase_ascii name in
434
let attrs = element.raw_attrs in
435
-
let role_attr = List.assoc_opt "role" attrs in
436
-
let aria_label = List.assoc_opt "aria-label" attrs in
437
-
let aria_labelledby = List.assoc_opt "aria-labelledby" attrs in
438
-
let aria_braillelabel = List.assoc_opt "aria-braillelabel" attrs in
439
let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in
440
let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in
441
let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in
···
459
460
(* Track active tabs and tabpanel roles for end_document validation *)
461
if List.mem "tab" explicit_roles then begin
462
-
let aria_selected = List.assoc_opt "aria-selected" attrs in
463
if aria_selected = Some "true" then state.has_active_tab <- true
464
end;
465
if List.mem "tabpanel" explicit_roles then state.has_tabpanel <- true;
466
467
(* Track visible main elements (explicit role=main or implicit main role) *)
468
let is_hidden =
469
-
let aria_hidden = List.assoc_opt "aria-hidden" attrs in
470
aria_hidden = Some "true"
471
in
472
if not is_hidden then begin
···
489
(* Check br/wbr aria-* attribute restrictions - not allowed *)
490
if name_lower = "br" || name_lower = "wbr" then begin
491
List.iter (fun (attr_name, _) ->
492
-
let attr_lower = String.lowercase_ascii attr_name in
493
if String.starts_with ~prefix:"aria-" attr_lower &&
494
attr_lower <> "aria-hidden" then
495
Message_collector.add_typed collector
···
515
516
(* Check for img with empty alt having role attribute *)
517
if name_lower = "img" then begin
518
-
let alt_value = List.assoc_opt "alt" attrs in
519
match alt_value with
520
| Some alt when String.trim alt = "" ->
521
(* img with empty alt must not have role attribute *)
···
526
527
(* Check for input[type=checkbox][role=button] requires aria-pressed *)
528
if name_lower = "input" then begin
529
-
let input_type = match List.assoc_opt "type" attrs with
530
-
| Some t -> String.lowercase_ascii t
531
| None -> "text"
532
in
533
if input_type = "checkbox" && List.mem "button" explicit_roles then begin
534
-
let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in
535
if not has_aria_pressed then
536
Message_collector.add_typed collector (`Input `Checkbox_needs_aria_pressed)
537
end
···
566
567
(* Check for aria-hidden="true" on body element *)
568
if name_lower = "body" then begin
569
-
let aria_hidden = List.assoc_opt "aria-hidden" attrs in
570
match aria_hidden with
571
| Some "true" ->
572
Message_collector.add_typed collector (`Aria `Hidden_on_body)
···
574
end;
575
576
(* Check for aria-checked on input[type=checkbox] *)
577
-
let aria_checked = List.assoc_opt "aria-checked" attrs in
578
if name_lower = "input" then begin
579
-
match List.assoc_opt "type" attrs with
580
-
| Some input_type when String.lowercase_ascii input_type = "checkbox" ->
581
if aria_checked <> None then
582
Message_collector.add_typed collector
583
(`Aria (`Must_not_use (`Attr "aria-checked", `Elem "input",
···
586
end;
587
588
(* Check for aria-expanded on roles that don't support it *)
589
-
let aria_expanded = List.assoc_opt "aria-expanded" attrs in
590
if aria_expanded <> None then begin
591
let role_to_check = match explicit_roles with
592
| first :: _ -> Some first
···
605
(* Special message for input[type=text] with role="textbox" *)
606
let reason =
607
if name_lower = "input" && first_role = "textbox" then begin
608
-
let has_list = List.exists (fun (k, _) -> String.lowercase_ascii k = "list") attrs in
609
-
let input_type = match List.assoc_opt "type" attrs with
610
-
| Some t -> String.lowercase_ascii t
611
| None -> "text"
612
in
613
if not has_list && input_type = "text" then
···
671
672
(* Check for redundant default ARIA attribute values *)
673
List.iter (fun (attr_name, attr_value) ->
674
-
let attr_lower = String.lowercase_ascii attr_name in
675
if String.starts_with ~prefix:"aria-" attr_lower then
676
match Hashtbl.find_opt aria_default_values attr_lower with
677
| Some default_value ->
678
-
let value_lower = String.lowercase_ascii (String.trim attr_value) in
679
if value_lower = default_value then
680
Message_collector.add_typed collector
681
(`Generic (Printf.sprintf
···
688
if name_lower = "summary" then begin
689
let parent = get_parent_element state in
690
let is_in_details = parent = Some "details" in
691
-
let has_role_attr = List.exists (fun (k, _) -> String.lowercase_ascii k = "role") attrs in
692
-
let has_aria_expanded = List.assoc_opt "aria-expanded" attrs <> None in
693
-
let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in
694
if is_in_details then begin
695
(* summary that is the first child of details *)
696
if has_role_attr then
···
726
(* Custom elements (autonomous custom elements) have generic role by default
727
and cannot have accessible names unless they have an explicit role *)
728
let attrs = element.raw_attrs in
729
-
let role_attr = List.assoc_opt "role" attrs in
730
-
let aria_label = List.assoc_opt "aria-label" attrs in
731
-
let aria_labelledby = List.assoc_opt "aria-labelledby" attrs in
732
-
let aria_braillelabel = List.assoc_opt "aria-braillelabel" attrs in
733
let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in
734
let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in
735
let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in
···
309
else
310
String.split_on_char ' ' trimmed
311
|> List.filter (fun s -> String.trim s <> "")
312
+
|> List.map Astring.String.Ascii.lowercase
313
314
(** Get the implicit role for an HTML element. *)
315
let get_implicit_role element_name attrs =
316
(* Check for input element with type attribute *)
317
if element_name = "input" then begin
318
+
match Attr_utils.get_attr "type" attrs with
319
| Some input_type ->
320
+
let input_type = Astring.String.Ascii.lowercase input_type in
321
begin match Hashtbl.find_opt input_types_with_implicit_role input_type with
322
| Some role -> Some role
323
| None ->
···
332
end
333
(* Check for area element - implicit role depends on href attribute *)
334
else if element_name = "area" then begin
335
+
match Attr_utils.get_attr "href" attrs with
336
| Some _ -> Some "link" (* area with href has implicit role "link" *)
337
| None -> Some "generic" (* area without href has no corresponding role, treated as generic *)
338
end
339
(* Check for a element - implicit role depends on href attribute *)
340
else if element_name = "a" then begin
341
+
match Attr_utils.get_attr "href" attrs with
342
| Some _ -> Some "link" (* a with href has implicit role "link" *)
343
| None -> Some "generic" (* a without href has no corresponding role, treated as generic *)
344
end
···
430
match element.Element.tag with
431
| Tag.Html _ ->
432
let name = Tag.tag_to_string element.tag in
433
+
let name_lower = Astring.String.Ascii.lowercase name in
434
let attrs = element.raw_attrs in
435
+
let role_attr = Attr_utils.get_attr "role" attrs in
436
+
let aria_label = Attr_utils.get_attr "aria-label" attrs in
437
+
let aria_labelledby = Attr_utils.get_attr "aria-labelledby" attrs in
438
+
let aria_braillelabel = Attr_utils.get_attr "aria-braillelabel" attrs in
439
let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in
440
let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in
441
let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in
···
459
460
(* Track active tabs and tabpanel roles for end_document validation *)
461
if List.mem "tab" explicit_roles then begin
462
+
let aria_selected = Attr_utils.get_attr "aria-selected" attrs in
463
if aria_selected = Some "true" then state.has_active_tab <- true
464
end;
465
if List.mem "tabpanel" explicit_roles then state.has_tabpanel <- true;
466
467
(* Track visible main elements (explicit role=main or implicit main role) *)
468
let is_hidden =
469
+
let aria_hidden = Attr_utils.get_attr "aria-hidden" attrs in
470
aria_hidden = Some "true"
471
in
472
if not is_hidden then begin
···
489
(* Check br/wbr aria-* attribute restrictions - not allowed *)
490
if name_lower = "br" || name_lower = "wbr" then begin
491
List.iter (fun (attr_name, _) ->
492
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
493
if String.starts_with ~prefix:"aria-" attr_lower &&
494
attr_lower <> "aria-hidden" then
495
Message_collector.add_typed collector
···
515
516
(* Check for img with empty alt having role attribute *)
517
if name_lower = "img" then begin
518
+
let alt_value = Attr_utils.get_attr "alt" attrs in
519
match alt_value with
520
| Some alt when String.trim alt = "" ->
521
(* img with empty alt must not have role attribute *)
···
526
527
(* Check for input[type=checkbox][role=button] requires aria-pressed *)
528
if name_lower = "input" then begin
529
+
let input_type = match Attr_utils.get_attr "type" attrs with
530
+
| Some t -> Astring.String.Ascii.lowercase t
531
| None -> "text"
532
in
533
if input_type = "checkbox" && List.mem "button" explicit_roles then begin
534
+
let has_aria_pressed = Attr_utils.has_attr "aria-pressed" attrs in
535
if not has_aria_pressed then
536
Message_collector.add_typed collector (`Input `Checkbox_needs_aria_pressed)
537
end
···
566
567
(* Check for aria-hidden="true" on body element *)
568
if name_lower = "body" then begin
569
+
let aria_hidden = Attr_utils.get_attr "aria-hidden" attrs in
570
match aria_hidden with
571
| Some "true" ->
572
Message_collector.add_typed collector (`Aria `Hidden_on_body)
···
574
end;
575
576
(* Check for aria-checked on input[type=checkbox] *)
577
+
let aria_checked = Attr_utils.get_attr "aria-checked" attrs in
578
if name_lower = "input" then begin
579
+
match Attr_utils.get_attr "type" attrs with
580
+
| Some input_type when Astring.String.Ascii.lowercase input_type = "checkbox" ->
581
if aria_checked <> None then
582
Message_collector.add_typed collector
583
(`Aria (`Must_not_use (`Attr "aria-checked", `Elem "input",
···
586
end;
587
588
(* Check for aria-expanded on roles that don't support it *)
589
+
let aria_expanded = Attr_utils.get_attr "aria-expanded" attrs in
590
if aria_expanded <> None then begin
591
let role_to_check = match explicit_roles with
592
| first :: _ -> Some first
···
605
(* Special message for input[type=text] with role="textbox" *)
606
let reason =
607
if name_lower = "input" && first_role = "textbox" then begin
608
+
let has_list = Attr_utils.has_attr "list" attrs in
609
+
let input_type = match Attr_utils.get_attr "type" attrs with
610
+
| Some t -> Astring.String.Ascii.lowercase t
611
| None -> "text"
612
in
613
if not has_list && input_type = "text" then
···
671
672
(* Check for redundant default ARIA attribute values *)
673
List.iter (fun (attr_name, attr_value) ->
674
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
675
if String.starts_with ~prefix:"aria-" attr_lower then
676
match Hashtbl.find_opt aria_default_values attr_lower with
677
| Some default_value ->
678
+
let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in
679
if value_lower = default_value then
680
Message_collector.add_typed collector
681
(`Generic (Printf.sprintf
···
688
if name_lower = "summary" then begin
689
let parent = get_parent_element state in
690
let is_in_details = parent = Some "details" in
691
+
let has_role_attr = Attr_utils.has_attr "role" attrs in
692
+
let has_aria_expanded = Attr_utils.has_attr "aria-expanded" attrs in
693
+
let has_aria_pressed = Attr_utils.has_attr "aria-pressed" attrs in
694
if is_in_details then begin
695
(* summary that is the first child of details *)
696
if has_role_attr then
···
726
(* Custom elements (autonomous custom elements) have generic role by default
727
and cannot have accessible names unless they have an explicit role *)
728
let attrs = element.raw_attrs in
729
+
let role_attr = Attr_utils.get_attr "role" attrs in
730
+
let aria_label = Attr_utils.get_attr "aria-label" attrs in
731
+
let aria_labelledby = Attr_utils.get_attr "aria-labelledby" attrs in
732
+
let aria_braillelabel = Attr_utils.get_attr "aria-braillelabel" attrs in
733
let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in
734
let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in
735
let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in
+21
-21
lib/check/specialized/attr_restrictions_checker.ml
+21
-21
lib/check/specialized/attr_restrictions_checker.ml
···
58
match element.Element.tag with
59
| Tag.Html _ ->
60
let name = Tag.tag_to_string element.tag in
61
-
let name_lower = String.lowercase_ascii name in
62
let attrs = element.raw_attrs in
63
64
(* Detect XHTML mode from xmlns attribute on html element *)
···
86
(* Check for xmlns:* prefixed attributes - not allowed in HTML *)
87
(* Standard xmlns declarations are allowed but custom prefixes are not *)
88
List.iter (fun (attr_name, _) ->
89
-
let attr_lower = String.lowercase_ascii attr_name in
90
if String.starts_with ~prefix:"xmlns:" attr_lower then begin
91
let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in
92
(* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *)
···
113
(* Validate style type attribute - must be "text/css" or omitted *)
114
if name_lower = "style" then begin
115
List.iter (fun (attr_name, attr_value) ->
116
-
let attr_lower = String.lowercase_ascii attr_name in
117
if attr_lower = "type" then begin
118
-
let value_lower = String.lowercase_ascii (String.trim attr_value) in
119
if value_lower <> "text/css" then
120
Message_collector.add_typed collector (`Misc `Style_type_invalid)
121
end
···
144
(* imagesrcset requires as="image" *)
145
if has_imagesrcset then begin
146
let as_is_image = match as_value with
147
-
| Some v -> String.lowercase_ascii (String.trim v) = "image"
148
| None -> false
149
in
150
if not as_is_image then
···
164
(* Validate img usemap attribute - must be hash-name reference with content *)
165
if name_lower = "img" then begin
166
List.iter (fun (attr_name, attr_value) ->
167
-
let attr_lower = String.lowercase_ascii attr_name in
168
if attr_lower = "usemap" then begin
169
if attr_value = "#" then
170
Message_collector.add_typed collector
···
178
(* Validate embed type attribute - must be valid MIME type *)
179
if name_lower = "embed" then begin
180
List.iter (fun (attr_name, attr_value) ->
181
-
let attr_lower = String.lowercase_ascii attr_name in
182
if attr_lower = "type" then begin
183
match Dt_mime.validate_mime_type attr_value with
184
| Ok () -> ()
···
197
name_lower = "iframe" || name_lower = "source" in
198
if is_dimension_element then begin
199
List.iter (fun (attr_name, attr_value) ->
200
-
let attr_lower = String.lowercase_ascii attr_name in
201
if attr_lower = "width" || attr_lower = "height" then begin
202
(* Check for non-negative integer only *)
203
let is_valid =
···
245
(* Validate area[shape=default] cannot have coords *)
246
if name_lower = "area" then begin
247
match Attr_utils.get_attr "shape" attrs with
248
-
| Some s when String.lowercase_ascii (String.trim s) = "default" ->
249
if Attr_utils.has_attr "coords" attrs then
250
Message_collector.add_typed collector
251
(`Attr (`Not_allowed (`Attr "coords", `Elem "area")))
···
257
match Attr_utils.get_attr "dir" attrs with
258
| None ->
259
Message_collector.add_typed collector (`Misc `Bdo_missing_dir)
260
-
| Some v when String.lowercase_ascii (String.trim v) = "auto" ->
261
Message_collector.add_typed collector (`Misc `Bdo_dir_auto)
262
| _ -> ()
263
end;
···
266
if name_lower = "input" then begin
267
if Attr_utils.has_attr "list" attrs then begin
268
let input_type = Attr_utils.get_attr_or "type" ~default:"text" attrs
269
-
|> String.trim |> String.lowercase_ascii in
270
if not (List.mem input_type input_types_allowing_list) then
271
Message_collector.add_typed collector (`Input `List_not_allowed)
272
end
···
274
275
(* Validate data-* attributes *)
276
List.iter (fun (attr_name, _) ->
277
-
let attr_lower = String.lowercase_ascii attr_name in
278
(* Check if it starts with "data-" *)
279
if String.starts_with ~prefix:"data-" attr_lower then begin
280
let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in
···
297
(match lang_value with
298
| None ->
299
Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
300
-
| Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang ->
301
Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
302
| _ -> ())
303
| None -> ()
···
305
306
(* Validate spellcheck attribute - must be "true" or "false" or empty *)
307
List.iter (fun (attr_name, attr_value) ->
308
-
let attr_lower = String.lowercase_ascii attr_name in
309
if attr_lower = "spellcheck" then begin
310
-
let value_lower = String.lowercase_ascii (String.trim attr_value) in
311
if value_lower <> "" && value_lower <> "true" && value_lower <> "false" then
312
Message_collector.add_typed collector
313
(`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
···
317
(* Validate enterkeyhint attribute - must be one of specific values *)
318
let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in
319
List.iter (fun (attr_name, attr_value) ->
320
-
let attr_lower = String.lowercase_ascii attr_name in
321
if attr_lower = "enterkeyhint" then begin
322
-
let value_lower = String.lowercase_ascii (String.trim attr_value) in
323
if not (List.mem value_lower valid_enterkeyhint) then
324
Message_collector.add_typed collector
325
(`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
···
328
329
(* Validate headingoffset attribute - must be a number between 0 and 8 *)
330
List.iter (fun (attr_name, attr_value) ->
331
-
let attr_lower = String.lowercase_ascii attr_name in
332
if attr_lower = "headingoffset" then begin
333
let trimmed = String.trim attr_value in
334
let is_valid =
···
346
347
(* Validate accesskey attribute - each key label must be a single code point *)
348
List.iter (fun (attr_name, attr_value) ->
349
-
let attr_lower = String.lowercase_ascii attr_name in
350
if attr_lower = "accesskey" then begin
351
(* Split by whitespace to get key labels *)
352
let keys = String.split_on_char ' ' attr_value |>
···
418
let is_media_element = name_lower = "link" || name_lower = "style" || name_lower = "source" in
419
if is_media_element then begin
420
List.iter (fun (attr_name, attr_value) ->
421
-
let attr_lower = String.lowercase_ascii attr_name in
422
if attr_lower = "media" then begin
423
let trimmed = String.trim attr_value in
424
if trimmed <> "" then begin
···
436
437
(* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *)
438
List.iter (fun (attr_name, attr_value) ->
439
-
let attr_lower = String.lowercase_ascii attr_name in
440
if attr_lower = "prefix" then begin
441
(* Parse prefix attribute value - format: "prefix1: iri1 prefix2: iri2 ..." *)
442
let trimmed = String.trim attr_value in
···
58
match element.Element.tag with
59
| Tag.Html _ ->
60
let name = Tag.tag_to_string element.tag in
61
+
let name_lower = Astring.String.Ascii.lowercase name in
62
let attrs = element.raw_attrs in
63
64
(* Detect XHTML mode from xmlns attribute on html element *)
···
86
(* Check for xmlns:* prefixed attributes - not allowed in HTML *)
87
(* Standard xmlns declarations are allowed but custom prefixes are not *)
88
List.iter (fun (attr_name, _) ->
89
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
90
if String.starts_with ~prefix:"xmlns:" attr_lower then begin
91
let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in
92
(* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *)
···
113
(* Validate style type attribute - must be "text/css" or omitted *)
114
if name_lower = "style" then begin
115
List.iter (fun (attr_name, attr_value) ->
116
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
117
if attr_lower = "type" then begin
118
+
let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in
119
if value_lower <> "text/css" then
120
Message_collector.add_typed collector (`Misc `Style_type_invalid)
121
end
···
144
(* imagesrcset requires as="image" *)
145
if has_imagesrcset then begin
146
let as_is_image = match as_value with
147
+
| Some v -> Astring.String.Ascii.lowercase (String.trim v) = "image"
148
| None -> false
149
in
150
if not as_is_image then
···
164
(* Validate img usemap attribute - must be hash-name reference with content *)
165
if name_lower = "img" then begin
166
List.iter (fun (attr_name, attr_value) ->
167
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
168
if attr_lower = "usemap" then begin
169
if attr_value = "#" then
170
Message_collector.add_typed collector
···
178
(* Validate embed type attribute - must be valid MIME type *)
179
if name_lower = "embed" then begin
180
List.iter (fun (attr_name, attr_value) ->
181
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
182
if attr_lower = "type" then begin
183
match Dt_mime.validate_mime_type attr_value with
184
| Ok () -> ()
···
197
name_lower = "iframe" || name_lower = "source" in
198
if is_dimension_element then begin
199
List.iter (fun (attr_name, attr_value) ->
200
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
201
if attr_lower = "width" || attr_lower = "height" then begin
202
(* Check for non-negative integer only *)
203
let is_valid =
···
245
(* Validate area[shape=default] cannot have coords *)
246
if name_lower = "area" then begin
247
match Attr_utils.get_attr "shape" attrs with
248
+
| Some s when Astring.String.Ascii.lowercase (String.trim s) = "default" ->
249
if Attr_utils.has_attr "coords" attrs then
250
Message_collector.add_typed collector
251
(`Attr (`Not_allowed (`Attr "coords", `Elem "area")))
···
257
match Attr_utils.get_attr "dir" attrs with
258
| None ->
259
Message_collector.add_typed collector (`Misc `Bdo_missing_dir)
260
+
| Some v when Astring.String.Ascii.lowercase (String.trim v) = "auto" ->
261
Message_collector.add_typed collector (`Misc `Bdo_dir_auto)
262
| _ -> ()
263
end;
···
266
if name_lower = "input" then begin
267
if Attr_utils.has_attr "list" attrs then begin
268
let input_type = Attr_utils.get_attr_or "type" ~default:"text" attrs
269
+
|> String.trim |> Astring.String.Ascii.lowercase in
270
if not (List.mem input_type input_types_allowing_list) then
271
Message_collector.add_typed collector (`Input `List_not_allowed)
272
end
···
274
275
(* Validate data-* attributes *)
276
List.iter (fun (attr_name, _) ->
277
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
278
(* Check if it starts with "data-" *)
279
if String.starts_with ~prefix:"data-" attr_lower then begin
280
let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in
···
297
(match lang_value with
298
| None ->
299
Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
300
+
| Some lang when Astring.String.Ascii.lowercase lang <> Astring.String.Ascii.lowercase xmllang ->
301
Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
302
| _ -> ())
303
| None -> ()
···
305
306
(* Validate spellcheck attribute - must be "true" or "false" or empty *)
307
List.iter (fun (attr_name, attr_value) ->
308
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
309
if attr_lower = "spellcheck" then begin
310
+
let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in
311
if value_lower <> "" && value_lower <> "true" && value_lower <> "false" then
312
Message_collector.add_typed collector
313
(`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
···
317
(* Validate enterkeyhint attribute - must be one of specific values *)
318
let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in
319
List.iter (fun (attr_name, attr_value) ->
320
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
321
if attr_lower = "enterkeyhint" then begin
322
+
let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in
323
if not (List.mem value_lower valid_enterkeyhint) then
324
Message_collector.add_typed collector
325
(`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
···
328
329
(* Validate headingoffset attribute - must be a number between 0 and 8 *)
330
List.iter (fun (attr_name, attr_value) ->
331
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
332
if attr_lower = "headingoffset" then begin
333
let trimmed = String.trim attr_value in
334
let is_valid =
···
346
347
(* Validate accesskey attribute - each key label must be a single code point *)
348
List.iter (fun (attr_name, attr_value) ->
349
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
350
if attr_lower = "accesskey" then begin
351
(* Split by whitespace to get key labels *)
352
let keys = String.split_on_char ' ' attr_value |>
···
418
let is_media_element = name_lower = "link" || name_lower = "style" || name_lower = "source" in
419
if is_media_element then begin
420
List.iter (fun (attr_name, attr_value) ->
421
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
422
if attr_lower = "media" then begin
423
let trimmed = String.trim attr_value in
424
if trimmed <> "" then begin
···
436
437
(* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *)
438
List.iter (fun (attr_name, attr_value) ->
439
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
440
if attr_lower = "prefix" then begin
441
(* Parse prefix attribute value - format: "prefix1: iri1 prefix2: iri2 ..." *)
442
let trimmed = String.trim attr_value in
+1
-1
lib/check/specialized/datetime_checker.ml
+1
-1
lib/check/specialized/datetime_checker.ml
+1
-1
lib/check/specialized/dl_checker.ml
+1
-1
lib/check/specialized/dl_checker.ml
···
106
(`Element (`Not_allowed_as_child (`Child "div", `Parent "dl")));
107
(match Attr.get_role element.attrs with
108
| Some role_value ->
109
-
let role_lower = String.lowercase_ascii (String.trim role_value) in
110
if role_lower <> "presentation" && role_lower <> "none" then
111
Message_collector.add_typed collector (`Li_role `Div_in_dl_bad_role)
112
| None -> ());
···
106
(`Element (`Not_allowed_as_child (`Child "div", `Parent "dl")));
107
(match Attr.get_role element.attrs with
108
| Some role_value ->
109
+
let role_lower = Astring.String.Ascii.lowercase (String.trim role_value) in
110
if role_lower <> "presentation" && role_lower <> "none" then
111
Message_collector.add_typed collector (`Li_role `Div_in_dl_bad_role)
112
| None -> ());
+2
-2
lib/check/specialized/importmap_checker.ml
+2
-2
lib/check/specialized/importmap_checker.ml
···
270
| Tag.Html `Script ->
271
(* Check if type="importmap" *)
272
let type_attr = List.find_opt (fun (n, _) ->
273
-
String.lowercase_ascii n = "type"
274
) element.raw_attrs in
275
(match type_attr with
276
-
| Some (_, v) when String.lowercase_ascii v = "importmap" ->
277
state.in_importmap <- true;
278
Buffer.clear state.content
279
| _ -> ())
···
270
| Tag.Html `Script ->
271
(* Check if type="importmap" *)
272
let type_attr = List.find_opt (fun (n, _) ->
273
+
Astring.String.Ascii.lowercase n = "type"
274
) element.raw_attrs in
275
(match type_attr with
276
+
| Some (_, v) when Astring.String.Ascii.lowercase v = "importmap" ->
277
state.in_importmap <- true;
278
Buffer.clear state.content
279
| _ -> ())
+1
-1
lib/check/specialized/label_checker.ml
+1
-1
lib/check/specialized/label_checker.ml
+1
-1
lib/check/specialized/language_checker.ml
+1
-1
lib/check/specialized/language_checker.ml
+3
-6
lib/check/specialized/mime_type_checker.ml
+3
-6
lib/check/specialized/mime_type_checker.ml
···
153
let create () = ()
154
let reset _state = ()
155
156
-
let get_attr_value name attrs =
157
-
List.find_map (fun (k, v) ->
158
-
if String.lowercase_ascii k = String.lowercase_ascii name then Some v else None
159
-
) attrs
160
161
let start_element _state ~element collector =
162
match element.Element.tag with
163
| Tag.Html tag ->
164
let name = Tag.html_tag_to_string tag in
165
-
let name_lower = String.lowercase_ascii name in
166
(match List.assoc_opt name_lower mime_type_attrs with
167
| None -> ()
168
| Some type_attrs ->
···
174
if value = "" then ()
175
else if name_lower = "script" then
176
(* script type can be module, importmap, etc. - skip validation for non-MIME types *)
177
-
let value_lower = String.lowercase_ascii value in
178
if value_lower = "module" || value_lower = "importmap" ||
179
not (String.contains value '/') then ()
180
else
···
153
let create () = ()
154
let reset _state = ()
155
156
+
let get_attr_value = Attr_utils.get_attr
157
158
let start_element _state ~element collector =
159
match element.Element.tag with
160
| Tag.Html tag ->
161
let name = Tag.html_tag_to_string tag in
162
+
let name_lower = Astring.String.Ascii.lowercase name in
163
(match List.assoc_opt name_lower mime_type_attrs with
164
| None -> ()
165
| Some type_attrs ->
···
171
if value = "" then ()
172
else if name_lower = "script" then
173
(* script type can be module, importmap, etc. - skip validation for non-MIME types *)
174
+
let value_lower = Astring.String.Ascii.lowercase value in
175
if value_lower = "module" || value_lower = "importmap" ||
176
not (String.contains value '/') then ()
177
else
+2
-2
lib/check/specialized/picture_checker.ml
+2
-2
lib/check/specialized/picture_checker.ml
···
133
let media_value = Attr_utils.get_attr "media" attrs in
134
let has_type = Attr_utils.has_attr "type" attrs in
135
let is_media_all = match media_value with
136
-
| Some v -> String.lowercase_ascii (String.trim v) = "all"
137
| None -> false in
138
let is_media_empty = match media_value with
139
| Some v -> String.trim v = ""
···
142
| None -> not has_type
143
| Some v ->
144
let trimmed = String.trim v in
145
-
trimmed = "" || String.lowercase_ascii trimmed = "all"
146
in
147
if is_always_matching then begin
148
state.has_always_matching_source <- true;
···
133
let media_value = Attr_utils.get_attr "media" attrs in
134
let has_type = Attr_utils.has_attr "type" attrs in
135
let is_media_all = match media_value with
136
+
| Some v -> Astring.String.Ascii.lowercase (String.trim v) = "all"
137
| None -> false in
138
let is_media_empty = match media_value with
139
| Some v -> String.trim v = ""
···
142
| None -> not has_type
143
| Some v ->
144
let trimmed = String.trim v in
145
+
trimmed = "" || Astring.String.Ascii.lowercase trimmed = "all"
146
in
147
if is_always_matching then begin
148
state.has_always_matching_source <- true;
+12
-12
lib/check/specialized/srcset_sizes_checker.ml
+12
-12
lib/check/specialized/srcset_sizes_checker.ml
···
153
154
(** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *)
155
let has_invalid_scientific_notation s =
156
-
let lower = String.lowercase_ascii s in
157
(* Find 'e' for scientific notation *)
158
match String.index_opt lower 'e' with
159
| None -> false
···
176
(* Check for % at the end *)
177
else if trimmed.[len - 1] = '%' then "%"
178
else begin
179
-
let lower = String.lowercase_ascii trimmed in
180
(* Try to find a unit at the end (letters only) *)
181
let rec find_unit_length i =
182
if i < 0 then 0
···
205
if has_invalid_scientific_notation value_no_comments then BadScientificNotation
206
(* "auto" is only valid with lazy loading, which requires checking the element context.
207
For general validation, treat "auto" alone as invalid in sizes. *)
208
-
else if String.lowercase_ascii value_no_comments = "auto" then
209
BadCssNumber (value_no_comments.[0], trimmed)
210
else if value_no_comments = "" then InvalidUnit ("", trimmed)
211
else begin
212
-
let lower = String.lowercase_ascii value_no_comments in
213
(* Check for calc() or other CSS functions first - these are always valid *)
214
if String.contains value_no_comments '(' then Valid
215
else begin
···
310
Some "Bad media condition: Parse Error"
311
end else begin
312
(* Check for bare "all" which is invalid *)
313
-
let lower = String.lowercase_ascii trimmed in
314
let parts = String.split_on_char ' ' lower |> List.filter (fun s -> s <> "") in
315
match parts with
316
| keyword :: _ when keyword = "all" ->
···
358
end
359
else begin
360
(* Check if remaining starts with "and", "or", "not" followed by space or paren *)
361
-
let lower_remaining = String.lowercase_ascii remaining in
362
if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and " then
363
skip_media_condition (i + (len - i) - remaining_len + 4)
364
else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or " then
···
577
578
(** Validate srcset descriptor *)
579
let validate_srcset_descriptor desc element_name srcset_value has_sizes collector =
580
-
let desc_lower = String.lowercase_ascii (String.trim desc) in
581
if String.length desc_lower = 0 then true
582
else begin
583
let last_char = desc_lower.[String.length desc_lower - 1] in
···
723
724
(** Normalize descriptor for duplicate detection (e.g., 1x = 1.0x) *)
725
let normalize_descriptor desc =
726
-
let desc_lower = String.lowercase_ascii (String.trim desc) in
727
if String.length desc_lower = 0 then desc_lower
728
else
729
let last_char = desc_lower.[String.length desc_lower - 1] in
···
793
(* Special schemes that require host/content after :// *)
794
let special_schemes = ["http"; "https"; "ftp"; "ws"; "wss"] in
795
(* Check for scheme-only URL like "http:" *)
796
-
let url_lower = String.lowercase_ascii url in
797
List.iter (fun scheme ->
798
let scheme_colon = scheme ^ ":" in
799
if url_lower = scheme_colon then
···
824
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected single descriptor but found extraneous descriptor %s at %s." (q value) (q "srcset") (q element_name) (q extra_desc) (q value)))))
825
end;
826
827
-
let desc_lower = String.lowercase_ascii (String.trim desc) in
828
if String.length desc_lower > 0 then begin
829
let last_char = desc_lower.[String.length desc_lower - 1] in
830
if last_char = 'w' then has_w_descriptor := true
···
872
begin match Hashtbl.find_opt seen_descriptors normalized with
873
| Some first_url ->
874
Message_collector.add_typed collector
875
-
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (String.lowercase_ascii dup_type) (q first_url)))))
876
| None ->
877
begin match (if is_1x then Hashtbl.find_opt seen_descriptors "implicit-1x" else None) with
878
| Some first_url ->
879
(* Explicit 1x conflicts with implicit 1x *)
880
Message_collector.add_typed collector
881
-
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (String.lowercase_ascii dup_type) (q first_url)))))
882
| None ->
883
Hashtbl.add seen_descriptors normalized url;
884
if is_1x then Hashtbl.add seen_descriptors "explicit-1x" url
···
153
154
(** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *)
155
let has_invalid_scientific_notation s =
156
+
let lower = Astring.String.Ascii.lowercase s in
157
(* Find 'e' for scientific notation *)
158
match String.index_opt lower 'e' with
159
| None -> false
···
176
(* Check for % at the end *)
177
else if trimmed.[len - 1] = '%' then "%"
178
else begin
179
+
let lower = Astring.String.Ascii.lowercase trimmed in
180
(* Try to find a unit at the end (letters only) *)
181
let rec find_unit_length i =
182
if i < 0 then 0
···
205
if has_invalid_scientific_notation value_no_comments then BadScientificNotation
206
(* "auto" is only valid with lazy loading, which requires checking the element context.
207
For general validation, treat "auto" alone as invalid in sizes. *)
208
+
else if Astring.String.Ascii.lowercase value_no_comments = "auto" then
209
BadCssNumber (value_no_comments.[0], trimmed)
210
else if value_no_comments = "" then InvalidUnit ("", trimmed)
211
else begin
212
+
let lower = Astring.String.Ascii.lowercase value_no_comments in
213
(* Check for calc() or other CSS functions first - these are always valid *)
214
if String.contains value_no_comments '(' then Valid
215
else begin
···
310
Some "Bad media condition: Parse Error"
311
end else begin
312
(* Check for bare "all" which is invalid *)
313
+
let lower = Astring.String.Ascii.lowercase trimmed in
314
let parts = String.split_on_char ' ' lower |> List.filter (fun s -> s <> "") in
315
match parts with
316
| keyword :: _ when keyword = "all" ->
···
358
end
359
else begin
360
(* Check if remaining starts with "and", "or", "not" followed by space or paren *)
361
+
let lower_remaining = Astring.String.Ascii.lowercase remaining in
362
if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and " then
363
skip_media_condition (i + (len - i) - remaining_len + 4)
364
else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or " then
···
577
578
(** Validate srcset descriptor *)
579
let validate_srcset_descriptor desc element_name srcset_value has_sizes collector =
580
+
let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in
581
if String.length desc_lower = 0 then true
582
else begin
583
let last_char = desc_lower.[String.length desc_lower - 1] in
···
723
724
(** Normalize descriptor for duplicate detection (e.g., 1x = 1.0x) *)
725
let normalize_descriptor desc =
726
+
let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in
727
if String.length desc_lower = 0 then desc_lower
728
else
729
let last_char = desc_lower.[String.length desc_lower - 1] in
···
793
(* Special schemes that require host/content after :// *)
794
let special_schemes = ["http"; "https"; "ftp"; "ws"; "wss"] in
795
(* Check for scheme-only URL like "http:" *)
796
+
let url_lower = Astring.String.Ascii.lowercase url in
797
List.iter (fun scheme ->
798
let scheme_colon = scheme ^ ":" in
799
if url_lower = scheme_colon then
···
824
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected single descriptor but found extraneous descriptor %s at %s." (q value) (q "srcset") (q element_name) (q extra_desc) (q value)))))
825
end;
826
827
+
let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in
828
if String.length desc_lower > 0 then begin
829
let last_char = desc_lower.[String.length desc_lower - 1] in
830
if last_char = 'w' then has_w_descriptor := true
···
872
begin match Hashtbl.find_opt seen_descriptors normalized with
873
| Some first_url ->
874
Message_collector.add_typed collector
875
+
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (Astring.String.Ascii.lowercase dup_type) (q first_url)))))
876
| None ->
877
begin match (if is_1x then Hashtbl.find_opt seen_descriptors "implicit-1x" else None) with
878
| Some first_url ->
879
(* Explicit 1x conflicts with implicit 1x *)
880
Message_collector.add_typed collector
881
+
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (Astring.String.Ascii.lowercase dup_type) (q first_url)))))
882
| None ->
883
Hashtbl.add seen_descriptors normalized url;
884
if is_1x then Hashtbl.add seen_descriptors "explicit-1x" url
+10
-10
lib/check/specialized/svg_checker.ml
+10
-10
lib/check/specialized/svg_checker.ml
···
260
261
(* Check if attribute name matches a pattern like "data-*" or "aria-*" - case insensitive *)
262
let matches_pattern attr pattern =
263
-
let attr_lower = String.lowercase_ascii attr in
264
-
let pattern_lower = String.lowercase_ascii pattern in
265
if String.ends_with ~suffix:"-*" pattern_lower then
266
let prefix = String.sub pattern_lower 0 (String.length pattern_lower - 1) in
267
String.starts_with ~prefix attr_lower
···
361
state.in_svg <- true;
362
363
if is_svg_element || state.in_svg then begin
364
-
let name_lower = String.lowercase_ascii name in
365
366
(* Check SVG content model rules *)
367
(* 1. Check if child is allowed in SVG <a> *)
368
(match state.element_stack with
369
-
| parent :: _ when String.lowercase_ascii parent = "a" ->
370
if List.mem name_lower a_disallowed_children then
371
Message_collector.add_typed collector
372
(`Element (`Not_allowed_as_child (`Child name, `Parent "a")))
···
382
(* 2.5 Check stop element is only in linearGradient or radialGradient *)
383
if name_lower = "stop" then begin
384
match state.element_stack with
385
-
| parent :: _ when (let p = String.lowercase_ascii parent in
386
p = "lineargradient" || p = "radialgradient") -> ()
387
| parent :: _ ->
388
Message_collector.add_typed collector
···
393
(* 2.6 Check use element is not nested inside another use element *)
394
if name_lower = "use" then begin
395
match state.element_stack with
396
-
| parent :: _ when String.lowercase_ascii parent = "use" ->
397
Message_collector.add_typed collector
398
(`Element (`Not_allowed_as_child (`Child name, `Parent parent)))
399
| _ -> ()
···
401
402
(* 3. Check duplicate feFunc* in feComponentTransfer *)
403
(match state.element_stack with
404
-
| parent :: _ when String.lowercase_ascii parent = "fecomponenttransfer" ->
405
if List.mem name_lower ["fefuncr"; "fefuncg"; "fefuncb"; "fefunca"] then begin
406
match state.fecomponenttransfer_stack with
407
| fect :: _ ->
···
435
436
(* Check each attribute *)
437
List.iter (fun (attr, value) ->
438
-
let attr_lower = String.lowercase_ascii attr in
439
440
(* Validate xmlns attributes *)
441
if String.starts_with ~prefix:"xmlns" attr_lower then
···
457
(match List.assoc_opt name_lower required_attrs with
458
| Some req_attrs ->
459
List.iter (fun req_attr ->
460
-
if not (List.exists (fun (a, _) -> String.lowercase_ascii a = req_attr) attrs) then
461
Message_collector.add_typed collector
462
(`Svg (`Missing_attr (`Elem name_lower, `Attr req_attr)))
463
) req_attrs
···
469
let name = Tag.tag_to_string tag in
470
471
if is_svg_element || state.in_svg then begin
472
-
let name_lower = String.lowercase_ascii name in
473
474
(* Check required children when closing font element *)
475
if name_lower = "font" then begin
···
260
261
(* Check if attribute name matches a pattern like "data-*" or "aria-*" - case insensitive *)
262
let matches_pattern attr pattern =
263
+
let attr_lower = Astring.String.Ascii.lowercase attr in
264
+
let pattern_lower = Astring.String.Ascii.lowercase pattern in
265
if String.ends_with ~suffix:"-*" pattern_lower then
266
let prefix = String.sub pattern_lower 0 (String.length pattern_lower - 1) in
267
String.starts_with ~prefix attr_lower
···
361
state.in_svg <- true;
362
363
if is_svg_element || state.in_svg then begin
364
+
let name_lower = Astring.String.Ascii.lowercase name in
365
366
(* Check SVG content model rules *)
367
(* 1. Check if child is allowed in SVG <a> *)
368
(match state.element_stack with
369
+
| parent :: _ when Astring.String.Ascii.lowercase parent = "a" ->
370
if List.mem name_lower a_disallowed_children then
371
Message_collector.add_typed collector
372
(`Element (`Not_allowed_as_child (`Child name, `Parent "a")))
···
382
(* 2.5 Check stop element is only in linearGradient or radialGradient *)
383
if name_lower = "stop" then begin
384
match state.element_stack with
385
+
| parent :: _ when (let p = Astring.String.Ascii.lowercase parent in
386
p = "lineargradient" || p = "radialgradient") -> ()
387
| parent :: _ ->
388
Message_collector.add_typed collector
···
393
(* 2.6 Check use element is not nested inside another use element *)
394
if name_lower = "use" then begin
395
match state.element_stack with
396
+
| parent :: _ when Astring.String.Ascii.lowercase parent = "use" ->
397
Message_collector.add_typed collector
398
(`Element (`Not_allowed_as_child (`Child name, `Parent parent)))
399
| _ -> ()
···
401
402
(* 3. Check duplicate feFunc* in feComponentTransfer *)
403
(match state.element_stack with
404
+
| parent :: _ when Astring.String.Ascii.lowercase parent = "fecomponenttransfer" ->
405
if List.mem name_lower ["fefuncr"; "fefuncg"; "fefuncb"; "fefunca"] then begin
406
match state.fecomponenttransfer_stack with
407
| fect :: _ ->
···
435
436
(* Check each attribute *)
437
List.iter (fun (attr, value) ->
438
+
let attr_lower = Astring.String.Ascii.lowercase attr in
439
440
(* Validate xmlns attributes *)
441
if String.starts_with ~prefix:"xmlns" attr_lower then
···
457
(match List.assoc_opt name_lower required_attrs with
458
| Some req_attrs ->
459
List.iter (fun req_attr ->
460
+
if not (Attr_utils.has_attr req_attr attrs) then
461
Message_collector.add_typed collector
462
(`Svg (`Missing_attr (`Elem name_lower, `Attr req_attr)))
463
) req_attrs
···
469
let name = Tag.tag_to_string tag in
470
471
if is_svg_element || state.in_svg then begin
472
+
let name_lower = Astring.String.Ascii.lowercase name in
473
474
(* Check required children when closing font element *)
475
if name_lower = "font" then begin
+5
-5
lib/check/specialized/table_checker.ml
+5
-5
lib/check/specialized/table_checker.ml
···
354
355
(** Parse a non-negative integer attribute, returning 1 if absent or invalid *)
356
let parse_non_negative_int attrs name =
357
-
match List.assoc_opt name attrs with
358
| None -> 1
359
| Some v -> (
360
try
···
364
365
(** Parse a positive integer attribute, returning 1 if absent or invalid *)
366
let parse_positive_int attrs name =
367
-
match List.assoc_opt name attrs with
368
| None -> 1
369
| Some v -> (
370
try
···
374
375
(** Parse the headers attribute into a list of IDs *)
376
let parse_headers attrs =
377
-
match List.assoc_opt "headers" attrs with
378
| None -> []
379
| Some v ->
380
let parts = String.split_on_char ' ' v in
···
523
table.state <- InCellInRowGroup;
524
(* Record header ID if present *)
525
if is_header then (
526
-
match List.assoc_opt "id" attrs with
527
| Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id ()
528
| _ -> ());
529
(* Parse cell attributes *)
···
541
table.state <- InCellInImplicitRowGroup;
542
(* Same logic as above *)
543
if is_header then (
544
-
match List.assoc_opt "id" attrs with
545
| Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id ()
546
| _ -> ());
547
let colspan = abs (parse_positive_int attrs "colspan") in
···
354
355
(** Parse a non-negative integer attribute, returning 1 if absent or invalid *)
356
let parse_non_negative_int attrs name =
357
+
match Attr_utils.get_attr name attrs with
358
| None -> 1
359
| Some v -> (
360
try
···
364
365
(** Parse a positive integer attribute, returning 1 if absent or invalid *)
366
let parse_positive_int attrs name =
367
+
match Attr_utils.get_attr name attrs with
368
| None -> 1
369
| Some v -> (
370
try
···
374
375
(** Parse the headers attribute into a list of IDs *)
376
let parse_headers attrs =
377
+
match Attr_utils.get_attr "headers" attrs with
378
| None -> []
379
| Some v ->
380
let parts = String.split_on_char ' ' v in
···
523
table.state <- InCellInRowGroup;
524
(* Record header ID if present *)
525
if is_header then (
526
+
match Attr_utils.get_attr "id" attrs with
527
| Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id ()
528
| _ -> ());
529
(* Parse cell attributes *)
···
541
table.state <- InCellInImplicitRowGroup;
542
(* Same logic as above *)
543
if is_header then (
544
+
match Attr_utils.get_attr "id" attrs with
545
| Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id ()
546
| _ -> ());
547
let colspan = abs (parse_positive_int attrs "colspan") in
+1
-1
lib/check/specialized/unknown_element_checker.ml
+1
-1
lib/check/specialized/unknown_element_checker.ml
+24
-25
lib/check/specialized/url_checker.ml
+24
-25
lib/check/specialized/url_checker.ml
···
67
68
(** Check if pipe is allowed in this host context. *)
69
let is_pipe_allowed_in_host url host =
70
-
let scheme = try String.lowercase_ascii (String.sub url 0 (String.index url ':')) with _ -> "" in
71
scheme = "file" && is_valid_windows_drive host
72
73
(** Special schemes that require double slash (//).
···
95
(c >= '0' && c <= '9') || c = '+' || c = '-' || c = '.'
96
) potential_scheme in
97
if is_valid_scheme then
98
-
Some (String.lowercase_ascii potential_scheme)
99
else
100
None
101
with Not_found -> None
···
104
let extract_host_and_port url =
105
try
106
let double_slash =
107
-
try Some (Str.search_forward (Str.regexp "://") url 0 + 3)
108
-
with Not_found -> None
109
in
110
match double_slash with
111
| None -> (None, None)
···
250
(* Check for ASCII percent *)
251
String.contains s '%' ||
252
(* Check for fullwidth percent (U+FF05 = 0xEF 0xBC 0x85 in UTF-8) *)
253
-
try
254
-
let _ = Str.search_forward (Str.regexp "\xef\xbc\x85") s 0 in
255
-
true
256
-
with Not_found -> false
257
258
(** Check if decoded host contains forbidden characters.
259
Some URLs have percent-encoded fullwidth characters that decode to forbidden chars. *)
···
424
let check_path_segment url attr_name element_name =
425
(* Extract path: everything after authority (or after scheme: for non-authority URLs) *)
426
let raw_path =
427
-
try
428
-
let double_slash = Str.search_forward (Str.regexp "://") url 0 in
429
let after_auth_start = double_slash + 3 in
430
let rest = String.sub url after_auth_start (String.length url - after_auth_start) in
431
(* Find end of authority *)
···
437
String.sub rest path_start (String.length rest - path_start)
438
else
439
""
440
-
with Not_found ->
441
(* No double slash - check for single slash path *)
442
-
match extract_scheme url with
443
| Some _ ->
444
-
let colon_pos = String.index url ':' in
445
-
let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
446
-
after_colon
447
| None ->
448
(* Relative URL - the whole thing is the path *)
449
-
url
450
in
451
(* Remove query and fragment for path-specific checks *)
452
let path = remove_query_fragment raw_path in
···
546
547
(** Check for illegal characters in userinfo (user:password). *)
548
let check_userinfo url attr_name element_name =
549
try
550
(* Look for :// then find the LAST @ before the next / or end *)
551
-
let double_slash = Str.search_forward (Str.regexp "://") url 0 + 3 in
552
let rest = String.sub url double_slash (String.length url - double_slash) in
553
(* Find first / or ? or # to limit authority section *)
554
let auth_end =
···
633
let url = String.trim url in
634
(* Empty URL check for certain attributes *)
635
if url = "" then begin
636
-
let name_lower = String.lowercase_ascii element_name in
637
-
let attr_lower = String.lowercase_ascii attr_name in
638
if List.mem attr_lower must_be_non_empty ||
639
List.mem (name_lower, attr_lower) must_be_non_empty_combinations then
640
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Must be non-empty."
···
739
let reset _state = ()
740
741
(** Get attribute value by name. *)
742
-
let get_attr_value name attrs =
743
-
List.find_map (fun (k, v) ->
744
-
if String.lowercase_ascii k = String.lowercase_ascii name then Some v else None
745
-
) attrs
746
747
let start_element _state ~element collector =
748
match element.Element.tag with
749
| Tag.Html _ ->
750
let name = Tag.tag_to_string element.tag in
751
-
let name_lower = String.lowercase_ascii name in
752
let attrs = element.raw_attrs in
753
(* Check URL attributes for elements that have them *)
754
(match List.assoc_opt name_lower url_attributes with
···
794
match validate_url url name "value" with
795
| None -> ()
796
| Some error_msg ->
797
-
let error_msg = Str.global_replace (Str.regexp "Bad URL:") "Bad absolute URL:" error_msg in
798
Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message error_msg)))
799
end
800
end
···
67
68
(** Check if pipe is allowed in this host context. *)
69
let is_pipe_allowed_in_host url host =
70
+
let scheme = try Astring.String.Ascii.lowercase (String.sub url 0 (String.index url ':')) with _ -> "" in
71
scheme = "file" && is_valid_windows_drive host
72
73
(** Special schemes that require double slash (//).
···
95
(c >= '0' && c <= '9') || c = '+' || c = '-' || c = '.'
96
) potential_scheme in
97
if is_valid_scheme then
98
+
Some (Astring.String.Ascii.lowercase potential_scheme)
99
else
100
None
101
with Not_found -> None
···
104
let extract_host_and_port url =
105
try
106
let double_slash =
107
+
match Astring.String.find_sub ~sub:"://" url with
108
+
| Some pos -> Some (pos + 3)
109
+
| None -> None
110
in
111
match double_slash with
112
| None -> (None, None)
···
251
(* Check for ASCII percent *)
252
String.contains s '%' ||
253
(* Check for fullwidth percent (U+FF05 = 0xEF 0xBC 0x85 in UTF-8) *)
254
+
Astring.String.is_infix ~affix:"\xef\xbc\x85" s
255
256
(** Check if decoded host contains forbidden characters.
257
Some URLs have percent-encoded fullwidth characters that decode to forbidden chars. *)
···
422
let check_path_segment url attr_name element_name =
423
(* Extract path: everything after authority (or after scheme: for non-authority URLs) *)
424
let raw_path =
425
+
match Astring.String.find_sub ~sub:"://" url with
426
+
| Some double_slash ->
427
let after_auth_start = double_slash + 3 in
428
let rest = String.sub url after_auth_start (String.length url - after_auth_start) in
429
(* Find end of authority *)
···
435
String.sub rest path_start (String.length rest - path_start)
436
else
437
""
438
+
| None ->
439
(* No double slash - check for single slash path *)
440
+
(match extract_scheme url with
441
| Some _ ->
442
+
(try
443
+
let colon_pos = String.index url ':' in
444
+
String.sub url (colon_pos + 1) (String.length url - colon_pos - 1)
445
+
with Not_found -> url)
446
| None ->
447
(* Relative URL - the whole thing is the path *)
448
+
url)
449
in
450
(* Remove query and fragment for path-specific checks *)
451
let path = remove_query_fragment raw_path in
···
545
546
(** Check for illegal characters in userinfo (user:password). *)
547
let check_userinfo url attr_name element_name =
548
+
match Astring.String.find_sub ~sub:"://" url with
549
+
| None -> None
550
+
| Some pos ->
551
try
552
(* Look for :// then find the LAST @ before the next / or end *)
553
+
let double_slash = pos + 3 in
554
let rest = String.sub url double_slash (String.length url - double_slash) in
555
(* Find first / or ? or # to limit authority section *)
556
let auth_end =
···
635
let url = String.trim url in
636
(* Empty URL check for certain attributes *)
637
if url = "" then begin
638
+
let name_lower = Astring.String.Ascii.lowercase element_name in
639
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
640
if List.mem attr_lower must_be_non_empty ||
641
List.mem (name_lower, attr_lower) must_be_non_empty_combinations then
642
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Must be non-empty."
···
741
let reset _state = ()
742
743
(** Get attribute value by name. *)
744
+
let get_attr_value = Attr_utils.get_attr
745
746
let start_element _state ~element collector =
747
match element.Element.tag with
748
| Tag.Html _ ->
749
let name = Tag.tag_to_string element.tag in
750
+
let name_lower = Astring.String.Ascii.lowercase name in
751
let attrs = element.raw_attrs in
752
(* Check URL attributes for elements that have them *)
753
(match List.assoc_opt name_lower url_attributes with
···
793
match validate_url url name "value" with
794
| None -> ()
795
| Some error_msg ->
796
+
let error_msg = Astring.String.concat ~sep:"Bad absolute URL:" (Astring.String.cuts ~sep:"Bad URL:" error_msg) in
797
Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message error_msg)))
798
end
799
end
+5
-5
lib/check/specialized/xhtml_content_checker.ml
+5
-5
lib/check/specialized/xhtml_content_checker.ml
···
54
55
let start_element state ~element collector =
56
let name = Tag.tag_to_string element.Element.tag in
57
-
let name_lower = String.lowercase_ascii name in
58
let attrs = element.raw_attrs in
59
60
(* Check data-* attributes for uppercase *)
···
63
(* Check if this element is allowed as child of parent *)
64
(match state.element_stack with
65
| parent :: _ ->
66
-
let parent_lower = String.lowercase_ascii parent in
67
if not (is_child_allowed ~parent:parent_lower ~child:name_lower) then
68
Message_collector.add_typed collector
69
(`Element (`Not_allowed_as_child (`Child name_lower, `Parent parent_lower)))
···
71
72
(* Handle figure content model *)
73
(match state.element_stack with
74
-
| parent :: _ when String.lowercase_ascii parent = "figure" ->
75
(* We're inside a figure, check content model *)
76
(match state.figure_stack with
77
| fig :: _ ->
···
99
state.element_stack <- name :: state.element_stack
100
101
let end_element state ~tag _collector =
102
-
let name_lower = String.lowercase_ascii (Tag.tag_to_string tag) in
103
(* Pop figure state if leaving a figure *)
104
if name_lower = "figure" then begin
105
match state.figure_stack with
···
115
match state.element_stack with
116
| [] -> ()
117
| parent :: _ ->
118
-
let parent_lower = String.lowercase_ascii parent in
119
let trimmed = String.trim text in
120
if trimmed <> "" then begin
121
if parent_lower = "figure" then begin
···
54
55
let start_element state ~element collector =
56
let name = Tag.tag_to_string element.Element.tag in
57
+
let name_lower = Astring.String.Ascii.lowercase name in
58
let attrs = element.raw_attrs in
59
60
(* Check data-* attributes for uppercase *)
···
63
(* Check if this element is allowed as child of parent *)
64
(match state.element_stack with
65
| parent :: _ ->
66
+
let parent_lower = Astring.String.Ascii.lowercase parent in
67
if not (is_child_allowed ~parent:parent_lower ~child:name_lower) then
68
Message_collector.add_typed collector
69
(`Element (`Not_allowed_as_child (`Child name_lower, `Parent parent_lower)))
···
71
72
(* Handle figure content model *)
73
(match state.element_stack with
74
+
| parent :: _ when Astring.String.Ascii.lowercase parent = "figure" ->
75
(* We're inside a figure, check content model *)
76
(match state.figure_stack with
77
| fig :: _ ->
···
99
state.element_stack <- name :: state.element_stack
100
101
let end_element state ~tag _collector =
102
+
let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string tag) in
103
(* Pop figure state if leaving a figure *)
104
if name_lower = "figure" then begin
105
match state.figure_stack with
···
115
match state.element_stack with
116
| [] -> ()
117
| parent :: _ ->
118
+
let parent_lower = Astring.String.Ascii.lowercase parent in
119
let trimmed = String.trim text in
120
if trimmed <> "" then begin
121
if parent_lower = "figure" then begin