+1
-1
lib/html5_checker/checker_registry.ml
+1
-1
lib/html5_checker/checker_registry.ml
···
36
36
Hashtbl.replace reg "autofocus" Autofocus_checker.checker;
37
37
Hashtbl.replace reg "option" Option_checker.checker;
38
38
Hashtbl.replace reg "language" Language_checker.checker;
39
+
Hashtbl.replace reg "microdata" Microdata_checker.checker;
39
40
(* Hashtbl.replace reg "table" Table_checker.checker; *)
40
41
(* Hashtbl.replace reg "heading" Heading_checker.checker; *)
41
-
(* Hashtbl.replace reg "microdata" Microdata_checker.checker; *)
42
42
(* Hashtbl.replace reg "content" Content_checker.checker; *)
43
43
reg
44
44
+331
-79
lib/html5_checker/datatype/dt_media_query.ml
+331
-79
lib/html5_checker/datatype/dt_media_query.ml
···
1
-
(** Media query validation - simplified implementation *)
1
+
(** Media query validation - strict implementation for HTML5 conformance *)
2
2
3
-
(** Media types *)
4
-
let media_types =
3
+
(** Valid media types per Media Queries Level 4 spec *)
4
+
let valid_media_types =
5
5
[
6
6
"all";
7
7
"screen";
8
8
"print";
9
9
"speech";
10
+
]
11
+
12
+
(** Deprecated media types that should trigger an error *)
13
+
let deprecated_media_types =
14
+
[
10
15
"aural";
11
16
"braille";
12
17
"handheld";
···
16
21
"embossed";
17
22
]
18
23
19
-
(** Media query keywords *)
20
-
let media_keywords = [ "and"; "or"; "not"; "only" ]
24
+
(** Deprecated media features that should trigger an error *)
25
+
let deprecated_media_features =
26
+
[
27
+
"device-width";
28
+
"device-height";
29
+
"device-aspect-ratio";
30
+
]
31
+
32
+
(** Valid media features *)
33
+
let valid_media_features =
34
+
[
35
+
(* Dimensions *)
36
+
"width"; "min-width"; "max-width";
37
+
"height"; "min-height"; "max-height";
38
+
"aspect-ratio"; "min-aspect-ratio"; "max-aspect-ratio";
39
+
(* Display quality *)
40
+
"resolution"; "min-resolution"; "max-resolution";
41
+
"scan"; "grid"; "update"; "overflow-block"; "overflow-inline";
42
+
(* Color *)
43
+
"color"; "min-color"; "max-color";
44
+
"color-index"; "min-color-index"; "max-color-index";
45
+
"monochrome"; "min-monochrome"; "max-monochrome";
46
+
"color-gamut";
47
+
(* Interaction *)
48
+
"pointer"; "any-pointer"; "hover"; "any-hover";
49
+
(* Scripting *)
50
+
"scripting";
51
+
(* Light/dark *)
52
+
"prefers-color-scheme"; "prefers-contrast"; "prefers-reduced-motion";
53
+
"prefers-reduced-transparency";
54
+
(* Display mode *)
55
+
"display-mode";
56
+
(* Inverted colors *)
57
+
"inverted-colors";
58
+
(* Forced colors *)
59
+
"forced-colors";
60
+
(* Orientation *)
61
+
"orientation";
62
+
]
63
+
64
+
(** Valid length units *)
65
+
let valid_length_units = ["px"; "em"; "rem"; "vh"; "vw"; "vmin"; "vmax"; "cm"; "mm"; "in"; "pt"; "pc"; "ch"; "ex"]
66
+
67
+
(** Valid resolution units *)
68
+
let valid_resolution_units = ["dpi"; "dpcm"; "dppx"; "x"]
69
+
70
+
(** Media query keywords (unused but kept for documentation) *)
71
+
let _media_keywords = [ "and"; "not"; "only" ]
21
72
22
73
(** Check if character is whitespace *)
23
74
let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r'
···
32
83
let is_ident_char c =
33
84
is_ident_start c || (c >= '0' && c <= '9')
34
85
86
+
(** Unicode case-fold for Turkish dotted-I etc *)
87
+
let lowercase_unicode s =
88
+
(* Handle special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE -> i *)
89
+
let buf = Buffer.create (String.length s) in
90
+
let i = ref 0 in
91
+
while !i < String.length s do
92
+
let c = s.[!i] in
93
+
if c = '\xc4' && !i + 1 < String.length s && s.[!i + 1] = '\xb0' then begin
94
+
(* U+0130 -> 'i' + U+0307 (combining dot above), but for simplicity just 'i' followed by U+0307 *)
95
+
Buffer.add_string buf "i\xcc\x87";
96
+
i := !i + 2
97
+
end else begin
98
+
Buffer.add_char buf (Char.lowercase_ascii c);
99
+
incr i
100
+
end
101
+
done;
102
+
Buffer.contents buf
103
+
35
104
(** Check balanced parentheses *)
36
105
let check_balanced_parens s =
37
106
let rec check depth i =
38
107
if i >= String.length s then
39
108
if depth = 0 then Ok ()
40
-
else Error "Unbalanced parentheses: unclosed '('"
109
+
else Error "Parse Error."
41
110
else
42
111
let c = s.[i] in
43
112
match c with
44
113
| '(' -> check (depth + 1) (i + 1)
45
114
| ')' ->
46
-
if depth = 0 then Error "Unbalanced parentheses: unexpected ')'"
115
+
if depth = 0 then Error "Parse Error."
47
116
else check (depth - 1) (i + 1)
48
117
| _ -> check depth (i + 1)
49
118
in
50
119
check 0 0
51
120
52
-
(** Extract words (identifiers and keywords) from media query *)
53
-
let extract_words s =
54
-
let words = ref [] in
55
-
let buf = Buffer.create 16 in
56
-
let in_parens = ref 0 in
121
+
(** Strict media query validation *)
122
+
let rec validate_media_query_strict s =
123
+
let s = String.trim s in
124
+
if String.length s = 0 then Error "Parse Error."
125
+
else begin
126
+
(* Check for empty commas *)
127
+
if s = "," then Error "Parse Error."
128
+
else if String.length s > 0 && s.[0] = ',' then Error "Parse Error."
129
+
else if String.length s > 0 && s.[String.length s - 1] = ',' then Error "Parse Error."
130
+
else if String.contains s ',' then begin
131
+
(* Check for empty queries between commas *)
132
+
let parts = String.split_on_char ',' s in
133
+
if List.exists (fun p -> String.trim p = "") parts then Error "Parse Error."
134
+
else begin
135
+
(* Validate each media query in the list *)
136
+
let rec validate_all = function
137
+
| [] -> Ok ()
138
+
| part :: rest ->
139
+
match validate_media_query_strict (String.trim part) with
140
+
| Ok () -> validate_all rest
141
+
| Error e -> Error e
142
+
in
143
+
validate_all parts
144
+
end
145
+
end else begin
146
+
(* Single media query *)
147
+
match check_balanced_parens s with
148
+
| Error e -> Error e
149
+
| Ok () ->
150
+
(* Check for "and" or "and(" at end *)
151
+
let trimmed = String.trim s in
152
+
if String.length trimmed >= 3 then begin
153
+
let suffix = String.sub trimmed (String.length trimmed - 3) 3 in
154
+
if String.lowercase_ascii suffix = "and" then
155
+
Error "Parse Error."
156
+
else if String.length trimmed >= 4 then begin
157
+
let suffix4 = String.sub trimmed (String.length trimmed - 4) 4 in
158
+
if String.lowercase_ascii suffix4 = "and(" then
159
+
Error "Parse Error."
160
+
else
161
+
validate_media_query_content trimmed
162
+
end else
163
+
validate_media_query_content trimmed
164
+
end else
165
+
validate_media_query_content trimmed
166
+
end
167
+
end
57
168
58
-
for i = 0 to String.length s - 1 do
59
-
let c = s.[i] in
60
-
match c with
61
-
| '(' ->
62
-
if Buffer.length buf > 0 then (
63
-
words := Buffer.contents buf :: !words;
64
-
Buffer.clear buf);
65
-
incr in_parens
66
-
| ')' ->
67
-
if Buffer.length buf > 0 then (
68
-
words := Buffer.contents buf :: !words;
69
-
Buffer.clear buf);
70
-
decr in_parens
71
-
| _ ->
72
-
if !in_parens = 0 then
73
-
if is_ident_char c then Buffer.add_char buf c
74
-
else if is_whitespace c then
75
-
if Buffer.length buf > 0 then (
76
-
words := Buffer.contents buf :: !words;
77
-
Buffer.clear buf)
78
-
else ()
79
-
else if Buffer.length buf > 0 then (
80
-
words := Buffer.contents buf :: !words;
81
-
Buffer.clear buf)
82
-
done;
169
+
and validate_media_query_content s =
170
+
(* Parse into tokens *)
171
+
let len = String.length s in
172
+
let i = ref 0 in
173
+
let skip_ws () = while !i < len && is_whitespace s.[!i] do incr i done in
174
+
175
+
let read_ident () =
176
+
let start = !i in
177
+
while !i < len && is_ident_char s.[!i] do incr i done;
178
+
if !i > start then Some (String.sub s start (!i - start))
179
+
else None
180
+
in
181
+
182
+
let read_paren_content () =
183
+
(* Read until matching ) *)
184
+
let start = !i in
185
+
let depth = ref 1 in
186
+
incr i; (* skip opening ( *)
187
+
while !i < len && !depth > 0 do
188
+
if s.[!i] = '(' then incr depth
189
+
else if s.[!i] = ')' then decr depth;
190
+
incr i
191
+
done;
192
+
String.sub s (start + 1) (!i - start - 2)
193
+
in
83
194
84
-
if Buffer.length buf > 0 then words := Buffer.contents buf :: !words;
85
-
List.rev !words
195
+
(* Parse the query *)
196
+
skip_ws ();
197
+
if !i >= len then Error "Parse Error."
198
+
else begin
199
+
(* Check for only/not prefix *)
200
+
let has_only = ref false in
201
+
let has_not = ref false in
202
+
(match read_ident () with
203
+
| Some w ->
204
+
let w_lower = String.lowercase_ascii w in
205
+
if w_lower = "only" then (has_only := true; skip_ws ())
206
+
else if w_lower = "not" then (has_not := true; skip_ws ())
207
+
else i := !i - String.length w (* put back *)
208
+
| None -> ());
86
209
87
-
(** Validate media query structure *)
210
+
skip_ws ();
211
+
if !i >= len then begin
212
+
if !has_only || !has_not then Error "Parse Error."
213
+
else Error "Parse Error."
214
+
end else begin
215
+
(* Check for media type or ( *)
216
+
if s.[!i] = '(' then begin
217
+
(* Media feature only *)
218
+
let content = read_paren_content () in
219
+
validate_media_feature content
220
+
end else begin
221
+
(* Expect media type *)
222
+
match read_ident () with
223
+
| None -> Error "Parse Error."
224
+
| Some media_type ->
225
+
let mt_lower = lowercase_unicode media_type in
226
+
(* Check for deprecated media type *)
227
+
if List.mem mt_lower deprecated_media_types then
228
+
Error (Printf.sprintf "The media \"%s\" has been deprecated" mt_lower)
229
+
(* Check if valid media type *)
230
+
else if not (List.mem mt_lower valid_media_types) then
231
+
Error (Printf.sprintf "unrecognized media \"%s\"." mt_lower)
232
+
else begin
233
+
skip_ws ();
234
+
if !i >= len then Ok ()
235
+
else begin
236
+
(* Check for "and" - must be followed by whitespace *)
237
+
let and_start = !i in
238
+
match read_ident () with
239
+
| None -> Error "Parse Error."
240
+
| Some kw ->
241
+
let kw_lower = String.lowercase_ascii kw in
242
+
if kw_lower <> "and" then Error "Parse Error."
243
+
else begin
244
+
(* Check that there was whitespace before 'and' *)
245
+
if and_start > 0 && not (is_whitespace s.[and_start - 1]) then
246
+
Error "Parse Error."
247
+
(* Check that there is whitespace after 'and' *)
248
+
else if !i < len && s.[!i] = '(' then
249
+
Error "Parse Error."
250
+
else begin
251
+
skip_ws ();
252
+
if !i >= len then Error "Parse Error."
253
+
else if s.[!i] <> '(' then Error "Parse Error."
254
+
else begin
255
+
(* Validate remaining features *)
256
+
let rec validate_features () =
257
+
skip_ws ();
258
+
if !i >= len then Ok ()
259
+
else if s.[!i] = '(' then begin
260
+
let content = read_paren_content () in
261
+
match validate_media_feature content with
262
+
| Error e -> Error e
263
+
| Ok () ->
264
+
skip_ws ();
265
+
if !i >= len then Ok ()
266
+
else begin
267
+
match read_ident () with
268
+
| None -> Error "Parse Error."
269
+
| Some kw2 ->
270
+
let kw2_lower = String.lowercase_ascii kw2 in
271
+
if kw2_lower <> "and" then Error "Parse Error."
272
+
else begin
273
+
skip_ws ();
274
+
if !i >= len then Error "Parse Error."
275
+
else validate_features ()
276
+
end
277
+
end
278
+
end else Error "Parse Error."
279
+
in
280
+
validate_features ()
281
+
end
282
+
end
283
+
end
284
+
end
285
+
end
286
+
end
287
+
end
288
+
end
289
+
290
+
and validate_media_feature content =
291
+
let content = String.trim content in
292
+
if content = "" then Error "Parse Error."
293
+
else begin
294
+
(* Check for colon - feature: value *)
295
+
match String.index_opt content ':' with
296
+
| None ->
297
+
(* Just feature name - boolean feature or range syntax *)
298
+
let feature_lower = String.lowercase_ascii content in
299
+
if List.mem feature_lower deprecated_media_features then
300
+
Error (Printf.sprintf "Deprecated media feature \"%s\". For guidance, see the Deprecated Media Features section in the current Media Queries specification." feature_lower)
301
+
else if List.mem feature_lower valid_media_features then
302
+
Ok ()
303
+
else
304
+
Ok () (* Allow unknown features for forward compatibility *)
305
+
| Some colon_pos ->
306
+
let feature = String.trim (String.sub content 0 colon_pos) in
307
+
let value = String.trim (String.sub content (colon_pos + 1) (String.length content - colon_pos - 1)) in
308
+
let feature_lower = String.lowercase_ascii feature in
309
+
310
+
(* Check for deprecated features *)
311
+
if List.mem feature_lower deprecated_media_features then
312
+
Error (Printf.sprintf "Deprecated media feature \"%s\". For guidance, see the Deprecated Media Features section in the current Media Queries specification." feature_lower)
313
+
(* Check for incomplete value *)
314
+
else if value = "" then
315
+
Error "Parse Error."
316
+
(* Check for invalid value syntax *)
317
+
else if String.length value > 0 && value.[String.length value - 1] = ';' then
318
+
Error "Parse Error."
319
+
else begin
320
+
(* Validate value based on feature type *)
321
+
validate_feature_value feature_lower value
322
+
end
323
+
end
324
+
325
+
and validate_feature_value feature value =
326
+
(* Width/height features require length values *)
327
+
let length_features = ["width"; "min-width"; "max-width"; "height"; "min-height"; "max-height"] in
328
+
let color_features = ["color"; "min-color"; "max-color"; "color-index"; "min-color-index"; "max-color-index";
329
+
"monochrome"; "min-monochrome"; "max-monochrome"] in
330
+
331
+
if List.mem feature length_features then begin
332
+
(* Must be a valid length: number followed by unit *)
333
+
let value = String.trim value in
334
+
let is_digit c = c >= '0' && c <= '9' in
335
+
336
+
(* Parse number - includes sign, digits, and decimal point *)
337
+
let i = ref 0 in
338
+
let len = String.length value in
339
+
while !i < len && (is_digit value.[!i] || value.[!i] = '.' || value.[!i] = '-' || value.[!i] = '+') do
340
+
incr i
341
+
done;
342
+
let num_part = String.sub value 0 !i in
343
+
let unit_part = String.sub value !i (len - !i) in
344
+
345
+
(* Check if the number is zero (including 0.0, 0.00, etc.) *)
346
+
let is_zero num =
347
+
let rec check i =
348
+
if i >= String.length num then true
349
+
else match num.[i] with
350
+
| '0' | '.' | '-' -> check (i + 1)
351
+
| _ -> false
352
+
in
353
+
check 0
354
+
in
355
+
if num_part = "" then Error "Parse Error."
356
+
else if is_zero num_part && unit_part = "" then Ok () (* 0 (or 0.0) can be unitless *)
357
+
else if unit_part = "" then
358
+
Error "only \"0\" can be a \"unit\". You must put a unit after your number"
359
+
else begin
360
+
let unit_lower = String.lowercase_ascii unit_part in
361
+
if List.mem unit_lower valid_length_units then Ok ()
362
+
else if List.mem unit_lower valid_resolution_units then
363
+
Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value feature)
364
+
else
365
+
Error "Unknown dimension."
366
+
end
367
+
end else if List.mem feature color_features then begin
368
+
(* Must be an integer *)
369
+
let value = String.trim value in
370
+
let is_digit c = c >= '0' && c <= '9' in
371
+
if String.length value > 0 && String.for_all is_digit value then Ok ()
372
+
else
373
+
Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value feature)
374
+
end else
375
+
Ok () (* Allow other features with any value for now *)
376
+
377
+
(** Legacy permissive validation *)
88
378
let validate_media_query s =
89
379
let s = String.trim s in
90
380
if String.length s = 0 then Error "Media query must not be empty"
91
381
else
92
-
(* Check balanced parentheses *)
93
382
match check_balanced_parens s with
94
383
| Error _ as e -> e
95
-
| Ok () ->
96
-
(* Extract and validate words *)
97
-
let words = extract_words s in
98
-
let words_lower = List.map String.lowercase_ascii words in
99
-
100
-
(* Basic validation: check for invalid keyword combinations *)
101
-
let rec validate_words prev = function
102
-
| [] -> Ok ()
103
-
| word :: rest -> (
104
-
let word_lower = String.lowercase_ascii word in
105
-
match (prev, word_lower) with
106
-
| None, "and" | None, "or" ->
107
-
Error
108
-
(Printf.sprintf
109
-
"Media query cannot start with keyword '%s'" word)
110
-
| Some "and", "and" | Some "or", "or" | Some "not", "not" ->
111
-
Error
112
-
(Printf.sprintf "Consecutive '%s' keywords are not allowed"
113
-
word)
114
-
| Some "only", "only" ->
115
-
Error "Consecutive 'only' keywords are not allowed"
116
-
| _, _ -> validate_words (Some word_lower) rest)
117
-
in
118
-
119
-
(* Check if query contains valid media types or features *)
120
-
let has_media_type =
121
-
List.exists
122
-
(fun w -> List.mem (String.lowercase_ascii w) media_types)
123
-
words
124
-
in
125
-
let has_features = String.contains s '(' in
126
-
127
-
if not (has_media_type || has_features) then
128
-
(* Only keywords, no actual media type or features *)
129
-
if List.for_all (fun w -> List.mem w media_keywords) words_lower then
130
-
Error "Media query contains only keywords without media type or features"
131
-
else Ok () (* Assume other identifiers are valid *)
132
-
else validate_words None words
384
+
| Ok () -> Ok ()
133
385
134
386
module Media_query = struct
135
387
let name = "media query"
+4
lib/html5_checker/datatype/dt_media_query.mli
+4
lib/html5_checker/datatype/dt_media_query.mli
···
2
2
3
3
This module provides a validator for CSS media queries as used in HTML5. *)
4
4
5
+
(** Strict media query validation for HTML5 conformance checking.
6
+
Returns Ok () if valid, Error message if invalid. *)
7
+
val validate_media_query_strict : string -> (unit, string) result
8
+
5
9
(** Media query validator.
6
10
7
11
Validates CSS media queries used in media attributes and CSS @media rules.
+10
-2
lib/html5_checker/parse_error_bridge.ml
+10
-2
lib/html5_checker/parse_error_bridge.ml
···
11
11
Message.make_location ~line ~column ?system_id ()
12
12
in
13
13
let code_str = Html5rw.Parse_error_code.to_string code in
14
+
let message = match code with
15
+
| Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus ->
16
+
"Self-closing syntax (\"/>\") used on a non-void HTML element. Ignoring the slash and treating as a start tag."
17
+
| _ -> Printf.sprintf "Parse error: %s" code_str
18
+
in
14
19
Message.error
15
-
~message:(Printf.sprintf "Parse error: %s" code_str)
20
+
~message
16
21
~code:code_str
17
22
~location
18
23
()
···
25
30
in
26
31
let filtered_errors =
27
32
if is_xhtml then
28
-
(* XHTML doesn't require DOCTYPE - filter that error *)
33
+
(* XHTML has different requirements than HTML:
34
+
- No DOCTYPE required
35
+
- Self-closing syntax is valid for all elements *)
29
36
List.filter (fun err ->
30
37
match Html5rw.error_code err with
31
38
| Html5rw.Parse_error_code.Tree_construction_error "expected-doctype-but-got-other" -> false
39
+
| Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus -> false
32
40
| _ -> true
33
41
) errors
34
42
else errors
+4
-14
lib/html5_checker/semantic/id_checker.ml
+4
-14
lib/html5_checker/semantic/id_checker.ml
···
6
6
- ID values conform to HTML5 requirements *)
7
7
8
8
(** Location information for ID occurrences. *)
9
-
type id_location = {
10
-
element : string;
11
-
location : Message.location option;
12
-
}
9
+
type id_location = unit (* simplified since we only need to track existence *)
13
10
14
11
(** Information about an ID reference. *)
15
12
type id_reference = {
···
120
117
()
121
118
(* Check for duplicate ID *)
122
119
else if Hashtbl.mem state.ids id then
123
-
let first_occurrence = Hashtbl.find state.ids id in
124
-
let first_loc_str = match first_occurrence.location with
125
-
| None -> ""
126
-
| Some loc -> Printf.sprintf " at line %d, column %d" loc.line loc.column
127
-
in
128
120
Message_collector.add_error collector
129
-
~message:(Printf.sprintf
130
-
"Duplicate ID '%s': first used on <%s>%s, now on <%s>"
131
-
id first_occurrence.element first_loc_str element)
121
+
~message:(Printf.sprintf "Duplicate ID \xe2\x80\x9c%s\xe2\x80\x9d." id)
132
122
~code:"duplicate-id"
133
123
?location
134
124
~element
···
136
126
()
137
127
else
138
128
(* Store the ID *)
139
-
Hashtbl.add state.ids id { element; location }
129
+
Hashtbl.add state.ids id ()
140
130
141
131
(** Record a single ID reference. *)
142
132
let add_reference state ~referring_element ~attribute ~referenced_id ~location =
···
181
171
| "name" when element = "map" ->
182
172
(* Track map name attributes for usemap resolution *)
183
173
if String.length value > 0 then
184
-
Hashtbl.add state.map_names value { element; location }
174
+
Hashtbl.add state.map_names value ()
185
175
186
176
| attr when List.mem attr single_id_ref_attrs ->
187
177
add_reference state ~referring_element:element
+128
-3
lib/html5_checker/specialized/aria_checker.ml
+128
-3
lib/html5_checker/specialized/aria_checker.ml
···
358
358
359
359
(** Stack node representing an element in the ancestor chain. *)
360
360
type stack_node = {
361
+
element_name : string;
361
362
explicit_roles : string list;
362
363
implicit_role : string option;
363
364
}
···
365
366
(** Checker state. *)
366
367
type state = {
367
368
mutable stack : stack_node list;
369
+
mutable has_active_tab : bool; (* Whether document has role=tab with aria-selected=true *)
370
+
mutable has_tabpanel : bool; (* Whether document has role=tabpanel elements *)
368
371
}
369
372
370
-
let create () = { stack = [] }
373
+
let create () = { stack = []; has_active_tab = false; has_tabpanel = false }
371
374
372
-
let reset state = state.stack <- []
375
+
let reset state =
376
+
state.stack <- [];
377
+
state.has_active_tab <- false;
378
+
state.has_tabpanel <- false
373
379
374
380
(** Check if any ancestor has one of the required roles. *)
375
381
let has_required_ancestor_role state required_roles =
···
385
391
| None -> false
386
392
) state.stack
387
393
394
+
(** Get the first ancestor role from a list of target roles. *)
395
+
let get_ancestor_role state target_roles =
396
+
let rec find_in_stack = function
397
+
| [] -> None
398
+
| ancestor :: rest ->
399
+
let found_explicit = List.find_opt (fun role -> List.mem role target_roles) ancestor.explicit_roles in
400
+
match found_explicit with
401
+
| Some r -> Some r
402
+
| None ->
403
+
match ancestor.implicit_role with
404
+
| Some r when List.mem r target_roles -> Some r
405
+
| _ -> find_in_stack rest
406
+
in
407
+
find_in_stack state.stack
408
+
409
+
(** Get the immediate parent element name. *)
410
+
let get_parent_element state =
411
+
match state.stack with
412
+
| parent :: _ -> Some parent.element_name
413
+
| [] -> None
414
+
388
415
(** Render a list of roles as a human-readable string. *)
389
416
let render_role_set roles =
390
417
match roles with
···
418
445
(* Get implicit role for this element *)
419
446
let implicit_role = get_implicit_role name_lower attrs in
420
447
448
+
(* Track active tabs and tabpanel roles for end_document validation *)
449
+
if List.mem "tab" explicit_roles then begin
450
+
let aria_selected = List.assoc_opt "aria-selected" attrs in
451
+
if aria_selected = Some "true" then state.has_active_tab <- true
452
+
end;
453
+
if List.mem "tabpanel" explicit_roles then state.has_tabpanel <- true;
454
+
421
455
(* Check br/wbr role restrictions - only none/presentation allowed *)
422
456
if (name_lower = "br" || name_lower = "wbr") && explicit_roles <> [] then begin
423
457
let first_role = List.hd explicit_roles in
···
499
533
| _ -> ()
500
534
end;
501
535
536
+
(* Check for input[type=checkbox][role=button] requires aria-pressed *)
537
+
if name_lower = "input" then begin
538
+
let input_type = match List.assoc_opt "type" attrs with
539
+
| Some t -> String.lowercase_ascii t
540
+
| None -> "text"
541
+
in
542
+
if input_type = "checkbox" && List.mem "button" explicit_roles then begin
543
+
let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in
544
+
if not has_aria_pressed then
545
+
Message_collector.add_error collector
546
+
~message:"An \xe2\x80\x9cinput\xe2\x80\x9d element with a \xe2\x80\x9ctype\xe2\x80\x9d attribute whose value is \xe2\x80\x9ccheckbox\xe2\x80\x9d and with a \xe2\x80\x9crole\xe2\x80\x9d attribute whose value is \xe2\x80\x9cbutton\xe2\x80\x9d must have an \xe2\x80\x9caria-pressed\xe2\x80\x9d attribute."
547
+
~code:"checkbox-button-needs-aria-pressed"
548
+
~element:name
549
+
~attribute:"role"
550
+
()
551
+
end
552
+
end;
553
+
554
+
(* Check li role restrictions in menu/menubar/tablist contexts *)
555
+
if name_lower = "li" && explicit_roles <> [] then begin
556
+
let first_role = List.hd explicit_roles in
557
+
(* none/presentation are always allowed as they remove from accessibility tree *)
558
+
if first_role <> "none" && first_role <> "presentation" then begin
559
+
(* Check if in menu or menubar context *)
560
+
(match get_ancestor_role state ["menu"; "menubar"] with
561
+
| Some _ ->
562
+
let valid_roles = ["group"; "menuitem"; "menuitemcheckbox"; "menuitemradio"; "separator"] in
563
+
if not (List.mem first_role valid_roles) then
564
+
Message_collector.add_error collector
565
+
~message:"An \xe2\x80\x9cli\xe2\x80\x9d element that is a descendant of a \xe2\x80\x9crole=menu\xe2\x80\x9d element or \xe2\x80\x9crole=menubar\xe2\x80\x9d element must not have any \xe2\x80\x9crole\xe2\x80\x9d value other than \xe2\x80\x9cgroup\xe2\x80\x9d, \xe2\x80\x9cmenuitem\xe2\x80\x9d, \xe2\x80\x9cmenuitemcheckbox\xe2\x80\x9d, \xe2\x80\x9cmenuitemradio\xe2\x80\x9d, or \xe2\x80\x9cseparator\xe2\x80\x9d."
566
+
~code:"invalid-li-role-in-menu"
567
+
~element:name
568
+
~attribute:"role"
569
+
()
570
+
| None ->
571
+
(* Check if in tablist context *)
572
+
match get_ancestor_role state ["tablist"] with
573
+
| Some _ ->
574
+
if first_role <> "tab" then
575
+
Message_collector.add_error collector
576
+
~message:"An \xe2\x80\x9cli\xe2\x80\x9d element that is a descendant of a \xe2\x80\x9crole=tablist\xe2\x80\x9d element must not have any \xe2\x80\x9crole\xe2\x80\x9d value other than \xe2\x80\x9ctab\xe2\x80\x9d."
577
+
~code:"invalid-li-role-in-tablist"
578
+
~element:name
579
+
~attribute:"role"
580
+
()
581
+
| None -> ())
582
+
end
583
+
end;
584
+
502
585
(* Check for aria-hidden="true" on body element *)
503
586
if name_lower = "body" then begin
504
587
let aria_hidden = List.assoc_opt "aria-hidden" attrs in
···
640
723
| None -> ()
641
724
) attrs;
642
725
726
+
(* Check summary restrictions in details context *)
727
+
if name_lower = "summary" then begin
728
+
let parent = get_parent_element state in
729
+
let is_in_details = parent = Some "details" in
730
+
if is_in_details then begin
731
+
(* summary that is the first child of details *)
732
+
(* Cannot have role=paragraph (or other non-button roles) *)
733
+
if explicit_roles <> [] then begin
734
+
let first_role = List.hd explicit_roles in
735
+
if first_role <> "button" && first_role <> "none" && first_role <> "presentation" then
736
+
Message_collector.add_error collector
737
+
~message:"The \xe2\x80\x9crole\xe2\x80\x9d attribute must not be used on any \xe2\x80\x9csummary\xe2\x80\x9d element that is a summary for its parent \xe2\x80\x9cdetails\xe2\x80\x9d element."
738
+
~code:"invalid-role-on-summary"
739
+
~element:name
740
+
~attribute:"role"
741
+
()
742
+
end;
743
+
(* If has aria-expanded or aria-pressed, must have role *)
744
+
let has_aria_expanded = List.assoc_opt "aria-expanded" attrs <> None in
745
+
let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in
746
+
if (has_aria_expanded || has_aria_pressed) && explicit_roles = [] then begin
747
+
if has_aria_pressed then
748
+
Message_collector.add_error collector
749
+
~message:"Element \xe2\x80\x9csummary\xe2\x80\x9d is missing required attribute \xe2\x80\x9crole\xe2\x80\x9d."
750
+
~code:"missing-role-on-summary"
751
+
~element:name ()
752
+
else
753
+
Message_collector.add_error collector
754
+
~message:"Element \xe2\x80\x9csummary\xe2\x80\x9d is missing one or more of the following attributes: [aria-checked, aria-level, role]."
755
+
~code:"missing-role-on-summary"
756
+
~element:name ()
757
+
end
758
+
end
759
+
end;
760
+
643
761
(* Push current element onto stack *)
644
762
let node = {
763
+
element_name = name_lower;
645
764
explicit_roles;
646
765
implicit_role;
647
766
} in
···
659
778
660
779
let characters _state _text _collector = ()
661
780
662
-
let end_document _state _collector = ()
781
+
let end_document state collector =
782
+
(* Check that active tabs have corresponding tabpanels *)
783
+
if state.has_active_tab && not state.has_tabpanel then
784
+
Message_collector.add_error collector
785
+
~message:"Every active \xe2\x80\x9crole=tab\xe2\x80\x9d element must have a corresponding \xe2\x80\x9crole=tabpanel\xe2\x80\x9d element."
786
+
~code:"tab-without-tabpanel"
787
+
()
663
788
664
789
let checker = (module struct
665
790
type nonrec state = state
+53
-1
lib/html5_checker/specialized/attr_restrictions_checker.ml
+53
-1
lib/html5_checker/specialized/attr_restrictions_checker.ml
···
501
501
The HTML5 parser normalizes attribute names to lowercase, so this check
502
502
is only effective when the document is parsed as XML.
503
503
Commenting out until we have XML parsing support. *)
504
-
ignore state.is_xhtml
504
+
ignore state.is_xhtml;
505
+
506
+
(* Validate media attribute on link, style, source elements *)
507
+
if namespace = None && (name_lower = "link" || name_lower = "style" || name_lower = "source") then begin
508
+
List.iter (fun (attr_name, attr_value) ->
509
+
let attr_lower = String.lowercase_ascii attr_name in
510
+
if attr_lower = "media" then begin
511
+
let trimmed = String.trim attr_value in
512
+
if trimmed <> "" then begin
513
+
match Dt_media_query.validate_media_query_strict trimmed with
514
+
| Ok () -> ()
515
+
| Error msg ->
516
+
Message_collector.add_error collector
517
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad media query: %s"
518
+
attr_value attr_name name msg)
519
+
~code:"bad-attribute-value"
520
+
~element:name ~attribute:attr_name ()
521
+
end
522
+
end
523
+
) attrs
524
+
end;
525
+
526
+
(* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *)
527
+
if namespace = None then begin
528
+
List.iter (fun (attr_name, attr_value) ->
529
+
let attr_lower = String.lowercase_ascii attr_name in
530
+
if attr_lower = "prefix" then begin
531
+
(* Parse prefix attribute value - format: "prefix1: iri1 prefix2: iri2 ..." *)
532
+
let trimmed = String.trim attr_value in
533
+
if trimmed <> "" then begin
534
+
(* Check for empty prefix (starts with : or has space:) *)
535
+
if String.length trimmed > 0 && trimmed.[0] = ':' then
536
+
Message_collector.add_error collector
537
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
538
+
attr_value attr_name name)
539
+
~code:"bad-attribute-value"
540
+
~element:name ~attribute:attr_name ()
541
+
else begin
542
+
(* Check for invalid prefix names - must start with letter or underscore *)
543
+
let is_ncname_start c =
544
+
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c = '_'
545
+
in
546
+
if String.length trimmed > 0 && not (is_ncname_start trimmed.[0]) then
547
+
Message_collector.add_error collector
548
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
549
+
attr_value attr_name name)
550
+
~code:"bad-attribute-value"
551
+
~element:name ~attribute:attr_name ()
552
+
end
553
+
end
554
+
end
555
+
) attrs
556
+
end
505
557
506
558
let end_element _state ~name:_ ~namespace:_ _collector = ()
507
559
let characters _state _text _collector = ()
+17
-1
lib/html5_checker/specialized/dl_checker.ml
+17
-1
lib/html5_checker/specialized/dl_checker.ml
···
56
56
| ctx :: _ -> Some ctx
57
57
| [] -> None
58
58
59
-
let start_element state ~name ~namespace ~attrs:_ collector =
59
+
let get_attr name attrs =
60
+
List.find_map (fun (n, v) ->
61
+
if String.lowercase_ascii n = name then Some v else None
62
+
) attrs
63
+
64
+
let start_element state ~name ~namespace ~attrs collector =
60
65
let name_lower = String.lowercase_ascii name in
61
66
62
67
(* Track parent stack for all HTML elements first *)
···
100
105
~message:"Element \xe2\x80\x9cdiv\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdl\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
101
106
~code:"disallowed-child"
102
107
~element:"div" ();
108
+
(* Check that role is only presentation or none *)
109
+
(match get_attr "role" attrs with
110
+
| Some role_value ->
111
+
let role_lower = String.lowercase_ascii (String.trim role_value) in
112
+
if role_lower <> "presentation" && role_lower <> "none" then
113
+
Message_collector.add_error collector
114
+
~message:"A \xe2\x80\x9cdiv\xe2\x80\x9d child of a \xe2\x80\x9cdl\xe2\x80\x9d element must not have any \xe2\x80\x9crole\xe2\x80\x9d value other than \xe2\x80\x9cpresentation\xe2\x80\x9d or \xe2\x80\x9cnone\xe2\x80\x9d."
115
+
~code:"invalid-role-on-div-in-dl"
116
+
~element:"div"
117
+
~attribute:"role" ()
118
+
| None -> ());
103
119
let div_ctx = { has_dt = false; has_dd = false; group_count = 0; in_dd_part = false } in
104
120
state.div_in_dl_stack <- div_ctx :: state.div_in_dl_stack
105
121
| Some _ when state.div_in_dl_stack <> [] ->
+71
-10
lib/html5_checker/specialized/label_checker.ml
+71
-10
lib/html5_checker/specialized/label_checker.ml
···
12
12
if String.lowercase_ascii n = name_lower then Some v else None
13
13
) attrs
14
14
15
+
type label_for_info = {
16
+
for_target : string;
17
+
has_role : bool;
18
+
has_aria_label : bool;
19
+
}
20
+
15
21
type state = {
16
22
mutable in_label : bool;
17
23
mutable label_depth : int;
18
24
mutable labelable_count : int;
19
25
mutable label_for_value : string option; (* Value of for attribute on current label *)
26
+
mutable label_has_role : bool; (* Whether current label has role attribute *)
27
+
mutable label_has_aria_label : bool; (* Whether current label has aria-label attribute *)
28
+
mutable labels_for : label_for_info list; (* Labels with for= attribute *)
29
+
mutable labelable_ids : string list; (* IDs of labelable elements *)
20
30
}
21
31
22
32
let create () = {
···
24
34
label_depth = 0;
25
35
labelable_count = 0;
26
36
label_for_value = None;
37
+
label_has_role = false;
38
+
label_has_aria_label = false;
39
+
labels_for = [];
40
+
labelable_ids = [];
27
41
}
28
42
29
43
let reset state =
30
44
state.in_label <- false;
31
45
state.label_depth <- 0;
32
46
state.labelable_count <- 0;
33
-
state.label_for_value <- None
47
+
state.label_for_value <- None;
48
+
state.label_has_role <- false;
49
+
state.label_has_aria_label <- false;
50
+
state.labels_for <- [];
51
+
state.labelable_ids <- []
34
52
35
53
let start_element state ~name ~namespace ~attrs collector =
36
54
if namespace <> None then ()
···
39
57
40
58
if name_lower = "label" then begin
41
59
state.in_label <- true;
42
-
state.label_depth <- 0;
60
+
state.label_depth <- 1; (* Start at 1 for the label element itself *)
43
61
state.labelable_count <- 0;
44
-
state.label_for_value <- get_attr attrs "for"
62
+
let for_value = get_attr attrs "for" in
63
+
let has_role = get_attr attrs "role" <> None in
64
+
let has_aria_label = get_attr attrs "aria-label" <> None in
65
+
state.label_for_value <- for_value;
66
+
state.label_has_role <- has_role;
67
+
state.label_has_aria_label <- has_aria_label;
68
+
(* Track this label if it has for= and role/aria-label *)
69
+
(match for_value with
70
+
| Some target when has_role || has_aria_label ->
71
+
state.labels_for <- { for_target = target; has_role; has_aria_label } :: state.labels_for
72
+
| _ -> ())
45
73
end;
74
+
(* Track labelable element IDs *)
75
+
(if List.mem name_lower labelable_elements then
76
+
match get_attr attrs "id" with
77
+
| Some id -> state.labelable_ids <- id :: state.labelable_ids
78
+
| None -> ());
46
79
47
-
if state.in_label then begin
80
+
if state.in_label && name_lower <> "label" then begin
48
81
state.label_depth <- state.label_depth + 1;
49
82
50
83
(* Check for labelable elements inside label *)
···
57
90
~element:"label" ();
58
91
59
92
(* Check if label has for attribute and descendant has mismatched id *)
60
-
match state.label_for_value with
93
+
(match state.label_for_value with
61
94
| Some for_value ->
62
95
let descendant_id = get_attr attrs "id" in
63
96
(match descendant_id with
···
78
111
())
79
112
| None ->
80
113
(* No for attribute on label - no constraint on descendant id *)
81
-
()
114
+
())
82
115
end
83
116
end
84
117
end
85
118
86
-
let end_element state ~name ~namespace _collector =
119
+
let end_element state ~name ~namespace collector =
87
120
if namespace <> None then ()
88
121
else begin
89
122
let name_lower = String.lowercase_ascii name in
···
91
124
if state.in_label then begin
92
125
state.label_depth <- state.label_depth - 1;
93
126
94
-
if name_lower = "label" && state.label_depth < 0 then begin
127
+
if name_lower = "label" && state.label_depth = 0 then begin
128
+
(* Check for role attribute on label that's ancestor of labelable element *)
129
+
if state.label_has_role && state.labelable_count > 0 then
130
+
Message_collector.add_error collector
131
+
~message:"The \xe2\x80\x9crole\xe2\x80\x9d attribute must not be used on any \xe2\x80\x9clabel\xe2\x80\x9d element that is an ancestor of a labelable element."
132
+
~code:"role-on-label-ancestor"
133
+
~element:"label"
134
+
~attribute:"role" ();
135
+
95
136
state.in_label <- false;
96
137
state.labelable_count <- 0;
97
-
state.label_for_value <- None
138
+
state.label_for_value <- None;
139
+
state.label_has_role <- false;
140
+
state.label_has_aria_label <- false
98
141
end
99
142
end
100
143
end
101
144
102
145
let characters _state _text _collector = ()
103
146
104
-
let end_document _state _collector = ()
147
+
let end_document state collector =
148
+
(* Check labels with for= that target labelable elements *)
149
+
List.iter (fun label_info ->
150
+
if List.mem label_info.for_target state.labelable_ids then begin
151
+
(* This label is associated with a labelable element *)
152
+
if label_info.has_role then
153
+
Message_collector.add_error collector
154
+
~message:"The \xe2\x80\x9crole\xe2\x80\x9d attribute must not be used on any \xe2\x80\x9clabel\xe2\x80\x9d element that is associated with a labelable element."
155
+
~code:"role-on-label-for"
156
+
~element:"label"
157
+
~attribute:"role" ();
158
+
if label_info.has_aria_label then
159
+
Message_collector.add_error collector
160
+
~message:"The \xe2\x80\x9caria-label\xe2\x80\x9d attribute must not be used on any \xe2\x80\x9clabel\xe2\x80\x9d element that is associated with a labelable element."
161
+
~code:"aria-label-on-label-for"
162
+
~element:"label"
163
+
~attribute:"aria-label" ()
164
+
end
165
+
) state.labels_for
105
166
106
167
let checker =
107
168
(module struct
+51
-2
lib/html5_checker/specialized/microdata_checker.ml
+51
-2
lib/html5_checker/specialized/microdata_checker.ml
···
67
67
let is_url s =
68
68
String.contains s ':'
69
69
70
+
(** Validate that a URL is a valid absolute URL for itemtype.
71
+
itemtype must be an absolute URL per the HTML5 spec.
72
+
http/https URLs require :// but other schemes like mailto:, data:, javascript: don't. *)
73
+
let validate_itemtype_url url =
74
+
let url = String.trim url in
75
+
if String.length url = 0 then
76
+
Error "itemtype must not be empty"
77
+
else
78
+
match String.index_opt url ':' with
79
+
| None -> Error "Expected a slash (\"/\")."
80
+
| Some colon_pos ->
81
+
if colon_pos = 0 then
82
+
Error "Expected a slash (\"/\")."
83
+
else
84
+
let scheme = String.lowercase_ascii (String.sub url 0 colon_pos) in
85
+
(* Schemes that require :// for itemtype validation
86
+
Note: The Nu validator only enforces :// for http, https, and ftp *)
87
+
let special_schemes = [
88
+
"http"; "https"; "ftp"
89
+
] in
90
+
if List.mem scheme special_schemes then begin
91
+
if colon_pos + 2 >= String.length url then
92
+
Error "Expected a slash (\"/\")."
93
+
else if url.[colon_pos + 1] <> '/' || url.[colon_pos + 2] <> '/' then
94
+
Error "Expected a slash (\"/\")."
95
+
else
96
+
Ok ()
97
+
end else
98
+
(* Other schemes (mailto:, data:, javascript:, etc.) are valid as-is *)
99
+
Ok ()
100
+
70
101
(** Check if itemprop value is valid. *)
71
102
let validate_itemprop_value value =
72
103
if String.length value = 0 then
···
139
170
| None -> ()
140
171
end;
141
172
142
-
(* Check itemtype requires itemscope *)
173
+
(* Check itemtype requires itemscope and is valid URL *)
143
174
begin match itemtype_opt with
144
-
| Some _itemtype ->
175
+
| Some itemtype ->
145
176
if not has_itemscope then
146
177
Message_collector.add_error collector
147
178
~message:"itemtype attribute requires itemscope attribute"
···
150
181
~element
151
182
~attribute:"itemtype"
152
183
()
184
+
else begin
185
+
(* Validate each itemtype URL (can be space-separated) *)
186
+
let types = split_whitespace itemtype in
187
+
List.iter (fun url ->
188
+
match validate_itemtype_url url with
189
+
| Ok () -> ()
190
+
| Error msg ->
191
+
Message_collector.add_error collector
192
+
~message:(Printf.sprintf
193
+
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9citemtype\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: %s"
194
+
url element msg)
195
+
~code:"microdata-invalid-itemtype"
196
+
?location
197
+
~element
198
+
~attribute:"itemtype"
199
+
()
200
+
) types
201
+
end
153
202
| None -> ()
154
203
end;
155
204
+5
-2
lib/html5rw/parser/parser_tree_builder.ml
+5
-2
lib/html5rw/parser/parser_tree_builder.ml
···
1428
1428
| Token.Tag { kind = Token.Start; name; _ }
1429
1429
when List.mem name ["caption"; "colgroup"; "head"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"] ->
1430
1430
parse_error t "unexpected-start-tag"
1431
-
| Token.Tag { kind = Token.Start; name; attrs; _ } ->
1431
+
| Token.Tag { kind = Token.Start; name; attrs; self_closing } ->
1432
1432
(* Any other start tag *)
1433
1433
reconstruct_active_formatting t;
1434
-
ignore (insert_element t name ~push:true attrs)
1434
+
ignore (insert_element t name ~push:true attrs);
1435
+
(* Check for self-closing on non-void HTML element *)
1436
+
if self_closing && not (List.mem name Parser_constants.void_elements) then
1437
+
parse_error t "non-void-html-element-start-tag-with-trailing-solidus"
1435
1438
| Token.Tag { kind = Token.End; name; _ } ->
1436
1439
(* Any other end tag *)
1437
1440
let rec check = function
+17
test/debug_check.ml
+17
test/debug_check.ml
···
1
+
let () =
2
+
let test_file = "validator/tests/html/microdata/itemtype/scheme-https-no-slash-novalid.html" in
3
+
let ic = open_in test_file in
4
+
let html = really_input_string ic (in_channel_length ic) in
5
+
close_in ic;
6
+
let reader = Bytesrw.Bytes.Reader.of_string html in
7
+
let result = Html5_checker.check ~collect_parse_errors:true ~system_id:test_file reader in
8
+
let errors = Html5_checker.errors result in
9
+
let warnings = Html5_checker.warnings result in
10
+
print_endline "=== Errors ===";
11
+
List.iter (fun e -> print_endline e.Html5_checker.Message.message) errors;
12
+
print_endline "=== Warnings ===";
13
+
List.iter (fun e -> print_endline e.Html5_checker.Message.message) warnings;
14
+
if List.length errors > 0 then
15
+
print_endline "PASS (has errors)"
16
+
else
17
+
print_endline "FAIL (no errors)"