+1
dune-project
+1
dune-project
+1
html5rw.opam
+1
html5rw.opam
+1
-1
lib/check/checker_registry.ml
+1
-1
lib/check/checker_registry.ml
···
22
22
Hashtbl.replace reg "source" Source_checker.checker;
23
23
Hashtbl.replace reg "label" Label_checker.checker;
24
24
Hashtbl.replace reg "ruby" Ruby_checker.checker;
25
-
Hashtbl.replace reg "h1" H1_checker.checker;
25
+
Hashtbl.replace reg "heading" Heading_checker.checker;
26
26
Hashtbl.replace reg "srcset-sizes" Srcset_sizes_checker.checker;
27
27
Hashtbl.replace reg "autofocus" Autofocus_checker.checker;
28
28
Hashtbl.replace reg "option" Option_checker.checker;
+26
lib/check/datatype/datatype.ml
+26
lib/check/datatype/datatype.ml
···
12
12
13
13
(* Helper utilities *)
14
14
15
+
(** Character predicates *)
16
+
15
17
let is_whitespace = function
16
18
| ' ' | '\t' | '\n' | '\r' | '\012' (* FF *) -> true
17
19
| _ -> false
18
20
19
21
let is_ascii_digit = function '0' .. '9' -> true | _ -> false
20
22
23
+
let is_lower_alpha = function 'a' .. 'z' -> true | _ -> false
24
+
25
+
let is_upper_alpha = function 'A' .. 'Z' -> true | _ -> false
26
+
27
+
let is_alpha c = is_lower_alpha c || is_upper_alpha c
28
+
29
+
let is_alphanumeric c = is_alpha c || is_ascii_digit c
30
+
31
+
let is_hex_digit = function
32
+
| '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
33
+
| _ -> false
34
+
35
+
(** Case conversion *)
36
+
21
37
let to_ascii_lowercase c =
22
38
match c with 'A' .. 'Z' -> Char.chr (Char.code c + 32) | _ -> c
23
39
24
40
let string_to_ascii_lowercase s =
25
41
String.map to_ascii_lowercase s
42
+
43
+
(** String predicates *)
44
+
45
+
let is_non_empty s = String.trim s <> ""
46
+
47
+
let is_all_digits s = String.length s > 0 && String.for_all is_ascii_digit s
48
+
49
+
let is_all_alpha s = String.length s > 0 && String.for_all is_alpha s
50
+
51
+
let is_all_alphanumeric s = String.length s > 0 && String.for_all is_alphanumeric s
26
52
27
53
let trim_html_spaces s =
28
54
let len = String.length s in
+36
-1
lib/check/datatype/datatype.mli
+36
-1
lib/check/datatype/datatype.mli
···
27
27
(** Check if a value is valid *)
28
28
val is_valid : t -> string -> bool
29
29
30
-
(** Helper utilities for implementing datatype validators. *)
30
+
(** {1 Helper utilities for implementing datatype validators} *)
31
+
32
+
(** {2 Character predicates} *)
31
33
32
34
(** Check if a character is HTML5 whitespace (space, tab, LF, FF, or CR). *)
33
35
val is_whitespace : char -> bool
···
35
37
(** Check if a character is an ASCII digit (0-9). *)
36
38
val is_ascii_digit : char -> bool
37
39
40
+
(** Check if a character is a lowercase ASCII letter (a-z). *)
41
+
val is_lower_alpha : char -> bool
42
+
43
+
(** Check if a character is an uppercase ASCII letter (A-Z). *)
44
+
val is_upper_alpha : char -> bool
45
+
46
+
(** Check if a character is an ASCII letter (a-z or A-Z). *)
47
+
val is_alpha : char -> bool
48
+
49
+
(** Check if a character is an ASCII letter or digit. *)
50
+
val is_alphanumeric : char -> bool
51
+
52
+
(** Check if a character is a hexadecimal digit (0-9, a-f, A-F). *)
53
+
val is_hex_digit : char -> bool
54
+
55
+
(** {2 Case conversion} *)
56
+
38
57
(** Convert an ASCII character to lowercase. *)
39
58
val to_ascii_lowercase : char -> char
40
59
41
60
(** Convert an ASCII string to lowercase. *)
42
61
val string_to_ascii_lowercase : string -> string
62
+
63
+
(** {2 String predicates} *)
64
+
65
+
(** Check if a string has non-whitespace content after trimming. *)
66
+
val is_non_empty : string -> bool
67
+
68
+
(** Check if all characters in a non-empty string are ASCII digits. *)
69
+
val is_all_digits : string -> bool
70
+
71
+
(** Check if all characters in a non-empty string are ASCII letters. *)
72
+
val is_all_alpha : string -> bool
73
+
74
+
(** Check if all characters in a non-empty string are ASCII letters or digits. *)
75
+
val is_all_alphanumeric : string -> bool
76
+
77
+
(** {2 String manipulation} *)
43
78
44
79
(** Trim HTML5 whitespace from both ends of a string. *)
45
80
val trim_html_spaces : string -> string
+6
-8
lib/check/datatype/dt_autocomplete.ml
+6
-8
lib/check/datatype/dt_autocomplete.ml
···
4
4
let is_whitespace = Datatype.is_whitespace
5
5
let to_ascii_lowercase = Datatype.to_ascii_lowercase
6
6
7
+
(* Use Astring for string operations *)
8
+
let is_prefix = Astring.String.is_prefix
9
+
7
10
(** Trim whitespace from string and collapse internal whitespace *)
8
11
let trim_whitespace s =
9
12
let s = String.trim s in
···
104
107
(** Split string on whitespace - uses shared utility *)
105
108
let split_on_whitespace = Datatype.split_on_whitespace
106
109
107
-
(** Check if string starts with prefix *)
108
-
let starts_with s prefix =
109
-
String.length s >= String.length prefix
110
-
&& String.sub s 0 (String.length prefix) = prefix
111
-
112
110
(** Validate detail tokens *)
113
111
let check_tokens tokens =
114
112
let tokens = ref tokens in
···
116
114
117
115
(* Check for section-* *)
118
116
(match !tokens with
119
-
| token :: rest when starts_with token "section-" ->
117
+
| token :: rest when is_prefix ~affix:"section-" token ->
120
118
tokens := rest
121
119
| _ -> ());
122
120
···
145
143
146
144
(* Check if any token in the list is a section-* indicator *)
147
145
let find_section tokens =
148
-
List.find_opt (fun t -> starts_with t "section-") tokens
146
+
List.find_opt (fun t -> is_prefix ~affix:"section-" t) tokens
149
147
in
150
148
151
149
(* Check if webauthn appears anywhere except as the very last token *)
···
207
205
(Printf.sprintf
208
206
"The token \"%s\" must only appear before any autofill field names."
209
207
token)
210
-
| token :: _ when starts_with token "section-" ->
208
+
| token :: _ when is_prefix ~affix:"section-" token ->
211
209
Error
212
210
"A \"section-*\" indicator must only appear as the first token in a \
213
211
list of autofill detail tokens."
+3
-6
lib/check/datatype/dt_charset.ml
+3
-6
lib/check/datatype/dt_charset.ml
···
1
1
(** Helper functions for charset validation *)
2
2
3
3
let is_valid_charset_char c =
4
-
(c >= '0' && c <= '9') ||
5
-
(c >= 'a' && c <= 'z') ||
6
-
(c >= 'A' && c <= 'Z') ||
4
+
Datatype.is_alphanumeric c ||
7
5
c = '-' || c = '!' || c = '#' || c = '$' || c = '%' || c = '&' ||
8
6
c = '\'' || c = '+' || c = '_' || c = '`' || c = '{' || c = '}' ||
9
7
c = '~' || c = '^'
10
8
11
-
let to_lower s = String.lowercase_ascii s
9
+
let to_lower = Datatype.string_to_ascii_lowercase
12
10
13
11
(** Common encoding labels recognized by WHATWG Encoding Standard.
14
12
This is a subset of the full list. *)
···
74
72
module Meta_charset = struct
75
73
let name = "legacy character encoding declaration"
76
74
77
-
let is_whitespace c =
78
-
c = ' ' || c = '\t' || c = '\n' || c = '\012' || c = '\r'
75
+
let is_whitespace = Datatype.is_whitespace
79
76
80
77
let validate s =
81
78
let lower = to_lower s in
+1
-2
lib/check/datatype/dt_color.ml
+1
-2
lib/check/datatype/dt_color.ml
···
154
154
]
155
155
156
156
(** Check if character is hex digit *)
157
-
let is_hex_digit c =
158
-
(c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
157
+
let is_hex_digit = Datatype.is_hex_digit
159
158
160
159
(** Validate hex color (#RGB or #RRGGBB) *)
161
160
let validate_hex_color s =
+1
-4
lib/check/datatype/dt_datetime.ml
+1
-4
lib/check/datatype/dt_datetime.ml
+1
-3
lib/check/datatype/dt_email.ml
+1
-3
lib/check/datatype/dt_email.ml
···
2
2
3
3
(** Helper to check if a character is valid in email local/domain parts *)
4
4
let is_email_char c =
5
-
(c >= 'a' && c <= 'z')
6
-
|| (c >= 'A' && c <= 'Z')
7
-
|| (c >= '0' && c <= '9')
5
+
Datatype.is_alphanumeric c
8
6
|| c = '.' || c = '-' || c = '_' || c = '+' || c = '='
9
7
10
8
(** Validate a single email address using simplified rules *)
+4
-17
lib/check/datatype/dt_language.ml
+4
-17
lib/check/datatype/dt_language.ml
···
2
2
3
3
let q = Error_code.q
4
4
5
-
let is_lower_alpha c = c >= 'a' && c <= 'z'
6
-
let is_upper_alpha c = c >= 'A' && c <= 'Z'
7
-
let is_alpha c = is_lower_alpha c || is_upper_alpha c
8
-
let is_digit c = c >= '0' && c <= '9'
9
-
let is_alphanumeric c = is_alpha c || is_digit c
10
-
11
-
let is_all_alpha s =
12
-
String.for_all is_alpha s
13
-
14
-
let _is_all_digits s =
15
-
String.for_all is_digit s
16
-
17
-
let is_all_alphanumeric s =
18
-
String.for_all is_alphanumeric s
19
-
20
-
let to_lower s =
21
-
String.lowercase_ascii s
5
+
(* Use shared character predicates from Datatype *)
6
+
let is_all_alpha = Datatype.is_all_alpha
7
+
let is_all_alphanumeric = Datatype.is_all_alphanumeric
8
+
let to_lower = Datatype.string_to_ascii_lowercase
22
9
23
10
(** Valid extlang subtags per IANA language-subtag-registry.
24
11
Extlangs are 3-letter subtags that follow the primary language.
+20
-26
lib/check/datatype/dt_media_query.ml
+20
-26
lib/check/datatype/dt_media_query.ml
···
70
70
(** Media query keywords (unused but kept for documentation) *)
71
71
let _media_keywords = [ "and"; "not"; "only" ]
72
72
73
-
(** Check if character is whitespace *)
74
-
let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r'
73
+
let is_whitespace = Datatype.is_whitespace
75
74
76
75
(** Check if character can start an identifier *)
77
76
let is_ident_start c =
78
-
(c >= 'a' && c <= 'z')
79
-
|| (c >= 'A' && c <= 'Z')
80
-
|| c = '_' || c = '-' || Char.code c >= 128
77
+
Datatype.is_alpha c || c = '_' || c = '-' || Char.code c >= 128
81
78
82
79
(** Check if character can be in an identifier *)
83
80
let is_ident_char c =
84
-
is_ident_start c || (c >= '0' && c <= '9')
81
+
is_ident_start c || Datatype.is_ascii_digit c
85
82
86
-
(** Unicode case-fold for Turkish dotted-I etc *)
87
-
let lowercase_unicode s =
88
-
(* Handle special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE -> i *)
83
+
(** Unicode case folding for case-insensitive comparison.
84
+
85
+
Uses the Uucp library for proper Unicode case folding, which handles
86
+
special cases like Turkish dotted-I (U+0130 -> 'i' + U+0307) correctly. *)
87
+
let case_fold s =
89
88
let buf = Buffer.create (String.length s) in
90
-
let i = ref 0 in
91
-
while !i < String.length s do
92
-
let c = s.[!i] in
93
-
if c = '\xc4' && !i + 1 < String.length s && s.[!i + 1] = '\xb0' then begin
94
-
(* U+0130 -> 'i' + U+0307 (combining dot above), but for simplicity just 'i' followed by U+0307 *)
95
-
Buffer.add_string buf "i\xcc\x87";
96
-
i := !i + 2
97
-
end else begin
98
-
Buffer.add_char buf (Char.lowercase_ascii c);
99
-
incr i
100
-
end
101
-
done;
89
+
let add_uchar u = Uutf.Buffer.add_utf_8 buf u in
90
+
let fold_char () _pos = function
91
+
| `Malformed _ -> () (* Skip malformed sequences *)
92
+
| `Uchar u ->
93
+
match Uucp.Case.Fold.fold u with
94
+
| `Self -> add_uchar u
95
+
| `Uchars us -> List.iter add_uchar us
96
+
in
97
+
Uutf.String.fold_utf_8 fold_char () s;
102
98
Buffer.contents buf
103
99
104
100
(** Check balanced parentheses *)
···
222
218
match read_ident () with
223
219
| None -> Error "Parse Error."
224
220
| Some media_type ->
225
-
let mt_lower = lowercase_unicode media_type in
221
+
let mt_lower = case_fold media_type in
226
222
(* Check for deprecated media type *)
227
223
if List.mem mt_lower deprecated_media_types then
228
224
Error (Printf.sprintf "The media \"%s\" has been deprecated" mt_lower)
···
341
337
if List.mem feature length_features then begin
342
338
(* Must be a valid length: number followed by unit *)
343
339
let value = String.trim value in
344
-
let is_digit c = c >= '0' && c <= '9' in
345
340
346
341
(* Parse number - includes sign, digits, and decimal point *)
347
342
let i = ref 0 in
348
343
let len = String.length value in
349
-
while !i < len && (is_digit value.[!i] || value.[!i] = '.' || value.[!i] = '-' || value.[!i] = '+') do
344
+
while !i < len && (Datatype.is_ascii_digit value.[!i] || value.[!i] = '.' || value.[!i] = '-' || value.[!i] = '+') do
350
345
incr i
351
346
done;
352
347
let num_part = String.sub value 0 !i in
···
377
372
end else if List.mem feature color_features then begin
378
373
(* Must be an integer *)
379
374
let value = String.trim value in
380
-
let is_digit c = c >= '0' && c <= '9' in
381
-
if String.length value > 0 && String.for_all is_digit value then Ok ()
375
+
if Datatype.is_all_digits value then Ok ()
382
376
else
383
377
Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature)
384
378
end else
+1
-2
lib/check/datatype/dt_mime.ml
+1
-2
lib/check/datatype/dt_mime.ml
···
1
1
(** MIME type validation based on RFC 2045 and HTML5 spec *)
2
2
3
-
(** Check if character is whitespace *)
4
-
let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r'
3
+
let is_whitespace = Datatype.is_whitespace
5
4
6
5
(** Check if character is a token character (RFC 2045) *)
7
6
let is_token_char c =
+1
-1
lib/check/dune
+1
-1
lib/check/dune
-37
lib/check/specialized/h1_checker.ml
-37
lib/check/specialized/h1_checker.ml
···
1
-
(** H1 element counter - warns about multiple h1 elements in a document. *)
2
-
3
-
type state = {
4
-
mutable h1_count : int;
5
-
mutable svg_depth : int; (* Track depth inside SVG *)
6
-
}
7
-
8
-
let create () = {
9
-
h1_count = 0;
10
-
svg_depth = 0;
11
-
}
12
-
13
-
let reset state =
14
-
state.h1_count <- 0;
15
-
state.svg_depth <- 0
16
-
17
-
let start_element state ~element collector =
18
-
(* Track SVG depth - h1 inside SVG (foreignObject, desc) shouldn't count *)
19
-
match element.Element.tag with
20
-
| Tag.Svg _ ->
21
-
state.svg_depth <- state.svg_depth + 1
22
-
| Tag.Html `H1 when state.svg_depth = 0 ->
23
-
state.h1_count <- state.h1_count + 1;
24
-
if state.h1_count > 1 then
25
-
Message_collector.add_typed collector (`Misc `Multiple_h1)
26
-
| Tag.Html _ when state.svg_depth = 0 ->
27
-
() (* Other HTML elements outside SVG *)
28
-
| _ ->
29
-
() (* Non-HTML or inside SVG *)
30
-
31
-
let end_element state ~tag _collector =
32
-
match tag with
33
-
| Tag.Svg _ when state.svg_depth > 0 ->
34
-
state.svg_depth <- state.svg_depth - 1
35
-
| _ -> ()
36
-
37
-
let checker = Checker.make ~create ~reset ~start_element ~end_element ()
-16
lib/check/specialized/h1_checker.mli
-16
lib/check/specialized/h1_checker.mli
···
1
-
(** H1 element counter checker.
2
-
3
-
This checker validates that documents don't have multiple h1 elements,
4
-
which can confuse document structure and accessibility tools.
5
-
6
-
{2 Validation Rules}
7
-
8
-
- Documents should have at most one [<h1>] element
9
-
- [<h1>] elements inside SVG content (foreignObject, desc) are not counted
10
-
11
-
{2 Error Messages}
12
-
13
-
- [Multiple_h1]: Document contains more than one h1 element *)
14
-
15
-
val checker : Checker.t
16
-
(** The H1 checker instance. *)
+19
-107
lib/check/specialized/heading_checker.ml
+19
-107
lib/check/specialized/heading_checker.ml
···
1
1
(** Heading structure validation checker.
2
2
3
3
This checker validates that:
4
-
- Heading levels don't skip (e.g., h1 to h3)
5
-
- Documents have at least one heading
6
-
- Multiple h1 usage is noted
7
-
- Headings are not empty *)
4
+
- Multiple h1 usage is reported as an error
5
+
6
+
Note: Additional accessibility checks (first heading should be h1, skipped
7
+
levels, empty headings) are intentionally not included as errors since they
8
+
are recommendations rather than HTML5 spec requirements. *)
8
9
9
10
(** Checker state tracking heading structure. *)
10
11
type state = {
11
-
mutable current_level : int option;
12
12
mutable h1_count : int;
13
-
mutable has_any_heading : bool;
14
-
mutable first_heading_checked : bool;
15
-
mutable in_heading : Tag.html_tag option;
16
-
mutable heading_has_text : bool;
17
13
mutable svg_depth : int; (* Track depth inside SVG - headings in SVG don't count *)
18
14
}
19
15
20
-
let create () =
21
-
{
22
-
current_level = None;
23
-
h1_count = 0;
24
-
has_any_heading = false;
25
-
first_heading_checked = false;
26
-
in_heading = None;
27
-
heading_has_text = false;
28
-
svg_depth = 0;
29
-
}
16
+
let create () = {
17
+
h1_count = 0;
18
+
svg_depth = 0;
19
+
}
30
20
31
21
let reset state =
32
-
state.current_level <- None;
33
22
state.h1_count <- 0;
34
-
state.has_any_heading <- false;
35
-
state.first_heading_checked <- false;
36
-
state.in_heading <- None;
37
-
state.heading_has_text <- false;
38
23
state.svg_depth <- 0
39
24
40
-
(** Check if text is effectively empty (only whitespace). *)
41
-
let is_empty_text text =
42
-
let rec check i =
43
-
if i >= String.length text then
44
-
true
45
-
else
46
-
match text.[i] with
47
-
| ' ' | '\t' | '\n' | '\r' -> check (i + 1)
48
-
| _ -> false
49
-
in
50
-
check 0
51
-
52
25
let start_element state ~element collector =
53
26
match element.Element.tag with
54
27
| Tag.Svg _ ->
55
-
(* Track SVG depth - headings inside SVG (foreignObject, desc) don't count *)
28
+
(* Track SVG depth - h1 inside SVG (foreignObject, desc) shouldn't count *)
56
29
state.svg_depth <- state.svg_depth + 1
57
-
| Tag.Html (#Tag.heading_tag as h) when state.svg_depth = 0 ->
58
-
let level = match Tag.heading_level h with Some l -> l | None -> 0 in
59
-
let name = Tag.html_tag_to_string h in
60
-
state.has_any_heading <- true;
61
-
62
-
(* Check if this is the first heading *)
63
-
if not state.first_heading_checked then begin
64
-
state.first_heading_checked <- true;
65
-
if level <> 1 then
66
-
Message_collector.add_typed collector
67
-
(`Generic (Printf.sprintf
68
-
"First heading in document is <%s>, should typically be <h1>" name))
69
-
end;
70
-
71
-
(* Track h1 count *)
72
-
if level = 1 then begin
73
-
state.h1_count <- state.h1_count + 1;
74
-
if state.h1_count > 1 then
75
-
Message_collector.add_typed collector (`Misc `Multiple_h1)
76
-
end;
77
-
78
-
(* Check for skipped levels *)
79
-
begin match state.current_level with
80
-
| None ->
81
-
state.current_level <- Some level
82
-
| Some prev_level ->
83
-
let diff = level - prev_level in
84
-
if diff > 1 then
85
-
Message_collector.add_typed collector
86
-
(`Generic (Printf.sprintf
87
-
"Heading level skipped: <%s> follows <h%d>, skipping %d level%s. This can confuse screen reader users"
88
-
name prev_level (diff - 1) (if diff > 2 then "s" else "")));
89
-
state.current_level <- Some level
90
-
end;
91
-
92
-
(* Track that we're in a heading to check for empty content *)
93
-
state.in_heading <- Some h;
94
-
state.heading_has_text <- false
30
+
| Tag.Html `H1 when state.svg_depth = 0 ->
31
+
state.h1_count <- state.h1_count + 1;
32
+
if state.h1_count > 1 then
33
+
Message_collector.add_typed collector (`Misc `Multiple_h1)
95
34
| _ -> ()
96
35
97
-
let end_element state ~tag collector =
98
-
(* Track SVG depth *)
99
-
(match tag with
100
-
| Tag.Svg _ when state.svg_depth > 0 ->
101
-
state.svg_depth <- state.svg_depth - 1
102
-
| _ -> ());
103
-
(* Check for empty headings *)
104
-
match state.in_heading, tag with
105
-
| Some h, Tag.Html h2 when h = h2 ->
106
-
if not state.heading_has_text then
107
-
Message_collector.add_typed collector
108
-
(`Generic (Printf.sprintf
109
-
"Heading <%s> is empty or contains only whitespace. Empty headings are problematic for screen readers"
110
-
(Tag.html_tag_to_string h)));
111
-
state.in_heading <- None;
112
-
state.heading_has_text <- false
36
+
let end_element state ~tag _collector =
37
+
match tag with
38
+
| Tag.Svg _ when state.svg_depth > 0 ->
39
+
state.svg_depth <- state.svg_depth - 1
113
40
| _ -> ()
114
41
115
-
let characters state text _collector =
116
-
(* If we're inside a heading, check if this text is non-whitespace *)
117
-
match state.in_heading with
118
-
| Some _ ->
119
-
if not (is_empty_text text) then
120
-
state.heading_has_text <- true
121
-
| None ->
122
-
()
123
-
124
-
let end_document state collector =
125
-
if not state.has_any_heading then
126
-
Message_collector.add_typed collector
127
-
(`Generic "Document contains no heading elements (h1-h6). Headings provide important document structure for accessibility")
128
-
129
-
let checker = Checker.make ~create ~reset ~start_element ~end_element
130
-
~characters ~end_document ()
42
+
let checker = Checker.make ~create ~reset ~start_element ~end_element ()