+19
lib/htmlrw_check/attr_utils.ml
+19
lib/htmlrw_check/attr_utils.ml
···
1
+
(** Common attribute utilities used across checkers. *)
2
+
3
+
type attrs = (string * string) list
4
+
5
+
let has_attr name attrs =
6
+
List.exists (fun (n, _) -> String.lowercase_ascii n = name) attrs
7
+
8
+
let get_attr name attrs =
9
+
List.find_map (fun (n, v) ->
10
+
if String.lowercase_ascii n = name then Some v else None
11
+
) attrs
12
+
13
+
let get_attr_or name ~default attrs =
14
+
Option.value ~default (get_attr name attrs)
15
+
16
+
let is_non_empty_attr name attrs =
17
+
match get_attr name attrs with
18
+
| Some v -> String.trim v <> ""
19
+
| None -> false
+2
-8
lib/htmlrw_check/datatype/dt_color.ml
+2
-8
lib/htmlrw_check/datatype/dt_color.ml
···
213
213
if String.length s = 0 then Error "Color value must not be empty"
214
214
else if List.mem s named_colors then Ok ()
215
215
else if String.length s > 0 && s.[0] = '#' then validate_hex_color s
216
-
else if
217
-
String.length s > 4
218
-
&& (String.sub s 0 4 = "rgb(" || String.sub s 0 5 = "rgba(")
219
-
then
216
+
else if String.starts_with ~prefix:"rgb(" s || String.starts_with ~prefix:"rgba(" s then
220
217
(* Basic validation for rgb/rgba - just check balanced parens *)
221
218
if s.[String.length s - 1] = ')' then Ok ()
222
219
else Error "rgb/rgba function must end with ')'"
223
-
else if
224
-
String.length s > 4
225
-
&& (String.sub s 0 4 = "hsl(" || String.sub s 0 5 = "hsla(")
226
-
then
220
+
else if String.starts_with ~prefix:"hsl(" s || String.starts_with ~prefix:"hsla(" s then
227
221
(* Basic validation for hsl/hsla - just check balanced parens *)
228
222
if s.[String.length s - 1] = ')' then Ok ()
229
223
else Error "hsl/hsla function must end with ')'"
+2
-2
lib/htmlrw_check/datatype/dt_media_query.ml
+2
-2
lib/htmlrw_check/datatype/dt_media_query.ml
···
330
330
331
331
(* Get base feature name for error messages (strip min-/max- prefix) *)
332
332
let base_feature =
333
-
if String.length feature > 4 && String.sub feature 0 4 = "min-" then
333
+
if String.starts_with ~prefix:"min-" feature then
334
334
String.sub feature 4 (String.length feature - 4)
335
-
else if String.length feature > 4 && String.sub feature 0 4 = "max-" then
335
+
else if String.starts_with ~prefix:"max-" feature then
336
336
String.sub feature 4 (String.length feature - 4)
337
337
else
338
338
feature
+77
-102
lib/htmlrw_check/message_format.ml
+77
-102
lib/htmlrw_check/message_format.ml
···
1
+
(** Get effective system_id, preferring location's system_id over the passed one *)
2
+
let get_system_id ?system_id loc_system_id =
3
+
loc_system_id
4
+
|> Option.fold ~none:system_id ~some:Option.some
5
+
|> Option.value ~default:"input"
6
+
1
7
let format_text ?system_id messages =
2
8
let buf = Buffer.create 1024 in
3
-
List.iter
4
-
(fun msg ->
5
-
let loc_str =
6
-
match msg.Message.location with
7
-
| Some loc -> (
8
-
let sid =
9
-
match loc.Message.system_id with
10
-
| Some s -> s
11
-
| None -> (
12
-
match system_id with Some s -> s | None -> "input")
13
-
in
14
-
let col_info =
15
-
match (loc.end_line, loc.end_column) with
16
-
| Some el, Some ec when el = loc.line && ec > loc.column ->
17
-
Printf.sprintf "%d.%d-%d" loc.line loc.column ec
18
-
| Some el, Some ec when el > loc.line ->
19
-
Printf.sprintf "%d.%d-%d.%d" loc.line loc.column el ec
20
-
| _ -> Printf.sprintf "%d.%d" loc.line loc.column
21
-
in
22
-
Printf.sprintf "%s:%s" sid col_info)
23
-
| None -> (
24
-
match system_id with Some s -> s | None -> "input")
25
-
in
26
-
let severity_str = Message.severity_to_string msg.Message.severity in
27
-
let code_str = " [" ^ Message.error_code_to_string msg.Message.error_code ^ "]" in
28
-
let elem_str =
29
-
match msg.Message.element with
30
-
| Some e -> " (element: " ^ e ^ ")"
31
-
| None -> ""
32
-
in
33
-
let attr_str =
34
-
match msg.Message.attribute with
35
-
| Some a -> " (attribute: " ^ a ^ ")"
36
-
| None -> ""
37
-
in
38
-
Buffer.add_string buf
39
-
(Printf.sprintf "%s: %s%s: %s%s%s\n" loc_str severity_str code_str
40
-
msg.Message.message elem_str attr_str))
41
-
messages;
9
+
List.iter (fun msg ->
10
+
let loc_str = match msg.Message.location with
11
+
| Some loc ->
12
+
let sid = get_system_id ?system_id loc.Message.system_id in
13
+
let col_info = match loc.end_line, loc.end_column with
14
+
| Some el, Some ec when el = loc.line && ec > loc.column ->
15
+
Printf.sprintf "%d.%d-%d" loc.line loc.column ec
16
+
| Some el, Some ec when el > loc.line ->
17
+
Printf.sprintf "%d.%d-%d.%d" loc.line loc.column el ec
18
+
| _ ->
19
+
Printf.sprintf "%d.%d" loc.line loc.column
20
+
in
21
+
Printf.sprintf "%s:%s" sid col_info
22
+
| None ->
23
+
Option.value system_id ~default:"input"
24
+
in
25
+
let elem_str = Option.fold ~none:"" ~some:(Printf.sprintf " (element: %s)") msg.Message.element in
26
+
let attr_str = Option.fold ~none:"" ~some:(Printf.sprintf " (attribute: %s)") msg.Message.attribute in
27
+
Buffer.add_string buf (Printf.sprintf "%s: %s [%s]: %s%s%s\n"
28
+
loc_str
29
+
(Message.severity_to_string msg.Message.severity)
30
+
(Message.error_code_to_string msg.Message.error_code)
31
+
msg.Message.message
32
+
elem_str
33
+
attr_str)
34
+
) messages;
42
35
Buffer.contents buf
43
36
44
37
let format_gnu ?system_id messages =
45
38
let buf = Buffer.create 1024 in
46
-
List.iter
47
-
(fun msg ->
48
-
let loc_str =
49
-
match msg.Message.location with
50
-
| Some loc -> (
51
-
let sid =
52
-
match loc.Message.system_id with
53
-
| Some s -> s
54
-
| None -> (
55
-
match system_id with Some s -> s | None -> "input")
56
-
in
57
-
Printf.sprintf "%s:%d:%d" sid loc.line loc.column)
58
-
| None -> (
59
-
match system_id with Some s -> s ^ ":0:0" | None -> "input:0:0")
60
-
in
61
-
let severity_str = Message.severity_to_string msg.Message.severity in
62
-
let code_str = " [" ^ Message.error_code_to_string msg.Message.error_code ^ "]" in
63
-
Buffer.add_string buf
64
-
(Printf.sprintf "%s: %s%s: %s\n" loc_str severity_str code_str
65
-
msg.Message.message))
66
-
messages;
39
+
List.iter (fun msg ->
40
+
let loc_str = match msg.Message.location with
41
+
| Some loc ->
42
+
Printf.sprintf "%s:%d:%d"
43
+
(get_system_id ?system_id loc.Message.system_id)
44
+
loc.line loc.column
45
+
| None ->
46
+
Option.value system_id ~default:"input" ^ ":0:0"
47
+
in
48
+
Buffer.add_string buf (Printf.sprintf "%s: %s [%s]: %s\n"
49
+
loc_str
50
+
(Message.severity_to_string msg.Message.severity)
51
+
(Message.error_code_to_string msg.Message.error_code)
52
+
msg.Message.message)
53
+
) messages;
67
54
Buffer.contents buf
68
55
69
56
let message_to_json ?system_id msg =
70
57
let open Jsont in
71
-
let severity = String (Message.severity_to_string msg.Message.severity, Meta.none) in
72
-
let message_text = String (msg.Message.message, Meta.none) in
73
-
let base = [ (("type", Meta.none), severity); (("message", Meta.none), message_text) ] in
74
-
let with_code =
75
-
(("subType", Meta.none), String (Message.error_code_to_string msg.Message.error_code, Meta.none)) :: base
76
-
in
77
-
let with_location =
78
-
match msg.Message.location with
58
+
let str s = String (s, Meta.none) in
59
+
let num n = Number (float_of_int n, Meta.none) in
60
+
let field name value = ((name, Meta.none), value) in
61
+
62
+
let base = [
63
+
field "type" (str (Message.severity_to_string msg.Message.severity));
64
+
field "message" (str msg.Message.message);
65
+
field "subType" (str (Message.error_code_to_string msg.Message.error_code));
66
+
] in
67
+
68
+
let with_location = match msg.Message.location with
79
69
| Some loc ->
80
-
let line = Number (float_of_int loc.Message.line, Meta.none) in
81
-
let column = Number (float_of_int loc.Message.column, Meta.none) in
82
-
let loc_fields =
83
-
[ (("firstLine", Meta.none), line); (("firstColumn", Meta.none), column) ]
84
-
in
85
-
let loc_fields =
86
-
match loc.Message.end_line with
87
-
| Some el ->
88
-
(("lastLine", Meta.none), Number (float_of_int el, Meta.none)) :: loc_fields
89
-
| None -> loc_fields
90
-
in
91
-
let loc_fields =
92
-
match loc.Message.end_column with
93
-
| Some ec ->
94
-
(("lastColumn", Meta.none), Number (float_of_int ec, Meta.none))
95
-
:: loc_fields
96
-
| None -> loc_fields
97
-
in
98
-
let url =
99
-
match loc.Message.system_id with
100
-
| Some s -> s
101
-
| None -> (
102
-
match system_id with Some s -> s | None -> "input")
103
-
in
104
-
(("url", Meta.none), String (url, Meta.none)) :: loc_fields @ with_code
70
+
let url = get_system_id ?system_id loc.Message.system_id in
71
+
let loc_fields = [
72
+
field "url" (str url);
73
+
field "firstLine" (num loc.line);
74
+
field "firstColumn" (num loc.column);
75
+
] in
76
+
let loc_fields = Option.fold ~none:loc_fields
77
+
~some:(fun el -> field "lastLine" (num el) :: loc_fields)
78
+
loc.Message.end_line in
79
+
let loc_fields = Option.fold ~none:loc_fields
80
+
~some:(fun ec -> field "lastColumn" (num ec) :: loc_fields)
81
+
loc.Message.end_column in
82
+
loc_fields @ base
105
83
| None ->
106
-
let url =
107
-
match system_id with Some s -> s | None -> "input"
108
-
in
109
-
(("url", Meta.none), String (url, Meta.none)) :: with_code
84
+
field "url" (str (Option.value system_id ~default:"input")) :: base
110
85
in
111
-
let with_extract =
112
-
match msg.Message.extract with
113
-
| Some e -> (("extract", Meta.none), String (e, Meta.none)) :: with_location
114
-
| None -> with_location
115
-
in
86
+
87
+
let with_extract = Option.fold ~none:with_location
88
+
~some:(fun e -> field "extract" (str e) :: with_location)
89
+
msg.Message.extract in
90
+
116
91
Object (with_extract, Meta.none)
117
92
118
93
let format_json ?system_id messages =
+10
-10
lib/htmlrw_check/parse_error_bridge.ml
+10
-10
lib/htmlrw_check/parse_error_bridge.ml
···
14
14
| Html5rw.Parse_error_code.Tree_construction_error s ->
15
15
(* Check for control-character/noncharacter/surrogate with codepoint info *)
16
16
(try
17
-
if String.length s > 28 && String.sub s 0 28 = "control-character-in-input-s" then
17
+
if String.starts_with ~prefix:"control-character-in-input-s" s then
18
18
let colon_pos = String.index s ':' in
19
19
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
20
20
let cp = int_of_string ("0x" ^ cp_str) in
21
21
Printf.sprintf "Forbidden code point U+%04x." cp
22
-
else if String.length s > 25 && String.sub s 0 25 = "noncharacter-in-input-str" then
22
+
else if String.starts_with ~prefix:"noncharacter-in-input-str" s then
23
23
let colon_pos = String.index s ':' in
24
24
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
25
25
let cp = int_of_string ("0x" ^ cp_str) in
26
26
Printf.sprintf "Forbidden code point U+%04x." cp
27
-
else if String.length s > 22 && String.sub s 0 22 = "surrogate-in-input-str" then
27
+
else if String.starts_with ~prefix:"surrogate-in-input-str" s then
28
28
let colon_pos = String.index s ':' in
29
29
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
30
30
let cp = int_of_string ("0x" ^ cp_str) in
31
31
Printf.sprintf "Forbidden code point U+%04x." cp
32
32
(* Character reference errors *)
33
-
else if String.length s > 28 && String.sub s 0 28 = "control-character-reference:" then
33
+
else if String.starts_with ~prefix:"control-character-reference:" s then
34
34
let cp_str = String.sub s 28 (String.length s - 28) in
35
35
let cp = int_of_string ("0x" ^ cp_str) in
36
36
if cp = 0x0D then
37
37
"A numeric character reference expanded to carriage return."
38
38
else
39
39
Printf.sprintf "Character reference expands to a control character (U+%04x)." cp
40
-
else if String.length s > 31 && String.sub s 0 31 = "noncharacter-character-referenc" then
40
+
else if String.starts_with ~prefix:"noncharacter-character-referenc" s then
41
41
let colon_pos = String.index s ':' in
42
42
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
43
43
let cp = int_of_string ("0x" ^ cp_str) in
···
49
49
Printf.sprintf "Character reference expands to an astral non-character (U+%05x)." cp
50
50
else
51
51
Printf.sprintf "Character reference expands to a non-character (U+%04x)." cp
52
-
else if String.length s > 36 && String.sub s 0 36 = "character-reference-outside-unicode-" then
52
+
else if String.starts_with ~prefix:"character-reference-outside-unicode-" s then
53
53
"Character reference outside the permissible Unicode range."
54
-
else if String.length s > 27 && String.sub s 0 27 = "surrogate-character-referen" then
54
+
else if String.starts_with ~prefix:"surrogate-character-referen" s then
55
55
let colon_pos = String.index s ':' in
56
56
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
57
57
let cp = int_of_string ("0x" ^ cp_str) in
···
64
64
"End tag \xe2\x80\x9cbr\xe2\x80\x9d."
65
65
else if s = "expected-closing-tag-but-got-eof" then
66
66
"End of file seen and there were open elements."
67
-
else if String.length s > 28 && String.sub s 0 28 = "bad-start-tag-in-head-noscri" then
67
+
else if String.starts_with ~prefix:"bad-start-tag-in-head-noscri" s then
68
68
let colon_pos = String.index s ':' in
69
69
let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
70
70
Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element
71
-
else if String.length s > 19 && String.sub s 0 19 = "unexpected-end-tag:" then
71
+
else if String.starts_with ~prefix:"unexpected-end-tag:" s then
72
72
let element = String.sub s 19 (String.length s - 19) in
73
73
Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element
74
-
else if String.length s > 19 && String.sub s 0 19 = "start-tag-in-table:" then
74
+
else if String.starts_with ~prefix:"start-tag-in-table:" s then
75
75
let tag = String.sub s 19 (String.length s - 19) in
76
76
Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag
77
77
else
+17
-39
lib/htmlrw_check/semantic/autofocus_checker.ml
+17
-39
lib/htmlrw_check/semantic/autofocus_checker.ml
···
3
3
Validates that only one element with autofocus attribute exists within
4
4
each dialog or popover context. *)
5
5
6
-
(** Context for tracking autofocus elements. *)
7
6
type context_type = Dialog | Popover
8
7
9
8
type context = {
···
26
25
state.context_stack <- [];
27
26
state.current_depth <- 0
28
27
29
-
(** Check if an attribute list contains a specific attribute. *)
30
-
let has_attr name attrs =
31
-
List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
32
-
33
-
(** Get an attribute value from the list. *)
34
-
let get_attr name attrs =
35
-
List.find_map (fun (attr_name, value) ->
36
-
if String.lowercase_ascii attr_name = name then Some value else None
37
-
) attrs
38
-
39
-
(** Check if element has popover attribute. *)
40
-
let has_popover attrs =
41
-
List.exists (fun (attr_name, _) ->
42
-
String.lowercase_ascii attr_name = "popover"
43
-
) attrs
44
-
45
28
let start_element state ~name ~namespace ~attrs collector =
46
-
let name_lower = String.lowercase_ascii name in
47
-
48
-
(* Track depth *)
49
29
state.current_depth <- state.current_depth + 1;
50
30
51
-
if namespace = None then begin
31
+
match namespace with
32
+
| Some _ -> ()
33
+
| None ->
34
+
let name_lower = String.lowercase_ascii name in
35
+
52
36
(* Check if we're entering a dialog or popover context *)
53
-
let enters_context =
54
-
if name_lower = "dialog" then Some Dialog
55
-
else if has_popover attrs then Some Popover
56
-
else None
37
+
let enters_context = match name_lower with
38
+
| "dialog" -> Some Dialog
39
+
| _ when Attr_utils.has_attr "popover" attrs -> Some Popover
40
+
| _ -> None
57
41
in
58
42
59
-
(match enters_context with
60
-
| Some ctx_type ->
43
+
Option.iter (fun ctx_type ->
61
44
let ctx = { context_type = ctx_type; autofocus_count = 0; depth = state.current_depth } in
62
45
state.context_stack <- ctx :: state.context_stack
63
-
| None -> ());
46
+
) enters_context;
64
47
65
48
(* Check for autofocus attribute *)
66
-
if has_attr "autofocus" attrs then begin
67
-
(* Increment count in innermost context if any *)
49
+
if Attr_utils.has_attr "autofocus" attrs then
68
50
match state.context_stack with
69
51
| ctx :: _ ->
70
52
ctx.autofocus_count <- ctx.autofocus_count + 1;
71
53
if ctx.autofocus_count > 1 then
72
54
Message_collector.add_typed collector (`Misc `Multiple_autofocus)
73
55
| [] -> ()
74
-
end
75
-
end
76
56
77
57
let end_element state ~name ~namespace _collector =
78
-
let name_lower = String.lowercase_ascii name in
79
-
80
-
if namespace = None then begin
81
-
(* Pop context if we're leaving one *)
58
+
(match namespace with
59
+
| Some _ -> ()
60
+
| None ->
61
+
let name_lower = String.lowercase_ascii name in
82
62
match state.context_stack with
83
63
| ctx :: rest when ctx.depth = state.current_depth ->
84
-
(* Verify this is the right element *)
85
64
let matches =
86
65
(name_lower = "dialog" && ctx.context_type = Dialog) ||
87
66
(ctx.context_type = Popover)
88
67
in
89
68
if matches then state.context_stack <- rest
90
-
| _ -> ()
91
-
end;
69
+
| _ -> ());
92
70
93
71
state.current_depth <- state.current_depth - 1
94
72
+1
-8
lib/htmlrw_check/semantic/form_checker.ml
+1
-8
lib/htmlrw_check/semantic/form_checker.ml
···
10
10
11
11
let reset _state = ()
12
12
13
-
(** Get the value of an attribute if present. *)
14
-
let get_attr name attrs =
15
-
List.find_map
16
-
(fun (attr_name, value) ->
17
-
if String.equal attr_name name then Some value else None)
18
-
attrs
19
-
20
13
(** Check if autocomplete value contains webauthn token *)
21
14
let contains_webauthn value =
22
15
let lower = String.lowercase_ascii value in
···
42
35
(* Check autocomplete attribute on form elements *)
43
36
match name with
44
37
| "input" | "select" | "textarea" ->
45
-
(match get_attr "autocomplete" attrs with
38
+
(match Attr_utils.get_attr "autocomplete" attrs with
46
39
| Some autocomplete_value ->
47
40
check_autocomplete_value autocomplete_value name collector
48
41
| None -> ())
+3
-8
lib/htmlrw_check/semantic/lang_detecting_checker.ml
+3
-8
lib/htmlrw_check/semantic/lang_detecting_checker.ml
···
60
60
let n = String.lowercase_ascii name in
61
61
n = "svg" || n = "math"
62
62
63
-
let get_attr name attrs =
64
-
List.find_map (fun (n, v) ->
65
-
if String.lowercase_ascii n = name then Some v else None
66
-
) attrs
67
-
68
63
let get_lang_code lang =
69
64
(* Extract primary language subtag *)
70
65
match String.split_on_char '-' lang with
···
226
221
let ns = Option.value namespace ~default:"" in
227
222
228
223
if name_lower = "html" then begin
229
-
state.html_lang <- get_attr "lang" attrs;
230
-
state.html_dir <- get_attr "dir" attrs;
224
+
state.html_lang <- Attr_utils.get_attr "lang" attrs;
225
+
state.html_dir <- Attr_utils.get_attr "dir" attrs;
231
226
(* TODO: get line/column from locator *)
232
227
state.html_locator <- Some (1, 1)
233
228
end
···
244
239
state.skip_depth <- state.skip_depth + 1
245
240
else begin
246
241
(* Check for different lang attribute *)
247
-
match get_attr "lang" attrs with
242
+
match Attr_utils.get_attr "lang" attrs with
248
243
| Some lang when state.html_lang <> Some lang ->
249
244
state.skip_depth <- state.skip_depth + 1
250
245
| _ -> ()
+1
-7
lib/htmlrw_check/semantic/option_checker.ml
+1
-7
lib/htmlrw_check/semantic/option_checker.ml
···
22
22
state.option_stack <- [];
23
23
state.in_template <- 0
24
24
25
-
(** Get attribute value if present. *)
26
-
let get_attr name attrs =
27
-
List.find_map (fun (attr_name, value) ->
28
-
if String.lowercase_ascii attr_name = name then Some value else None
29
-
) attrs
30
-
31
25
let start_element state ~name ~namespace ~attrs collector =
32
26
let name_lower = String.lowercase_ascii name in
33
27
···
36
30
if name_lower = "template" then
37
31
state.in_template <- state.in_template + 1
38
32
else if state.in_template = 0 && name_lower = "option" then begin
39
-
let label_opt = get_attr "label" attrs in
33
+
let label_opt = Attr_utils.get_attr "label" attrs in
40
34
let has_label = label_opt <> None in
41
35
let label_empty = match label_opt with
42
36
| Some v -> String.trim v = ""
+29
-40
lib/htmlrw_check/semantic/required_attr_checker.ml
+29
-40
lib/htmlrw_check/semantic/required_attr_checker.ml
···
13
13
state._in_figure <- false;
14
14
state.in_a_with_href <- false
15
15
16
-
(** Check if an attribute list contains a specific attribute. *)
17
-
let has_attr name attrs =
18
-
List.exists (fun (attr_name, _) -> String.equal attr_name name) attrs
19
-
20
-
(** Get the value of an attribute if present. *)
21
-
let get_attr name attrs =
22
-
List.find_map
23
-
(fun (attr_name, value) ->
24
-
if String.equal attr_name name then Some value else None)
25
-
attrs
26
-
27
16
let check_img_element state attrs collector =
28
17
(* Check for required src OR srcset attribute *)
29
-
if not (has_attr "src" attrs) && not (has_attr "srcset" attrs) then
18
+
if not (Attr_utils.has_attr "src" attrs) && not (Attr_utils.has_attr "srcset" attrs) then
30
19
Message_collector.add_typed collector (`Img `Missing_src_or_srcset);
31
20
32
21
(* Check for alt attribute - always required *)
33
-
if not (has_attr "alt" attrs) then
22
+
if not (Attr_utils.has_attr "alt" attrs) then
34
23
Message_collector.add_typed collector (`Img `Missing_alt);
35
24
36
25
(* Check ismap requires 'a' ancestor with href *)
37
-
if has_attr "ismap" attrs && not state.in_a_with_href then
26
+
if Attr_utils.has_attr "ismap" attrs && not state.in_a_with_href then
38
27
Message_collector.add_typed collector (`Img `Ismap_needs_href)
39
28
40
29
let check_area_element attrs collector =
41
30
(* area with href requires alt *)
42
-
if has_attr "href" attrs && not (has_attr "alt" attrs) then
31
+
if Attr_utils.has_attr "href" attrs && not (Attr_utils.has_attr "alt" attrs) then
43
32
Message_collector.add_typed collector
44
33
(`Attr (`Missing (`Elem "area", `Attr "alt")))
45
34
46
35
let check_input_element attrs collector =
47
-
match get_attr "type" attrs with
36
+
match Attr_utils.get_attr "type" attrs with
48
37
| Some "image" ->
49
38
(* input[type=image] requires alt *)
50
-
if not (has_attr "alt" attrs) then
39
+
if not (Attr_utils.has_attr "alt" attrs) then
51
40
Message_collector.add_typed collector
52
41
(`Attr (`Missing (`Elem "input", `Attr "alt")))
53
42
| Some "hidden" ->
54
43
(* input[type=hidden] should not have required attribute *)
55
-
if has_attr "required" attrs then
44
+
if Attr_utils.has_attr "required" attrs then
56
45
Message_collector.add_typed collector
57
46
(`Attr (`Not_allowed_when (`Attr "required", `Elem "input", `Condition "the type attribute is hidden")))
58
47
| Some "file" ->
59
48
(* input[type=file] should not have value attribute *)
60
-
if has_attr "value" attrs then
49
+
if Attr_utils.has_attr "value" attrs then
61
50
Message_collector.add_typed collector
62
51
(`Attr (`Not_allowed_when (`Attr "value", `Elem "input", `Condition "the type attribute is file")))
63
52
| _ -> ()
64
53
65
54
let check_script_element attrs _collector =
66
55
(* script requires src OR text content *)
67
-
if not (has_attr "src" attrs) then
56
+
if not (Attr_utils.has_attr "src" attrs) then
68
57
(* We can't check for text content here; that would need to be done
69
58
in end_element or with state tracking *)
70
59
()
···
76
65
- http-equiv AND content
77
66
- property AND content (RDFa)
78
67
- itemprop AND content (microdata) *)
79
-
let has_charset = has_attr "charset" attrs in
80
-
let has_name = has_attr "name" attrs in
81
-
let has_content = has_attr "content" attrs in
82
-
let has_http_equiv = has_attr "http-equiv" attrs in
83
-
let has_property = has_attr "property" attrs in
84
-
let has_itemprop = has_attr "itemprop" attrs in
68
+
let has_charset = Attr_utils.has_attr "charset" attrs in
69
+
let has_name = Attr_utils.has_attr "name" attrs in
70
+
let has_content = Attr_utils.has_attr "content" attrs in
71
+
let has_http_equiv = Attr_utils.has_attr "http-equiv" attrs in
72
+
let has_property = Attr_utils.has_attr "property" attrs in
73
+
let has_itemprop = Attr_utils.has_attr "itemprop" attrs in
85
74
86
75
let valid =
87
76
has_charset
···
100
89
101
90
let check_link_element attrs collector =
102
91
(* link[rel="stylesheet"] requires href *)
103
-
match get_attr "rel" attrs with
92
+
match Attr_utils.get_attr "rel" attrs with
104
93
| Some rel when String.equal rel "stylesheet" ->
105
-
if not (has_attr "href" attrs) then
94
+
if not (Attr_utils.has_attr "href" attrs) then
106
95
Message_collector.add_typed collector (`Link `Missing_href)
107
96
| _ -> ()
108
97
109
98
let check_a_element attrs collector =
110
99
(* a[download] requires href *)
111
-
if has_attr "download" attrs && not (has_attr "href" attrs) then
100
+
if Attr_utils.has_attr "download" attrs && not (Attr_utils.has_attr "href" attrs) then
112
101
Message_collector.add_typed collector
113
102
(`Attr (`Missing (`Elem "a", `Attr "href")))
114
103
115
104
let check_map_element attrs collector =
116
105
(* map requires name *)
117
-
if not (has_attr "name" attrs) then
106
+
if not (Attr_utils.has_attr "name" attrs) then
118
107
Message_collector.add_typed collector
119
108
(`Attr (`Missing (`Elem "map", `Attr "name")))
120
109
121
110
let check_object_element attrs collector =
122
111
(* object requires data attribute (or type attribute alone is not sufficient) *)
123
-
let has_data = has_attr "data" attrs in
124
-
let has_type = has_attr "type" attrs in
112
+
let has_data = Attr_utils.has_attr "data" attrs in
113
+
let has_type = Attr_utils.has_attr "type" attrs in
125
114
if not has_data && has_type then
126
115
Message_collector.add_typed collector
127
116
(`Attr (`Missing (`Elem "object", `Attr "data")))
128
117
129
118
let check_popover_element element_name attrs collector =
130
119
(* popover attribute must have valid value *)
131
-
match get_attr "popover" attrs with
120
+
match Attr_utils.get_attr "popover" attrs with
132
121
| Some value ->
133
122
let value_lower = String.lowercase_ascii value in
134
123
(* Valid values: empty string, auto, manual, hint *)
···
141
130
142
131
let check_meter_element attrs collector =
143
132
(* meter requires value attribute *)
144
-
if not (has_attr "value" attrs) then
133
+
if not (Attr_utils.has_attr "value" attrs) then
145
134
Message_collector.add_typed collector
146
135
(`Attr (`Missing (`Elem "meter", `Attr "value")))
147
136
else begin
148
137
(* Validate min <= value constraint *)
149
-
match get_attr "value" attrs, get_attr "min" attrs with
138
+
match Attr_utils.get_attr "value" attrs, Attr_utils.get_attr "min" attrs with
150
139
| Some value_str, Some min_str ->
151
140
(try
152
141
let value = float_of_string value_str in
···
162
151
163
152
let check_progress_element attrs collector =
164
153
(* Validate progress value constraints *)
165
-
match get_attr "value" attrs with
154
+
match Attr_utils.get_attr "value" attrs with
166
155
| None -> () (* value is optional *)
167
156
| Some value_str ->
168
157
(try
169
158
let value = float_of_string value_str in
170
-
let max_val = match get_attr "max" attrs with
159
+
let max_val = match Attr_utils.get_attr "max" attrs with
171
160
| None -> 1.0 (* default max is 1 *)
172
161
| Some max_str -> (try float_of_string max_str with _ -> 1.0)
173
162
in
174
163
if value > max_val then
175
164
let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
176
165
(* Check which message to use based on whether max is present *)
177
-
if has_attr "max" attrs then
166
+
if Attr_utils.has_attr "max" attrs then
178
167
Message_collector.add_typed collector
179
168
(`Generic (
180
169
(* Note: double space before "value" matches Nu validator quirk *)
···
198
187
| "link" -> check_link_element attrs collector
199
188
| "a" ->
200
189
check_a_element attrs collector;
201
-
if has_attr "href" attrs then state.in_a_with_href <- true
190
+
if Attr_utils.has_attr "href" attrs then state.in_a_with_href <- true
202
191
| "map" -> check_map_element attrs collector
203
192
| "object" -> check_object_element attrs collector
204
193
| "meter" -> check_meter_element attrs collector
···
206
195
| "figure" -> state._in_figure <- true
207
196
| _ ->
208
197
(* Check popover attribute on any element *)
209
-
if has_attr "popover" attrs then check_popover_element name attrs collector
198
+
if Attr_utils.has_attr "popover" attrs then check_popover_element name attrs collector
210
199
211
200
let end_element state ~name ~namespace:_ _collector =
212
201
match name with
+1
-1
lib/htmlrw_check/specialized/aria_checker.ml
+1
-1
lib/htmlrw_check/specialized/aria_checker.ml
···
491
491
if name_lower = "br" || name_lower = "wbr" then begin
492
492
List.iter (fun (attr_name, _) ->
493
493
let attr_lower = String.lowercase_ascii attr_name in
494
-
if String.length attr_lower > 5 && String.sub attr_lower 0 5 = "aria-" &&
494
+
if String.starts_with ~prefix:"aria-" attr_lower &&
495
495
attr_lower <> "aria-hidden" then
496
496
Message_collector.add_typed collector
497
497
(`Attr (`Not_allowed (`Attr attr_name, `Elem name)))
+116
-97
lib/htmlrw_check/specialized/attr_restrictions_checker.ml
+116
-97
lib/htmlrw_check/specialized/attr_restrictions_checker.ml
···
41
41
let create () = { is_xhtml = false }
42
42
let reset state = state.is_xhtml <- false
43
43
44
-
(** Check if an attribute list contains a specific attribute. *)
45
-
let has_attr name attrs =
46
-
List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
47
-
48
-
(** Get an attribute value from the list. *)
49
-
let get_attr name attrs =
50
-
List.find_map (fun (attr_name, value) ->
51
-
if String.lowercase_ascii attr_name = name then Some value else None
52
-
) attrs
53
-
54
44
(** Input types that allow the list attribute. *)
55
45
let input_types_allowing_list = [
56
46
"color"; "date"; "datetime-local"; "email"; "month"; "number";
···
67
57
68
58
(* Detect XHTML mode from xmlns attribute on html element *)
69
59
if name_lower = "html" then begin
70
-
let xmlns_value = get_attr "xmlns" attrs in
71
-
match xmlns_value with
60
+
match Attr_utils.get_attr "xmlns" attrs with
72
61
| Some "http://www.w3.org/1999/xhtml" -> state.is_xhtml <- true
73
62
| _ -> ()
74
63
end;
75
64
76
65
(* Check HTML element attribute restrictions *)
77
-
if namespace = None then begin
66
+
(match namespace with
67
+
| Some _ -> ()
68
+
| None ->
78
69
match List.assoc_opt name_lower disallowed_attrs_html with
79
70
| Some disallowed ->
80
71
List.iter (fun attr ->
81
-
if has_attr attr attrs then
72
+
if Attr_utils.has_attr attr attrs then
82
73
report_disallowed_attr name_lower attr collector
83
74
) disallowed
84
-
| None -> ()
85
-
end;
75
+
| None -> ());
86
76
87
77
(* Check for xml:base attribute - not allowed in HTML *)
88
-
if namespace = None && name_lower = "html" then begin
89
-
if has_attr "xml:base" attrs then
78
+
(match namespace with
79
+
| Some _ -> ()
80
+
| None when name_lower = "html" ->
81
+
if Attr_utils.has_attr "xml:base" attrs then
90
82
report_disallowed_attr name_lower "xml:base" collector
91
-
end;
83
+
| None -> ());
92
84
93
85
(* Check for xmlns:* prefixed attributes - not allowed in HTML *)
94
86
(* Standard xmlns declarations are allowed but custom prefixes are not *)
95
-
if namespace = None then begin
87
+
(match namespace with
88
+
| Some _ -> ()
89
+
| None ->
96
90
List.iter (fun (attr_name, _) ->
97
91
let attr_lower = String.lowercase_ascii attr_name in
98
-
if String.length attr_lower > 6 && String.sub attr_lower 0 6 = "xmlns:" then begin
92
+
if String.starts_with ~prefix:"xmlns:" attr_lower then begin
99
93
let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in
100
94
(* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *)
101
95
if prefix <> "xlink" && prefix <> "xml" then
102
96
Message_collector.add_typed collector
103
97
(`Attr (`Not_allowed_here (`Attr attr_name)))
104
98
end
105
-
) attrs
106
-
end;
99
+
) attrs);
107
100
108
101
(* Check SVG element restrictions - works in both HTML-embedded and XHTML SVG *)
109
102
(* xml:id is never valid on SVG elements in HTML5 *)
110
103
if List.mem name_lower svg_no_xml_id then begin
111
-
if has_attr "xml:id" attrs then
104
+
if Attr_utils.has_attr "xml:id" attrs then
112
105
report_disallowed_attr name_lower "xml:id" collector
113
106
end;
114
107
115
108
(* SVG feConvolveMatrix requires order attribute *)
116
109
if name_lower = "feconvolvematrix" then begin
117
-
if not (has_attr "order" attrs) then
110
+
if not (Attr_utils.has_attr "order" attrs) then
118
111
Message_collector.add_typed collector
119
112
(`Svg (`Missing_attr (`Elem "feConvolveMatrix", `Attr "order")))
120
113
end;
121
114
122
115
(* Validate style type attribute - must be "text/css" or omitted *)
123
-
if namespace = None && name_lower = "style" then begin
116
+
(match namespace with
117
+
| Some _ -> ()
118
+
| None when name_lower = "style" ->
124
119
List.iter (fun (attr_name, attr_value) ->
125
120
let attr_lower = String.lowercase_ascii attr_name in
126
121
if attr_lower = "type" then begin
···
129
124
Message_collector.add_typed collector (`Misc `Style_type_invalid)
130
125
end
131
126
) attrs
132
-
end;
127
+
| None -> ());
133
128
134
129
(* Validate object element requires data or type attribute *)
135
-
if namespace = None && name_lower = "object" then begin
136
-
let has_data = has_attr "data" attrs in
137
-
let has_type = has_attr "type" attrs in
130
+
(match namespace with
131
+
| Some _ -> ()
132
+
| None when name_lower = "object" ->
133
+
let has_data = Attr_utils.has_attr "data" attrs in
134
+
let has_type = Attr_utils.has_attr "type" attrs in
138
135
if not has_data && not has_type then
139
136
Message_collector.add_typed collector
140
137
(`Attr (`Missing (`Elem "object", `Attr "data")))
141
-
end;
138
+
| None -> ());
142
139
143
140
(* Validate link imagesizes/imagesrcset attributes *)
144
-
if namespace = None && name_lower = "link" then begin
145
-
let has_imagesizes = has_attr "imagesizes" attrs in
146
-
let has_imagesrcset = has_attr "imagesrcset" attrs in
147
-
let rel_value = get_attr "rel" attrs in
148
-
let as_value = get_attr "as" attrs in
141
+
(match namespace with
142
+
| Some _ -> ()
143
+
| None when name_lower = "link" ->
144
+
let has_imagesizes = Attr_utils.has_attr "imagesizes" attrs in
145
+
let has_imagesrcset = Attr_utils.has_attr "imagesrcset" attrs in
146
+
let rel_value = Attr_utils.get_attr "rel" attrs in
147
+
let as_value = Attr_utils.get_attr "as" attrs in
149
148
150
149
(* imagesizes requires imagesrcset *)
151
150
if has_imagesizes && not has_imagesrcset then
···
175
174
if not rel_is_preload then
176
175
Message_collector.add_typed collector (`Link `As_requires_preload)
177
176
| None -> ())
178
-
end;
177
+
| None -> ());
179
178
180
179
(* Validate img usemap attribute - must be hash-name reference with content *)
181
-
if namespace = None && name_lower = "img" then begin
180
+
(match namespace with
181
+
| Some _ -> ()
182
+
| None when name_lower = "img" ->
182
183
List.iter (fun (attr_name, attr_value) ->
183
184
let attr_lower = String.lowercase_ascii attr_name in
184
185
if attr_lower = "usemap" then begin
···
189
190
attr_value attr_name name))))
190
191
end
191
192
) attrs
192
-
end;
193
+
| None -> ());
193
194
194
195
(* Validate embed type attribute - must be valid MIME type *)
195
-
if namespace = None && name_lower = "embed" then begin
196
+
(match namespace with
197
+
| Some _ -> ()
198
+
| None when name_lower = "embed" ->
196
199
List.iter (fun (attr_name, attr_value) ->
197
200
let attr_lower = String.lowercase_ascii attr_name in
198
201
if attr_lower = "type" then begin
···
205
208
attr_value attr_name name msg))))
206
209
end
207
210
) attrs
208
-
end;
211
+
| None -> ());
209
212
210
213
(* Validate width/height on embed and img - must be non-negative integers *)
211
-
if namespace = None && (name_lower = "embed" || name_lower = "img" ||
212
-
name_lower = "video" || name_lower = "canvas" ||
213
-
name_lower = "iframe" || name_lower = "source") then begin
214
+
let is_dimension_element = name_lower = "embed" || name_lower = "img" ||
215
+
name_lower = "video" || name_lower = "canvas" ||
216
+
name_lower = "iframe" || name_lower = "source" in
217
+
(match namespace with
218
+
| Some _ -> ()
219
+
| None when is_dimension_element ->
214
220
List.iter (fun (attr_name, attr_value) ->
215
221
let attr_lower = String.lowercase_ascii attr_name in
216
222
if attr_lower = "width" || attr_lower = "height" then begin
···
255
261
end
256
262
end
257
263
) attrs
258
-
end;
264
+
| None -> ());
259
265
260
266
(* Validate area[shape=default] cannot have coords *)
261
-
if namespace = None && name_lower = "area" then begin
262
-
let shape_value = get_attr "shape" attrs in
263
-
match shape_value with
267
+
(match namespace with
268
+
| Some _ -> ()
269
+
| None when name_lower = "area" ->
270
+
(match Attr_utils.get_attr "shape" attrs with
264
271
| Some s when String.lowercase_ascii (String.trim s) = "default" ->
265
-
if has_attr "coords" attrs then
272
+
if Attr_utils.has_attr "coords" attrs then
266
273
Message_collector.add_typed collector
267
274
(`Attr (`Not_allowed (`Attr "coords", `Elem "area")))
268
-
| _ -> ()
269
-
end;
275
+
| _ -> ())
276
+
| None -> ());
270
277
271
278
(* Validate bdo element requires dir attribute, and dir cannot be "auto" *)
272
-
if namespace = None && name_lower = "bdo" then begin
273
-
let dir_value = get_attr "dir" attrs in
274
-
match dir_value with
279
+
(match namespace with
280
+
| Some _ -> ()
281
+
| None when name_lower = "bdo" ->
282
+
(match Attr_utils.get_attr "dir" attrs with
275
283
| None ->
276
284
Message_collector.add_typed collector (`Misc `Bdo_missing_dir)
277
285
| Some v when String.lowercase_ascii (String.trim v) = "auto" ->
278
286
Message_collector.add_typed collector (`Misc `Bdo_dir_auto)
279
-
| _ -> ()
280
-
end;
287
+
| _ -> ())
288
+
| None -> ());
281
289
282
290
(* Validate input list attribute - only allowed for certain types *)
283
-
if namespace = None && name_lower = "input" then begin
284
-
if has_attr "list" attrs then begin
285
-
let input_type = match get_attr "type" attrs with
286
-
| Some t -> String.lowercase_ascii (String.trim t)
287
-
| None -> "text" (* default type is text *)
288
-
in
291
+
(match namespace with
292
+
| Some _ -> ()
293
+
| None when name_lower = "input" ->
294
+
if Attr_utils.has_attr "list" attrs then begin
295
+
let input_type = Attr_utils.get_attr_or "type" ~default:"text" attrs
296
+
|> String.trim |> String.lowercase_ascii in
289
297
if not (List.mem input_type input_types_allowing_list) then
290
298
Message_collector.add_typed collector (`Input `List_not_allowed)
291
299
end
292
-
end;
300
+
| None -> ());
293
301
294
302
(* Validate data-* attributes *)
295
-
if namespace = None then begin
303
+
(match namespace with
304
+
| Some _ -> ()
305
+
| None ->
296
306
List.iter (fun (attr_name, _) ->
297
307
let attr_lower = String.lowercase_ascii attr_name in
298
308
(* Check if it starts with "data-" *)
299
-
if String.length attr_lower >= 5 && String.sub attr_lower 0 5 = "data-" then begin
309
+
if String.starts_with ~prefix:"data-" attr_lower then begin
300
310
let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in
301
311
(* Check if it's exactly "data-" with nothing after *)
302
312
if after_prefix = "" then
···
306
316
Message_collector.add_typed collector
307
317
(`Attr (`Data_invalid_name (`Reason "must be XML 1.0 4th ed. plus Namespaces NCNames")))
308
318
end
309
-
) attrs
310
-
end;
319
+
) attrs);
311
320
312
321
(* Validate xml:lang must have matching lang attribute - only in HTML mode, not XHTML *)
313
-
if namespace = None && not state.is_xhtml then begin
314
-
let xmllang_value = get_attr "xml:lang" attrs in
315
-
let lang_value = get_attr "lang" attrs in
316
-
match xmllang_value with
322
+
(match namespace with
323
+
| Some _ -> ()
324
+
| None when not state.is_xhtml ->
325
+
let xmllang_value = Attr_utils.get_attr "xml:lang" attrs in
326
+
let lang_value = Attr_utils.get_attr "lang" attrs in
327
+
(match xmllang_value with
317
328
| Some xmllang ->
318
329
(match lang_value with
319
330
| None ->
320
-
(* xml:lang without lang attribute *)
321
331
Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
322
332
| Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang ->
323
-
(* xml:lang and lang have different values - "lang present with same value" message *)
324
333
Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
325
334
| _ -> ())
326
-
| None -> ()
327
-
end;
335
+
| None -> ())
336
+
| None -> ());
328
337
329
338
(* Validate spellcheck attribute - must be "true" or "false" or empty *)
330
-
if namespace = None then begin
339
+
(match namespace with
340
+
| Some _ -> ()
341
+
| None ->
331
342
List.iter (fun (attr_name, attr_value) ->
332
343
let attr_lower = String.lowercase_ascii attr_name in
333
344
if attr_lower = "spellcheck" then begin
···
336
347
Message_collector.add_typed collector
337
348
(`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
338
349
end
339
-
) attrs
340
-
end;
350
+
) attrs);
341
351
342
352
(* Validate enterkeyhint attribute - must be one of specific values *)
343
-
if namespace = None then begin
353
+
(match namespace with
354
+
| Some _ -> ()
355
+
| None ->
344
356
let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in
345
357
List.iter (fun (attr_name, attr_value) ->
346
358
let attr_lower = String.lowercase_ascii attr_name in
···
350
362
Message_collector.add_typed collector
351
363
(`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
352
364
end
353
-
) attrs
354
-
end;
365
+
) attrs);
355
366
356
367
(* Validate headingoffset attribute - must be a number between 0 and 8 *)
357
-
if namespace = None then begin
368
+
(match namespace with
369
+
| Some _ -> ()
370
+
| None ->
358
371
List.iter (fun (attr_name, attr_value) ->
359
372
let attr_lower = String.lowercase_ascii attr_name in
360
373
if attr_lower = "headingoffset" then begin
···
370
383
if not is_valid then
371
384
Message_collector.add_typed collector (`Misc `Headingoffset_invalid)
372
385
end
373
-
) attrs
374
-
end;
386
+
) attrs);
375
387
376
388
(* Validate accesskey attribute - each key label must be a single code point *)
377
-
if namespace = None then begin
389
+
(match namespace with
390
+
| Some _ -> ()
391
+
| None ->
378
392
List.iter (fun (attr_name, attr_value) ->
379
393
let attr_lower = String.lowercase_ascii attr_name in
380
394
if attr_lower = "accesskey" then begin
···
419
433
in
420
434
find_duplicates [] keys
421
435
end
422
-
) attrs
423
-
end;
436
+
) attrs);
424
437
425
438
(* Validate that command and popovertarget cannot have aria-expanded *)
426
-
if namespace = None && name_lower = "button" then begin
427
-
let has_command = has_attr "command" attrs in
428
-
let has_popovertarget = has_attr "popovertarget" attrs in
429
-
let has_aria_expanded = has_attr "aria-expanded" attrs in
439
+
(match namespace with
440
+
| Some _ -> ()
441
+
| None when name_lower = "button" ->
442
+
let has_command = Attr_utils.has_attr "command" attrs in
443
+
let has_popovertarget = Attr_utils.has_attr "popovertarget" attrs in
444
+
let has_aria_expanded = Attr_utils.has_attr "aria-expanded" attrs in
430
445
431
446
if has_command && has_aria_expanded then
432
447
Message_collector.add_typed collector
···
437
452
Message_collector.add_typed collector
438
453
(`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
439
454
`Condition "a \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute")))
440
-
end;
455
+
| None -> ());
441
456
442
457
(* Note: data-* uppercase check requires XML parsing which preserves case.
443
458
The HTML5 parser normalizes attribute names to lowercase, so this check
···
446
461
ignore state.is_xhtml;
447
462
448
463
(* Validate media attribute on link, style, source elements *)
449
-
if namespace = None && (name_lower = "link" || name_lower = "style" || name_lower = "source") then begin
464
+
let is_media_element = name_lower = "link" || name_lower = "style" || name_lower = "source" in
465
+
(match namespace with
466
+
| Some _ -> ()
467
+
| None when is_media_element ->
450
468
List.iter (fun (attr_name, attr_value) ->
451
469
let attr_lower = String.lowercase_ascii attr_name in
452
470
if attr_lower = "media" then begin
···
462
480
end
463
481
end
464
482
) attrs
465
-
end;
483
+
| None -> ());
466
484
467
485
(* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *)
468
-
if namespace = None then begin
486
+
(match namespace with
487
+
| Some _ -> ()
488
+
| None ->
469
489
List.iter (fun (attr_name, attr_value) ->
470
490
let attr_lower = String.lowercase_ascii attr_name in
471
491
if attr_lower = "prefix" then begin
···
487
507
end
488
508
end
489
509
end
490
-
) attrs
491
-
end
510
+
) attrs)
492
511
493
512
let end_element _state ~name:_ ~namespace:_ _collector = ()
494
513
let characters _state _text _collector = ()
+5
-12
lib/htmlrw_check/specialized/base_checker.ml
+5
-12
lib/htmlrw_check/specialized/base_checker.ml
···
11
11
let reset state =
12
12
state.seen_link_or_script <- false
13
13
14
-
(** Check if an attribute list contains a specific attribute. *)
15
-
let has_attr name attrs =
16
-
List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
17
-
18
14
let start_element state ~name ~namespace ~attrs collector =
19
-
if namespace <> None then ()
20
-
else begin
21
-
let name_lower = String.lowercase_ascii name in
22
-
match name_lower with
15
+
match namespace with
16
+
| Some _ -> ()
17
+
| None ->
18
+
match String.lowercase_ascii name with
23
19
| "link" | "script" ->
24
20
state.seen_link_or_script <- true
25
21
| "base" ->
26
22
if state.seen_link_or_script then
27
23
Message_collector.add_typed collector (`Misc `Base_after_link_script);
28
24
(* base element must have href or target attribute *)
29
-
let has_href = has_attr "href" attrs in
30
-
let has_target = has_attr "target" attrs in
31
-
if not has_href && not has_target then
25
+
if not (Attr_utils.has_attr "href" attrs || Attr_utils.has_attr "target" attrs) then
32
26
Message_collector.add_typed collector (`Misc `Base_missing_href_or_target)
33
27
| _ -> ()
34
-
end
35
28
36
29
let end_element _state ~name:_ ~namespace:_ _collector = ()
37
30
let characters _state _text _collector = ()
+1
-6
lib/htmlrw_check/specialized/dl_checker.ml
+1
-6
lib/htmlrw_check/specialized/dl_checker.ml
···
57
57
| ctx :: _ -> Some ctx
58
58
| [] -> None
59
59
60
-
let get_attr name attrs =
61
-
List.find_map (fun (n, v) ->
62
-
if String.lowercase_ascii n = name then Some v else None
63
-
) attrs
64
-
65
60
let start_element state ~name ~namespace ~attrs collector =
66
61
let name_lower = String.lowercase_ascii name in
67
62
···
115
110
Message_collector.add_typed collector
116
111
(`Element (`Not_allowed_as_child (`Child "div", `Parent "dl")));
117
112
(* Check that role is only presentation or none *)
118
-
(match get_attr "role" attrs with
113
+
(match Attr_utils.get_attr "role" attrs with
119
114
| Some role_value ->
120
115
let role_lower = String.lowercase_ascii (String.trim role_value) in
121
116
if role_lower <> "presentation" && role_lower <> "none" then
+42
-76
lib/htmlrw_check/specialized/picture_checker.ml
+42
-76
lib/htmlrw_check/specialized/picture_checker.ml
···
66
66
state.always_matching_is_media_all <- false;
67
67
state.always_matching_is_media_empty <- false
68
68
69
-
(** Check if an attribute list contains a specific attribute. *)
70
-
let has_attr name attrs =
71
-
List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
72
-
73
69
(** Report disallowed attribute error *)
74
70
let report_disallowed_attr element attr collector =
75
71
Message_collector.add_typed collector
···
80
76
Message_collector.add_typed collector
81
77
(`Element (`Not_allowed_as_child (`Child child, `Parent parent)))
82
78
79
+
let check_disallowed_attrs element disallowed_list attrs collector =
80
+
List.iter (fun attr ->
81
+
if Attr_utils.has_attr attr attrs then
82
+
report_disallowed_attr element attr collector
83
+
) disallowed_list
84
+
83
85
let check_picture_attrs attrs collector =
84
-
List.iter (fun disallowed ->
85
-
if has_attr disallowed attrs then
86
-
report_disallowed_attr "picture" disallowed collector
87
-
) disallowed_picture_attrs
86
+
check_disallowed_attrs "picture" disallowed_picture_attrs attrs collector
88
87
89
88
let check_source_attrs_in_picture attrs collector =
90
-
List.iter (fun disallowed ->
91
-
if has_attr disallowed attrs then
92
-
report_disallowed_attr "source" disallowed collector
93
-
) disallowed_source_attrs_in_picture;
94
-
(* source in picture requires srcset *)
95
-
if not (has_attr "srcset" attrs) then
96
-
Message_collector.add_typed collector
97
-
(`Srcset `Source_missing_srcset)
89
+
check_disallowed_attrs "source" disallowed_source_attrs_in_picture attrs collector;
90
+
if not (Attr_utils.has_attr "srcset" attrs) then
91
+
Message_collector.add_typed collector (`Srcset `Source_missing_srcset)
98
92
99
93
let check_img_attrs attrs collector =
100
-
List.iter (fun disallowed ->
101
-
if has_attr disallowed attrs then
102
-
report_disallowed_attr "img" disallowed collector
103
-
) disallowed_img_attrs
94
+
check_disallowed_attrs "img" disallowed_img_attrs attrs collector
104
95
105
96
let start_element state ~name ~namespace ~attrs collector =
106
97
let name_lower = String.lowercase_ascii name in
···
112
103
end;
113
104
114
105
(* Rest of checks only apply to HTML namespace elements *)
115
-
if namespace = None then begin
116
-
match name_lower with
106
+
match namespace with
107
+
| Some _ -> ()
108
+
| None ->
109
+
(match name_lower with
117
110
| "picture" ->
118
111
(* Check if picture is in a disallowed parent context *)
119
112
(match state.parent_stack with
···
124
117
check_picture_attrs attrs collector;
125
118
state.in_picture <- true;
126
119
state.has_img_in_picture <- false;
127
-
state.picture_depth <- 0; (* Will be incremented to 1 at end of function *)
120
+
state.picture_depth <- 0;
128
121
state.children_in_picture <- [];
129
122
state.last_was_img <- false;
130
123
state.has_source_after_img <- false;
···
136
129
state.children_in_picture <- "source" :: state.children_in_picture;
137
130
if state.last_was_img then
138
131
state.has_source_after_img <- true;
139
-
(* Check for always-matching source followed by another source *)
140
132
if state.has_always_matching_source then
141
133
state.source_after_always_matching <- true;
142
-
(* A source is "always matching" if it has:
143
-
- no media and no type attribute, OR
144
-
- media attribute with empty/whitespace-only value, OR
145
-
- media="all" (with optional whitespace) *)
146
-
let media_value = List.find_map (fun (attr_name, v) ->
147
-
if String.lowercase_ascii attr_name = "media" then Some v else None
148
-
) attrs in
149
-
let has_type = has_attr "type" attrs in
134
+
(* A source is "always matching" if it has no media/type, or media="" or media="all" *)
135
+
let media_value = Attr_utils.get_attr "media" attrs in
136
+
let has_type = Attr_utils.has_attr "type" attrs in
150
137
let is_media_all = match media_value with
151
138
| Some v -> String.lowercase_ascii (String.trim v) = "all"
152
-
| None -> false
153
-
in
139
+
| None -> false in
154
140
let is_media_empty = match media_value with
155
141
| Some v -> String.trim v = ""
156
-
| None -> false
157
-
in
142
+
| None -> false in
158
143
let is_always_matching = match media_value with
159
-
| None -> not has_type (* no media, check if no type either *)
144
+
| None -> not has_type
160
145
| Some v ->
161
146
let trimmed = String.trim v in
162
147
trimmed = "" || String.lowercase_ascii trimmed = "all"
163
148
in
164
149
if is_always_matching then begin
165
150
state.has_always_matching_source <- true;
166
-
if is_media_all then
167
-
state.always_matching_is_media_all <- true
168
-
else if is_media_empty then
169
-
state.always_matching_is_media_empty <- true
151
+
(* Only set flags to true, never reset to false *)
152
+
if is_media_all then state.always_matching_is_media_all <- true;
153
+
if is_media_empty then state.always_matching_is_media_empty <- true
170
154
end
171
155
172
156
| "img" when state.in_picture && state.picture_depth = 1 ->
···
174
158
state.has_img_in_picture <- true;
175
159
state.children_in_picture <- "img" :: state.children_in_picture;
176
160
state.last_was_img <- true;
177
-
(* Check for multiple img elements *)
178
-
let img_count = List.filter (fun c -> c = "img") state.children_in_picture |> List.length in
161
+
let img_count = List.length (List.filter (( = ) "img") state.children_in_picture) in
179
162
if img_count > 1 then
180
163
report_disallowed_child "picture" "img" collector;
181
-
(* Check if always-matching source is followed by img with srcset *)
182
-
if state.has_always_matching_source && has_attr "srcset" attrs then begin
183
-
if state.always_matching_is_media_all then
184
-
Message_collector.add_typed collector (`Misc `Media_all)
185
-
else if state.always_matching_is_media_empty then
186
-
Message_collector.add_typed collector (`Misc `Media_empty)
187
-
else
188
-
Message_collector.add_typed collector (`Srcset `Source_needs_media_or_type)
189
-
end
164
+
if state.has_always_matching_source && Attr_utils.has_attr "srcset" attrs then
165
+
Message_collector.add_typed collector
166
+
(if state.always_matching_is_media_all then `Misc `Media_all
167
+
else if state.always_matching_is_media_empty then `Misc `Media_empty
168
+
else `Srcset `Source_needs_media_or_type)
190
169
191
170
| "script" when state.in_picture && state.picture_depth = 1 ->
192
171
state.children_in_picture <- "script" :: state.children_in_picture
···
197
176
| "img" ->
198
177
check_img_attrs attrs collector
199
178
200
-
| _ -> ()
201
-
end;
179
+
| _ -> ());
202
180
203
181
(* Track depth when inside picture *)
204
182
if state.in_picture then
···
209
187
state.parent_stack <- name_lower :: state.parent_stack
210
188
211
189
let end_element state ~name ~namespace collector =
212
-
if namespace <> None then ()
213
-
else begin
190
+
match namespace with
191
+
| Some _ -> ()
192
+
| None ->
214
193
let name_lower = String.lowercase_ascii name in
215
194
216
-
(* Track depth *)
217
195
if state.in_picture then
218
196
state.picture_depth <- state.picture_depth - 1;
219
197
220
198
if name_lower = "picture" && state.picture_depth = 0 then begin
221
-
(* Check if picture had img child *)
222
199
if not state.has_img_in_picture then
223
-
Message_collector.add_typed collector
224
-
(`Srcset `Picture_missing_img);
225
-
(* Check for source after img *)
200
+
Message_collector.add_typed collector (`Srcset `Picture_missing_img);
226
201
if state.has_source_after_img then
227
202
report_disallowed_child "picture" "source" collector;
228
-
(* Check for source after always-matching source *)
229
-
if state.source_after_always_matching then begin
230
-
if state.always_matching_is_media_all then
231
-
Message_collector.add_typed collector (`Misc `Media_all)
232
-
else if state.always_matching_is_media_empty then
233
-
Message_collector.add_typed collector (`Misc `Media_empty)
234
-
else
235
-
Message_collector.add_typed collector (`Srcset `Source_needs_media_or_type)
236
-
end;
237
-
203
+
if state.source_after_always_matching then
204
+
Message_collector.add_typed collector
205
+
(if state.always_matching_is_media_all then `Misc `Media_all
206
+
else if state.always_matching_is_media_empty then `Misc `Media_empty
207
+
else `Srcset `Source_needs_media_or_type);
238
208
state.in_picture <- false
239
209
end;
240
210
241
-
(* Pop from parent stack *)
242
-
state.parent_stack <- (match state.parent_stack with
243
-
| _ :: rest -> rest
244
-
| [] -> [])
245
-
end
211
+
state.parent_stack <- match state.parent_stack with _ :: rest -> rest | [] -> []
246
212
247
213
let characters state text collector =
248
214
(* Text in picture element is not allowed *)
+4
-8
lib/htmlrw_check/specialized/source_checker.ml
+4
-8
lib/htmlrw_check/specialized/source_checker.ml
···
23
23
| ctx :: _ -> ctx
24
24
| [] -> Other
25
25
26
-
(** Check if an attribute list contains a specific attribute. *)
27
-
let has_attr name attrs =
28
-
List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
29
-
30
26
let start_element state ~name ~namespace ~attrs collector =
31
27
if namespace <> None then ()
32
28
else begin
···
42
38
let ctx = current_context state in
43
39
begin match ctx with
44
40
| Video | Audio ->
45
-
if has_attr "srcset" attrs then
41
+
if Attr_utils.has_attr "srcset" attrs then
46
42
Message_collector.add_typed collector
47
43
(`Attr (`Not_allowed (`Attr "srcset", `Elem "source")));
48
-
if has_attr "sizes" attrs then
44
+
if Attr_utils.has_attr "sizes" attrs then
49
45
Message_collector.add_typed collector
50
46
(`Attr (`Not_allowed (`Attr "sizes", `Elem "source")));
51
-
if has_attr "width" attrs then
47
+
if Attr_utils.has_attr "width" attrs then
52
48
Message_collector.add_typed collector
53
49
(`Attr (`Not_allowed (`Attr "width", `Elem "source")));
54
-
if has_attr "height" attrs then
50
+
if Attr_utils.has_attr "height" attrs then
55
51
Message_collector.add_typed collector
56
52
(`Attr (`Not_allowed (`Attr "height", `Elem "source")))
57
53
| Picture | Other -> ()
+3
-9
lib/htmlrw_check/specialized/srcset_sizes_checker.ml
+3
-9
lib/htmlrw_check/specialized/srcset_sizes_checker.ml
···
14
14
let create () = ()
15
15
let reset _state = ()
16
16
17
-
(** Get attribute value *)
18
-
let get_attr name attrs =
19
-
List.find_map (fun (n, v) ->
20
-
if String.lowercase_ascii n = name then Some v else None
21
-
) attrs
22
-
23
17
(** Split string on a character while respecting parentheses *)
24
18
let split_respecting_parens ~sep s =
25
19
let len = String.length s in
···
971
965
972
966
(* SVG image elements should not have srcset *)
973
967
if namespace <> None && name_lower = "image" then begin
974
-
if get_attr "srcset" attrs <> None then
968
+
if Attr_utils.get_attr "srcset" attrs <> None then
975
969
Message_collector.add_typed collector
976
970
(`Attr (`Not_allowed (`Attr "srcset", `Elem "image")))
977
971
end;
···
980
974
else begin
981
975
(* Check sizes and srcset on img and source *)
982
976
if name_lower = "img" || name_lower = "source" then begin
983
-
let sizes_value = get_attr "sizes" attrs in
984
-
let srcset_value = get_attr "srcset" attrs in
977
+
let sizes_value = Attr_utils.get_attr "sizes" attrs in
978
+
let srcset_value = Attr_utils.get_attr "srcset" attrs in
985
979
let has_sizes = sizes_value <> None in
986
980
let has_srcset = srcset_value <> None in
987
981
+1
-2
lib/htmlrw_check/specialized/xhtml_content_checker.ml
+1
-2
lib/htmlrw_check/specialized/xhtml_content_checker.ml
···
46
46
(* Check if data-* attribute has uppercase characters *)
47
47
let check_data_attr_case attrs collector =
48
48
List.iter (fun (attr_name, _) ->
49
-
if String.length attr_name > 5 &&
50
-
String.sub attr_name 0 5 = "data-" then
49
+
if String.starts_with ~prefix:"data-" attr_name then
51
50
let suffix = String.sub attr_name 5 (String.length attr_name - 5) in
52
51
if String.exists (fun c -> c >= 'A' && c <= 'Z') suffix then
53
52
Message_collector.add_typed collector (`Attr `Data_uppercase)