+19
lib/htmlrw_check/attr_utils.ml
+19
lib/htmlrw_check/attr_utils.ml
···
···
1
+
(** Common attribute utilities used across checkers. *)
2
+
3
+
type attrs = (string * string) list
4
+
5
+
let has_attr name attrs =
6
+
List.exists (fun (n, _) -> String.lowercase_ascii n = name) attrs
7
+
8
+
let get_attr name attrs =
9
+
List.find_map (fun (n, v) ->
10
+
if String.lowercase_ascii n = name then Some v else None
11
+
) attrs
12
+
13
+
let get_attr_or name ~default attrs =
14
+
Option.value ~default (get_attr name attrs)
15
+
16
+
let is_non_empty_attr name attrs =
17
+
match get_attr name attrs with
18
+
| Some v -> String.trim v <> ""
19
+
| None -> false
+2
-8
lib/htmlrw_check/datatype/dt_color.ml
+2
-8
lib/htmlrw_check/datatype/dt_color.ml
···
213
if String.length s = 0 then Error "Color value must not be empty"
214
else if List.mem s named_colors then Ok ()
215
else if String.length s > 0 && s.[0] = '#' then validate_hex_color s
216
-
else if
217
-
String.length s > 4
218
-
&& (String.sub s 0 4 = "rgb(" || String.sub s 0 5 = "rgba(")
219
-
then
220
(* Basic validation for rgb/rgba - just check balanced parens *)
221
if s.[String.length s - 1] = ')' then Ok ()
222
else Error "rgb/rgba function must end with ')'"
223
-
else if
224
-
String.length s > 4
225
-
&& (String.sub s 0 4 = "hsl(" || String.sub s 0 5 = "hsla(")
226
-
then
227
(* Basic validation for hsl/hsla - just check balanced parens *)
228
if s.[String.length s - 1] = ')' then Ok ()
229
else Error "hsl/hsla function must end with ')'"
···
213
if String.length s = 0 then Error "Color value must not be empty"
214
else if List.mem s named_colors then Ok ()
215
else if String.length s > 0 && s.[0] = '#' then validate_hex_color s
216
+
else if String.starts_with ~prefix:"rgb(" s || String.starts_with ~prefix:"rgba(" s then
217
(* Basic validation for rgb/rgba - just check balanced parens *)
218
if s.[String.length s - 1] = ')' then Ok ()
219
else Error "rgb/rgba function must end with ')'"
220
+
else if String.starts_with ~prefix:"hsl(" s || String.starts_with ~prefix:"hsla(" s then
221
(* Basic validation for hsl/hsla - just check balanced parens *)
222
if s.[String.length s - 1] = ')' then Ok ()
223
else Error "hsl/hsla function must end with ')'"
+2
-2
lib/htmlrw_check/datatype/dt_media_query.ml
+2
-2
lib/htmlrw_check/datatype/dt_media_query.ml
···
330
331
(* Get base feature name for error messages (strip min-/max- prefix) *)
332
let base_feature =
333
-
if String.length feature > 4 && String.sub feature 0 4 = "min-" then
334
String.sub feature 4 (String.length feature - 4)
335
-
else if String.length feature > 4 && String.sub feature 0 4 = "max-" then
336
String.sub feature 4 (String.length feature - 4)
337
else
338
feature
···
330
331
(* Get base feature name for error messages (strip min-/max- prefix) *)
332
let base_feature =
333
+
if String.starts_with ~prefix:"min-" feature then
334
String.sub feature 4 (String.length feature - 4)
335
+
else if String.starts_with ~prefix:"max-" feature then
336
String.sub feature 4 (String.length feature - 4)
337
else
338
feature
+77
-102
lib/htmlrw_check/message_format.ml
+77
-102
lib/htmlrw_check/message_format.ml
···
1
let format_text ?system_id messages =
2
let buf = Buffer.create 1024 in
3
-
List.iter
4
-
(fun msg ->
5
-
let loc_str =
6
-
match msg.Message.location with
7
-
| Some loc -> (
8
-
let sid =
9
-
match loc.Message.system_id with
10
-
| Some s -> s
11
-
| None -> (
12
-
match system_id with Some s -> s | None -> "input")
13
-
in
14
-
let col_info =
15
-
match (loc.end_line, loc.end_column) with
16
-
| Some el, Some ec when el = loc.line && ec > loc.column ->
17
-
Printf.sprintf "%d.%d-%d" loc.line loc.column ec
18
-
| Some el, Some ec when el > loc.line ->
19
-
Printf.sprintf "%d.%d-%d.%d" loc.line loc.column el ec
20
-
| _ -> Printf.sprintf "%d.%d" loc.line loc.column
21
-
in
22
-
Printf.sprintf "%s:%s" sid col_info)
23
-
| None -> (
24
-
match system_id with Some s -> s | None -> "input")
25
-
in
26
-
let severity_str = Message.severity_to_string msg.Message.severity in
27
-
let code_str = " [" ^ Message.error_code_to_string msg.Message.error_code ^ "]" in
28
-
let elem_str =
29
-
match msg.Message.element with
30
-
| Some e -> " (element: " ^ e ^ ")"
31
-
| None -> ""
32
-
in
33
-
let attr_str =
34
-
match msg.Message.attribute with
35
-
| Some a -> " (attribute: " ^ a ^ ")"
36
-
| None -> ""
37
-
in
38
-
Buffer.add_string buf
39
-
(Printf.sprintf "%s: %s%s: %s%s%s\n" loc_str severity_str code_str
40
-
msg.Message.message elem_str attr_str))
41
-
messages;
42
Buffer.contents buf
43
44
let format_gnu ?system_id messages =
45
let buf = Buffer.create 1024 in
46
-
List.iter
47
-
(fun msg ->
48
-
let loc_str =
49
-
match msg.Message.location with
50
-
| Some loc -> (
51
-
let sid =
52
-
match loc.Message.system_id with
53
-
| Some s -> s
54
-
| None -> (
55
-
match system_id with Some s -> s | None -> "input")
56
-
in
57
-
Printf.sprintf "%s:%d:%d" sid loc.line loc.column)
58
-
| None -> (
59
-
match system_id with Some s -> s ^ ":0:0" | None -> "input:0:0")
60
-
in
61
-
let severity_str = Message.severity_to_string msg.Message.severity in
62
-
let code_str = " [" ^ Message.error_code_to_string msg.Message.error_code ^ "]" in
63
-
Buffer.add_string buf
64
-
(Printf.sprintf "%s: %s%s: %s\n" loc_str severity_str code_str
65
-
msg.Message.message))
66
-
messages;
67
Buffer.contents buf
68
69
let message_to_json ?system_id msg =
70
let open Jsont in
71
-
let severity = String (Message.severity_to_string msg.Message.severity, Meta.none) in
72
-
let message_text = String (msg.Message.message, Meta.none) in
73
-
let base = [ (("type", Meta.none), severity); (("message", Meta.none), message_text) ] in
74
-
let with_code =
75
-
(("subType", Meta.none), String (Message.error_code_to_string msg.Message.error_code, Meta.none)) :: base
76
-
in
77
-
let with_location =
78
-
match msg.Message.location with
79
| Some loc ->
80
-
let line = Number (float_of_int loc.Message.line, Meta.none) in
81
-
let column = Number (float_of_int loc.Message.column, Meta.none) in
82
-
let loc_fields =
83
-
[ (("firstLine", Meta.none), line); (("firstColumn", Meta.none), column) ]
84
-
in
85
-
let loc_fields =
86
-
match loc.Message.end_line with
87
-
| Some el ->
88
-
(("lastLine", Meta.none), Number (float_of_int el, Meta.none)) :: loc_fields
89
-
| None -> loc_fields
90
-
in
91
-
let loc_fields =
92
-
match loc.Message.end_column with
93
-
| Some ec ->
94
-
(("lastColumn", Meta.none), Number (float_of_int ec, Meta.none))
95
-
:: loc_fields
96
-
| None -> loc_fields
97
-
in
98
-
let url =
99
-
match loc.Message.system_id with
100
-
| Some s -> s
101
-
| None -> (
102
-
match system_id with Some s -> s | None -> "input")
103
-
in
104
-
(("url", Meta.none), String (url, Meta.none)) :: loc_fields @ with_code
105
| None ->
106
-
let url =
107
-
match system_id with Some s -> s | None -> "input"
108
-
in
109
-
(("url", Meta.none), String (url, Meta.none)) :: with_code
110
in
111
-
let with_extract =
112
-
match msg.Message.extract with
113
-
| Some e -> (("extract", Meta.none), String (e, Meta.none)) :: with_location
114
-
| None -> with_location
115
-
in
116
Object (with_extract, Meta.none)
117
118
let format_json ?system_id messages =
···
1
+
(** Get effective system_id, preferring location's system_id over the passed one *)
2
+
let get_system_id ?system_id loc_system_id =
3
+
loc_system_id
4
+
|> Option.fold ~none:system_id ~some:Option.some
5
+
|> Option.value ~default:"input"
6
+
7
let format_text ?system_id messages =
8
let buf = Buffer.create 1024 in
9
+
List.iter (fun msg ->
10
+
let loc_str = match msg.Message.location with
11
+
| Some loc ->
12
+
let sid = get_system_id ?system_id loc.Message.system_id in
13
+
let col_info = match loc.end_line, loc.end_column with
14
+
| Some el, Some ec when el = loc.line && ec > loc.column ->
15
+
Printf.sprintf "%d.%d-%d" loc.line loc.column ec
16
+
| Some el, Some ec when el > loc.line ->
17
+
Printf.sprintf "%d.%d-%d.%d" loc.line loc.column el ec
18
+
| _ ->
19
+
Printf.sprintf "%d.%d" loc.line loc.column
20
+
in
21
+
Printf.sprintf "%s:%s" sid col_info
22
+
| None ->
23
+
Option.value system_id ~default:"input"
24
+
in
25
+
let elem_str = Option.fold ~none:"" ~some:(Printf.sprintf " (element: %s)") msg.Message.element in
26
+
let attr_str = Option.fold ~none:"" ~some:(Printf.sprintf " (attribute: %s)") msg.Message.attribute in
27
+
Buffer.add_string buf (Printf.sprintf "%s: %s [%s]: %s%s%s\n"
28
+
loc_str
29
+
(Message.severity_to_string msg.Message.severity)
30
+
(Message.error_code_to_string msg.Message.error_code)
31
+
msg.Message.message
32
+
elem_str
33
+
attr_str)
34
+
) messages;
35
Buffer.contents buf
36
37
let format_gnu ?system_id messages =
38
let buf = Buffer.create 1024 in
39
+
List.iter (fun msg ->
40
+
let loc_str = match msg.Message.location with
41
+
| Some loc ->
42
+
Printf.sprintf "%s:%d:%d"
43
+
(get_system_id ?system_id loc.Message.system_id)
44
+
loc.line loc.column
45
+
| None ->
46
+
Option.value system_id ~default:"input" ^ ":0:0"
47
+
in
48
+
Buffer.add_string buf (Printf.sprintf "%s: %s [%s]: %s\n"
49
+
loc_str
50
+
(Message.severity_to_string msg.Message.severity)
51
+
(Message.error_code_to_string msg.Message.error_code)
52
+
msg.Message.message)
53
+
) messages;
54
Buffer.contents buf
55
56
let message_to_json ?system_id msg =
57
let open Jsont in
58
+
let str s = String (s, Meta.none) in
59
+
let num n = Number (float_of_int n, Meta.none) in
60
+
let field name value = ((name, Meta.none), value) in
61
+
62
+
let base = [
63
+
field "type" (str (Message.severity_to_string msg.Message.severity));
64
+
field "message" (str msg.Message.message);
65
+
field "subType" (str (Message.error_code_to_string msg.Message.error_code));
66
+
] in
67
+
68
+
let with_location = match msg.Message.location with
69
| Some loc ->
70
+
let url = get_system_id ?system_id loc.Message.system_id in
71
+
let loc_fields = [
72
+
field "url" (str url);
73
+
field "firstLine" (num loc.line);
74
+
field "firstColumn" (num loc.column);
75
+
] in
76
+
let loc_fields = Option.fold ~none:loc_fields
77
+
~some:(fun el -> field "lastLine" (num el) :: loc_fields)
78
+
loc.Message.end_line in
79
+
let loc_fields = Option.fold ~none:loc_fields
80
+
~some:(fun ec -> field "lastColumn" (num ec) :: loc_fields)
81
+
loc.Message.end_column in
82
+
loc_fields @ base
83
| None ->
84
+
field "url" (str (Option.value system_id ~default:"input")) :: base
85
in
86
+
87
+
let with_extract = Option.fold ~none:with_location
88
+
~some:(fun e -> field "extract" (str e) :: with_location)
89
+
msg.Message.extract in
90
+
91
Object (with_extract, Meta.none)
92
93
let format_json ?system_id messages =
+10
-10
lib/htmlrw_check/parse_error_bridge.ml
+10
-10
lib/htmlrw_check/parse_error_bridge.ml
···
14
| Html5rw.Parse_error_code.Tree_construction_error s ->
15
(* Check for control-character/noncharacter/surrogate with codepoint info *)
16
(try
17
-
if String.length s > 28 && String.sub s 0 28 = "control-character-in-input-s" then
18
let colon_pos = String.index s ':' in
19
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
20
let cp = int_of_string ("0x" ^ cp_str) in
21
Printf.sprintf "Forbidden code point U+%04x." cp
22
-
else if String.length s > 25 && String.sub s 0 25 = "noncharacter-in-input-str" then
23
let colon_pos = String.index s ':' in
24
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
25
let cp = int_of_string ("0x" ^ cp_str) in
26
Printf.sprintf "Forbidden code point U+%04x." cp
27
-
else if String.length s > 22 && String.sub s 0 22 = "surrogate-in-input-str" then
28
let colon_pos = String.index s ':' in
29
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
30
let cp = int_of_string ("0x" ^ cp_str) in
31
Printf.sprintf "Forbidden code point U+%04x." cp
32
(* Character reference errors *)
33
-
else if String.length s > 28 && String.sub s 0 28 = "control-character-reference:" then
34
let cp_str = String.sub s 28 (String.length s - 28) in
35
let cp = int_of_string ("0x" ^ cp_str) in
36
if cp = 0x0D then
37
"A numeric character reference expanded to carriage return."
38
else
39
Printf.sprintf "Character reference expands to a control character (U+%04x)." cp
40
-
else if String.length s > 31 && String.sub s 0 31 = "noncharacter-character-referenc" then
41
let colon_pos = String.index s ':' in
42
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
43
let cp = int_of_string ("0x" ^ cp_str) in
···
49
Printf.sprintf "Character reference expands to an astral non-character (U+%05x)." cp
50
else
51
Printf.sprintf "Character reference expands to a non-character (U+%04x)." cp
52
-
else if String.length s > 36 && String.sub s 0 36 = "character-reference-outside-unicode-" then
53
"Character reference outside the permissible Unicode range."
54
-
else if String.length s > 27 && String.sub s 0 27 = "surrogate-character-referen" then
55
let colon_pos = String.index s ':' in
56
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
57
let cp = int_of_string ("0x" ^ cp_str) in
···
64
"End tag \xe2\x80\x9cbr\xe2\x80\x9d."
65
else if s = "expected-closing-tag-but-got-eof" then
66
"End of file seen and there were open elements."
67
-
else if String.length s > 28 && String.sub s 0 28 = "bad-start-tag-in-head-noscri" then
68
let colon_pos = String.index s ':' in
69
let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
70
Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element
71
-
else if String.length s > 19 && String.sub s 0 19 = "unexpected-end-tag:" then
72
let element = String.sub s 19 (String.length s - 19) in
73
Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element
74
-
else if String.length s > 19 && String.sub s 0 19 = "start-tag-in-table:" then
75
let tag = String.sub s 19 (String.length s - 19) in
76
Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag
77
else
···
14
| Html5rw.Parse_error_code.Tree_construction_error s ->
15
(* Check for control-character/noncharacter/surrogate with codepoint info *)
16
(try
17
+
if String.starts_with ~prefix:"control-character-in-input-s" s then
18
let colon_pos = String.index s ':' in
19
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
20
let cp = int_of_string ("0x" ^ cp_str) in
21
Printf.sprintf "Forbidden code point U+%04x." cp
22
+
else if String.starts_with ~prefix:"noncharacter-in-input-str" s then
23
let colon_pos = String.index s ':' in
24
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
25
let cp = int_of_string ("0x" ^ cp_str) in
26
Printf.sprintf "Forbidden code point U+%04x." cp
27
+
else if String.starts_with ~prefix:"surrogate-in-input-str" s then
28
let colon_pos = String.index s ':' in
29
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
30
let cp = int_of_string ("0x" ^ cp_str) in
31
Printf.sprintf "Forbidden code point U+%04x." cp
32
(* Character reference errors *)
33
+
else if String.starts_with ~prefix:"control-character-reference:" s then
34
let cp_str = String.sub s 28 (String.length s - 28) in
35
let cp = int_of_string ("0x" ^ cp_str) in
36
if cp = 0x0D then
37
"A numeric character reference expanded to carriage return."
38
else
39
Printf.sprintf "Character reference expands to a control character (U+%04x)." cp
40
+
else if String.starts_with ~prefix:"noncharacter-character-referenc" s then
41
let colon_pos = String.index s ':' in
42
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
43
let cp = int_of_string ("0x" ^ cp_str) in
···
49
Printf.sprintf "Character reference expands to an astral non-character (U+%05x)." cp
50
else
51
Printf.sprintf "Character reference expands to a non-character (U+%04x)." cp
52
+
else if String.starts_with ~prefix:"character-reference-outside-unicode-" s then
53
"Character reference outside the permissible Unicode range."
54
+
else if String.starts_with ~prefix:"surrogate-character-referen" s then
55
let colon_pos = String.index s ':' in
56
let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
57
let cp = int_of_string ("0x" ^ cp_str) in
···
64
"End tag \xe2\x80\x9cbr\xe2\x80\x9d."
65
else if s = "expected-closing-tag-but-got-eof" then
66
"End of file seen and there were open elements."
67
+
else if String.starts_with ~prefix:"bad-start-tag-in-head-noscri" s then
68
let colon_pos = String.index s ':' in
69
let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in
70
Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element
71
+
else if String.starts_with ~prefix:"unexpected-end-tag:" s then
72
let element = String.sub s 19 (String.length s - 19) in
73
Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element
74
+
else if String.starts_with ~prefix:"start-tag-in-table:" s then
75
let tag = String.sub s 19 (String.length s - 19) in
76
Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag
77
else
+17
-39
lib/htmlrw_check/semantic/autofocus_checker.ml
+17
-39
lib/htmlrw_check/semantic/autofocus_checker.ml
···
3
Validates that only one element with autofocus attribute exists within
4
each dialog or popover context. *)
5
6
-
(** Context for tracking autofocus elements. *)
7
type context_type = Dialog | Popover
8
9
type context = {
···
26
state.context_stack <- [];
27
state.current_depth <- 0
28
29
-
(** Check if an attribute list contains a specific attribute. *)
30
-
let has_attr name attrs =
31
-
List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
32
-
33
-
(** Get an attribute value from the list. *)
34
-
let get_attr name attrs =
35
-
List.find_map (fun (attr_name, value) ->
36
-
if String.lowercase_ascii attr_name = name then Some value else None
37
-
) attrs
38
-
39
-
(** Check if element has popover attribute. *)
40
-
let has_popover attrs =
41
-
List.exists (fun (attr_name, _) ->
42
-
String.lowercase_ascii attr_name = "popover"
43
-
) attrs
44
-
45
let start_element state ~name ~namespace ~attrs collector =
46
-
let name_lower = String.lowercase_ascii name in
47
-
48
-
(* Track depth *)
49
state.current_depth <- state.current_depth + 1;
50
51
-
if namespace = None then begin
52
(* Check if we're entering a dialog or popover context *)
53
-
let enters_context =
54
-
if name_lower = "dialog" then Some Dialog
55
-
else if has_popover attrs then Some Popover
56
-
else None
57
in
58
59
-
(match enters_context with
60
-
| Some ctx_type ->
61
let ctx = { context_type = ctx_type; autofocus_count = 0; depth = state.current_depth } in
62
state.context_stack <- ctx :: state.context_stack
63
-
| None -> ());
64
65
(* Check for autofocus attribute *)
66
-
if has_attr "autofocus" attrs then begin
67
-
(* Increment count in innermost context if any *)
68
match state.context_stack with
69
| ctx :: _ ->
70
ctx.autofocus_count <- ctx.autofocus_count + 1;
71
if ctx.autofocus_count > 1 then
72
Message_collector.add_typed collector (`Misc `Multiple_autofocus)
73
| [] -> ()
74
-
end
75
-
end
76
77
let end_element state ~name ~namespace _collector =
78
-
let name_lower = String.lowercase_ascii name in
79
-
80
-
if namespace = None then begin
81
-
(* Pop context if we're leaving one *)
82
match state.context_stack with
83
| ctx :: rest when ctx.depth = state.current_depth ->
84
-
(* Verify this is the right element *)
85
let matches =
86
(name_lower = "dialog" && ctx.context_type = Dialog) ||
87
(ctx.context_type = Popover)
88
in
89
if matches then state.context_stack <- rest
90
-
| _ -> ()
91
-
end;
92
93
state.current_depth <- state.current_depth - 1
94
···
3
Validates that only one element with autofocus attribute exists within
4
each dialog or popover context. *)
5
6
type context_type = Dialog | Popover
7
8
type context = {
···
25
state.context_stack <- [];
26
state.current_depth <- 0
27
28
let start_element state ~name ~namespace ~attrs collector =
29
state.current_depth <- state.current_depth + 1;
30
31
+
match namespace with
32
+
| Some _ -> ()
33
+
| None ->
34
+
let name_lower = String.lowercase_ascii name in
35
+
36
(* Check if we're entering a dialog or popover context *)
37
+
let enters_context = match name_lower with
38
+
| "dialog" -> Some Dialog
39
+
| _ when Attr_utils.has_attr "popover" attrs -> Some Popover
40
+
| _ -> None
41
in
42
43
+
Option.iter (fun ctx_type ->
44
let ctx = { context_type = ctx_type; autofocus_count = 0; depth = state.current_depth } in
45
state.context_stack <- ctx :: state.context_stack
46
+
) enters_context;
47
48
(* Check for autofocus attribute *)
49
+
if Attr_utils.has_attr "autofocus" attrs then
50
match state.context_stack with
51
| ctx :: _ ->
52
ctx.autofocus_count <- ctx.autofocus_count + 1;
53
if ctx.autofocus_count > 1 then
54
Message_collector.add_typed collector (`Misc `Multiple_autofocus)
55
| [] -> ()
56
57
let end_element state ~name ~namespace _collector =
58
+
(match namespace with
59
+
| Some _ -> ()
60
+
| None ->
61
+
let name_lower = String.lowercase_ascii name in
62
match state.context_stack with
63
| ctx :: rest when ctx.depth = state.current_depth ->
64
let matches =
65
(name_lower = "dialog" && ctx.context_type = Dialog) ||
66
(ctx.context_type = Popover)
67
in
68
if matches then state.context_stack <- rest
69
+
| _ -> ());
70
71
state.current_depth <- state.current_depth - 1
72
+1
-8
lib/htmlrw_check/semantic/form_checker.ml
+1
-8
lib/htmlrw_check/semantic/form_checker.ml
···
10
11
let reset _state = ()
12
13
-
(** Get the value of an attribute if present. *)
14
-
let get_attr name attrs =
15
-
List.find_map
16
-
(fun (attr_name, value) ->
17
-
if String.equal attr_name name then Some value else None)
18
-
attrs
19
-
20
(** Check if autocomplete value contains webauthn token *)
21
let contains_webauthn value =
22
let lower = String.lowercase_ascii value in
···
42
(* Check autocomplete attribute on form elements *)
43
match name with
44
| "input" | "select" | "textarea" ->
45
-
(match get_attr "autocomplete" attrs with
46
| Some autocomplete_value ->
47
check_autocomplete_value autocomplete_value name collector
48
| None -> ())
···
10
11
let reset _state = ()
12
13
(** Check if autocomplete value contains webauthn token *)
14
let contains_webauthn value =
15
let lower = String.lowercase_ascii value in
···
35
(* Check autocomplete attribute on form elements *)
36
match name with
37
| "input" | "select" | "textarea" ->
38
+
(match Attr_utils.get_attr "autocomplete" attrs with
39
| Some autocomplete_value ->
40
check_autocomplete_value autocomplete_value name collector
41
| None -> ())
+3
-8
lib/htmlrw_check/semantic/lang_detecting_checker.ml
+3
-8
lib/htmlrw_check/semantic/lang_detecting_checker.ml
···
60
let n = String.lowercase_ascii name in
61
n = "svg" || n = "math"
62
63
-
let get_attr name attrs =
64
-
List.find_map (fun (n, v) ->
65
-
if String.lowercase_ascii n = name then Some v else None
66
-
) attrs
67
-
68
let get_lang_code lang =
69
(* Extract primary language subtag *)
70
match String.split_on_char '-' lang with
···
226
let ns = Option.value namespace ~default:"" in
227
228
if name_lower = "html" then begin
229
-
state.html_lang <- get_attr "lang" attrs;
230
-
state.html_dir <- get_attr "dir" attrs;
231
(* TODO: get line/column from locator *)
232
state.html_locator <- Some (1, 1)
233
end
···
244
state.skip_depth <- state.skip_depth + 1
245
else begin
246
(* Check for different lang attribute *)
247
-
match get_attr "lang" attrs with
248
| Some lang when state.html_lang <> Some lang ->
249
state.skip_depth <- state.skip_depth + 1
250
| _ -> ()
···
60
let n = String.lowercase_ascii name in
61
n = "svg" || n = "math"
62
63
let get_lang_code lang =
64
(* Extract primary language subtag *)
65
match String.split_on_char '-' lang with
···
221
let ns = Option.value namespace ~default:"" in
222
223
if name_lower = "html" then begin
224
+
state.html_lang <- Attr_utils.get_attr "lang" attrs;
225
+
state.html_dir <- Attr_utils.get_attr "dir" attrs;
226
(* TODO: get line/column from locator *)
227
state.html_locator <- Some (1, 1)
228
end
···
239
state.skip_depth <- state.skip_depth + 1
240
else begin
241
(* Check for different lang attribute *)
242
+
match Attr_utils.get_attr "lang" attrs with
243
| Some lang when state.html_lang <> Some lang ->
244
state.skip_depth <- state.skip_depth + 1
245
| _ -> ()
+1
-7
lib/htmlrw_check/semantic/option_checker.ml
+1
-7
lib/htmlrw_check/semantic/option_checker.ml
···
22
state.option_stack <- [];
23
state.in_template <- 0
24
25
-
(** Get attribute value if present. *)
26
-
let get_attr name attrs =
27
-
List.find_map (fun (attr_name, value) ->
28
-
if String.lowercase_ascii attr_name = name then Some value else None
29
-
) attrs
30
-
31
let start_element state ~name ~namespace ~attrs collector =
32
let name_lower = String.lowercase_ascii name in
33
···
36
if name_lower = "template" then
37
state.in_template <- state.in_template + 1
38
else if state.in_template = 0 && name_lower = "option" then begin
39
-
let label_opt = get_attr "label" attrs in
40
let has_label = label_opt <> None in
41
let label_empty = match label_opt with
42
| Some v -> String.trim v = ""
···
22
state.option_stack <- [];
23
state.in_template <- 0
24
25
let start_element state ~name ~namespace ~attrs collector =
26
let name_lower = String.lowercase_ascii name in
27
···
30
if name_lower = "template" then
31
state.in_template <- state.in_template + 1
32
else if state.in_template = 0 && name_lower = "option" then begin
33
+
let label_opt = Attr_utils.get_attr "label" attrs in
34
let has_label = label_opt <> None in
35
let label_empty = match label_opt with
36
| Some v -> String.trim v = ""
+29
-40
lib/htmlrw_check/semantic/required_attr_checker.ml
+29
-40
lib/htmlrw_check/semantic/required_attr_checker.ml
···
13
state._in_figure <- false;
14
state.in_a_with_href <- false
15
16
-
(** Check if an attribute list contains a specific attribute. *)
17
-
let has_attr name attrs =
18
-
List.exists (fun (attr_name, _) -> String.equal attr_name name) attrs
19
-
20
-
(** Get the value of an attribute if present. *)
21
-
let get_attr name attrs =
22
-
List.find_map
23
-
(fun (attr_name, value) ->
24
-
if String.equal attr_name name then Some value else None)
25
-
attrs
26
-
27
let check_img_element state attrs collector =
28
(* Check for required src OR srcset attribute *)
29
-
if not (has_attr "src" attrs) && not (has_attr "srcset" attrs) then
30
Message_collector.add_typed collector (`Img `Missing_src_or_srcset);
31
32
(* Check for alt attribute - always required *)
33
-
if not (has_attr "alt" attrs) then
34
Message_collector.add_typed collector (`Img `Missing_alt);
35
36
(* Check ismap requires 'a' ancestor with href *)
37
-
if has_attr "ismap" attrs && not state.in_a_with_href then
38
Message_collector.add_typed collector (`Img `Ismap_needs_href)
39
40
let check_area_element attrs collector =
41
(* area with href requires alt *)
42
-
if has_attr "href" attrs && not (has_attr "alt" attrs) then
43
Message_collector.add_typed collector
44
(`Attr (`Missing (`Elem "area", `Attr "alt")))
45
46
let check_input_element attrs collector =
47
-
match get_attr "type" attrs with
48
| Some "image" ->
49
(* input[type=image] requires alt *)
50
-
if not (has_attr "alt" attrs) then
51
Message_collector.add_typed collector
52
(`Attr (`Missing (`Elem "input", `Attr "alt")))
53
| Some "hidden" ->
54
(* input[type=hidden] should not have required attribute *)
55
-
if has_attr "required" attrs then
56
Message_collector.add_typed collector
57
(`Attr (`Not_allowed_when (`Attr "required", `Elem "input", `Condition "the type attribute is hidden")))
58
| Some "file" ->
59
(* input[type=file] should not have value attribute *)
60
-
if has_attr "value" attrs then
61
Message_collector.add_typed collector
62
(`Attr (`Not_allowed_when (`Attr "value", `Elem "input", `Condition "the type attribute is file")))
63
| _ -> ()
64
65
let check_script_element attrs _collector =
66
(* script requires src OR text content *)
67
-
if not (has_attr "src" attrs) then
68
(* We can't check for text content here; that would need to be done
69
in end_element or with state tracking *)
70
()
···
76
- http-equiv AND content
77
- property AND content (RDFa)
78
- itemprop AND content (microdata) *)
79
-
let has_charset = has_attr "charset" attrs in
80
-
let has_name = has_attr "name" attrs in
81
-
let has_content = has_attr "content" attrs in
82
-
let has_http_equiv = has_attr "http-equiv" attrs in
83
-
let has_property = has_attr "property" attrs in
84
-
let has_itemprop = has_attr "itemprop" attrs in
85
86
let valid =
87
has_charset
···
100
101
let check_link_element attrs collector =
102
(* link[rel="stylesheet"] requires href *)
103
-
match get_attr "rel" attrs with
104
| Some rel when String.equal rel "stylesheet" ->
105
-
if not (has_attr "href" attrs) then
106
Message_collector.add_typed collector (`Link `Missing_href)
107
| _ -> ()
108
109
let check_a_element attrs collector =
110
(* a[download] requires href *)
111
-
if has_attr "download" attrs && not (has_attr "href" attrs) then
112
Message_collector.add_typed collector
113
(`Attr (`Missing (`Elem "a", `Attr "href")))
114
115
let check_map_element attrs collector =
116
(* map requires name *)
117
-
if not (has_attr "name" attrs) then
118
Message_collector.add_typed collector
119
(`Attr (`Missing (`Elem "map", `Attr "name")))
120
121
let check_object_element attrs collector =
122
(* object requires data attribute (or type attribute alone is not sufficient) *)
123
-
let has_data = has_attr "data" attrs in
124
-
let has_type = has_attr "type" attrs in
125
if not has_data && has_type then
126
Message_collector.add_typed collector
127
(`Attr (`Missing (`Elem "object", `Attr "data")))
128
129
let check_popover_element element_name attrs collector =
130
(* popover attribute must have valid value *)
131
-
match get_attr "popover" attrs with
132
| Some value ->
133
let value_lower = String.lowercase_ascii value in
134
(* Valid values: empty string, auto, manual, hint *)
···
141
142
let check_meter_element attrs collector =
143
(* meter requires value attribute *)
144
-
if not (has_attr "value" attrs) then
145
Message_collector.add_typed collector
146
(`Attr (`Missing (`Elem "meter", `Attr "value")))
147
else begin
148
(* Validate min <= value constraint *)
149
-
match get_attr "value" attrs, get_attr "min" attrs with
150
| Some value_str, Some min_str ->
151
(try
152
let value = float_of_string value_str in
···
162
163
let check_progress_element attrs collector =
164
(* Validate progress value constraints *)
165
-
match get_attr "value" attrs with
166
| None -> () (* value is optional *)
167
| Some value_str ->
168
(try
169
let value = float_of_string value_str in
170
-
let max_val = match get_attr "max" attrs with
171
| None -> 1.0 (* default max is 1 *)
172
| Some max_str -> (try float_of_string max_str with _ -> 1.0)
173
in
174
if value > max_val then
175
let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
176
(* Check which message to use based on whether max is present *)
177
-
if has_attr "max" attrs then
178
Message_collector.add_typed collector
179
(`Generic (
180
(* Note: double space before "value" matches Nu validator quirk *)
···
198
| "link" -> check_link_element attrs collector
199
| "a" ->
200
check_a_element attrs collector;
201
-
if has_attr "href" attrs then state.in_a_with_href <- true
202
| "map" -> check_map_element attrs collector
203
| "object" -> check_object_element attrs collector
204
| "meter" -> check_meter_element attrs collector
···
206
| "figure" -> state._in_figure <- true
207
| _ ->
208
(* Check popover attribute on any element *)
209
-
if has_attr "popover" attrs then check_popover_element name attrs collector
210
211
let end_element state ~name ~namespace:_ _collector =
212
match name with
···
13
state._in_figure <- false;
14
state.in_a_with_href <- false
15
16
let check_img_element state attrs collector =
17
(* Check for required src OR srcset attribute *)
18
+
if not (Attr_utils.has_attr "src" attrs) && not (Attr_utils.has_attr "srcset" attrs) then
19
Message_collector.add_typed collector (`Img `Missing_src_or_srcset);
20
21
(* Check for alt attribute - always required *)
22
+
if not (Attr_utils.has_attr "alt" attrs) then
23
Message_collector.add_typed collector (`Img `Missing_alt);
24
25
(* Check ismap requires 'a' ancestor with href *)
26
+
if Attr_utils.has_attr "ismap" attrs && not state.in_a_with_href then
27
Message_collector.add_typed collector (`Img `Ismap_needs_href)
28
29
let check_area_element attrs collector =
30
(* area with href requires alt *)
31
+
if Attr_utils.has_attr "href" attrs && not (Attr_utils.has_attr "alt" attrs) then
32
Message_collector.add_typed collector
33
(`Attr (`Missing (`Elem "area", `Attr "alt")))
34
35
let check_input_element attrs collector =
36
+
match Attr_utils.get_attr "type" attrs with
37
| Some "image" ->
38
(* input[type=image] requires alt *)
39
+
if not (Attr_utils.has_attr "alt" attrs) then
40
Message_collector.add_typed collector
41
(`Attr (`Missing (`Elem "input", `Attr "alt")))
42
| Some "hidden" ->
43
(* input[type=hidden] should not have required attribute *)
44
+
if Attr_utils.has_attr "required" attrs then
45
Message_collector.add_typed collector
46
(`Attr (`Not_allowed_when (`Attr "required", `Elem "input", `Condition "the type attribute is hidden")))
47
| Some "file" ->
48
(* input[type=file] should not have value attribute *)
49
+
if Attr_utils.has_attr "value" attrs then
50
Message_collector.add_typed collector
51
(`Attr (`Not_allowed_when (`Attr "value", `Elem "input", `Condition "the type attribute is file")))
52
| _ -> ()
53
54
let check_script_element attrs _collector =
55
(* script requires src OR text content *)
56
+
if not (Attr_utils.has_attr "src" attrs) then
57
(* We can't check for text content here; that would need to be done
58
in end_element or with state tracking *)
59
()
···
65
- http-equiv AND content
66
- property AND content (RDFa)
67
- itemprop AND content (microdata) *)
68
+
let has_charset = Attr_utils.has_attr "charset" attrs in
69
+
let has_name = Attr_utils.has_attr "name" attrs in
70
+
let has_content = Attr_utils.has_attr "content" attrs in
71
+
let has_http_equiv = Attr_utils.has_attr "http-equiv" attrs in
72
+
let has_property = Attr_utils.has_attr "property" attrs in
73
+
let has_itemprop = Attr_utils.has_attr "itemprop" attrs in
74
75
let valid =
76
has_charset
···
89
90
let check_link_element attrs collector =
91
(* link[rel="stylesheet"] requires href *)
92
+
match Attr_utils.get_attr "rel" attrs with
93
| Some rel when String.equal rel "stylesheet" ->
94
+
if not (Attr_utils.has_attr "href" attrs) then
95
Message_collector.add_typed collector (`Link `Missing_href)
96
| _ -> ()
97
98
let check_a_element attrs collector =
99
(* a[download] requires href *)
100
+
if Attr_utils.has_attr "download" attrs && not (Attr_utils.has_attr "href" attrs) then
101
Message_collector.add_typed collector
102
(`Attr (`Missing (`Elem "a", `Attr "href")))
103
104
let check_map_element attrs collector =
105
(* map requires name *)
106
+
if not (Attr_utils.has_attr "name" attrs) then
107
Message_collector.add_typed collector
108
(`Attr (`Missing (`Elem "map", `Attr "name")))
109
110
let check_object_element attrs collector =
111
(* object requires data attribute (or type attribute alone is not sufficient) *)
112
+
let has_data = Attr_utils.has_attr "data" attrs in
113
+
let has_type = Attr_utils.has_attr "type" attrs in
114
if not has_data && has_type then
115
Message_collector.add_typed collector
116
(`Attr (`Missing (`Elem "object", `Attr "data")))
117
118
let check_popover_element element_name attrs collector =
119
(* popover attribute must have valid value *)
120
+
match Attr_utils.get_attr "popover" attrs with
121
| Some value ->
122
let value_lower = String.lowercase_ascii value in
123
(* Valid values: empty string, auto, manual, hint *)
···
130
131
let check_meter_element attrs collector =
132
(* meter requires value attribute *)
133
+
if not (Attr_utils.has_attr "value" attrs) then
134
Message_collector.add_typed collector
135
(`Attr (`Missing (`Elem "meter", `Attr "value")))
136
else begin
137
(* Validate min <= value constraint *)
138
+
match Attr_utils.get_attr "value" attrs, Attr_utils.get_attr "min" attrs with
139
| Some value_str, Some min_str ->
140
(try
141
let value = float_of_string value_str in
···
151
152
let check_progress_element attrs collector =
153
(* Validate progress value constraints *)
154
+
match Attr_utils.get_attr "value" attrs with
155
| None -> () (* value is optional *)
156
| Some value_str ->
157
(try
158
let value = float_of_string value_str in
159
+
let max_val = match Attr_utils.get_attr "max" attrs with
160
| None -> 1.0 (* default max is 1 *)
161
| Some max_str -> (try float_of_string max_str with _ -> 1.0)
162
in
163
if value > max_val then
164
let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in
165
(* Check which message to use based on whether max is present *)
166
+
if Attr_utils.has_attr "max" attrs then
167
Message_collector.add_typed collector
168
(`Generic (
169
(* Note: double space before "value" matches Nu validator quirk *)
···
187
| "link" -> check_link_element attrs collector
188
| "a" ->
189
check_a_element attrs collector;
190
+
if Attr_utils.has_attr "href" attrs then state.in_a_with_href <- true
191
| "map" -> check_map_element attrs collector
192
| "object" -> check_object_element attrs collector
193
| "meter" -> check_meter_element attrs collector
···
195
| "figure" -> state._in_figure <- true
196
| _ ->
197
(* Check popover attribute on any element *)
198
+
if Attr_utils.has_attr "popover" attrs then check_popover_element name attrs collector
199
200
let end_element state ~name ~namespace:_ _collector =
201
match name with
+1
-1
lib/htmlrw_check/specialized/aria_checker.ml
+1
-1
lib/htmlrw_check/specialized/aria_checker.ml
···
491
if name_lower = "br" || name_lower = "wbr" then begin
492
List.iter (fun (attr_name, _) ->
493
let attr_lower = String.lowercase_ascii attr_name in
494
-
if String.length attr_lower > 5 && String.sub attr_lower 0 5 = "aria-" &&
495
attr_lower <> "aria-hidden" then
496
Message_collector.add_typed collector
497
(`Attr (`Not_allowed (`Attr attr_name, `Elem name)))
···
491
if name_lower = "br" || name_lower = "wbr" then begin
492
List.iter (fun (attr_name, _) ->
493
let attr_lower = String.lowercase_ascii attr_name in
494
+
if String.starts_with ~prefix:"aria-" attr_lower &&
495
attr_lower <> "aria-hidden" then
496
Message_collector.add_typed collector
497
(`Attr (`Not_allowed (`Attr attr_name, `Elem name)))
+116
-97
lib/htmlrw_check/specialized/attr_restrictions_checker.ml
+116
-97
lib/htmlrw_check/specialized/attr_restrictions_checker.ml
···
41
let create () = { is_xhtml = false }
42
let reset state = state.is_xhtml <- false
43
44
-
(** Check if an attribute list contains a specific attribute. *)
45
-
let has_attr name attrs =
46
-
List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
47
-
48
-
(** Get an attribute value from the list. *)
49
-
let get_attr name attrs =
50
-
List.find_map (fun (attr_name, value) ->
51
-
if String.lowercase_ascii attr_name = name then Some value else None
52
-
) attrs
53
-
54
(** Input types that allow the list attribute. *)
55
let input_types_allowing_list = [
56
"color"; "date"; "datetime-local"; "email"; "month"; "number";
···
67
68
(* Detect XHTML mode from xmlns attribute on html element *)
69
if name_lower = "html" then begin
70
-
let xmlns_value = get_attr "xmlns" attrs in
71
-
match xmlns_value with
72
| Some "http://www.w3.org/1999/xhtml" -> state.is_xhtml <- true
73
| _ -> ()
74
end;
75
76
(* Check HTML element attribute restrictions *)
77
-
if namespace = None then begin
78
match List.assoc_opt name_lower disallowed_attrs_html with
79
| Some disallowed ->
80
List.iter (fun attr ->
81
-
if has_attr attr attrs then
82
report_disallowed_attr name_lower attr collector
83
) disallowed
84
-
| None -> ()
85
-
end;
86
87
(* Check for xml:base attribute - not allowed in HTML *)
88
-
if namespace = None && name_lower = "html" then begin
89
-
if has_attr "xml:base" attrs then
90
report_disallowed_attr name_lower "xml:base" collector
91
-
end;
92
93
(* Check for xmlns:* prefixed attributes - not allowed in HTML *)
94
(* Standard xmlns declarations are allowed but custom prefixes are not *)
95
-
if namespace = None then begin
96
List.iter (fun (attr_name, _) ->
97
let attr_lower = String.lowercase_ascii attr_name in
98
-
if String.length attr_lower > 6 && String.sub attr_lower 0 6 = "xmlns:" then begin
99
let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in
100
(* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *)
101
if prefix <> "xlink" && prefix <> "xml" then
102
Message_collector.add_typed collector
103
(`Attr (`Not_allowed_here (`Attr attr_name)))
104
end
105
-
) attrs
106
-
end;
107
108
(* Check SVG element restrictions - works in both HTML-embedded and XHTML SVG *)
109
(* xml:id is never valid on SVG elements in HTML5 *)
110
if List.mem name_lower svg_no_xml_id then begin
111
-
if has_attr "xml:id" attrs then
112
report_disallowed_attr name_lower "xml:id" collector
113
end;
114
115
(* SVG feConvolveMatrix requires order attribute *)
116
if name_lower = "feconvolvematrix" then begin
117
-
if not (has_attr "order" attrs) then
118
Message_collector.add_typed collector
119
(`Svg (`Missing_attr (`Elem "feConvolveMatrix", `Attr "order")))
120
end;
121
122
(* Validate style type attribute - must be "text/css" or omitted *)
123
-
if namespace = None && name_lower = "style" then begin
124
List.iter (fun (attr_name, attr_value) ->
125
let attr_lower = String.lowercase_ascii attr_name in
126
if attr_lower = "type" then begin
···
129
Message_collector.add_typed collector (`Misc `Style_type_invalid)
130
end
131
) attrs
132
-
end;
133
134
(* Validate object element requires data or type attribute *)
135
-
if namespace = None && name_lower = "object" then begin
136
-
let has_data = has_attr "data" attrs in
137
-
let has_type = has_attr "type" attrs in
138
if not has_data && not has_type then
139
Message_collector.add_typed collector
140
(`Attr (`Missing (`Elem "object", `Attr "data")))
141
-
end;
142
143
(* Validate link imagesizes/imagesrcset attributes *)
144
-
if namespace = None && name_lower = "link" then begin
145
-
let has_imagesizes = has_attr "imagesizes" attrs in
146
-
let has_imagesrcset = has_attr "imagesrcset" attrs in
147
-
let rel_value = get_attr "rel" attrs in
148
-
let as_value = get_attr "as" attrs in
149
150
(* imagesizes requires imagesrcset *)
151
if has_imagesizes && not has_imagesrcset then
···
175
if not rel_is_preload then
176
Message_collector.add_typed collector (`Link `As_requires_preload)
177
| None -> ())
178
-
end;
179
180
(* Validate img usemap attribute - must be hash-name reference with content *)
181
-
if namespace = None && name_lower = "img" then begin
182
List.iter (fun (attr_name, attr_value) ->
183
let attr_lower = String.lowercase_ascii attr_name in
184
if attr_lower = "usemap" then begin
···
189
attr_value attr_name name))))
190
end
191
) attrs
192
-
end;
193
194
(* Validate embed type attribute - must be valid MIME type *)
195
-
if namespace = None && name_lower = "embed" then begin
196
List.iter (fun (attr_name, attr_value) ->
197
let attr_lower = String.lowercase_ascii attr_name in
198
if attr_lower = "type" then begin
···
205
attr_value attr_name name msg))))
206
end
207
) attrs
208
-
end;
209
210
(* Validate width/height on embed and img - must be non-negative integers *)
211
-
if namespace = None && (name_lower = "embed" || name_lower = "img" ||
212
-
name_lower = "video" || name_lower = "canvas" ||
213
-
name_lower = "iframe" || name_lower = "source") then begin
214
List.iter (fun (attr_name, attr_value) ->
215
let attr_lower = String.lowercase_ascii attr_name in
216
if attr_lower = "width" || attr_lower = "height" then begin
···
255
end
256
end
257
) attrs
258
-
end;
259
260
(* Validate area[shape=default] cannot have coords *)
261
-
if namespace = None && name_lower = "area" then begin
262
-
let shape_value = get_attr "shape" attrs in
263
-
match shape_value with
264
| Some s when String.lowercase_ascii (String.trim s) = "default" ->
265
-
if has_attr "coords" attrs then
266
Message_collector.add_typed collector
267
(`Attr (`Not_allowed (`Attr "coords", `Elem "area")))
268
-
| _ -> ()
269
-
end;
270
271
(* Validate bdo element requires dir attribute, and dir cannot be "auto" *)
272
-
if namespace = None && name_lower = "bdo" then begin
273
-
let dir_value = get_attr "dir" attrs in
274
-
match dir_value with
275
| None ->
276
Message_collector.add_typed collector (`Misc `Bdo_missing_dir)
277
| Some v when String.lowercase_ascii (String.trim v) = "auto" ->
278
Message_collector.add_typed collector (`Misc `Bdo_dir_auto)
279
-
| _ -> ()
280
-
end;
281
282
(* Validate input list attribute - only allowed for certain types *)
283
-
if namespace = None && name_lower = "input" then begin
284
-
if has_attr "list" attrs then begin
285
-
let input_type = match get_attr "type" attrs with
286
-
| Some t -> String.lowercase_ascii (String.trim t)
287
-
| None -> "text" (* default type is text *)
288
-
in
289
if not (List.mem input_type input_types_allowing_list) then
290
Message_collector.add_typed collector (`Input `List_not_allowed)
291
end
292
-
end;
293
294
(* Validate data-* attributes *)
295
-
if namespace = None then begin
296
List.iter (fun (attr_name, _) ->
297
let attr_lower = String.lowercase_ascii attr_name in
298
(* Check if it starts with "data-" *)
299
-
if String.length attr_lower >= 5 && String.sub attr_lower 0 5 = "data-" then begin
300
let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in
301
(* Check if it's exactly "data-" with nothing after *)
302
if after_prefix = "" then
···
306
Message_collector.add_typed collector
307
(`Attr (`Data_invalid_name (`Reason "must be XML 1.0 4th ed. plus Namespaces NCNames")))
308
end
309
-
) attrs
310
-
end;
311
312
(* Validate xml:lang must have matching lang attribute - only in HTML mode, not XHTML *)
313
-
if namespace = None && not state.is_xhtml then begin
314
-
let xmllang_value = get_attr "xml:lang" attrs in
315
-
let lang_value = get_attr "lang" attrs in
316
-
match xmllang_value with
317
| Some xmllang ->
318
(match lang_value with
319
| None ->
320
-
(* xml:lang without lang attribute *)
321
Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
322
| Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang ->
323
-
(* xml:lang and lang have different values - "lang present with same value" message *)
324
Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
325
| _ -> ())
326
-
| None -> ()
327
-
end;
328
329
(* Validate spellcheck attribute - must be "true" or "false" or empty *)
330
-
if namespace = None then begin
331
List.iter (fun (attr_name, attr_value) ->
332
let attr_lower = String.lowercase_ascii attr_name in
333
if attr_lower = "spellcheck" then begin
···
336
Message_collector.add_typed collector
337
(`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
338
end
339
-
) attrs
340
-
end;
341
342
(* Validate enterkeyhint attribute - must be one of specific values *)
343
-
if namespace = None then begin
344
let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in
345
List.iter (fun (attr_name, attr_value) ->
346
let attr_lower = String.lowercase_ascii attr_name in
···
350
Message_collector.add_typed collector
351
(`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
352
end
353
-
) attrs
354
-
end;
355
356
(* Validate headingoffset attribute - must be a number between 0 and 8 *)
357
-
if namespace = None then begin
358
List.iter (fun (attr_name, attr_value) ->
359
let attr_lower = String.lowercase_ascii attr_name in
360
if attr_lower = "headingoffset" then begin
···
370
if not is_valid then
371
Message_collector.add_typed collector (`Misc `Headingoffset_invalid)
372
end
373
-
) attrs
374
-
end;
375
376
(* Validate accesskey attribute - each key label must be a single code point *)
377
-
if namespace = None then begin
378
List.iter (fun (attr_name, attr_value) ->
379
let attr_lower = String.lowercase_ascii attr_name in
380
if attr_lower = "accesskey" then begin
···
419
in
420
find_duplicates [] keys
421
end
422
-
) attrs
423
-
end;
424
425
(* Validate that command and popovertarget cannot have aria-expanded *)
426
-
if namespace = None && name_lower = "button" then begin
427
-
let has_command = has_attr "command" attrs in
428
-
let has_popovertarget = has_attr "popovertarget" attrs in
429
-
let has_aria_expanded = has_attr "aria-expanded" attrs in
430
431
if has_command && has_aria_expanded then
432
Message_collector.add_typed collector
···
437
Message_collector.add_typed collector
438
(`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
439
`Condition "a \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute")))
440
-
end;
441
442
(* Note: data-* uppercase check requires XML parsing which preserves case.
443
The HTML5 parser normalizes attribute names to lowercase, so this check
···
446
ignore state.is_xhtml;
447
448
(* Validate media attribute on link, style, source elements *)
449
-
if namespace = None && (name_lower = "link" || name_lower = "style" || name_lower = "source") then begin
450
List.iter (fun (attr_name, attr_value) ->
451
let attr_lower = String.lowercase_ascii attr_name in
452
if attr_lower = "media" then begin
···
462
end
463
end
464
) attrs
465
-
end;
466
467
(* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *)
468
-
if namespace = None then begin
469
List.iter (fun (attr_name, attr_value) ->
470
let attr_lower = String.lowercase_ascii attr_name in
471
if attr_lower = "prefix" then begin
···
487
end
488
end
489
end
490
-
) attrs
491
-
end
492
493
let end_element _state ~name:_ ~namespace:_ _collector = ()
494
let characters _state _text _collector = ()
···
41
let create () = { is_xhtml = false }
42
let reset state = state.is_xhtml <- false
43
44
(** Input types that allow the list attribute. *)
45
let input_types_allowing_list = [
46
"color"; "date"; "datetime-local"; "email"; "month"; "number";
···
57
58
(* Detect XHTML mode from xmlns attribute on html element *)
59
if name_lower = "html" then begin
60
+
match Attr_utils.get_attr "xmlns" attrs with
61
| Some "http://www.w3.org/1999/xhtml" -> state.is_xhtml <- true
62
| _ -> ()
63
end;
64
65
(* Check HTML element attribute restrictions *)
66
+
(match namespace with
67
+
| Some _ -> ()
68
+
| None ->
69
match List.assoc_opt name_lower disallowed_attrs_html with
70
| Some disallowed ->
71
List.iter (fun attr ->
72
+
if Attr_utils.has_attr attr attrs then
73
report_disallowed_attr name_lower attr collector
74
) disallowed
75
+
| None -> ());
76
77
(* Check for xml:base attribute - not allowed in HTML *)
78
+
(match namespace with
79
+
| Some _ -> ()
80
+
| None when name_lower = "html" ->
81
+
if Attr_utils.has_attr "xml:base" attrs then
82
report_disallowed_attr name_lower "xml:base" collector
83
+
| None -> ());
84
85
(* Check for xmlns:* prefixed attributes - not allowed in HTML *)
86
(* Standard xmlns declarations are allowed but custom prefixes are not *)
87
+
(match namespace with
88
+
| Some _ -> ()
89
+
| None ->
90
List.iter (fun (attr_name, _) ->
91
let attr_lower = String.lowercase_ascii attr_name in
92
+
if String.starts_with ~prefix:"xmlns:" attr_lower then begin
93
let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in
94
(* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *)
95
if prefix <> "xlink" && prefix <> "xml" then
96
Message_collector.add_typed collector
97
(`Attr (`Not_allowed_here (`Attr attr_name)))
98
end
99
+
) attrs);
100
101
(* Check SVG element restrictions - works in both HTML-embedded and XHTML SVG *)
102
(* xml:id is never valid on SVG elements in HTML5 *)
103
if List.mem name_lower svg_no_xml_id then begin
104
+
if Attr_utils.has_attr "xml:id" attrs then
105
report_disallowed_attr name_lower "xml:id" collector
106
end;
107
108
(* SVG feConvolveMatrix requires order attribute *)
109
if name_lower = "feconvolvematrix" then begin
110
+
if not (Attr_utils.has_attr "order" attrs) then
111
Message_collector.add_typed collector
112
(`Svg (`Missing_attr (`Elem "feConvolveMatrix", `Attr "order")))
113
end;
114
115
(* Validate style type attribute - must be "text/css" or omitted *)
116
+
(match namespace with
117
+
| Some _ -> ()
118
+
| None when name_lower = "style" ->
119
List.iter (fun (attr_name, attr_value) ->
120
let attr_lower = String.lowercase_ascii attr_name in
121
if attr_lower = "type" then begin
···
124
Message_collector.add_typed collector (`Misc `Style_type_invalid)
125
end
126
) attrs
127
+
| None -> ());
128
129
(* Validate object element requires data or type attribute *)
130
+
(match namespace with
131
+
| Some _ -> ()
132
+
| None when name_lower = "object" ->
133
+
let has_data = Attr_utils.has_attr "data" attrs in
134
+
let has_type = Attr_utils.has_attr "type" attrs in
135
if not has_data && not has_type then
136
Message_collector.add_typed collector
137
(`Attr (`Missing (`Elem "object", `Attr "data")))
138
+
| None -> ());
139
140
(* Validate link imagesizes/imagesrcset attributes *)
141
+
(match namespace with
142
+
| Some _ -> ()
143
+
| None when name_lower = "link" ->
144
+
let has_imagesizes = Attr_utils.has_attr "imagesizes" attrs in
145
+
let has_imagesrcset = Attr_utils.has_attr "imagesrcset" attrs in
146
+
let rel_value = Attr_utils.get_attr "rel" attrs in
147
+
let as_value = Attr_utils.get_attr "as" attrs in
148
149
(* imagesizes requires imagesrcset *)
150
if has_imagesizes && not has_imagesrcset then
···
174
if not rel_is_preload then
175
Message_collector.add_typed collector (`Link `As_requires_preload)
176
| None -> ())
177
+
| None -> ());
178
179
(* Validate img usemap attribute - must be hash-name reference with content *)
180
+
(match namespace with
181
+
| Some _ -> ()
182
+
| None when name_lower = "img" ->
183
List.iter (fun (attr_name, attr_value) ->
184
let attr_lower = String.lowercase_ascii attr_name in
185
if attr_lower = "usemap" then begin
···
190
attr_value attr_name name))))
191
end
192
) attrs
193
+
| None -> ());
194
195
(* Validate embed type attribute - must be valid MIME type *)
196
+
(match namespace with
197
+
| Some _ -> ()
198
+
| None when name_lower = "embed" ->
199
List.iter (fun (attr_name, attr_value) ->
200
let attr_lower = String.lowercase_ascii attr_name in
201
if attr_lower = "type" then begin
···
208
attr_value attr_name name msg))))
209
end
210
) attrs
211
+
| None -> ());
212
213
(* Validate width/height on embed and img - must be non-negative integers *)
214
+
let is_dimension_element = name_lower = "embed" || name_lower = "img" ||
215
+
name_lower = "video" || name_lower = "canvas" ||
216
+
name_lower = "iframe" || name_lower = "source" in
217
+
(match namespace with
218
+
| Some _ -> ()
219
+
| None when is_dimension_element ->
220
List.iter (fun (attr_name, attr_value) ->
221
let attr_lower = String.lowercase_ascii attr_name in
222
if attr_lower = "width" || attr_lower = "height" then begin
···
261
end
262
end
263
) attrs
264
+
| None -> ());
265
266
(* Validate area[shape=default] cannot have coords *)
267
+
(match namespace with
268
+
| Some _ -> ()
269
+
| None when name_lower = "area" ->
270
+
(match Attr_utils.get_attr "shape" attrs with
271
| Some s when String.lowercase_ascii (String.trim s) = "default" ->
272
+
if Attr_utils.has_attr "coords" attrs then
273
Message_collector.add_typed collector
274
(`Attr (`Not_allowed (`Attr "coords", `Elem "area")))
275
+
| _ -> ())
276
+
| None -> ());
277
278
(* Validate bdo element requires dir attribute, and dir cannot be "auto" *)
279
+
(match namespace with
280
+
| Some _ -> ()
281
+
| None when name_lower = "bdo" ->
282
+
(match Attr_utils.get_attr "dir" attrs with
283
| None ->
284
Message_collector.add_typed collector (`Misc `Bdo_missing_dir)
285
| Some v when String.lowercase_ascii (String.trim v) = "auto" ->
286
Message_collector.add_typed collector (`Misc `Bdo_dir_auto)
287
+
| _ -> ())
288
+
| None -> ());
289
290
(* Validate input list attribute - only allowed for certain types *)
291
+
(match namespace with
292
+
| Some _ -> ()
293
+
| None when name_lower = "input" ->
294
+
if Attr_utils.has_attr "list" attrs then begin
295
+
let input_type = Attr_utils.get_attr_or "type" ~default:"text" attrs
296
+
|> String.trim |> String.lowercase_ascii in
297
if not (List.mem input_type input_types_allowing_list) then
298
Message_collector.add_typed collector (`Input `List_not_allowed)
299
end
300
+
| None -> ());
301
302
(* Validate data-* attributes *)
303
+
(match namespace with
304
+
| Some _ -> ()
305
+
| None ->
306
List.iter (fun (attr_name, _) ->
307
let attr_lower = String.lowercase_ascii attr_name in
308
(* Check if it starts with "data-" *)
309
+
if String.starts_with ~prefix:"data-" attr_lower then begin
310
let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in
311
(* Check if it's exactly "data-" with nothing after *)
312
if after_prefix = "" then
···
316
Message_collector.add_typed collector
317
(`Attr (`Data_invalid_name (`Reason "must be XML 1.0 4th ed. plus Namespaces NCNames")))
318
end
319
+
) attrs);
320
321
(* Validate xml:lang must have matching lang attribute - only in HTML mode, not XHTML *)
322
+
(match namespace with
323
+
| Some _ -> ()
324
+
| None when not state.is_xhtml ->
325
+
let xmllang_value = Attr_utils.get_attr "xml:lang" attrs in
326
+
let lang_value = Attr_utils.get_attr "lang" attrs in
327
+
(match xmllang_value with
328
| Some xmllang ->
329
(match lang_value with
330
| None ->
331
Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
332
| Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang ->
333
Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
334
| _ -> ())
335
+
| None -> ())
336
+
| None -> ());
337
338
(* Validate spellcheck attribute - must be "true" or "false" or empty *)
339
+
(match namespace with
340
+
| Some _ -> ()
341
+
| None ->
342
List.iter (fun (attr_name, attr_value) ->
343
let attr_lower = String.lowercase_ascii attr_name in
344
if attr_lower = "spellcheck" then begin
···
347
Message_collector.add_typed collector
348
(`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
349
end
350
+
) attrs);
351
352
(* Validate enterkeyhint attribute - must be one of specific values *)
353
+
(match namespace with
354
+
| Some _ -> ()
355
+
| None ->
356
let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in
357
List.iter (fun (attr_name, attr_value) ->
358
let attr_lower = String.lowercase_ascii attr_name in
···
362
Message_collector.add_typed collector
363
(`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
364
end
365
+
) attrs);
366
367
(* Validate headingoffset attribute - must be a number between 0 and 8 *)
368
+
(match namespace with
369
+
| Some _ -> ()
370
+
| None ->
371
List.iter (fun (attr_name, attr_value) ->
372
let attr_lower = String.lowercase_ascii attr_name in
373
if attr_lower = "headingoffset" then begin
···
383
if not is_valid then
384
Message_collector.add_typed collector (`Misc `Headingoffset_invalid)
385
end
386
+
) attrs);
387
388
(* Validate accesskey attribute - each key label must be a single code point *)
389
+
(match namespace with
390
+
| Some _ -> ()
391
+
| None ->
392
List.iter (fun (attr_name, attr_value) ->
393
let attr_lower = String.lowercase_ascii attr_name in
394
if attr_lower = "accesskey" then begin
···
433
in
434
find_duplicates [] keys
435
end
436
+
) attrs);
437
438
(* Validate that command and popovertarget cannot have aria-expanded *)
439
+
(match namespace with
440
+
| Some _ -> ()
441
+
| None when name_lower = "button" ->
442
+
let has_command = Attr_utils.has_attr "command" attrs in
443
+
let has_popovertarget = Attr_utils.has_attr "popovertarget" attrs in
444
+
let has_aria_expanded = Attr_utils.has_attr "aria-expanded" attrs in
445
446
if has_command && has_aria_expanded then
447
Message_collector.add_typed collector
···
452
Message_collector.add_typed collector
453
(`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name,
454
`Condition "a \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute")))
455
+
| None -> ());
456
457
(* Note: data-* uppercase check requires XML parsing which preserves case.
458
The HTML5 parser normalizes attribute names to lowercase, so this check
···
461
ignore state.is_xhtml;
462
463
(* Validate media attribute on link, style, source elements *)
464
+
let is_media_element = name_lower = "link" || name_lower = "style" || name_lower = "source" in
465
+
(match namespace with
466
+
| Some _ -> ()
467
+
| None when is_media_element ->
468
List.iter (fun (attr_name, attr_value) ->
469
let attr_lower = String.lowercase_ascii attr_name in
470
if attr_lower = "media" then begin
···
480
end
481
end
482
) attrs
483
+
| None -> ());
484
485
(* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *)
486
+
(match namespace with
487
+
| Some _ -> ()
488
+
| None ->
489
List.iter (fun (attr_name, attr_value) ->
490
let attr_lower = String.lowercase_ascii attr_name in
491
if attr_lower = "prefix" then begin
···
507
end
508
end
509
end
510
+
) attrs)
511
512
let end_element _state ~name:_ ~namespace:_ _collector = ()
513
let characters _state _text _collector = ()
+5
-12
lib/htmlrw_check/specialized/base_checker.ml
+5
-12
lib/htmlrw_check/specialized/base_checker.ml
···
11
let reset state =
12
state.seen_link_or_script <- false
13
14
-
(** Check if an attribute list contains a specific attribute. *)
15
-
let has_attr name attrs =
16
-
List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
17
-
18
let start_element state ~name ~namespace ~attrs collector =
19
-
if namespace <> None then ()
20
-
else begin
21
-
let name_lower = String.lowercase_ascii name in
22
-
match name_lower with
23
| "link" | "script" ->
24
state.seen_link_or_script <- true
25
| "base" ->
26
if state.seen_link_or_script then
27
Message_collector.add_typed collector (`Misc `Base_after_link_script);
28
(* base element must have href or target attribute *)
29
-
let has_href = has_attr "href" attrs in
30
-
let has_target = has_attr "target" attrs in
31
-
if not has_href && not has_target then
32
Message_collector.add_typed collector (`Misc `Base_missing_href_or_target)
33
| _ -> ()
34
-
end
35
36
let end_element _state ~name:_ ~namespace:_ _collector = ()
37
let characters _state _text _collector = ()
···
11
let reset state =
12
state.seen_link_or_script <- false
13
14
let start_element state ~name ~namespace ~attrs collector =
15
+
match namespace with
16
+
| Some _ -> ()
17
+
| None ->
18
+
match String.lowercase_ascii name with
19
| "link" | "script" ->
20
state.seen_link_or_script <- true
21
| "base" ->
22
if state.seen_link_or_script then
23
Message_collector.add_typed collector (`Misc `Base_after_link_script);
24
(* base element must have href or target attribute *)
25
+
if not (Attr_utils.has_attr "href" attrs || Attr_utils.has_attr "target" attrs) then
26
Message_collector.add_typed collector (`Misc `Base_missing_href_or_target)
27
| _ -> ()
28
29
let end_element _state ~name:_ ~namespace:_ _collector = ()
30
let characters _state _text _collector = ()
+1
-6
lib/htmlrw_check/specialized/dl_checker.ml
+1
-6
lib/htmlrw_check/specialized/dl_checker.ml
···
57
| ctx :: _ -> Some ctx
58
| [] -> None
59
60
-
let get_attr name attrs =
61
-
List.find_map (fun (n, v) ->
62
-
if String.lowercase_ascii n = name then Some v else None
63
-
) attrs
64
-
65
let start_element state ~name ~namespace ~attrs collector =
66
let name_lower = String.lowercase_ascii name in
67
···
115
Message_collector.add_typed collector
116
(`Element (`Not_allowed_as_child (`Child "div", `Parent "dl")));
117
(* Check that role is only presentation or none *)
118
-
(match get_attr "role" attrs with
119
| Some role_value ->
120
let role_lower = String.lowercase_ascii (String.trim role_value) in
121
if role_lower <> "presentation" && role_lower <> "none" then
···
57
| ctx :: _ -> Some ctx
58
| [] -> None
59
60
let start_element state ~name ~namespace ~attrs collector =
61
let name_lower = String.lowercase_ascii name in
62
···
110
Message_collector.add_typed collector
111
(`Element (`Not_allowed_as_child (`Child "div", `Parent "dl")));
112
(* Check that role is only presentation or none *)
113
+
(match Attr_utils.get_attr "role" attrs with
114
| Some role_value ->
115
let role_lower = String.lowercase_ascii (String.trim role_value) in
116
if role_lower <> "presentation" && role_lower <> "none" then
+42
-76
lib/htmlrw_check/specialized/picture_checker.ml
+42
-76
lib/htmlrw_check/specialized/picture_checker.ml
···
66
state.always_matching_is_media_all <- false;
67
state.always_matching_is_media_empty <- false
68
69
-
(** Check if an attribute list contains a specific attribute. *)
70
-
let has_attr name attrs =
71
-
List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
72
-
73
(** Report disallowed attribute error *)
74
let report_disallowed_attr element attr collector =
75
Message_collector.add_typed collector
···
80
Message_collector.add_typed collector
81
(`Element (`Not_allowed_as_child (`Child child, `Parent parent)))
82
83
let check_picture_attrs attrs collector =
84
-
List.iter (fun disallowed ->
85
-
if has_attr disallowed attrs then
86
-
report_disallowed_attr "picture" disallowed collector
87
-
) disallowed_picture_attrs
88
89
let check_source_attrs_in_picture attrs collector =
90
-
List.iter (fun disallowed ->
91
-
if has_attr disallowed attrs then
92
-
report_disallowed_attr "source" disallowed collector
93
-
) disallowed_source_attrs_in_picture;
94
-
(* source in picture requires srcset *)
95
-
if not (has_attr "srcset" attrs) then
96
-
Message_collector.add_typed collector
97
-
(`Srcset `Source_missing_srcset)
98
99
let check_img_attrs attrs collector =
100
-
List.iter (fun disallowed ->
101
-
if has_attr disallowed attrs then
102
-
report_disallowed_attr "img" disallowed collector
103
-
) disallowed_img_attrs
104
105
let start_element state ~name ~namespace ~attrs collector =
106
let name_lower = String.lowercase_ascii name in
···
112
end;
113
114
(* Rest of checks only apply to HTML namespace elements *)
115
-
if namespace = None then begin
116
-
match name_lower with
117
| "picture" ->
118
(* Check if picture is in a disallowed parent context *)
119
(match state.parent_stack with
···
124
check_picture_attrs attrs collector;
125
state.in_picture <- true;
126
state.has_img_in_picture <- false;
127
-
state.picture_depth <- 0; (* Will be incremented to 1 at end of function *)
128
state.children_in_picture <- [];
129
state.last_was_img <- false;
130
state.has_source_after_img <- false;
···
136
state.children_in_picture <- "source" :: state.children_in_picture;
137
if state.last_was_img then
138
state.has_source_after_img <- true;
139
-
(* Check for always-matching source followed by another source *)
140
if state.has_always_matching_source then
141
state.source_after_always_matching <- true;
142
-
(* A source is "always matching" if it has:
143
-
- no media and no type attribute, OR
144
-
- media attribute with empty/whitespace-only value, OR
145
-
- media="all" (with optional whitespace) *)
146
-
let media_value = List.find_map (fun (attr_name, v) ->
147
-
if String.lowercase_ascii attr_name = "media" then Some v else None
148
-
) attrs in
149
-
let has_type = has_attr "type" attrs in
150
let is_media_all = match media_value with
151
| Some v -> String.lowercase_ascii (String.trim v) = "all"
152
-
| None -> false
153
-
in
154
let is_media_empty = match media_value with
155
| Some v -> String.trim v = ""
156
-
| None -> false
157
-
in
158
let is_always_matching = match media_value with
159
-
| None -> not has_type (* no media, check if no type either *)
160
| Some v ->
161
let trimmed = String.trim v in
162
trimmed = "" || String.lowercase_ascii trimmed = "all"
163
in
164
if is_always_matching then begin
165
state.has_always_matching_source <- true;
166
-
if is_media_all then
167
-
state.always_matching_is_media_all <- true
168
-
else if is_media_empty then
169
-
state.always_matching_is_media_empty <- true
170
end
171
172
| "img" when state.in_picture && state.picture_depth = 1 ->
···
174
state.has_img_in_picture <- true;
175
state.children_in_picture <- "img" :: state.children_in_picture;
176
state.last_was_img <- true;
177
-
(* Check for multiple img elements *)
178
-
let img_count = List.filter (fun c -> c = "img") state.children_in_picture |> List.length in
179
if img_count > 1 then
180
report_disallowed_child "picture" "img" collector;
181
-
(* Check if always-matching source is followed by img with srcset *)
182
-
if state.has_always_matching_source && has_attr "srcset" attrs then begin
183
-
if state.always_matching_is_media_all then
184
-
Message_collector.add_typed collector (`Misc `Media_all)
185
-
else if state.always_matching_is_media_empty then
186
-
Message_collector.add_typed collector (`Misc `Media_empty)
187
-
else
188
-
Message_collector.add_typed collector (`Srcset `Source_needs_media_or_type)
189
-
end
190
191
| "script" when state.in_picture && state.picture_depth = 1 ->
192
state.children_in_picture <- "script" :: state.children_in_picture
···
197
| "img" ->
198
check_img_attrs attrs collector
199
200
-
| _ -> ()
201
-
end;
202
203
(* Track depth when inside picture *)
204
if state.in_picture then
···
209
state.parent_stack <- name_lower :: state.parent_stack
210
211
let end_element state ~name ~namespace collector =
212
-
if namespace <> None then ()
213
-
else begin
214
let name_lower = String.lowercase_ascii name in
215
216
-
(* Track depth *)
217
if state.in_picture then
218
state.picture_depth <- state.picture_depth - 1;
219
220
if name_lower = "picture" && state.picture_depth = 0 then begin
221
-
(* Check if picture had img child *)
222
if not state.has_img_in_picture then
223
-
Message_collector.add_typed collector
224
-
(`Srcset `Picture_missing_img);
225
-
(* Check for source after img *)
226
if state.has_source_after_img then
227
report_disallowed_child "picture" "source" collector;
228
-
(* Check for source after always-matching source *)
229
-
if state.source_after_always_matching then begin
230
-
if state.always_matching_is_media_all then
231
-
Message_collector.add_typed collector (`Misc `Media_all)
232
-
else if state.always_matching_is_media_empty then
233
-
Message_collector.add_typed collector (`Misc `Media_empty)
234
-
else
235
-
Message_collector.add_typed collector (`Srcset `Source_needs_media_or_type)
236
-
end;
237
-
238
state.in_picture <- false
239
end;
240
241
-
(* Pop from parent stack *)
242
-
state.parent_stack <- (match state.parent_stack with
243
-
| _ :: rest -> rest
244
-
| [] -> [])
245
-
end
246
247
let characters state text collector =
248
(* Text in picture element is not allowed *)
···
66
state.always_matching_is_media_all <- false;
67
state.always_matching_is_media_empty <- false
68
69
(** Report disallowed attribute error *)
70
let report_disallowed_attr element attr collector =
71
Message_collector.add_typed collector
···
76
Message_collector.add_typed collector
77
(`Element (`Not_allowed_as_child (`Child child, `Parent parent)))
78
79
+
let check_disallowed_attrs element disallowed_list attrs collector =
80
+
List.iter (fun attr ->
81
+
if Attr_utils.has_attr attr attrs then
82
+
report_disallowed_attr element attr collector
83
+
) disallowed_list
84
+
85
let check_picture_attrs attrs collector =
86
+
check_disallowed_attrs "picture" disallowed_picture_attrs attrs collector
87
88
let check_source_attrs_in_picture attrs collector =
89
+
check_disallowed_attrs "source" disallowed_source_attrs_in_picture attrs collector;
90
+
if not (Attr_utils.has_attr "srcset" attrs) then
91
+
Message_collector.add_typed collector (`Srcset `Source_missing_srcset)
92
93
let check_img_attrs attrs collector =
94
+
check_disallowed_attrs "img" disallowed_img_attrs attrs collector
95
96
let start_element state ~name ~namespace ~attrs collector =
97
let name_lower = String.lowercase_ascii name in
···
103
end;
104
105
(* Rest of checks only apply to HTML namespace elements *)
106
+
match namespace with
107
+
| Some _ -> ()
108
+
| None ->
109
+
(match name_lower with
110
| "picture" ->
111
(* Check if picture is in a disallowed parent context *)
112
(match state.parent_stack with
···
117
check_picture_attrs attrs collector;
118
state.in_picture <- true;
119
state.has_img_in_picture <- false;
120
+
state.picture_depth <- 0;
121
state.children_in_picture <- [];
122
state.last_was_img <- false;
123
state.has_source_after_img <- false;
···
129
state.children_in_picture <- "source" :: state.children_in_picture;
130
if state.last_was_img then
131
state.has_source_after_img <- true;
132
if state.has_always_matching_source then
133
state.source_after_always_matching <- true;
134
+
(* A source is "always matching" if it has no media/type, or media="" or media="all" *)
135
+
let media_value = Attr_utils.get_attr "media" attrs in
136
+
let has_type = Attr_utils.has_attr "type" attrs in
137
let is_media_all = match media_value with
138
| Some v -> String.lowercase_ascii (String.trim v) = "all"
139
+
| None -> false in
140
let is_media_empty = match media_value with
141
| Some v -> String.trim v = ""
142
+
| None -> false in
143
let is_always_matching = match media_value with
144
+
| None -> not has_type
145
| Some v ->
146
let trimmed = String.trim v in
147
trimmed = "" || String.lowercase_ascii trimmed = "all"
148
in
149
if is_always_matching then begin
150
state.has_always_matching_source <- true;
151
+
(* Only set flags to true, never reset to false *)
152
+
if is_media_all then state.always_matching_is_media_all <- true;
153
+
if is_media_empty then state.always_matching_is_media_empty <- true
154
end
155
156
| "img" when state.in_picture && state.picture_depth = 1 ->
···
158
state.has_img_in_picture <- true;
159
state.children_in_picture <- "img" :: state.children_in_picture;
160
state.last_was_img <- true;
161
+
let img_count = List.length (List.filter (( = ) "img") state.children_in_picture) in
162
if img_count > 1 then
163
report_disallowed_child "picture" "img" collector;
164
+
if state.has_always_matching_source && Attr_utils.has_attr "srcset" attrs then
165
+
Message_collector.add_typed collector
166
+
(if state.always_matching_is_media_all then `Misc `Media_all
167
+
else if state.always_matching_is_media_empty then `Misc `Media_empty
168
+
else `Srcset `Source_needs_media_or_type)
169
170
| "script" when state.in_picture && state.picture_depth = 1 ->
171
state.children_in_picture <- "script" :: state.children_in_picture
···
176
| "img" ->
177
check_img_attrs attrs collector
178
179
+
| _ -> ());
180
181
(* Track depth when inside picture *)
182
if state.in_picture then
···
187
state.parent_stack <- name_lower :: state.parent_stack
188
189
let end_element state ~name ~namespace collector =
190
+
match namespace with
191
+
| Some _ -> ()
192
+
| None ->
193
let name_lower = String.lowercase_ascii name in
194
195
if state.in_picture then
196
state.picture_depth <- state.picture_depth - 1;
197
198
if name_lower = "picture" && state.picture_depth = 0 then begin
199
if not state.has_img_in_picture then
200
+
Message_collector.add_typed collector (`Srcset `Picture_missing_img);
201
if state.has_source_after_img then
202
report_disallowed_child "picture" "source" collector;
203
+
if state.source_after_always_matching then
204
+
Message_collector.add_typed collector
205
+
(if state.always_matching_is_media_all then `Misc `Media_all
206
+
else if state.always_matching_is_media_empty then `Misc `Media_empty
207
+
else `Srcset `Source_needs_media_or_type);
208
state.in_picture <- false
209
end;
210
211
+
state.parent_stack <- match state.parent_stack with _ :: rest -> rest | [] -> []
212
213
let characters state text collector =
214
(* Text in picture element is not allowed *)
+4
-8
lib/htmlrw_check/specialized/source_checker.ml
+4
-8
lib/htmlrw_check/specialized/source_checker.ml
···
23
| ctx :: _ -> ctx
24
| [] -> Other
25
26
-
(** Check if an attribute list contains a specific attribute. *)
27
-
let has_attr name attrs =
28
-
List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
29
-
30
let start_element state ~name ~namespace ~attrs collector =
31
if namespace <> None then ()
32
else begin
···
42
let ctx = current_context state in
43
begin match ctx with
44
| Video | Audio ->
45
-
if has_attr "srcset" attrs then
46
Message_collector.add_typed collector
47
(`Attr (`Not_allowed (`Attr "srcset", `Elem "source")));
48
-
if has_attr "sizes" attrs then
49
Message_collector.add_typed collector
50
(`Attr (`Not_allowed (`Attr "sizes", `Elem "source")));
51
-
if has_attr "width" attrs then
52
Message_collector.add_typed collector
53
(`Attr (`Not_allowed (`Attr "width", `Elem "source")));
54
-
if has_attr "height" attrs then
55
Message_collector.add_typed collector
56
(`Attr (`Not_allowed (`Attr "height", `Elem "source")))
57
| Picture | Other -> ()
···
23
| ctx :: _ -> ctx
24
| [] -> Other
25
26
let start_element state ~name ~namespace ~attrs collector =
27
if namespace <> None then ()
28
else begin
···
38
let ctx = current_context state in
39
begin match ctx with
40
| Video | Audio ->
41
+
if Attr_utils.has_attr "srcset" attrs then
42
Message_collector.add_typed collector
43
(`Attr (`Not_allowed (`Attr "srcset", `Elem "source")));
44
+
if Attr_utils.has_attr "sizes" attrs then
45
Message_collector.add_typed collector
46
(`Attr (`Not_allowed (`Attr "sizes", `Elem "source")));
47
+
if Attr_utils.has_attr "width" attrs then
48
Message_collector.add_typed collector
49
(`Attr (`Not_allowed (`Attr "width", `Elem "source")));
50
+
if Attr_utils.has_attr "height" attrs then
51
Message_collector.add_typed collector
52
(`Attr (`Not_allowed (`Attr "height", `Elem "source")))
53
| Picture | Other -> ()
+3
-9
lib/htmlrw_check/specialized/srcset_sizes_checker.ml
+3
-9
lib/htmlrw_check/specialized/srcset_sizes_checker.ml
···
14
let create () = ()
15
let reset _state = ()
16
17
-
(** Get attribute value *)
18
-
let get_attr name attrs =
19
-
List.find_map (fun (n, v) ->
20
-
if String.lowercase_ascii n = name then Some v else None
21
-
) attrs
22
-
23
(** Split string on a character while respecting parentheses *)
24
let split_respecting_parens ~sep s =
25
let len = String.length s in
···
971
972
(* SVG image elements should not have srcset *)
973
if namespace <> None && name_lower = "image" then begin
974
-
if get_attr "srcset" attrs <> None then
975
Message_collector.add_typed collector
976
(`Attr (`Not_allowed (`Attr "srcset", `Elem "image")))
977
end;
···
980
else begin
981
(* Check sizes and srcset on img and source *)
982
if name_lower = "img" || name_lower = "source" then begin
983
-
let sizes_value = get_attr "sizes" attrs in
984
-
let srcset_value = get_attr "srcset" attrs in
985
let has_sizes = sizes_value <> None in
986
let has_srcset = srcset_value <> None in
987
···
14
let create () = ()
15
let reset _state = ()
16
17
(** Split string on a character while respecting parentheses *)
18
let split_respecting_parens ~sep s =
19
let len = String.length s in
···
965
966
(* SVG image elements should not have srcset *)
967
if namespace <> None && name_lower = "image" then begin
968
+
if Attr_utils.get_attr "srcset" attrs <> None then
969
Message_collector.add_typed collector
970
(`Attr (`Not_allowed (`Attr "srcset", `Elem "image")))
971
end;
···
974
else begin
975
(* Check sizes and srcset on img and source *)
976
if name_lower = "img" || name_lower = "source" then begin
977
+
let sizes_value = Attr_utils.get_attr "sizes" attrs in
978
+
let srcset_value = Attr_utils.get_attr "srcset" attrs in
979
let has_sizes = sizes_value <> None in
980
let has_srcset = srcset_value <> None in
981
+1
-2
lib/htmlrw_check/specialized/xhtml_content_checker.ml
+1
-2
lib/htmlrw_check/specialized/xhtml_content_checker.ml
···
46
(* Check if data-* attribute has uppercase characters *)
47
let check_data_attr_case attrs collector =
48
List.iter (fun (attr_name, _) ->
49
-
if String.length attr_name > 5 &&
50
-
String.sub attr_name 0 5 = "data-" then
51
let suffix = String.sub attr_name 5 (String.length attr_name - 5) in
52
if String.exists (fun c -> c >= 'A' && c <= 'Z') suffix then
53
Message_collector.add_typed collector (`Attr `Data_uppercase)
···
46
(* Check if data-* attribute has uppercase characters *)
47
let check_data_attr_case attrs collector =
48
List.iter (fun (attr_name, _) ->
49
+
if String.starts_with ~prefix:"data-" attr_name then
50
let suffix = String.sub attr_name 5 (String.length attr_name - 5) in
51
if String.exists (fun c -> c >= 'A' && c <= 'Z') suffix then
52
Message_collector.add_typed collector (`Attr `Data_uppercase)