OCaml HTML5 parser/serialiser based on Python's JustHTML

more

+19
lib/htmlrw_check/attr_utils.ml
···
··· 1 + (** Common attribute utilities used across checkers. *) 2 + 3 + type attrs = (string * string) list 4 + 5 + let has_attr name attrs = 6 + List.exists (fun (n, _) -> String.lowercase_ascii n = name) attrs 7 + 8 + let get_attr name attrs = 9 + List.find_map (fun (n, v) -> 10 + if String.lowercase_ascii n = name then Some v else None 11 + ) attrs 12 + 13 + let get_attr_or name ~default attrs = 14 + Option.value ~default (get_attr name attrs) 15 + 16 + let is_non_empty_attr name attrs = 17 + match get_attr name attrs with 18 + | Some v -> String.trim v <> "" 19 + | None -> false
+2 -8
lib/htmlrw_check/datatype/dt_color.ml
··· 213 if String.length s = 0 then Error "Color value must not be empty" 214 else if List.mem s named_colors then Ok () 215 else if String.length s > 0 && s.[0] = '#' then validate_hex_color s 216 - else if 217 - String.length s > 4 218 - && (String.sub s 0 4 = "rgb(" || String.sub s 0 5 = "rgba(") 219 - then 220 (* Basic validation for rgb/rgba - just check balanced parens *) 221 if s.[String.length s - 1] = ')' then Ok () 222 else Error "rgb/rgba function must end with ')'" 223 - else if 224 - String.length s > 4 225 - && (String.sub s 0 4 = "hsl(" || String.sub s 0 5 = "hsla(") 226 - then 227 (* Basic validation for hsl/hsla - just check balanced parens *) 228 if s.[String.length s - 1] = ')' then Ok () 229 else Error "hsl/hsla function must end with ')'"
··· 213 if String.length s = 0 then Error "Color value must not be empty" 214 else if List.mem s named_colors then Ok () 215 else if String.length s > 0 && s.[0] = '#' then validate_hex_color s 216 + else if String.starts_with ~prefix:"rgb(" s || String.starts_with ~prefix:"rgba(" s then 217 (* Basic validation for rgb/rgba - just check balanced parens *) 218 if s.[String.length s - 1] = ')' then Ok () 219 else Error "rgb/rgba function must end with ')'" 220 + else if String.starts_with ~prefix:"hsl(" s || String.starts_with ~prefix:"hsla(" s then 221 (* Basic validation for hsl/hsla - just check balanced parens *) 222 if s.[String.length s - 1] = ')' then Ok () 223 else Error "hsl/hsla function must end with ')'"
+2 -2
lib/htmlrw_check/datatype/dt_media_query.ml
··· 330 331 (* Get base feature name for error messages (strip min-/max- prefix) *) 332 let base_feature = 333 - if String.length feature > 4 && String.sub feature 0 4 = "min-" then 334 String.sub feature 4 (String.length feature - 4) 335 - else if String.length feature > 4 && String.sub feature 0 4 = "max-" then 336 String.sub feature 4 (String.length feature - 4) 337 else 338 feature
··· 330 331 (* Get base feature name for error messages (strip min-/max- prefix) *) 332 let base_feature = 333 + if String.starts_with ~prefix:"min-" feature then 334 String.sub feature 4 (String.length feature - 4) 335 + else if String.starts_with ~prefix:"max-" feature then 336 String.sub feature 4 (String.length feature - 4) 337 else 338 feature
+77 -102
lib/htmlrw_check/message_format.ml
··· 1 let format_text ?system_id messages = 2 let buf = Buffer.create 1024 in 3 - List.iter 4 - (fun msg -> 5 - let loc_str = 6 - match msg.Message.location with 7 - | Some loc -> ( 8 - let sid = 9 - match loc.Message.system_id with 10 - | Some s -> s 11 - | None -> ( 12 - match system_id with Some s -> s | None -> "input") 13 - in 14 - let col_info = 15 - match (loc.end_line, loc.end_column) with 16 - | Some el, Some ec when el = loc.line && ec > loc.column -> 17 - Printf.sprintf "%d.%d-%d" loc.line loc.column ec 18 - | Some el, Some ec when el > loc.line -> 19 - Printf.sprintf "%d.%d-%d.%d" loc.line loc.column el ec 20 - | _ -> Printf.sprintf "%d.%d" loc.line loc.column 21 - in 22 - Printf.sprintf "%s:%s" sid col_info) 23 - | None -> ( 24 - match system_id with Some s -> s | None -> "input") 25 - in 26 - let severity_str = Message.severity_to_string msg.Message.severity in 27 - let code_str = " [" ^ Message.error_code_to_string msg.Message.error_code ^ "]" in 28 - let elem_str = 29 - match msg.Message.element with 30 - | Some e -> " (element: " ^ e ^ ")" 31 - | None -> "" 32 - in 33 - let attr_str = 34 - match msg.Message.attribute with 35 - | Some a -> " (attribute: " ^ a ^ ")" 36 - | None -> "" 37 - in 38 - Buffer.add_string buf 39 - (Printf.sprintf "%s: %s%s: %s%s%s\n" loc_str severity_str code_str 40 - msg.Message.message elem_str attr_str)) 41 - messages; 42 Buffer.contents buf 43 44 let format_gnu ?system_id messages = 45 let buf = Buffer.create 1024 in 46 - List.iter 47 - (fun msg -> 48 - let loc_str = 49 - match msg.Message.location with 50 - | Some loc -> ( 51 - let sid = 52 - match loc.Message.system_id with 53 - | Some s -> s 54 - | None -> ( 55 - match system_id with Some s -> s | None -> "input") 56 - in 57 - Printf.sprintf "%s:%d:%d" sid loc.line loc.column) 58 - | None -> ( 59 - match system_id with Some s -> s ^ ":0:0" | None -> "input:0:0") 60 - in 61 - let severity_str = Message.severity_to_string msg.Message.severity in 62 - let code_str = " [" ^ Message.error_code_to_string msg.Message.error_code ^ "]" in 63 - Buffer.add_string buf 64 - (Printf.sprintf "%s: %s%s: %s\n" loc_str severity_str code_str 65 - msg.Message.message)) 66 - messages; 67 Buffer.contents buf 68 69 let message_to_json ?system_id msg = 70 let open Jsont in 71 - let severity = String (Message.severity_to_string msg.Message.severity, Meta.none) in 72 - let message_text = String (msg.Message.message, Meta.none) in 73 - let base = [ (("type", Meta.none), severity); (("message", Meta.none), message_text) ] in 74 - let with_code = 75 - (("subType", Meta.none), String (Message.error_code_to_string msg.Message.error_code, Meta.none)) :: base 76 - in 77 - let with_location = 78 - match msg.Message.location with 79 | Some loc -> 80 - let line = Number (float_of_int loc.Message.line, Meta.none) in 81 - let column = Number (float_of_int loc.Message.column, Meta.none) in 82 - let loc_fields = 83 - [ (("firstLine", Meta.none), line); (("firstColumn", Meta.none), column) ] 84 - in 85 - let loc_fields = 86 - match loc.Message.end_line with 87 - | Some el -> 88 - (("lastLine", Meta.none), Number (float_of_int el, Meta.none)) :: loc_fields 89 - | None -> loc_fields 90 - in 91 - let loc_fields = 92 - match loc.Message.end_column with 93 - | Some ec -> 94 - (("lastColumn", Meta.none), Number (float_of_int ec, Meta.none)) 95 - :: loc_fields 96 - | None -> loc_fields 97 - in 98 - let url = 99 - match loc.Message.system_id with 100 - | Some s -> s 101 - | None -> ( 102 - match system_id with Some s -> s | None -> "input") 103 - in 104 - (("url", Meta.none), String (url, Meta.none)) :: loc_fields @ with_code 105 | None -> 106 - let url = 107 - match system_id with Some s -> s | None -> "input" 108 - in 109 - (("url", Meta.none), String (url, Meta.none)) :: with_code 110 in 111 - let with_extract = 112 - match msg.Message.extract with 113 - | Some e -> (("extract", Meta.none), String (e, Meta.none)) :: with_location 114 - | None -> with_location 115 - in 116 Object (with_extract, Meta.none) 117 118 let format_json ?system_id messages =
··· 1 + (** Get effective system_id, preferring location's system_id over the passed one *) 2 + let get_system_id ?system_id loc_system_id = 3 + loc_system_id 4 + |> Option.fold ~none:system_id ~some:Option.some 5 + |> Option.value ~default:"input" 6 + 7 let format_text ?system_id messages = 8 let buf = Buffer.create 1024 in 9 + List.iter (fun msg -> 10 + let loc_str = match msg.Message.location with 11 + | Some loc -> 12 + let sid = get_system_id ?system_id loc.Message.system_id in 13 + let col_info = match loc.end_line, loc.end_column with 14 + | Some el, Some ec when el = loc.line && ec > loc.column -> 15 + Printf.sprintf "%d.%d-%d" loc.line loc.column ec 16 + | Some el, Some ec when el > loc.line -> 17 + Printf.sprintf "%d.%d-%d.%d" loc.line loc.column el ec 18 + | _ -> 19 + Printf.sprintf "%d.%d" loc.line loc.column 20 + in 21 + Printf.sprintf "%s:%s" sid col_info 22 + | None -> 23 + Option.value system_id ~default:"input" 24 + in 25 + let elem_str = Option.fold ~none:"" ~some:(Printf.sprintf " (element: %s)") msg.Message.element in 26 + let attr_str = Option.fold ~none:"" ~some:(Printf.sprintf " (attribute: %s)") msg.Message.attribute in 27 + Buffer.add_string buf (Printf.sprintf "%s: %s [%s]: %s%s%s\n" 28 + loc_str 29 + (Message.severity_to_string msg.Message.severity) 30 + (Message.error_code_to_string msg.Message.error_code) 31 + msg.Message.message 32 + elem_str 33 + attr_str) 34 + ) messages; 35 Buffer.contents buf 36 37 let format_gnu ?system_id messages = 38 let buf = Buffer.create 1024 in 39 + List.iter (fun msg -> 40 + let loc_str = match msg.Message.location with 41 + | Some loc -> 42 + Printf.sprintf "%s:%d:%d" 43 + (get_system_id ?system_id loc.Message.system_id) 44 + loc.line loc.column 45 + | None -> 46 + Option.value system_id ~default:"input" ^ ":0:0" 47 + in 48 + Buffer.add_string buf (Printf.sprintf "%s: %s [%s]: %s\n" 49 + loc_str 50 + (Message.severity_to_string msg.Message.severity) 51 + (Message.error_code_to_string msg.Message.error_code) 52 + msg.Message.message) 53 + ) messages; 54 Buffer.contents buf 55 56 let message_to_json ?system_id msg = 57 let open Jsont in 58 + let str s = String (s, Meta.none) in 59 + let num n = Number (float_of_int n, Meta.none) in 60 + let field name value = ((name, Meta.none), value) in 61 + 62 + let base = [ 63 + field "type" (str (Message.severity_to_string msg.Message.severity)); 64 + field "message" (str msg.Message.message); 65 + field "subType" (str (Message.error_code_to_string msg.Message.error_code)); 66 + ] in 67 + 68 + let with_location = match msg.Message.location with 69 | Some loc -> 70 + let url = get_system_id ?system_id loc.Message.system_id in 71 + let loc_fields = [ 72 + field "url" (str url); 73 + field "firstLine" (num loc.line); 74 + field "firstColumn" (num loc.column); 75 + ] in 76 + let loc_fields = Option.fold ~none:loc_fields 77 + ~some:(fun el -> field "lastLine" (num el) :: loc_fields) 78 + loc.Message.end_line in 79 + let loc_fields = Option.fold ~none:loc_fields 80 + ~some:(fun ec -> field "lastColumn" (num ec) :: loc_fields) 81 + loc.Message.end_column in 82 + loc_fields @ base 83 | None -> 84 + field "url" (str (Option.value system_id ~default:"input")) :: base 85 in 86 + 87 + let with_extract = Option.fold ~none:with_location 88 + ~some:(fun e -> field "extract" (str e) :: with_location) 89 + msg.Message.extract in 90 + 91 Object (with_extract, Meta.none) 92 93 let format_json ?system_id messages =
+10 -10
lib/htmlrw_check/parse_error_bridge.ml
··· 14 | Html5rw.Parse_error_code.Tree_construction_error s -> 15 (* Check for control-character/noncharacter/surrogate with codepoint info *) 16 (try 17 - if String.length s > 28 && String.sub s 0 28 = "control-character-in-input-s" then 18 let colon_pos = String.index s ':' in 19 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in 20 let cp = int_of_string ("0x" ^ cp_str) in 21 Printf.sprintf "Forbidden code point U+%04x." cp 22 - else if String.length s > 25 && String.sub s 0 25 = "noncharacter-in-input-str" then 23 let colon_pos = String.index s ':' in 24 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in 25 let cp = int_of_string ("0x" ^ cp_str) in 26 Printf.sprintf "Forbidden code point U+%04x." cp 27 - else if String.length s > 22 && String.sub s 0 22 = "surrogate-in-input-str" then 28 let colon_pos = String.index s ':' in 29 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in 30 let cp = int_of_string ("0x" ^ cp_str) in 31 Printf.sprintf "Forbidden code point U+%04x." cp 32 (* Character reference errors *) 33 - else if String.length s > 28 && String.sub s 0 28 = "control-character-reference:" then 34 let cp_str = String.sub s 28 (String.length s - 28) in 35 let cp = int_of_string ("0x" ^ cp_str) in 36 if cp = 0x0D then 37 "A numeric character reference expanded to carriage return." 38 else 39 Printf.sprintf "Character reference expands to a control character (U+%04x)." cp 40 - else if String.length s > 31 && String.sub s 0 31 = "noncharacter-character-referenc" then 41 let colon_pos = String.index s ':' in 42 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in 43 let cp = int_of_string ("0x" ^ cp_str) in ··· 49 Printf.sprintf "Character reference expands to an astral non-character (U+%05x)." cp 50 else 51 Printf.sprintf "Character reference expands to a non-character (U+%04x)." cp 52 - else if String.length s > 36 && String.sub s 0 36 = "character-reference-outside-unicode-" then 53 "Character reference outside the permissible Unicode range." 54 - else if String.length s > 27 && String.sub s 0 27 = "surrogate-character-referen" then 55 let colon_pos = String.index s ':' in 56 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in 57 let cp = int_of_string ("0x" ^ cp_str) in ··· 64 "End tag \xe2\x80\x9cbr\xe2\x80\x9d." 65 else if s = "expected-closing-tag-but-got-eof" then 66 "End of file seen and there were open elements." 67 - else if String.length s > 28 && String.sub s 0 28 = "bad-start-tag-in-head-noscri" then 68 let colon_pos = String.index s ':' in 69 let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in 70 Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element 71 - else if String.length s > 19 && String.sub s 0 19 = "unexpected-end-tag:" then 72 let element = String.sub s 19 (String.length s - 19) in 73 Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element 74 - else if String.length s > 19 && String.sub s 0 19 = "start-tag-in-table:" then 75 let tag = String.sub s 19 (String.length s - 19) in 76 Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag 77 else
··· 14 | Html5rw.Parse_error_code.Tree_construction_error s -> 15 (* Check for control-character/noncharacter/surrogate with codepoint info *) 16 (try 17 + if String.starts_with ~prefix:"control-character-in-input-s" s then 18 let colon_pos = String.index s ':' in 19 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in 20 let cp = int_of_string ("0x" ^ cp_str) in 21 Printf.sprintf "Forbidden code point U+%04x." cp 22 + else if String.starts_with ~prefix:"noncharacter-in-input-str" s then 23 let colon_pos = String.index s ':' in 24 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in 25 let cp = int_of_string ("0x" ^ cp_str) in 26 Printf.sprintf "Forbidden code point U+%04x." cp 27 + else if String.starts_with ~prefix:"surrogate-in-input-str" s then 28 let colon_pos = String.index s ':' in 29 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in 30 let cp = int_of_string ("0x" ^ cp_str) in 31 Printf.sprintf "Forbidden code point U+%04x." cp 32 (* Character reference errors *) 33 + else if String.starts_with ~prefix:"control-character-reference:" s then 34 let cp_str = String.sub s 28 (String.length s - 28) in 35 let cp = int_of_string ("0x" ^ cp_str) in 36 if cp = 0x0D then 37 "A numeric character reference expanded to carriage return." 38 else 39 Printf.sprintf "Character reference expands to a control character (U+%04x)." cp 40 + else if String.starts_with ~prefix:"noncharacter-character-referenc" s then 41 let colon_pos = String.index s ':' in 42 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in 43 let cp = int_of_string ("0x" ^ cp_str) in ··· 49 Printf.sprintf "Character reference expands to an astral non-character (U+%05x)." cp 50 else 51 Printf.sprintf "Character reference expands to a non-character (U+%04x)." cp 52 + else if String.starts_with ~prefix:"character-reference-outside-unicode-" s then 53 "Character reference outside the permissible Unicode range." 54 + else if String.starts_with ~prefix:"surrogate-character-referen" s then 55 let colon_pos = String.index s ':' in 56 let cp_str = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in 57 let cp = int_of_string ("0x" ^ cp_str) in ··· 64 "End tag \xe2\x80\x9cbr\xe2\x80\x9d." 65 else if s = "expected-closing-tag-but-got-eof" then 66 "End of file seen and there were open elements." 67 + else if String.starts_with ~prefix:"bad-start-tag-in-head-noscri" s then 68 let colon_pos = String.index s ':' in 69 let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in 70 Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element 71 + else if String.starts_with ~prefix:"unexpected-end-tag:" s then 72 let element = String.sub s 19 (String.length s - 19) in 73 Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element 74 + else if String.starts_with ~prefix:"start-tag-in-table:" s then 75 let tag = String.sub s 19 (String.length s - 19) in 76 Printf.sprintf "Start tag \xe2\x80\x9c%s\xe2\x80\x9d seen in \xe2\x80\x9ctable\xe2\x80\x9d." tag 77 else
+17 -39
lib/htmlrw_check/semantic/autofocus_checker.ml
··· 3 Validates that only one element with autofocus attribute exists within 4 each dialog or popover context. *) 5 6 - (** Context for tracking autofocus elements. *) 7 type context_type = Dialog | Popover 8 9 type context = { ··· 26 state.context_stack <- []; 27 state.current_depth <- 0 28 29 - (** Check if an attribute list contains a specific attribute. *) 30 - let has_attr name attrs = 31 - List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs 32 - 33 - (** Get an attribute value from the list. *) 34 - let get_attr name attrs = 35 - List.find_map (fun (attr_name, value) -> 36 - if String.lowercase_ascii attr_name = name then Some value else None 37 - ) attrs 38 - 39 - (** Check if element has popover attribute. *) 40 - let has_popover attrs = 41 - List.exists (fun (attr_name, _) -> 42 - String.lowercase_ascii attr_name = "popover" 43 - ) attrs 44 - 45 let start_element state ~name ~namespace ~attrs collector = 46 - let name_lower = String.lowercase_ascii name in 47 - 48 - (* Track depth *) 49 state.current_depth <- state.current_depth + 1; 50 51 - if namespace = None then begin 52 (* Check if we're entering a dialog or popover context *) 53 - let enters_context = 54 - if name_lower = "dialog" then Some Dialog 55 - else if has_popover attrs then Some Popover 56 - else None 57 in 58 59 - (match enters_context with 60 - | Some ctx_type -> 61 let ctx = { context_type = ctx_type; autofocus_count = 0; depth = state.current_depth } in 62 state.context_stack <- ctx :: state.context_stack 63 - | None -> ()); 64 65 (* Check for autofocus attribute *) 66 - if has_attr "autofocus" attrs then begin 67 - (* Increment count in innermost context if any *) 68 match state.context_stack with 69 | ctx :: _ -> 70 ctx.autofocus_count <- ctx.autofocus_count + 1; 71 if ctx.autofocus_count > 1 then 72 Message_collector.add_typed collector (`Misc `Multiple_autofocus) 73 | [] -> () 74 - end 75 - end 76 77 let end_element state ~name ~namespace _collector = 78 - let name_lower = String.lowercase_ascii name in 79 - 80 - if namespace = None then begin 81 - (* Pop context if we're leaving one *) 82 match state.context_stack with 83 | ctx :: rest when ctx.depth = state.current_depth -> 84 - (* Verify this is the right element *) 85 let matches = 86 (name_lower = "dialog" && ctx.context_type = Dialog) || 87 (ctx.context_type = Popover) 88 in 89 if matches then state.context_stack <- rest 90 - | _ -> () 91 - end; 92 93 state.current_depth <- state.current_depth - 1 94
··· 3 Validates that only one element with autofocus attribute exists within 4 each dialog or popover context. *) 5 6 type context_type = Dialog | Popover 7 8 type context = { ··· 25 state.context_stack <- []; 26 state.current_depth <- 0 27 28 let start_element state ~name ~namespace ~attrs collector = 29 state.current_depth <- state.current_depth + 1; 30 31 + match namespace with 32 + | Some _ -> () 33 + | None -> 34 + let name_lower = String.lowercase_ascii name in 35 + 36 (* Check if we're entering a dialog or popover context *) 37 + let enters_context = match name_lower with 38 + | "dialog" -> Some Dialog 39 + | _ when Attr_utils.has_attr "popover" attrs -> Some Popover 40 + | _ -> None 41 in 42 43 + Option.iter (fun ctx_type -> 44 let ctx = { context_type = ctx_type; autofocus_count = 0; depth = state.current_depth } in 45 state.context_stack <- ctx :: state.context_stack 46 + ) enters_context; 47 48 (* Check for autofocus attribute *) 49 + if Attr_utils.has_attr "autofocus" attrs then 50 match state.context_stack with 51 | ctx :: _ -> 52 ctx.autofocus_count <- ctx.autofocus_count + 1; 53 if ctx.autofocus_count > 1 then 54 Message_collector.add_typed collector (`Misc `Multiple_autofocus) 55 | [] -> () 56 57 let end_element state ~name ~namespace _collector = 58 + (match namespace with 59 + | Some _ -> () 60 + | None -> 61 + let name_lower = String.lowercase_ascii name in 62 match state.context_stack with 63 | ctx :: rest when ctx.depth = state.current_depth -> 64 let matches = 65 (name_lower = "dialog" && ctx.context_type = Dialog) || 66 (ctx.context_type = Popover) 67 in 68 if matches then state.context_stack <- rest 69 + | _ -> ()); 70 71 state.current_depth <- state.current_depth - 1 72
+1 -8
lib/htmlrw_check/semantic/form_checker.ml
··· 10 11 let reset _state = () 12 13 - (** Get the value of an attribute if present. *) 14 - let get_attr name attrs = 15 - List.find_map 16 - (fun (attr_name, value) -> 17 - if String.equal attr_name name then Some value else None) 18 - attrs 19 - 20 (** Check if autocomplete value contains webauthn token *) 21 let contains_webauthn value = 22 let lower = String.lowercase_ascii value in ··· 42 (* Check autocomplete attribute on form elements *) 43 match name with 44 | "input" | "select" | "textarea" -> 45 - (match get_attr "autocomplete" attrs with 46 | Some autocomplete_value -> 47 check_autocomplete_value autocomplete_value name collector 48 | None -> ())
··· 10 11 let reset _state = () 12 13 (** Check if autocomplete value contains webauthn token *) 14 let contains_webauthn value = 15 let lower = String.lowercase_ascii value in ··· 35 (* Check autocomplete attribute on form elements *) 36 match name with 37 | "input" | "select" | "textarea" -> 38 + (match Attr_utils.get_attr "autocomplete" attrs with 39 | Some autocomplete_value -> 40 check_autocomplete_value autocomplete_value name collector 41 | None -> ())
+3 -8
lib/htmlrw_check/semantic/lang_detecting_checker.ml
··· 60 let n = String.lowercase_ascii name in 61 n = "svg" || n = "math" 62 63 - let get_attr name attrs = 64 - List.find_map (fun (n, v) -> 65 - if String.lowercase_ascii n = name then Some v else None 66 - ) attrs 67 - 68 let get_lang_code lang = 69 (* Extract primary language subtag *) 70 match String.split_on_char '-' lang with ··· 226 let ns = Option.value namespace ~default:"" in 227 228 if name_lower = "html" then begin 229 - state.html_lang <- get_attr "lang" attrs; 230 - state.html_dir <- get_attr "dir" attrs; 231 (* TODO: get line/column from locator *) 232 state.html_locator <- Some (1, 1) 233 end ··· 244 state.skip_depth <- state.skip_depth + 1 245 else begin 246 (* Check for different lang attribute *) 247 - match get_attr "lang" attrs with 248 | Some lang when state.html_lang <> Some lang -> 249 state.skip_depth <- state.skip_depth + 1 250 | _ -> ()
··· 60 let n = String.lowercase_ascii name in 61 n = "svg" || n = "math" 62 63 let get_lang_code lang = 64 (* Extract primary language subtag *) 65 match String.split_on_char '-' lang with ··· 221 let ns = Option.value namespace ~default:"" in 222 223 if name_lower = "html" then begin 224 + state.html_lang <- Attr_utils.get_attr "lang" attrs; 225 + state.html_dir <- Attr_utils.get_attr "dir" attrs; 226 (* TODO: get line/column from locator *) 227 state.html_locator <- Some (1, 1) 228 end ··· 239 state.skip_depth <- state.skip_depth + 1 240 else begin 241 (* Check for different lang attribute *) 242 + match Attr_utils.get_attr "lang" attrs with 243 | Some lang when state.html_lang <> Some lang -> 244 state.skip_depth <- state.skip_depth + 1 245 | _ -> ()
+1 -7
lib/htmlrw_check/semantic/option_checker.ml
··· 22 state.option_stack <- []; 23 state.in_template <- 0 24 25 - (** Get attribute value if present. *) 26 - let get_attr name attrs = 27 - List.find_map (fun (attr_name, value) -> 28 - if String.lowercase_ascii attr_name = name then Some value else None 29 - ) attrs 30 - 31 let start_element state ~name ~namespace ~attrs collector = 32 let name_lower = String.lowercase_ascii name in 33 ··· 36 if name_lower = "template" then 37 state.in_template <- state.in_template + 1 38 else if state.in_template = 0 && name_lower = "option" then begin 39 - let label_opt = get_attr "label" attrs in 40 let has_label = label_opt <> None in 41 let label_empty = match label_opt with 42 | Some v -> String.trim v = ""
··· 22 state.option_stack <- []; 23 state.in_template <- 0 24 25 let start_element state ~name ~namespace ~attrs collector = 26 let name_lower = String.lowercase_ascii name in 27 ··· 30 if name_lower = "template" then 31 state.in_template <- state.in_template + 1 32 else if state.in_template = 0 && name_lower = "option" then begin 33 + let label_opt = Attr_utils.get_attr "label" attrs in 34 let has_label = label_opt <> None in 35 let label_empty = match label_opt with 36 | Some v -> String.trim v = ""
+29 -40
lib/htmlrw_check/semantic/required_attr_checker.ml
··· 13 state._in_figure <- false; 14 state.in_a_with_href <- false 15 16 - (** Check if an attribute list contains a specific attribute. *) 17 - let has_attr name attrs = 18 - List.exists (fun (attr_name, _) -> String.equal attr_name name) attrs 19 - 20 - (** Get the value of an attribute if present. *) 21 - let get_attr name attrs = 22 - List.find_map 23 - (fun (attr_name, value) -> 24 - if String.equal attr_name name then Some value else None) 25 - attrs 26 - 27 let check_img_element state attrs collector = 28 (* Check for required src OR srcset attribute *) 29 - if not (has_attr "src" attrs) && not (has_attr "srcset" attrs) then 30 Message_collector.add_typed collector (`Img `Missing_src_or_srcset); 31 32 (* Check for alt attribute - always required *) 33 - if not (has_attr "alt" attrs) then 34 Message_collector.add_typed collector (`Img `Missing_alt); 35 36 (* Check ismap requires 'a' ancestor with href *) 37 - if has_attr "ismap" attrs && not state.in_a_with_href then 38 Message_collector.add_typed collector (`Img `Ismap_needs_href) 39 40 let check_area_element attrs collector = 41 (* area with href requires alt *) 42 - if has_attr "href" attrs && not (has_attr "alt" attrs) then 43 Message_collector.add_typed collector 44 (`Attr (`Missing (`Elem "area", `Attr "alt"))) 45 46 let check_input_element attrs collector = 47 - match get_attr "type" attrs with 48 | Some "image" -> 49 (* input[type=image] requires alt *) 50 - if not (has_attr "alt" attrs) then 51 Message_collector.add_typed collector 52 (`Attr (`Missing (`Elem "input", `Attr "alt"))) 53 | Some "hidden" -> 54 (* input[type=hidden] should not have required attribute *) 55 - if has_attr "required" attrs then 56 Message_collector.add_typed collector 57 (`Attr (`Not_allowed_when (`Attr "required", `Elem "input", `Condition "the type attribute is hidden"))) 58 | Some "file" -> 59 (* input[type=file] should not have value attribute *) 60 - if has_attr "value" attrs then 61 Message_collector.add_typed collector 62 (`Attr (`Not_allowed_when (`Attr "value", `Elem "input", `Condition "the type attribute is file"))) 63 | _ -> () 64 65 let check_script_element attrs _collector = 66 (* script requires src OR text content *) 67 - if not (has_attr "src" attrs) then 68 (* We can't check for text content here; that would need to be done 69 in end_element or with state tracking *) 70 () ··· 76 - http-equiv AND content 77 - property AND content (RDFa) 78 - itemprop AND content (microdata) *) 79 - let has_charset = has_attr "charset" attrs in 80 - let has_name = has_attr "name" attrs in 81 - let has_content = has_attr "content" attrs in 82 - let has_http_equiv = has_attr "http-equiv" attrs in 83 - let has_property = has_attr "property" attrs in 84 - let has_itemprop = has_attr "itemprop" attrs in 85 86 let valid = 87 has_charset ··· 100 101 let check_link_element attrs collector = 102 (* link[rel="stylesheet"] requires href *) 103 - match get_attr "rel" attrs with 104 | Some rel when String.equal rel "stylesheet" -> 105 - if not (has_attr "href" attrs) then 106 Message_collector.add_typed collector (`Link `Missing_href) 107 | _ -> () 108 109 let check_a_element attrs collector = 110 (* a[download] requires href *) 111 - if has_attr "download" attrs && not (has_attr "href" attrs) then 112 Message_collector.add_typed collector 113 (`Attr (`Missing (`Elem "a", `Attr "href"))) 114 115 let check_map_element attrs collector = 116 (* map requires name *) 117 - if not (has_attr "name" attrs) then 118 Message_collector.add_typed collector 119 (`Attr (`Missing (`Elem "map", `Attr "name"))) 120 121 let check_object_element attrs collector = 122 (* object requires data attribute (or type attribute alone is not sufficient) *) 123 - let has_data = has_attr "data" attrs in 124 - let has_type = has_attr "type" attrs in 125 if not has_data && has_type then 126 Message_collector.add_typed collector 127 (`Attr (`Missing (`Elem "object", `Attr "data"))) 128 129 let check_popover_element element_name attrs collector = 130 (* popover attribute must have valid value *) 131 - match get_attr "popover" attrs with 132 | Some value -> 133 let value_lower = String.lowercase_ascii value in 134 (* Valid values: empty string, auto, manual, hint *) ··· 141 142 let check_meter_element attrs collector = 143 (* meter requires value attribute *) 144 - if not (has_attr "value" attrs) then 145 Message_collector.add_typed collector 146 (`Attr (`Missing (`Elem "meter", `Attr "value"))) 147 else begin 148 (* Validate min <= value constraint *) 149 - match get_attr "value" attrs, get_attr "min" attrs with 150 | Some value_str, Some min_str -> 151 (try 152 let value = float_of_string value_str in ··· 162 163 let check_progress_element attrs collector = 164 (* Validate progress value constraints *) 165 - match get_attr "value" attrs with 166 | None -> () (* value is optional *) 167 | Some value_str -> 168 (try 169 let value = float_of_string value_str in 170 - let max_val = match get_attr "max" attrs with 171 | None -> 1.0 (* default max is 1 *) 172 | Some max_str -> (try float_of_string max_str with _ -> 1.0) 173 in 174 if value > max_val then 175 let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in 176 (* Check which message to use based on whether max is present *) 177 - if has_attr "max" attrs then 178 Message_collector.add_typed collector 179 (`Generic ( 180 (* Note: double space before "value" matches Nu validator quirk *) ··· 198 | "link" -> check_link_element attrs collector 199 | "a" -> 200 check_a_element attrs collector; 201 - if has_attr "href" attrs then state.in_a_with_href <- true 202 | "map" -> check_map_element attrs collector 203 | "object" -> check_object_element attrs collector 204 | "meter" -> check_meter_element attrs collector ··· 206 | "figure" -> state._in_figure <- true 207 | _ -> 208 (* Check popover attribute on any element *) 209 - if has_attr "popover" attrs then check_popover_element name attrs collector 210 211 let end_element state ~name ~namespace:_ _collector = 212 match name with
··· 13 state._in_figure <- false; 14 state.in_a_with_href <- false 15 16 let check_img_element state attrs collector = 17 (* Check for required src OR srcset attribute *) 18 + if not (Attr_utils.has_attr "src" attrs) && not (Attr_utils.has_attr "srcset" attrs) then 19 Message_collector.add_typed collector (`Img `Missing_src_or_srcset); 20 21 (* Check for alt attribute - always required *) 22 + if not (Attr_utils.has_attr "alt" attrs) then 23 Message_collector.add_typed collector (`Img `Missing_alt); 24 25 (* Check ismap requires 'a' ancestor with href *) 26 + if Attr_utils.has_attr "ismap" attrs && not state.in_a_with_href then 27 Message_collector.add_typed collector (`Img `Ismap_needs_href) 28 29 let check_area_element attrs collector = 30 (* area with href requires alt *) 31 + if Attr_utils.has_attr "href" attrs && not (Attr_utils.has_attr "alt" attrs) then 32 Message_collector.add_typed collector 33 (`Attr (`Missing (`Elem "area", `Attr "alt"))) 34 35 let check_input_element attrs collector = 36 + match Attr_utils.get_attr "type" attrs with 37 | Some "image" -> 38 (* input[type=image] requires alt *) 39 + if not (Attr_utils.has_attr "alt" attrs) then 40 Message_collector.add_typed collector 41 (`Attr (`Missing (`Elem "input", `Attr "alt"))) 42 | Some "hidden" -> 43 (* input[type=hidden] should not have required attribute *) 44 + if Attr_utils.has_attr "required" attrs then 45 Message_collector.add_typed collector 46 (`Attr (`Not_allowed_when (`Attr "required", `Elem "input", `Condition "the type attribute is hidden"))) 47 | Some "file" -> 48 (* input[type=file] should not have value attribute *) 49 + if Attr_utils.has_attr "value" attrs then 50 Message_collector.add_typed collector 51 (`Attr (`Not_allowed_when (`Attr "value", `Elem "input", `Condition "the type attribute is file"))) 52 | _ -> () 53 54 let check_script_element attrs _collector = 55 (* script requires src OR text content *) 56 + if not (Attr_utils.has_attr "src" attrs) then 57 (* We can't check for text content here; that would need to be done 58 in end_element or with state tracking *) 59 () ··· 65 - http-equiv AND content 66 - property AND content (RDFa) 67 - itemprop AND content (microdata) *) 68 + let has_charset = Attr_utils.has_attr "charset" attrs in 69 + let has_name = Attr_utils.has_attr "name" attrs in 70 + let has_content = Attr_utils.has_attr "content" attrs in 71 + let has_http_equiv = Attr_utils.has_attr "http-equiv" attrs in 72 + let has_property = Attr_utils.has_attr "property" attrs in 73 + let has_itemprop = Attr_utils.has_attr "itemprop" attrs in 74 75 let valid = 76 has_charset ··· 89 90 let check_link_element attrs collector = 91 (* link[rel="stylesheet"] requires href *) 92 + match Attr_utils.get_attr "rel" attrs with 93 | Some rel when String.equal rel "stylesheet" -> 94 + if not (Attr_utils.has_attr "href" attrs) then 95 Message_collector.add_typed collector (`Link `Missing_href) 96 | _ -> () 97 98 let check_a_element attrs collector = 99 (* a[download] requires href *) 100 + if Attr_utils.has_attr "download" attrs && not (Attr_utils.has_attr "href" attrs) then 101 Message_collector.add_typed collector 102 (`Attr (`Missing (`Elem "a", `Attr "href"))) 103 104 let check_map_element attrs collector = 105 (* map requires name *) 106 + if not (Attr_utils.has_attr "name" attrs) then 107 Message_collector.add_typed collector 108 (`Attr (`Missing (`Elem "map", `Attr "name"))) 109 110 let check_object_element attrs collector = 111 (* object requires data attribute (or type attribute alone is not sufficient) *) 112 + let has_data = Attr_utils.has_attr "data" attrs in 113 + let has_type = Attr_utils.has_attr "type" attrs in 114 if not has_data && has_type then 115 Message_collector.add_typed collector 116 (`Attr (`Missing (`Elem "object", `Attr "data"))) 117 118 let check_popover_element element_name attrs collector = 119 (* popover attribute must have valid value *) 120 + match Attr_utils.get_attr "popover" attrs with 121 | Some value -> 122 let value_lower = String.lowercase_ascii value in 123 (* Valid values: empty string, auto, manual, hint *) ··· 130 131 let check_meter_element attrs collector = 132 (* meter requires value attribute *) 133 + if not (Attr_utils.has_attr "value" attrs) then 134 Message_collector.add_typed collector 135 (`Attr (`Missing (`Elem "meter", `Attr "value"))) 136 else begin 137 (* Validate min <= value constraint *) 138 + match Attr_utils.get_attr "value" attrs, Attr_utils.get_attr "min" attrs with 139 | Some value_str, Some min_str -> 140 (try 141 let value = float_of_string value_str in ··· 151 152 let check_progress_element attrs collector = 153 (* Validate progress value constraints *) 154 + match Attr_utils.get_attr "value" attrs with 155 | None -> () (* value is optional *) 156 | Some value_str -> 157 (try 158 let value = float_of_string value_str in 159 + let max_val = match Attr_utils.get_attr "max" attrs with 160 | None -> 1.0 (* default max is 1 *) 161 | Some max_str -> (try float_of_string max_str with _ -> 1.0) 162 in 163 if value > max_val then 164 let q s = "\xe2\x80\x9c" ^ s ^ "\xe2\x80\x9d" in 165 (* Check which message to use based on whether max is present *) 166 + if Attr_utils.has_attr "max" attrs then 167 Message_collector.add_typed collector 168 (`Generic ( 169 (* Note: double space before "value" matches Nu validator quirk *) ··· 187 | "link" -> check_link_element attrs collector 188 | "a" -> 189 check_a_element attrs collector; 190 + if Attr_utils.has_attr "href" attrs then state.in_a_with_href <- true 191 | "map" -> check_map_element attrs collector 192 | "object" -> check_object_element attrs collector 193 | "meter" -> check_meter_element attrs collector ··· 195 | "figure" -> state._in_figure <- true 196 | _ -> 197 (* Check popover attribute on any element *) 198 + if Attr_utils.has_attr "popover" attrs then check_popover_element name attrs collector 199 200 let end_element state ~name ~namespace:_ _collector = 201 match name with
+1 -1
lib/htmlrw_check/specialized/aria_checker.ml
··· 491 if name_lower = "br" || name_lower = "wbr" then begin 492 List.iter (fun (attr_name, _) -> 493 let attr_lower = String.lowercase_ascii attr_name in 494 - if String.length attr_lower > 5 && String.sub attr_lower 0 5 = "aria-" && 495 attr_lower <> "aria-hidden" then 496 Message_collector.add_typed collector 497 (`Attr (`Not_allowed (`Attr attr_name, `Elem name)))
··· 491 if name_lower = "br" || name_lower = "wbr" then begin 492 List.iter (fun (attr_name, _) -> 493 let attr_lower = String.lowercase_ascii attr_name in 494 + if String.starts_with ~prefix:"aria-" attr_lower && 495 attr_lower <> "aria-hidden" then 496 Message_collector.add_typed collector 497 (`Attr (`Not_allowed (`Attr attr_name, `Elem name)))
+116 -97
lib/htmlrw_check/specialized/attr_restrictions_checker.ml
··· 41 let create () = { is_xhtml = false } 42 let reset state = state.is_xhtml <- false 43 44 - (** Check if an attribute list contains a specific attribute. *) 45 - let has_attr name attrs = 46 - List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs 47 - 48 - (** Get an attribute value from the list. *) 49 - let get_attr name attrs = 50 - List.find_map (fun (attr_name, value) -> 51 - if String.lowercase_ascii attr_name = name then Some value else None 52 - ) attrs 53 - 54 (** Input types that allow the list attribute. *) 55 let input_types_allowing_list = [ 56 "color"; "date"; "datetime-local"; "email"; "month"; "number"; ··· 67 68 (* Detect XHTML mode from xmlns attribute on html element *) 69 if name_lower = "html" then begin 70 - let xmlns_value = get_attr "xmlns" attrs in 71 - match xmlns_value with 72 | Some "http://www.w3.org/1999/xhtml" -> state.is_xhtml <- true 73 | _ -> () 74 end; 75 76 (* Check HTML element attribute restrictions *) 77 - if namespace = None then begin 78 match List.assoc_opt name_lower disallowed_attrs_html with 79 | Some disallowed -> 80 List.iter (fun attr -> 81 - if has_attr attr attrs then 82 report_disallowed_attr name_lower attr collector 83 ) disallowed 84 - | None -> () 85 - end; 86 87 (* Check for xml:base attribute - not allowed in HTML *) 88 - if namespace = None && name_lower = "html" then begin 89 - if has_attr "xml:base" attrs then 90 report_disallowed_attr name_lower "xml:base" collector 91 - end; 92 93 (* Check for xmlns:* prefixed attributes - not allowed in HTML *) 94 (* Standard xmlns declarations are allowed but custom prefixes are not *) 95 - if namespace = None then begin 96 List.iter (fun (attr_name, _) -> 97 let attr_lower = String.lowercase_ascii attr_name in 98 - if String.length attr_lower > 6 && String.sub attr_lower 0 6 = "xmlns:" then begin 99 let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in 100 (* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *) 101 if prefix <> "xlink" && prefix <> "xml" then 102 Message_collector.add_typed collector 103 (`Attr (`Not_allowed_here (`Attr attr_name))) 104 end 105 - ) attrs 106 - end; 107 108 (* Check SVG element restrictions - works in both HTML-embedded and XHTML SVG *) 109 (* xml:id is never valid on SVG elements in HTML5 *) 110 if List.mem name_lower svg_no_xml_id then begin 111 - if has_attr "xml:id" attrs then 112 report_disallowed_attr name_lower "xml:id" collector 113 end; 114 115 (* SVG feConvolveMatrix requires order attribute *) 116 if name_lower = "feconvolvematrix" then begin 117 - if not (has_attr "order" attrs) then 118 Message_collector.add_typed collector 119 (`Svg (`Missing_attr (`Elem "feConvolveMatrix", `Attr "order"))) 120 end; 121 122 (* Validate style type attribute - must be "text/css" or omitted *) 123 - if namespace = None && name_lower = "style" then begin 124 List.iter (fun (attr_name, attr_value) -> 125 let attr_lower = String.lowercase_ascii attr_name in 126 if attr_lower = "type" then begin ··· 129 Message_collector.add_typed collector (`Misc `Style_type_invalid) 130 end 131 ) attrs 132 - end; 133 134 (* Validate object element requires data or type attribute *) 135 - if namespace = None && name_lower = "object" then begin 136 - let has_data = has_attr "data" attrs in 137 - let has_type = has_attr "type" attrs in 138 if not has_data && not has_type then 139 Message_collector.add_typed collector 140 (`Attr (`Missing (`Elem "object", `Attr "data"))) 141 - end; 142 143 (* Validate link imagesizes/imagesrcset attributes *) 144 - if namespace = None && name_lower = "link" then begin 145 - let has_imagesizes = has_attr "imagesizes" attrs in 146 - let has_imagesrcset = has_attr "imagesrcset" attrs in 147 - let rel_value = get_attr "rel" attrs in 148 - let as_value = get_attr "as" attrs in 149 150 (* imagesizes requires imagesrcset *) 151 if has_imagesizes && not has_imagesrcset then ··· 175 if not rel_is_preload then 176 Message_collector.add_typed collector (`Link `As_requires_preload) 177 | None -> ()) 178 - end; 179 180 (* Validate img usemap attribute - must be hash-name reference with content *) 181 - if namespace = None && name_lower = "img" then begin 182 List.iter (fun (attr_name, attr_value) -> 183 let attr_lower = String.lowercase_ascii attr_name in 184 if attr_lower = "usemap" then begin ··· 189 attr_value attr_name name)))) 190 end 191 ) attrs 192 - end; 193 194 (* Validate embed type attribute - must be valid MIME type *) 195 - if namespace = None && name_lower = "embed" then begin 196 List.iter (fun (attr_name, attr_value) -> 197 let attr_lower = String.lowercase_ascii attr_name in 198 if attr_lower = "type" then begin ··· 205 attr_value attr_name name msg)))) 206 end 207 ) attrs 208 - end; 209 210 (* Validate width/height on embed and img - must be non-negative integers *) 211 - if namespace = None && (name_lower = "embed" || name_lower = "img" || 212 - name_lower = "video" || name_lower = "canvas" || 213 - name_lower = "iframe" || name_lower = "source") then begin 214 List.iter (fun (attr_name, attr_value) -> 215 let attr_lower = String.lowercase_ascii attr_name in 216 if attr_lower = "width" || attr_lower = "height" then begin ··· 255 end 256 end 257 ) attrs 258 - end; 259 260 (* Validate area[shape=default] cannot have coords *) 261 - if namespace = None && name_lower = "area" then begin 262 - let shape_value = get_attr "shape" attrs in 263 - match shape_value with 264 | Some s when String.lowercase_ascii (String.trim s) = "default" -> 265 - if has_attr "coords" attrs then 266 Message_collector.add_typed collector 267 (`Attr (`Not_allowed (`Attr "coords", `Elem "area"))) 268 - | _ -> () 269 - end; 270 271 (* Validate bdo element requires dir attribute, and dir cannot be "auto" *) 272 - if namespace = None && name_lower = "bdo" then begin 273 - let dir_value = get_attr "dir" attrs in 274 - match dir_value with 275 | None -> 276 Message_collector.add_typed collector (`Misc `Bdo_missing_dir) 277 | Some v when String.lowercase_ascii (String.trim v) = "auto" -> 278 Message_collector.add_typed collector (`Misc `Bdo_dir_auto) 279 - | _ -> () 280 - end; 281 282 (* Validate input list attribute - only allowed for certain types *) 283 - if namespace = None && name_lower = "input" then begin 284 - if has_attr "list" attrs then begin 285 - let input_type = match get_attr "type" attrs with 286 - | Some t -> String.lowercase_ascii (String.trim t) 287 - | None -> "text" (* default type is text *) 288 - in 289 if not (List.mem input_type input_types_allowing_list) then 290 Message_collector.add_typed collector (`Input `List_not_allowed) 291 end 292 - end; 293 294 (* Validate data-* attributes *) 295 - if namespace = None then begin 296 List.iter (fun (attr_name, _) -> 297 let attr_lower = String.lowercase_ascii attr_name in 298 (* Check if it starts with "data-" *) 299 - if String.length attr_lower >= 5 && String.sub attr_lower 0 5 = "data-" then begin 300 let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in 301 (* Check if it's exactly "data-" with nothing after *) 302 if after_prefix = "" then ··· 306 Message_collector.add_typed collector 307 (`Attr (`Data_invalid_name (`Reason "must be XML 1.0 4th ed. plus Namespaces NCNames"))) 308 end 309 - ) attrs 310 - end; 311 312 (* Validate xml:lang must have matching lang attribute - only in HTML mode, not XHTML *) 313 - if namespace = None && not state.is_xhtml then begin 314 - let xmllang_value = get_attr "xml:lang" attrs in 315 - let lang_value = get_attr "lang" attrs in 316 - match xmllang_value with 317 | Some xmllang -> 318 (match lang_value with 319 | None -> 320 - (* xml:lang without lang attribute *) 321 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang) 322 | Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang -> 323 - (* xml:lang and lang have different values - "lang present with same value" message *) 324 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang) 325 | _ -> ()) 326 - | None -> () 327 - end; 328 329 (* Validate spellcheck attribute - must be "true" or "false" or empty *) 330 - if namespace = None then begin 331 List.iter (fun (attr_name, attr_value) -> 332 let attr_lower = String.lowercase_ascii attr_name in 333 if attr_lower = "spellcheck" then begin ··· 336 Message_collector.add_typed collector 337 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason ""))) 338 end 339 - ) attrs 340 - end; 341 342 (* Validate enterkeyhint attribute - must be one of specific values *) 343 - if namespace = None then begin 344 let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in 345 List.iter (fun (attr_name, attr_value) -> 346 let attr_lower = String.lowercase_ascii attr_name in ··· 350 Message_collector.add_typed collector 351 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason ""))) 352 end 353 - ) attrs 354 - end; 355 356 (* Validate headingoffset attribute - must be a number between 0 and 8 *) 357 - if namespace = None then begin 358 List.iter (fun (attr_name, attr_value) -> 359 let attr_lower = String.lowercase_ascii attr_name in 360 if attr_lower = "headingoffset" then begin ··· 370 if not is_valid then 371 Message_collector.add_typed collector (`Misc `Headingoffset_invalid) 372 end 373 - ) attrs 374 - end; 375 376 (* Validate accesskey attribute - each key label must be a single code point *) 377 - if namespace = None then begin 378 List.iter (fun (attr_name, attr_value) -> 379 let attr_lower = String.lowercase_ascii attr_name in 380 if attr_lower = "accesskey" then begin ··· 419 in 420 find_duplicates [] keys 421 end 422 - ) attrs 423 - end; 424 425 (* Validate that command and popovertarget cannot have aria-expanded *) 426 - if namespace = None && name_lower = "button" then begin 427 - let has_command = has_attr "command" attrs in 428 - let has_popovertarget = has_attr "popovertarget" attrs in 429 - let has_aria_expanded = has_attr "aria-expanded" attrs in 430 431 if has_command && has_aria_expanded then 432 Message_collector.add_typed collector ··· 437 Message_collector.add_typed collector 438 (`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name, 439 `Condition "a \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute"))) 440 - end; 441 442 (* Note: data-* uppercase check requires XML parsing which preserves case. 443 The HTML5 parser normalizes attribute names to lowercase, so this check ··· 446 ignore state.is_xhtml; 447 448 (* Validate media attribute on link, style, source elements *) 449 - if namespace = None && (name_lower = "link" || name_lower = "style" || name_lower = "source") then begin 450 List.iter (fun (attr_name, attr_value) -> 451 let attr_lower = String.lowercase_ascii attr_name in 452 if attr_lower = "media" then begin ··· 462 end 463 end 464 ) attrs 465 - end; 466 467 (* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *) 468 - if namespace = None then begin 469 List.iter (fun (attr_name, attr_value) -> 470 let attr_lower = String.lowercase_ascii attr_name in 471 if attr_lower = "prefix" then begin ··· 487 end 488 end 489 end 490 - ) attrs 491 - end 492 493 let end_element _state ~name:_ ~namespace:_ _collector = () 494 let characters _state _text _collector = ()
··· 41 let create () = { is_xhtml = false } 42 let reset state = state.is_xhtml <- false 43 44 (** Input types that allow the list attribute. *) 45 let input_types_allowing_list = [ 46 "color"; "date"; "datetime-local"; "email"; "month"; "number"; ··· 57 58 (* Detect XHTML mode from xmlns attribute on html element *) 59 if name_lower = "html" then begin 60 + match Attr_utils.get_attr "xmlns" attrs with 61 | Some "http://www.w3.org/1999/xhtml" -> state.is_xhtml <- true 62 | _ -> () 63 end; 64 65 (* Check HTML element attribute restrictions *) 66 + (match namespace with 67 + | Some _ -> () 68 + | None -> 69 match List.assoc_opt name_lower disallowed_attrs_html with 70 | Some disallowed -> 71 List.iter (fun attr -> 72 + if Attr_utils.has_attr attr attrs then 73 report_disallowed_attr name_lower attr collector 74 ) disallowed 75 + | None -> ()); 76 77 (* Check for xml:base attribute - not allowed in HTML *) 78 + (match namespace with 79 + | Some _ -> () 80 + | None when name_lower = "html" -> 81 + if Attr_utils.has_attr "xml:base" attrs then 82 report_disallowed_attr name_lower "xml:base" collector 83 + | None -> ()); 84 85 (* Check for xmlns:* prefixed attributes - not allowed in HTML *) 86 (* Standard xmlns declarations are allowed but custom prefixes are not *) 87 + (match namespace with 88 + | Some _ -> () 89 + | None -> 90 List.iter (fun (attr_name, _) -> 91 let attr_lower = String.lowercase_ascii attr_name in 92 + if String.starts_with ~prefix:"xmlns:" attr_lower then begin 93 let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in 94 (* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *) 95 if prefix <> "xlink" && prefix <> "xml" then 96 Message_collector.add_typed collector 97 (`Attr (`Not_allowed_here (`Attr attr_name))) 98 end 99 + ) attrs); 100 101 (* Check SVG element restrictions - works in both HTML-embedded and XHTML SVG *) 102 (* xml:id is never valid on SVG elements in HTML5 *) 103 if List.mem name_lower svg_no_xml_id then begin 104 + if Attr_utils.has_attr "xml:id" attrs then 105 report_disallowed_attr name_lower "xml:id" collector 106 end; 107 108 (* SVG feConvolveMatrix requires order attribute *) 109 if name_lower = "feconvolvematrix" then begin 110 + if not (Attr_utils.has_attr "order" attrs) then 111 Message_collector.add_typed collector 112 (`Svg (`Missing_attr (`Elem "feConvolveMatrix", `Attr "order"))) 113 end; 114 115 (* Validate style type attribute - must be "text/css" or omitted *) 116 + (match namespace with 117 + | Some _ -> () 118 + | None when name_lower = "style" -> 119 List.iter (fun (attr_name, attr_value) -> 120 let attr_lower = String.lowercase_ascii attr_name in 121 if attr_lower = "type" then begin ··· 124 Message_collector.add_typed collector (`Misc `Style_type_invalid) 125 end 126 ) attrs 127 + | None -> ()); 128 129 (* Validate object element requires data or type attribute *) 130 + (match namespace with 131 + | Some _ -> () 132 + | None when name_lower = "object" -> 133 + let has_data = Attr_utils.has_attr "data" attrs in 134 + let has_type = Attr_utils.has_attr "type" attrs in 135 if not has_data && not has_type then 136 Message_collector.add_typed collector 137 (`Attr (`Missing (`Elem "object", `Attr "data"))) 138 + | None -> ()); 139 140 (* Validate link imagesizes/imagesrcset attributes *) 141 + (match namespace with 142 + | Some _ -> () 143 + | None when name_lower = "link" -> 144 + let has_imagesizes = Attr_utils.has_attr "imagesizes" attrs in 145 + let has_imagesrcset = Attr_utils.has_attr "imagesrcset" attrs in 146 + let rel_value = Attr_utils.get_attr "rel" attrs in 147 + let as_value = Attr_utils.get_attr "as" attrs in 148 149 (* imagesizes requires imagesrcset *) 150 if has_imagesizes && not has_imagesrcset then ··· 174 if not rel_is_preload then 175 Message_collector.add_typed collector (`Link `As_requires_preload) 176 | None -> ()) 177 + | None -> ()); 178 179 (* Validate img usemap attribute - must be hash-name reference with content *) 180 + (match namespace with 181 + | Some _ -> () 182 + | None when name_lower = "img" -> 183 List.iter (fun (attr_name, attr_value) -> 184 let attr_lower = String.lowercase_ascii attr_name in 185 if attr_lower = "usemap" then begin ··· 190 attr_value attr_name name)))) 191 end 192 ) attrs 193 + | None -> ()); 194 195 (* Validate embed type attribute - must be valid MIME type *) 196 + (match namespace with 197 + | Some _ -> () 198 + | None when name_lower = "embed" -> 199 List.iter (fun (attr_name, attr_value) -> 200 let attr_lower = String.lowercase_ascii attr_name in 201 if attr_lower = "type" then begin ··· 208 attr_value attr_name name msg)))) 209 end 210 ) attrs 211 + | None -> ()); 212 213 (* Validate width/height on embed and img - must be non-negative integers *) 214 + let is_dimension_element = name_lower = "embed" || name_lower = "img" || 215 + name_lower = "video" || name_lower = "canvas" || 216 + name_lower = "iframe" || name_lower = "source" in 217 + (match namespace with 218 + | Some _ -> () 219 + | None when is_dimension_element -> 220 List.iter (fun (attr_name, attr_value) -> 221 let attr_lower = String.lowercase_ascii attr_name in 222 if attr_lower = "width" || attr_lower = "height" then begin ··· 261 end 262 end 263 ) attrs 264 + | None -> ()); 265 266 (* Validate area[shape=default] cannot have coords *) 267 + (match namespace with 268 + | Some _ -> () 269 + | None when name_lower = "area" -> 270 + (match Attr_utils.get_attr "shape" attrs with 271 | Some s when String.lowercase_ascii (String.trim s) = "default" -> 272 + if Attr_utils.has_attr "coords" attrs then 273 Message_collector.add_typed collector 274 (`Attr (`Not_allowed (`Attr "coords", `Elem "area"))) 275 + | _ -> ()) 276 + | None -> ()); 277 278 (* Validate bdo element requires dir attribute, and dir cannot be "auto" *) 279 + (match namespace with 280 + | Some _ -> () 281 + | None when name_lower = "bdo" -> 282 + (match Attr_utils.get_attr "dir" attrs with 283 | None -> 284 Message_collector.add_typed collector (`Misc `Bdo_missing_dir) 285 | Some v when String.lowercase_ascii (String.trim v) = "auto" -> 286 Message_collector.add_typed collector (`Misc `Bdo_dir_auto) 287 + | _ -> ()) 288 + | None -> ()); 289 290 (* Validate input list attribute - only allowed for certain types *) 291 + (match namespace with 292 + | Some _ -> () 293 + | None when name_lower = "input" -> 294 + if Attr_utils.has_attr "list" attrs then begin 295 + let input_type = Attr_utils.get_attr_or "type" ~default:"text" attrs 296 + |> String.trim |> String.lowercase_ascii in 297 if not (List.mem input_type input_types_allowing_list) then 298 Message_collector.add_typed collector (`Input `List_not_allowed) 299 end 300 + | None -> ()); 301 302 (* Validate data-* attributes *) 303 + (match namespace with 304 + | Some _ -> () 305 + | None -> 306 List.iter (fun (attr_name, _) -> 307 let attr_lower = String.lowercase_ascii attr_name in 308 (* Check if it starts with "data-" *) 309 + if String.starts_with ~prefix:"data-" attr_lower then begin 310 let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in 311 (* Check if it's exactly "data-" with nothing after *) 312 if after_prefix = "" then ··· 316 Message_collector.add_typed collector 317 (`Attr (`Data_invalid_name (`Reason "must be XML 1.0 4th ed. plus Namespaces NCNames"))) 318 end 319 + ) attrs); 320 321 (* Validate xml:lang must have matching lang attribute - only in HTML mode, not XHTML *) 322 + (match namespace with 323 + | Some _ -> () 324 + | None when not state.is_xhtml -> 325 + let xmllang_value = Attr_utils.get_attr "xml:lang" attrs in 326 + let lang_value = Attr_utils.get_attr "lang" attrs in 327 + (match xmllang_value with 328 | Some xmllang -> 329 (match lang_value with 330 | None -> 331 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang) 332 | Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang -> 333 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang) 334 | _ -> ()) 335 + | None -> ()) 336 + | None -> ()); 337 338 (* Validate spellcheck attribute - must be "true" or "false" or empty *) 339 + (match namespace with 340 + | Some _ -> () 341 + | None -> 342 List.iter (fun (attr_name, attr_value) -> 343 let attr_lower = String.lowercase_ascii attr_name in 344 if attr_lower = "spellcheck" then begin ··· 347 Message_collector.add_typed collector 348 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason ""))) 349 end 350 + ) attrs); 351 352 (* Validate enterkeyhint attribute - must be one of specific values *) 353 + (match namespace with 354 + | Some _ -> () 355 + | None -> 356 let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in 357 List.iter (fun (attr_name, attr_value) -> 358 let attr_lower = String.lowercase_ascii attr_name in ··· 362 Message_collector.add_typed collector 363 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason ""))) 364 end 365 + ) attrs); 366 367 (* Validate headingoffset attribute - must be a number between 0 and 8 *) 368 + (match namespace with 369 + | Some _ -> () 370 + | None -> 371 List.iter (fun (attr_name, attr_value) -> 372 let attr_lower = String.lowercase_ascii attr_name in 373 if attr_lower = "headingoffset" then begin ··· 383 if not is_valid then 384 Message_collector.add_typed collector (`Misc `Headingoffset_invalid) 385 end 386 + ) attrs); 387 388 (* Validate accesskey attribute - each key label must be a single code point *) 389 + (match namespace with 390 + | Some _ -> () 391 + | None -> 392 List.iter (fun (attr_name, attr_value) -> 393 let attr_lower = String.lowercase_ascii attr_name in 394 if attr_lower = "accesskey" then begin ··· 433 in 434 find_duplicates [] keys 435 end 436 + ) attrs); 437 438 (* Validate that command and popovertarget cannot have aria-expanded *) 439 + (match namespace with 440 + | Some _ -> () 441 + | None when name_lower = "button" -> 442 + let has_command = Attr_utils.has_attr "command" attrs in 443 + let has_popovertarget = Attr_utils.has_attr "popovertarget" attrs in 444 + let has_aria_expanded = Attr_utils.has_attr "aria-expanded" attrs in 445 446 if has_command && has_aria_expanded then 447 Message_collector.add_typed collector ··· 452 Message_collector.add_typed collector 453 (`Attr (`Not_allowed_when (`Attr "aria-expanded", `Elem name, 454 `Condition "a \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute"))) 455 + | None -> ()); 456 457 (* Note: data-* uppercase check requires XML parsing which preserves case. 458 The HTML5 parser normalizes attribute names to lowercase, so this check ··· 461 ignore state.is_xhtml; 462 463 (* Validate media attribute on link, style, source elements *) 464 + let is_media_element = name_lower = "link" || name_lower = "style" || name_lower = "source" in 465 + (match namespace with 466 + | Some _ -> () 467 + | None when is_media_element -> 468 List.iter (fun (attr_name, attr_value) -> 469 let attr_lower = String.lowercase_ascii attr_name in 470 if attr_lower = "media" then begin ··· 480 end 481 end 482 ) attrs 483 + | None -> ()); 484 485 (* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *) 486 + (match namespace with 487 + | Some _ -> () 488 + | None -> 489 List.iter (fun (attr_name, attr_value) -> 490 let attr_lower = String.lowercase_ascii attr_name in 491 if attr_lower = "prefix" then begin ··· 507 end 508 end 509 end 510 + ) attrs) 511 512 let end_element _state ~name:_ ~namespace:_ _collector = () 513 let characters _state _text _collector = ()
+5 -12
lib/htmlrw_check/specialized/base_checker.ml
··· 11 let reset state = 12 state.seen_link_or_script <- false 13 14 - (** Check if an attribute list contains a specific attribute. *) 15 - let has_attr name attrs = 16 - List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs 17 - 18 let start_element state ~name ~namespace ~attrs collector = 19 - if namespace <> None then () 20 - else begin 21 - let name_lower = String.lowercase_ascii name in 22 - match name_lower with 23 | "link" | "script" -> 24 state.seen_link_or_script <- true 25 | "base" -> 26 if state.seen_link_or_script then 27 Message_collector.add_typed collector (`Misc `Base_after_link_script); 28 (* base element must have href or target attribute *) 29 - let has_href = has_attr "href" attrs in 30 - let has_target = has_attr "target" attrs in 31 - if not has_href && not has_target then 32 Message_collector.add_typed collector (`Misc `Base_missing_href_or_target) 33 | _ -> () 34 - end 35 36 let end_element _state ~name:_ ~namespace:_ _collector = () 37 let characters _state _text _collector = ()
··· 11 let reset state = 12 state.seen_link_or_script <- false 13 14 let start_element state ~name ~namespace ~attrs collector = 15 + match namespace with 16 + | Some _ -> () 17 + | None -> 18 + match String.lowercase_ascii name with 19 | "link" | "script" -> 20 state.seen_link_or_script <- true 21 | "base" -> 22 if state.seen_link_or_script then 23 Message_collector.add_typed collector (`Misc `Base_after_link_script); 24 (* base element must have href or target attribute *) 25 + if not (Attr_utils.has_attr "href" attrs || Attr_utils.has_attr "target" attrs) then 26 Message_collector.add_typed collector (`Misc `Base_missing_href_or_target) 27 | _ -> () 28 29 let end_element _state ~name:_ ~namespace:_ _collector = () 30 let characters _state _text _collector = ()
+1 -6
lib/htmlrw_check/specialized/dl_checker.ml
··· 57 | ctx :: _ -> Some ctx 58 | [] -> None 59 60 - let get_attr name attrs = 61 - List.find_map (fun (n, v) -> 62 - if String.lowercase_ascii n = name then Some v else None 63 - ) attrs 64 - 65 let start_element state ~name ~namespace ~attrs collector = 66 let name_lower = String.lowercase_ascii name in 67 ··· 115 Message_collector.add_typed collector 116 (`Element (`Not_allowed_as_child (`Child "div", `Parent "dl"))); 117 (* Check that role is only presentation or none *) 118 - (match get_attr "role" attrs with 119 | Some role_value -> 120 let role_lower = String.lowercase_ascii (String.trim role_value) in 121 if role_lower <> "presentation" && role_lower <> "none" then
··· 57 | ctx :: _ -> Some ctx 58 | [] -> None 59 60 let start_element state ~name ~namespace ~attrs collector = 61 let name_lower = String.lowercase_ascii name in 62 ··· 110 Message_collector.add_typed collector 111 (`Element (`Not_allowed_as_child (`Child "div", `Parent "dl"))); 112 (* Check that role is only presentation or none *) 113 + (match Attr_utils.get_attr "role" attrs with 114 | Some role_value -> 115 let role_lower = String.lowercase_ascii (String.trim role_value) in 116 if role_lower <> "presentation" && role_lower <> "none" then
+42 -76
lib/htmlrw_check/specialized/picture_checker.ml
··· 66 state.always_matching_is_media_all <- false; 67 state.always_matching_is_media_empty <- false 68 69 - (** Check if an attribute list contains a specific attribute. *) 70 - let has_attr name attrs = 71 - List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs 72 - 73 (** Report disallowed attribute error *) 74 let report_disallowed_attr element attr collector = 75 Message_collector.add_typed collector ··· 80 Message_collector.add_typed collector 81 (`Element (`Not_allowed_as_child (`Child child, `Parent parent))) 82 83 let check_picture_attrs attrs collector = 84 - List.iter (fun disallowed -> 85 - if has_attr disallowed attrs then 86 - report_disallowed_attr "picture" disallowed collector 87 - ) disallowed_picture_attrs 88 89 let check_source_attrs_in_picture attrs collector = 90 - List.iter (fun disallowed -> 91 - if has_attr disallowed attrs then 92 - report_disallowed_attr "source" disallowed collector 93 - ) disallowed_source_attrs_in_picture; 94 - (* source in picture requires srcset *) 95 - if not (has_attr "srcset" attrs) then 96 - Message_collector.add_typed collector 97 - (`Srcset `Source_missing_srcset) 98 99 let check_img_attrs attrs collector = 100 - List.iter (fun disallowed -> 101 - if has_attr disallowed attrs then 102 - report_disallowed_attr "img" disallowed collector 103 - ) disallowed_img_attrs 104 105 let start_element state ~name ~namespace ~attrs collector = 106 let name_lower = String.lowercase_ascii name in ··· 112 end; 113 114 (* Rest of checks only apply to HTML namespace elements *) 115 - if namespace = None then begin 116 - match name_lower with 117 | "picture" -> 118 (* Check if picture is in a disallowed parent context *) 119 (match state.parent_stack with ··· 124 check_picture_attrs attrs collector; 125 state.in_picture <- true; 126 state.has_img_in_picture <- false; 127 - state.picture_depth <- 0; (* Will be incremented to 1 at end of function *) 128 state.children_in_picture <- []; 129 state.last_was_img <- false; 130 state.has_source_after_img <- false; ··· 136 state.children_in_picture <- "source" :: state.children_in_picture; 137 if state.last_was_img then 138 state.has_source_after_img <- true; 139 - (* Check for always-matching source followed by another source *) 140 if state.has_always_matching_source then 141 state.source_after_always_matching <- true; 142 - (* A source is "always matching" if it has: 143 - - no media and no type attribute, OR 144 - - media attribute with empty/whitespace-only value, OR 145 - - media="all" (with optional whitespace) *) 146 - let media_value = List.find_map (fun (attr_name, v) -> 147 - if String.lowercase_ascii attr_name = "media" then Some v else None 148 - ) attrs in 149 - let has_type = has_attr "type" attrs in 150 let is_media_all = match media_value with 151 | Some v -> String.lowercase_ascii (String.trim v) = "all" 152 - | None -> false 153 - in 154 let is_media_empty = match media_value with 155 | Some v -> String.trim v = "" 156 - | None -> false 157 - in 158 let is_always_matching = match media_value with 159 - | None -> not has_type (* no media, check if no type either *) 160 | Some v -> 161 let trimmed = String.trim v in 162 trimmed = "" || String.lowercase_ascii trimmed = "all" 163 in 164 if is_always_matching then begin 165 state.has_always_matching_source <- true; 166 - if is_media_all then 167 - state.always_matching_is_media_all <- true 168 - else if is_media_empty then 169 - state.always_matching_is_media_empty <- true 170 end 171 172 | "img" when state.in_picture && state.picture_depth = 1 -> ··· 174 state.has_img_in_picture <- true; 175 state.children_in_picture <- "img" :: state.children_in_picture; 176 state.last_was_img <- true; 177 - (* Check for multiple img elements *) 178 - let img_count = List.filter (fun c -> c = "img") state.children_in_picture |> List.length in 179 if img_count > 1 then 180 report_disallowed_child "picture" "img" collector; 181 - (* Check if always-matching source is followed by img with srcset *) 182 - if state.has_always_matching_source && has_attr "srcset" attrs then begin 183 - if state.always_matching_is_media_all then 184 - Message_collector.add_typed collector (`Misc `Media_all) 185 - else if state.always_matching_is_media_empty then 186 - Message_collector.add_typed collector (`Misc `Media_empty) 187 - else 188 - Message_collector.add_typed collector (`Srcset `Source_needs_media_or_type) 189 - end 190 191 | "script" when state.in_picture && state.picture_depth = 1 -> 192 state.children_in_picture <- "script" :: state.children_in_picture ··· 197 | "img" -> 198 check_img_attrs attrs collector 199 200 - | _ -> () 201 - end; 202 203 (* Track depth when inside picture *) 204 if state.in_picture then ··· 209 state.parent_stack <- name_lower :: state.parent_stack 210 211 let end_element state ~name ~namespace collector = 212 - if namespace <> None then () 213 - else begin 214 let name_lower = String.lowercase_ascii name in 215 216 - (* Track depth *) 217 if state.in_picture then 218 state.picture_depth <- state.picture_depth - 1; 219 220 if name_lower = "picture" && state.picture_depth = 0 then begin 221 - (* Check if picture had img child *) 222 if not state.has_img_in_picture then 223 - Message_collector.add_typed collector 224 - (`Srcset `Picture_missing_img); 225 - (* Check for source after img *) 226 if state.has_source_after_img then 227 report_disallowed_child "picture" "source" collector; 228 - (* Check for source after always-matching source *) 229 - if state.source_after_always_matching then begin 230 - if state.always_matching_is_media_all then 231 - Message_collector.add_typed collector (`Misc `Media_all) 232 - else if state.always_matching_is_media_empty then 233 - Message_collector.add_typed collector (`Misc `Media_empty) 234 - else 235 - Message_collector.add_typed collector (`Srcset `Source_needs_media_or_type) 236 - end; 237 - 238 state.in_picture <- false 239 end; 240 241 - (* Pop from parent stack *) 242 - state.parent_stack <- (match state.parent_stack with 243 - | _ :: rest -> rest 244 - | [] -> []) 245 - end 246 247 let characters state text collector = 248 (* Text in picture element is not allowed *)
··· 66 state.always_matching_is_media_all <- false; 67 state.always_matching_is_media_empty <- false 68 69 (** Report disallowed attribute error *) 70 let report_disallowed_attr element attr collector = 71 Message_collector.add_typed collector ··· 76 Message_collector.add_typed collector 77 (`Element (`Not_allowed_as_child (`Child child, `Parent parent))) 78 79 + let check_disallowed_attrs element disallowed_list attrs collector = 80 + List.iter (fun attr -> 81 + if Attr_utils.has_attr attr attrs then 82 + report_disallowed_attr element attr collector 83 + ) disallowed_list 84 + 85 let check_picture_attrs attrs collector = 86 + check_disallowed_attrs "picture" disallowed_picture_attrs attrs collector 87 88 let check_source_attrs_in_picture attrs collector = 89 + check_disallowed_attrs "source" disallowed_source_attrs_in_picture attrs collector; 90 + if not (Attr_utils.has_attr "srcset" attrs) then 91 + Message_collector.add_typed collector (`Srcset `Source_missing_srcset) 92 93 let check_img_attrs attrs collector = 94 + check_disallowed_attrs "img" disallowed_img_attrs attrs collector 95 96 let start_element state ~name ~namespace ~attrs collector = 97 let name_lower = String.lowercase_ascii name in ··· 103 end; 104 105 (* Rest of checks only apply to HTML namespace elements *) 106 + match namespace with 107 + | Some _ -> () 108 + | None -> 109 + (match name_lower with 110 | "picture" -> 111 (* Check if picture is in a disallowed parent context *) 112 (match state.parent_stack with ··· 117 check_picture_attrs attrs collector; 118 state.in_picture <- true; 119 state.has_img_in_picture <- false; 120 + state.picture_depth <- 0; 121 state.children_in_picture <- []; 122 state.last_was_img <- false; 123 state.has_source_after_img <- false; ··· 129 state.children_in_picture <- "source" :: state.children_in_picture; 130 if state.last_was_img then 131 state.has_source_after_img <- true; 132 if state.has_always_matching_source then 133 state.source_after_always_matching <- true; 134 + (* A source is "always matching" if it has no media/type, or media="" or media="all" *) 135 + let media_value = Attr_utils.get_attr "media" attrs in 136 + let has_type = Attr_utils.has_attr "type" attrs in 137 let is_media_all = match media_value with 138 | Some v -> String.lowercase_ascii (String.trim v) = "all" 139 + | None -> false in 140 let is_media_empty = match media_value with 141 | Some v -> String.trim v = "" 142 + | None -> false in 143 let is_always_matching = match media_value with 144 + | None -> not has_type 145 | Some v -> 146 let trimmed = String.trim v in 147 trimmed = "" || String.lowercase_ascii trimmed = "all" 148 in 149 if is_always_matching then begin 150 state.has_always_matching_source <- true; 151 + (* Only set flags to true, never reset to false *) 152 + if is_media_all then state.always_matching_is_media_all <- true; 153 + if is_media_empty then state.always_matching_is_media_empty <- true 154 end 155 156 | "img" when state.in_picture && state.picture_depth = 1 -> ··· 158 state.has_img_in_picture <- true; 159 state.children_in_picture <- "img" :: state.children_in_picture; 160 state.last_was_img <- true; 161 + let img_count = List.length (List.filter (( = ) "img") state.children_in_picture) in 162 if img_count > 1 then 163 report_disallowed_child "picture" "img" collector; 164 + if state.has_always_matching_source && Attr_utils.has_attr "srcset" attrs then 165 + Message_collector.add_typed collector 166 + (if state.always_matching_is_media_all then `Misc `Media_all 167 + else if state.always_matching_is_media_empty then `Misc `Media_empty 168 + else `Srcset `Source_needs_media_or_type) 169 170 | "script" when state.in_picture && state.picture_depth = 1 -> 171 state.children_in_picture <- "script" :: state.children_in_picture ··· 176 | "img" -> 177 check_img_attrs attrs collector 178 179 + | _ -> ()); 180 181 (* Track depth when inside picture *) 182 if state.in_picture then ··· 187 state.parent_stack <- name_lower :: state.parent_stack 188 189 let end_element state ~name ~namespace collector = 190 + match namespace with 191 + | Some _ -> () 192 + | None -> 193 let name_lower = String.lowercase_ascii name in 194 195 if state.in_picture then 196 state.picture_depth <- state.picture_depth - 1; 197 198 if name_lower = "picture" && state.picture_depth = 0 then begin 199 if not state.has_img_in_picture then 200 + Message_collector.add_typed collector (`Srcset `Picture_missing_img); 201 if state.has_source_after_img then 202 report_disallowed_child "picture" "source" collector; 203 + if state.source_after_always_matching then 204 + Message_collector.add_typed collector 205 + (if state.always_matching_is_media_all then `Misc `Media_all 206 + else if state.always_matching_is_media_empty then `Misc `Media_empty 207 + else `Srcset `Source_needs_media_or_type); 208 state.in_picture <- false 209 end; 210 211 + state.parent_stack <- match state.parent_stack with _ :: rest -> rest | [] -> [] 212 213 let characters state text collector = 214 (* Text in picture element is not allowed *)
+4 -8
lib/htmlrw_check/specialized/source_checker.ml
··· 23 | ctx :: _ -> ctx 24 | [] -> Other 25 26 - (** Check if an attribute list contains a specific attribute. *) 27 - let has_attr name attrs = 28 - List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs 29 - 30 let start_element state ~name ~namespace ~attrs collector = 31 if namespace <> None then () 32 else begin ··· 42 let ctx = current_context state in 43 begin match ctx with 44 | Video | Audio -> 45 - if has_attr "srcset" attrs then 46 Message_collector.add_typed collector 47 (`Attr (`Not_allowed (`Attr "srcset", `Elem "source"))); 48 - if has_attr "sizes" attrs then 49 Message_collector.add_typed collector 50 (`Attr (`Not_allowed (`Attr "sizes", `Elem "source"))); 51 - if has_attr "width" attrs then 52 Message_collector.add_typed collector 53 (`Attr (`Not_allowed (`Attr "width", `Elem "source"))); 54 - if has_attr "height" attrs then 55 Message_collector.add_typed collector 56 (`Attr (`Not_allowed (`Attr "height", `Elem "source"))) 57 | Picture | Other -> ()
··· 23 | ctx :: _ -> ctx 24 | [] -> Other 25 26 let start_element state ~name ~namespace ~attrs collector = 27 if namespace <> None then () 28 else begin ··· 38 let ctx = current_context state in 39 begin match ctx with 40 | Video | Audio -> 41 + if Attr_utils.has_attr "srcset" attrs then 42 Message_collector.add_typed collector 43 (`Attr (`Not_allowed (`Attr "srcset", `Elem "source"))); 44 + if Attr_utils.has_attr "sizes" attrs then 45 Message_collector.add_typed collector 46 (`Attr (`Not_allowed (`Attr "sizes", `Elem "source"))); 47 + if Attr_utils.has_attr "width" attrs then 48 Message_collector.add_typed collector 49 (`Attr (`Not_allowed (`Attr "width", `Elem "source"))); 50 + if Attr_utils.has_attr "height" attrs then 51 Message_collector.add_typed collector 52 (`Attr (`Not_allowed (`Attr "height", `Elem "source"))) 53 | Picture | Other -> ()
+3 -9
lib/htmlrw_check/specialized/srcset_sizes_checker.ml
··· 14 let create () = () 15 let reset _state = () 16 17 - (** Get attribute value *) 18 - let get_attr name attrs = 19 - List.find_map (fun (n, v) -> 20 - if String.lowercase_ascii n = name then Some v else None 21 - ) attrs 22 - 23 (** Split string on a character while respecting parentheses *) 24 let split_respecting_parens ~sep s = 25 let len = String.length s in ··· 971 972 (* SVG image elements should not have srcset *) 973 if namespace <> None && name_lower = "image" then begin 974 - if get_attr "srcset" attrs <> None then 975 Message_collector.add_typed collector 976 (`Attr (`Not_allowed (`Attr "srcset", `Elem "image"))) 977 end; ··· 980 else begin 981 (* Check sizes and srcset on img and source *) 982 if name_lower = "img" || name_lower = "source" then begin 983 - let sizes_value = get_attr "sizes" attrs in 984 - let srcset_value = get_attr "srcset" attrs in 985 let has_sizes = sizes_value <> None in 986 let has_srcset = srcset_value <> None in 987
··· 14 let create () = () 15 let reset _state = () 16 17 (** Split string on a character while respecting parentheses *) 18 let split_respecting_parens ~sep s = 19 let len = String.length s in ··· 965 966 (* SVG image elements should not have srcset *) 967 if namespace <> None && name_lower = "image" then begin 968 + if Attr_utils.get_attr "srcset" attrs <> None then 969 Message_collector.add_typed collector 970 (`Attr (`Not_allowed (`Attr "srcset", `Elem "image"))) 971 end; ··· 974 else begin 975 (* Check sizes and srcset on img and source *) 976 if name_lower = "img" || name_lower = "source" then begin 977 + let sizes_value = Attr_utils.get_attr "sizes" attrs in 978 + let srcset_value = Attr_utils.get_attr "srcset" attrs in 979 let has_sizes = sizes_value <> None in 980 let has_srcset = srcset_value <> None in 981
+1 -2
lib/htmlrw_check/specialized/xhtml_content_checker.ml
··· 46 (* Check if data-* attribute has uppercase characters *) 47 let check_data_attr_case attrs collector = 48 List.iter (fun (attr_name, _) -> 49 - if String.length attr_name > 5 && 50 - String.sub attr_name 0 5 = "data-" then 51 let suffix = String.sub attr_name 5 (String.length attr_name - 5) in 52 if String.exists (fun c -> c >= 'A' && c <= 'Z') suffix then 53 Message_collector.add_typed collector (`Attr `Data_uppercase)
··· 46 (* Check if data-* attribute has uppercase characters *) 47 let check_data_attr_case attrs collector = 48 List.iter (fun (attr_name, _) -> 49 + if String.starts_with ~prefix:"data-" attr_name then 50 let suffix = String.sub attr_name 5 (String.length attr_name - 5) in 51 if String.exists (fun c -> c >= 'A' && c <= 'Z') suffix then 52 Message_collector.add_typed collector (`Attr `Data_uppercase)