OCaml HTML5 parser/serialiser based on Python's JustHTML

imore

+19 -1
lib/html5_checker/datatype/dt_autocomplete.ml
··· 162 List.find_opt (fun t -> starts_with t "section-") tokens 163 in 164 165 (* Process remaining tokens *) 166 let process_field_tokens tokens = 167 match tokens with 168 | [] -> Error "A list of autofill details tokens must contain an autofill field name." 169 | [ "webauthn" ] -> ··· 246 | None -> 247 Error 248 "A list of autofill details tokens must not contain more than one \ 249 - autofill field name.") 250 in 251 process_field_tokens !tokens 252
··· 162 List.find_opt (fun t -> starts_with t "section-") tokens 163 in 164 165 + (* Check if webauthn appears anywhere except as the very last token *) 166 + let check_webauthn_position tokens = 167 + let rec check = function 168 + | [] -> None 169 + | ["webauthn"] -> None (* webauthn as last token is ok *) 170 + | "webauthn" :: _ :: _ -> Some () (* webauthn not last is error *) 171 + | _ :: rest -> check rest 172 + in 173 + check tokens 174 + in 175 + 176 (* Process remaining tokens *) 177 let process_field_tokens tokens = 178 + (* First check if webauthn appears but not at the very end *) 179 + (match check_webauthn_position tokens with 180 + | Some () -> 181 + Error 182 + "The token \"webauthn\" must only appear as the very last token in a \ 183 + list of autofill detail tokens." 184 + | None -> 185 match tokens with 186 | [] -> Error "A list of autofill details tokens must contain an autofill field name." 187 | [ "webauthn" ] -> ··· 264 | None -> 265 Error 266 "A list of autofill details tokens must not contain more than one \ 267 + autofill field name.")) 268 in 269 process_field_tokens !tokens 270
+8 -8
lib/html5_checker/error_code.ml
··· 382 Printf.sprintf "Element %s is missing required attribute %s." 383 (q element) (q attr) 384 | Missing_required_attr_one_of { element; attrs } -> 385 - let attrs_str = String.concat ", " (List.map q attrs) in 386 Printf.sprintf "Element %s is missing one or more of the following attributes: [%s]." 387 (q element) attrs_str 388 | Bad_attr_value { element; attr; value; reason } -> ··· 420 Printf.sprintf "Element %s is missing required child element %s." 421 (q parent) (q child) 422 | Missing_required_child_one_of { parent; children } -> 423 - let children_str = String.concat ", " (List.map q children) in 424 Printf.sprintf "Element %s is missing one or more of the following child elements: [%s]." 425 (q parent) children_str 426 | Missing_required_child_generic { parent } -> ··· 488 Printf.sprintf "An %s element must have an %s attribute, except under certain conditions. For details, consult guidance on providing text alternatives for images." 489 (q "img") (q "alt") 490 | Img_missing_src_or_srcset -> 491 - Printf.sprintf "Element %s is missing one or more of the following attributes: [%s, %s]." 492 - (q "img") (q "src") (q "srcset") 493 | Option_empty_without_label -> 494 Printf.sprintf "Element %s without attribute %s must not be empty." 495 (q "option") (q "label") ··· 499 Printf.sprintf "The value of %s attribute for the %s element must not be %s." 500 (q "dir") (q "bdo") (q "auto") 501 | Base_missing_href_or_target -> 502 - Printf.sprintf "Element %s is missing one or more of the following attributes: [%s, %s]." 503 - (q "base") (q "href") (q "target") 504 | Base_after_link_script -> 505 Printf.sprintf "The %s element must come before any %s or %s elements in the document." 506 (q "base") (q "link") (q "script") ··· 551 Printf.sprintf "Element %s is missing required attribute %s." 552 (q "summary") (q "role") 553 | Summary_missing_attrs -> 554 - Printf.sprintf "Element %s is missing one or more of the following attributes: [%s, %s, %s]." 555 - (q "summary") (q "aria-checked") (q "aria-level") (q "role") 556 | Autocomplete_webauthn_on_select -> 557 Printf.sprintf "The value of the %s attribute for the %s element must not contain %s." 558 (q "autocomplete") (q "select") (q "webauthn")
··· 382 Printf.sprintf "Element %s is missing required attribute %s." 383 (q element) (q attr) 384 | Missing_required_attr_one_of { element; attrs } -> 385 + let attrs_str = String.concat ", " attrs in 386 Printf.sprintf "Element %s is missing one or more of the following attributes: [%s]." 387 (q element) attrs_str 388 | Bad_attr_value { element; attr; value; reason } -> ··· 420 Printf.sprintf "Element %s is missing required child element %s." 421 (q parent) (q child) 422 | Missing_required_child_one_of { parent; children } -> 423 + let children_str = String.concat ", " children in 424 Printf.sprintf "Element %s is missing one or more of the following child elements: [%s]." 425 (q parent) children_str 426 | Missing_required_child_generic { parent } -> ··· 488 Printf.sprintf "An %s element must have an %s attribute, except under certain conditions. For details, consult guidance on providing text alternatives for images." 489 (q "img") (q "alt") 490 | Img_missing_src_or_srcset -> 491 + Printf.sprintf "Element %s is missing one or more of the following attributes: [src, srcset]." 492 + (q "img") 493 | Option_empty_without_label -> 494 Printf.sprintf "Element %s without attribute %s must not be empty." 495 (q "option") (q "label") ··· 499 Printf.sprintf "The value of %s attribute for the %s element must not be %s." 500 (q "dir") (q "bdo") (q "auto") 501 | Base_missing_href_or_target -> 502 + Printf.sprintf "Element %s is missing one or more of the following attributes: [href, target]." 503 + (q "base") 504 | Base_after_link_script -> 505 Printf.sprintf "The %s element must come before any %s or %s elements in the document." 506 (q "base") (q "link") (q "script") ··· 551 Printf.sprintf "Element %s is missing required attribute %s." 552 (q "summary") (q "role") 553 | Summary_missing_attrs -> 554 + Printf.sprintf "Element %s is missing one or more of the following attributes: [aria-checked, aria-level, role]." 555 + (q "summary") 556 | Autocomplete_webauthn_on_select -> 557 Printf.sprintf "The value of the %s attribute for the %s element must not contain %s." 558 (q "autocomplete") (q "select") (q "webauthn")
+7
lib/html5_checker/parse_error_bridge.ml
··· 14 let (message, final_code) = match code with 15 | Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus -> 16 ("Self-closing syntax (\"/>\") used on a non-void HTML element. Ignoring the slash and treating as a start tag.", code_str) 17 | Html5rw.Parse_error_code.Tree_construction_error s -> 18 (* Check for control-character/noncharacter/surrogate with codepoint info *) 19 (try ··· 67 ("End tag \xe2\x80\x9cp\xe2\x80\x9d implied, but there were open elements.", "end-tag-p-implied") 68 else if s = "end-tag-br" then 69 ("End tag \xe2\x80\x9cbr\xe2\x80\x9d.", "end-tag-br") 70 else 71 (Printf.sprintf "Parse error: %s" s, s) 72 with _ -> (Printf.sprintf "Parse error: %s" s, s))
··· 14 let (message, final_code) = match code with 15 | Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus -> 16 ("Self-closing syntax (\"/>\") used on a non-void HTML element. Ignoring the slash and treating as a start tag.", code_str) 17 + | Html5rw.Parse_error_code.Null_character_reference -> 18 + ("Character reference expands to zero.", "null-character-reference") 19 | Html5rw.Parse_error_code.Tree_construction_error s -> 20 (* Check for control-character/noncharacter/surrogate with codepoint info *) 21 (try ··· 69 ("End tag \xe2\x80\x9cp\xe2\x80\x9d implied, but there were open elements.", "end-tag-p-implied") 70 else if s = "end-tag-br" then 71 ("End tag \xe2\x80\x9cbr\xe2\x80\x9d.", "end-tag-br") 72 + else if s = "expected-closing-tag-but-got-eof" then 73 + ("End of file seen and there were open elements.", "eof-in-open-element") 74 + else if String.length s > 19 && String.sub s 0 19 = "unexpected-end-tag:" then 75 + let element = String.sub s 19 (String.length s - 19) in 76 + (Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element, "stray-end-tag") 77 else 78 (Printf.sprintf "Parse error: %s" s, s) 79 with _ -> (Printf.sprintf "Parse error: %s" s, s))
+86 -10
lib/html5_checker/semantic/nesting_checker.ml
··· 32 let ancestor_mask_by_descendant : (string, int) Hashtbl.t = 33 Hashtbl.create 64 34 35 (** Register that [ancestor] is prohibited for [descendant]. *) 36 let register_prohibited_ancestor ancestor descendant = 37 let number = special_ancestor_number ancestor in ··· 44 in 45 let new_mask = mask lor (1 lsl number) in 46 Hashtbl.replace ancestor_mask_by_descendant descendant new_mask 47 48 (** Initialize the prohibited ancestor map. *) 49 let () = ··· 113 ) interactive_elements; 114 115 (* Phrasing-only elements: cannot contain flow content like p, div, h1-h6, etc. *) 116 let phrasing_only = ["span"; "strong"; "em"; "b"; "i"; "u"; "s"; "small"; "mark"; 117 "abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp"; "kbd"; "var"] in 118 let flow_content = ["p"; "div"; "article"; "section"; "nav"; "aside"; "header"; "footer"; ··· 120 "ol"; "ul"; "dl"; "pre"; "blockquote"; "hr"] in 121 List.iter (fun ancestor -> 122 List.iter (fun descendant -> 123 - register_prohibited_ancestor ancestor descendant 124 ) flow_content 125 ) phrasing_only 126 ··· 134 let map_num = special_ancestor_number "map" in 135 1 lsl map_num 136 137 (** Stack node representing an element's context. *) 138 type stack_node = { 139 ancestor_mask : int; 140 - _name : string; [@warning "-69"] 141 } 142 143 (** Checker state. *) ··· 181 | _ -> 182 false 183 184 (** Report nesting violations. *) 185 let check_nesting state name attrs collector = 186 (* Compute the prohibited ancestor mask for this element *) ··· 190 | None -> 0 191 in 192 193 (* Add interactive element restrictions if applicable *) 194 let mask = 195 if is_interactive_element name attrs then ··· 212 | "object" when has_attr attrs "usemap" -> Some "usemap" 213 | _ -> None 214 in 215 (* Find which ancestors are violated *) 216 Array.iteri (fun i ancestor -> 217 let bit = 1 lsl i in 218 - if (mask_hit land bit) <> 0 then 219 - Message_collector.add_typed collector 220 - (Error_code.Element_must_not_be_descendant { 221 - element = name; 222 - attr; 223 - ancestor 224 - }) 225 ) special_ancestors 226 end 227 end ··· 238 }) 239 | _ -> () 240 241 let start_element state ~name ~namespace ~attrs collector = 242 (* Only check HTML elements, not SVG or MathML *) 243 match namespace with ··· 246 (* Check for nesting violations *) 247 check_nesting state name attrs collector; 248 check_required_ancestors state name collector; 249 250 (* Update ancestor mask if this is a special ancestor *) 251 let new_mask = state.ancestor_mask in ··· 267 in 268 269 (* Push onto stack *) 270 - let node = { ancestor_mask = state.ancestor_mask; _name = name } in 271 state.stack <- node :: state.stack; 272 state.ancestor_mask <- new_mask 273
··· 32 let ancestor_mask_by_descendant : (string, int) Hashtbl.t = 33 Hashtbl.create 64 34 35 + (** Map from descendant element name to bitmask of ancestors that cause content model violations. 36 + (These use different error messages than nesting violations.) *) 37 + let content_model_violation_mask : (string, int) Hashtbl.t = 38 + Hashtbl.create 64 39 + 40 (** Register that [ancestor] is prohibited for [descendant]. *) 41 let register_prohibited_ancestor ancestor descendant = 42 let number = special_ancestor_number ancestor in ··· 49 in 50 let new_mask = mask lor (1 lsl number) in 51 Hashtbl.replace ancestor_mask_by_descendant descendant new_mask 52 + 53 + (** Register a content model violation (phrasing-only element containing flow content). *) 54 + let register_content_model_violation ancestor descendant = 55 + register_prohibited_ancestor ancestor descendant; 56 + let number = special_ancestor_number ancestor in 57 + let mask = 58 + match Hashtbl.find_opt content_model_violation_mask descendant with 59 + | None -> 0 60 + | Some m -> m 61 + in 62 + let new_mask = mask lor (1 lsl number) in 63 + Hashtbl.replace content_model_violation_mask descendant new_mask 64 65 (** Initialize the prohibited ancestor map. *) 66 let () = ··· 130 ) interactive_elements; 131 132 (* Phrasing-only elements: cannot contain flow content like p, div, h1-h6, etc. *) 133 + (* These are content model violations, not nesting violations. *) 134 let phrasing_only = ["span"; "strong"; "em"; "b"; "i"; "u"; "s"; "small"; "mark"; 135 "abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp"; "kbd"; "var"] in 136 let flow_content = ["p"; "div"; "article"; "section"; "nav"; "aside"; "header"; "footer"; ··· 138 "ol"; "ul"; "dl"; "pre"; "blockquote"; "hr"] in 139 List.iter (fun ancestor -> 140 List.iter (fun descendant -> 141 + register_content_model_violation ancestor descendant 142 ) flow_content 143 ) phrasing_only 144 ··· 152 let map_num = special_ancestor_number "map" in 153 1 lsl map_num 154 155 + (** Transparent elements - inherit content model from parent *) 156 + let transparent_elements = ["a"; "canvas"; "video"; "audio"; "object"; "ins"; "del"; "map"] 157 + 158 (** Stack node representing an element's context. *) 159 type stack_node = { 160 ancestor_mask : int; 161 + name : string; 162 + is_transparent : bool; 163 } 164 165 (** Checker state. *) ··· 203 | _ -> 204 false 205 206 + (** Find the nearest transparent element in the ancestor stack, if any. 207 + Returns the immediate parent's name if it's transparent, otherwise None. *) 208 + let find_nearest_transparent_parent state = 209 + match state.stack with 210 + | parent :: _ when parent.is_transparent -> Some parent.name 211 + | _ -> None 212 + 213 (** Report nesting violations. *) 214 let check_nesting state name attrs collector = 215 (* Compute the prohibited ancestor mask for this element *) ··· 219 | None -> 0 220 in 221 222 + (* Get content model violation mask for this element *) 223 + let content_model_mask = 224 + match Hashtbl.find_opt content_model_violation_mask name with 225 + | Some m -> m 226 + | None -> 0 227 + in 228 + 229 (* Add interactive element restrictions if applicable *) 230 let mask = 231 if is_interactive_element name attrs then ··· 248 | "object" when has_attr attrs "usemap" -> Some "usemap" 249 | _ -> None 250 in 251 + (* Find the transparent parent (like canvas) if any *) 252 + let transparent_parent = find_nearest_transparent_parent state in 253 (* Find which ancestors are violated *) 254 Array.iteri (fun i ancestor -> 255 let bit = 1 lsl i in 256 + if (mask_hit land bit) <> 0 then begin 257 + (* Check if this is a content model violation or a nesting violation *) 258 + if (content_model_mask land bit) <> 0 then begin 259 + (* Content model violation: use "not allowed as child" format *) 260 + (* If there's a transparent parent, use that instead of the ancestor *) 261 + let parent = match transparent_parent with 262 + | Some p -> p 263 + | None -> ancestor 264 + in 265 + Message_collector.add_typed collector 266 + (Error_code.Element_not_allowed_as_child { 267 + child = name; 268 + parent 269 + }) 270 + end else 271 + (* Nesting violation: use "must not be descendant" format *) 272 + Message_collector.add_typed collector 273 + (Error_code.Element_must_not_be_descendant { 274 + element = name; 275 + attr; 276 + ancestor 277 + }) 278 + end 279 ) special_ancestors 280 end 281 end ··· 292 }) 293 | _ -> () 294 295 + (** Check for metadata-only elements appearing outside valid contexts. 296 + style element is only valid in head or in noscript (in head). *) 297 + let check_metadata_element_context state name collector = 298 + match name with 299 + | "style" -> 300 + (* style is only valid inside head or noscript *) 301 + begin match state.stack with 302 + | parent :: _ when parent.name = "head" -> () (* valid *) 303 + | parent :: _ when parent.name = "noscript" -> () (* valid in noscript in head *) 304 + | parent :: _ -> 305 + (* style inside any other element is not allowed *) 306 + Message_collector.add_typed collector 307 + (Error_code.Element_not_allowed_as_child { 308 + child = "style"; 309 + parent = parent.name 310 + }) 311 + | [] -> () (* at root level, would be caught elsewhere *) 312 + end 313 + | _ -> () 314 + 315 let start_element state ~name ~namespace ~attrs collector = 316 (* Only check HTML elements, not SVG or MathML *) 317 match namespace with ··· 320 (* Check for nesting violations *) 321 check_nesting state name attrs collector; 322 check_required_ancestors state name collector; 323 + check_metadata_element_context state name collector; 324 325 (* Update ancestor mask if this is a special ancestor *) 326 let new_mask = state.ancestor_mask in ··· 342 in 343 344 (* Push onto stack *) 345 + let is_transparent = List.mem name transparent_elements in 346 + let node = { ancestor_mask = state.ancestor_mask; name; is_transparent } in 347 state.stack <- node :: state.stack; 348 state.ancestor_mask <- new_mask 349
+52 -31
lib/html5_checker/semantic/obsolete_checker.ml
··· 242 tbl 243 244 (** Checker state *) 245 - type state = unit 246 247 - let create () = () 248 249 - let reset _state = () 250 251 - let start_element _state ~name ~namespace ~attrs collector = 252 (* Only check HTML elements (no namespace or explicit HTML namespace) *) 253 let is_html = match namespace with 254 | None -> true ··· 259 else begin 260 let name_lower = String.lowercase_ascii name in 261 262 (* Check for obsolete element *) 263 (match Hashtbl.find_opt obsolete_elements name_lower with 264 | None -> () ··· 270 List.iter (fun (attr_name, _attr_value) -> 271 let attr_lower = String.lowercase_ascii attr_name in 272 273 - (* Check specific obsolete attributes for this element *) 274 - (match Hashtbl.find_opt obsolete_attributes attr_lower with 275 - | None -> () 276 - | Some element_map -> 277 - (match Hashtbl.find_opt element_map name_lower with 278 - | None -> () 279 - | Some suggestion -> 280 - Message_collector.add_typed collector 281 - (Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some suggestion }))); 282 - 283 - (* Check obsolete style attributes *) 284 - (match Hashtbl.find_opt obsolete_style_attrs attr_lower with 285 - | None -> () 286 - | Some elements -> 287 - if List.mem name_lower elements then 288 - Message_collector.add_typed collector 289 - (Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some "Use CSS instead." })); 290 - 291 - (* Check obsolete global attributes *) 292 - (match Hashtbl.find_opt obsolete_global_attrs attr_lower with 293 - | None -> () 294 - | Some suggestion -> 295 - (* Global attributes use a different format - just "The X attribute is obsolete. Y" *) 296 Message_collector.add_error collector 297 - ~message:(Printf.sprintf "The %s attribute is obsolete. %s" (Error_code.q attr_name) suggestion) 298 - ~code:"obsolete-global-attribute" 299 ~element:name 300 ~attribute:attr_name 301 - ()) 302 ) attrs 303 end 304 305 - let end_element _state ~name:_ ~namespace:_ _collector = () 306 307 let characters _state _text _collector = () 308
··· 242 tbl 243 244 (** Checker state *) 245 + type state = { 246 + mutable in_head : bool; 247 + } 248 249 + let create () = { in_head = false } 250 251 + let reset state = state.in_head <- false 252 253 + let start_element state ~name ~namespace ~attrs collector = 254 (* Only check HTML elements (no namespace or explicit HTML namespace) *) 255 let is_html = match namespace with 256 | None -> true ··· 261 else begin 262 let name_lower = String.lowercase_ascii name in 263 264 + (* Track head context *) 265 + if name_lower = "head" then state.in_head <- true; 266 + 267 (* Check for obsolete element *) 268 (match Hashtbl.find_opt obsolete_elements name_lower with 269 | None -> () ··· 275 List.iter (fun (attr_name, _attr_value) -> 276 let attr_lower = String.lowercase_ascii attr_name in 277 278 + (* Special handling for scoped attribute on style *) 279 + if attr_lower = "scoped" && name_lower = "style" then begin 280 + (* Only report if style is in head (correct context) - otherwise the content model 281 + error from nesting_checker takes precedence *) 282 + if state.in_head then 283 Message_collector.add_error collector 284 + ~message:(Printf.sprintf "Attribute %s not allowed on element %s at this point." 285 + (Error_code.q attr_name) (Error_code.q name)) 286 + ~code:"disallowed-attribute" 287 ~element:name 288 ~attribute:attr_name 289 + () 290 + end else begin 291 + (* Check specific obsolete attributes for this element *) 292 + (match Hashtbl.find_opt obsolete_attributes attr_lower with 293 + | None -> () 294 + | Some element_map -> 295 + (match Hashtbl.find_opt element_map name_lower with 296 + | None -> () 297 + | Some suggestion -> 298 + Message_collector.add_typed collector 299 + (Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some suggestion }))); 300 + 301 + (* Check obsolete style attributes *) 302 + (match Hashtbl.find_opt obsolete_style_attrs attr_lower with 303 + | None -> () 304 + | Some elements -> 305 + if List.mem name_lower elements then 306 + Message_collector.add_typed collector 307 + (Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some "Use CSS instead." })); 308 + 309 + (* Check obsolete global attributes *) 310 + (match Hashtbl.find_opt obsolete_global_attrs attr_lower with 311 + | None -> () 312 + | Some suggestion -> 313 + (* Global attributes use a different format - just "The X attribute is obsolete. Y" *) 314 + Message_collector.add_error collector 315 + ~message:(Printf.sprintf "The %s attribute is obsolete. %s" (Error_code.q attr_name) suggestion) 316 + ~code:"obsolete-global-attribute" 317 + ~element:name 318 + ~attribute:attr_name 319 + ()) 320 + end 321 ) attrs 322 end 323 324 + let end_element state ~name ~namespace:_ _collector = 325 + let name_lower = String.lowercase_ascii name in 326 + if name_lower = "head" then state.in_head <- false 327 328 let characters _state _text _collector = () 329
+3 -5
lib/html5_checker/semantic/required_attr_checker.ml
··· 143 (* Valid values: empty string, auto, manual, hint *) 144 if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then 145 Message_collector.add_typed collector 146 - (Error_code.Bad_attr_value { 147 - element = element_name; 148 - attr = "popover"; 149 - value; 150 - reason = "Must be a valid popover state (auto, manual, or hint)." 151 }) 152 | None -> () 153
··· 143 (* Valid values: empty string, auto, manual, hint *) 144 if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then 145 Message_collector.add_typed collector 146 + (Error_code.Bad_attr_value_generic { 147 + message = Printf.sprintf "Bad value %s for attribute %s on element %s." 148 + (Error_code.q value) (Error_code.q "popover") (Error_code.q element_name) 149 }) 150 | None -> () 151
+12 -6
lib/html5_checker/specialized/aria_checker.ml
··· 673 | _ -> () 674 end; 675 676 - (* Validate explicit roles *) 677 - List.iter (fun role -> 678 - (* Check if role is valid *) 679 - if not (Hashtbl.mem valid_aria_roles role) then 680 Message_collector.add_error collector 681 ~message:(Printf.sprintf 682 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9crole\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d." 683 - role name) 684 ~code:"bad-role" 685 ~element:name 686 ~attribute:"role" 687 - (); 688 689 (* Check if role cannot be named *) 690 if Hashtbl.mem roles_which_cannot_be_named role && has_accessible_name then 691 Message_collector.add_error collector
··· 673 | _ -> () 674 end; 675 676 + (* Validate explicit roles - report full attribute value if any role is invalid *) 677 + let has_invalid_role = List.exists (fun role -> 678 + not (Hashtbl.mem valid_aria_roles role) 679 + ) explicit_roles in 680 + if has_invalid_role then begin 681 + match role_attr with 682 + | Some role_value -> 683 Message_collector.add_error collector 684 ~message:(Printf.sprintf 685 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9crole\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d." 686 + role_value name) 687 ~code:"bad-role" 688 ~element:name 689 ~attribute:"role" 690 + () 691 + | None -> () 692 + end; 693 694 + List.iter (fun role -> 695 (* Check if role cannot be named *) 696 if Hashtbl.mem roles_which_cannot_be_named role && has_accessible_name then 697 Message_collector.add_error collector
+54 -19
lib/html5_checker/specialized/datetime_checker.ml
··· 27 let validate_date s = 28 let pattern = Str.regexp "^\\([0-9]+\\)-\\([0-9][0-9]\\)-\\([0-9][0-9]\\)$" in 29 if not (Str.string_match pattern s 0) then 30 - (false, Some "Date must be in YYYY-MM-DD format") 31 else 32 let year_s = Str.matched_group 1 s in 33 let month_s = Str.matched_group 2 s in 34 let day_s = Str.matched_group 3 s in 35 if String.length year_s < 4 then 36 - (false, Some "Year must be at least 4 digits") 37 else 38 match (parse_int year_s, parse_int month_s, parse_int day_s) with 39 | None, _, _ | _, None, _ | _, _, None -> 40 (false, Some "Invalid year, month or day") 41 | Some year, Some month, Some day -> 42 if year < 1 then (false, Some "Year cannot be less than 1") 43 - else if month < 1 || month > 12 then (false, Some "Month out of range") 44 else if day < 1 then (false, Some "Day cannot be less than 1") 45 else 46 let max_day = max_day_for_month year month in ··· 71 let validate_time s = 72 let pattern = Str.regexp "^\\([0-9][0-9]\\):\\([0-9][0-9]\\)\\(:\\([0-9][0-9]\\)\\(\\.\\([0-9]+\\)\\)?\\)?$" in 73 if not (Str.string_match pattern s 0) then 74 - (false, Some "Time must be in HH:MM format") 75 else 76 let hour_s = Str.matched_group 1 s in 77 let minute_s = Str.matched_group 2 s in 78 match (parse_int hour_s, parse_int minute_s) with 79 | None, _ | _, None -> (false, Some "Invalid hour or minute") 80 | Some hour, Some minute -> 81 - if hour > 23 then (false, Some "Hour out of range") 82 - else if minute > 59 then (false, Some "Minute out of range") 83 else 84 let second_s = try Some (Str.matched_group 4 s) with Not_found -> None in 85 match second_s with ··· 88 match parse_int sec_s with 89 | None -> (false, Some "Invalid seconds") 90 | Some sec -> 91 - if sec > 59 then (false, Some "Second out of range") 92 else 93 (* Check milliseconds if present *) 94 let millis_s = try Some (Str.matched_group 6 s) with Not_found -> None in ··· 108 else 109 let year_s = Str.matched_group 1 s in 110 if String.length year_s < 4 then 111 - (false, Some "Year must be at least 4 digits") 112 else 113 match parse_int year_s with 114 | None -> (false, Some "Invalid year") ··· 125 let year_s = Str.matched_group 1 s in 126 let month_s = Str.matched_group 2 s in 127 if String.length year_s < 4 then 128 - (false, Some "Year must be at least 4 digits") 129 else 130 match (parse_int year_s, parse_int month_s) with 131 | None, _ | _, None -> (false, Some "Invalid year or month") ··· 143 let year_s = Str.matched_group 1 s in 144 let week_s = Str.matched_group 2 s in 145 if String.length year_s < 4 then 146 - (false, Some "Year must be at least 4 digits") 147 else 148 match (parse_int year_s, parse_int week_s) with 149 | None, _ | _, None -> (false, Some "Invalid year or week") ··· 222 (false, "+") 223 in 224 if not matched then 225 - TzError "Invalid timezone offset" 226 else 227 let hour_s = Str.matched_group 2 s in 228 let minute_s = Str.matched_group 3 s in 229 match (parse_int hour_s, parse_int minute_s) with 230 | None, _ | _, None -> TzError "Invalid timezone" 231 | Some hour, Some minute -> 232 - if hour > 23 || minute > 59 then TzError "Timezone offset out of range" 233 else begin 234 (* Check for unusual but valid offsets *) 235 let unusual_range = ··· 267 let time_and_tz = String.sub s (pos + 1) (String.length s - pos - 1) in 268 (* Validate date *) 269 match validate_date date_part with 270 - | (false, reason) -> 271 - DtError (match reason with Some r -> r | None -> "Invalid date") 272 | (true, _) -> 273 let date_old = has_old_year date_part in 274 (* Check if ends with Z *) 275 if String.length time_and_tz > 0 && time_and_tz.[String.length time_and_tz - 1] = 'Z' then begin 276 let time_part = String.sub time_and_tz 0 (String.length time_and_tz - 1) in 277 match validate_time time_part with 278 - | (false, _) -> DtError "The literal did not satisfy the datetime with timezone format" 279 | (true, _) -> 280 if date_old then DtWarning "Year may be mistyped" 281 else DtOk ··· 296 let time_part = String.sub time_and_tz 0 tp in 297 let tz_part = String.sub time_and_tz tp (String.length time_and_tz - tp) in 298 match validate_time time_part with 299 - | (false, _) -> DtError "The literal did not satisfy the datetime with timezone format" 300 | (true, _) -> 301 match validate_timezone_offset tz_part with 302 - | TzError _ -> DtError "The literal did not satisfy the datetime with timezone format" 303 | TzWarning w -> 304 DtWarning w 305 | TzOk -> ··· 400 | Some e -> Printf.sprintf "Bad date: %s." e 401 | None -> "Bad date: The literal did not satisfy the date format." 402 in 403 - Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s" 404 - value attr_name element_name tz_msg date_msg) 405 end 406 407 (** Checker state *)
··· 27 let validate_date s = 28 let pattern = Str.regexp "^\\([0-9]+\\)-\\([0-9][0-9]\\)-\\([0-9][0-9]\\)$" in 29 if not (Str.string_match pattern s 0) then 30 + (false, Some "The literal did not satisfy the date format") 31 else 32 let year_s = Str.matched_group 1 s in 33 let month_s = Str.matched_group 2 s in 34 let day_s = Str.matched_group 3 s in 35 if String.length year_s < 4 then 36 + (false, Some "The literal did not satisfy the date format") 37 else 38 match (parse_int year_s, parse_int month_s, parse_int day_s) with 39 | None, _, _ | _, None, _ | _, _, None -> 40 (false, Some "Invalid year, month or day") 41 | Some year, Some month, Some day -> 42 if year < 1 then (false, Some "Year cannot be less than 1") 43 + else if month = 0 then (false, Some "Month cannot be less than 1") 44 + else if month > 12 then (false, Some "Month cannot be greater than 12") 45 else if day < 1 then (false, Some "Day cannot be less than 1") 46 else 47 let max_day = max_day_for_month year month in ··· 72 let validate_time s = 73 let pattern = Str.regexp "^\\([0-9][0-9]\\):\\([0-9][0-9]\\)\\(:\\([0-9][0-9]\\)\\(\\.\\([0-9]+\\)\\)?\\)?$" in 74 if not (Str.string_match pattern s 0) then 75 + (false, None) (* Format error - return None so caller uses generic message *) 76 else 77 let hour_s = Str.matched_group 1 s in 78 let minute_s = Str.matched_group 2 s in 79 match (parse_int hour_s, parse_int minute_s) with 80 | None, _ | _, None -> (false, Some "Invalid hour or minute") 81 | Some hour, Some minute -> 82 + if hour > 23 then (false, Some "Hour cannot be greater than 23") 83 + else if minute > 59 then (false, Some "Minute cannot be greater than 59") 84 else 85 let second_s = try Some (Str.matched_group 4 s) with Not_found -> None in 86 match second_s with ··· 89 match parse_int sec_s with 90 | None -> (false, Some "Invalid seconds") 91 | Some sec -> 92 + if sec > 59 then (false, Some "Second cannot be greater than 59") 93 else 94 (* Check milliseconds if present *) 95 let millis_s = try Some (Str.matched_group 6 s) with Not_found -> None in ··· 109 else 110 let year_s = Str.matched_group 1 s in 111 if String.length year_s < 4 then 112 + (false, Some "The literal did not satisfy the date format") 113 else 114 match parse_int year_s with 115 | None -> (false, Some "Invalid year") ··· 126 let year_s = Str.matched_group 1 s in 127 let month_s = Str.matched_group 2 s in 128 if String.length year_s < 4 then 129 + (false, Some "The literal did not satisfy the date format") 130 else 131 match (parse_int year_s, parse_int month_s) with 132 | None, _ | _, None -> (false, Some "Invalid year or month") ··· 144 let year_s = Str.matched_group 1 s in 145 let week_s = Str.matched_group 2 s in 146 if String.length year_s < 4 then 147 + (false, Some "The literal did not satisfy the date format") 148 else 149 match (parse_int year_s, parse_int week_s) with 150 | None, _ | _, None -> (false, Some "Invalid year or week") ··· 223 (false, "+") 224 in 225 if not matched then 226 + TzError "The literal did not satisfy the datetime with timezone format" 227 else 228 let hour_s = Str.matched_group 2 s in 229 let minute_s = Str.matched_group 3 s in 230 match (parse_int hour_s, parse_int minute_s) with 231 | None, _ | _, None -> TzError "Invalid timezone" 232 | Some hour, Some minute -> 233 + if hour > 23 then TzError "Hours out of range in time zone designator" 234 + else if minute > 59 then TzError "Minutes out of range in time zone designator" 235 else begin 236 (* Check for unusual but valid offsets *) 237 let unusual_range = ··· 269 let time_and_tz = String.sub s (pos + 1) (String.length s - pos - 1) in 270 (* Validate date *) 271 match validate_date date_part with 272 + | (false, _) -> 273 + DtError "The literal did not satisfy the datetime with timezone format" 274 | (true, _) -> 275 let date_old = has_old_year date_part in 276 (* Check if ends with Z *) 277 if String.length time_and_tz > 0 && time_and_tz.[String.length time_and_tz - 1] = 'Z' then begin 278 let time_part = String.sub time_and_tz 0 (String.length time_and_tz - 1) in 279 match validate_time time_part with 280 + | (false, Some reason) -> DtError reason 281 + | (false, None) -> DtError "The literal did not satisfy the datetime with timezone format" 282 | (true, _) -> 283 if date_old then DtWarning "Year may be mistyped" 284 else DtOk ··· 299 let time_part = String.sub time_and_tz 0 tp in 300 let tz_part = String.sub time_and_tz tp (String.length time_and_tz - tp) in 301 match validate_time time_part with 302 + | (false, Some reason) -> DtError reason 303 + | (false, None) -> DtError "The literal did not satisfy the datetime with timezone format" 304 | (true, _) -> 305 match validate_timezone_offset tz_part with 306 + | TzError e -> DtError e 307 | TzWarning w -> 308 DtWarning w 309 | TzOk -> ··· 404 | Some e -> Printf.sprintf "Bad date: %s." e 405 | None -> "Bad date: The literal did not satisfy the date format." 406 in 407 + (* Order depends on error type. The Nu validator has specific patterns: 408 + - Time hour/minute errors (not timezone) -> datetime first 409 + - Timezone hours error -> datetime first 410 + - Timezone minutes error -> date first 411 + - Time fraction error -> date first 412 + - Date "less than" error -> date first 413 + - Date "greater than" error -> datetime first 414 + - Generic errors both sides -> datetime first *) 415 + let is_generic_tz = tz_error = "The literal did not satisfy the datetime with timezone format" in 416 + let is_tz_hours_error = String.length tz_error >= 5 && String.sub tz_error 0 5 = "Hours" in 417 + let is_tz_minutes_error = String.length tz_error >= 7 && String.sub tz_error 0 7 = "Minutes" in 418 + let is_time_minute_or_hour_error = 419 + (try ignore (Str.search_forward (Str.regexp "Minute cannot\\|Hour cannot") tz_error 0); true with Not_found -> false) 420 + in 421 + let is_fraction_error = try ignore (Str.search_forward (Str.regexp "fraction") tz_error 0); true with Not_found -> false in 422 + let is_month_less_than_error = match date_error with 423 + | Some e -> (try ignore (Str.search_forward (Str.regexp "Month cannot be less than") e 0); true with Not_found -> false) 424 + | None -> false 425 + in 426 + (* Datetime first for: generic tz, tz hours error, time minute/hour errors, year errors 427 + Date first for: "Month cannot be less than" date error, tz minutes error, fraction error *) 428 + if is_month_less_than_error then 429 + Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s" 430 + value attr_name element_name date_msg tz_msg) 431 + else if is_tz_minutes_error || is_fraction_error then 432 + Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s" 433 + value attr_name element_name date_msg tz_msg) 434 + else if is_tz_hours_error || is_time_minute_or_hour_error || is_generic_tz then 435 + Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s" 436 + value attr_name element_name tz_msg date_msg) 437 + else 438 + Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s" 439 + value attr_name element_name tz_msg date_msg) 440 end 441 442 (** Checker state *)
+58 -27
lib/html5_checker/specialized/dl_checker.ml
··· 8 mutable contains_div : bool; 9 mutable contains_dt_dd : bool; 10 mutable dd_before_dt_error_reported : bool; (* Track if we've reported dd-before-dt error *) 11 } 12 13 type div_context = { ··· 72 else begin 73 match name_lower with 74 | "template" -> 75 - state.in_template <- state.in_template + 1 76 77 | "dl" when state.in_template = 0 -> 78 - (* Check for nested dl - only error if direct child (not inside dt/dd) *) 79 - begin match current_dl state with 80 - | Some _ when state.in_dt_dd = 0 && state.div_in_dl_stack = [] -> 81 Message_collector.add_error collector 82 - ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdl\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)" 83 ~code:"disallowed-child" 84 ~element:"dl" () 85 - | _ -> () 86 end; 87 let ctx = { 88 has_dt = false; ··· 91 contains_div = false; 92 contains_dt_dd = false; 93 dd_before_dt_error_reported = false; 94 } in 95 state.dl_stack <- ctx :: state.dl_stack 96 ··· 131 state.in_dt_dd <- state.in_dt_dd + 1; 132 begin match current_div state with 133 | Some div_ctx -> 134 - div_ctx.has_dt <- true; 135 - (* If we've seen dd, this dt starts a new group *) 136 if div_ctx.in_dd_part then begin 137 div_ctx.group_count <- div_ctx.group_count + 1; 138 div_ctx.in_dd_part <- false 139 - end 140 | None -> 141 match current_dl state with 142 | Some dl_ctx -> ··· 236 ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing a required child element." 237 ~code:"missing-required-child" 238 ~element:"dl" () 239 - else if not ctx.has_dd then 240 - Message_collector.add_error collector 241 - ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d." 242 - ~code:"missing-required-child" 243 - ~element:"dl" () 244 else if ctx.last_was_dt then 245 - (* Ended with dt, missing dd *) 246 Message_collector.add_error collector 247 ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d." 248 ~code:"missing-required-child" ··· 274 ~message:"Element \xe2\x80\x9cdiv\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d." 275 ~code:"missing-required-child" 276 ~element:"div" () 277 - else if div_ctx.group_count > 1 then 278 - (* Multiple name-value groups in a single div is not allowed *) 279 - Message_collector.add_error collector 280 - ~message:"A child \xe2\x80\x9cdiv\xe2\x80\x9d element of a \xe2\x80\x9cdl\xe2\x80\x9d element must contain only one name-value group." 281 - ~code:"multiple-groups-in-div" 282 - ~element:"div" () 283 | [] -> () 284 end 285 ··· 292 else begin 293 let trimmed = String.trim text in 294 if trimmed <> "" then begin 295 - (* Check for text directly in dl *) 296 - match current_dl state with 297 - | Some _ when state.div_in_dl_stack = [] -> 298 Message_collector.add_error collector 299 - ~message:"Text not allowed in element \xe2\x80\x9cdl\xe2\x80\x9d in this context." 300 ~code:"text-not-allowed" 301 - ~element:"dl" () 302 - | _ -> () 303 end 304 end 305
··· 8 mutable contains_div : bool; 9 mutable contains_dt_dd : bool; 10 mutable dd_before_dt_error_reported : bool; (* Track if we've reported dd-before-dt error *) 11 + mutable has_template : bool; (* Track if template element was seen inside dl *) 12 } 13 14 type div_context = { ··· 73 else begin 74 match name_lower with 75 | "template" -> 76 + state.in_template <- state.in_template + 1; 77 + (* Track if template is direct child of dl *) 78 + begin match current_dl state with 79 + | Some dl_ctx when state.div_in_dl_stack = [] -> 80 + dl_ctx.has_template <- true 81 + | _ -> () 82 + end 83 84 | "dl" when state.in_template = 0 -> 85 + (* Check for nested dl - error if direct child of dl OR inside div-in-dl *) 86 + begin match current_div state with 87 + | Some _ -> 88 + (* dl inside div-in-dl is not allowed *) 89 Message_collector.add_error collector 90 + ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdiv\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)" 91 ~code:"disallowed-child" 92 ~element:"dl" () 93 + | None -> 94 + match current_dl state with 95 + | Some _ when state.in_dt_dd = 0 -> 96 + Message_collector.add_error collector 97 + ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdl\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)" 98 + ~code:"disallowed-child" 99 + ~element:"dl" () 100 + | _ -> () 101 end; 102 let ctx = { 103 has_dt = false; ··· 106 contains_div = false; 107 contains_dt_dd = false; 108 dd_before_dt_error_reported = false; 109 + has_template = false; 110 } in 111 state.dl_stack <- ctx :: state.dl_stack 112 ··· 147 state.in_dt_dd <- state.in_dt_dd + 1; 148 begin match current_div state with 149 | Some div_ctx -> 150 + (* If we've already seen dd, this dt starts a new group - which is not allowed *) 151 if div_ctx.in_dd_part then begin 152 + Message_collector.add_error collector 153 + ~message:"Element \xe2\x80\x9cdt\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdiv\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)" 154 + ~code:"disallowed-child" 155 + ~element:"dt" (); 156 div_ctx.group_count <- div_ctx.group_count + 1; 157 div_ctx.in_dd_part <- false 158 + end; 159 + div_ctx.has_dt <- true 160 | None -> 161 match current_dl state with 162 | Some dl_ctx -> ··· 256 ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing a required child element." 257 ~code:"missing-required-child" 258 ~element:"dl" () 259 + else if not ctx.has_dd then begin 260 + (* If template is present in dl, use list format; otherwise use simple format *) 261 + if ctx.has_template then 262 + Message_collector.add_error collector 263 + ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing one or more of the following child elements: [dd]." 264 + ~code:"missing-required-child" 265 + ~element:"dl" () 266 + else 267 + Message_collector.add_error collector 268 + ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d." 269 + ~code:"missing-required-child" 270 + ~element:"dl" () 271 + end 272 else if ctx.last_was_dt then 273 + (* Ended with dt, missing dd for the last group *) 274 Message_collector.add_error collector 275 ~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d." 276 ~code:"missing-required-child" ··· 302 ~message:"Element \xe2\x80\x9cdiv\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d." 303 ~code:"missing-required-child" 304 ~element:"div" () 305 + (* Multiple groups error is now reported inline when dt appears after dd *) 306 | [] -> () 307 end 308 ··· 315 else begin 316 let trimmed = String.trim text in 317 if trimmed <> "" then begin 318 + (* Check for text directly in dl or div-in-dl *) 319 + match current_div state with 320 + | Some _ -> 321 + (* Text in div within dl is not allowed *) 322 Message_collector.add_error collector 323 + ~message:"Text not allowed in element \xe2\x80\x9cdiv\xe2\x80\x9d in this context." 324 ~code:"text-not-allowed" 325 + ~element:"div" () 326 + | None -> 327 + match current_dl state with 328 + | Some _ -> 329 + Message_collector.add_error collector 330 + ~message:"Text not allowed in element \xe2\x80\x9cdl\xe2\x80\x9d in this context." 331 + ~code:"text-not-allowed" 332 + ~element:"dl" () 333 + | None -> () 334 end 335 end 336
+13 -5
lib/html5_checker/specialized/microdata_checker.ml
··· 68 String.contains s ':' 69 70 (** Validate that a URL is a valid absolute URL for itemtype/itemid. 71 - Uses the comprehensive URL validation from Url_checker. *) 72 - let validate_microdata_url url element attr_name = 73 let url_trimmed = String.trim url in 74 if String.length url_trimmed = 0 then 75 Some (Printf.sprintf 76 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: Must be non-empty." 77 - url attr_name element) 78 else 79 (* First check if it has a scheme (required for absolute URL) *) 80 match Url_checker.extract_scheme url_trimmed with 81 | None -> 82 Some (Printf.sprintf 83 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: The string \xe2\x80\x9c%s\xe2\x80\x9d is not an absolute URL." 84 - url attr_name element url) 85 | Some _ -> 86 (* Has a scheme - do comprehensive URL validation *) 87 match Url_checker.validate_url url element attr_name with ··· 89 | Some error_msg -> 90 (* Replace "Bad URL:" with "Bad absolute URL:" for microdata *) 91 let error_msg = Str.global_replace (Str.regexp "Bad URL:") "Bad absolute URL:" error_msg in 92 Some error_msg 93 94 (** Check if itemprop value is valid. *) ··· 201 () 202 else 203 List.iter (fun url -> 204 - match validate_microdata_url url element "itemtype" with 205 | None -> () 206 | Some error_msg -> 207 Message_collector.add_error collector
··· 68 String.contains s ':' 69 70 (** Validate that a URL is a valid absolute URL for itemtype/itemid. 71 + Uses the comprehensive URL validation from Url_checker. 72 + original_value is the full attribute value (for error messages when split by whitespace) *) 73 + let validate_microdata_url url element attr_name original_value = 74 let url_trimmed = String.trim url in 75 if String.length url_trimmed = 0 then 76 Some (Printf.sprintf 77 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: Must be non-empty." 78 + original_value attr_name element) 79 else 80 (* First check if it has a scheme (required for absolute URL) *) 81 match Url_checker.extract_scheme url_trimmed with 82 | None -> 83 Some (Printf.sprintf 84 "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: The string \xe2\x80\x9c%s\xe2\x80\x9d is not an absolute URL." 85 + original_value attr_name element url) 86 | Some _ -> 87 (* Has a scheme - do comprehensive URL validation *) 88 match Url_checker.validate_url url element attr_name with ··· 90 | Some error_msg -> 91 (* Replace "Bad URL:" with "Bad absolute URL:" for microdata *) 92 let error_msg = Str.global_replace (Str.regexp "Bad URL:") "Bad absolute URL:" error_msg in 93 + (* Also replace the URL value with the original value in case they differ *) 94 + (* Escape backslashes in replacement string for Str.global_replace *) 95 + let escaped_original = Str.global_replace (Str.regexp "\\\\") "\\\\\\\\" original_value in 96 + let error_msg = Str.global_replace 97 + (Str.regexp_string (Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" url)) 98 + (Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" escaped_original) 99 + error_msg in 100 Some error_msg 101 102 (** Check if itemprop value is valid. *) ··· 209 () 210 else 211 List.iter (fun url -> 212 + match validate_microdata_url url element "itemtype" itemtype with 213 | None -> () 214 | Some error_msg -> 215 Message_collector.add_error collector
+60 -13
lib/html5_checker/specialized/picture_checker.ml
··· 34 mutable has_source_after_img : bool; 35 mutable has_always_matching_source : bool; (* source without media/type *) 36 mutable source_after_always_matching : bool; (* source after always-matching source *) 37 mutable parent_stack : string list; (* track parent elements *) 38 } 39 ··· 46 has_source_after_img = false; 47 has_always_matching_source = false; 48 source_after_always_matching = false; 49 parent_stack = []; 50 } 51 ··· 58 state.parent_stack <- []; 59 state.has_source_after_img <- false; 60 state.has_always_matching_source <- false; 61 - state.source_after_always_matching <- false 62 63 (** Check if an attribute list contains a specific attribute. *) 64 let has_attr name attrs = ··· 151 if String.lowercase_ascii attr_name = "media" then Some v else None 152 ) attrs in 153 let has_type = has_attr "type" attrs in 154 let is_always_matching = match media_value with 155 | None -> not has_type (* no media, check if no type either *) 156 | Some v -> 157 let trimmed = String.trim v in 158 trimmed = "" || String.lowercase_ascii trimmed = "all" 159 in 160 - if is_always_matching then 161 - state.has_always_matching_source <- true 162 163 | "img" when state.in_picture && state.picture_depth = 1 -> 164 check_img_attrs attrs collector; ··· 170 if img_count > 1 then 171 report_disallowed_child "picture" "img" collector; 172 (* Check if always-matching source is followed by img with srcset *) 173 - if state.has_always_matching_source && has_attr "srcset" attrs then 174 - Message_collector.add_error collector 175 - ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute." 176 - ~code:"always-matching-source-followed-by-srcset" 177 - ~element:"source" () 178 179 | "script" when state.in_picture && state.picture_depth = 1 -> 180 state.children_in_picture <- "script" :: state.children_in_picture ··· 216 if state.has_source_after_img then 217 report_disallowed_child "picture" "source" collector; 218 (* Check for source after always-matching source *) 219 - if state.source_after_always_matching then 220 - Message_collector.add_error collector 221 - ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that matches all media types cannot be followed by another \xe2\x80\x9csource\xe2\x80\x9d element." 222 - ~code:"always-matching-source" 223 - ~element:"source" (); 224 225 state.in_picture <- false 226 end;
··· 34 mutable has_source_after_img : bool; 35 mutable has_always_matching_source : bool; (* source without media/type *) 36 mutable source_after_always_matching : bool; (* source after always-matching source *) 37 + mutable always_matching_is_media_all : bool; (* true if caused by media="all" *) 38 + mutable always_matching_is_media_empty : bool; (* true if caused by media="" or whitespace *) 39 mutable parent_stack : string list; (* track parent elements *) 40 } 41 ··· 48 has_source_after_img = false; 49 has_always_matching_source = false; 50 source_after_always_matching = false; 51 + always_matching_is_media_all = false; 52 + always_matching_is_media_empty = false; 53 parent_stack = []; 54 } 55 ··· 62 state.parent_stack <- []; 63 state.has_source_after_img <- false; 64 state.has_always_matching_source <- false; 65 + state.source_after_always_matching <- false; 66 + state.always_matching_is_media_all <- false; 67 + state.always_matching_is_media_empty <- false 68 69 (** Check if an attribute list contains a specific attribute. *) 70 let has_attr name attrs = ··· 157 if String.lowercase_ascii attr_name = "media" then Some v else None 158 ) attrs in 159 let has_type = has_attr "type" attrs in 160 + let is_media_all = match media_value with 161 + | Some v -> String.lowercase_ascii (String.trim v) = "all" 162 + | None -> false 163 + in 164 + let is_media_empty = match media_value with 165 + | Some v -> String.trim v = "" 166 + | None -> false 167 + in 168 let is_always_matching = match media_value with 169 | None -> not has_type (* no media, check if no type either *) 170 | Some v -> 171 let trimmed = String.trim v in 172 trimmed = "" || String.lowercase_ascii trimmed = "all" 173 in 174 + if is_always_matching then begin 175 + state.has_always_matching_source <- true; 176 + if is_media_all then 177 + state.always_matching_is_media_all <- true 178 + else if is_media_empty then 179 + state.always_matching_is_media_empty <- true 180 + end 181 182 | "img" when state.in_picture && state.picture_depth = 1 -> 183 check_img_attrs attrs collector; ··· 189 if img_count > 1 then 190 report_disallowed_child "picture" "img" collector; 191 (* Check if always-matching source is followed by img with srcset *) 192 + if state.has_always_matching_source && has_attr "srcset" attrs then begin 193 + if state.always_matching_is_media_all then 194 + Message_collector.add_error collector 195 + ~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be \xe2\x80\x9call\xe2\x80\x9d." 196 + ~code:"media-all-not-allowed" 197 + ~element:"source" 198 + ~attribute:"media" () 199 + else if state.always_matching_is_media_empty then 200 + Message_collector.add_error collector 201 + ~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be empty." 202 + ~code:"media-empty-not-allowed" 203 + ~element:"source" 204 + ~attribute:"media" () 205 + else 206 + Message_collector.add_error collector 207 + ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute." 208 + ~code:"always-matching-source-followed-by-srcset" 209 + ~element:"source" () 210 + end 211 212 | "script" when state.in_picture && state.picture_depth = 1 -> 213 state.children_in_picture <- "script" :: state.children_in_picture ··· 249 if state.has_source_after_img then 250 report_disallowed_child "picture" "source" collector; 251 (* Check for source after always-matching source *) 252 + if state.source_after_always_matching then begin 253 + if state.always_matching_is_media_all then 254 + Message_collector.add_error collector 255 + ~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be \xe2\x80\x9call\xe2\x80\x9d." 256 + ~code:"media-all-not-allowed" 257 + ~element:"source" 258 + ~attribute:"media" () 259 + else if state.always_matching_is_media_empty then 260 + Message_collector.add_error collector 261 + ~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be empty." 262 + ~code:"media-empty-not-allowed" 263 + ~element:"source" 264 + ~attribute:"media" () 265 + else 266 + Message_collector.add_error collector 267 + ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute." 268 + ~code:"always-matching-source" 269 + ~element:"source" () 270 + end; 271 272 state.in_picture <- false 273 end;
+508 -136
lib/html5_checker/specialized/srcset_sizes_checker.ml
··· 54 Buffer.contents buf 55 56 (** Check if a size value has a valid CSS length unit and non-negative value *) 57 - type size_check_result = Valid | InvalidUnit | NegativeValue | CssCommentInside | BadScientificNotation 58 59 (** Check if CSS comment appears in an invalid position: 60 - Between sign and number (+/**/50vw) 61 - Between number and unit (50/**/vw) 62 Trailing comments (50vw/**/) are valid. *) 63 - let has_invalid_css_comment s = 64 let len = String.length s in 65 (* Find comment position *) 66 let rec find_comment i = ··· 69 else find_comment (i + 1) 70 in 71 match find_comment 0 with 72 - | None -> false 73 | Some comment_pos -> 74 let before = String.sub s 0 comment_pos in 75 let trimmed_before = String.trim before in 76 - if String.length trimmed_before = 0 then false (* Leading comment is OK *) 77 else begin 78 (* Find end of comment *) 79 let rec find_end i = ··· 84 let end_pos = find_end (comment_pos + 2) in 85 let after = if end_pos < len then String.sub s end_pos (len - end_pos) else "" in 86 let trimmed_after = String.trim (strip_css_comments after) in 87 - if trimmed_after = "" then false (* Trailing comment is OK *) 88 else begin 89 (* Comment is in the middle - check if it breaks a number/unit combo *) 90 let last = trimmed_before.[String.length trimmed_before - 1] in 91 - (* Invalid if comment appears after +/- or after a digit (before more non-whitespace) *) 92 - (last >= '0' && last <= '9') || last = '+' || last = '-' || last = '.' 93 end 94 end 95 96 (** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *) 97 let has_invalid_scientific_notation s = ··· 109 in 110 String.contains after_sign '.' 111 112 let check_size_value size_value = 113 let trimmed = String.trim size_value in 114 - if trimmed = "" then InvalidUnit 115 - (* Check for CSS comments inside numbers - this is invalid *) 116 - else if has_invalid_css_comment trimmed then CssCommentInside 117 else begin 118 (* Strip valid leading/trailing CSS comments for further checks *) 119 let value_no_comments = String.trim (strip_css_comments trimmed) in 120 (* Check for invalid scientific notation like 1e+1.5px *) 121 if has_invalid_scientific_notation value_no_comments then BadScientificNotation 122 (* "auto" is only valid with lazy loading, which requires checking the element context. 123 For general validation, treat "auto" alone as invalid in sizes. *) 124 - else if String.lowercase_ascii value_no_comments = "auto" then InvalidUnit 125 - else if value_no_comments = "" then InvalidUnit 126 else begin 127 let lower = String.lowercase_ascii value_no_comments in 128 - (* Check for invalid units first *) 129 - let has_invalid = List.exists (fun unit -> 130 - let len = String.length unit in 131 - String.length lower > len && 132 - String.sub lower (String.length lower - len) len = unit 133 - ) invalid_size_units in 134 - if has_invalid then InvalidUnit 135 else begin 136 - (* Check for valid CSS length units *) 137 - let has_valid_unit = List.exists (fun unit -> 138 - let len = String.length unit in 139 - String.length lower > len && 140 - String.sub lower (String.length lower - len) len = unit 141 - ) valid_length_units in 142 - if has_valid_unit then begin 143 - (* Check if it's negative (starts with - but not -0) *) 144 - if String.length value_no_comments > 0 && value_no_comments.[0] = '-' then begin 145 - (* Check if it's -0 which is valid *) 146 - let after_minus = String.sub value_no_comments 1 (String.length value_no_comments - 1) in 147 - try 148 - let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus in 149 - let f = float_of_string num_str in 150 - if f = 0.0 then Valid else NegativeValue 151 - with _ -> NegativeValue 152 - end else 153 - Valid 154 - end 155 - (* Could be calc() or other CSS functions - allow those *) 156 - else if String.contains value_no_comments '(' then Valid 157 else begin 158 - (* Check if it's a zero value (0, -0, +0) - these are valid without units *) 159 - let stripped = 160 - let s = value_no_comments in 161 - let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in 162 - s 163 - in 164 - (* Check if it's zero or a numeric value starting with 0 *) 165 - try 166 - let f = float_of_string stripped in 167 - if f = 0.0 then Valid else InvalidUnit 168 - with _ -> InvalidUnit 169 end 170 end 171 end ··· 174 let has_valid_size_unit size_value = 175 match check_size_value size_value with 176 | Valid -> true 177 - | InvalidUnit | NegativeValue | CssCommentInside | BadScientificNotation -> false 178 179 (** Check if a sizes entry has a media condition (starts with '(') *) 180 let has_media_condition entry = ··· 236 if not (has_media_condition trimmed) then 237 trimmed 238 else begin 239 - (* Find matching closing paren, then get the size value after it *) 240 let len = String.length trimmed in 241 - let rec find_close_paren i depth = 242 if i >= len then len 243 - else match trimmed.[i] with 244 - | '(' -> find_close_paren (i + 1) (depth + 1) 245 - | ')' -> if depth = 1 then i + 1 else find_close_paren (i + 1) (depth - 1) 246 - | _ -> find_close_paren (i + 1) depth 247 in 248 - let after_paren = find_close_paren 0 0 in 249 - if after_paren >= len then "" 250 - else String.trim (String.sub trimmed after_paren (len - after_paren)) 251 end 252 253 (** Validate sizes attribute value *) ··· 275 (* Check for trailing comma *) 276 let last_entry = String.trim (List.nth entries (List.length entries - 1)) in 277 if List.length entries > 1 && last_entry = "" then begin 278 Message_collector.add_error collector 279 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Ends with trailing comma." value element_name) 280 ~code:"bad-sizes-value" 281 ~element:element_name ~attribute:"sizes" (); 282 false ··· 285 286 (* Check for default-first pattern: unconditional value before conditional ones *) 287 let non_empty_entries = List.filter (fun e -> String.trim e <> "") entries in 288 - if List.length non_empty_entries > 1 then begin 289 - let first = List.hd non_empty_entries in 290 - let rest = List.tl non_empty_entries in 291 (* If first entry has no media condition but later ones do, that's invalid *) 292 if not (has_media_condition first) && List.exists has_media_condition rest then begin 293 Message_collector.add_error collector 294 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Default size must be last." value element_name) 295 ~code:"bad-sizes-value" 296 ~element:element_name ~attribute:"sizes" (); 297 valid := false 298 end; 299 - (* Check for multiple consecutive defaults (entries without media conditions) *) 300 - let defaults_without_media = List.filter (fun e -> not (has_media_condition e)) non_empty_entries in 301 - if List.length defaults_without_media > 1 then begin 302 - Message_collector.add_error collector 303 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Multiple source sizes without media conditions." value element_name) 304 - ~code:"bad-sizes-value" 305 - ~element:element_name ~attribute:"sizes" (); 306 - valid := false 307 end 308 end; 309 310 (* Validate each entry's media condition and size value *) 311 - List.iter (fun entry -> 312 let trimmed = String.trim entry in 313 if trimmed <> "" then begin 314 (* Check for invalid media condition *) 315 (match has_invalid_media_condition trimmed with 316 | Some err_msg -> 317 Message_collector.add_error collector 318 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: %s." value element_name err_msg) 319 ~code:"bad-sizes-value" 320 ~element:element_name ~attribute:"sizes" (); 321 valid := false ··· 323 324 let size_val = extract_size_value trimmed in 325 if size_val <> "" then begin 326 - match check_size_value size_val with 327 | Valid -> () 328 | NegativeValue -> 329 Message_collector.add_error collector 330 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Source size value cannot be negative." value element_name) 331 ~code:"bad-sizes-value" 332 ~element:element_name ~attribute:"sizes" (); 333 valid := false 334 - | CssCommentInside -> 335 Message_collector.add_error collector 336 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token." value element_name) 337 ~code:"bad-sizes-value" 338 ~element:element_name ~attribute:"sizes" (); 339 valid := false 340 | BadScientificNotation -> 341 Message_collector.add_error collector 342 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token." value element_name) 343 ~code:"bad-sizes-value" 344 ~element:element_name ~attribute:"sizes" (); 345 valid := false 346 - | InvalidUnit -> 347 Message_collector.add_error collector 348 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size value." value element_name) 349 ~code:"bad-sizes-value" 350 ~element:element_name ~attribute:"sizes" (); 351 valid := false ··· 359 end 360 361 (** Validate srcset descriptor *) 362 - let validate_srcset_descriptor desc element_name srcset_value collector = 363 let desc_lower = String.lowercase_ascii (String.trim desc) in 364 if String.length desc_lower = 0 then true 365 else begin ··· 371 (* Width descriptor - must be positive integer, no leading + *) 372 let trimmed_desc = String.trim desc in 373 if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin 374 Message_collector.add_error collector 375 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name trimmed_desc srcset_value) 376 ~code:"bad-srcset-value" 377 ~element:element_name ~attribute:"srcset" (); 378 false ··· 381 let n = int_of_string num_part in 382 if n <= 0 then begin 383 Message_collector.add_error collector 384 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Width must be positive." srcset_value element_name) 385 ~code:"bad-srcset-value" 386 ~element:element_name ~attribute:"srcset" (); 387 false ··· 390 let original_last = desc.[String.length desc - 1] in 391 if original_last = 'W' then begin 392 Message_collector.add_error collector 393 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Width descriptor must use lowercase \xe2\x80\x9cw\xe2\x80\x9d." srcset_value element_name) 394 ~code:"bad-srcset-value" 395 ~element:element_name ~attribute:"srcset" (); 396 false 397 end else true 398 end 399 with _ -> 400 - (* Check for scientific notation or decimal *) 401 - if String.contains num_part 'e' || String.contains num_part 'E' then begin 402 Message_collector.add_error collector 403 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Scientific notation not allowed in width descriptor." srcset_value element_name) 404 ~code:"bad-srcset-value" 405 ~element:element_name ~attribute:"srcset" (); 406 false ··· 415 (* Pixel density descriptor - must be positive number, no leading + *) 416 let trimmed_desc = String.trim desc in 417 if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin 418 Message_collector.add_error collector 419 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Leading plus sign not allowed." srcset_value element_name) 420 ~code:"bad-srcset-value" 421 ~element:element_name ~attribute:"srcset" (); 422 false ··· 424 (try 425 let n = float_of_string num_part in 426 if Float.is_nan n then begin 427 Message_collector.add_error collector 428 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: NaN not allowed." srcset_value element_name) 429 ~code:"bad-srcset-value" 430 ~element:element_name ~attribute:"srcset" (); 431 false 432 - end else if n <= 0.0 then begin 433 Message_collector.add_error collector 434 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Density must be positive." srcset_value element_name) 435 ~code:"bad-srcset-value" 436 ~element:element_name ~attribute:"srcset" (); 437 false 438 end else if n = neg_infinity || n = infinity then begin 439 Message_collector.add_error collector 440 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Infinity not allowed." srcset_value element_name) 441 ~code:"bad-srcset-value" 442 ~element:element_name ~attribute:"srcset" (); 443 false ··· 451 end 452 | 'h' -> 453 (* Height descriptor - not allowed *) 454 - Message_collector.add_error collector 455 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Height descriptor \xe2\x80\x9ch\xe2\x80\x9d is not allowed." srcset_value element_name) 456 - ~code:"bad-srcset-value" 457 - ~element:element_name ~attribute:"srcset" (); 458 false 459 | _ -> 460 - (* Unknown descriptor *) 461 Message_collector.add_error collector 462 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor." srcset_value element_name) 463 ~code:"bad-srcset-value" 464 ~element:element_name ~attribute:"srcset" (); 465 false ··· 489 let entries = String.split_on_char ',' value in 490 let has_w_descriptor = ref false in 491 let has_x_descriptor = ref false in 492 - let has_no_descriptor = ref false in (* Track if any entry has no descriptor *) 493 - let seen_descriptors = Hashtbl.create 8 in (* Track seen descriptor values *) 494 495 (* Check for empty srcset *) 496 if String.trim value = "" then begin 497 Message_collector.add_error collector 498 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Must not be empty." value element_name) 499 ~code:"bad-srcset-value" 500 ~element:element_name ~attribute:"srcset" () 501 end; ··· 503 (* Check for leading comma *) 504 if String.length value > 0 && value.[0] = ',' then begin 505 Message_collector.add_error collector 506 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Leading comma." value element_name) 507 ~code:"bad-srcset-value" 508 ~element:element_name ~attribute:"srcset" () 509 end; 510 511 - (* Check for trailing comma *) 512 let trimmed_value = String.trim value in 513 if String.length trimmed_value > 0 && trimmed_value.[String.length trimmed_value - 1] = ',' then begin 514 - Message_collector.add_error collector 515 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Trailing comma." value element_name) 516 - ~code:"bad-srcset-value" 517 - ~element:element_name ~attribute:"srcset" () 518 end; 519 520 List.iter (fun entry -> ··· 532 let scheme_colon = scheme ^ ":" in 533 if url_lower = scheme_colon then 534 Message_collector.add_error collector 535 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a slash (\"/\")." value element_name) 536 ~code:"bad-srcset-url" 537 ~element:element_name ~attribute:"srcset" () 538 ) special_schemes ··· 542 | [url] -> 543 check_srcset_url url; 544 (* URL only = implicit 1x descriptor - only flag if explicit 1x also seen *) 545 - has_no_descriptor := true; 546 - if Hashtbl.mem seen_descriptors "explicit-1x" then begin 547 Message_collector.add_error collector 548 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name) 549 ~code:"bad-srcset-value" 550 ~element:element_name ~attribute:"srcset" () 551 - end else 552 - Hashtbl.add seen_descriptors "implicit-1x" true 553 | url :: desc :: rest -> 554 (* Check URL for broken schemes *) 555 check_srcset_url url; 556 (* Check for extra junk - multiple descriptors are not allowed *) 557 if rest <> [] then begin 558 Message_collector.add_error collector 559 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Multiple descriptors in candidate." value element_name) 560 ~code:"bad-srcset-value" 561 ~element:element_name ~attribute:"srcset" () 562 end; ··· 565 if String.length desc_lower > 0 then begin 566 let last_char = desc_lower.[String.length desc_lower - 1] in 567 if last_char = 'w' then has_w_descriptor := true 568 - else if last_char = 'x' then has_x_descriptor := true; 569 570 (* Check for duplicate descriptors - use normalized form *) 571 let normalized = normalize_descriptor desc in 572 let is_1x = (normalized = "1x") in 573 - if Hashtbl.mem seen_descriptors normalized then begin 574 - Message_collector.add_error collector 575 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name) 576 - ~code:"bad-srcset-value" 577 - ~element:element_name ~attribute:"srcset" () 578 - end else if is_1x && Hashtbl.mem seen_descriptors "implicit-1x" then begin 579 - (* Explicit 1x conflicts with implicit 1x *) 580 Message_collector.add_error collector 581 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name) 582 ~code:"bad-srcset-value" 583 ~element:element_name ~attribute:"srcset" () 584 - end else begin 585 - Hashtbl.add seen_descriptors normalized true; 586 - if is_1x then Hashtbl.add seen_descriptors "explicit-1x" true 587 end 588 end; 589 590 - ignore (validate_srcset_descriptor desc element_name value collector) 591 end 592 ) entries; 593 594 (* Check: if w descriptor used and no sizes, that's an error for img and source *) 595 if !has_w_descriptor && not has_sizes then 596 Message_collector.add_error collector 597 - ~message:(Printf.sprintf "When the \xe2\x80\x9csrcset\xe2\x80\x9d attribute on the \xe2\x80\x9c%s\xe2\x80\x9d element uses width descriptors, the \xe2\x80\x9csizes\xe2\x80\x9d attribute must also be present." element_name) 598 ~code:"srcset-w-without-sizes" 599 ~element:element_name ~attribute:"srcset" (); 600 601 (* Check: if sizes is present, all entries must have width descriptors *) 602 - if has_sizes && !has_no_descriptor then 603 Message_collector.add_error collector 604 - ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: No width specified for image. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name) 605 ~code:"bad-srcset-value" 606 - ~element:element_name ~attribute:"srcset" (); 607 608 - (* Check: if sizes is present and srcset uses x descriptors, that's an error *) 609 - if has_sizes && !has_x_descriptor then 610 Message_collector.add_error collector 611 ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width." value element_name) 612 ~code:"bad-srcset-value"
··· 54 Buffer.contents buf 55 56 (** Check if a size value has a valid CSS length unit and non-negative value *) 57 + type size_check_result = 58 + | Valid 59 + | InvalidUnit of string * string (* (found_unit, context) *) 60 + | NegativeValue 61 + | CssCommentAfterSign of string * string (* what was found, context *) 62 + | CssCommentBeforeUnit of string * string (* what was found, context *) 63 + | BadScientificNotation 64 + | BadCssNumber of char * string (* (first_char, context) - not starting with digit or minus *) 65 + 66 + (** CSS comment error types *) 67 + type css_comment_error = 68 + | NoCommentError 69 + | CommentAfterSign of string * string (* what was found, context *) 70 + | CommentBetweenNumberAndUnit of string * string (* what was found at comment position, context *) 71 72 (** Check if CSS comment appears in an invalid position: 73 - Between sign and number (+/**/50vw) 74 - Between number and unit (50/**/vw) 75 Trailing comments (50vw/**/) are valid. *) 76 + let check_css_comment_position s = 77 let len = String.length s in 78 (* Find comment position *) 79 let rec find_comment i = ··· 82 else find_comment (i + 1) 83 in 84 match find_comment 0 with 85 + | None -> NoCommentError 86 | Some comment_pos -> 87 let before = String.sub s 0 comment_pos in 88 let trimmed_before = String.trim before in 89 + if String.length trimmed_before = 0 then NoCommentError (* Leading comment is OK *) 90 else begin 91 (* Find end of comment *) 92 let rec find_end i = ··· 97 let end_pos = find_end (comment_pos + 2) in 98 let after = if end_pos < len then String.sub s end_pos (len - end_pos) else "" in 99 let trimmed_after = String.trim (strip_css_comments after) in 100 + if trimmed_after = "" then NoCommentError (* Trailing comment is OK *) 101 else begin 102 (* Comment is in the middle - check if it breaks a number/unit combo *) 103 let last = trimmed_before.[String.length trimmed_before - 1] in 104 + (* What's at the comment position? Just show "/" *) 105 + let slash = "/" in 106 + (* Invalid if comment appears after +/- *) 107 + if last = '+' || last = '-' then 108 + CommentAfterSign (trimmed_before ^ slash, s) 109 + (* Invalid if comment appears after digit (before more content) *) 110 + else if (last >= '0' && last <= '9') || last = '.' then 111 + CommentBetweenNumberAndUnit (slash ^ trimmed_after, s) 112 + else 113 + NoCommentError 114 end 115 end 116 + 117 + (** For backward compatibility *) 118 + let has_invalid_css_comment s = 119 + match check_css_comment_position s with 120 + | NoCommentError -> false 121 + | _ -> true 122 123 (** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *) 124 let has_invalid_scientific_notation s = ··· 136 in 137 String.contains after_sign '.' 138 139 + (** Extract unit from a size value like "10px" -> "px", "100vw" -> "vw", "50%" -> "%" 140 + Returns the unit with original case preserved *) 141 + let extract_unit s = 142 + let trimmed = String.trim s in 143 + let len = String.length trimmed in 144 + if len = 0 then "" 145 + (* Check for % at the end *) 146 + else if trimmed.[len - 1] = '%' then "%" 147 + else begin 148 + let lower = String.lowercase_ascii trimmed in 149 + (* Try to find a unit at the end (letters only) *) 150 + let rec find_unit_length i = 151 + if i < 0 then 0 152 + else if lower.[i] >= 'a' && lower.[i] <= 'z' then find_unit_length (i - 1) 153 + else i + 1 154 + in 155 + let start = find_unit_length (len - 1) in 156 + if start < len then 157 + (* Return the unit from the original string (preserving case) *) 158 + String.sub trimmed start (len - start) 159 + else "" 160 + end 161 + 162 let check_size_value size_value = 163 let trimmed = String.trim size_value in 164 + if trimmed = "" then InvalidUnit ("", trimmed) 165 else begin 166 + (* Check for CSS comments inside numbers - this is invalid *) 167 + match check_css_comment_position trimmed with 168 + | CommentAfterSign (found, ctx) -> CssCommentAfterSign (found, ctx) 169 + | CommentBetweenNumberAndUnit (found, ctx) -> CssCommentBeforeUnit (found, ctx) 170 + | NoCommentError -> 171 (* Strip valid leading/trailing CSS comments for further checks *) 172 let value_no_comments = String.trim (strip_css_comments trimmed) in 173 (* Check for invalid scientific notation like 1e+1.5px *) 174 if has_invalid_scientific_notation value_no_comments then BadScientificNotation 175 (* "auto" is only valid with lazy loading, which requires checking the element context. 176 For general validation, treat "auto" alone as invalid in sizes. *) 177 + else if String.lowercase_ascii value_no_comments = "auto" then 178 + BadCssNumber (value_no_comments.[0], trimmed) 179 + else if value_no_comments = "" then InvalidUnit ("", trimmed) 180 else begin 181 let lower = String.lowercase_ascii value_no_comments in 182 + (* Check for calc() or other CSS functions first - these are always valid *) 183 + if String.contains value_no_comments '(' then Valid 184 else begin 185 + (* Check if the value starts with a digit, minus, or plus sign *) 186 + let first_char = value_no_comments.[0] in 187 + let starts_with_number = 188 + (first_char >= '0' && first_char <= '9') || 189 + first_char = '-' || 190 + first_char = '+' || 191 + first_char = '.' (* decimal point like .5px *) 192 + in 193 + if not starts_with_number then 194 + (* Not a valid CSS number token - doesn't start with digit or sign *) 195 + BadCssNumber (first_char, trimmed) 196 else begin 197 + (* Check for invalid units first *) 198 + let found_invalid = List.find_opt (fun unit -> 199 + let len = String.length unit in 200 + String.length lower > len && 201 + String.sub lower (String.length lower - len) len = unit 202 + ) invalid_size_units in 203 + match found_invalid with 204 + | Some _unit -> InvalidUnit (extract_unit value_no_comments, trimmed) 205 + | None -> 206 + (* Check for valid CSS length units *) 207 + let has_valid_unit = List.exists (fun unit -> 208 + let len = String.length unit in 209 + String.length lower > len && 210 + String.sub lower (String.length lower - len) len = unit 211 + ) valid_length_units in 212 + if has_valid_unit then begin 213 + (* Check if it's negative (starts with - but not -0) *) 214 + if String.length value_no_comments > 0 && value_no_comments.[0] = '-' then begin 215 + (* Check if it's -0 which is valid *) 216 + let after_minus = String.sub value_no_comments 1 (String.length value_no_comments - 1) in 217 + try 218 + let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus in 219 + let f = float_of_string num_str in 220 + if f = 0.0 then Valid else NegativeValue 221 + with _ -> NegativeValue 222 + end else 223 + Valid 224 + end 225 + else begin 226 + (* Check if it's a zero value (0, -0, +0) - these are valid without units *) 227 + let stripped = 228 + let s = value_no_comments in 229 + let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in 230 + s 231 + in 232 + (* Check if it's zero or a numeric value starting with 0 *) 233 + try 234 + let f = float_of_string stripped in 235 + if f = 0.0 then Valid else InvalidUnit (extract_unit value_no_comments, trimmed) 236 + with _ -> InvalidUnit (extract_unit value_no_comments, trimmed) 237 + end 238 end 239 end 240 end ··· 243 let has_valid_size_unit size_value = 244 match check_size_value size_value with 245 | Valid -> true 246 + | InvalidUnit (_, _) | NegativeValue | CssCommentAfterSign (_, _) | CssCommentBeforeUnit (_, _) | BadScientificNotation | BadCssNumber (_, _) -> false 247 248 (** Check if a sizes entry has a media condition (starts with '(') *) 249 let has_media_condition entry = ··· 305 if not (has_media_condition trimmed) then 306 trimmed 307 else begin 308 + (* Media conditions can have "and", "or", "not" operators connecting 309 + multiple parenthesized groups, e.g., "(not (width:500px)) and (width:500px) 500px" 310 + We need to skip all media condition parts to find the size value *) 311 let len = String.length trimmed in 312 + let rec skip_media_condition i = 313 if i >= len then len 314 + else begin 315 + let remaining = String.trim (String.sub trimmed i (len - i)) in 316 + let remaining_len = String.length remaining in 317 + if remaining_len = 0 then len 318 + else begin 319 + let first_char = remaining.[0] in 320 + if first_char = '(' then begin 321 + (* Skip this parenthesized group *) 322 + let rec find_close_paren j depth = 323 + if j >= remaining_len then remaining_len 324 + else match remaining.[j] with 325 + | '(' -> find_close_paren (j + 1) (depth + 1) 326 + | ')' -> if depth = 1 then j + 1 else find_close_paren (j + 1) (depth - 1) 327 + | _ -> find_close_paren (j + 1) depth 328 + in 329 + let after_paren = find_close_paren 0 0 in 330 + let new_pos = i + (len - i) - remaining_len + after_paren in 331 + skip_media_condition new_pos 332 + end 333 + else begin 334 + (* Check if remaining starts with "and", "or", "not" followed by space or paren *) 335 + let lower_remaining = String.lowercase_ascii remaining in 336 + if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and " then 337 + skip_media_condition (i + (len - i) - remaining_len + 4) 338 + else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or " then 339 + skip_media_condition (i + (len - i) - remaining_len + 3) 340 + else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "not " then 341 + skip_media_condition (i + (len - i) - remaining_len + 4) 342 + else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and(" then 343 + skip_media_condition (i + (len - i) - remaining_len + 3) 344 + else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or(" then 345 + skip_media_condition (i + (len - i) - remaining_len + 2) 346 + else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "not(" then 347 + skip_media_condition (i + (len - i) - remaining_len + 3) 348 + else 349 + (* Found something that's not a media condition part - this is the size value *) 350 + i + (len - i) - remaining_len 351 + end 352 + end 353 + end 354 in 355 + let size_start = skip_media_condition 0 in 356 + if size_start >= len then "" 357 + else String.trim (String.sub trimmed size_start (len - size_start)) 358 end 359 360 (** Validate sizes attribute value *) ··· 382 (* Check for trailing comma *) 383 let last_entry = String.trim (List.nth entries (List.length entries - 1)) in 384 if List.length entries > 1 && last_entry = "" then begin 385 + (* Generate abbreviated context - show last ~25 chars with ellipsis if needed *) 386 + let context = 387 + if String.length value > 25 then 388 + "\xe2\x80\xa6" ^ String.sub value (String.length value - 25) 25 389 + else value 390 + in 391 Message_collector.add_error collector 392 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context) 393 ~code:"bad-sizes-value" 394 ~element:element_name ~attribute:"sizes" (); 395 false ··· 398 399 (* Check for default-first pattern: unconditional value before conditional ones *) 400 let non_empty_entries = List.filter (fun e -> String.trim e <> "") entries in 401 + (* Filter out entries that have invalid media conditions - they'll be reported separately *) 402 + let valid_entries = List.filter (fun e -> 403 + has_invalid_media_condition (String.trim e) = None 404 + ) non_empty_entries in 405 + if List.length valid_entries > 1 then begin 406 + let first = List.hd valid_entries in 407 + let rest = List.tl valid_entries in 408 (* If first entry has no media condition but later ones do, that's invalid *) 409 if not (has_media_condition first) && List.exists has_media_condition rest then begin 410 + (* Context is the first entry with a comma *) 411 + let context = (String.trim first) ^ "," in 412 Message_collector.add_error collector 413 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context) 414 ~code:"bad-sizes-value" 415 ~element:element_name ~attribute:"sizes" (); 416 valid := false 417 end; 418 + (* Check for multiple entries without media conditions. 419 + When the first entry has no media condition, report "Expected media condition" 420 + regardless of whether later entries have media conditions or not *) 421 + if not (has_media_condition first) && !valid then begin 422 + (* Only report if we haven't already reported the default-first error *) 423 + if not (List.exists has_media_condition rest) then begin 424 + (* Multiple defaults - report as "Expected media condition" *) 425 + let context = (String.trim first) ^ "," in 426 + Message_collector.add_error collector 427 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context) 428 + ~code:"bad-sizes-value" 429 + ~element:element_name ~attribute:"sizes" (); 430 + valid := false 431 + end 432 end 433 end; 434 435 (* Validate each entry's media condition and size value *) 436 + let num_entries = List.length entries in 437 + List.iteri (fun idx entry -> 438 let trimmed = String.trim entry in 439 if trimmed <> "" then begin 440 (* Check for invalid media condition *) 441 (match has_invalid_media_condition trimmed with 442 | Some err_msg -> 443 + (* Generate context: "entry," with ellipsis if needed *) 444 + let context = (String.trim entry) ^ "," in 445 + let context = 446 + if String.length context > 25 then 447 + "\xe2\x80\xa6" ^ String.sub context (String.length context - 25) 25 448 + else context 449 + in 450 Message_collector.add_error collector 451 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: %s at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name err_msg context) 452 ~code:"bad-sizes-value" 453 ~element:element_name ~attribute:"sizes" (); 454 valid := false ··· 456 457 let size_val = extract_size_value trimmed in 458 if size_val <> "" then begin 459 + (* Check if there are multiple space-separated words in the size value. 460 + Only the first word should be the size, rest is junk. *) 461 + let size_parts = String.split_on_char ' ' size_val |> List.filter (fun s -> s <> "") in 462 + let first_size = match size_parts with [] -> size_val | hd :: _ -> hd in 463 + let extra_parts = match size_parts with [] -> [] | _ :: tl -> tl in 464 + 465 + (* Check if first word looks like it should have been a media condition 466 + (doesn't start with digit, sign, decimal, '/', or look like a CSS function) *) 467 + let first_char = if String.length first_size > 0 then first_size.[0] else 'x' in 468 + let has_paren = String.contains size_val '(' in (* calc(), etc. *) 469 + let looks_like_junk_entry = 470 + not (has_media_condition trimmed) && 471 + not has_paren && (* Allow CSS functions like calc() *) 472 + not (first_char = '/') && (* Allow leading CSS comments *) 473 + not ((first_char >= '0' && first_char <= '9') || 474 + first_char = '+' || first_char = '-' || first_char = '.') 475 + in 476 + 477 + (* If this entry looks like junk and there are multiple entries, 478 + report "Expected media condition" instead of "Bad CSS number". 479 + For single entries with invalid values, fall through to BadCssNumber. *) 480 + if looks_like_junk_entry && num_entries > 1 then begin 481 + (* Find the context ending with the previous entry *) 482 + let prev_entries = List.filter (fun e -> String.trim e <> "" && e <> entry) entries in 483 + let context = 484 + if List.length prev_entries > 0 then 485 + let prev_value = String.concat ", " (List.map String.trim prev_entries) ^ "," in 486 + if String.length prev_value > 25 then 487 + "\xe2\x80\xa6" ^ String.sub prev_value (String.length prev_value - 25) 25 488 + else prev_value 489 + else value 490 + in 491 + Message_collector.add_error collector 492 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context) 493 + ~code:"bad-sizes-value" 494 + ~element:element_name ~attribute:"sizes" (); 495 + valid := false 496 + end 497 + (* If there's extra junk after the size, report BadCssNumber error for it *) 498 + else if extra_parts <> [] then begin 499 + let junk = String.concat " " extra_parts in 500 + let last_junk = List.nth extra_parts (List.length extra_parts - 1) in 501 + let first_char = if String.length last_junk > 0 then last_junk.[0] else 'x' in 502 + (* Context depends on whether this is the last entry: 503 + - For non-last entries: entry with trailing comma, truncated from beginning 504 + - For last entry: full value truncated from beginning (no trailing comma) *) 505 + let is_last_entry = idx = num_entries - 1 in 506 + let context = 507 + if is_last_entry then begin 508 + (* Last entry: use full value truncated *) 509 + if String.length value > 25 then 510 + "\xe2\x80\xa6" ^ String.sub value (String.length value - 25) 25 511 + else value 512 + end else begin 513 + (* Non-last entry: use entry with comma, truncated *) 514 + let entry_with_comma = trimmed ^ "," in 515 + if String.length entry_with_comma > 25 then 516 + "\xe2\x80\xa6" ^ String.sub entry_with_comma (String.length entry_with_comma - 25) 25 517 + else entry_with_comma 518 + end 519 + in 520 + let _ = junk in 521 + Message_collector.add_error collector 522 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name first_char context) 523 + ~code:"bad-sizes-value" 524 + ~element:element_name ~attribute:"sizes" (); 525 + valid := false 526 + end 527 + else 528 + match check_size_value first_size with 529 | Valid -> () 530 | NegativeValue -> 531 + let full_context = 532 + if List.length entries > 1 then size_val ^ "," 533 + else size_val 534 + in 535 + let _ = full_context in 536 Message_collector.add_error collector 537 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected positive size value but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name size_val size_val) 538 + ~code:"bad-sizes-value" 539 + ~element:element_name ~attribute:"sizes" (); 540 + valid := false 541 + | CssCommentAfterSign (found, context) -> 542 + (* e.g., +/**/50vw - expected number after sign *) 543 + Message_collector.add_error collector 544 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected number but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name found context) 545 ~code:"bad-sizes-value" 546 ~element:element_name ~attribute:"sizes" (); 547 valid := false 548 + | CssCommentBeforeUnit (found, context) -> 549 + (* e.g., 50/**/vw - expected units after number *) 550 + let units_list = List.map (fun u -> Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" u) valid_length_units in 551 + let units_str = String.concat ", " units_list in 552 Message_collector.add_error collector 553 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected units (one of %s) but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name units_str found context) 554 ~code:"bad-sizes-value" 555 ~element:element_name ~attribute:"sizes" (); 556 valid := false 557 | BadScientificNotation -> 558 + (* For scientific notation with bad exponent, show what char was expected vs found *) 559 + let context = 560 + if List.length entries > 1 then trimmed ^ "," 561 + else trimmed 562 + in 563 + (* Find the period in the exponent *) 564 + let _ = context in 565 Message_collector.add_error collector 566 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a digit but saw \xe2\x80\x9c.\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name size_val) 567 ~code:"bad-sizes-value" 568 ~element:element_name ~attribute:"sizes" (); 569 valid := false 570 + | BadCssNumber (first_char, context) -> 571 + (* Value doesn't start with a digit or minus sign *) 572 + let full_context = 573 + if List.length entries > 1 then context ^ "," 574 + else context 575 + in 576 + let _ = full_context in 577 + Message_collector.add_error collector 578 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name first_char context) 579 + ~code:"bad-sizes-value" 580 + ~element:element_name ~attribute:"sizes" (); 581 + valid := false 582 + | InvalidUnit (found_unit, _context) -> 583 + (* Generate the full list of expected units *) 584 + let units_list = List.map (fun u -> Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" u) valid_length_units in 585 + let units_str = String.concat ", " units_list in 586 + (* Context should be the full entry, with comma only if there are multiple entries *) 587 + let full_context = 588 + if List.length entries > 1 then trimmed ^ "," 589 + else trimmed 590 + in 591 + (* When found_unit is empty, say "no units" instead of quoting empty string *) 592 + let found_str = 593 + if found_unit = "" then "no units" 594 + else Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" found_unit 595 + in 596 Message_collector.add_error collector 597 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected units (one of %s) but found %s at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name units_str found_str full_context) 598 ~code:"bad-sizes-value" 599 ~element:element_name ~attribute:"sizes" (); 600 valid := false ··· 608 end 609 610 (** Validate srcset descriptor *) 611 + let validate_srcset_descriptor desc element_name srcset_value has_sizes collector = 612 let desc_lower = String.lowercase_ascii (String.trim desc) in 613 if String.length desc_lower = 0 then true 614 else begin ··· 620 (* Width descriptor - must be positive integer, no leading + *) 621 let trimmed_desc = String.trim desc in 622 if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin 623 + (* Show just the number part (without the 'w') *) 624 + let num_part_for_msg = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in 625 Message_collector.add_error collector 626 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part_for_msg srcset_value) 627 ~code:"bad-srcset-value" 628 ~element:element_name ~attribute:"srcset" (); 629 false ··· 632 let n = int_of_string num_part in 633 if n <= 0 then begin 634 Message_collector.add_error collector 635 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value) 636 ~code:"bad-srcset-value" 637 ~element:element_name ~attribute:"srcset" (); 638 false ··· 641 let original_last = desc.[String.length desc - 1] in 642 if original_last = 'W' then begin 643 Message_collector.add_error collector 644 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" srcset_value element_name desc srcset_value) 645 ~code:"bad-srcset-value" 646 ~element:element_name ~attribute:"srcset" (); 647 false 648 end else true 649 end 650 with _ -> 651 + (* Check for scientific notation, decimal, or other non-integer values *) 652 + if String.contains num_part 'e' || String.contains num_part 'E' || String.contains num_part '.' then begin 653 Message_collector.add_error collector 654 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected integer but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value) 655 ~code:"bad-srcset-value" 656 ~element:element_name ~attribute:"srcset" (); 657 false ··· 666 (* Pixel density descriptor - must be positive number, no leading + *) 667 let trimmed_desc = String.trim desc in 668 if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin 669 + (* Extract the number part including the plus sign *) 670 + let num_with_plus = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in 671 Message_collector.add_error collector 672 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_with_plus srcset_value) 673 ~code:"bad-srcset-value" 674 ~element:element_name ~attribute:"srcset" (); 675 false ··· 677 (try 678 let n = float_of_string num_part in 679 if Float.is_nan n then begin 680 + (* NaN is not a valid float - report as parse error with first char from ORIGINAL desc *) 681 + let trimmed_desc = String.trim desc in 682 + let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in 683 + let first_char = if String.length orig_num_part > 0 then String.make 1 orig_num_part.[0] else "" in 684 Message_collector.add_error collector 685 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Expected a digit but saw \xe2\x80\x9c%s\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name first_char srcset_value) 686 ~code:"bad-srcset-value" 687 ~element:element_name ~attribute:"srcset" (); 688 false 689 + end else if n = 0.0 then begin 690 + (* Check if it's -0 (starts with minus) - report as "greater than zero" error *) 691 + let trimmed_desc = String.trim desc in 692 + let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in 693 + if String.length orig_num_part > 0 && orig_num_part.[0] = '-' then begin 694 + Message_collector.add_error collector 695 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name orig_num_part srcset_value) 696 + ~code:"bad-srcset-value" 697 + ~element:element_name ~attribute:"srcset" () 698 + end else begin 699 + Message_collector.add_error collector 700 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Zero is not a valid positive floating point number at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name srcset_value) 701 + ~code:"bad-srcset-value" 702 + ~element:element_name ~attribute:"srcset" () 703 + end; 704 + false 705 + end else if n < 0.0 then begin 706 Message_collector.add_error collector 707 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value) 708 ~code:"bad-srcset-value" 709 ~element:element_name ~attribute:"srcset" (); 710 false 711 end else if n = neg_infinity || n = infinity then begin 712 + (* Infinity is not a valid float - report as parse error with first char from ORIGINAL desc *) 713 + let trimmed_desc = String.trim desc in 714 + let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in 715 + let first_char = if String.length orig_num_part > 0 then String.make 1 orig_num_part.[0] else "" in 716 Message_collector.add_error collector 717 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Expected a digit but saw \xe2\x80\x9c%s\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name first_char srcset_value) 718 ~code:"bad-srcset-value" 719 ~element:element_name ~attribute:"srcset" (); 720 false ··· 728 end 729 | 'h' -> 730 (* Height descriptor - not allowed *) 731 + let trimmed_desc = String.trim desc in 732 + (* Generate context: find where this entry appears *) 733 + let context = 734 + try 735 + let pos = Str.search_forward (Str.regexp_string trimmed_desc) srcset_value 0 in 736 + (* Get the entry context ending with comma *) 737 + let search_from = max 0 (pos - 3) in 738 + let comma_pos = try Str.search_forward (Str.regexp_string ",") srcset_value pos with Not_found -> String.length srcset_value - 1 in 739 + let end_pos = min (comma_pos + 1) (String.length srcset_value) in 740 + let len = end_pos - search_from in 741 + if len > 0 then String.trim (String.sub srcset_value search_from len) else srcset_value 742 + with Not_found | Invalid_argument _ -> srcset_value 743 + in 744 + if has_sizes then 745 + Message_collector.add_error collector 746 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" srcset_value element_name trimmed_desc context) 747 + ~code:"bad-srcset-value" 748 + ~element:element_name ~attribute:"srcset" () 749 + else 750 + Message_collector.add_error collector 751 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Height descriptor \xe2\x80\x9ch\xe2\x80\x9d is not allowed." srcset_value element_name) 752 + ~code:"bad-srcset-value" 753 + ~element:element_name ~attribute:"srcset" (); 754 false 755 | _ -> 756 + (* Unknown descriptor - find context in srcset_value *) 757 + let trimmed_desc = String.trim desc in 758 + (* Try to find the context: find where this descriptor appears in srcset_value *) 759 + let context = 760 + try 761 + let pos = Str.search_forward (Str.regexp_string trimmed_desc) srcset_value 0 in 762 + (* Get the context up to and including the descriptor and the comma after *) 763 + let end_pos = min (pos + String.length trimmed_desc + 1) (String.length srcset_value) in 764 + let start_pos = max 0 (pos - 2) in 765 + String.trim (String.sub srcset_value start_pos (end_pos - start_pos)) 766 + with Not_found -> srcset_value 767 + in 768 Message_collector.add_error collector 769 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number followed by \xe2\x80\x9cw\xe2\x80\x9d or \xe2\x80\x9cx\xe2\x80\x9d but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name trimmed_desc context) 770 ~code:"bad-srcset-value" 771 ~element:element_name ~attribute:"srcset" (); 772 false ··· 796 let entries = String.split_on_char ',' value in 797 let has_w_descriptor = ref false in 798 let has_x_descriptor = ref false in 799 + let no_descriptor_url = ref None in (* Track URL of first entry without width descriptor *) 800 + let x_with_sizes_error_reported = ref false in (* Track if we already reported x-with-sizes error *) 801 + let seen_descriptors = Hashtbl.create 8 in (* Track seen descriptor values -> first URL *) 802 803 (* Check for empty srcset *) 804 if String.trim value = "" then begin 805 Message_collector.add_error collector 806 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Must contain one or more image candidate strings." value element_name) 807 ~code:"bad-srcset-value" 808 ~element:element_name ~attribute:"srcset" () 809 end; ··· 811 (* Check for leading comma *) 812 if String.length value > 0 && value.[0] = ',' then begin 813 Message_collector.add_error collector 814 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Starts with empty image-candidate string." value element_name) 815 ~code:"bad-srcset-value" 816 ~element:element_name ~attribute:"srcset" () 817 end; 818 819 + (* Check for trailing comma(s) / empty entries *) 820 let trimmed_value = String.trim value in 821 if String.length trimmed_value > 0 && trimmed_value.[String.length trimmed_value - 1] = ',' then begin 822 + (* Count consecutive trailing commas *) 823 + let rec count_trailing_commas s idx count = 824 + if idx < 0 then count 825 + else if s.[idx] = ',' then count_trailing_commas s (idx - 1) (count + 1) 826 + else if s.[idx] = ' ' || s.[idx] = '\t' then count_trailing_commas s (idx - 1) count 827 + else count 828 + in 829 + let trailing_commas = count_trailing_commas trimmed_value (String.length trimmed_value - 1) 0 in 830 + if trailing_commas > 1 then 831 + (* Multiple trailing commas: "Empty image-candidate string at" *) 832 + Message_collector.add_error collector 833 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Empty image-candidate string at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name value) 834 + ~code:"bad-srcset-value" 835 + ~element:element_name ~attribute:"srcset" () 836 + else 837 + (* Single trailing comma: "Ends with empty image-candidate string." *) 838 + Message_collector.add_error collector 839 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Ends with empty image-candidate string." value element_name) 840 + ~code:"bad-srcset-value" 841 + ~element:element_name ~attribute:"srcset" () 842 end; 843 844 List.iter (fun entry -> ··· 856 let scheme_colon = scheme ^ ":" in 857 if url_lower = scheme_colon then 858 Message_collector.add_error collector 859 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad image-candidate URL: \xe2\x80\x9c%s\xe2\x80\x9d: Expected a slash (\"/\")." value element_name url) 860 ~code:"bad-srcset-url" 861 ~element:element_name ~attribute:"srcset" () 862 ) special_schemes ··· 866 | [url] -> 867 check_srcset_url url; 868 (* URL only = implicit 1x descriptor - only flag if explicit 1x also seen *) 869 + if !no_descriptor_url = None then no_descriptor_url := Some url; 870 + begin match Hashtbl.find_opt seen_descriptors "explicit-1x" with 871 + | Some first_url -> 872 Message_collector.add_error collector 873 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Density for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to density for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name url first_url) 874 ~code:"bad-srcset-value" 875 ~element:element_name ~attribute:"srcset" () 876 + | None -> 877 + Hashtbl.add seen_descriptors "implicit-1x" url 878 + end 879 | url :: desc :: rest -> 880 (* Check URL for broken schemes *) 881 check_srcset_url url; 882 (* Check for extra junk - multiple descriptors are not allowed *) 883 if rest <> [] then begin 884 + let extra_desc = List.hd rest in 885 Message_collector.add_error collector 886 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected single descriptor but found extraneous descriptor \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name extra_desc value) 887 ~code:"bad-srcset-value" 888 ~element:element_name ~attribute:"srcset" () 889 end; ··· 892 if String.length desc_lower > 0 then begin 893 let last_char = desc_lower.[String.length desc_lower - 1] in 894 if last_char = 'w' then has_w_descriptor := true 895 + else if last_char = 'x' then begin 896 + has_x_descriptor := true; 897 + (* If sizes is present and we have an x descriptor, generate detailed error *) 898 + if has_sizes && not !x_with_sizes_error_reported then begin 899 + x_with_sizes_error_reported := true; 900 + (* Build context: 901 + - If entry has extra parts (multiple descriptors): show "url descriptor " 902 + - Else if entry has trailing comma: show "url descriptor," 903 + - Else (last entry, no extra parts): show full srcset value *) 904 + let trimmed_url = String.trim url in 905 + let trimmed_desc = String.trim desc in 906 + let entry_context = 907 + if rest <> [] then 908 + (* Entry has multiple descriptors - show URL + first descriptor + space *) 909 + trimmed_url ^ " " ^ trimmed_desc ^ " " 910 + else 911 + (* Check if entry ends with comma in original value *) 912 + let trimmed_entry = String.trim entry in 913 + try 914 + let entry_start = Str.search_forward (Str.regexp_string trimmed_url) value 0 in 915 + let entry_end = entry_start + String.length trimmed_entry in 916 + let has_trailing_comma = entry_end < String.length value && value.[entry_end] = ',' in 917 + if has_trailing_comma then 918 + (* Entry followed by comma - show "url descriptor," *) 919 + trimmed_url ^ " " ^ trimmed_desc ^ "," 920 + else 921 + (* Last entry - show full srcset value *) 922 + value 923 + with Not_found -> 924 + value 925 + in 926 + Message_collector.add_error collector 927 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name trimmed_desc entry_context) 928 + ~code:"bad-srcset-value" 929 + ~element:element_name ~attribute:"srcset" () 930 + end 931 + end; 932 933 (* Check for duplicate descriptors - use normalized form *) 934 let normalized = normalize_descriptor desc in 935 let is_1x = (normalized = "1x") in 936 + let is_width = (last_char = 'w') in 937 + let dup_type = if is_width then "Width" else "Density" in 938 + begin match Hashtbl.find_opt seen_descriptors normalized with 939 + | Some first_url -> 940 Message_collector.add_error collector 941 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to %s for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name dup_type url (String.lowercase_ascii dup_type) first_url) 942 ~code:"bad-srcset-value" 943 ~element:element_name ~attribute:"srcset" () 944 + | None -> 945 + begin match (if is_1x then Hashtbl.find_opt seen_descriptors "implicit-1x" else None) with 946 + | Some first_url -> 947 + (* Explicit 1x conflicts with implicit 1x *) 948 + Message_collector.add_error collector 949 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to %s for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name dup_type url (String.lowercase_ascii dup_type) first_url) 950 + ~code:"bad-srcset-value" 951 + ~element:element_name ~attribute:"srcset" () 952 + | None -> 953 + Hashtbl.add seen_descriptors normalized url; 954 + if is_1x then Hashtbl.add seen_descriptors "explicit-1x" url 955 + end 956 end 957 end; 958 959 + ignore (validate_srcset_descriptor desc element_name value has_sizes collector) 960 end 961 ) entries; 962 963 (* Check: if w descriptor used and no sizes, that's an error for img and source *) 964 if !has_w_descriptor && not has_sizes then 965 Message_collector.add_error collector 966 + ~message:"When the \xe2\x80\x9csrcset\xe2\x80\x9d attribute has any image candidate string with a width descriptor, the \xe2\x80\x9csizes\xe2\x80\x9d attribute must also be specified." 967 ~code:"srcset-w-without-sizes" 968 ~element:element_name ~attribute:"srcset" (); 969 970 (* Check: if sizes is present, all entries must have width descriptors *) 971 + (match !no_descriptor_url with 972 + | Some url when has_sizes -> 973 Message_collector.add_error collector 974 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: No width specified for image \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name url) 975 ~code:"bad-srcset-value" 976 + ~element:element_name ~attribute:"srcset" () 977 + | _ -> ()); 978 979 + (* Check: if sizes is present and srcset uses x descriptors, that's an error. 980 + Only report if we haven't already reported the detailed error. *) 981 + if has_sizes && !has_x_descriptor && not !x_with_sizes_error_reported then 982 Message_collector.add_error collector 983 ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width." value element_name) 984 ~code:"bad-srcset-value"
+3
lib/html5_checker/specialized/xhtml_content_checker.ml
··· 34 | "menu" -> 35 (* menu only allows li, script, template *) 36 List.mem child ["li"; "script"; "template"] 37 | _ -> true 38 39 (* Check if text is allowed in element *)
··· 34 | "menu" -> 35 (* menu only allows li, script, template *) 36 List.mem child ["li"; "script"; "template"] 37 + | "table" -> 38 + (* col must be in colgroup, not directly in table *) 39 + child <> "col" 40 | _ -> true 41 42 (* Check if text is allowed in element *)
+13 -13
lib/html5rw/parser/parser_tree_builder.ml
··· 787 t.open_elements <- [html]; 788 t.mode <- Parser_insertion_mode.Before_head; 789 process_token t token 790 - | Token.Tag { kind = Token.End; _ } -> 791 - parse_error t "unexpected-end-tag" 792 | _ -> 793 let html = insert_element t "html" [] in 794 t.open_elements <- [html]; ··· 813 t.head_element <- Some head; 814 t.mode <- Parser_insertion_mode.In_head; 815 process_token t token 816 - | Token.Tag { kind = Token.End; _ } -> 817 - parse_error t "unexpected-end-tag" 818 | _ -> 819 let head = insert_element t "head" [] in 820 t.open_elements <- head :: t.open_elements; ··· 902 end 903 | Token.Tag { kind = Token.Start; name = "head"; _ } -> 904 parse_error t "unexpected-start-tag" 905 - | Token.Tag { kind = Token.End; _ } -> 906 - parse_error t "unexpected-end-tag" 907 | _ -> 908 pop_current t; 909 t.mode <- Parser_insertion_mode.After_head; ··· 943 pop_current t; (* Pop noscript *) 944 t.mode <- Parser_insertion_mode.In_head; 945 process_token t token 946 - | Token.Tag { kind = Token.End; _ } -> 947 - parse_error t "unexpected-end-tag" 948 | Token.EOF -> 949 parse_error t "expected-closing-tag-but-got-eof"; 950 pop_current t; (* Pop noscript *) ··· 998 process_token t token 999 | Token.Tag { kind = Token.Start; name = "head"; _ } -> 1000 parse_error t "unexpected-start-tag" 1001 - | Token.Tag { kind = Token.End; _ } -> 1002 - parse_error t "unexpected-end-tag" 1003 | _ -> 1004 let body = insert_element t "body" [] in 1005 t.open_elements <- body :: t.open_elements; ··· 1447 | _ -> ()); 1448 pop_until t (fun n -> n == node) 1449 end else if is_special_element node then 1450 - parse_error t "unexpected-end-tag" 1451 else 1452 check rest 1453 in ··· 2056 t.template_modes <- Parser_insertion_mode.In_body :: t.template_modes; 2057 t.mode <- Parser_insertion_mode.In_body; 2058 process_token t token 2059 - | Token.Tag { kind = Token.End; _ } -> 2060 - parse_error t "unexpected-end-tag" 2061 | Token.EOF -> 2062 if not (List.exists (fun n -> n.Dom.name = "template" && is_in_html_namespace n) t.open_elements) then 2063 () (* Stop parsing *)
··· 787 t.open_elements <- [html]; 788 t.mode <- Parser_insertion_mode.Before_head; 789 process_token t token 790 + | Token.Tag { kind = Token.End; name; _ } -> 791 + parse_error t ("unexpected-end-tag:" ^ name) 792 | _ -> 793 let html = insert_element t "html" [] in 794 t.open_elements <- [html]; ··· 813 t.head_element <- Some head; 814 t.mode <- Parser_insertion_mode.In_head; 815 process_token t token 816 + | Token.Tag { kind = Token.End; name; _ } -> 817 + parse_error t ("unexpected-end-tag:" ^ name) 818 | _ -> 819 let head = insert_element t "head" [] in 820 t.open_elements <- head :: t.open_elements; ··· 902 end 903 | Token.Tag { kind = Token.Start; name = "head"; _ } -> 904 parse_error t "unexpected-start-tag" 905 + | Token.Tag { kind = Token.End; name; _ } -> 906 + parse_error t ("unexpected-end-tag:" ^ name) 907 | _ -> 908 pop_current t; 909 t.mode <- Parser_insertion_mode.After_head; ··· 943 pop_current t; (* Pop noscript *) 944 t.mode <- Parser_insertion_mode.In_head; 945 process_token t token 946 + | Token.Tag { kind = Token.End; name; _ } -> 947 + parse_error t ("unexpected-end-tag:" ^ name) 948 | Token.EOF -> 949 parse_error t "expected-closing-tag-but-got-eof"; 950 pop_current t; (* Pop noscript *) ··· 998 process_token t token 999 | Token.Tag { kind = Token.Start; name = "head"; _ } -> 1000 parse_error t "unexpected-start-tag" 1001 + | Token.Tag { kind = Token.End; name; _ } -> 1002 + parse_error t ("unexpected-end-tag:" ^ name) 1003 | _ -> 1004 let body = insert_element t "body" [] in 1005 t.open_elements <- body :: t.open_elements; ··· 1447 | _ -> ()); 1448 pop_until t (fun n -> n == node) 1449 end else if is_special_element node then 1450 + parse_error t ("unexpected-end-tag:" ^ name) 1451 else 1452 check rest 1453 in ··· 2056 t.template_modes <- Parser_insertion_mode.In_body :: t.template_modes; 2057 t.mode <- Parser_insertion_mode.In_body; 2058 process_token t token 2059 + | Token.Tag { kind = Token.End; name; _ } -> 2060 + parse_error t ("unexpected-end-tag:" ^ name) 2061 | Token.EOF -> 2062 if not (List.exists (fun n -> n.Dom.name = "template" && is_in_html_namespace n) t.open_elements) then 2063 () (* Stop parsing *)