OCaml HTML5 parser/serialiser based on Python's JustHTML

more

Changed files
+510 -3
lib
+1
lib/html5_checker/checker_registry.ml
··· 32 32 Hashtbl.replace reg "label" Label_checker.checker; 33 33 Hashtbl.replace reg "ruby" Ruby_checker.checker; 34 34 Hashtbl.replace reg "h1" H1_checker.checker; 35 + Hashtbl.replace reg "srcset-sizes" Srcset_sizes_checker.checker; 35 36 (* Hashtbl.replace reg "table" Table_checker.checker; *) 36 37 (* Hashtbl.replace reg "heading" Heading_checker.checker; *) 37 38 (* Hashtbl.replace reg "microdata" Microdata_checker.checker; *)
+36 -3
lib/html5_checker/specialized/picture_checker.ml
··· 27 27 mutable children_in_picture : string list; 28 28 mutable last_was_img : bool; 29 29 mutable has_source_after_img : bool; 30 + mutable has_always_matching_source : bool; (* source without media/type *) 31 + mutable source_after_always_matching : bool; (* source after always-matching source *) 30 32 } 31 33 32 34 let create () = { ··· 36 38 children_in_picture = []; 37 39 last_was_img = false; 38 40 has_source_after_img = false; 41 + has_always_matching_source = false; 42 + source_after_always_matching = false; 39 43 } 40 44 41 45 let reset state = ··· 44 48 state.picture_depth <- 0; 45 49 state.children_in_picture <- []; 46 50 state.last_was_img <- false; 47 - state.has_source_after_img <- false 51 + state.has_source_after_img <- false; 52 + state.has_always_matching_source <- false; 53 + state.source_after_always_matching <- false 48 54 49 55 (** Check if an attribute list contains a specific attribute. *) 50 56 let has_attr name attrs = ··· 109 115 state.picture_depth <- 0; (* Will be incremented to 1 at end of function *) 110 116 state.children_in_picture <- []; 111 117 state.last_was_img <- false; 112 - state.has_source_after_img <- false 118 + state.has_source_after_img <- false; 119 + state.has_always_matching_source <- false; 120 + state.source_after_always_matching <- false 113 121 114 122 | "source" when state.in_picture && state.picture_depth = 1 -> 115 123 check_source_attrs_in_picture attrs collector; 116 124 state.children_in_picture <- "source" :: state.children_in_picture; 117 125 if state.last_was_img then 118 - state.has_source_after_img <- true 126 + state.has_source_after_img <- true; 127 + (* Check for always-matching source followed by another source *) 128 + if state.has_always_matching_source then 129 + state.source_after_always_matching <- true; 130 + (* A source is "always matching" if it has: 131 + - no media and no type attribute, OR 132 + - media attribute with empty/whitespace-only value, OR 133 + - media="all" (with optional whitespace) *) 134 + let media_value = List.find_map (fun (attr_name, v) -> 135 + if String.lowercase_ascii attr_name = "media" then Some v else None 136 + ) attrs in 137 + let has_type = has_attr "type" attrs in 138 + let is_always_matching = match media_value with 139 + | None -> not has_type (* no media, check if no type either *) 140 + | Some v -> 141 + let trimmed = String.trim v in 142 + trimmed = "" || String.lowercase_ascii trimmed = "all" 143 + in 144 + if is_always_matching then 145 + state.has_always_matching_source <- true 119 146 120 147 | "img" when state.in_picture && state.picture_depth = 1 -> 121 148 check_img_attrs attrs collector; ··· 162 189 (* Check for source after img *) 163 190 if state.has_source_after_img then 164 191 report_disallowed_child "picture" "source" collector; 192 + (* Check for source after always-matching source *) 193 + if state.source_after_always_matching then 194 + Message_collector.add_error collector 195 + ~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that matches all media types cannot be followed by another \xe2\x80\x9csource\xe2\x80\x9d element." 196 + ~code:"always-matching-source" 197 + ~element:"source" (); 165 198 166 199 state.in_picture <- false 167 200 end
+473
lib/html5_checker/specialized/srcset_sizes_checker.ml
··· 1 + (** Srcset and sizes attribute validation checker. *) 2 + 3 + (** Valid CSS length units for sizes attribute *) 4 + let valid_length_units = [ 5 + "em"; "ex"; "ch"; "rem"; "cap"; "ic"; 6 + "vw"; "svw"; "lvw"; "dvw"; "vh"; "svh"; "lvh"; "dvh"; 7 + "vi"; "svi"; "lvi"; "dvi"; "vb"; "svb"; "lvb"; "dvb"; 8 + "vmin"; "svmin"; "lvmin"; "dvmin"; "vmax"; "svmax"; "lvmax"; "dvmax"; 9 + "cm"; "mm"; "q"; "in"; "pc"; "pt"; "px" 10 + ] 11 + 12 + type state = unit 13 + 14 + let create () = () 15 + let reset _state = () 16 + 17 + (** Get attribute value *) 18 + let get_attr name attrs = 19 + List.find_map (fun (n, v) -> 20 + if String.lowercase_ascii n = name then Some v else None 21 + ) attrs 22 + 23 + (** Check if string contains only whitespace *) 24 + let is_whitespace_only s = 25 + String.for_all (fun c -> c = ' ' || c = '\t' || c = '\n' || c = '\r') s 26 + 27 + (** Invalid units that are not CSS lengths but might be confused for them *) 28 + let invalid_size_units = [ 29 + "deg"; "grad"; "rad"; "turn"; (* angle units *) 30 + "s"; "ms"; (* time units *) 31 + "hz"; "khz"; (* frequency units *) 32 + "dpi"; "dpcm"; "dppx"; (* resolution units *) 33 + "%" (* percentage - not valid in sizes *) 34 + ] 35 + 36 + (** Strip CSS comments from a value *) 37 + let strip_css_comments s = 38 + let buf = Buffer.create (String.length s) in 39 + let len = String.length s in 40 + let i = ref 0 in 41 + while !i < len do 42 + if !i + 1 < len && s.[!i] = '/' && s.[!i + 1] = '*' then begin 43 + (* Start of comment, find end *) 44 + i := !i + 2; 45 + while !i + 1 < len && not (s.[!i] = '*' && s.[!i + 1] = '/') do 46 + incr i 47 + done; 48 + if !i + 1 < len then i := !i + 2 49 + end else begin 50 + Buffer.add_char buf s.[!i]; 51 + incr i 52 + end 53 + done; 54 + Buffer.contents buf 55 + 56 + (** Check if a size value has a valid CSS length unit and non-negative value *) 57 + type size_check_result = Valid | InvalidUnit | NegativeValue 58 + 59 + let check_size_value size_value = 60 + let trimmed = String.trim (strip_css_comments size_value) in 61 + if trimmed = "" then InvalidUnit 62 + else if trimmed = "auto" then Valid (* "auto" is valid *) 63 + else begin 64 + let lower = String.lowercase_ascii trimmed in 65 + (* Check for invalid units first *) 66 + let has_invalid = List.exists (fun unit -> 67 + let len = String.length unit in 68 + String.length lower > len && 69 + String.sub lower (String.length lower - len) len = unit 70 + ) invalid_size_units in 71 + if has_invalid then InvalidUnit 72 + else begin 73 + (* Check for valid CSS length units *) 74 + let has_valid_unit = List.exists (fun unit -> 75 + let len = String.length unit in 76 + String.length lower > len && 77 + String.sub lower (String.length lower - len) len = unit 78 + ) valid_length_units in 79 + if has_valid_unit then begin 80 + (* Check if it's negative (starts with - but not -0) *) 81 + if String.length trimmed > 0 && trimmed.[0] = '-' then begin 82 + (* Check if it's -0 which is valid *) 83 + let after_minus = String.sub trimmed 1 (String.length trimmed - 1) in 84 + let after_minus_stripped = String.trim (strip_css_comments after_minus) in 85 + try 86 + let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus_stripped in 87 + let f = float_of_string num_str in 88 + if f = 0.0 then Valid else NegativeValue 89 + with _ -> NegativeValue 90 + end else 91 + Valid 92 + end 93 + (* Could be calc() or other CSS functions - allow those *) 94 + else if String.contains trimmed '(' then Valid 95 + else begin 96 + (* Check if it's a zero value (0, -0, +0) - these are valid without units *) 97 + let stripped = 98 + let s = trimmed in 99 + let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in 100 + s 101 + in 102 + (* Check if it's zero or a numeric value starting with 0 *) 103 + try 104 + let f = float_of_string stripped in 105 + if f = 0.0 then Valid else InvalidUnit 106 + with _ -> InvalidUnit 107 + end 108 + end 109 + end 110 + 111 + let has_valid_size_unit size_value = 112 + match check_size_value size_value with 113 + | Valid -> true 114 + | InvalidUnit | NegativeValue -> false 115 + 116 + (** Check if a sizes entry has a media condition (starts with '(') *) 117 + let has_media_condition entry = 118 + let trimmed = String.trim entry in 119 + String.length trimmed > 0 && trimmed.[0] = '(' 120 + 121 + (** Extract the size value from a sizes entry (after media condition if any) *) 122 + let extract_size_value entry = 123 + let trimmed = String.trim entry in 124 + if not (has_media_condition trimmed) then 125 + trimmed 126 + else begin 127 + (* Find matching closing paren, then get the size value after it *) 128 + let len = String.length trimmed in 129 + let rec find_close_paren i depth = 130 + if i >= len then len 131 + else match trimmed.[i] with 132 + | '(' -> find_close_paren (i + 1) (depth + 1) 133 + | ')' -> if depth = 1 then i + 1 else find_close_paren (i + 1) (depth - 1) 134 + | _ -> find_close_paren (i + 1) depth 135 + in 136 + let after_paren = find_close_paren 0 0 in 137 + if after_paren >= len then "" 138 + else String.trim (String.sub trimmed after_paren (len - after_paren)) 139 + end 140 + 141 + (** Validate sizes attribute value *) 142 + let validate_sizes value element_name collector = 143 + (* Empty sizes is invalid *) 144 + if String.trim value = "" then begin 145 + Message_collector.add_error collector 146 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Must not be empty." element_name) 147 + ~code:"bad-sizes-value" 148 + ~element:element_name ~attribute:"sizes" (); 149 + false 150 + end else begin 151 + (* Split on comma and check each entry *) 152 + let entries = String.split_on_char ',' value in 153 + let first_entry = String.trim (List.hd entries) in 154 + 155 + (* Check if starts with comma (empty first entry) *) 156 + if first_entry = "" then begin 157 + Message_collector.add_error collector 158 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Starts with empty source size." value element_name) 159 + ~code:"bad-sizes-value" 160 + ~element:element_name ~attribute:"sizes" (); 161 + false 162 + end else begin 163 + (* Check for trailing comma *) 164 + let last_entry = String.trim (List.nth entries (List.length entries - 1)) in 165 + if List.length entries > 1 && last_entry = "" then begin 166 + Message_collector.add_error collector 167 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Ends with trailing comma." value element_name) 168 + ~code:"bad-sizes-value" 169 + ~element:element_name ~attribute:"sizes" (); 170 + false 171 + end else begin 172 + let valid = ref true in 173 + 174 + (* Check for default-first pattern: unconditional value before conditional ones *) 175 + let non_empty_entries = List.filter (fun e -> String.trim e <> "") entries in 176 + if List.length non_empty_entries > 1 then begin 177 + let first = List.hd non_empty_entries in 178 + let rest = List.tl non_empty_entries in 179 + (* If first entry has no media condition but later ones do, that's invalid *) 180 + if not (has_media_condition first) && List.exists has_media_condition rest then begin 181 + Message_collector.add_error collector 182 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Default size must be last." value element_name) 183 + ~code:"bad-sizes-value" 184 + ~element:element_name ~attribute:"sizes" (); 185 + valid := false 186 + end 187 + end; 188 + 189 + (* Validate each entry's size value has valid unit and is not negative *) 190 + List.iter (fun entry -> 191 + let trimmed = String.trim entry in 192 + if trimmed <> "" then begin 193 + let size_val = extract_size_value trimmed in 194 + if size_val <> "" then begin 195 + match check_size_value size_val with 196 + | Valid -> () 197 + | NegativeValue -> 198 + Message_collector.add_error collector 199 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Source size value cannot be negative." value element_name) 200 + ~code:"bad-sizes-value" 201 + ~element:element_name ~attribute:"sizes" (); 202 + valid := false 203 + | InvalidUnit -> 204 + Message_collector.add_error collector 205 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size value." value element_name) 206 + ~code:"bad-sizes-value" 207 + ~element:element_name ~attribute:"sizes" (); 208 + valid := false 209 + end 210 + end 211 + ) entries; 212 + 213 + !valid 214 + end 215 + end 216 + end 217 + 218 + (** Validate srcset descriptor *) 219 + let validate_srcset_descriptor desc element_name srcset_value collector = 220 + let desc_lower = String.lowercase_ascii (String.trim desc) in 221 + if String.length desc_lower = 0 then true 222 + else begin 223 + let last_char = desc_lower.[String.length desc_lower - 1] in 224 + let num_part = String.sub desc_lower 0 (String.length desc_lower - 1) in 225 + 226 + match last_char with 227 + | 'w' -> 228 + (* Width descriptor - must be positive integer *) 229 + (try 230 + let n = int_of_string num_part in 231 + if n <= 0 then begin 232 + Message_collector.add_error collector 233 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Width must be positive." srcset_value element_name) 234 + ~code:"bad-srcset-value" 235 + ~element:element_name ~attribute:"srcset" (); 236 + false 237 + end else begin 238 + (* Check for uppercase W - compare original desc with lowercase version *) 239 + let original_last = desc.[String.length desc - 1] in 240 + if original_last = 'W' then begin 241 + Message_collector.add_error collector 242 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Width descriptor must use lowercase \xe2\x80\x9cw\xe2\x80\x9d." srcset_value element_name) 243 + ~code:"bad-srcset-value" 244 + ~element:element_name ~attribute:"srcset" (); 245 + false 246 + end else true 247 + end 248 + with _ -> 249 + (* Check for scientific notation or decimal *) 250 + if String.contains num_part 'e' || String.contains num_part 'E' then begin 251 + Message_collector.add_error collector 252 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Scientific notation not allowed in width descriptor." srcset_value element_name) 253 + ~code:"bad-srcset-value" 254 + ~element:element_name ~attribute:"srcset" (); 255 + false 256 + end else begin 257 + Message_collector.add_error collector 258 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Invalid width descriptor." srcset_value element_name) 259 + ~code:"bad-srcset-value" 260 + ~element:element_name ~attribute:"srcset" (); 261 + false 262 + end) 263 + | 'x' -> 264 + (* Pixel density descriptor - must be positive number, no leading + *) 265 + let trimmed_desc = String.trim desc in 266 + if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin 267 + Message_collector.add_error collector 268 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Leading plus sign not allowed." srcset_value element_name) 269 + ~code:"bad-srcset-value" 270 + ~element:element_name ~attribute:"srcset" (); 271 + false 272 + end else begin 273 + (try 274 + let n = float_of_string num_part in 275 + if Float.is_nan n then begin 276 + Message_collector.add_error collector 277 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: NaN not allowed." srcset_value element_name) 278 + ~code:"bad-srcset-value" 279 + ~element:element_name ~attribute:"srcset" (); 280 + false 281 + end else if n <= 0.0 then begin 282 + Message_collector.add_error collector 283 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Density must be positive." srcset_value element_name) 284 + ~code:"bad-srcset-value" 285 + ~element:element_name ~attribute:"srcset" (); 286 + false 287 + end else if n = neg_infinity || n = infinity then begin 288 + Message_collector.add_error collector 289 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Infinity not allowed." srcset_value element_name) 290 + ~code:"bad-srcset-value" 291 + ~element:element_name ~attribute:"srcset" (); 292 + false 293 + end else true 294 + with _ -> 295 + Message_collector.add_error collector 296 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Invalid density descriptor." srcset_value element_name) 297 + ~code:"bad-srcset-value" 298 + ~element:element_name ~attribute:"srcset" (); 299 + false) 300 + end 301 + | 'h' -> 302 + (* Height descriptor - not allowed *) 303 + Message_collector.add_error collector 304 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Height descriptor \xe2\x80\x9ch\xe2\x80\x9d is not allowed." srcset_value element_name) 305 + ~code:"bad-srcset-value" 306 + ~element:element_name ~attribute:"srcset" (); 307 + false 308 + | _ -> 309 + (* Unknown descriptor *) 310 + Message_collector.add_error collector 311 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor." srcset_value element_name) 312 + ~code:"bad-srcset-value" 313 + ~element:element_name ~attribute:"srcset" (); 314 + false 315 + end 316 + 317 + (** Normalize descriptor for duplicate detection (e.g., 1x = 1.0x) *) 318 + let normalize_descriptor desc = 319 + let desc_lower = String.lowercase_ascii (String.trim desc) in 320 + if String.length desc_lower = 0 then desc_lower 321 + else 322 + let last_char = desc_lower.[String.length desc_lower - 1] in 323 + let num_part = String.sub desc_lower 0 (String.length desc_lower - 1) in 324 + match last_char with 325 + | 'x' -> 326 + (* Normalize density to a float string for comparison *) 327 + (try 328 + let f = float_of_string num_part in 329 + Printf.sprintf "%gx" f (* %g removes trailing zeros *) 330 + with _ -> desc_lower) 331 + | 'w' -> 332 + (* Width should be integer, just return as-is *) 333 + desc_lower 334 + | _ -> desc_lower 335 + 336 + (** Parse and validate srcset attribute value *) 337 + let validate_srcset value element_name has_sizes collector = 338 + let entries = String.split_on_char ',' value in 339 + let has_w_descriptor = ref false in 340 + let has_x_descriptor = ref false in 341 + let seen_descriptors = Hashtbl.create 8 in (* Track seen descriptor values *) 342 + 343 + (* Check for empty srcset *) 344 + if String.trim value = "" then begin 345 + Message_collector.add_error collector 346 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Must not be empty." value element_name) 347 + ~code:"bad-srcset-value" 348 + ~element:element_name ~attribute:"srcset" () 349 + end; 350 + 351 + (* Check for leading comma *) 352 + if String.length value > 0 && value.[0] = ',' then begin 353 + Message_collector.add_error collector 354 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Leading comma." value element_name) 355 + ~code:"bad-srcset-value" 356 + ~element:element_name ~attribute:"srcset" () 357 + end; 358 + 359 + (* Check for trailing comma *) 360 + let trimmed_value = String.trim value in 361 + if String.length trimmed_value > 0 && trimmed_value.[String.length trimmed_value - 1] = ',' then begin 362 + Message_collector.add_error collector 363 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Trailing comma." value element_name) 364 + ~code:"bad-srcset-value" 365 + ~element:element_name ~attribute:"srcset" () 366 + end; 367 + 368 + List.iter (fun entry -> 369 + let entry = String.trim entry in 370 + if entry <> "" then begin 371 + (* Split entry into URL and optional descriptor *) 372 + let parts = String.split_on_char ' ' entry |> List.filter (fun s -> s <> "") in 373 + match parts with 374 + | [] -> () 375 + | [_url] -> 376 + (* URL only = implicit 1x descriptor - only flag if explicit 1x also seen *) 377 + if Hashtbl.mem seen_descriptors "explicit-1x" then begin 378 + Message_collector.add_error collector 379 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name) 380 + ~code:"bad-srcset-value" 381 + ~element:element_name ~attribute:"srcset" () 382 + end else 383 + Hashtbl.add seen_descriptors "implicit-1x" true 384 + | _url :: desc :: rest -> 385 + (* Check for extra junk - multiple descriptors are not allowed *) 386 + if rest <> [] then begin 387 + Message_collector.add_error collector 388 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Multiple descriptors in candidate." value element_name) 389 + ~code:"bad-srcset-value" 390 + ~element:element_name ~attribute:"srcset" () 391 + end; 392 + 393 + let desc_lower = String.lowercase_ascii (String.trim desc) in 394 + if String.length desc_lower > 0 then begin 395 + let last_char = desc_lower.[String.length desc_lower - 1] in 396 + if last_char = 'w' then has_w_descriptor := true 397 + else if last_char = 'x' then has_x_descriptor := true; 398 + 399 + (* Check for duplicate descriptors - use normalized form *) 400 + let normalized = normalize_descriptor desc in 401 + let is_1x = (normalized = "1x") in 402 + if Hashtbl.mem seen_descriptors normalized then begin 403 + Message_collector.add_error collector 404 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name) 405 + ~code:"bad-srcset-value" 406 + ~element:element_name ~attribute:"srcset" () 407 + end else if is_1x && Hashtbl.mem seen_descriptors "implicit-1x" then begin 408 + (* Explicit 1x conflicts with implicit 1x *) 409 + Message_collector.add_error collector 410 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name) 411 + ~code:"bad-srcset-value" 412 + ~element:element_name ~attribute:"srcset" () 413 + end else begin 414 + Hashtbl.add seen_descriptors normalized true; 415 + if is_1x then Hashtbl.add seen_descriptors "explicit-1x" true 416 + end 417 + end; 418 + 419 + ignore (validate_srcset_descriptor desc element_name value collector) 420 + end 421 + ) entries; 422 + 423 + (* Check: if w descriptor used and no sizes, that's an error for img and source *) 424 + if !has_w_descriptor && not has_sizes then 425 + Message_collector.add_error collector 426 + ~message:(Printf.sprintf "When the \xe2\x80\x9csrcset\xe2\x80\x9d attribute on the \xe2\x80\x9c%s\xe2\x80\x9d element uses width descriptors, the \xe2\x80\x9csizes\xe2\x80\x9d attribute must also be present." element_name) 427 + ~code:"srcset-w-without-sizes" 428 + ~element:element_name ~attribute:"srcset" (); 429 + 430 + (* Check for mixing w and x descriptors *) 431 + if !has_w_descriptor && !has_x_descriptor then 432 + Message_collector.add_error collector 433 + ~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Mixing width and density descriptors is not allowed." value element_name) 434 + ~code:"bad-srcset-value" 435 + ~element:element_name ~attribute:"srcset" () 436 + 437 + let start_element _state ~name ~namespace ~attrs collector = 438 + if namespace <> None then () 439 + else begin 440 + let name_lower = String.lowercase_ascii name in 441 + 442 + (* Check sizes and srcset on img and source *) 443 + if name_lower = "img" || name_lower = "source" then begin 444 + let sizes_value = get_attr "sizes" attrs in 445 + let srcset_value = get_attr "srcset" attrs in 446 + let has_sizes = sizes_value <> None in 447 + 448 + (* Validate sizes if present *) 449 + (match sizes_value with 450 + | Some v -> ignore (validate_sizes v name_lower collector) 451 + | None -> ()); 452 + 453 + (* Validate srcset if present *) 454 + (match srcset_value with 455 + | Some v -> validate_srcset v name_lower has_sizes collector 456 + | None -> ()) 457 + end 458 + end 459 + 460 + let end_element _state ~name:_ ~namespace:_ _collector = () 461 + let characters _state _text _collector = () 462 + let end_document _state _collector = () 463 + 464 + let checker = 465 + (module struct 466 + type nonrec state = state 467 + let create = create 468 + let reset = reset 469 + let start_element = start_element 470 + let end_element = end_element 471 + let characters = characters 472 + let end_document = end_document 473 + end : Checker.S)