+19
-1
lib/html5_checker/datatype/dt_autocomplete.ml
+19
-1
lib/html5_checker/datatype/dt_autocomplete.ml
···
162
162
List.find_opt (fun t -> starts_with t "section-") tokens
163
163
in
164
164
165
+
(* Check if webauthn appears anywhere except as the very last token *)
166
+
let check_webauthn_position tokens =
167
+
let rec check = function
168
+
| [] -> None
169
+
| ["webauthn"] -> None (* webauthn as last token is ok *)
170
+
| "webauthn" :: _ :: _ -> Some () (* webauthn not last is error *)
171
+
| _ :: rest -> check rest
172
+
in
173
+
check tokens
174
+
in
175
+
165
176
(* Process remaining tokens *)
166
177
let process_field_tokens tokens =
178
+
(* First check if webauthn appears but not at the very end *)
179
+
(match check_webauthn_position tokens with
180
+
| Some () ->
181
+
Error
182
+
"The token \"webauthn\" must only appear as the very last token in a \
183
+
list of autofill detail tokens."
184
+
| None ->
167
185
match tokens with
168
186
| [] -> Error "A list of autofill details tokens must contain an autofill field name."
169
187
| [ "webauthn" ] ->
···
246
264
| None ->
247
265
Error
248
266
"A list of autofill details tokens must not contain more than one \
249
-
autofill field name.")
267
+
autofill field name."))
250
268
in
251
269
process_field_tokens !tokens
252
270
+8
-8
lib/html5_checker/error_code.ml
+8
-8
lib/html5_checker/error_code.ml
···
382
382
Printf.sprintf "Element %s is missing required attribute %s."
383
383
(q element) (q attr)
384
384
| Missing_required_attr_one_of { element; attrs } ->
385
-
let attrs_str = String.concat ", " (List.map q attrs) in
385
+
let attrs_str = String.concat ", " attrs in
386
386
Printf.sprintf "Element %s is missing one or more of the following attributes: [%s]."
387
387
(q element) attrs_str
388
388
| Bad_attr_value { element; attr; value; reason } ->
···
420
420
Printf.sprintf "Element %s is missing required child element %s."
421
421
(q parent) (q child)
422
422
| Missing_required_child_one_of { parent; children } ->
423
-
let children_str = String.concat ", " (List.map q children) in
423
+
let children_str = String.concat ", " children in
424
424
Printf.sprintf "Element %s is missing one or more of the following child elements: [%s]."
425
425
(q parent) children_str
426
426
| Missing_required_child_generic { parent } ->
···
488
488
Printf.sprintf "An %s element must have an %s attribute, except under certain conditions. For details, consult guidance on providing text alternatives for images."
489
489
(q "img") (q "alt")
490
490
| Img_missing_src_or_srcset ->
491
-
Printf.sprintf "Element %s is missing one or more of the following attributes: [%s, %s]."
492
-
(q "img") (q "src") (q "srcset")
491
+
Printf.sprintf "Element %s is missing one or more of the following attributes: [src, srcset]."
492
+
(q "img")
493
493
| Option_empty_without_label ->
494
494
Printf.sprintf "Element %s without attribute %s must not be empty."
495
495
(q "option") (q "label")
···
499
499
Printf.sprintf "The value of %s attribute for the %s element must not be %s."
500
500
(q "dir") (q "bdo") (q "auto")
501
501
| Base_missing_href_or_target ->
502
-
Printf.sprintf "Element %s is missing one or more of the following attributes: [%s, %s]."
503
-
(q "base") (q "href") (q "target")
502
+
Printf.sprintf "Element %s is missing one or more of the following attributes: [href, target]."
503
+
(q "base")
504
504
| Base_after_link_script ->
505
505
Printf.sprintf "The %s element must come before any %s or %s elements in the document."
506
506
(q "base") (q "link") (q "script")
···
551
551
Printf.sprintf "Element %s is missing required attribute %s."
552
552
(q "summary") (q "role")
553
553
| Summary_missing_attrs ->
554
-
Printf.sprintf "Element %s is missing one or more of the following attributes: [%s, %s, %s]."
555
-
(q "summary") (q "aria-checked") (q "aria-level") (q "role")
554
+
Printf.sprintf "Element %s is missing one or more of the following attributes: [aria-checked, aria-level, role]."
555
+
(q "summary")
556
556
| Autocomplete_webauthn_on_select ->
557
557
Printf.sprintf "The value of the %s attribute for the %s element must not contain %s."
558
558
(q "autocomplete") (q "select") (q "webauthn")
+7
lib/html5_checker/parse_error_bridge.ml
+7
lib/html5_checker/parse_error_bridge.ml
···
14
14
let (message, final_code) = match code with
15
15
| Html5rw.Parse_error_code.Non_void_html_element_start_tag_with_trailing_solidus ->
16
16
("Self-closing syntax (\"/>\") used on a non-void HTML element. Ignoring the slash and treating as a start tag.", code_str)
17
+
| Html5rw.Parse_error_code.Null_character_reference ->
18
+
("Character reference expands to zero.", "null-character-reference")
17
19
| Html5rw.Parse_error_code.Tree_construction_error s ->
18
20
(* Check for control-character/noncharacter/surrogate with codepoint info *)
19
21
(try
···
67
69
("End tag \xe2\x80\x9cp\xe2\x80\x9d implied, but there were open elements.", "end-tag-p-implied")
68
70
else if s = "end-tag-br" then
69
71
("End tag \xe2\x80\x9cbr\xe2\x80\x9d.", "end-tag-br")
72
+
else if s = "expected-closing-tag-but-got-eof" then
73
+
("End of file seen and there were open elements.", "eof-in-open-element")
74
+
else if String.length s > 19 && String.sub s 0 19 = "unexpected-end-tag:" then
75
+
let element = String.sub s 19 (String.length s - 19) in
76
+
(Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element, "stray-end-tag")
70
77
else
71
78
(Printf.sprintf "Parse error: %s" s, s)
72
79
with _ -> (Printf.sprintf "Parse error: %s" s, s))
+86
-10
lib/html5_checker/semantic/nesting_checker.ml
+86
-10
lib/html5_checker/semantic/nesting_checker.ml
···
32
32
let ancestor_mask_by_descendant : (string, int) Hashtbl.t =
33
33
Hashtbl.create 64
34
34
35
+
(** Map from descendant element name to bitmask of ancestors that cause content model violations.
36
+
(These use different error messages than nesting violations.) *)
37
+
let content_model_violation_mask : (string, int) Hashtbl.t =
38
+
Hashtbl.create 64
39
+
35
40
(** Register that [ancestor] is prohibited for [descendant]. *)
36
41
let register_prohibited_ancestor ancestor descendant =
37
42
let number = special_ancestor_number ancestor in
···
44
49
in
45
50
let new_mask = mask lor (1 lsl number) in
46
51
Hashtbl.replace ancestor_mask_by_descendant descendant new_mask
52
+
53
+
(** Register a content model violation (phrasing-only element containing flow content). *)
54
+
let register_content_model_violation ancestor descendant =
55
+
register_prohibited_ancestor ancestor descendant;
56
+
let number = special_ancestor_number ancestor in
57
+
let mask =
58
+
match Hashtbl.find_opt content_model_violation_mask descendant with
59
+
| None -> 0
60
+
| Some m -> m
61
+
in
62
+
let new_mask = mask lor (1 lsl number) in
63
+
Hashtbl.replace content_model_violation_mask descendant new_mask
47
64
48
65
(** Initialize the prohibited ancestor map. *)
49
66
let () =
···
113
130
) interactive_elements;
114
131
115
132
(* Phrasing-only elements: cannot contain flow content like p, div, h1-h6, etc. *)
133
+
(* These are content model violations, not nesting violations. *)
116
134
let phrasing_only = ["span"; "strong"; "em"; "b"; "i"; "u"; "s"; "small"; "mark";
117
135
"abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp"; "kbd"; "var"] in
118
136
let flow_content = ["p"; "div"; "article"; "section"; "nav"; "aside"; "header"; "footer";
···
120
138
"ol"; "ul"; "dl"; "pre"; "blockquote"; "hr"] in
121
139
List.iter (fun ancestor ->
122
140
List.iter (fun descendant ->
123
-
register_prohibited_ancestor ancestor descendant
141
+
register_content_model_violation ancestor descendant
124
142
) flow_content
125
143
) phrasing_only
126
144
···
134
152
let map_num = special_ancestor_number "map" in
135
153
1 lsl map_num
136
154
155
+
(** Transparent elements - inherit content model from parent *)
156
+
let transparent_elements = ["a"; "canvas"; "video"; "audio"; "object"; "ins"; "del"; "map"]
157
+
137
158
(** Stack node representing an element's context. *)
138
159
type stack_node = {
139
160
ancestor_mask : int;
140
-
_name : string; [@warning "-69"]
161
+
name : string;
162
+
is_transparent : bool;
141
163
}
142
164
143
165
(** Checker state. *)
···
181
203
| _ ->
182
204
false
183
205
206
+
(** Find the nearest transparent element in the ancestor stack, if any.
207
+
Returns the immediate parent's name if it's transparent, otherwise None. *)
208
+
let find_nearest_transparent_parent state =
209
+
match state.stack with
210
+
| parent :: _ when parent.is_transparent -> Some parent.name
211
+
| _ -> None
212
+
184
213
(** Report nesting violations. *)
185
214
let check_nesting state name attrs collector =
186
215
(* Compute the prohibited ancestor mask for this element *)
···
190
219
| None -> 0
191
220
in
192
221
222
+
(* Get content model violation mask for this element *)
223
+
let content_model_mask =
224
+
match Hashtbl.find_opt content_model_violation_mask name with
225
+
| Some m -> m
226
+
| None -> 0
227
+
in
228
+
193
229
(* Add interactive element restrictions if applicable *)
194
230
let mask =
195
231
if is_interactive_element name attrs then
···
212
248
| "object" when has_attr attrs "usemap" -> Some "usemap"
213
249
| _ -> None
214
250
in
251
+
(* Find the transparent parent (like canvas) if any *)
252
+
let transparent_parent = find_nearest_transparent_parent state in
215
253
(* Find which ancestors are violated *)
216
254
Array.iteri (fun i ancestor ->
217
255
let bit = 1 lsl i in
218
-
if (mask_hit land bit) <> 0 then
219
-
Message_collector.add_typed collector
220
-
(Error_code.Element_must_not_be_descendant {
221
-
element = name;
222
-
attr;
223
-
ancestor
224
-
})
256
+
if (mask_hit land bit) <> 0 then begin
257
+
(* Check if this is a content model violation or a nesting violation *)
258
+
if (content_model_mask land bit) <> 0 then begin
259
+
(* Content model violation: use "not allowed as child" format *)
260
+
(* If there's a transparent parent, use that instead of the ancestor *)
261
+
let parent = match transparent_parent with
262
+
| Some p -> p
263
+
| None -> ancestor
264
+
in
265
+
Message_collector.add_typed collector
266
+
(Error_code.Element_not_allowed_as_child {
267
+
child = name;
268
+
parent
269
+
})
270
+
end else
271
+
(* Nesting violation: use "must not be descendant" format *)
272
+
Message_collector.add_typed collector
273
+
(Error_code.Element_must_not_be_descendant {
274
+
element = name;
275
+
attr;
276
+
ancestor
277
+
})
278
+
end
225
279
) special_ancestors
226
280
end
227
281
end
···
238
292
})
239
293
| _ -> ()
240
294
295
+
(** Check for metadata-only elements appearing outside valid contexts.
296
+
style element is only valid in head or in noscript (in head). *)
297
+
let check_metadata_element_context state name collector =
298
+
match name with
299
+
| "style" ->
300
+
(* style is only valid inside head or noscript *)
301
+
begin match state.stack with
302
+
| parent :: _ when parent.name = "head" -> () (* valid *)
303
+
| parent :: _ when parent.name = "noscript" -> () (* valid in noscript in head *)
304
+
| parent :: _ ->
305
+
(* style inside any other element is not allowed *)
306
+
Message_collector.add_typed collector
307
+
(Error_code.Element_not_allowed_as_child {
308
+
child = "style";
309
+
parent = parent.name
310
+
})
311
+
| [] -> () (* at root level, would be caught elsewhere *)
312
+
end
313
+
| _ -> ()
314
+
241
315
let start_element state ~name ~namespace ~attrs collector =
242
316
(* Only check HTML elements, not SVG or MathML *)
243
317
match namespace with
···
246
320
(* Check for nesting violations *)
247
321
check_nesting state name attrs collector;
248
322
check_required_ancestors state name collector;
323
+
check_metadata_element_context state name collector;
249
324
250
325
(* Update ancestor mask if this is a special ancestor *)
251
326
let new_mask = state.ancestor_mask in
···
267
342
in
268
343
269
344
(* Push onto stack *)
270
-
let node = { ancestor_mask = state.ancestor_mask; _name = name } in
345
+
let is_transparent = List.mem name transparent_elements in
346
+
let node = { ancestor_mask = state.ancestor_mask; name; is_transparent } in
271
347
state.stack <- node :: state.stack;
272
348
state.ancestor_mask <- new_mask
273
349
+52
-31
lib/html5_checker/semantic/obsolete_checker.ml
+52
-31
lib/html5_checker/semantic/obsolete_checker.ml
···
242
242
tbl
243
243
244
244
(** Checker state *)
245
-
type state = unit
245
+
type state = {
246
+
mutable in_head : bool;
247
+
}
246
248
247
-
let create () = ()
249
+
let create () = { in_head = false }
248
250
249
-
let reset _state = ()
251
+
let reset state = state.in_head <- false
250
252
251
-
let start_element _state ~name ~namespace ~attrs collector =
253
+
let start_element state ~name ~namespace ~attrs collector =
252
254
(* Only check HTML elements (no namespace or explicit HTML namespace) *)
253
255
let is_html = match namespace with
254
256
| None -> true
···
259
261
else begin
260
262
let name_lower = String.lowercase_ascii name in
261
263
264
+
(* Track head context *)
265
+
if name_lower = "head" then state.in_head <- true;
266
+
262
267
(* Check for obsolete element *)
263
268
(match Hashtbl.find_opt obsolete_elements name_lower with
264
269
| None -> ()
···
270
275
List.iter (fun (attr_name, _attr_value) ->
271
276
let attr_lower = String.lowercase_ascii attr_name in
272
277
273
-
(* Check specific obsolete attributes for this element *)
274
-
(match Hashtbl.find_opt obsolete_attributes attr_lower with
275
-
| None -> ()
276
-
| Some element_map ->
277
-
(match Hashtbl.find_opt element_map name_lower with
278
-
| None -> ()
279
-
| Some suggestion ->
280
-
Message_collector.add_typed collector
281
-
(Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some suggestion })));
282
-
283
-
(* Check obsolete style attributes *)
284
-
(match Hashtbl.find_opt obsolete_style_attrs attr_lower with
285
-
| None -> ()
286
-
| Some elements ->
287
-
if List.mem name_lower elements then
288
-
Message_collector.add_typed collector
289
-
(Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some "Use CSS instead." }));
290
-
291
-
(* Check obsolete global attributes *)
292
-
(match Hashtbl.find_opt obsolete_global_attrs attr_lower with
293
-
| None -> ()
294
-
| Some suggestion ->
295
-
(* Global attributes use a different format - just "The X attribute is obsolete. Y" *)
278
+
(* Special handling for scoped attribute on style *)
279
+
if attr_lower = "scoped" && name_lower = "style" then begin
280
+
(* Only report if style is in head (correct context) - otherwise the content model
281
+
error from nesting_checker takes precedence *)
282
+
if state.in_head then
296
283
Message_collector.add_error collector
297
-
~message:(Printf.sprintf "The %s attribute is obsolete. %s" (Error_code.q attr_name) suggestion)
298
-
~code:"obsolete-global-attribute"
284
+
~message:(Printf.sprintf "Attribute %s not allowed on element %s at this point."
285
+
(Error_code.q attr_name) (Error_code.q name))
286
+
~code:"disallowed-attribute"
299
287
~element:name
300
288
~attribute:attr_name
301
-
())
289
+
()
290
+
end else begin
291
+
(* Check specific obsolete attributes for this element *)
292
+
(match Hashtbl.find_opt obsolete_attributes attr_lower with
293
+
| None -> ()
294
+
| Some element_map ->
295
+
(match Hashtbl.find_opt element_map name_lower with
296
+
| None -> ()
297
+
| Some suggestion ->
298
+
Message_collector.add_typed collector
299
+
(Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some suggestion })));
300
+
301
+
(* Check obsolete style attributes *)
302
+
(match Hashtbl.find_opt obsolete_style_attrs attr_lower with
303
+
| None -> ()
304
+
| Some elements ->
305
+
if List.mem name_lower elements then
306
+
Message_collector.add_typed collector
307
+
(Error_code.Obsolete_attr { element = name; attr = attr_name; suggestion = Some "Use CSS instead." }));
308
+
309
+
(* Check obsolete global attributes *)
310
+
(match Hashtbl.find_opt obsolete_global_attrs attr_lower with
311
+
| None -> ()
312
+
| Some suggestion ->
313
+
(* Global attributes use a different format - just "The X attribute is obsolete. Y" *)
314
+
Message_collector.add_error collector
315
+
~message:(Printf.sprintf "The %s attribute is obsolete. %s" (Error_code.q attr_name) suggestion)
316
+
~code:"obsolete-global-attribute"
317
+
~element:name
318
+
~attribute:attr_name
319
+
())
320
+
end
302
321
) attrs
303
322
end
304
323
305
-
let end_element _state ~name:_ ~namespace:_ _collector = ()
324
+
let end_element state ~name ~namespace:_ _collector =
325
+
let name_lower = String.lowercase_ascii name in
326
+
if name_lower = "head" then state.in_head <- false
306
327
307
328
let characters _state _text _collector = ()
308
329
+3
-5
lib/html5_checker/semantic/required_attr_checker.ml
+3
-5
lib/html5_checker/semantic/required_attr_checker.ml
···
143
143
(* Valid values: empty string, auto, manual, hint *)
144
144
if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then
145
145
Message_collector.add_typed collector
146
-
(Error_code.Bad_attr_value {
147
-
element = element_name;
148
-
attr = "popover";
149
-
value;
150
-
reason = "Must be a valid popover state (auto, manual, or hint)."
146
+
(Error_code.Bad_attr_value_generic {
147
+
message = Printf.sprintf "Bad value %s for attribute %s on element %s."
148
+
(Error_code.q value) (Error_code.q "popover") (Error_code.q element_name)
151
149
})
152
150
| None -> ()
153
151
+12
-6
lib/html5_checker/specialized/aria_checker.ml
+12
-6
lib/html5_checker/specialized/aria_checker.ml
···
673
673
| _ -> ()
674
674
end;
675
675
676
-
(* Validate explicit roles *)
677
-
List.iter (fun role ->
678
-
(* Check if role is valid *)
679
-
if not (Hashtbl.mem valid_aria_roles role) then
676
+
(* Validate explicit roles - report full attribute value if any role is invalid *)
677
+
let has_invalid_role = List.exists (fun role ->
678
+
not (Hashtbl.mem valid_aria_roles role)
679
+
) explicit_roles in
680
+
if has_invalid_role then begin
681
+
match role_attr with
682
+
| Some role_value ->
680
683
Message_collector.add_error collector
681
684
~message:(Printf.sprintf
682
685
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9crole\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
683
-
role name)
686
+
role_value name)
684
687
~code:"bad-role"
685
688
~element:name
686
689
~attribute:"role"
687
-
();
690
+
()
691
+
| None -> ()
692
+
end;
688
693
694
+
List.iter (fun role ->
689
695
(* Check if role cannot be named *)
690
696
if Hashtbl.mem roles_which_cannot_be_named role && has_accessible_name then
691
697
Message_collector.add_error collector
+54
-19
lib/html5_checker/specialized/datetime_checker.ml
+54
-19
lib/html5_checker/specialized/datetime_checker.ml
···
27
27
let validate_date s =
28
28
let pattern = Str.regexp "^\\([0-9]+\\)-\\([0-9][0-9]\\)-\\([0-9][0-9]\\)$" in
29
29
if not (Str.string_match pattern s 0) then
30
-
(false, Some "Date must be in YYYY-MM-DD format")
30
+
(false, Some "The literal did not satisfy the date format")
31
31
else
32
32
let year_s = Str.matched_group 1 s in
33
33
let month_s = Str.matched_group 2 s in
34
34
let day_s = Str.matched_group 3 s in
35
35
if String.length year_s < 4 then
36
-
(false, Some "Year must be at least 4 digits")
36
+
(false, Some "The literal did not satisfy the date format")
37
37
else
38
38
match (parse_int year_s, parse_int month_s, parse_int day_s) with
39
39
| None, _, _ | _, None, _ | _, _, None ->
40
40
(false, Some "Invalid year, month or day")
41
41
| Some year, Some month, Some day ->
42
42
if year < 1 then (false, Some "Year cannot be less than 1")
43
-
else if month < 1 || month > 12 then (false, Some "Month out of range")
43
+
else if month = 0 then (false, Some "Month cannot be less than 1")
44
+
else if month > 12 then (false, Some "Month cannot be greater than 12")
44
45
else if day < 1 then (false, Some "Day cannot be less than 1")
45
46
else
46
47
let max_day = max_day_for_month year month in
···
71
72
let validate_time s =
72
73
let pattern = Str.regexp "^\\([0-9][0-9]\\):\\([0-9][0-9]\\)\\(:\\([0-9][0-9]\\)\\(\\.\\([0-9]+\\)\\)?\\)?$" in
73
74
if not (Str.string_match pattern s 0) then
74
-
(false, Some "Time must be in HH:MM format")
75
+
(false, None) (* Format error - return None so caller uses generic message *)
75
76
else
76
77
let hour_s = Str.matched_group 1 s in
77
78
let minute_s = Str.matched_group 2 s in
78
79
match (parse_int hour_s, parse_int minute_s) with
79
80
| None, _ | _, None -> (false, Some "Invalid hour or minute")
80
81
| Some hour, Some minute ->
81
-
if hour > 23 then (false, Some "Hour out of range")
82
-
else if minute > 59 then (false, Some "Minute out of range")
82
+
if hour > 23 then (false, Some "Hour cannot be greater than 23")
83
+
else if minute > 59 then (false, Some "Minute cannot be greater than 59")
83
84
else
84
85
let second_s = try Some (Str.matched_group 4 s) with Not_found -> None in
85
86
match second_s with
···
88
89
match parse_int sec_s with
89
90
| None -> (false, Some "Invalid seconds")
90
91
| Some sec ->
91
-
if sec > 59 then (false, Some "Second out of range")
92
+
if sec > 59 then (false, Some "Second cannot be greater than 59")
92
93
else
93
94
(* Check milliseconds if present *)
94
95
let millis_s = try Some (Str.matched_group 6 s) with Not_found -> None in
···
108
109
else
109
110
let year_s = Str.matched_group 1 s in
110
111
if String.length year_s < 4 then
111
-
(false, Some "Year must be at least 4 digits")
112
+
(false, Some "The literal did not satisfy the date format")
112
113
else
113
114
match parse_int year_s with
114
115
| None -> (false, Some "Invalid year")
···
125
126
let year_s = Str.matched_group 1 s in
126
127
let month_s = Str.matched_group 2 s in
127
128
if String.length year_s < 4 then
128
-
(false, Some "Year must be at least 4 digits")
129
+
(false, Some "The literal did not satisfy the date format")
129
130
else
130
131
match (parse_int year_s, parse_int month_s) with
131
132
| None, _ | _, None -> (false, Some "Invalid year or month")
···
143
144
let year_s = Str.matched_group 1 s in
144
145
let week_s = Str.matched_group 2 s in
145
146
if String.length year_s < 4 then
146
-
(false, Some "Year must be at least 4 digits")
147
+
(false, Some "The literal did not satisfy the date format")
147
148
else
148
149
match (parse_int year_s, parse_int week_s) with
149
150
| None, _ | _, None -> (false, Some "Invalid year or week")
···
222
223
(false, "+")
223
224
in
224
225
if not matched then
225
-
TzError "Invalid timezone offset"
226
+
TzError "The literal did not satisfy the datetime with timezone format"
226
227
else
227
228
let hour_s = Str.matched_group 2 s in
228
229
let minute_s = Str.matched_group 3 s in
229
230
match (parse_int hour_s, parse_int minute_s) with
230
231
| None, _ | _, None -> TzError "Invalid timezone"
231
232
| Some hour, Some minute ->
232
-
if hour > 23 || minute > 59 then TzError "Timezone offset out of range"
233
+
if hour > 23 then TzError "Hours out of range in time zone designator"
234
+
else if minute > 59 then TzError "Minutes out of range in time zone designator"
233
235
else begin
234
236
(* Check for unusual but valid offsets *)
235
237
let unusual_range =
···
267
269
let time_and_tz = String.sub s (pos + 1) (String.length s - pos - 1) in
268
270
(* Validate date *)
269
271
match validate_date date_part with
270
-
| (false, reason) ->
271
-
DtError (match reason with Some r -> r | None -> "Invalid date")
272
+
| (false, _) ->
273
+
DtError "The literal did not satisfy the datetime with timezone format"
272
274
| (true, _) ->
273
275
let date_old = has_old_year date_part in
274
276
(* Check if ends with Z *)
275
277
if String.length time_and_tz > 0 && time_and_tz.[String.length time_and_tz - 1] = 'Z' then begin
276
278
let time_part = String.sub time_and_tz 0 (String.length time_and_tz - 1) in
277
279
match validate_time time_part with
278
-
| (false, _) -> DtError "The literal did not satisfy the datetime with timezone format"
280
+
| (false, Some reason) -> DtError reason
281
+
| (false, None) -> DtError "The literal did not satisfy the datetime with timezone format"
279
282
| (true, _) ->
280
283
if date_old then DtWarning "Year may be mistyped"
281
284
else DtOk
···
296
299
let time_part = String.sub time_and_tz 0 tp in
297
300
let tz_part = String.sub time_and_tz tp (String.length time_and_tz - tp) in
298
301
match validate_time time_part with
299
-
| (false, _) -> DtError "The literal did not satisfy the datetime with timezone format"
302
+
| (false, Some reason) -> DtError reason
303
+
| (false, None) -> DtError "The literal did not satisfy the datetime with timezone format"
300
304
| (true, _) ->
301
305
match validate_timezone_offset tz_part with
302
-
| TzError _ -> DtError "The literal did not satisfy the datetime with timezone format"
306
+
| TzError e -> DtError e
303
307
| TzWarning w ->
304
308
DtWarning w
305
309
| TzOk ->
···
400
404
| Some e -> Printf.sprintf "Bad date: %s." e
401
405
| None -> "Bad date: The literal did not satisfy the date format."
402
406
in
403
-
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
404
-
value attr_name element_name tz_msg date_msg)
407
+
(* Order depends on error type. The Nu validator has specific patterns:
408
+
- Time hour/minute errors (not timezone) -> datetime first
409
+
- Timezone hours error -> datetime first
410
+
- Timezone minutes error -> date first
411
+
- Time fraction error -> date first
412
+
- Date "less than" error -> date first
413
+
- Date "greater than" error -> datetime first
414
+
- Generic errors both sides -> datetime first *)
415
+
let is_generic_tz = tz_error = "The literal did not satisfy the datetime with timezone format" in
416
+
let is_tz_hours_error = String.length tz_error >= 5 && String.sub tz_error 0 5 = "Hours" in
417
+
let is_tz_minutes_error = String.length tz_error >= 7 && String.sub tz_error 0 7 = "Minutes" in
418
+
let is_time_minute_or_hour_error =
419
+
(try ignore (Str.search_forward (Str.regexp "Minute cannot\\|Hour cannot") tz_error 0); true with Not_found -> false)
420
+
in
421
+
let is_fraction_error = try ignore (Str.search_forward (Str.regexp "fraction") tz_error 0); true with Not_found -> false in
422
+
let is_month_less_than_error = match date_error with
423
+
| Some e -> (try ignore (Str.search_forward (Str.regexp "Month cannot be less than") e 0); true with Not_found -> false)
424
+
| None -> false
425
+
in
426
+
(* Datetime first for: generic tz, tz hours error, time minute/hour errors, year errors
427
+
Date first for: "Month cannot be less than" date error, tz minutes error, fraction error *)
428
+
if is_month_less_than_error then
429
+
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
430
+
value attr_name element_name date_msg tz_msg)
431
+
else if is_tz_minutes_error || is_fraction_error then
432
+
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
433
+
value attr_name element_name date_msg tz_msg)
434
+
else if is_tz_hours_error || is_time_minute_or_hour_error || is_generic_tz then
435
+
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
436
+
value attr_name element_name tz_msg date_msg)
437
+
else
438
+
Error (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
439
+
value attr_name element_name tz_msg date_msg)
405
440
end
406
441
407
442
(** Checker state *)
+58
-27
lib/html5_checker/specialized/dl_checker.ml
+58
-27
lib/html5_checker/specialized/dl_checker.ml
···
8
8
mutable contains_div : bool;
9
9
mutable contains_dt_dd : bool;
10
10
mutable dd_before_dt_error_reported : bool; (* Track if we've reported dd-before-dt error *)
11
+
mutable has_template : bool; (* Track if template element was seen inside dl *)
11
12
}
12
13
13
14
type div_context = {
···
72
73
else begin
73
74
match name_lower with
74
75
| "template" ->
75
-
state.in_template <- state.in_template + 1
76
+
state.in_template <- state.in_template + 1;
77
+
(* Track if template is direct child of dl *)
78
+
begin match current_dl state with
79
+
| Some dl_ctx when state.div_in_dl_stack = [] ->
80
+
dl_ctx.has_template <- true
81
+
| _ -> ()
82
+
end
76
83
77
84
| "dl" when state.in_template = 0 ->
78
-
(* Check for nested dl - only error if direct child (not inside dt/dd) *)
79
-
begin match current_dl state with
80
-
| Some _ when state.in_dt_dd = 0 && state.div_in_dl_stack = [] ->
85
+
(* Check for nested dl - error if direct child of dl OR inside div-in-dl *)
86
+
begin match current_div state with
87
+
| Some _ ->
88
+
(* dl inside div-in-dl is not allowed *)
81
89
Message_collector.add_error collector
82
-
~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdl\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
90
+
~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdiv\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
83
91
~code:"disallowed-child"
84
92
~element:"dl" ()
85
-
| _ -> ()
93
+
| None ->
94
+
match current_dl state with
95
+
| Some _ when state.in_dt_dd = 0 ->
96
+
Message_collector.add_error collector
97
+
~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdl\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
98
+
~code:"disallowed-child"
99
+
~element:"dl" ()
100
+
| _ -> ()
86
101
end;
87
102
let ctx = {
88
103
has_dt = false;
···
91
106
contains_div = false;
92
107
contains_dt_dd = false;
93
108
dd_before_dt_error_reported = false;
109
+
has_template = false;
94
110
} in
95
111
state.dl_stack <- ctx :: state.dl_stack
96
112
···
131
147
state.in_dt_dd <- state.in_dt_dd + 1;
132
148
begin match current_div state with
133
149
| Some div_ctx ->
134
-
div_ctx.has_dt <- true;
135
-
(* If we've seen dd, this dt starts a new group *)
150
+
(* If we've already seen dd, this dt starts a new group - which is not allowed *)
136
151
if div_ctx.in_dd_part then begin
152
+
Message_collector.add_error collector
153
+
~message:"Element \xe2\x80\x9cdt\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdiv\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
154
+
~code:"disallowed-child"
155
+
~element:"dt" ();
137
156
div_ctx.group_count <- div_ctx.group_count + 1;
138
157
div_ctx.in_dd_part <- false
139
-
end
158
+
end;
159
+
div_ctx.has_dt <- true
140
160
| None ->
141
161
match current_dl state with
142
162
| Some dl_ctx ->
···
236
256
~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing a required child element."
237
257
~code:"missing-required-child"
238
258
~element:"dl" ()
239
-
else if not ctx.has_dd then
240
-
Message_collector.add_error collector
241
-
~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
242
-
~code:"missing-required-child"
243
-
~element:"dl" ()
259
+
else if not ctx.has_dd then begin
260
+
(* If template is present in dl, use list format; otherwise use simple format *)
261
+
if ctx.has_template then
262
+
Message_collector.add_error collector
263
+
~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing one or more of the following child elements: [dd]."
264
+
~code:"missing-required-child"
265
+
~element:"dl" ()
266
+
else
267
+
Message_collector.add_error collector
268
+
~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
269
+
~code:"missing-required-child"
270
+
~element:"dl" ()
271
+
end
244
272
else if ctx.last_was_dt then
245
-
(* Ended with dt, missing dd *)
273
+
(* Ended with dt, missing dd for the last group *)
246
274
Message_collector.add_error collector
247
275
~message:"Element \xe2\x80\x9cdl\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
248
276
~code:"missing-required-child"
···
274
302
~message:"Element \xe2\x80\x9cdiv\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
275
303
~code:"missing-required-child"
276
304
~element:"div" ()
277
-
else if div_ctx.group_count > 1 then
278
-
(* Multiple name-value groups in a single div is not allowed *)
279
-
Message_collector.add_error collector
280
-
~message:"A child \xe2\x80\x9cdiv\xe2\x80\x9d element of a \xe2\x80\x9cdl\xe2\x80\x9d element must contain only one name-value group."
281
-
~code:"multiple-groups-in-div"
282
-
~element:"div" ()
305
+
(* Multiple groups error is now reported inline when dt appears after dd *)
283
306
| [] -> ()
284
307
end
285
308
···
292
315
else begin
293
316
let trimmed = String.trim text in
294
317
if trimmed <> "" then begin
295
-
(* Check for text directly in dl *)
296
-
match current_dl state with
297
-
| Some _ when state.div_in_dl_stack = [] ->
318
+
(* Check for text directly in dl or div-in-dl *)
319
+
match current_div state with
320
+
| Some _ ->
321
+
(* Text in div within dl is not allowed *)
298
322
Message_collector.add_error collector
299
-
~message:"Text not allowed in element \xe2\x80\x9cdl\xe2\x80\x9d in this context."
323
+
~message:"Text not allowed in element \xe2\x80\x9cdiv\xe2\x80\x9d in this context."
300
324
~code:"text-not-allowed"
301
-
~element:"dl" ()
302
-
| _ -> ()
325
+
~element:"div" ()
326
+
| None ->
327
+
match current_dl state with
328
+
| Some _ ->
329
+
Message_collector.add_error collector
330
+
~message:"Text not allowed in element \xe2\x80\x9cdl\xe2\x80\x9d in this context."
331
+
~code:"text-not-allowed"
332
+
~element:"dl" ()
333
+
| None -> ()
303
334
end
304
335
end
305
336
+13
-5
lib/html5_checker/specialized/microdata_checker.ml
+13
-5
lib/html5_checker/specialized/microdata_checker.ml
···
68
68
String.contains s ':'
69
69
70
70
(** Validate that a URL is a valid absolute URL for itemtype/itemid.
71
-
Uses the comprehensive URL validation from Url_checker. *)
72
-
let validate_microdata_url url element attr_name =
71
+
Uses the comprehensive URL validation from Url_checker.
72
+
original_value is the full attribute value (for error messages when split by whitespace) *)
73
+
let validate_microdata_url url element attr_name original_value =
73
74
let url_trimmed = String.trim url in
74
75
if String.length url_trimmed = 0 then
75
76
Some (Printf.sprintf
76
77
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: Must be non-empty."
77
-
url attr_name element)
78
+
original_value attr_name element)
78
79
else
79
80
(* First check if it has a scheme (required for absolute URL) *)
80
81
match Url_checker.extract_scheme url_trimmed with
81
82
| None ->
82
83
Some (Printf.sprintf
83
84
"Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad absolute URL: The string \xe2\x80\x9c%s\xe2\x80\x9d is not an absolute URL."
84
-
url attr_name element url)
85
+
original_value attr_name element url)
85
86
| Some _ ->
86
87
(* Has a scheme - do comprehensive URL validation *)
87
88
match Url_checker.validate_url url element attr_name with
···
89
90
| Some error_msg ->
90
91
(* Replace "Bad URL:" with "Bad absolute URL:" for microdata *)
91
92
let error_msg = Str.global_replace (Str.regexp "Bad URL:") "Bad absolute URL:" error_msg in
93
+
(* Also replace the URL value with the original value in case they differ *)
94
+
(* Escape backslashes in replacement string for Str.global_replace *)
95
+
let escaped_original = Str.global_replace (Str.regexp "\\\\") "\\\\\\\\" original_value in
96
+
let error_msg = Str.global_replace
97
+
(Str.regexp_string (Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" url))
98
+
(Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d for attribute" escaped_original)
99
+
error_msg in
92
100
Some error_msg
93
101
94
102
(** Check if itemprop value is valid. *)
···
201
209
()
202
210
else
203
211
List.iter (fun url ->
204
-
match validate_microdata_url url element "itemtype" with
212
+
match validate_microdata_url url element "itemtype" itemtype with
205
213
| None -> ()
206
214
| Some error_msg ->
207
215
Message_collector.add_error collector
+60
-13
lib/html5_checker/specialized/picture_checker.ml
+60
-13
lib/html5_checker/specialized/picture_checker.ml
···
34
34
mutable has_source_after_img : bool;
35
35
mutable has_always_matching_source : bool; (* source without media/type *)
36
36
mutable source_after_always_matching : bool; (* source after always-matching source *)
37
+
mutable always_matching_is_media_all : bool; (* true if caused by media="all" *)
38
+
mutable always_matching_is_media_empty : bool; (* true if caused by media="" or whitespace *)
37
39
mutable parent_stack : string list; (* track parent elements *)
38
40
}
39
41
···
46
48
has_source_after_img = false;
47
49
has_always_matching_source = false;
48
50
source_after_always_matching = false;
51
+
always_matching_is_media_all = false;
52
+
always_matching_is_media_empty = false;
49
53
parent_stack = [];
50
54
}
51
55
···
58
62
state.parent_stack <- [];
59
63
state.has_source_after_img <- false;
60
64
state.has_always_matching_source <- false;
61
-
state.source_after_always_matching <- false
65
+
state.source_after_always_matching <- false;
66
+
state.always_matching_is_media_all <- false;
67
+
state.always_matching_is_media_empty <- false
62
68
63
69
(** Check if an attribute list contains a specific attribute. *)
64
70
let has_attr name attrs =
···
151
157
if String.lowercase_ascii attr_name = "media" then Some v else None
152
158
) attrs in
153
159
let has_type = has_attr "type" attrs in
160
+
let is_media_all = match media_value with
161
+
| Some v -> String.lowercase_ascii (String.trim v) = "all"
162
+
| None -> false
163
+
in
164
+
let is_media_empty = match media_value with
165
+
| Some v -> String.trim v = ""
166
+
| None -> false
167
+
in
154
168
let is_always_matching = match media_value with
155
169
| None -> not has_type (* no media, check if no type either *)
156
170
| Some v ->
157
171
let trimmed = String.trim v in
158
172
trimmed = "" || String.lowercase_ascii trimmed = "all"
159
173
in
160
-
if is_always_matching then
161
-
state.has_always_matching_source <- true
174
+
if is_always_matching then begin
175
+
state.has_always_matching_source <- true;
176
+
if is_media_all then
177
+
state.always_matching_is_media_all <- true
178
+
else if is_media_empty then
179
+
state.always_matching_is_media_empty <- true
180
+
end
162
181
163
182
| "img" when state.in_picture && state.picture_depth = 1 ->
164
183
check_img_attrs attrs collector;
···
170
189
if img_count > 1 then
171
190
report_disallowed_child "picture" "img" collector;
172
191
(* Check if always-matching source is followed by img with srcset *)
173
-
if state.has_always_matching_source && has_attr "srcset" attrs then
174
-
Message_collector.add_error collector
175
-
~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute."
176
-
~code:"always-matching-source-followed-by-srcset"
177
-
~element:"source" ()
192
+
if state.has_always_matching_source && has_attr "srcset" attrs then begin
193
+
if state.always_matching_is_media_all then
194
+
Message_collector.add_error collector
195
+
~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be \xe2\x80\x9call\xe2\x80\x9d."
196
+
~code:"media-all-not-allowed"
197
+
~element:"source"
198
+
~attribute:"media" ()
199
+
else if state.always_matching_is_media_empty then
200
+
Message_collector.add_error collector
201
+
~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be empty."
202
+
~code:"media-empty-not-allowed"
203
+
~element:"source"
204
+
~attribute:"media" ()
205
+
else
206
+
Message_collector.add_error collector
207
+
~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute."
208
+
~code:"always-matching-source-followed-by-srcset"
209
+
~element:"source" ()
210
+
end
178
211
179
212
| "script" when state.in_picture && state.picture_depth = 1 ->
180
213
state.children_in_picture <- "script" :: state.children_in_picture
···
216
249
if state.has_source_after_img then
217
250
report_disallowed_child "picture" "source" collector;
218
251
(* Check for source after always-matching source *)
219
-
if state.source_after_always_matching then
220
-
Message_collector.add_error collector
221
-
~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that matches all media types cannot be followed by another \xe2\x80\x9csource\xe2\x80\x9d element."
222
-
~code:"always-matching-source"
223
-
~element:"source" ();
252
+
if state.source_after_always_matching then begin
253
+
if state.always_matching_is_media_all then
254
+
Message_collector.add_error collector
255
+
~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be \xe2\x80\x9call\xe2\x80\x9d."
256
+
~code:"media-all-not-allowed"
257
+
~element:"source"
258
+
~attribute:"media" ()
259
+
else if state.always_matching_is_media_empty then
260
+
Message_collector.add_error collector
261
+
~message:"Value of \xe2\x80\x9cmedia\xe2\x80\x9d attribute here must not be empty."
262
+
~code:"media-empty-not-allowed"
263
+
~element:"source"
264
+
~attribute:"media" ()
265
+
else
266
+
Message_collector.add_error collector
267
+
~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute."
268
+
~code:"always-matching-source"
269
+
~element:"source" ()
270
+
end;
224
271
225
272
state.in_picture <- false
226
273
end;
+508
-136
lib/html5_checker/specialized/srcset_sizes_checker.ml
+508
-136
lib/html5_checker/specialized/srcset_sizes_checker.ml
···
54
54
Buffer.contents buf
55
55
56
56
(** Check if a size value has a valid CSS length unit and non-negative value *)
57
-
type size_check_result = Valid | InvalidUnit | NegativeValue | CssCommentInside | BadScientificNotation
57
+
type size_check_result =
58
+
| Valid
59
+
| InvalidUnit of string * string (* (found_unit, context) *)
60
+
| NegativeValue
61
+
| CssCommentAfterSign of string * string (* what was found, context *)
62
+
| CssCommentBeforeUnit of string * string (* what was found, context *)
63
+
| BadScientificNotation
64
+
| BadCssNumber of char * string (* (first_char, context) - not starting with digit or minus *)
65
+
66
+
(** CSS comment error types *)
67
+
type css_comment_error =
68
+
| NoCommentError
69
+
| CommentAfterSign of string * string (* what was found, context *)
70
+
| CommentBetweenNumberAndUnit of string * string (* what was found at comment position, context *)
58
71
59
72
(** Check if CSS comment appears in an invalid position:
60
73
- Between sign and number (+/**/50vw)
61
74
- Between number and unit (50/**/vw)
62
75
Trailing comments (50vw/**/) are valid. *)
63
-
let has_invalid_css_comment s =
76
+
let check_css_comment_position s =
64
77
let len = String.length s in
65
78
(* Find comment position *)
66
79
let rec find_comment i =
···
69
82
else find_comment (i + 1)
70
83
in
71
84
match find_comment 0 with
72
-
| None -> false
85
+
| None -> NoCommentError
73
86
| Some comment_pos ->
74
87
let before = String.sub s 0 comment_pos in
75
88
let trimmed_before = String.trim before in
76
-
if String.length trimmed_before = 0 then false (* Leading comment is OK *)
89
+
if String.length trimmed_before = 0 then NoCommentError (* Leading comment is OK *)
77
90
else begin
78
91
(* Find end of comment *)
79
92
let rec find_end i =
···
84
97
let end_pos = find_end (comment_pos + 2) in
85
98
let after = if end_pos < len then String.sub s end_pos (len - end_pos) else "" in
86
99
let trimmed_after = String.trim (strip_css_comments after) in
87
-
if trimmed_after = "" then false (* Trailing comment is OK *)
100
+
if trimmed_after = "" then NoCommentError (* Trailing comment is OK *)
88
101
else begin
89
102
(* Comment is in the middle - check if it breaks a number/unit combo *)
90
103
let last = trimmed_before.[String.length trimmed_before - 1] in
91
-
(* Invalid if comment appears after +/- or after a digit (before more non-whitespace) *)
92
-
(last >= '0' && last <= '9') || last = '+' || last = '-' || last = '.'
104
+
(* What's at the comment position? Just show "/" *)
105
+
let slash = "/" in
106
+
(* Invalid if comment appears after +/- *)
107
+
if last = '+' || last = '-' then
108
+
CommentAfterSign (trimmed_before ^ slash, s)
109
+
(* Invalid if comment appears after digit (before more content) *)
110
+
else if (last >= '0' && last <= '9') || last = '.' then
111
+
CommentBetweenNumberAndUnit (slash ^ trimmed_after, s)
112
+
else
113
+
NoCommentError
93
114
end
94
115
end
116
+
117
+
(** For backward compatibility *)
118
+
let has_invalid_css_comment s =
119
+
match check_css_comment_position s with
120
+
| NoCommentError -> false
121
+
| _ -> true
95
122
96
123
(** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *)
97
124
let has_invalid_scientific_notation s =
···
109
136
in
110
137
String.contains after_sign '.'
111
138
139
+
(** Extract unit from a size value like "10px" -> "px", "100vw" -> "vw", "50%" -> "%"
140
+
Returns the unit with original case preserved *)
141
+
let extract_unit s =
142
+
let trimmed = String.trim s in
143
+
let len = String.length trimmed in
144
+
if len = 0 then ""
145
+
(* Check for % at the end *)
146
+
else if trimmed.[len - 1] = '%' then "%"
147
+
else begin
148
+
let lower = String.lowercase_ascii trimmed in
149
+
(* Try to find a unit at the end (letters only) *)
150
+
let rec find_unit_length i =
151
+
if i < 0 then 0
152
+
else if lower.[i] >= 'a' && lower.[i] <= 'z' then find_unit_length (i - 1)
153
+
else i + 1
154
+
in
155
+
let start = find_unit_length (len - 1) in
156
+
if start < len then
157
+
(* Return the unit from the original string (preserving case) *)
158
+
String.sub trimmed start (len - start)
159
+
else ""
160
+
end
161
+
112
162
let check_size_value size_value =
113
163
let trimmed = String.trim size_value in
114
-
if trimmed = "" then InvalidUnit
115
-
(* Check for CSS comments inside numbers - this is invalid *)
116
-
else if has_invalid_css_comment trimmed then CssCommentInside
164
+
if trimmed = "" then InvalidUnit ("", trimmed)
117
165
else begin
166
+
(* Check for CSS comments inside numbers - this is invalid *)
167
+
match check_css_comment_position trimmed with
168
+
| CommentAfterSign (found, ctx) -> CssCommentAfterSign (found, ctx)
169
+
| CommentBetweenNumberAndUnit (found, ctx) -> CssCommentBeforeUnit (found, ctx)
170
+
| NoCommentError ->
118
171
(* Strip valid leading/trailing CSS comments for further checks *)
119
172
let value_no_comments = String.trim (strip_css_comments trimmed) in
120
173
(* Check for invalid scientific notation like 1e+1.5px *)
121
174
if has_invalid_scientific_notation value_no_comments then BadScientificNotation
122
175
(* "auto" is only valid with lazy loading, which requires checking the element context.
123
176
For general validation, treat "auto" alone as invalid in sizes. *)
124
-
else if String.lowercase_ascii value_no_comments = "auto" then InvalidUnit
125
-
else if value_no_comments = "" then InvalidUnit
177
+
else if String.lowercase_ascii value_no_comments = "auto" then
178
+
BadCssNumber (value_no_comments.[0], trimmed)
179
+
else if value_no_comments = "" then InvalidUnit ("", trimmed)
126
180
else begin
127
181
let lower = String.lowercase_ascii value_no_comments in
128
-
(* Check for invalid units first *)
129
-
let has_invalid = List.exists (fun unit ->
130
-
let len = String.length unit in
131
-
String.length lower > len &&
132
-
String.sub lower (String.length lower - len) len = unit
133
-
) invalid_size_units in
134
-
if has_invalid then InvalidUnit
182
+
(* Check for calc() or other CSS functions first - these are always valid *)
183
+
if String.contains value_no_comments '(' then Valid
135
184
else begin
136
-
(* Check for valid CSS length units *)
137
-
let has_valid_unit = List.exists (fun unit ->
138
-
let len = String.length unit in
139
-
String.length lower > len &&
140
-
String.sub lower (String.length lower - len) len = unit
141
-
) valid_length_units in
142
-
if has_valid_unit then begin
143
-
(* Check if it's negative (starts with - but not -0) *)
144
-
if String.length value_no_comments > 0 && value_no_comments.[0] = '-' then begin
145
-
(* Check if it's -0 which is valid *)
146
-
let after_minus = String.sub value_no_comments 1 (String.length value_no_comments - 1) in
147
-
try
148
-
let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus in
149
-
let f = float_of_string num_str in
150
-
if f = 0.0 then Valid else NegativeValue
151
-
with _ -> NegativeValue
152
-
end else
153
-
Valid
154
-
end
155
-
(* Could be calc() or other CSS functions - allow those *)
156
-
else if String.contains value_no_comments '(' then Valid
185
+
(* Check if the value starts with a digit, minus, or plus sign *)
186
+
let first_char = value_no_comments.[0] in
187
+
let starts_with_number =
188
+
(first_char >= '0' && first_char <= '9') ||
189
+
first_char = '-' ||
190
+
first_char = '+' ||
191
+
first_char = '.' (* decimal point like .5px *)
192
+
in
193
+
if not starts_with_number then
194
+
(* Not a valid CSS number token - doesn't start with digit or sign *)
195
+
BadCssNumber (first_char, trimmed)
157
196
else begin
158
-
(* Check if it's a zero value (0, -0, +0) - these are valid without units *)
159
-
let stripped =
160
-
let s = value_no_comments in
161
-
let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in
162
-
s
163
-
in
164
-
(* Check if it's zero or a numeric value starting with 0 *)
165
-
try
166
-
let f = float_of_string stripped in
167
-
if f = 0.0 then Valid else InvalidUnit
168
-
with _ -> InvalidUnit
197
+
(* Check for invalid units first *)
198
+
let found_invalid = List.find_opt (fun unit ->
199
+
let len = String.length unit in
200
+
String.length lower > len &&
201
+
String.sub lower (String.length lower - len) len = unit
202
+
) invalid_size_units in
203
+
match found_invalid with
204
+
| Some _unit -> InvalidUnit (extract_unit value_no_comments, trimmed)
205
+
| None ->
206
+
(* Check for valid CSS length units *)
207
+
let has_valid_unit = List.exists (fun unit ->
208
+
let len = String.length unit in
209
+
String.length lower > len &&
210
+
String.sub lower (String.length lower - len) len = unit
211
+
) valid_length_units in
212
+
if has_valid_unit then begin
213
+
(* Check if it's negative (starts with - but not -0) *)
214
+
if String.length value_no_comments > 0 && value_no_comments.[0] = '-' then begin
215
+
(* Check if it's -0 which is valid *)
216
+
let after_minus = String.sub value_no_comments 1 (String.length value_no_comments - 1) in
217
+
try
218
+
let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus in
219
+
let f = float_of_string num_str in
220
+
if f = 0.0 then Valid else NegativeValue
221
+
with _ -> NegativeValue
222
+
end else
223
+
Valid
224
+
end
225
+
else begin
226
+
(* Check if it's a zero value (0, -0, +0) - these are valid without units *)
227
+
let stripped =
228
+
let s = value_no_comments in
229
+
let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in
230
+
s
231
+
in
232
+
(* Check if it's zero or a numeric value starting with 0 *)
233
+
try
234
+
let f = float_of_string stripped in
235
+
if f = 0.0 then Valid else InvalidUnit (extract_unit value_no_comments, trimmed)
236
+
with _ -> InvalidUnit (extract_unit value_no_comments, trimmed)
237
+
end
169
238
end
170
239
end
171
240
end
···
174
243
let has_valid_size_unit size_value =
175
244
match check_size_value size_value with
176
245
| Valid -> true
177
-
| InvalidUnit | NegativeValue | CssCommentInside | BadScientificNotation -> false
246
+
| InvalidUnit (_, _) | NegativeValue | CssCommentAfterSign (_, _) | CssCommentBeforeUnit (_, _) | BadScientificNotation | BadCssNumber (_, _) -> false
178
247
179
248
(** Check if a sizes entry has a media condition (starts with '(') *)
180
249
let has_media_condition entry =
···
236
305
if not (has_media_condition trimmed) then
237
306
trimmed
238
307
else begin
239
-
(* Find matching closing paren, then get the size value after it *)
308
+
(* Media conditions can have "and", "or", "not" operators connecting
309
+
multiple parenthesized groups, e.g., "(not (width:500px)) and (width:500px) 500px"
310
+
We need to skip all media condition parts to find the size value *)
240
311
let len = String.length trimmed in
241
-
let rec find_close_paren i depth =
312
+
let rec skip_media_condition i =
242
313
if i >= len then len
243
-
else match trimmed.[i] with
244
-
| '(' -> find_close_paren (i + 1) (depth + 1)
245
-
| ')' -> if depth = 1 then i + 1 else find_close_paren (i + 1) (depth - 1)
246
-
| _ -> find_close_paren (i + 1) depth
314
+
else begin
315
+
let remaining = String.trim (String.sub trimmed i (len - i)) in
316
+
let remaining_len = String.length remaining in
317
+
if remaining_len = 0 then len
318
+
else begin
319
+
let first_char = remaining.[0] in
320
+
if first_char = '(' then begin
321
+
(* Skip this parenthesized group *)
322
+
let rec find_close_paren j depth =
323
+
if j >= remaining_len then remaining_len
324
+
else match remaining.[j] with
325
+
| '(' -> find_close_paren (j + 1) (depth + 1)
326
+
| ')' -> if depth = 1 then j + 1 else find_close_paren (j + 1) (depth - 1)
327
+
| _ -> find_close_paren (j + 1) depth
328
+
in
329
+
let after_paren = find_close_paren 0 0 in
330
+
let new_pos = i + (len - i) - remaining_len + after_paren in
331
+
skip_media_condition new_pos
332
+
end
333
+
else begin
334
+
(* Check if remaining starts with "and", "or", "not" followed by space or paren *)
335
+
let lower_remaining = String.lowercase_ascii remaining in
336
+
if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and " then
337
+
skip_media_condition (i + (len - i) - remaining_len + 4)
338
+
else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or " then
339
+
skip_media_condition (i + (len - i) - remaining_len + 3)
340
+
else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "not " then
341
+
skip_media_condition (i + (len - i) - remaining_len + 4)
342
+
else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and(" then
343
+
skip_media_condition (i + (len - i) - remaining_len + 3)
344
+
else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or(" then
345
+
skip_media_condition (i + (len - i) - remaining_len + 2)
346
+
else if remaining_len >= 4 && String.sub lower_remaining 0 4 = "not(" then
347
+
skip_media_condition (i + (len - i) - remaining_len + 3)
348
+
else
349
+
(* Found something that's not a media condition part - this is the size value *)
350
+
i + (len - i) - remaining_len
351
+
end
352
+
end
353
+
end
247
354
in
248
-
let after_paren = find_close_paren 0 0 in
249
-
if after_paren >= len then ""
250
-
else String.trim (String.sub trimmed after_paren (len - after_paren))
355
+
let size_start = skip_media_condition 0 in
356
+
if size_start >= len then ""
357
+
else String.trim (String.sub trimmed size_start (len - size_start))
251
358
end
252
359
253
360
(** Validate sizes attribute value *)
···
275
382
(* Check for trailing comma *)
276
383
let last_entry = String.trim (List.nth entries (List.length entries - 1)) in
277
384
if List.length entries > 1 && last_entry = "" then begin
385
+
(* Generate abbreviated context - show last ~25 chars with ellipsis if needed *)
386
+
let context =
387
+
if String.length value > 25 then
388
+
"\xe2\x80\xa6" ^ String.sub value (String.length value - 25) 25
389
+
else value
390
+
in
278
391
Message_collector.add_error collector
279
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Ends with trailing comma." value element_name)
392
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)
280
393
~code:"bad-sizes-value"
281
394
~element:element_name ~attribute:"sizes" ();
282
395
false
···
285
398
286
399
(* Check for default-first pattern: unconditional value before conditional ones *)
287
400
let non_empty_entries = List.filter (fun e -> String.trim e <> "") entries in
288
-
if List.length non_empty_entries > 1 then begin
289
-
let first = List.hd non_empty_entries in
290
-
let rest = List.tl non_empty_entries in
401
+
(* Filter out entries that have invalid media conditions - they'll be reported separately *)
402
+
let valid_entries = List.filter (fun e ->
403
+
has_invalid_media_condition (String.trim e) = None
404
+
) non_empty_entries in
405
+
if List.length valid_entries > 1 then begin
406
+
let first = List.hd valid_entries in
407
+
let rest = List.tl valid_entries in
291
408
(* If first entry has no media condition but later ones do, that's invalid *)
292
409
if not (has_media_condition first) && List.exists has_media_condition rest then begin
410
+
(* Context is the first entry with a comma *)
411
+
let context = (String.trim first) ^ "," in
293
412
Message_collector.add_error collector
294
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Default size must be last." value element_name)
413
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)
295
414
~code:"bad-sizes-value"
296
415
~element:element_name ~attribute:"sizes" ();
297
416
valid := false
298
417
end;
299
-
(* Check for multiple consecutive defaults (entries without media conditions) *)
300
-
let defaults_without_media = List.filter (fun e -> not (has_media_condition e)) non_empty_entries in
301
-
if List.length defaults_without_media > 1 then begin
302
-
Message_collector.add_error collector
303
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Multiple source sizes without media conditions." value element_name)
304
-
~code:"bad-sizes-value"
305
-
~element:element_name ~attribute:"sizes" ();
306
-
valid := false
418
+
(* Check for multiple entries without media conditions.
419
+
When the first entry has no media condition, report "Expected media condition"
420
+
regardless of whether later entries have media conditions or not *)
421
+
if not (has_media_condition first) && !valid then begin
422
+
(* Only report if we haven't already reported the default-first error *)
423
+
if not (List.exists has_media_condition rest) then begin
424
+
(* Multiple defaults - report as "Expected media condition" *)
425
+
let context = (String.trim first) ^ "," in
426
+
Message_collector.add_error collector
427
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)
428
+
~code:"bad-sizes-value"
429
+
~element:element_name ~attribute:"sizes" ();
430
+
valid := false
431
+
end
307
432
end
308
433
end;
309
434
310
435
(* Validate each entry's media condition and size value *)
311
-
List.iter (fun entry ->
436
+
let num_entries = List.length entries in
437
+
List.iteri (fun idx entry ->
312
438
let trimmed = String.trim entry in
313
439
if trimmed <> "" then begin
314
440
(* Check for invalid media condition *)
315
441
(match has_invalid_media_condition trimmed with
316
442
| Some err_msg ->
443
+
(* Generate context: "entry," with ellipsis if needed *)
444
+
let context = (String.trim entry) ^ "," in
445
+
let context =
446
+
if String.length context > 25 then
447
+
"\xe2\x80\xa6" ^ String.sub context (String.length context - 25) 25
448
+
else context
449
+
in
317
450
Message_collector.add_error collector
318
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: %s." value element_name err_msg)
451
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: %s at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name err_msg context)
319
452
~code:"bad-sizes-value"
320
453
~element:element_name ~attribute:"sizes" ();
321
454
valid := false
···
323
456
324
457
let size_val = extract_size_value trimmed in
325
458
if size_val <> "" then begin
326
-
match check_size_value size_val with
459
+
(* Check if there are multiple space-separated words in the size value.
460
+
Only the first word should be the size, rest is junk. *)
461
+
let size_parts = String.split_on_char ' ' size_val |> List.filter (fun s -> s <> "") in
462
+
let first_size = match size_parts with [] -> size_val | hd :: _ -> hd in
463
+
let extra_parts = match size_parts with [] -> [] | _ :: tl -> tl in
464
+
465
+
(* Check if first word looks like it should have been a media condition
466
+
(doesn't start with digit, sign, decimal, '/', or look like a CSS function) *)
467
+
let first_char = if String.length first_size > 0 then first_size.[0] else 'x' in
468
+
let has_paren = String.contains size_val '(' in (* calc(), etc. *)
469
+
let looks_like_junk_entry =
470
+
not (has_media_condition trimmed) &&
471
+
not has_paren && (* Allow CSS functions like calc() *)
472
+
not (first_char = '/') && (* Allow leading CSS comments *)
473
+
not ((first_char >= '0' && first_char <= '9') ||
474
+
first_char = '+' || first_char = '-' || first_char = '.')
475
+
in
476
+
477
+
(* If this entry looks like junk and there are multiple entries,
478
+
report "Expected media condition" instead of "Bad CSS number".
479
+
For single entries with invalid values, fall through to BadCssNumber. *)
480
+
if looks_like_junk_entry && num_entries > 1 then begin
481
+
(* Find the context ending with the previous entry *)
482
+
let prev_entries = List.filter (fun e -> String.trim e <> "" && e <> entry) entries in
483
+
let context =
484
+
if List.length prev_entries > 0 then
485
+
let prev_value = String.concat ", " (List.map String.trim prev_entries) ^ "," in
486
+
if String.length prev_value > 25 then
487
+
"\xe2\x80\xa6" ^ String.sub prev_value (String.length prev_value - 25) 25
488
+
else prev_value
489
+
else value
490
+
in
491
+
Message_collector.add_error collector
492
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected media condition before \xe2\x80\x9c\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name context)
493
+
~code:"bad-sizes-value"
494
+
~element:element_name ~attribute:"sizes" ();
495
+
valid := false
496
+
end
497
+
(* If there's extra junk after the size, report BadCssNumber error for it *)
498
+
else if extra_parts <> [] then begin
499
+
let junk = String.concat " " extra_parts in
500
+
let last_junk = List.nth extra_parts (List.length extra_parts - 1) in
501
+
let first_char = if String.length last_junk > 0 then last_junk.[0] else 'x' in
502
+
(* Context depends on whether this is the last entry:
503
+
- For non-last entries: entry with trailing comma, truncated from beginning
504
+
- For last entry: full value truncated from beginning (no trailing comma) *)
505
+
let is_last_entry = idx = num_entries - 1 in
506
+
let context =
507
+
if is_last_entry then begin
508
+
(* Last entry: use full value truncated *)
509
+
if String.length value > 25 then
510
+
"\xe2\x80\xa6" ^ String.sub value (String.length value - 25) 25
511
+
else value
512
+
end else begin
513
+
(* Non-last entry: use entry with comma, truncated *)
514
+
let entry_with_comma = trimmed ^ "," in
515
+
if String.length entry_with_comma > 25 then
516
+
"\xe2\x80\xa6" ^ String.sub entry_with_comma (String.length entry_with_comma - 25) 25
517
+
else entry_with_comma
518
+
end
519
+
in
520
+
let _ = junk in
521
+
Message_collector.add_error collector
522
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name first_char context)
523
+
~code:"bad-sizes-value"
524
+
~element:element_name ~attribute:"sizes" ();
525
+
valid := false
526
+
end
527
+
else
528
+
match check_size_value first_size with
327
529
| Valid -> ()
328
530
| NegativeValue ->
531
+
let full_context =
532
+
if List.length entries > 1 then size_val ^ ","
533
+
else size_val
534
+
in
535
+
let _ = full_context in
329
536
Message_collector.add_error collector
330
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Source size value cannot be negative." value element_name)
537
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected positive size value but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name size_val size_val)
538
+
~code:"bad-sizes-value"
539
+
~element:element_name ~attribute:"sizes" ();
540
+
valid := false
541
+
| CssCommentAfterSign (found, context) ->
542
+
(* e.g., +/**/50vw - expected number after sign *)
543
+
Message_collector.add_error collector
544
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected number but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name found context)
331
545
~code:"bad-sizes-value"
332
546
~element:element_name ~attribute:"sizes" ();
333
547
valid := false
334
-
| CssCommentInside ->
548
+
| CssCommentBeforeUnit (found, context) ->
549
+
(* e.g., 50/**/vw - expected units after number *)
550
+
let units_list = List.map (fun u -> Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" u) valid_length_units in
551
+
let units_str = String.concat ", " units_list in
335
552
Message_collector.add_error collector
336
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token." value element_name)
553
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected units (one of %s) but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name units_str found context)
337
554
~code:"bad-sizes-value"
338
555
~element:element_name ~attribute:"sizes" ();
339
556
valid := false
340
557
| BadScientificNotation ->
558
+
(* For scientific notation with bad exponent, show what char was expected vs found *)
559
+
let context =
560
+
if List.length entries > 1 then trimmed ^ ","
561
+
else trimmed
562
+
in
563
+
(* Find the period in the exponent *)
564
+
let _ = context in
341
565
Message_collector.add_error collector
342
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token." value element_name)
566
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a digit but saw \xe2\x80\x9c.\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name size_val)
343
567
~code:"bad-sizes-value"
344
568
~element:element_name ~attribute:"sizes" ();
345
569
valid := false
346
-
| InvalidUnit ->
570
+
| BadCssNumber (first_char, context) ->
571
+
(* Value doesn't start with a digit or minus sign *)
572
+
let full_context =
573
+
if List.length entries > 1 then context ^ ","
574
+
else context
575
+
in
576
+
let _ = full_context in
577
+
Message_collector.add_error collector
578
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token: Expected a minus sign or a digit but saw \xe2\x80\x9c%c\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name first_char context)
579
+
~code:"bad-sizes-value"
580
+
~element:element_name ~attribute:"sizes" ();
581
+
valid := false
582
+
| InvalidUnit (found_unit, _context) ->
583
+
(* Generate the full list of expected units *)
584
+
let units_list = List.map (fun u -> Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" u) valid_length_units in
585
+
let units_str = String.concat ", " units_list in
586
+
(* Context should be the full entry, with comma only if there are multiple entries *)
587
+
let full_context =
588
+
if List.length entries > 1 then trimmed ^ ","
589
+
else trimmed
590
+
in
591
+
(* When found_unit is empty, say "no units" instead of quoting empty string *)
592
+
let found_str =
593
+
if found_unit = "" then "no units"
594
+
else Printf.sprintf "\xe2\x80\x9c%s\xe2\x80\x9d" found_unit
595
+
in
347
596
Message_collector.add_error collector
348
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size value." value element_name)
597
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Expected units (one of %s) but found %s at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name units_str found_str full_context)
349
598
~code:"bad-sizes-value"
350
599
~element:element_name ~attribute:"sizes" ();
351
600
valid := false
···
359
608
end
360
609
361
610
(** Validate srcset descriptor *)
362
-
let validate_srcset_descriptor desc element_name srcset_value collector =
611
+
let validate_srcset_descriptor desc element_name srcset_value has_sizes collector =
363
612
let desc_lower = String.lowercase_ascii (String.trim desc) in
364
613
if String.length desc_lower = 0 then true
365
614
else begin
···
371
620
(* Width descriptor - must be positive integer, no leading + *)
372
621
let trimmed_desc = String.trim desc in
373
622
if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin
623
+
(* Show just the number part (without the 'w') *)
624
+
let num_part_for_msg = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
374
625
Message_collector.add_error collector
375
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name trimmed_desc srcset_value)
626
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part_for_msg srcset_value)
376
627
~code:"bad-srcset-value"
377
628
~element:element_name ~attribute:"srcset" ();
378
629
false
···
381
632
let n = int_of_string num_part in
382
633
if n <= 0 then begin
383
634
Message_collector.add_error collector
384
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Width must be positive." srcset_value element_name)
635
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value)
385
636
~code:"bad-srcset-value"
386
637
~element:element_name ~attribute:"srcset" ();
387
638
false
···
390
641
let original_last = desc.[String.length desc - 1] in
391
642
if original_last = 'W' then begin
392
643
Message_collector.add_error collector
393
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Width descriptor must use lowercase \xe2\x80\x9cw\xe2\x80\x9d." srcset_value element_name)
644
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" srcset_value element_name desc srcset_value)
394
645
~code:"bad-srcset-value"
395
646
~element:element_name ~attribute:"srcset" ();
396
647
false
397
648
end else true
398
649
end
399
650
with _ ->
400
-
(* Check for scientific notation or decimal *)
401
-
if String.contains num_part 'e' || String.contains num_part 'E' then begin
651
+
(* Check for scientific notation, decimal, or other non-integer values *)
652
+
if String.contains num_part 'e' || String.contains num_part 'E' || String.contains num_part '.' then begin
402
653
Message_collector.add_error collector
403
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Scientific notation not allowed in width descriptor." srcset_value element_name)
654
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected integer but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value)
404
655
~code:"bad-srcset-value"
405
656
~element:element_name ~attribute:"srcset" ();
406
657
false
···
415
666
(* Pixel density descriptor - must be positive number, no leading + *)
416
667
let trimmed_desc = String.trim desc in
417
668
if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin
669
+
(* Extract the number part including the plus sign *)
670
+
let num_with_plus = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
418
671
Message_collector.add_error collector
419
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Leading plus sign not allowed." srcset_value element_name)
672
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_with_plus srcset_value)
420
673
~code:"bad-srcset-value"
421
674
~element:element_name ~attribute:"srcset" ();
422
675
false
···
424
677
(try
425
678
let n = float_of_string num_part in
426
679
if Float.is_nan n then begin
680
+
(* NaN is not a valid float - report as parse error with first char from ORIGINAL desc *)
681
+
let trimmed_desc = String.trim desc in
682
+
let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
683
+
let first_char = if String.length orig_num_part > 0 then String.make 1 orig_num_part.[0] else "" in
427
684
Message_collector.add_error collector
428
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: NaN not allowed." srcset_value element_name)
685
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Expected a digit but saw \xe2\x80\x9c%s\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name first_char srcset_value)
429
686
~code:"bad-srcset-value"
430
687
~element:element_name ~attribute:"srcset" ();
431
688
false
432
-
end else if n <= 0.0 then begin
689
+
end else if n = 0.0 then begin
690
+
(* Check if it's -0 (starts with minus) - report as "greater than zero" error *)
691
+
let trimmed_desc = String.trim desc in
692
+
let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
693
+
if String.length orig_num_part > 0 && orig_num_part.[0] = '-' then begin
694
+
Message_collector.add_error collector
695
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name orig_num_part srcset_value)
696
+
~code:"bad-srcset-value"
697
+
~element:element_name ~attribute:"srcset" ()
698
+
end else begin
699
+
Message_collector.add_error collector
700
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Zero is not a valid positive floating point number at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name srcset_value)
701
+
~code:"bad-srcset-value"
702
+
~element:element_name ~attribute:"srcset" ()
703
+
end;
704
+
false
705
+
end else if n < 0.0 then begin
433
706
Message_collector.add_error collector
434
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Density must be positive." srcset_value element_name)
707
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number greater than zero but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name num_part srcset_value)
435
708
~code:"bad-srcset-value"
436
709
~element:element_name ~attribute:"srcset" ();
437
710
false
438
711
end else if n = neg_infinity || n = infinity then begin
712
+
(* Infinity is not a valid float - report as parse error with first char from ORIGINAL desc *)
713
+
let trimmed_desc = String.trim desc in
714
+
let orig_num_part = String.sub trimmed_desc 0 (String.length trimmed_desc - 1) in
715
+
let first_char = if String.length orig_num_part > 0 then String.make 1 orig_num_part.[0] else "" in
439
716
Message_collector.add_error collector
440
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Infinity not allowed." srcset_value element_name)
717
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad positive floating point number: Expected a digit but saw \xe2\x80\x9c%s\xe2\x80\x9d instead at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name first_char srcset_value)
441
718
~code:"bad-srcset-value"
442
719
~element:element_name ~attribute:"srcset" ();
443
720
false
···
451
728
end
452
729
| 'h' ->
453
730
(* Height descriptor - not allowed *)
454
-
Message_collector.add_error collector
455
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Height descriptor \xe2\x80\x9ch\xe2\x80\x9d is not allowed." srcset_value element_name)
456
-
~code:"bad-srcset-value"
457
-
~element:element_name ~attribute:"srcset" ();
731
+
let trimmed_desc = String.trim desc in
732
+
(* Generate context: find where this entry appears *)
733
+
let context =
734
+
try
735
+
let pos = Str.search_forward (Str.regexp_string trimmed_desc) srcset_value 0 in
736
+
(* Get the entry context ending with comma *)
737
+
let search_from = max 0 (pos - 3) in
738
+
let comma_pos = try Str.search_forward (Str.regexp_string ",") srcset_value pos with Not_found -> String.length srcset_value - 1 in
739
+
let end_pos = min (comma_pos + 1) (String.length srcset_value) in
740
+
let len = end_pos - search_from in
741
+
if len > 0 then String.trim (String.sub srcset_value search_from len) else srcset_value
742
+
with Not_found | Invalid_argument _ -> srcset_value
743
+
in
744
+
if has_sizes then
745
+
Message_collector.add_error collector
746
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" srcset_value element_name trimmed_desc context)
747
+
~code:"bad-srcset-value"
748
+
~element:element_name ~attribute:"srcset" ()
749
+
else
750
+
Message_collector.add_error collector
751
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor: Height descriptor \xe2\x80\x9ch\xe2\x80\x9d is not allowed." srcset_value element_name)
752
+
~code:"bad-srcset-value"
753
+
~element:element_name ~attribute:"srcset" ();
458
754
false
459
755
| _ ->
460
-
(* Unknown descriptor *)
756
+
(* Unknown descriptor - find context in srcset_value *)
757
+
let trimmed_desc = String.trim desc in
758
+
(* Try to find the context: find where this descriptor appears in srcset_value *)
759
+
let context =
760
+
try
761
+
let pos = Str.search_forward (Str.regexp_string trimmed_desc) srcset_value 0 in
762
+
(* Get the context up to and including the descriptor and the comma after *)
763
+
let end_pos = min (pos + String.length trimmed_desc + 1) (String.length srcset_value) in
764
+
let start_pos = max 0 (pos - 2) in
765
+
String.trim (String.sub srcset_value start_pos (end_pos - start_pos))
766
+
with Not_found -> srcset_value
767
+
in
461
768
Message_collector.add_error collector
462
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset descriptor." srcset_value element_name)
769
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number followed by \xe2\x80\x9cw\xe2\x80\x9d or \xe2\x80\x9cx\xe2\x80\x9d but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name trimmed_desc context)
463
770
~code:"bad-srcset-value"
464
771
~element:element_name ~attribute:"srcset" ();
465
772
false
···
489
796
let entries = String.split_on_char ',' value in
490
797
let has_w_descriptor = ref false in
491
798
let has_x_descriptor = ref false in
492
-
let has_no_descriptor = ref false in (* Track if any entry has no descriptor *)
493
-
let seen_descriptors = Hashtbl.create 8 in (* Track seen descriptor values *)
799
+
let no_descriptor_url = ref None in (* Track URL of first entry without width descriptor *)
800
+
let x_with_sizes_error_reported = ref false in (* Track if we already reported x-with-sizes error *)
801
+
let seen_descriptors = Hashtbl.create 8 in (* Track seen descriptor values -> first URL *)
494
802
495
803
(* Check for empty srcset *)
496
804
if String.trim value = "" then begin
497
805
Message_collector.add_error collector
498
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Must not be empty." value element_name)
806
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Must contain one or more image candidate strings." value element_name)
499
807
~code:"bad-srcset-value"
500
808
~element:element_name ~attribute:"srcset" ()
501
809
end;
···
503
811
(* Check for leading comma *)
504
812
if String.length value > 0 && value.[0] = ',' then begin
505
813
Message_collector.add_error collector
506
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Leading comma." value element_name)
814
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Starts with empty image-candidate string." value element_name)
507
815
~code:"bad-srcset-value"
508
816
~element:element_name ~attribute:"srcset" ()
509
817
end;
510
818
511
-
(* Check for trailing comma *)
819
+
(* Check for trailing comma(s) / empty entries *)
512
820
let trimmed_value = String.trim value in
513
821
if String.length trimmed_value > 0 && trimmed_value.[String.length trimmed_value - 1] = ',' then begin
514
-
Message_collector.add_error collector
515
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Trailing comma." value element_name)
516
-
~code:"bad-srcset-value"
517
-
~element:element_name ~attribute:"srcset" ()
822
+
(* Count consecutive trailing commas *)
823
+
let rec count_trailing_commas s idx count =
824
+
if idx < 0 then count
825
+
else if s.[idx] = ',' then count_trailing_commas s (idx - 1) (count + 1)
826
+
else if s.[idx] = ' ' || s.[idx] = '\t' then count_trailing_commas s (idx - 1) count
827
+
else count
828
+
in
829
+
let trailing_commas = count_trailing_commas trimmed_value (String.length trimmed_value - 1) 0 in
830
+
if trailing_commas > 1 then
831
+
(* Multiple trailing commas: "Empty image-candidate string at" *)
832
+
Message_collector.add_error collector
833
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Empty image-candidate string at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name value)
834
+
~code:"bad-srcset-value"
835
+
~element:element_name ~attribute:"srcset" ()
836
+
else
837
+
(* Single trailing comma: "Ends with empty image-candidate string." *)
838
+
Message_collector.add_error collector
839
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Ends with empty image-candidate string." value element_name)
840
+
~code:"bad-srcset-value"
841
+
~element:element_name ~attribute:"srcset" ()
518
842
end;
519
843
520
844
List.iter (fun entry ->
···
532
856
let scheme_colon = scheme ^ ":" in
533
857
if url_lower = scheme_colon then
534
858
Message_collector.add_error collector
535
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a slash (\"/\")." value element_name)
859
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad image-candidate URL: \xe2\x80\x9c%s\xe2\x80\x9d: Expected a slash (\"/\")." value element_name url)
536
860
~code:"bad-srcset-url"
537
861
~element:element_name ~attribute:"srcset" ()
538
862
) special_schemes
···
542
866
| [url] ->
543
867
check_srcset_url url;
544
868
(* URL only = implicit 1x descriptor - only flag if explicit 1x also seen *)
545
-
has_no_descriptor := true;
546
-
if Hashtbl.mem seen_descriptors "explicit-1x" then begin
869
+
if !no_descriptor_url = None then no_descriptor_url := Some url;
870
+
begin match Hashtbl.find_opt seen_descriptors "explicit-1x" with
871
+
| Some first_url ->
547
872
Message_collector.add_error collector
548
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name)
873
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Density for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to density for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name url first_url)
549
874
~code:"bad-srcset-value"
550
875
~element:element_name ~attribute:"srcset" ()
551
-
end else
552
-
Hashtbl.add seen_descriptors "implicit-1x" true
876
+
| None ->
877
+
Hashtbl.add seen_descriptors "implicit-1x" url
878
+
end
553
879
| url :: desc :: rest ->
554
880
(* Check URL for broken schemes *)
555
881
check_srcset_url url;
556
882
(* Check for extra junk - multiple descriptors are not allowed *)
557
883
if rest <> [] then begin
884
+
let extra_desc = List.hd rest in
558
885
Message_collector.add_error collector
559
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad srcset: Multiple descriptors in candidate." value element_name)
886
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected single descriptor but found extraneous descriptor \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." value element_name extra_desc value)
560
887
~code:"bad-srcset-value"
561
888
~element:element_name ~attribute:"srcset" ()
562
889
end;
···
565
892
if String.length desc_lower > 0 then begin
566
893
let last_char = desc_lower.[String.length desc_lower - 1] in
567
894
if last_char = 'w' then has_w_descriptor := true
568
-
else if last_char = 'x' then has_x_descriptor := true;
895
+
else if last_char = 'x' then begin
896
+
has_x_descriptor := true;
897
+
(* If sizes is present and we have an x descriptor, generate detailed error *)
898
+
if has_sizes && not !x_with_sizes_error_reported then begin
899
+
x_with_sizes_error_reported := true;
900
+
(* Build context:
901
+
- If entry has extra parts (multiple descriptors): show "url descriptor "
902
+
- Else if entry has trailing comma: show "url descriptor,"
903
+
- Else (last entry, no extra parts): show full srcset value *)
904
+
let trimmed_url = String.trim url in
905
+
let trimmed_desc = String.trim desc in
906
+
let entry_context =
907
+
if rest <> [] then
908
+
(* Entry has multiple descriptors - show URL + first descriptor + space *)
909
+
trimmed_url ^ " " ^ trimmed_desc ^ " "
910
+
else
911
+
(* Check if entry ends with comma in original value *)
912
+
let trimmed_entry = String.trim entry in
913
+
try
914
+
let entry_start = Str.search_forward (Str.regexp_string trimmed_url) value 0 in
915
+
let entry_end = entry_start + String.length trimmed_entry in
916
+
let has_trailing_comma = entry_end < String.length value && value.[entry_end] = ',' in
917
+
if has_trailing_comma then
918
+
(* Entry followed by comma - show "url descriptor," *)
919
+
trimmed_url ^ " " ^ trimmed_desc ^ ","
920
+
else
921
+
(* Last entry - show full srcset value *)
922
+
value
923
+
with Not_found ->
924
+
value
925
+
in
926
+
Message_collector.add_error collector
927
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected width descriptor but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name trimmed_desc entry_context)
928
+
~code:"bad-srcset-value"
929
+
~element:element_name ~attribute:"srcset" ()
930
+
end
931
+
end;
569
932
570
933
(* Check for duplicate descriptors - use normalized form *)
571
934
let normalized = normalize_descriptor desc in
572
935
let is_1x = (normalized = "1x") in
573
-
if Hashtbl.mem seen_descriptors normalized then begin
574
-
Message_collector.add_error collector
575
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name)
576
-
~code:"bad-srcset-value"
577
-
~element:element_name ~attribute:"srcset" ()
578
-
end else if is_1x && Hashtbl.mem seen_descriptors "implicit-1x" then begin
579
-
(* Explicit 1x conflicts with implicit 1x *)
936
+
let is_width = (last_char = 'w') in
937
+
let dup_type = if is_width then "Width" else "Density" in
938
+
begin match Hashtbl.find_opt seen_descriptors normalized with
939
+
| Some first_url ->
580
940
Message_collector.add_error collector
581
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name)
941
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to %s for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name dup_type url (String.lowercase_ascii dup_type) first_url)
582
942
~code:"bad-srcset-value"
583
943
~element:element_name ~attribute:"srcset" ()
584
-
end else begin
585
-
Hashtbl.add seen_descriptors normalized true;
586
-
if is_1x then Hashtbl.add seen_descriptors "explicit-1x" true
944
+
| None ->
945
+
begin match (if is_1x then Hashtbl.find_opt seen_descriptors "implicit-1x" else None) with
946
+
| Some first_url ->
947
+
(* Explicit 1x conflicts with implicit 1x *)
948
+
Message_collector.add_error collector
949
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s for image \xe2\x80\x9c%s\xe2\x80\x9d is identical to %s for image \xe2\x80\x9c%s\xe2\x80\x9d." value element_name dup_type url (String.lowercase_ascii dup_type) first_url)
950
+
~code:"bad-srcset-value"
951
+
~element:element_name ~attribute:"srcset" ()
952
+
| None ->
953
+
Hashtbl.add seen_descriptors normalized url;
954
+
if is_1x then Hashtbl.add seen_descriptors "explicit-1x" url
955
+
end
587
956
end
588
957
end;
589
958
590
-
ignore (validate_srcset_descriptor desc element_name value collector)
959
+
ignore (validate_srcset_descriptor desc element_name value has_sizes collector)
591
960
end
592
961
) entries;
593
962
594
963
(* Check: if w descriptor used and no sizes, that's an error for img and source *)
595
964
if !has_w_descriptor && not has_sizes then
596
965
Message_collector.add_error collector
597
-
~message:(Printf.sprintf "When the \xe2\x80\x9csrcset\xe2\x80\x9d attribute on the \xe2\x80\x9c%s\xe2\x80\x9d element uses width descriptors, the \xe2\x80\x9csizes\xe2\x80\x9d attribute must also be present." element_name)
966
+
~message:"When the \xe2\x80\x9csrcset\xe2\x80\x9d attribute has any image candidate string with a width descriptor, the \xe2\x80\x9csizes\xe2\x80\x9d attribute must also be specified."
598
967
~code:"srcset-w-without-sizes"
599
968
~element:element_name ~attribute:"srcset" ();
600
969
601
970
(* Check: if sizes is present, all entries must have width descriptors *)
602
-
if has_sizes && !has_no_descriptor then
971
+
(match !no_descriptor_url with
972
+
| Some url when has_sizes ->
603
973
Message_collector.add_error collector
604
-
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: No width specified for image. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name)
974
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: No width specified for image \xe2\x80\x9c%s\xe2\x80\x9d. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name url)
605
975
~code:"bad-srcset-value"
606
-
~element:element_name ~attribute:"srcset" ();
976
+
~element:element_name ~attribute:"srcset" ()
977
+
| _ -> ());
607
978
608
-
(* Check: if sizes is present and srcset uses x descriptors, that's an error *)
609
-
if has_sizes && !has_x_descriptor then
979
+
(* Check: if sizes is present and srcset uses x descriptors, that's an error.
980
+
Only report if we haven't already reported the detailed error. *)
981
+
if has_sizes && !has_x_descriptor && not !x_with_sizes_error_reported then
610
982
Message_collector.add_error collector
611
983
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width." value element_name)
612
984
~code:"bad-srcset-value"
+3
lib/html5_checker/specialized/xhtml_content_checker.ml
+3
lib/html5_checker/specialized/xhtml_content_checker.ml
+13
-13
lib/html5rw/parser/parser_tree_builder.ml
+13
-13
lib/html5rw/parser/parser_tree_builder.ml
···
787
787
t.open_elements <- [html];
788
788
t.mode <- Parser_insertion_mode.Before_head;
789
789
process_token t token
790
-
| Token.Tag { kind = Token.End; _ } ->
791
-
parse_error t "unexpected-end-tag"
790
+
| Token.Tag { kind = Token.End; name; _ } ->
791
+
parse_error t ("unexpected-end-tag:" ^ name)
792
792
| _ ->
793
793
let html = insert_element t "html" [] in
794
794
t.open_elements <- [html];
···
813
813
t.head_element <- Some head;
814
814
t.mode <- Parser_insertion_mode.In_head;
815
815
process_token t token
816
-
| Token.Tag { kind = Token.End; _ } ->
817
-
parse_error t "unexpected-end-tag"
816
+
| Token.Tag { kind = Token.End; name; _ } ->
817
+
parse_error t ("unexpected-end-tag:" ^ name)
818
818
| _ ->
819
819
let head = insert_element t "head" [] in
820
820
t.open_elements <- head :: t.open_elements;
···
902
902
end
903
903
| Token.Tag { kind = Token.Start; name = "head"; _ } ->
904
904
parse_error t "unexpected-start-tag"
905
-
| Token.Tag { kind = Token.End; _ } ->
906
-
parse_error t "unexpected-end-tag"
905
+
| Token.Tag { kind = Token.End; name; _ } ->
906
+
parse_error t ("unexpected-end-tag:" ^ name)
907
907
| _ ->
908
908
pop_current t;
909
909
t.mode <- Parser_insertion_mode.After_head;
···
943
943
pop_current t; (* Pop noscript *)
944
944
t.mode <- Parser_insertion_mode.In_head;
945
945
process_token t token
946
-
| Token.Tag { kind = Token.End; _ } ->
947
-
parse_error t "unexpected-end-tag"
946
+
| Token.Tag { kind = Token.End; name; _ } ->
947
+
parse_error t ("unexpected-end-tag:" ^ name)
948
948
| Token.EOF ->
949
949
parse_error t "expected-closing-tag-but-got-eof";
950
950
pop_current t; (* Pop noscript *)
···
998
998
process_token t token
999
999
| Token.Tag { kind = Token.Start; name = "head"; _ } ->
1000
1000
parse_error t "unexpected-start-tag"
1001
-
| Token.Tag { kind = Token.End; _ } ->
1002
-
parse_error t "unexpected-end-tag"
1001
+
| Token.Tag { kind = Token.End; name; _ } ->
1002
+
parse_error t ("unexpected-end-tag:" ^ name)
1003
1003
| _ ->
1004
1004
let body = insert_element t "body" [] in
1005
1005
t.open_elements <- body :: t.open_elements;
···
1447
1447
| _ -> ());
1448
1448
pop_until t (fun n -> n == node)
1449
1449
end else if is_special_element node then
1450
-
parse_error t "unexpected-end-tag"
1450
+
parse_error t ("unexpected-end-tag:" ^ name)
1451
1451
else
1452
1452
check rest
1453
1453
in
···
2056
2056
t.template_modes <- Parser_insertion_mode.In_body :: t.template_modes;
2057
2057
t.mode <- Parser_insertion_mode.In_body;
2058
2058
process_token t token
2059
-
| Token.Tag { kind = Token.End; _ } ->
2060
-
parse_error t "unexpected-end-tag"
2059
+
| Token.Tag { kind = Token.End; name; _ } ->
2060
+
parse_error t ("unexpected-end-tag:" ^ name)
2061
2061
| Token.EOF ->
2062
2062
if not (List.exists (fun n -> n.Dom.name = "template" && is_in_html_namespace n) t.open_elements) then
2063
2063
() (* Stop parsing *)