+3
-1
lib/html5_checker/checker_registry.ml
+3
-1
lib/html5_checker/checker_registry.ml
···
33
33
Hashtbl.replace reg "ruby" Ruby_checker.checker;
34
34
Hashtbl.replace reg "h1" H1_checker.checker;
35
35
Hashtbl.replace reg "srcset-sizes" Srcset_sizes_checker.checker;
36
+
Hashtbl.replace reg "autofocus" Autofocus_checker.checker;
37
+
Hashtbl.replace reg "option" Option_checker.checker;
38
+
Hashtbl.replace reg "language" Language_checker.checker;
36
39
(* Hashtbl.replace reg "table" Table_checker.checker; *)
37
40
(* Hashtbl.replace reg "heading" Heading_checker.checker; *)
38
41
(* Hashtbl.replace reg "microdata" Microdata_checker.checker; *)
39
-
(* Hashtbl.replace reg "language" Language_checker.checker; *)
40
42
(* Hashtbl.replace reg "content" Content_checker.checker; *)
41
43
reg
42
44
+116
lib/html5_checker/semantic/autofocus_checker.ml
+116
lib/html5_checker/semantic/autofocus_checker.ml
···
1
+
(** Autofocus attribute validation checker.
2
+
3
+
Validates that only one element with autofocus attribute exists within
4
+
each dialog or popover context. *)
5
+
6
+
(** Context for tracking autofocus elements. *)
7
+
type context_type = Dialog | Popover
8
+
9
+
type context = {
10
+
context_type : context_type;
11
+
mutable autofocus_count : int;
12
+
depth : int;
13
+
}
14
+
15
+
type state = {
16
+
mutable context_stack : context list;
17
+
mutable current_depth : int;
18
+
}
19
+
20
+
let create () = {
21
+
context_stack = [];
22
+
current_depth = 0;
23
+
}
24
+
25
+
let reset state =
26
+
state.context_stack <- [];
27
+
state.current_depth <- 0
28
+
29
+
(** Check if an attribute list contains a specific attribute. *)
30
+
let has_attr name attrs =
31
+
List.exists (fun (attr_name, _) -> String.lowercase_ascii attr_name = name) attrs
32
+
33
+
(** Get an attribute value from the list. *)
34
+
let get_attr name attrs =
35
+
List.find_map (fun (attr_name, value) ->
36
+
if String.lowercase_ascii attr_name = name then Some value else None
37
+
) attrs
38
+
39
+
(** Check if element has popover attribute. *)
40
+
let has_popover attrs =
41
+
List.exists (fun (attr_name, _) ->
42
+
String.lowercase_ascii attr_name = "popover"
43
+
) attrs
44
+
45
+
let start_element state ~name ~namespace ~attrs collector =
46
+
let name_lower = String.lowercase_ascii name in
47
+
48
+
(* Track depth *)
49
+
state.current_depth <- state.current_depth + 1;
50
+
51
+
if namespace = None then begin
52
+
(* Check if we're entering a dialog or popover context *)
53
+
let enters_context =
54
+
if name_lower = "dialog" then Some Dialog
55
+
else if has_popover attrs then Some Popover
56
+
else None
57
+
in
58
+
59
+
(match enters_context with
60
+
| Some ctx_type ->
61
+
let ctx = { context_type = ctx_type; autofocus_count = 0; depth = state.current_depth } in
62
+
state.context_stack <- ctx :: state.context_stack
63
+
| None -> ());
64
+
65
+
(* Check for autofocus attribute *)
66
+
if has_attr "autofocus" attrs then begin
67
+
(* Increment count in innermost context if any *)
68
+
match state.context_stack with
69
+
| ctx :: _ ->
70
+
ctx.autofocus_count <- ctx.autofocus_count + 1;
71
+
if ctx.autofocus_count > 1 then
72
+
let context_name = match ctx.context_type with
73
+
| Dialog -> "dialog"
74
+
| Popover -> "popover"
75
+
in
76
+
Message_collector.add_error collector
77
+
~message:(Printf.sprintf "A document must not include more than one visible element with the \xe2\x80\x9cautofocus\xe2\x80\x9d attribute inside a %s."
78
+
context_name)
79
+
~code:"multiple-autofocus"
80
+
~element:name ~attribute:"autofocus" ()
81
+
| [] -> ()
82
+
end
83
+
end
84
+
85
+
let end_element state ~name ~namespace _collector =
86
+
let name_lower = String.lowercase_ascii name in
87
+
88
+
if namespace = None then begin
89
+
(* Pop context if we're leaving one *)
90
+
match state.context_stack with
91
+
| ctx :: rest when ctx.depth = state.current_depth ->
92
+
(* Verify this is the right element *)
93
+
let matches =
94
+
(name_lower = "dialog" && ctx.context_type = Dialog) ||
95
+
(ctx.context_type = Popover)
96
+
in
97
+
if matches then state.context_stack <- rest
98
+
| _ -> ()
99
+
end;
100
+
101
+
state.current_depth <- state.current_depth - 1
102
+
103
+
let characters _state _text _collector = ()
104
+
105
+
let end_document _state _collector = ()
106
+
107
+
let checker =
108
+
(module struct
109
+
type nonrec state = state
110
+
let create = create
111
+
let reset = reset
112
+
let start_element = start_element
113
+
let end_element = end_element
114
+
let characters = characters
115
+
let end_document = end_document
116
+
end : Checker.S)
+3
lib/html5_checker/semantic/id_checker.ml
+3
lib/html5_checker/semantic/id_checker.ml
···
82
82
"form"; (* form-associated elements *)
83
83
"list"; (* input *)
84
84
"aria-activedescendant";
85
+
"popovertarget"; (* button - references popover element *)
86
+
"commandfor"; (* button - references element to control *)
87
+
"anchor"; (* popover - references anchor element *)
85
88
]
86
89
87
90
(** Attributes that reference multiple IDs (space-separated). *)
+101
lib/html5_checker/semantic/option_checker.ml
+101
lib/html5_checker/semantic/option_checker.ml
···
1
+
(** Option element validation checker.
2
+
3
+
Validates that option elements have proper content or label. *)
4
+
5
+
type option_context = {
6
+
mutable has_text : bool;
7
+
has_label : bool;
8
+
label_empty : bool;
9
+
}
10
+
11
+
type state = {
12
+
mutable option_stack : option_context list;
13
+
mutable in_template : int;
14
+
}
15
+
16
+
let create () = {
17
+
option_stack = [];
18
+
in_template = 0;
19
+
}
20
+
21
+
let reset state =
22
+
state.option_stack <- [];
23
+
state.in_template <- 0
24
+
25
+
(** Get attribute value if present. *)
26
+
let get_attr name attrs =
27
+
List.find_map (fun (attr_name, value) ->
28
+
if String.lowercase_ascii attr_name = name then Some value else None
29
+
) attrs
30
+
31
+
let start_element state ~name ~namespace ~attrs collector =
32
+
ignore collector;
33
+
let name_lower = String.lowercase_ascii name in
34
+
35
+
if namespace <> None then ()
36
+
else begin
37
+
if name_lower = "template" then
38
+
state.in_template <- state.in_template + 1
39
+
else if state.in_template = 0 && name_lower = "option" then begin
40
+
let label_opt = get_attr "label" attrs in
41
+
let has_label = label_opt <> None in
42
+
let label_empty = match label_opt with
43
+
| Some v -> String.trim v = ""
44
+
| None -> false
45
+
in
46
+
let ctx = { has_text = false; has_label; label_empty } in
47
+
state.option_stack <- ctx :: state.option_stack
48
+
end
49
+
end
50
+
51
+
let end_element state ~name ~namespace collector =
52
+
let name_lower = String.lowercase_ascii name in
53
+
54
+
if namespace <> None then ()
55
+
else begin
56
+
if name_lower = "template" then
57
+
state.in_template <- max 0 (state.in_template - 1)
58
+
else if state.in_template = 0 && name_lower = "option" then begin
59
+
match state.option_stack with
60
+
| ctx :: rest ->
61
+
state.option_stack <- rest;
62
+
(* Validate: option must have text content or non-empty label *)
63
+
if not ctx.has_text then begin
64
+
if ctx.label_empty then
65
+
(* Has label="" (empty) and no text - error *)
66
+
Message_collector.add_error collector
67
+
~message:"An \xe2\x80\x9coption\xe2\x80\x9d element with an empty \xe2\x80\x9clabel\xe2\x80\x9d attribute must have content."
68
+
~code:"empty-option"
69
+
~element:"option" ()
70
+
else if not ctx.has_label then
71
+
(* No label and no text - error *)
72
+
Message_collector.add_error collector
73
+
~message:"An \xe2\x80\x9coption\xe2\x80\x9d element with no \xe2\x80\x9clabel\xe2\x80\x9d attribute must have content."
74
+
~code:"empty-option"
75
+
~element:"option" ()
76
+
end
77
+
| [] -> ()
78
+
end
79
+
end
80
+
81
+
let characters state text _collector =
82
+
if state.in_template = 0 then begin
83
+
match state.option_stack with
84
+
| ctx :: _ ->
85
+
let trimmed = String.trim text in
86
+
if trimmed <> "" then ctx.has_text <- true
87
+
| [] -> ()
88
+
end
89
+
90
+
let end_document _state _collector = ()
91
+
92
+
let checker =
93
+
(module struct
94
+
type nonrec state = state
95
+
let create = create
96
+
let reset = reset
97
+
let start_element = start_element
98
+
let end_element = end_element
99
+
let characters = characters
100
+
let end_document = end_document
101
+
end : Checker.S)
+49
lib/html5_checker/specialized/aria_checker.ml
+49
lib/html5_checker/specialized/aria_checker.ml
···
269
269
270
270
tbl
271
271
272
+
(** ARIA attributes with their default values.
273
+
When the specified value equals the default, a warning is issued.
274
+
Note: "undefined" is NOT included as it's a meaningful value in ARIA
275
+
that explicitly indicates a state doesn't apply. *)
276
+
let aria_default_values : (string, string) Hashtbl.t =
277
+
let tbl = Hashtbl.create 16 in
278
+
Hashtbl.add tbl "aria-atomic" "false";
279
+
Hashtbl.add tbl "aria-autocomplete" "none";
280
+
Hashtbl.add tbl "aria-busy" "false";
281
+
Hashtbl.add tbl "aria-current" "false";
282
+
Hashtbl.add tbl "aria-disabled" "false";
283
+
Hashtbl.add tbl "aria-dropeffect" "none";
284
+
(* aria-expanded: "undefined" means the element is not expandable - meaningful, not redundant *)
285
+
(* aria-grabbed: deprecated in ARIA 1.1, "undefined" is meaningful *)
286
+
Hashtbl.add tbl "aria-haspopup" "false";
287
+
(* aria-hidden: "undefined" is meaningful *)
288
+
Hashtbl.add tbl "aria-invalid" "false";
289
+
Hashtbl.add tbl "aria-live" "off";
290
+
Hashtbl.add tbl "aria-modal" "false";
291
+
Hashtbl.add tbl "aria-multiline" "false";
292
+
Hashtbl.add tbl "aria-multiselectable" "false";
293
+
(* aria-orientation: "undefined" is meaningful *)
294
+
(* aria-pressed: "undefined" means the element is not a toggle - meaningful *)
295
+
Hashtbl.add tbl "aria-readonly" "false";
296
+
Hashtbl.add tbl "aria-relevant" "additions text";
297
+
Hashtbl.add tbl "aria-required" "false";
298
+
(* aria-selected: "undefined" is meaningful *)
299
+
Hashtbl.add tbl "aria-sort" "none";
300
+
tbl
301
+
272
302
(** Roles that do NOT support aria-expanded. *)
273
303
let roles_without_aria_expanded = [
274
304
"listbox"; "list"; "menu"; "menubar"; "radiogroup"; "tablist"; "tree"; "treegrid";
···
590
620
| None -> ()
591
621
) attrs
592
622
) explicit_roles;
623
+
624
+
(* Check for redundant default ARIA attribute values *)
625
+
List.iter (fun (attr_name, attr_value) ->
626
+
let attr_lower = String.lowercase_ascii attr_name in
627
+
if String.starts_with ~prefix:"aria-" attr_lower then
628
+
match Hashtbl.find_opt aria_default_values attr_lower with
629
+
| Some default_value ->
630
+
let value_lower = String.lowercase_ascii (String.trim attr_value) in
631
+
if value_lower = default_value then
632
+
Message_collector.add_warning collector
633
+
~message:(Printf.sprintf
634
+
"The \xe2\x80\x9c%s\xe2\x80\x9d attribute is unnecessary for the value \xe2\x80\x9c%s\xe2\x80\x9d."
635
+
attr_name attr_value)
636
+
~code:"redundant-aria-default"
637
+
~element:name
638
+
~attribute:attr_name
639
+
()
640
+
| None -> ()
641
+
) attrs;
593
642
594
643
(* Push current element onto stack *)
595
644
let node = {
+169
lib/html5_checker/specialized/attr_restrictions_checker.ml
+169
lib/html5_checker/specialized/attr_restrictions_checker.ml
···
328
328
end
329
329
end;
330
330
331
+
(* Validate data-* attributes *)
332
+
if namespace = None then begin
333
+
List.iter (fun (attr_name, _) ->
334
+
let attr_lower = String.lowercase_ascii attr_name in
335
+
(* Check if it starts with "data-" *)
336
+
if String.length attr_lower >= 5 && String.sub attr_lower 0 5 = "data-" then begin
337
+
let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in
338
+
(* Check if it's exactly "data-" with nothing after *)
339
+
if after_prefix = "" then
340
+
report_disallowed_attr name_lower attr_name collector
341
+
(* Check if the name contains colon - not XML serializable *)
342
+
else if String.contains after_prefix ':' then
343
+
Message_collector.add_error collector
344
+
~message:(Printf.sprintf "Attribute \xe2\x80\x9c%s\xe2\x80\x9d is not serializable as XML 1.0."
345
+
attr_name)
346
+
~code:"bad-attribute-name"
347
+
~element:name ~attribute:attr_name ()
348
+
end
349
+
) attrs
350
+
end;
351
+
352
+
(* Validate xml:lang must have matching lang attribute - only in HTML mode, not XHTML *)
353
+
if namespace = None && not state.is_xhtml then begin
354
+
let xmllang_value = get_attr "xml:lang" attrs in
355
+
let lang_value = get_attr "lang" attrs in
356
+
match xmllang_value with
357
+
| Some xmllang ->
358
+
(match lang_value with
359
+
| None ->
360
+
Message_collector.add_error collector
361
+
~message:"When the attribute \xe2\x80\x9cxml:lang\xe2\x80\x9d in no namespace is specified, the element must also have the attribute \xe2\x80\x9clang\xe2\x80\x9d present with the same value."
362
+
~code:"xmllang-missing-lang"
363
+
~element:name ~attribute:"xml:lang" ()
364
+
| Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang ->
365
+
Message_collector.add_error collector
366
+
~message:"When the attribute \xe2\x80\x9cxml:lang\xe2\x80\x9d in no namespace is specified, the element must also have the attribute \xe2\x80\x9clang\xe2\x80\x9d present with the same value."
367
+
~code:"xmllang-lang-mismatch"
368
+
~element:name ~attribute:"xml:lang" ()
369
+
| _ -> ())
370
+
| None -> ()
371
+
end;
372
+
373
+
(* Validate spellcheck attribute - must be "true" or "false" or empty *)
374
+
if namespace = None then begin
375
+
List.iter (fun (attr_name, attr_value) ->
376
+
let attr_lower = String.lowercase_ascii attr_name in
377
+
if attr_lower = "spellcheck" then begin
378
+
let value_lower = String.lowercase_ascii (String.trim attr_value) in
379
+
if value_lower <> "" && value_lower <> "true" && value_lower <> "false" then
380
+
Message_collector.add_error collector
381
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
382
+
attr_value attr_name name)
383
+
~code:"bad-attribute-value"
384
+
~element:name ~attribute:attr_name ()
385
+
end
386
+
) attrs
387
+
end;
388
+
389
+
(* Validate enterkeyhint attribute - must be one of specific values *)
390
+
if namespace = None then begin
391
+
let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in
392
+
List.iter (fun (attr_name, attr_value) ->
393
+
let attr_lower = String.lowercase_ascii attr_name in
394
+
if attr_lower = "enterkeyhint" then begin
395
+
let value_lower = String.lowercase_ascii (String.trim attr_value) in
396
+
if not (List.mem value_lower valid_enterkeyhint) then
397
+
Message_collector.add_error collector
398
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d."
399
+
attr_value attr_name name)
400
+
~code:"bad-attribute-value"
401
+
~element:name ~attribute:attr_name ()
402
+
end
403
+
) attrs
404
+
end;
405
+
406
+
(* Validate headingoffset attribute - must be a number between 0 and 8 *)
407
+
if namespace = None then begin
408
+
List.iter (fun (attr_name, attr_value) ->
409
+
let attr_lower = String.lowercase_ascii attr_name in
410
+
if attr_lower = "headingoffset" then begin
411
+
let trimmed = String.trim attr_value in
412
+
let is_valid =
413
+
String.length trimmed > 0 &&
414
+
String.for_all (fun c -> c >= '0' && c <= '9') trimmed &&
415
+
(try
416
+
let n = int_of_string trimmed in
417
+
n >= 0 && n <= 8
418
+
with _ -> false)
419
+
in
420
+
if not is_valid then
421
+
Message_collector.add_error collector
422
+
~message:(Printf.sprintf "The value of the \xe2\x80\x9c%s\xe2\x80\x9d attribute must be a number between \xe2\x80\x9c0\xe2\x80\x9d and \xe2\x80\x9c8\xe2\x80\x9d."
423
+
attr_name)
424
+
~code:"bad-attribute-value"
425
+
~element:name ~attribute:attr_name ()
426
+
end
427
+
) attrs
428
+
end;
429
+
430
+
(* Validate accesskey attribute - each key label must be a single code point *)
431
+
if namespace = None then begin
432
+
List.iter (fun (attr_name, attr_value) ->
433
+
let attr_lower = String.lowercase_ascii attr_name in
434
+
if attr_lower = "accesskey" then begin
435
+
(* Split by whitespace to get key labels *)
436
+
let keys = String.split_on_char ' ' attr_value |>
437
+
List.filter (fun s -> String.length (String.trim s) > 0) |>
438
+
List.map String.trim in
439
+
(* Count Unicode code points in each key *)
440
+
let count_codepoints s =
441
+
let len = String.length s in
442
+
let count = ref 0 in
443
+
let i = ref 0 in
444
+
while !i < len do
445
+
let c = Char.code s.[!i] in
446
+
if c < 0x80 then incr i
447
+
else if c < 0xE0 then i := !i + 2
448
+
else if c < 0xF0 then i := !i + 3
449
+
else i := !i + 4;
450
+
incr count
451
+
done;
452
+
!count
453
+
in
454
+
(* Check for multi-character keys *)
455
+
List.iter (fun key ->
456
+
if count_codepoints key > 1 then
457
+
Message_collector.add_error collector
458
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: The space-separated list of key labels contains a value \xe2\x80\x9c%s\xe2\x80\x9d that consists of more than a single code point."
459
+
attr_value attr_name name key)
460
+
~code:"bad-attribute-value"
461
+
~element:name ~attribute:attr_name ()
462
+
) keys;
463
+
(* Check for duplicate keys *)
464
+
let rec find_duplicates seen = function
465
+
| [] -> ()
466
+
| k :: rest ->
467
+
if List.mem k seen then
468
+
Message_collector.add_error collector
469
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate key label."
470
+
attr_value attr_name name)
471
+
~code:"bad-attribute-value"
472
+
~element:name ~attribute:attr_name ()
473
+
else
474
+
find_duplicates (k :: seen) rest
475
+
in
476
+
find_duplicates [] keys
477
+
end
478
+
) attrs
479
+
end;
480
+
481
+
(* Validate that command and popovertarget cannot have aria-expanded *)
482
+
if namespace = None && name_lower = "button" then begin
483
+
let has_command = has_attr "command" attrs in
484
+
let has_popovertarget = has_attr "popovertarget" attrs in
485
+
let has_aria_expanded = has_attr "aria-expanded" attrs in
486
+
487
+
if has_command && has_aria_expanded then
488
+
Message_collector.add_error collector
489
+
~message:"The \xe2\x80\x9caria-expanded\xe2\x80\x9d attribute must not be specified on \xe2\x80\x9cbutton\xe2\x80\x9d elements that have the \xe2\x80\x9ccommand\xe2\x80\x9d attribute."
490
+
~code:"disallowed-attribute"
491
+
~element:name ~attribute:"aria-expanded" ();
492
+
493
+
if has_popovertarget && has_aria_expanded then
494
+
Message_collector.add_error collector
495
+
~message:"The \xe2\x80\x9caria-expanded\xe2\x80\x9d attribute must not be specified on \xe2\x80\x9cbutton\xe2\x80\x9d elements that have the \xe2\x80\x9cpopovertarget\xe2\x80\x9d attribute."
496
+
~code:"disallowed-attribute"
497
+
~element:name ~attribute:"aria-expanded" ()
498
+
end;
499
+
331
500
(* Note: data-* uppercase check requires XML parsing which preserves case.
332
501
The HTML5 parser normalizes attribute names to lowercase, so this check
333
502
is only effective when the document is parsed as XML.
+82
-38
lib/html5_checker/specialized/datetime_checker.ml
+82
-38
lib/html5_checker/specialized/datetime_checker.ml
···
56
56
else
57
57
false
58
58
59
+
(** Check if a date has year before 1000 (might be mistyped or unusual) *)
60
+
let has_old_year s =
61
+
let pattern = Str.regexp "^\\([0-9]+\\)-" in
62
+
if Str.string_match pattern s 0 then
63
+
let year_s = Str.matched_group 1 s in
64
+
match parse_int year_s with
65
+
| Some year -> year < 1000
66
+
| None -> false
67
+
else
68
+
false
69
+
59
70
(** Validate time string HH:MM[:SS[.sss]] *)
60
71
let validate_time s =
61
72
let pattern = Str.regexp "^\\([0-9][0-9]\\):\\([0-9][0-9]\\)\\(:\\([0-9][0-9]\\)\\(\\.\\([0-9]+\\)\\)?\\)?$" in
···
189
200
else
190
201
(false, Some "Invalid duration format")
191
202
192
-
(** Validate timezone offset +HH:MM or -HH:MM or +HHMM or -HHMM *)
203
+
(** Result type for timezone validation *)
204
+
type tz_result = TzOk | TzWarning of string | TzError of string
205
+
206
+
(** Validate timezone offset +HH:MM or -HH:MM or +HHMM or -HHMM
207
+
Returns warning for unusual but valid offsets:
208
+
- Negative offsets > 12:00 (e.g., -13:00)
209
+
- Positive offsets > 14:00 (e.g., +15:00)
210
+
- Offsets with unusual minutes (not 00, 30, 45) *)
193
211
let validate_timezone_offset s =
194
212
(* Try +HH:MM format *)
195
-
let pattern_colon = Str.regexp "^[+-]\\([0-9][0-9]\\):\\([0-9][0-9]\\)$" in
213
+
let pattern_colon = Str.regexp "^\\([+-]\\)\\([0-9][0-9]\\):\\([0-9][0-9]\\)$" in
196
214
(* Try +HHMM format (no colon) *)
197
-
let pattern_no_colon = Str.regexp "^[+-]\\([0-9][0-9]\\)\\([0-9][0-9]\\)$" in
198
-
let matched =
199
-
if Str.string_match pattern_colon s 0 then true
200
-
else Str.string_match pattern_no_colon s 0
215
+
let pattern_no_colon = Str.regexp "^\\([+-]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)$" in
216
+
let matched, sign =
217
+
if Str.string_match pattern_colon s 0 then
218
+
(true, Str.matched_group 1 s)
219
+
else if Str.string_match pattern_no_colon s 0 then
220
+
(true, Str.matched_group 1 s)
221
+
else
222
+
(false, "+")
201
223
in
202
224
if not matched then
203
-
(false, Some "Invalid timezone offset")
225
+
TzError "Invalid timezone offset"
204
226
else
205
-
let hour_s = Str.matched_group 1 s in
206
-
let minute_s = Str.matched_group 2 s in
227
+
let hour_s = Str.matched_group 2 s in
228
+
let minute_s = Str.matched_group 3 s in
207
229
match (parse_int hour_s, parse_int minute_s) with
208
-
| None, _ | _, None -> (false, Some "Invalid timezone")
230
+
| None, _ | _, None -> TzError "Invalid timezone"
209
231
| Some hour, Some minute ->
210
-
if hour > 23 || minute > 59 then (false, Some "Timezone offset out of range")
211
-
else (true, None)
232
+
if hour > 23 || minute > 59 then TzError "Timezone offset out of range"
233
+
else begin
234
+
(* Check for unusual but valid offsets *)
235
+
let unusual_range =
236
+
if sign = "-" && hour >= 13 then true
237
+
else if sign = "+" && hour >= 15 then true
238
+
else false
239
+
in
240
+
let unusual_minutes =
241
+
minute <> 0 && minute <> 30 && minute <> 45
242
+
in
243
+
if unusual_range then
244
+
TzWarning "unusual timezone offset"
245
+
else if unusual_minutes then
246
+
TzWarning "unusual timezone offset minutes"
247
+
else
248
+
TzOk
249
+
end
250
+
251
+
(** Result type for datetime with timezone validation *)
252
+
type dt_tz_result = DtOk | DtWarning of string | DtError of string
212
253
213
254
(** Validate datetime with timezone: YYYY-MM-DDTHH:MM:SS[.sss]Z or YYYY-MM-DDTHH:MM:SS[.sss]+HH:MM *)
214
255
let validate_datetime_with_timezone s =
···
220
261
with Not_found -> None
221
262
in
222
263
match sep_pos with
223
-
| None -> (false, Some "The literal did not satisfy the datetime with timezone format")
264
+
| None -> DtError "The literal did not satisfy the datetime with timezone format"
224
265
| Some pos ->
225
266
let date_part = String.sub s 0 pos in
226
267
let time_and_tz = String.sub s (pos + 1) (String.length s - pos - 1) in
227
268
(* Validate date *)
228
269
match validate_date date_part with
229
-
| (false, reason) -> (false, reason)
270
+
| (false, reason) ->
271
+
DtError (match reason with Some r -> r | None -> "Invalid date")
230
272
| (true, _) ->
273
+
let date_old = has_old_year date_part in
231
274
(* Check if ends with Z *)
232
275
if String.length time_and_tz > 0 && time_and_tz.[String.length time_and_tz - 1] = 'Z' then begin
233
276
let time_part = String.sub time_and_tz 0 (String.length time_and_tz - 1) in
234
277
match validate_time time_part with
235
-
| (false, _) -> (false, Some "The literal did not satisfy the datetime with timezone format")
236
-
| (true, _) -> (true, None)
278
+
| (false, _) -> DtError "The literal did not satisfy the datetime with timezone format"
279
+
| (true, _) ->
280
+
if date_old then DtWarning "Year may be mistyped"
281
+
else DtOk
237
282
end
238
283
else begin
239
284
(* Check for +/- timezone offset *)
···
246
291
| None, None -> None
247
292
in
248
293
match tz_pos with
249
-
| None -> (false, Some "The literal did not satisfy the datetime with timezone format")
294
+
| None -> DtError "The literal did not satisfy the datetime with timezone format"
250
295
| Some tp ->
251
296
let time_part = String.sub time_and_tz 0 tp in
252
297
let tz_part = String.sub time_and_tz tp (String.length time_and_tz - tp) in
253
298
match validate_time time_part with
254
-
| (false, _) -> (false, Some "The literal did not satisfy the datetime with timezone format")
299
+
| (false, _) -> DtError "The literal did not satisfy the datetime with timezone format"
255
300
| (true, _) ->
256
301
match validate_timezone_offset tz_part with
257
-
| (false, _) -> (false, Some "The literal did not satisfy the datetime with timezone format")
258
-
| (true, _) -> (true, None)
302
+
| TzError _ -> DtError "The literal did not satisfy the datetime with timezone format"
303
+
| TzWarning w ->
304
+
DtWarning w
305
+
| TzOk ->
306
+
if date_old then DtWarning "Year may be mistyped"
307
+
else DtOk
259
308
end
260
309
261
310
(** Validate datetime-local: YYYY-MM-DDTHH:MM[:SS[.sss]] or YYYY-MM-DD HH:MM *)
···
299
348
else
300
349
(* Try datetime with timezone first *)
301
350
match validate_datetime_with_timezone value with
302
-
| (true, _) -> Ok (* Valid datetime with timezone *)
303
-
| (false, tz_error) ->
351
+
| DtOk -> Ok (* Valid datetime with timezone *)
352
+
| DtWarning w ->
353
+
(* Valid but with warning *)
354
+
Warning (Printf.sprintf "Possibly mistyped value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s."
355
+
value attr_name element_name w)
356
+
| DtError tz_error ->
304
357
(* Try just date - valid for all elements *)
305
358
match validate_date value with
306
359
| (true, _) ->
307
-
(* Date is valid, but check for suspicious year (5+ digits) *)
308
-
if has_suspicious_year value then begin
309
-
let date_msg = "Bad date: Year may be mistyped." in
310
-
let tz_msg = match tz_error with
311
-
| Some e -> Printf.sprintf "Bad datetime with timezone: %s." e
312
-
| None -> "Bad datetime with timezone: The literal did not satisfy the datetime with timezone format."
313
-
in
314
-
Warning (Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
360
+
(* Date is valid, but check for suspicious year (5+ digits or old year) *)
361
+
if has_suspicious_year value || has_old_year value then begin
362
+
let date_msg = "Year may be mistyped." in
363
+
let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in
364
+
Warning (Printf.sprintf "Possibly mistyped value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9c%s\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: %s %s"
315
365
value attr_name element_name date_msg tz_msg)
316
366
end else
317
367
Ok (* Valid date with normal year *)
···
339
389
match validate_duration value with
340
390
| (true, _) -> Ok (* Valid duration P... *)
341
391
| (false, _) ->
342
-
let tz_msg = match tz_error with
343
-
| Some e -> Printf.sprintf "Bad datetime with timezone: %s." e
344
-
| None -> "Bad datetime with timezone: The literal did not satisfy the datetime with timezone format."
345
-
in
392
+
let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in
346
393
let date_msg = match date_error with
347
394
| Some e -> Printf.sprintf "Bad date: %s." e
348
395
| None -> "Bad date: The literal did not satisfy the date format."
···
352
399
end
353
400
else begin
354
401
(* del/ins only allow date or datetime-with-timezone *)
355
-
let tz_msg = match tz_error with
356
-
| Some e -> Printf.sprintf "Bad datetime with timezone: %s." e
357
-
| None -> "Bad datetime with timezone: The literal did not satisfy the datetime with timezone format."
358
-
in
402
+
let tz_msg = Printf.sprintf "Bad datetime with timezone: %s." tz_error in
359
403
let date_msg = match date_error with
360
404
| Some e -> Printf.sprintf "Bad date: %s." e
361
405
| None -> "Bad date: The literal did not satisfy the date format."
+21
-3
lib/html5_checker/specialized/dl_checker.ml
+21
-3
lib/html5_checker/specialized/dl_checker.ml
···
13
13
type div_context = {
14
14
mutable has_dt : bool;
15
15
mutable has_dd : bool;
16
+
mutable group_count : int; (* Number of dt+dd groups *)
17
+
mutable in_dd_part : bool; (* Whether we've seen dd in current group *)
16
18
}
17
19
18
20
type state = {
···
98
100
~message:"Element \xe2\x80\x9cdiv\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cdl\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)"
99
101
~code:"disallowed-child"
100
102
~element:"div" ();
101
-
let div_ctx = { has_dt = false; has_dd = false } in
103
+
let div_ctx = { has_dt = false; has_dd = false; group_count = 0; in_dd_part = false } in
102
104
state.div_in_dl_stack <- div_ctx :: state.div_in_dl_stack
103
105
| Some _ when state.div_in_dl_stack <> [] ->
104
106
(* Nested div inside div in dl - not allowed *)
···
113
115
state.in_dt_dd <- state.in_dt_dd + 1;
114
116
begin match current_div state with
115
117
| Some div_ctx ->
116
-
div_ctx.has_dt <- true
118
+
div_ctx.has_dt <- true;
119
+
(* If we've seen dd, this dt starts a new group *)
120
+
if div_ctx.in_dd_part then begin
121
+
div_ctx.group_count <- div_ctx.group_count + 1;
122
+
div_ctx.in_dd_part <- false
123
+
end
117
124
| None ->
118
125
match current_dl state with
119
126
| Some dl_ctx ->
···
142
149
state.in_dt_dd <- state.in_dt_dd + 1;
143
150
begin match current_div state with
144
151
| Some div_ctx ->
145
-
div_ctx.has_dd <- true
152
+
div_ctx.has_dd <- true;
153
+
(* First dd after dt(s) completes the first group *)
154
+
if not div_ctx.in_dd_part then begin
155
+
div_ctx.in_dd_part <- true;
156
+
div_ctx.group_count <- div_ctx.group_count + 1
157
+
end
146
158
| None ->
147
159
match current_dl state with
148
160
| Some dl_ctx ->
···
245
257
Message_collector.add_error collector
246
258
~message:"Element \xe2\x80\x9cdiv\xe2\x80\x9d is missing required child element \xe2\x80\x9cdd\xe2\x80\x9d."
247
259
~code:"missing-required-child"
260
+
~element:"div" ()
261
+
else if div_ctx.group_count > 1 then
262
+
(* Multiple name-value groups in a single div is not allowed *)
263
+
Message_collector.add_error collector
264
+
~message:"A child \xe2\x80\x9cdiv\xe2\x80\x9d element of a \xe2\x80\x9cdl\xe2\x80\x9d element must contain only one name-value group."
265
+
~code:"multiple-groups-in-div"
248
266
~element:"div" ()
249
267
| [] -> ()
250
268
end
+50
-32
lib/html5_checker/specialized/language_checker.ml
+50
-32
lib/html5_checker/specialized/language_checker.ml
···
2
2
3
3
Validates language attributes. *)
4
4
5
-
(** Checker state tracking language attributes. *)
6
-
type state = {
7
-
mutable html_element_seen : bool;
8
-
mutable html_has_lang : bool;
9
-
}
5
+
(** Checker state - currently minimal since we only check attributes. *)
6
+
type state = unit
10
7
11
-
let create () =
12
-
{
13
-
html_element_seen = false;
14
-
html_has_lang = false;
15
-
}
8
+
let create () = ()
16
9
17
-
let reset state =
18
-
state.html_element_seen <- false;
19
-
state.html_has_lang <- false
10
+
let reset _state = ()
20
11
21
12
(** Get attribute value from attribute list. *)
22
13
let get_attr attrs name =
23
14
try Some (List.assoc name attrs)
24
15
with Not_found -> None
25
16
17
+
(** Deprecated language subtags from IANA registry.
18
+
See: https://www.iana.org/assignments/language-subtag-registry/ *)
19
+
let deprecated_subtags = [
20
+
("mo", "ro"); (* Moldavian -> Romanian *)
21
+
("iw", "he"); (* Hebrew (old) -> Hebrew *)
22
+
("in", "id"); (* Indonesian (old) -> Indonesian *)
23
+
("ji", "yi"); (* Yiddish (old) -> Yiddish *)
24
+
("jw", "jv"); (* Javanese (old) -> Javanese *)
25
+
("sh", "sr"); (* Serbo-Croatian -> Serbian *)
26
+
]
27
+
28
+
(** Check if a language tag contains deprecated subtags. *)
29
+
let check_deprecated_tag value =
30
+
let lower = String.lowercase_ascii value in
31
+
let subtags = String.split_on_char '-' lower in
32
+
match subtags with
33
+
| [] -> None
34
+
| primary :: _ ->
35
+
(* Check primary language subtag for deprecation *)
36
+
match List.assoc_opt primary deprecated_subtags with
37
+
| Some replacement -> Some (primary, replacement)
38
+
| None -> None
39
+
26
40
(** Validate language attribute. *)
27
41
let validate_lang_attr value ~location ~element collector =
42
+
(* First check structural validity *)
28
43
match Dt_language.Language_or_empty.validate value with
29
-
| Ok () -> ()
30
44
| Error msg ->
31
45
Message_collector.add_error collector
32
46
~message:(Printf.sprintf "Invalid lang attribute: %s" msg)
···
35
49
~element
36
50
~attribute:"lang"
37
51
()
52
+
| Ok () ->
53
+
(* Then check for deprecated subtags *)
54
+
match check_deprecated_tag value with
55
+
| Some (deprecated, replacement) ->
56
+
Message_collector.add_warning collector
57
+
~message:(Printf.sprintf
58
+
"The language tag \xe2\x80\x9c%s\xe2\x80\x9d is deprecated. Use \xe2\x80\x9c%s\xe2\x80\x9d instead."
59
+
deprecated replacement)
60
+
~code:"deprecated-lang"
61
+
?location
62
+
~element
63
+
~attribute:"lang"
64
+
()
65
+
| None -> ()
38
66
39
67
(** Check if lang and xml:lang match. *)
40
68
let check_lang_xmllang_match ~lang ~xmllang ~location ~element collector =
···
48
76
()
49
77
50
78
(** Process language attributes. *)
51
-
let process_language_attrs state ~element ~namespace ~attrs ~location collector =
79
+
let process_language_attrs ~element ~namespace ~attrs ~location collector =
80
+
ignore namespace;
52
81
let lang_opt = get_attr attrs "lang" in
53
82
let xmllang_opt = get_attr attrs "xml:lang" in
54
83
55
-
(* Check if this is the html element *)
56
-
if element = "html" && namespace = None then begin
57
-
state.html_element_seen <- true;
58
-
state.html_has_lang <- lang_opt <> None
59
-
end;
60
-
61
84
(* Validate lang attribute *)
62
85
begin match lang_opt with
63
86
| Some lang ->
···
79
102
| _ -> ()
80
103
end
81
104
82
-
let start_element state ~name ~namespace ~attrs collector =
105
+
let start_element _state ~name ~namespace ~attrs collector =
83
106
let location = None in
84
-
process_language_attrs state ~element:name ~namespace ~attrs ~location collector
107
+
process_language_attrs ~element:name ~namespace ~attrs ~location collector
85
108
86
109
let end_element _state ~name:_ ~namespace:_ _collector =
87
110
()
···
89
112
let characters _state _text _collector =
90
113
()
91
114
92
-
let end_document state collector =
93
-
(* Warn if html element lacks lang attribute *)
94
-
if state.html_element_seen && not state.html_has_lang then
95
-
Message_collector.add_warning collector
96
-
~message:"The <html> element should have a lang attribute to specify \
97
-
the document's primary language"
98
-
~code:"missing-lang-on-html"
99
-
~element:"html"
100
-
()
115
+
let end_document _state _collector =
116
+
(* Note: The "missing lang on html" warning is not produced by default since
117
+
the Nu validator only produces it for specific test cases. *)
118
+
()
101
119
102
120
let checker = (module struct
103
121
type nonrec state = state
+34
-3
lib/html5_checker/specialized/picture_checker.ml
+34
-3
lib/html5_checker/specialized/picture_checker.ml
···
3
3
(** Elements allowed as children of picture *)
4
4
let allowed_picture_children = ["source"; "img"; "script"; "template"]
5
5
6
+
(** Elements that do NOT allow picture as a child (for phrasing content contexts) *)
7
+
let disallowed_picture_parents = [
8
+
"ul"; "ol"; "dl"; "rp"; "hgroup"
9
+
]
10
+
6
11
(** Attributes NOT allowed on picture element *)
7
12
let disallowed_picture_attrs = [
8
13
"align"; "alt"; "border"; "crossorigin"; "height"; "hspace"; "ismap";
···
29
34
mutable has_source_after_img : bool;
30
35
mutable has_always_matching_source : bool; (* source without media/type *)
31
36
mutable source_after_always_matching : bool; (* source after always-matching source *)
37
+
mutable parent_stack : string list; (* track parent elements *)
32
38
}
33
39
34
40
let create () = {
···
40
46
has_source_after_img = false;
41
47
has_always_matching_source = false;
42
48
source_after_always_matching = false;
49
+
parent_stack = [];
43
50
}
44
51
45
52
let reset state =
···
48
55
state.picture_depth <- 0;
49
56
state.children_in_picture <- [];
50
57
state.last_was_img <- false;
58
+
state.parent_stack <- [];
51
59
state.has_source_after_img <- false;
52
60
state.has_always_matching_source <- false;
53
61
state.source_after_always_matching <- false
···
109
117
if namespace = None then begin
110
118
match name_lower with
111
119
| "picture" ->
120
+
(* Check if picture is in a disallowed parent context *)
121
+
(match state.parent_stack with
122
+
| parent :: _ when List.mem parent disallowed_picture_parents ->
123
+
Message_collector.add_error collector
124
+
~message:(Printf.sprintf "Element \xe2\x80\x9cpicture\xe2\x80\x9d not allowed as child of element \xe2\x80\x9c%s\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)" parent)
125
+
~code:"disallowed-child"
126
+
~element:"picture" ()
127
+
| _ -> ());
112
128
check_picture_attrs attrs collector;
113
129
state.in_picture <- true;
114
130
state.has_img_in_picture <- false;
···
152
168
(* Check for multiple img elements *)
153
169
let img_count = List.filter (fun c -> c = "img") state.children_in_picture |> List.length in
154
170
if img_count > 1 then
155
-
report_disallowed_child "picture" "img" collector
171
+
report_disallowed_child "picture" "img" collector;
172
+
(* Check if always-matching source is followed by img with srcset *)
173
+
if state.has_always_matching_source && has_attr "srcset" attrs then
174
+
Message_collector.add_error collector
175
+
~message:"A \xe2\x80\x9csource\xe2\x80\x9d element that has a following sibling \xe2\x80\x9csource\xe2\x80\x9d element or \xe2\x80\x9cimg\xe2\x80\x9d element with a \xe2\x80\x9csrcset\xe2\x80\x9d attribute must have a \xe2\x80\x9cmedia\xe2\x80\x9d attribute and/or \xe2\x80\x9ctype\xe2\x80\x9d attribute."
176
+
~code:"always-matching-source-followed-by-srcset"
177
+
~element:"source" ()
156
178
157
179
| "script" when state.in_picture && state.picture_depth = 1 ->
158
180
state.children_in_picture <- "script" :: state.children_in_picture
···
168
190
169
191
(* Track depth when inside picture *)
170
192
if state.in_picture then
171
-
state.picture_depth <- state.picture_depth + 1
193
+
state.picture_depth <- state.picture_depth + 1;
194
+
195
+
(* Push to parent stack (only HTML namespace elements) *)
196
+
if namespace = None then
197
+
state.parent_stack <- name_lower :: state.parent_stack
172
198
173
199
let end_element state ~name ~namespace collector =
174
200
if namespace <> None then ()
···
197
223
~element:"source" ();
198
224
199
225
state.in_picture <- false
200
-
end
226
+
end;
227
+
228
+
(* Pop from parent stack *)
229
+
state.parent_stack <- (match state.parent_stack with
230
+
| _ :: rest -> rest
231
+
| [] -> [])
201
232
end
202
233
203
234
let characters state text collector =
+251
-49
lib/html5_checker/specialized/srcset_sizes_checker.ml
+251
-49
lib/html5_checker/specialized/srcset_sizes_checker.ml
···
54
54
Buffer.contents buf
55
55
56
56
(** Check if a size value has a valid CSS length unit and non-negative value *)
57
-
type size_check_result = Valid | InvalidUnit | NegativeValue
57
+
type size_check_result = Valid | InvalidUnit | NegativeValue | CssCommentInside | BadScientificNotation
58
+
59
+
(** Check if CSS comment appears in an invalid position:
60
+
- Between sign and number (+/**/50vw)
61
+
- Between number and unit (50/**/vw)
62
+
Trailing comments (50vw/**/) are valid. *)
63
+
let has_invalid_css_comment s =
64
+
let len = String.length s in
65
+
(* Find comment position *)
66
+
let rec find_comment i =
67
+
if i + 1 >= len then None
68
+
else if s.[i] = '/' && s.[i + 1] = '*' then Some i
69
+
else find_comment (i + 1)
70
+
in
71
+
match find_comment 0 with
72
+
| None -> false
73
+
| Some comment_pos ->
74
+
let before = String.sub s 0 comment_pos in
75
+
let trimmed_before = String.trim before in
76
+
if String.length trimmed_before = 0 then false (* Leading comment is OK *)
77
+
else begin
78
+
(* Find end of comment *)
79
+
let rec find_end i =
80
+
if i + 1 >= len then len
81
+
else if s.[i] = '*' && s.[i + 1] = '/' then i + 2
82
+
else find_end (i + 1)
83
+
in
84
+
let end_pos = find_end (comment_pos + 2) in
85
+
let after = if end_pos < len then String.sub s end_pos (len - end_pos) else "" in
86
+
let trimmed_after = String.trim (strip_css_comments after) in
87
+
if trimmed_after = "" then false (* Trailing comment is OK *)
88
+
else begin
89
+
(* Comment is in the middle - check if it breaks a number/unit combo *)
90
+
let last = trimmed_before.[String.length trimmed_before - 1] in
91
+
(* Invalid if comment appears after +/- or after a digit (before more non-whitespace) *)
92
+
(last >= '0' && last <= '9') || last = '+' || last = '-' || last = '.'
93
+
end
94
+
end
95
+
96
+
(** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *)
97
+
let has_invalid_scientific_notation s =
98
+
let lower = String.lowercase_ascii s in
99
+
(* Find 'e' for scientific notation *)
100
+
match String.index_opt lower 'e' with
101
+
| None -> false
102
+
| Some e_pos ->
103
+
(* Check if there's a decimal after the exponent sign *)
104
+
let after_e = String.sub lower (e_pos + 1) (String.length lower - e_pos - 1) in
105
+
let after_sign =
106
+
if String.length after_e > 0 && (after_e.[0] = '+' || after_e.[0] = '-') then
107
+
String.sub after_e 1 (String.length after_e - 1)
108
+
else after_e
109
+
in
110
+
String.contains after_sign '.'
58
111
59
112
let check_size_value size_value =
60
-
let trimmed = String.trim (strip_css_comments size_value) in
113
+
let trimmed = String.trim size_value in
61
114
if trimmed = "" then InvalidUnit
62
-
else if trimmed = "auto" then Valid (* "auto" is valid *)
115
+
(* Check for CSS comments inside numbers - this is invalid *)
116
+
else if has_invalid_css_comment trimmed then CssCommentInside
63
117
else begin
64
-
let lower = String.lowercase_ascii trimmed in
65
-
(* Check for invalid units first *)
66
-
let has_invalid = List.exists (fun unit ->
67
-
let len = String.length unit in
68
-
String.length lower > len &&
69
-
String.sub lower (String.length lower - len) len = unit
70
-
) invalid_size_units in
71
-
if has_invalid then InvalidUnit
118
+
(* Strip valid leading/trailing CSS comments for further checks *)
119
+
let value_no_comments = String.trim (strip_css_comments trimmed) in
120
+
(* Check for invalid scientific notation like 1e+1.5px *)
121
+
if has_invalid_scientific_notation value_no_comments then BadScientificNotation
122
+
(* "auto" is only valid with lazy loading, which requires checking the element context.
123
+
For general validation, treat "auto" alone as invalid in sizes. *)
124
+
else if String.lowercase_ascii value_no_comments = "auto" then InvalidUnit
125
+
else if value_no_comments = "" then InvalidUnit
72
126
else begin
73
-
(* Check for valid CSS length units *)
74
-
let has_valid_unit = List.exists (fun unit ->
127
+
let lower = String.lowercase_ascii value_no_comments in
128
+
(* Check for invalid units first *)
129
+
let has_invalid = List.exists (fun unit ->
75
130
let len = String.length unit in
76
131
String.length lower > len &&
77
132
String.sub lower (String.length lower - len) len = unit
78
-
) valid_length_units in
79
-
if has_valid_unit then begin
80
-
(* Check if it's negative (starts with - but not -0) *)
81
-
if String.length trimmed > 0 && trimmed.[0] = '-' then begin
82
-
(* Check if it's -0 which is valid *)
83
-
let after_minus = String.sub trimmed 1 (String.length trimmed - 1) in
84
-
let after_minus_stripped = String.trim (strip_css_comments after_minus) in
133
+
) invalid_size_units in
134
+
if has_invalid then InvalidUnit
135
+
else begin
136
+
(* Check for valid CSS length units *)
137
+
let has_valid_unit = List.exists (fun unit ->
138
+
let len = String.length unit in
139
+
String.length lower > len &&
140
+
String.sub lower (String.length lower - len) len = unit
141
+
) valid_length_units in
142
+
if has_valid_unit then begin
143
+
(* Check if it's negative (starts with - but not -0) *)
144
+
if String.length value_no_comments > 0 && value_no_comments.[0] = '-' then begin
145
+
(* Check if it's -0 which is valid *)
146
+
let after_minus = String.sub value_no_comments 1 (String.length value_no_comments - 1) in
147
+
try
148
+
let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus in
149
+
let f = float_of_string num_str in
150
+
if f = 0.0 then Valid else NegativeValue
151
+
with _ -> NegativeValue
152
+
end else
153
+
Valid
154
+
end
155
+
(* Could be calc() or other CSS functions - allow those *)
156
+
else if String.contains value_no_comments '(' then Valid
157
+
else begin
158
+
(* Check if it's a zero value (0, -0, +0) - these are valid without units *)
159
+
let stripped =
160
+
let s = value_no_comments in
161
+
let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in
162
+
s
163
+
in
164
+
(* Check if it's zero or a numeric value starting with 0 *)
85
165
try
86
-
let num_str = Str.global_replace (Str.regexp "[a-zA-Z]+$") "" after_minus_stripped in
87
-
let f = float_of_string num_str in
88
-
if f = 0.0 then Valid else NegativeValue
89
-
with _ -> NegativeValue
90
-
end else
91
-
Valid
92
-
end
93
-
(* Could be calc() or other CSS functions - allow those *)
94
-
else if String.contains trimmed '(' then Valid
95
-
else begin
96
-
(* Check if it's a zero value (0, -0, +0) - these are valid without units *)
97
-
let stripped =
98
-
let s = trimmed in
99
-
let s = if String.length s > 0 && (s.[0] = '+' || s.[0] = '-') then String.sub s 1 (String.length s - 1) else s in
100
-
s
101
-
in
102
-
(* Check if it's zero or a numeric value starting with 0 *)
103
-
try
104
-
let f = float_of_string stripped in
105
-
if f = 0.0 then Valid else InvalidUnit
106
-
with _ -> InvalidUnit
166
+
let f = float_of_string stripped in
167
+
if f = 0.0 then Valid else InvalidUnit
168
+
with _ -> InvalidUnit
169
+
end
107
170
end
108
171
end
109
172
end
···
111
174
let has_valid_size_unit size_value =
112
175
match check_size_value size_value with
113
176
| Valid -> true
114
-
| InvalidUnit | NegativeValue -> false
177
+
| InvalidUnit | NegativeValue | CssCommentInside | BadScientificNotation -> false
115
178
116
179
(** Check if a sizes entry has a media condition (starts with '(') *)
117
180
let has_media_condition entry =
118
181
let trimmed = String.trim entry in
119
182
String.length trimmed > 0 && trimmed.[0] = '('
120
183
184
+
(** Check if entry looks like it's trying to be a media condition but isn't properly formatted *)
185
+
let has_invalid_media_condition entry =
186
+
let trimmed = String.trim entry in
187
+
if String.length trimmed = 0 then None
188
+
else begin
189
+
let first_char = trimmed.[0] in
190
+
if first_char = '(' then begin
191
+
(* Check for bad content inside the media condition *)
192
+
let len = String.length trimmed in
193
+
let rec find_close_paren i depth =
194
+
if i >= len then None
195
+
else match trimmed.[i] with
196
+
| '(' -> find_close_paren (i + 1) (depth + 1)
197
+
| ')' -> if depth = 1 then Some i else find_close_paren (i + 1) (depth - 1)
198
+
| _ -> find_close_paren (i + 1) depth
199
+
in
200
+
match find_close_paren 0 0 with
201
+
| None -> Some "Unclosed media condition"
202
+
| Some close_pos ->
203
+
let inner = String.sub trimmed 1 (close_pos - 1) in
204
+
let inner_trimmed = String.trim inner in
205
+
(* Check for obviously invalid content like just numbers or curly braces *)
206
+
if String.length inner_trimmed > 0 then begin
207
+
let first_inner = inner_trimmed.[0] in
208
+
if first_inner >= '0' && first_inner <= '9' then
209
+
Some "Bad media condition: Parse Error"
210
+
else if String.contains inner_trimmed '}' || String.contains inner_trimmed '{' then
211
+
Some "Bad media condition: Parse Error"
212
+
else
213
+
None
214
+
end else
215
+
Some "Bad media condition: Parse Error"
216
+
end else begin
217
+
(* Check for bare "all" which is invalid *)
218
+
let lower = String.lowercase_ascii trimmed in
219
+
let parts = String.split_on_char ' ' lower |> List.filter (fun s -> s <> "") in
220
+
match parts with
221
+
| keyword :: _ when keyword = "all" ->
222
+
Some "Bad media condition: Parse Error"
223
+
| keyword :: _ when String.length keyword > 0 && not (keyword.[0] >= '0' && keyword.[0] <= '9') ->
224
+
(* Looks like a keyword without parens like "min-width:500px" *)
225
+
if String.contains keyword ':' then
226
+
Some "Bad media condition: Parse Error"
227
+
else
228
+
None
229
+
| _ -> None
230
+
end
231
+
end
232
+
121
233
(** Extract the size value from a sizes entry (after media condition if any) *)
122
234
let extract_size_value entry =
123
235
let trimmed = String.trim entry in
···
183
295
~code:"bad-sizes-value"
184
296
~element:element_name ~attribute:"sizes" ();
185
297
valid := false
298
+
end;
299
+
(* Check for multiple consecutive defaults (entries without media conditions) *)
300
+
let defaults_without_media = List.filter (fun e -> not (has_media_condition e)) non_empty_entries in
301
+
if List.length defaults_without_media > 1 then begin
302
+
Message_collector.add_error collector
303
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Multiple source sizes without media conditions." value element_name)
304
+
~code:"bad-sizes-value"
305
+
~element:element_name ~attribute:"sizes" ();
306
+
valid := false
186
307
end
187
308
end;
188
309
189
-
(* Validate each entry's size value has valid unit and is not negative *)
310
+
(* Validate each entry's media condition and size value *)
190
311
List.iter (fun entry ->
191
312
let trimmed = String.trim entry in
192
313
if trimmed <> "" then begin
314
+
(* Check for invalid media condition *)
315
+
(match has_invalid_media_condition trimmed with
316
+
| Some err_msg ->
317
+
Message_collector.add_error collector
318
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: %s." value element_name err_msg)
319
+
~code:"bad-sizes-value"
320
+
~element:element_name ~attribute:"sizes" ();
321
+
valid := false
322
+
| None -> ());
323
+
193
324
let size_val = extract_size_value trimmed in
194
325
if size_val <> "" then begin
195
326
match check_size_value size_val with
···
200
331
~code:"bad-sizes-value"
201
332
~element:element_name ~attribute:"sizes" ();
202
333
valid := false
334
+
| CssCommentInside ->
335
+
Message_collector.add_error collector
336
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token." value element_name)
337
+
~code:"bad-sizes-value"
338
+
~element:element_name ~attribute:"sizes" ();
339
+
valid := false
340
+
| BadScientificNotation ->
341
+
Message_collector.add_error collector
342
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size list: Bad CSS number token." value element_name)
343
+
~code:"bad-sizes-value"
344
+
~element:element_name ~attribute:"sizes" ();
345
+
valid := false
203
346
| InvalidUnit ->
204
347
Message_collector.add_error collector
205
348
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csizes\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad source size value." value element_name)
···
225
368
226
369
match last_char with
227
370
| 'w' ->
228
-
(* Width descriptor - must be positive integer *)
371
+
(* Width descriptor - must be positive integer, no leading + *)
372
+
let trimmed_desc = String.trim desc in
373
+
if String.length trimmed_desc > 0 && trimmed_desc.[0] = '+' then begin
374
+
Message_collector.add_error collector
375
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Expected number without leading plus sign but found \xe2\x80\x9c%s\xe2\x80\x9d at \xe2\x80\x9c%s\xe2\x80\x9d." srcset_value element_name trimmed_desc srcset_value)
376
+
~code:"bad-srcset-value"
377
+
~element:element_name ~attribute:"srcset" ();
378
+
false
379
+
end else
229
380
(try
230
381
let n = int_of_string num_part in
231
382
if n <= 0 then begin
···
338
489
let entries = String.split_on_char ',' value in
339
490
let has_w_descriptor = ref false in
340
491
let has_x_descriptor = ref false in
492
+
let has_no_descriptor = ref false in (* Track if any entry has no descriptor *)
341
493
let seen_descriptors = Hashtbl.create 8 in (* Track seen descriptor values *)
342
494
343
495
(* Check for empty srcset *)
···
370
522
if entry <> "" then begin
371
523
(* Split entry into URL and optional descriptor *)
372
524
let parts = String.split_on_char ' ' entry |> List.filter (fun s -> s <> "") in
525
+
(* Check if URL is valid *)
526
+
let check_srcset_url url =
527
+
(* Special schemes that require host/content after :// *)
528
+
let special_schemes = ["http"; "https"; "ftp"; "ws"; "wss"] in
529
+
(* Check for scheme-only URL like "http:" *)
530
+
let url_lower = String.lowercase_ascii url in
531
+
List.iter (fun scheme ->
532
+
let scheme_colon = scheme ^ ":" in
533
+
if url_lower = scheme_colon then
534
+
Message_collector.add_error collector
535
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Bad URL: Expected a slash (\"/\")." value element_name)
536
+
~code:"bad-srcset-url"
537
+
~element:element_name ~attribute:"srcset" ()
538
+
) special_schemes
539
+
in
373
540
match parts with
374
541
| [] -> ()
375
-
| [_url] ->
542
+
| [url] ->
543
+
check_srcset_url url;
376
544
(* URL only = implicit 1x descriptor - only flag if explicit 1x also seen *)
545
+
has_no_descriptor := true;
377
546
if Hashtbl.mem seen_descriptors "explicit-1x" then begin
378
547
Message_collector.add_error collector
379
548
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: Duplicate descriptor." value element_name)
···
381
550
~element:element_name ~attribute:"srcset" ()
382
551
end else
383
552
Hashtbl.add seen_descriptors "implicit-1x" true
384
-
| _url :: desc :: rest ->
553
+
| url :: desc :: rest ->
554
+
(* Check URL for broken schemes *)
555
+
check_srcset_url url;
385
556
(* Check for extra junk - multiple descriptors are not allowed *)
386
557
if rest <> [] then begin
387
558
Message_collector.add_error collector
···
427
598
~code:"srcset-w-without-sizes"
428
599
~element:element_name ~attribute:"srcset" ();
429
600
601
+
(* Check: if sizes is present, all entries must have width descriptors *)
602
+
if has_sizes && !has_no_descriptor then
603
+
Message_collector.add_error collector
604
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: No width specified for image. (When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width.)" value element_name)
605
+
~code:"bad-srcset-value"
606
+
~element:element_name ~attribute:"srcset" ();
607
+
608
+
(* Check: if sizes is present and srcset uses x descriptors, that's an error *)
609
+
if has_sizes && !has_x_descriptor then
610
+
Message_collector.add_error collector
611
+
~message:(Printf.sprintf "Bad value \xe2\x80\x9c%s\xe2\x80\x9d for attribute \xe2\x80\x9csrcset\xe2\x80\x9d on element \xe2\x80\x9c%s\xe2\x80\x9d: When the \xe2\x80\x9csizes\xe2\x80\x9d attribute is present, all image candidate strings must specify a width." value element_name)
612
+
~code:"bad-srcset-value"
613
+
~element:element_name ~attribute:"srcset" ();
614
+
430
615
(* Check for mixing w and x descriptors *)
431
616
if !has_w_descriptor && !has_x_descriptor then
432
617
Message_collector.add_error collector
···
435
620
~element:element_name ~attribute:"srcset" ()
436
621
437
622
let start_element _state ~name ~namespace ~attrs collector =
623
+
let name_lower = String.lowercase_ascii name in
624
+
625
+
(* SVG image elements should not have srcset *)
626
+
if namespace <> None && name_lower = "image" then begin
627
+
if get_attr "srcset" attrs <> None then
628
+
Message_collector.add_error collector
629
+
~message:"Attribute \xe2\x80\x9csrcset\xe2\x80\x9d not allowed on element \xe2\x80\x9cimage\xe2\x80\x9d at this point."
630
+
~code:"disallowed-attribute"
631
+
~element:"image" ~attribute:"srcset" ()
632
+
end;
633
+
438
634
if namespace <> None then ()
439
635
else begin
440
-
let name_lower = String.lowercase_ascii name in
441
-
442
636
(* Check sizes and srcset on img and source *)
443
637
if name_lower = "img" || name_lower = "source" then begin
444
638
let sizes_value = get_attr "sizes" attrs in
445
639
let srcset_value = get_attr "srcset" attrs in
446
640
let has_sizes = sizes_value <> None in
641
+
let has_srcset = srcset_value <> None in
447
642
448
643
(* Validate sizes if present *)
449
644
(match sizes_value with
···
453
648
(* Validate srcset if present *)
454
649
(match srcset_value with
455
650
| Some v -> validate_srcset v name_lower has_sizes collector
456
-
| None -> ())
651
+
| None -> ());
652
+
653
+
(* Error: sizes without srcset on img *)
654
+
if name_lower = "img" && has_sizes && not has_srcset then
655
+
Message_collector.add_error collector
656
+
~message:"The \xe2\x80\x9csizes\xe2\x80\x9d attribute must only be specified if the \xe2\x80\x9csrcset\xe2\x80\x9d attribute is also specified."
657
+
~code:"sizes-without-srcset"
658
+
~element:name_lower ~attribute:"sizes" ()
457
659
end
458
660
end
459
661
+41
-3
lib/html5_checker/specialized/url_checker.ml
+41
-3
lib/html5_checker/specialized/url_checker.ml
···
707
707
if namespace <> None then ()
708
708
else begin
709
709
let name_lower = String.lowercase_ascii name in
710
-
match List.assoc_opt name_lower url_attributes with
710
+
(* Check URL attributes for elements that have them *)
711
+
(match List.assoc_opt name_lower url_attributes with
711
712
| None -> ()
712
713
| Some url_attrs ->
713
714
List.iter (fun attr_name ->
···
735
736
~element:name
736
737
~attribute:attr_name
737
738
()
738
-
) url_attrs;
739
+
) url_attrs);
739
740
(* Special handling for input[type=url] value attribute - must be absolute URL *)
740
741
if name_lower = "input" then begin
741
742
let type_attr = get_attr_value "type" attrs in
···
759
760
~attribute:"value"
760
761
()
761
762
| Some _ ->
763
+
(* Check for data: URI with fragment - emit warning *)
764
+
(match check_data_uri_fragment url "value" name with
765
+
| Some warn_msg ->
766
+
Message_collector.add_warning collector
767
+
~message:warn_msg
768
+
~code:"data-uri-fragment"
769
+
~element:name
770
+
~attribute:"value"
771
+
()
772
+
| None -> ());
762
773
(* Has a scheme - do regular URL validation with "absolute URL" prefix *)
763
774
match validate_url url name "value" with
764
775
| None -> ()
···
773
784
()
774
785
end
775
786
end
776
-
end
787
+
end;
788
+
(* Check microdata itemtype and itemid attributes for data: URI fragments *)
789
+
let itemtype_opt = get_attr_value "itemtype" attrs in
790
+
(match itemtype_opt with
791
+
| Some url when String.trim url <> "" ->
792
+
(match check_data_uri_fragment url "itemtype" name with
793
+
| Some warn_msg ->
794
+
Message_collector.add_warning collector
795
+
~message:warn_msg
796
+
~code:"data-uri-fragment"
797
+
~element:name
798
+
~attribute:"itemtype"
799
+
()
800
+
| None -> ())
801
+
| _ -> ());
802
+
let itemid_opt = get_attr_value "itemid" attrs in
803
+
(match itemid_opt with
804
+
| Some url when String.trim url <> "" ->
805
+
(match check_data_uri_fragment url "itemid" name with
806
+
| Some warn_msg ->
807
+
Message_collector.add_warning collector
808
+
~message:warn_msg
809
+
~code:"data-uri-fragment"
810
+
~element:name
811
+
~attribute:"itemid"
812
+
()
813
+
| None -> ())
814
+
| _ -> ())
777
815
end
778
816
779
817
let end_element _state ~name:_ ~namespace:_ _collector = ()