+4
bin/html5check/dune
+4
bin/html5check/dune
+168
bin/html5check/html5check.ml
+168
bin/html5check/html5check.ml
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** html5check - HTML5 conformance checker CLI
7
+
8
+
Command line interface for validating HTML5 documents. *)
9
+
10
+
open Cmdliner
11
+
12
+
let version = "0.1.0"
13
+
14
+
(** Exit codes *)
15
+
module Exit_code = struct
16
+
let ok = Cmd.Exit.ok
17
+
let validation_errors = 1
18
+
let io_error = 2
19
+
end
20
+
21
+
(** Read input from file or stdin *)
22
+
let read_input file =
23
+
try
24
+
let ic =
25
+
if file = "-" then stdin
26
+
else open_in file
27
+
in
28
+
let reader = Bytesrw.Bytes.Reader.of_in_channel ic in
29
+
Ok (reader, ic, file)
30
+
with
31
+
| Sys_error msg ->
32
+
Error (`Io_error (Printf.sprintf "Cannot read file '%s': %s" file msg))
33
+
34
+
(** Format output based on the requested format *)
35
+
let format_output format result =
36
+
match format with
37
+
| `Text -> Html5_checker.format_text result
38
+
| `Json -> Html5_checker.format_json result
39
+
| `Gnu -> Html5_checker.format_gnu result
40
+
41
+
(** Run the validation *)
42
+
let run format errors_only exit_zero quiet verbose file =
43
+
match read_input file with
44
+
| Error (`Io_error msg) ->
45
+
if not quiet then Printf.eprintf "Error: %s\n" msg;
46
+
Exit_code.io_error
47
+
| Ok (reader, ic, system_id) ->
48
+
(* Run validation *)
49
+
let result = Html5_checker.check ~system_id reader in
50
+
51
+
(* Close input if it's not stdin *)
52
+
if file <> "-" then close_in ic;
53
+
54
+
(* Get messages based on filtering *)
55
+
let messages =
56
+
if errors_only then Html5_checker.errors result
57
+
else Html5_checker.messages result
58
+
in
59
+
60
+
(* Output based on mode *)
61
+
if quiet then begin
62
+
(* Only show counts *)
63
+
let error_count = List.length (Html5_checker.errors result) in
64
+
let warning_count = List.length (Html5_checker.warnings result) in
65
+
if errors_only then
66
+
Printf.printf "%d error%s\n" error_count (if error_count = 1 then "" else "s")
67
+
else
68
+
Printf.printf "%d error%s, %d warning%s\n"
69
+
error_count (if error_count = 1 then "" else "s")
70
+
warning_count (if warning_count = 1 then "" else "s")
71
+
end else begin
72
+
(* Format and print messages *)
73
+
let output = format_output format result in
74
+
if output <> "" then print_string output;
75
+
76
+
(* Show summary if verbose *)
77
+
if verbose && messages <> [] then begin
78
+
let error_count = List.length (Html5_checker.errors result) in
79
+
let warning_count = List.length (Html5_checker.warnings result) in
80
+
Printf.eprintf "\nSummary: %d error%s, %d warning%s\n"
81
+
error_count (if error_count = 1 then "" else "s")
82
+
warning_count (if warning_count = 1 then "" else "s")
83
+
end
84
+
end;
85
+
86
+
(* Determine exit code *)
87
+
if exit_zero || not (Html5_checker.has_errors result) then
88
+
Exit_code.ok
89
+
else
90
+
Exit_code.validation_errors
91
+
92
+
(** Command line argument definitions *)
93
+
94
+
let format_arg =
95
+
let formats = [("text", `Text); ("json", `Json); ("gnu", `Gnu)] in
96
+
let doc =
97
+
"Output format. $(docv) must be one of $(b,text) (human-readable, default), \
98
+
$(b,json) (Nu validator compatible JSON), or $(b,gnu) (GNU-style for IDE integration)."
99
+
in
100
+
Arg.(value & opt (enum formats) `Text & info ["format"] ~docv:"FORMAT" ~doc)
101
+
102
+
let errors_only_arg =
103
+
let doc = "Only show errors (suppress warnings)." in
104
+
Arg.(value & flag & info ["errors-only"] ~doc)
105
+
106
+
let exit_zero_arg =
107
+
let doc =
108
+
"Always exit with status code 0, even if validation errors are found. \
109
+
Useful for CI pipelines where you want to collect validation results \
110
+
but not fail the build."
111
+
in
112
+
Arg.(value & flag & info ["exit-zero"] ~doc)
113
+
114
+
let quiet_arg =
115
+
let doc = "Quiet mode - only show error and warning counts, no details." in
116
+
Arg.(value & flag & info ["q"; "quiet"] ~doc)
117
+
118
+
let verbose_arg =
119
+
let doc = "Verbose mode - show additional information including summary." in
120
+
Arg.(value & flag & info ["v"; "verbose"] ~doc)
121
+
122
+
let file_arg =
123
+
let doc =
124
+
"HTML file to validate. Use $(b,-) to read from standard input. \
125
+
If no file is specified, reads from stdin."
126
+
in
127
+
Arg.(value & pos 0 string "-" & info [] ~docv:"FILE" ~doc)
128
+
129
+
let cmd =
130
+
let doc = "validate HTML5 documents for conformance" in
131
+
let man = [
132
+
`S Manpage.s_description;
133
+
`P "$(tname) validates HTML5 documents against the WHATWG HTML5 specification. \
134
+
It reports parse errors, structural validation issues, and conformance problems.";
135
+
`P "The validator checks for:";
136
+
`I ("Parse errors", "Malformed HTML syntax according to the WHATWG specification");
137
+
`I ("Content model violations", "Elements in invalid parent/child relationships");
138
+
`I ("Attribute errors", "Invalid or missing required attributes");
139
+
`I ("Structural issues", "Other conformance problems");
140
+
`S Manpage.s_options;
141
+
`S "OUTPUT FORMATS";
142
+
`P "The validator supports three output formats:";
143
+
`I ("$(b,text)", "Human-readable format showing file:line:col: severity: message");
144
+
`I ("$(b,json)", "JSON format compatible with the Nu Html Checker (v.Nu)");
145
+
`I ("$(b,gnu)", "GNU-style format for IDE integration (file:line:column: message)");
146
+
`S "EXIT STATUS";
147
+
`P "The validator exits with one of the following status codes:";
148
+
`I ("0", "No validation errors found (or --exit-zero was specified)");
149
+
`I ("1", "Validation errors were found");
150
+
`I ("2", "File not found or I/O error");
151
+
`S Manpage.s_examples;
152
+
`P "Validate a file:";
153
+
`Pre " $(mname) index.html";
154
+
`P "Validate from stdin:";
155
+
`Pre " cat page.html | $(mname) -";
156
+
`P "Show only errors in JSON format:";
157
+
`Pre " $(mname) --format=json --errors-only page.html";
158
+
`P "Quiet mode for CI:";
159
+
`Pre " $(mname) --quiet --exit-zero index.html";
160
+
`S Manpage.s_bugs;
161
+
`P "Report bugs at https://tangled.org/@anil.recoil.org/ocaml-html5rw/issues";
162
+
] in
163
+
let info = Cmd.info "html5check" ~version ~doc ~man in
164
+
Cmd.v info Term.(const run $ format_arg $ errors_only_arg $ exit_zero_arg
165
+
$ quiet_arg $ verbose_arg $ file_arg)
166
+
167
+
let main () = Cmd.eval' cmd
168
+
let () = Stdlib.exit (main ())
+2
-1
dune-project
+2
-1
dune-project
+1
html5rw.opam
+1
html5rw.opam
+39
lib/html5_checker/checker.ml
+39
lib/html5_checker/checker.ml
···
1
+
(** Base checker module for HTML5 conformance checking. *)
2
+
3
+
module type S = sig
4
+
type state
5
+
6
+
val create : unit -> state
7
+
val reset : state -> unit
8
+
9
+
val start_element :
10
+
state ->
11
+
name:string ->
12
+
namespace:string option ->
13
+
attrs:(string * string) list ->
14
+
Message_collector.t ->
15
+
unit
16
+
17
+
val end_element :
18
+
state -> name:string -> namespace:string option -> Message_collector.t -> unit
19
+
20
+
val characters : state -> string -> Message_collector.t -> unit
21
+
val end_document : state -> Message_collector.t -> unit
22
+
end
23
+
24
+
type t = (module S)
25
+
26
+
(** No-operation checker implementation. *)
27
+
module Noop = struct
28
+
type state = unit
29
+
30
+
let create () = ()
31
+
let reset () = ()
32
+
33
+
let start_element () ~name:_ ~namespace:_ ~attrs:_ _ = ()
34
+
let end_element () ~name:_ ~namespace:_ _ = ()
35
+
let characters () _ _ = ()
36
+
let end_document () _ = ()
37
+
end
38
+
39
+
let noop () = (module Noop : S)
+177
lib/html5_checker/checker.mli
+177
lib/html5_checker/checker.mli
···
1
+
(** Base checker module for HTML5 conformance checking.
2
+
3
+
This module provides the core checker abstraction used throughout the
4
+
html5_checker library. A checker validates HTML5 documents by observing
5
+
DOM tree traversal events and emitting validation messages.
6
+
7
+
{2 Design Overview}
8
+
9
+
Checkers follow a SAX-like event model where they receive notifications
10
+
about elements, text, and document boundaries as a DOM tree is traversed.
11
+
This design allows for:
12
+
13
+
- {b Stateful validation}: Each checker maintains its own state across
14
+
multiple events
15
+
- {b Composability}: Multiple checkers can validate the same document
16
+
simultaneously
17
+
- {b Efficiency}: DOM traversal happens once regardless of checker count
18
+
19
+
{2 Checker Lifecycle}
20
+
21
+
A checker progresses through these phases:
22
+
23
+
1. {b Creation}: Initialize with {!create} to set up initial state
24
+
2. {b Traversal}: Receive {!start_element}, {!characters}, and
25
+
{!end_element} events as the DOM is walked
26
+
3. {b Completion}: Finalize validation with {!end_document}
27
+
4. {b Reset} (optional): Return to initial state with {!reset}
28
+
29
+
{3 Event Sequence}
30
+
31
+
For a document like [<p>Hello <b>world</b></p>], the event sequence is:
32
+
33
+
{v
34
+
start_element "p"
35
+
characters "Hello "
36
+
start_element "b"
37
+
characters "world"
38
+
end_element "b"
39
+
end_element "p"
40
+
end_document
41
+
v}
42
+
43
+
{2 First-Class Modules}
44
+
45
+
Checkers are represented as first-class modules implementing the {!S}
46
+
signature. This allows:
47
+
48
+
- Dynamic checker registration and discovery
49
+
- Heterogeneous collections of checkers
50
+
- Checker selection at runtime based on validation requirements
51
+
52
+
@see <https://v2.ocaml.org/manual/firstclassmodules.html>
53
+
OCaml manual: First-class modules
54
+
*)
55
+
56
+
(** {1 Module Signature} *)
57
+
58
+
(** The signature that all checker modules must implement.
59
+
60
+
A checker module maintains validation state and receives notifications
61
+
about DOM tree traversal events. *)
62
+
module type S = sig
63
+
(** The type of checker state.
64
+
65
+
This is an abstract type that holds the checker's internal validation
66
+
state. Different checkers will have different state representations
67
+
depending on what they need to track during validation. *)
68
+
type state
69
+
70
+
(** {1 Lifecycle Operations} *)
71
+
72
+
val create : unit -> state
73
+
(** [create ()] initializes a new checker state.
74
+
75
+
This function sets up the initial state needed for validation,
76
+
such as empty stacks for context tracking, counters, or lookup
77
+
tables. *)
78
+
79
+
val reset : state -> unit
80
+
(** [reset state] resets the checker to its initial state.
81
+
82
+
This allows reusing a checker for multiple documents without
83
+
reallocating. After reset, the checker behaves as if freshly
84
+
created with {!create}. *)
85
+
86
+
(** {1 DOM Traversal Events} *)
87
+
88
+
val start_element :
89
+
state ->
90
+
name:string ->
91
+
namespace:string option ->
92
+
attrs:(string * string) list ->
93
+
Message_collector.t ->
94
+
unit
95
+
(** [start_element state ~name ~namespace ~attrs collector] is called when
96
+
entering an element during DOM traversal.
97
+
98
+
@param state The checker state
99
+
@param name The element tag name (e.g., "div", "p", "span")
100
+
@param namespace The element namespace ([None] for HTML, [Some "svg"]
101
+
for SVG, [Some "mathml"] for MathML)
102
+
@param attrs The element's attributes as [(name, value)] pairs
103
+
@param collector The message collector for emitting validation messages
104
+
105
+
This is where checkers can validate:
106
+
- Whether the element is allowed in the current context
107
+
- Whether required attributes are present
108
+
- Whether attribute values are valid
109
+
- Whether the element opens a new validation context *)
110
+
111
+
val end_element :
112
+
state -> name:string -> namespace:string option -> Message_collector.t -> unit
113
+
(** [end_element state ~name ~namespace collector] is called when exiting
114
+
an element during DOM traversal.
115
+
116
+
@param state The checker state
117
+
@param name The element tag name
118
+
@param namespace The element namespace
119
+
@param collector The message collector for emitting validation messages
120
+
121
+
This is where checkers can:
122
+
- Pop validation contexts from stacks
123
+
- Validate that required child elements were present
124
+
- Emit messages about element-scoped validation rules *)
125
+
126
+
val characters : state -> string -> Message_collector.t -> unit
127
+
(** [characters state text collector] is called when text content is
128
+
encountered during DOM traversal.
129
+
130
+
@param state The checker state
131
+
@param text The text content
132
+
@param collector The message collector for emitting validation messages
133
+
134
+
This is where checkers can validate:
135
+
- Whether text is allowed in the current context
136
+
- Whether text content follows specific patterns
137
+
- Whether text matches expected formats *)
138
+
139
+
val end_document : state -> Message_collector.t -> unit
140
+
(** [end_document state collector] is called after the entire DOM tree has
141
+
been traversed.
142
+
143
+
@param state The checker state
144
+
@param collector The message collector for emitting validation messages
145
+
146
+
This is where checkers can:
147
+
- Emit messages about missing required elements
148
+
- Validate document-level constraints
149
+
- Check that all opened contexts were properly closed
150
+
- Report any accumulated validation failures *)
151
+
end
152
+
153
+
(** {1 Checker Values} *)
154
+
155
+
(** The type of a checker value.
156
+
157
+
This is a packed first-class module containing both the checker
158
+
implementation and its state. It enables storing heterogeneous
159
+
checkers in collections and passing them around dynamically. *)
160
+
type t = (module S)
161
+
162
+
(** {1 Built-in Checkers} *)
163
+
164
+
val noop : unit -> t
165
+
(** [noop ()] creates a no-operation checker that performs no validation.
166
+
167
+
This checker ignores all events and never emits messages. It is useful:
168
+
- As a placeholder in checker registries
169
+
- For testing checker infrastructure
170
+
- As a base for building new checkers
171
+
172
+
{b Example:}
173
+
{[
174
+
let checker = noop () in
175
+
(* Does nothing when walked over a DOM tree *)
176
+
]}
177
+
*)
+22
lib/html5_checker/checker_registry.ml
+22
lib/html5_checker/checker_registry.ml
···
1
+
(** Registry for HTML5 conformance checkers. *)
2
+
3
+
type t = (string, Checker.t) Hashtbl.t
4
+
5
+
let create () = Hashtbl.create 16
6
+
7
+
let default () =
8
+
(* In Phase 1, return an empty registry.
9
+
Built-in checkers will be added in later phases. *)
10
+
create ()
11
+
12
+
let register registry name checker = Hashtbl.replace registry name checker
13
+
14
+
let unregister registry name = Hashtbl.remove registry name
15
+
16
+
let get registry name = Hashtbl.find_opt registry name
17
+
18
+
let list_names registry =
19
+
Hashtbl.to_seq_keys registry |> List.of_seq
20
+
21
+
let all registry =
22
+
Hashtbl.to_seq_values registry |> List.of_seq
+156
lib/html5_checker/checker_registry.mli
+156
lib/html5_checker/checker_registry.mli
···
1
+
(** Registry for HTML5 conformance checkers.
2
+
3
+
This module provides a dynamic registry for managing collections of
4
+
checkers. It enables:
5
+
6
+
- {b Registration}: Add checkers under descriptive names
7
+
- {b Discovery}: Retrieve checkers by name or list all available ones
8
+
- {b Lifecycle management}: Register and unregister checkers at runtime
9
+
- {b Defaults}: Access a pre-configured set of built-in checkers
10
+
11
+
{2 Design Rationale}
12
+
13
+
The registry pattern separates checker implementation from checker usage.
14
+
Applications can:
15
+
16
+
1. Query available checkers to present options to users
17
+
2. Select specific checkers based on validation requirements
18
+
3. Add custom checkers without modifying library code
19
+
4. Share checker configurations across validation runs
20
+
21
+
{2 Usage Pattern}
22
+
23
+
{[
24
+
(* Start with default checkers *)
25
+
let reg = default () in
26
+
27
+
(* Add a custom checker *)
28
+
let my_checker = (module MyChecker : Checker.S) in
29
+
register reg "my-custom-check" my_checker;
30
+
31
+
(* List all available checkers *)
32
+
let names = list_names reg in
33
+
List.iter (Printf.printf "Available: %s\n") names;
34
+
35
+
(* Retrieve a specific checker *)
36
+
match get reg "my-custom-check" with
37
+
| Some checker -> (* Use the checker *)
38
+
| None -> (* Not found *)
39
+
40
+
(* Get all checkers for validation *)
41
+
let all_checkers = all reg in
42
+
(* Pass to dom_walker *)
43
+
]}
44
+
45
+
{2 Thread Safety}
46
+
47
+
This registry is not thread-safe. If shared across threads, external
48
+
synchronization is required. *)
49
+
50
+
(** {1 Types} *)
51
+
52
+
(** The type of a checker registry.
53
+
54
+
This is an opaque type representing a mutable collection of named
55
+
checkers. Internally implemented as a hash table for efficient lookups. *)
56
+
type t
57
+
58
+
(** {1 Creation} *)
59
+
60
+
val create : unit -> t
61
+
(** [create ()] creates a new empty checker registry.
62
+
63
+
Use this when you want to build a custom set of checkers from scratch,
64
+
without any defaults. *)
65
+
66
+
val default : unit -> t
67
+
(** [default ()] creates a registry with built-in checkers.
68
+
69
+
The default registry is initially empty but serves as a starting point
70
+
for adding standard validation checkers in future phases.
71
+
72
+
Built-in checkers will include:
73
+
- Document structure validation
74
+
- Attribute validation
75
+
- Content model checking
76
+
- Accessibility checks
77
+
78
+
Note: In Phase 1, the default registry is empty. Built-in checkers
79
+
will be added in subsequent phases. *)
80
+
81
+
(** {1 Registration} *)
82
+
83
+
val register : t -> string -> Checker.t -> unit
84
+
(** [register registry name checker] adds a checker to the registry.
85
+
86
+
@param registry The registry to add to
87
+
@param name A unique identifier for the checker (e.g., "obsolete-elements",
88
+
"required-attributes")
89
+
@param checker The checker implementation
90
+
91
+
If a checker with the same name already exists, it is replaced.
92
+
93
+
{b Example:}
94
+
{[
95
+
let reg = create () in
96
+
let checker = (module MyChecker : Checker.S) in
97
+
register reg "my-check" checker
98
+
]} *)
99
+
100
+
val unregister : t -> string -> unit
101
+
(** [unregister registry name] removes a checker from the registry.
102
+
103
+
@param registry The registry to remove from
104
+
@param name The checker name
105
+
106
+
If no checker with the given name exists, this is a no-op. *)
107
+
108
+
(** {1 Retrieval} *)
109
+
110
+
val get : t -> string -> Checker.t option
111
+
(** [get registry name] retrieves a checker by name.
112
+
113
+
@param registry The registry to search
114
+
@param name The checker name
115
+
@return [Some checker] if found, [None] otherwise
116
+
117
+
{b Example:}
118
+
{[
119
+
match get reg "obsolete-elements" with
120
+
| Some checker -> (* Use checker *)
121
+
| None -> (* Checker not registered *)
122
+
]} *)
123
+
124
+
val list_names : t -> string list
125
+
(** [list_names registry] returns all registered checker names.
126
+
127
+
@param registry The registry to query
128
+
@return A list of all checker names in arbitrary order
129
+
130
+
This is useful for:
131
+
- Displaying available checkers to users
132
+
- Debugging registry contents
133
+
- Iterating over specific subsets of checkers
134
+
135
+
{b Example:}
136
+
{[
137
+
let names = list_names reg in
138
+
Printf.printf "Available checkers: %s\n"
139
+
(String.concat ", " names)
140
+
]} *)
141
+
142
+
val all : t -> Checker.t list
143
+
(** [all registry] returns all registered checkers.
144
+
145
+
@param registry The registry to query
146
+
@return A list of all checkers in arbitrary order
147
+
148
+
This is the primary way to retrieve checkers for validation.
149
+
Pass the result to {!Dom_walker.walk_all} to run all registered
150
+
checkers on a DOM tree.
151
+
152
+
{b Example:}
153
+
{[
154
+
let checkers = all reg in
155
+
Dom_walker.walk_all checkers collector dom
156
+
]} *)
+25
lib/html5_checker/content_model/attr_spec.ml
+25
lib/html5_checker/content_model/attr_spec.ml
···
1
+
type t = {
2
+
name : string;
3
+
required : bool;
4
+
datatype : string option;
5
+
enum_values : string list option;
6
+
deprecated : bool;
7
+
}
8
+
9
+
let make name ?(required = false) ?datatype ?enum_values ?(deprecated = false) () =
10
+
{ name; required; datatype; enum_values; deprecated }
11
+
12
+
let pp fmt t =
13
+
Format.fprintf fmt "@[<v 2>{ name = %S;@ required = %b;@ " t.name t.required;
14
+
(match t.datatype with
15
+
| None -> Format.fprintf fmt "datatype = None;@ "
16
+
| Some dt -> Format.fprintf fmt "datatype = Some %S;@ " dt);
17
+
(match t.enum_values with
18
+
| None -> Format.fprintf fmt "enum_values = None;@ "
19
+
| Some vals ->
20
+
Format.fprintf fmt "enum_values = Some [%a];@ "
21
+
(Format.pp_print_list
22
+
~pp_sep:(fun fmt () -> Format.fprintf fmt "; ")
23
+
(fun fmt s -> Format.fprintf fmt "%S" s))
24
+
vals);
25
+
Format.fprintf fmt "deprecated = %b }@]" t.deprecated
+20
lib/html5_checker/content_model/attr_spec.mli
+20
lib/html5_checker/content_model/attr_spec.mli
···
1
+
(** HTML5 attribute specifications. *)
2
+
3
+
type t = {
4
+
name : string;
5
+
required : bool;
6
+
datatype : string option; (** Datatype name for validation *)
7
+
enum_values : string list option; (** Allowed values if enumerated *)
8
+
deprecated : bool;
9
+
}
10
+
11
+
val make :
12
+
string ->
13
+
?required:bool ->
14
+
?datatype:string ->
15
+
?enum_values:string list ->
16
+
?deprecated:bool ->
17
+
unit ->
18
+
t
19
+
20
+
val pp : Format.formatter -> t -> unit
+26
lib/html5_checker/content_model/attribute_spec.ml
+26
lib/html5_checker/content_model/attribute_spec.ml
···
1
+
type requirement = Required | Optional
2
+
3
+
type t = {
4
+
name : string;
5
+
requirement : requirement;
6
+
datatype : string option;
7
+
description : string;
8
+
}
9
+
10
+
let create ~name ?(requirement = Optional) ?datatype ~description () =
11
+
{ name; requirement; datatype; description }
12
+
13
+
let requirement_to_string = function
14
+
| Required -> "required"
15
+
| Optional -> "optional"
16
+
17
+
let to_string t =
18
+
let req = requirement_to_string t.requirement in
19
+
let dt =
20
+
match t.datatype with
21
+
| None -> ""
22
+
| Some d -> Printf.sprintf " (%s)" d
23
+
in
24
+
Printf.sprintf "%s [%s]%s: %s" t.name req dt t.description
25
+
26
+
let pp fmt t = Format.fprintf fmt "%s" (to_string t)
+40
lib/html5_checker/content_model/attribute_spec.mli
+40
lib/html5_checker/content_model/attribute_spec.mli
···
1
+
(** HTML5 attribute specifications.
2
+
3
+
Defines attribute requirements and constraints for HTML5 elements.
4
+
See https://html.spec.whatwg.org/multipage/indices.html#attributes-3 *)
5
+
6
+
type requirement =
7
+
| Required (** Attribute must be present *)
8
+
| Optional (** Attribute may be present *)
9
+
10
+
type t = {
11
+
name : string;
12
+
(** Attribute name *)
13
+
14
+
requirement : requirement;
15
+
(** Whether attribute is required or optional *)
16
+
17
+
datatype : string option;
18
+
(** Datatype validator name (e.g., "url", "integer", "boolean") *)
19
+
20
+
description : string;
21
+
(** Human-readable description *)
22
+
}
23
+
24
+
val create :
25
+
name:string ->
26
+
?requirement:requirement ->
27
+
?datatype:string ->
28
+
description:string ->
29
+
unit ->
30
+
t
31
+
(** Create an attribute specification.
32
+
33
+
@param name Attribute name
34
+
@param requirement Whether required or optional (default: Optional)
35
+
@param datatype Datatype validator name
36
+
@param description Human-readable description *)
37
+
38
+
val to_string : t -> string
39
+
40
+
val pp : Format.formatter -> t -> unit
+39
lib/html5_checker/content_model/category.ml
+39
lib/html5_checker/content_model/category.ml
···
1
+
type t =
2
+
| Metadata
3
+
| Flow
4
+
| Sectioning
5
+
| Heading
6
+
| Phrasing
7
+
| Embedded
8
+
| Interactive
9
+
| Palpable
10
+
| Script_supporting
11
+
| Form_associated
12
+
| Listed
13
+
| Labelable
14
+
| Submittable
15
+
| Resettable
16
+
| Autocapitalize_inheriting
17
+
| Transparent
18
+
19
+
let to_string = function
20
+
| Metadata -> "metadata"
21
+
| Flow -> "flow"
22
+
| Sectioning -> "sectioning"
23
+
| Heading -> "heading"
24
+
| Phrasing -> "phrasing"
25
+
| Embedded -> "embedded"
26
+
| Interactive -> "interactive"
27
+
| Palpable -> "palpable"
28
+
| Script_supporting -> "script-supporting"
29
+
| Form_associated -> "form-associated"
30
+
| Listed -> "listed"
31
+
| Labelable -> "labelable"
32
+
| Submittable -> "submittable"
33
+
| Resettable -> "resettable"
34
+
| Autocapitalize_inheriting -> "autocapitalize-inheriting"
35
+
| Transparent -> "transparent"
36
+
37
+
let compare (a : t) (b : t) = Stdlib.compare a b
38
+
39
+
let equal (a : t) (b : t) = a = b
+70
lib/html5_checker/content_model/category.mli
+70
lib/html5_checker/content_model/category.mli
···
1
+
(** HTML5 content categories.
2
+
3
+
This module defines the content categories used in HTML5 to classify elements
4
+
based on their characteristics and allowed contexts. Elements can belong to
5
+
multiple categories.
6
+
7
+
@see <https://html.spec.whatwg.org/multipage/dom.html#content-models> WHATWG HTML Specification *)
8
+
9
+
(** Content category type. *)
10
+
type t =
11
+
| Metadata
12
+
(** Metadata content sets up the presentation or behavior of the rest of
13
+
the content, or sets up the relationship of the document with other
14
+
documents, or conveys other "out of band" information. *)
15
+
| Flow
16
+
(** Most elements that are used in the body of documents and applications
17
+
are categorized as flow content. *)
18
+
| Sectioning
19
+
(** Sectioning content is content that defines the scope of headings and
20
+
footers. *)
21
+
| Heading
22
+
(** Heading content defines the heading of a section (whether explicitly
23
+
marked up using sectioning content elements, or implied by the heading
24
+
content itself). *)
25
+
| Phrasing
26
+
(** Phrasing content is the text of the document, as well as elements that
27
+
mark up that text at the intra-paragraph level. *)
28
+
| Embedded
29
+
(** Embedded content is content that imports another resource into the
30
+
document, or content from another vocabulary that is inserted into the
31
+
document. *)
32
+
| Interactive
33
+
(** Interactive content is content that is specifically intended for user
34
+
interaction. *)
35
+
| Palpable
36
+
(** As a general rule, elements whose content model allows any flow content
37
+
or phrasing content should have at least one node in its contents that
38
+
is palpable content and that does not have the hidden attribute specified. *)
39
+
| Script_supporting
40
+
(** Script-supporting elements are those that do not represent anything
41
+
themselves (i.e., they are not rendered), but are used to support scripts. *)
42
+
| Form_associated
43
+
(** Form-associated elements can have a form owner. *)
44
+
| Listed
45
+
(** Listed form-associated elements have a form attribute that can point
46
+
to a form element. *)
47
+
| Labelable
48
+
(** Labelable form-associated elements can be associated with label elements. *)
49
+
| Submittable
50
+
(** Submittable form-associated elements can be used for constructing the
51
+
entry list when a form element is submitted. *)
52
+
| Resettable
53
+
(** Resettable form-associated elements are affected when a form element
54
+
is reset. *)
55
+
| Autocapitalize_inheriting
56
+
(** Some elements inherit the autocapitalize attribute from their form owner. *)
57
+
| Transparent
58
+
(** Transparent content models adopt the content model of their parent
59
+
element. *)
60
+
61
+
(** {1 Predicates} *)
62
+
63
+
val to_string : t -> string
64
+
(** [to_string category] returns a string representation of the category. *)
65
+
66
+
val compare : t -> t -> int
67
+
(** [compare c1 c2] compares two categories for ordering. *)
68
+
69
+
val equal : t -> t -> bool
70
+
(** [equal c1 c2] returns [true] if the categories are equal. *)
+48
lib/html5_checker/content_model/content_category.ml
+48
lib/html5_checker/content_model/content_category.ml
···
1
+
type t =
2
+
| Metadata
3
+
| Flow
4
+
| Sectioning
5
+
| Heading
6
+
| Phrasing
7
+
| Embedded
8
+
| Interactive
9
+
| Palpable
10
+
| Script_supporting
11
+
12
+
let to_string = function
13
+
| Metadata -> "metadata"
14
+
| Flow -> "flow"
15
+
| Sectioning -> "sectioning"
16
+
| Heading -> "heading"
17
+
| Phrasing -> "phrasing"
18
+
| Embedded -> "embedded"
19
+
| Interactive -> "interactive"
20
+
| Palpable -> "palpable"
21
+
| Script_supporting -> "script-supporting"
22
+
23
+
let of_string = function
24
+
| "metadata" -> Some Metadata
25
+
| "flow" -> Some Flow
26
+
| "sectioning" -> Some Sectioning
27
+
| "heading" -> Some Heading
28
+
| "phrasing" -> Some Phrasing
29
+
| "embedded" -> Some Embedded
30
+
| "interactive" -> Some Interactive
31
+
| "palpable" -> Some Palpable
32
+
| "script-supporting" -> Some Script_supporting
33
+
| _ -> None
34
+
35
+
let pp fmt t = Format.fprintf fmt "%s" (to_string t)
36
+
37
+
let all =
38
+
[
39
+
Metadata;
40
+
Flow;
41
+
Sectioning;
42
+
Heading;
43
+
Phrasing;
44
+
Embedded;
45
+
Interactive;
46
+
Palpable;
47
+
Script_supporting;
48
+
]
+23
lib/html5_checker/content_model/content_category.mli
+23
lib/html5_checker/content_model/content_category.mli
···
1
+
(** HTML5 content categories.
2
+
3
+
Elements are categorized for determining valid parent-child relationships.
4
+
See https://html.spec.whatwg.org/multipage/dom.html#content-models *)
5
+
6
+
type t =
7
+
| Metadata (** head, title, meta, link, style, script, noscript, base *)
8
+
| Flow (** Most body elements *)
9
+
| Sectioning (** article, aside, nav, section *)
10
+
| Heading (** h1-h6, hgroup *)
11
+
| Phrasing (** Inline elements: a, em, strong, span, etc. *)
12
+
| Embedded (** img, video, audio, canvas, iframe, embed, object *)
13
+
| Interactive (** a (with href), button, input, select, textarea, details *)
14
+
| Palpable (** Elements with renderable content *)
15
+
| Script_supporting (** script, template *)
16
+
17
+
val to_string : t -> string
18
+
19
+
val of_string : string -> t option
20
+
21
+
val pp : Format.formatter -> t -> unit
22
+
23
+
val all : t list
+215
lib/html5_checker/content_model/content_checker.ml
+215
lib/html5_checker/content_model/content_checker.ml
···
1
+
type element_context = {
2
+
name : string;
3
+
spec : Element_spec.t;
4
+
children_count : int;
5
+
}
6
+
7
+
type state = {
8
+
registry : Element_registry.t;
9
+
mutable ancestor_stack : element_context list;
10
+
}
11
+
12
+
let create_with_registry ?(registry = Element_registry.default ()) _collector =
13
+
{ registry; ancestor_stack = [] }
14
+
15
+
let create () = create_with_registry (Message_collector.create ())
16
+
17
+
let reset state =
18
+
state.ancestor_stack <- []
19
+
20
+
(* Check if an element name matches a content model *)
21
+
let rec matches_content_model registry element_name content_model =
22
+
match content_model with
23
+
| Content_model.Nothing -> false
24
+
| Content_model.Text -> false (* Text, not element *)
25
+
| Content_model.Transparent -> true (* Inherits parent, allow for now *)
26
+
| Content_model.Categories cats -> (
27
+
match Element_registry.get registry element_name with
28
+
| None -> false
29
+
| Some spec ->
30
+
List.exists (fun cat -> Element_spec.has_category spec cat) cats)
31
+
| Content_model.Elements names ->
32
+
List.mem (String.lowercase_ascii element_name)
33
+
(List.map String.lowercase_ascii names)
34
+
| Content_model.Mixed cats -> (
35
+
match Element_registry.get registry element_name with
36
+
| None -> false
37
+
| Some spec ->
38
+
List.exists (fun cat -> Element_spec.has_category spec cat) cats)
39
+
| Content_model.One_or_more model -> matches_content_model registry element_name model
40
+
| Content_model.Zero_or_more model -> matches_content_model registry element_name model
41
+
| Content_model.Optional model -> matches_content_model registry element_name model
42
+
| Content_model.Sequence models ->
43
+
(* For sequences, allow any of the models for now (simplified) *)
44
+
List.exists (matches_content_model registry element_name) models
45
+
| Content_model.Choice models ->
46
+
List.exists (matches_content_model registry element_name) models
47
+
| Content_model.Except (model, excluded_cats) -> (
48
+
match Element_registry.get registry element_name with
49
+
| None -> matches_content_model registry element_name model
50
+
| Some spec ->
51
+
matches_content_model registry element_name model
52
+
&& not (List.exists (fun cat -> Element_spec.has_category spec cat) excluded_cats))
53
+
54
+
(* Check if text is allowed in a content model *)
55
+
let rec allows_text content_model =
56
+
match content_model with
57
+
| Content_model.Nothing -> false
58
+
| Content_model.Text -> true
59
+
| Content_model.Transparent -> true (* Inherits parent *)
60
+
| Content_model.Categories _ -> false (* Elements only *)
61
+
| Content_model.Elements _ -> false (* Specific elements only *)
62
+
| Content_model.Mixed _ -> true (* Text + elements *)
63
+
| Content_model.One_or_more model -> allows_text model
64
+
| Content_model.Zero_or_more model -> allows_text model
65
+
| Content_model.Optional model -> allows_text model
66
+
| Content_model.Sequence models -> List.exists allows_text models
67
+
| Content_model.Choice models -> List.exists allows_text models
68
+
| Content_model.Except (model, _) -> allows_text model
69
+
70
+
(* Check for prohibited ancestors *)
71
+
let check_prohibited_ancestors state name spec collector =
72
+
List.iter
73
+
(fun prohibited ->
74
+
if List.exists (fun ctx -> String.equal ctx.name prohibited) state.ancestor_stack then
75
+
Message_collector.add_error collector
76
+
~message:(Printf.sprintf "Element '%s' cannot be nested inside '%s'" name prohibited)
77
+
~code:"prohibited-ancestor"
78
+
~element:name
79
+
())
80
+
spec.Element_spec.prohibited_ancestors
81
+
82
+
(* Validate that a child element is allowed *)
83
+
let validate_child_element state child_name collector =
84
+
match state.ancestor_stack with
85
+
| [] ->
86
+
(* Root level - only html allowed *)
87
+
if not (String.equal (String.lowercase_ascii child_name) "html") then
88
+
Message_collector.add_error collector
89
+
~message:(Printf.sprintf "Element '%s' not allowed at document root (only 'html' allowed)" child_name)
90
+
~code:"invalid-root-element"
91
+
~element:child_name
92
+
()
93
+
| parent :: _ ->
94
+
let content_model = parent.spec.Element_spec.content_model in
95
+
if not (matches_content_model state.registry child_name content_model) then
96
+
Message_collector.add_error collector
97
+
~message:(Printf.sprintf
98
+
"Element '%s' not allowed as child of '%s' (content model: %s)"
99
+
child_name
100
+
parent.name
101
+
(Content_model.to_string content_model))
102
+
~code:"invalid-child-element"
103
+
~element:child_name
104
+
()
105
+
106
+
let start_element state ~name ~namespace:_ ~attrs:_ collector =
107
+
(* Look up element specification *)
108
+
let spec_opt = Element_registry.get state.registry name in
109
+
110
+
match spec_opt with
111
+
| None ->
112
+
(* Unknown element - emit warning *)
113
+
Message_collector.add_warning collector
114
+
~message:(Printf.sprintf "Unknown element '%s'" name)
115
+
~code:"unknown-element"
116
+
~element:name
117
+
()
118
+
| Some spec ->
119
+
(* Check prohibited ancestors *)
120
+
check_prohibited_ancestors state name spec collector;
121
+
122
+
(* Validate this element is allowed as child of parent *)
123
+
validate_child_element state name collector;
124
+
125
+
(* Push element context onto stack *)
126
+
let context = { name; spec; children_count = 0 } in
127
+
state.ancestor_stack <- context :: state.ancestor_stack
128
+
129
+
let end_element state ~name ~namespace:_ collector =
130
+
match state.ancestor_stack with
131
+
| [] ->
132
+
(* Unmatched closing tag *)
133
+
Message_collector.add_error collector
134
+
~message:(Printf.sprintf "Unmatched closing tag '%s'" name)
135
+
~code:"unmatched-closing-tag"
136
+
~element:name
137
+
()
138
+
| context :: rest ->
139
+
if not (String.equal context.name name) then
140
+
(* Mismatched tag *)
141
+
Message_collector.add_error collector
142
+
~message:(Printf.sprintf "Expected closing tag '%s' but got '%s'" context.name name)
143
+
~code:"mismatched-closing-tag"
144
+
~element:name
145
+
()
146
+
else (
147
+
(* Check if void element has children *)
148
+
if Element_spec.is_void context.spec && context.children_count > 0 then
149
+
Message_collector.add_error collector
150
+
~message:(Printf.sprintf "Void element '%s' must not have children" name)
151
+
~code:"void-element-has-children"
152
+
~element:name
153
+
();
154
+
155
+
(* Pop stack *)
156
+
state.ancestor_stack <- rest;
157
+
158
+
(* If there's a parent, increment its child count *)
159
+
match rest with
160
+
| [] -> ()
161
+
| parent :: rest_tail ->
162
+
let updated_parent = { parent with children_count = parent.children_count + 1 } in
163
+
state.ancestor_stack <- updated_parent :: rest_tail)
164
+
165
+
let characters state text collector =
166
+
(* Check if text is allowed in current context *)
167
+
match state.ancestor_stack with
168
+
| [] ->
169
+
(* Text at root level - only whitespace allowed *)
170
+
if not (String.trim text = "") then
171
+
Message_collector.add_error collector
172
+
~message:"Text content not allowed at document root"
173
+
~code:"text-at-root"
174
+
()
175
+
| parent :: rest ->
176
+
let content_model = parent.spec.Element_spec.content_model in
177
+
if not (allows_text content_model) then
178
+
(* Only report if non-whitespace text *)
179
+
if not (String.trim text = "") then
180
+
Message_collector.add_error collector
181
+
~message:(Printf.sprintf
182
+
"Text content not allowed in '%s' (content model: %s)"
183
+
parent.name
184
+
(Content_model.to_string content_model))
185
+
~code:"text-not-allowed"
186
+
~element:parent.name
187
+
()
188
+
else (
189
+
(* Text is allowed, increment child count *)
190
+
let updated_parent = { parent with children_count = parent.children_count + 1 } in
191
+
state.ancestor_stack <- updated_parent :: rest)
192
+
193
+
let end_document state collector =
194
+
(* Check for unclosed elements *)
195
+
List.iter
196
+
(fun context ->
197
+
Message_collector.add_error collector
198
+
~message:(Printf.sprintf "Unclosed element '%s'" context.name)
199
+
~code:"unclosed-element"
200
+
~element:context.name
201
+
())
202
+
state.ancestor_stack
203
+
204
+
(* Package as first-class module *)
205
+
let checker =
206
+
(module struct
207
+
type nonrec state = state
208
+
209
+
let create = create
210
+
let reset = reset
211
+
let start_element = start_element
212
+
let end_element = end_element
213
+
let characters = characters
214
+
let end_document = end_document
215
+
end : Checker.S)
+57
lib/html5_checker/content_model/content_checker.mli
+57
lib/html5_checker/content_model/content_checker.mli
···
1
+
(** Content model checker.
2
+
3
+
Validates that HTML elements conform to their content model specifications.
4
+
5
+
The content model checker performs structural validation of HTML documents
6
+
by ensuring that:
7
+
8
+
- Element children match the element's declared content model
9
+
- No prohibited ancestor relationships exist (e.g., no [<a>] inside [<a>])
10
+
- Void elements contain no children
11
+
- Required children are present where mandated
12
+
13
+
{2 Content Model Validation}
14
+
15
+
The checker validates content models by:
16
+
17
+
1. Looking up the element specification in the registry
18
+
2. Checking each child element or text node against the content model
19
+
3. Tracking the ancestor stack to detect prohibited relationships
20
+
4. Emitting appropriate errors or warnings for violations
21
+
22
+
{2 Usage Example}
23
+
24
+
{[
25
+
let checker = Content_checker.create (Message_collector.create ()) in
26
+
let module C = (val checker : Checker.S) in
27
+
let state = C.create () in
28
+
29
+
(* Walk the DOM tree *)
30
+
C.start_element state ~name:"div" ~namespace:None ~attrs:[] collector;
31
+
C.characters state "Hello, world!" collector;
32
+
C.end_element state ~name:"div" ~namespace:None collector;
33
+
C.end_document state collector
34
+
]}
35
+
*)
36
+
37
+
(** Include the standard checker signature. *)
38
+
include Checker.S
39
+
40
+
(** {1 Creation} *)
41
+
42
+
val create_with_registry : ?registry:Element_registry.t -> Message_collector.t -> state
43
+
(** [create_with_registry ?registry collector] creates a content checker with an
44
+
optional custom element registry.
45
+
46
+
If no registry is provided, uses {!Element_registry.default}.
47
+
48
+
@param registry Custom element registry (defaults to standard HTML5 elements)
49
+
@param collector Message collector for validation messages *)
50
+
51
+
(** {1 First-Class Module} *)
52
+
53
+
val checker : Checker.t
54
+
(** [checker] is the content checker packaged as a first-class module.
55
+
56
+
This allows the content checker to be used in checker registries and
57
+
other contexts that work with heterogeneous checker collections. *)
+60
lib/html5_checker/content_model/content_model.ml
+60
lib/html5_checker/content_model/content_model.ml
···
1
+
type t =
2
+
| Nothing
3
+
| Text
4
+
| Transparent
5
+
| Categories of Content_category.t list
6
+
| Elements of string list
7
+
| Mixed of Content_category.t list
8
+
| One_or_more of t
9
+
| Zero_or_more of t
10
+
| Optional of t
11
+
| Sequence of t list
12
+
| Choice of t list
13
+
| Except of t * Content_category.t list
14
+
15
+
let rec pp fmt = function
16
+
| Nothing -> Format.fprintf fmt "Nothing"
17
+
| Text -> Format.fprintf fmt "Text"
18
+
| Transparent -> Format.fprintf fmt "Transparent"
19
+
| Categories cats ->
20
+
Format.fprintf fmt "Categories [%a]"
21
+
(Format.pp_print_list
22
+
~pp_sep:(fun fmt () -> Format.fprintf fmt "; ")
23
+
Content_category.pp)
24
+
cats
25
+
| Elements elems ->
26
+
Format.fprintf fmt "Elements [%a]"
27
+
(Format.pp_print_list
28
+
~pp_sep:(fun fmt () -> Format.fprintf fmt "; ")
29
+
Format.pp_print_string)
30
+
elems
31
+
| Mixed cats ->
32
+
Format.fprintf fmt "Mixed [%a]"
33
+
(Format.pp_print_list
34
+
~pp_sep:(fun fmt () -> Format.fprintf fmt "; ")
35
+
Content_category.pp)
36
+
cats
37
+
| One_or_more t -> Format.fprintf fmt "One_or_more (%a)" pp t
38
+
| Zero_or_more t -> Format.fprintf fmt "Zero_or_more (%a)" pp t
39
+
| Optional t -> Format.fprintf fmt "Optional (%a)" pp t
40
+
| Sequence ts ->
41
+
Format.fprintf fmt "Sequence [%a]"
42
+
(Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") pp)
43
+
ts
44
+
| Choice ts ->
45
+
Format.fprintf fmt "Choice [%a]"
46
+
(Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") pp)
47
+
ts
48
+
| Except (t, cats) ->
49
+
Format.fprintf fmt "Except (%a, [%a])" pp t
50
+
(Format.pp_print_list
51
+
~pp_sep:(fun fmt () -> Format.fprintf fmt "; ")
52
+
Content_category.pp)
53
+
cats
54
+
55
+
let to_string t =
56
+
let buf = Buffer.create 256 in
57
+
let fmt = Format.formatter_of_buffer buf in
58
+
pp fmt t;
59
+
Format.pp_print_flush fmt ();
60
+
Buffer.contents buf
+21
lib/html5_checker/content_model/content_model.mli
+21
lib/html5_checker/content_model/content_model.mli
···
1
+
(** HTML5 element content models.
2
+
3
+
Defines what children an element can contain. *)
4
+
5
+
type t =
6
+
| Nothing (** No children allowed (void elements) *)
7
+
| Text (** Text only (no elements) *)
8
+
| Transparent (** Inherits parent's content model *)
9
+
| Categories of Content_category.t list (** Elements from categories *)
10
+
| Elements of string list (** Specific elements only *)
11
+
| Mixed of Content_category.t list (** Text + elements from categories *)
12
+
| One_or_more of t (** At least one child matching *)
13
+
| Zero_or_more of t (** Any number of children matching *)
14
+
| Optional of t (** Zero or one child matching *)
15
+
| Sequence of t list (** Ordered sequence *)
16
+
| Choice of t list (** Any one of *)
17
+
| Except of t * Content_category.t list (** t except categories *)
18
+
19
+
val pp : Format.formatter -> t -> unit
20
+
21
+
val to_string : t -> string
+43
lib/html5_checker/content_model/element_registry.ml
+43
lib/html5_checker/content_model/element_registry.ml
···
1
+
type t = (string, Element_spec.t) Hashtbl.t
2
+
3
+
let create () = Hashtbl.create 128
4
+
5
+
let register registry spec =
6
+
let name = String.lowercase_ascii spec.Element_spec.name in
7
+
Hashtbl.replace registry name spec
8
+
9
+
let get registry name =
10
+
let name = String.lowercase_ascii name in
11
+
Hashtbl.find_opt registry name
12
+
13
+
let list_names registry =
14
+
Hashtbl.to_seq_keys registry
15
+
|> List.of_seq
16
+
|> List.sort String.compare
17
+
18
+
let all registry =
19
+
Hashtbl.to_seq_values registry
20
+
|> List.of_seq
21
+
22
+
let default () =
23
+
let registry = create () in
24
+
25
+
(* Register document structure elements *)
26
+
List.iter (register registry) Elements_document.all;
27
+
28
+
(* Register text-level elements *)
29
+
List.iter (register registry) Elements_text.all;
30
+
31
+
(* Register form elements *)
32
+
List.iter (register registry) Elements_form.all;
33
+
34
+
(* Register embedded content elements *)
35
+
List.iter (register registry) Elements_embedded.all;
36
+
37
+
(* Register table elements *)
38
+
List.iter (register registry) Elements_table.all;
39
+
40
+
(* Register interactive elements *)
41
+
List.iter (register registry) Elements_interactive.all;
42
+
43
+
registry
+43
lib/html5_checker/content_model/element_registry.mli
+43
lib/html5_checker/content_model/element_registry.mli
···
1
+
(** Registry for HTML5 element specifications.
2
+
3
+
Provides fast lookup of element specs by name. *)
4
+
5
+
(** The type of an element registry. *)
6
+
type t
7
+
8
+
(** {1 Creation and Modification} *)
9
+
10
+
val create : unit -> t
11
+
(** [create ()] creates a new empty element registry. *)
12
+
13
+
val register : t -> Element_spec.t -> unit
14
+
(** [register registry spec] adds an element specification to the registry.
15
+
16
+
If an element with the same name already exists, it is replaced. *)
17
+
18
+
(** {1 Lookup} *)
19
+
20
+
val get : t -> string -> Element_spec.t option
21
+
(** [get registry name] looks up an element specification by tag name.
22
+
23
+
Returns [None] if the element is not registered. Tag names are
24
+
case-insensitive. *)
25
+
26
+
val list_names : t -> string list
27
+
(** [list_names registry] returns a sorted list of all registered element names. *)
28
+
29
+
val all : t -> Element_spec.t list
30
+
(** [all registry] returns all registered element specifications. *)
31
+
32
+
(** {1 Default Registry} *)
33
+
34
+
val default : unit -> t
35
+
(** [default ()] creates a registry pre-populated with all standard HTML5 elements.
36
+
37
+
The registry includes elements from:
38
+
- {!Elements_document} - Document structure and sectioning
39
+
- {!Elements_text} - Text-level semantics
40
+
- {!Elements_form} - Forms and input controls
41
+
- {!Elements_embedded} - Embedded content
42
+
- {!Elements_table} - Tables
43
+
- {!Elements_interactive} - Interactive elements *)
+62
lib/html5_checker/content_model/element_spec.ml
+62
lib/html5_checker/content_model/element_spec.ml
···
1
+
type t = {
2
+
name : string;
3
+
void : bool;
4
+
categories : Content_category.t list;
5
+
content_model : Content_model.t;
6
+
permitted_parents : string list option;
7
+
prohibited_ancestors : string list;
8
+
tag_omission : bool;
9
+
attrs : Attr_spec.t list;
10
+
implicit_aria_role : string option;
11
+
}
12
+
13
+
let make ~name ?(void = false) ?(categories = [])
14
+
?(content_model = Content_model.Nothing) ?permitted_parents
15
+
?(prohibited_ancestors = []) ?(tag_omission = false) ?(attrs = [])
16
+
?implicit_aria_role () =
17
+
{
18
+
name;
19
+
void;
20
+
categories;
21
+
content_model;
22
+
permitted_parents;
23
+
prohibited_ancestors;
24
+
tag_omission;
25
+
attrs;
26
+
implicit_aria_role;
27
+
}
28
+
29
+
let is_void t = t.void
30
+
31
+
let has_category t category = List.mem category t.categories
32
+
33
+
let pp fmt t =
34
+
Format.fprintf fmt "@[<v 2>{ name = %S;@ void = %b;@ " t.name t.void;
35
+
Format.fprintf fmt "categories = [%a];@ "
36
+
(Format.pp_print_list
37
+
~pp_sep:(fun fmt () -> Format.fprintf fmt "; ")
38
+
Content_category.pp)
39
+
t.categories;
40
+
Format.fprintf fmt "content_model = %a;@ " Content_model.pp t.content_model;
41
+
(match t.permitted_parents with
42
+
| None -> Format.fprintf fmt "permitted_parents = None;@ "
43
+
| Some parents ->
44
+
Format.fprintf fmt "permitted_parents = Some [%a];@ "
45
+
(Format.pp_print_list
46
+
~pp_sep:(fun fmt () -> Format.fprintf fmt "; ")
47
+
(fun fmt s -> Format.fprintf fmt "%S" s))
48
+
parents);
49
+
Format.fprintf fmt "prohibited_ancestors = [%a];@ "
50
+
(Format.pp_print_list
51
+
~pp_sep:(fun fmt () -> Format.fprintf fmt "; ")
52
+
(fun fmt s -> Format.fprintf fmt "%S" s))
53
+
t.prohibited_ancestors;
54
+
Format.fprintf fmt "tag_omission = %b;@ " t.tag_omission;
55
+
Format.fprintf fmt "attrs = [%a];@ "
56
+
(Format.pp_print_list
57
+
~pp_sep:(fun fmt () -> Format.fprintf fmt ";@ ")
58
+
Attr_spec.pp)
59
+
t.attrs;
60
+
(match t.implicit_aria_role with
61
+
| None -> Format.fprintf fmt "implicit_aria_role = None }@]"
62
+
| Some role -> Format.fprintf fmt "implicit_aria_role = Some %S }@]" role)
+35
lib/html5_checker/content_model/element_spec.mli
+35
lib/html5_checker/content_model/element_spec.mli
···
1
+
(** HTML5 element specifications.
2
+
3
+
Defines the complete specification for an HTML5 element including content
4
+
model, categories, and attributes. *)
5
+
6
+
type t = {
7
+
name : string;
8
+
void : bool; (** Is void element? *)
9
+
categories : Content_category.t list; (** What categories it belongs to *)
10
+
content_model : Content_model.t; (** What children are allowed *)
11
+
permitted_parents : string list option; (** Explicit parent restrictions *)
12
+
prohibited_ancestors : string list; (** Cannot appear inside these *)
13
+
tag_omission : bool; (** Can end tag be omitted? *)
14
+
attrs : Attr_spec.t list; (** Element-specific attributes *)
15
+
implicit_aria_role : string option; (** Default ARIA role *)
16
+
}
17
+
18
+
val make :
19
+
name:string ->
20
+
?void:bool ->
21
+
?categories:Content_category.t list ->
22
+
?content_model:Content_model.t ->
23
+
?permitted_parents:string list ->
24
+
?prohibited_ancestors:string list ->
25
+
?tag_omission:bool ->
26
+
?attrs:Attr_spec.t list ->
27
+
?implicit_aria_role:string ->
28
+
unit ->
29
+
t
30
+
31
+
val is_void : t -> bool
32
+
33
+
val has_category : t -> Content_category.t -> bool
34
+
35
+
val pp : Format.formatter -> t -> unit
+289
lib/html5_checker/content_model/elements_document.ml
+289
lib/html5_checker/content_model/elements_document.ml
···
1
+
(** Document structure elements *)
2
+
3
+
let html =
4
+
Element_spec.make ~name:"html"
5
+
~content_model:
6
+
(Content_model.Sequence
7
+
[
8
+
Content_model.Optional (Content_model.Elements [ "head" ]);
9
+
Content_model.Optional (Content_model.Elements [ "body" ]);
10
+
])
11
+
~implicit_aria_role:"document" ()
12
+
13
+
let head =
14
+
Element_spec.make ~name:"head" ~categories:[]
15
+
~content_model:
16
+
(Content_model.Zero_or_more (Content_model.Categories [ Metadata ]))
17
+
()
18
+
19
+
let body =
20
+
Element_spec.make ~name:"body" ~categories:[ Sectioning ]
21
+
~content_model:(Content_model.Categories [ Flow ]) ()
22
+
23
+
let title =
24
+
Element_spec.make ~name:"title" ~categories:[ Metadata ]
25
+
~content_model:Content_model.Text ()
26
+
27
+
let base =
28
+
Element_spec.make ~name:"base" ~void:true ~categories:[ Metadata ]
29
+
~content_model:Content_model.Nothing ()
30
+
31
+
let link =
32
+
Element_spec.make ~name:"link" ~void:true ~categories:[ Metadata ]
33
+
~content_model:Content_model.Nothing ()
34
+
35
+
let meta =
36
+
Element_spec.make ~name:"meta" ~void:true ~categories:[ Metadata ]
37
+
~content_model:Content_model.Nothing ()
38
+
39
+
let style =
40
+
Element_spec.make ~name:"style" ~categories:[ Metadata ]
41
+
~content_model:Content_model.Text ()
42
+
43
+
(** Sectioning elements *)
44
+
45
+
let article =
46
+
Element_spec.make ~name:"article" ~categories:[ Flow; Sectioning; Palpable ]
47
+
~content_model:(Content_model.Categories [ Flow ]) ()
48
+
49
+
let section =
50
+
Element_spec.make ~name:"section" ~categories:[ Flow; Sectioning; Palpable ]
51
+
~content_model:(Content_model.Categories [ Flow ]) ()
52
+
53
+
let nav =
54
+
Element_spec.make ~name:"nav" ~categories:[ Flow; Sectioning; Palpable ]
55
+
~content_model:(Content_model.Categories [ Flow ])
56
+
~implicit_aria_role:"navigation" ()
57
+
58
+
let aside =
59
+
Element_spec.make ~name:"aside" ~categories:[ Flow; Sectioning; Palpable ]
60
+
~content_model:(Content_model.Categories [ Flow ])
61
+
~implicit_aria_role:"complementary" ()
62
+
63
+
let h1 =
64
+
Element_spec.make ~name:"h1" ~categories:[ Flow; Heading; Palpable ]
65
+
~content_model:(Content_model.Categories [ Phrasing ])
66
+
~implicit_aria_role:"heading" ()
67
+
68
+
let h2 =
69
+
Element_spec.make ~name:"h2" ~categories:[ Flow; Heading; Palpable ]
70
+
~content_model:(Content_model.Categories [ Phrasing ])
71
+
~implicit_aria_role:"heading" ()
72
+
73
+
let h3 =
74
+
Element_spec.make ~name:"h3" ~categories:[ Flow; Heading; Palpable ]
75
+
~content_model:(Content_model.Categories [ Phrasing ])
76
+
~implicit_aria_role:"heading" ()
77
+
78
+
let h4 =
79
+
Element_spec.make ~name:"h4" ~categories:[ Flow; Heading; Palpable ]
80
+
~content_model:(Content_model.Categories [ Phrasing ])
81
+
~implicit_aria_role:"heading" ()
82
+
83
+
let h5 =
84
+
Element_spec.make ~name:"h5" ~categories:[ Flow; Heading; Palpable ]
85
+
~content_model:(Content_model.Categories [ Phrasing ])
86
+
~implicit_aria_role:"heading" ()
87
+
88
+
let h6 =
89
+
Element_spec.make ~name:"h6" ~categories:[ Flow; Heading; Palpable ]
90
+
~content_model:(Content_model.Categories [ Phrasing ])
91
+
~implicit_aria_role:"heading" ()
92
+
93
+
let hgroup =
94
+
Element_spec.make ~name:"hgroup" ~categories:[ Flow; Heading; Palpable ]
95
+
~content_model:
96
+
(Content_model.Sequence
97
+
[
98
+
Content_model.Zero_or_more
99
+
(Content_model.Choice
100
+
[
101
+
Content_model.Elements [ "p" ];
102
+
Content_model.Categories [ Heading ];
103
+
]);
104
+
])
105
+
~implicit_aria_role:"group" ()
106
+
107
+
let header =
108
+
Element_spec.make ~name:"header" ~categories:[ Flow; Palpable ]
109
+
~content_model:
110
+
(Content_model.Except
111
+
( Content_model.Categories [ Flow ],
112
+
[] (* no header/footer descendants *) ))
113
+
~prohibited_ancestors:[ "header"; "footer" ] ~implicit_aria_role:"banner" ()
114
+
115
+
let footer =
116
+
Element_spec.make ~name:"footer" ~categories:[ Flow; Palpable ]
117
+
~content_model:
118
+
(Content_model.Except
119
+
( Content_model.Categories [ Flow ],
120
+
[] (* no header/footer descendants *) ))
121
+
~prohibited_ancestors:[ "header"; "footer" ]
122
+
~implicit_aria_role:"contentinfo" ()
123
+
124
+
let address =
125
+
Element_spec.make ~name:"address" ~categories:[ Flow; Palpable ]
126
+
~content_model:
127
+
(Content_model.Except
128
+
( Content_model.Categories [ Flow ],
129
+
[ Heading; Sectioning ] (* no heading/sectioning content *) ))
130
+
~prohibited_ancestors:[ "address" ] ()
131
+
132
+
let main =
133
+
Element_spec.make ~name:"main" ~categories:[ Flow; Palpable ]
134
+
~content_model:(Content_model.Categories [ Flow ])
135
+
~implicit_aria_role:"main" ()
136
+
137
+
(** Grouping elements *)
138
+
139
+
let p =
140
+
Element_spec.make ~name:"p" ~categories:[ Flow; Palpable ]
141
+
~content_model:(Content_model.Categories [ Phrasing ]) ()
142
+
143
+
let hr =
144
+
Element_spec.make ~name:"hr" ~void:true ~categories:[ Flow ]
145
+
~content_model:Content_model.Nothing ~implicit_aria_role:"separator" ()
146
+
147
+
let pre =
148
+
Element_spec.make ~name:"pre" ~categories:[ Flow; Palpable ]
149
+
~content_model:(Content_model.Categories [ Phrasing ]) ()
150
+
151
+
let blockquote =
152
+
Element_spec.make ~name:"blockquote" ~categories:[ Flow; Palpable ]
153
+
~content_model:(Content_model.Categories [ Flow ]) ()
154
+
155
+
let ol =
156
+
Element_spec.make ~name:"ol" ~categories:[ Flow; Palpable ]
157
+
~content_model:(Content_model.Zero_or_more (Content_model.Elements [ "li" ]))
158
+
~implicit_aria_role:"list" ()
159
+
160
+
let ul =
161
+
Element_spec.make ~name:"ul" ~categories:[ Flow; Palpable ]
162
+
~content_model:(Content_model.Zero_or_more (Content_model.Elements [ "li" ]))
163
+
~implicit_aria_role:"list" ()
164
+
165
+
let menu =
166
+
Element_spec.make ~name:"menu" ~categories:[ Flow; Palpable ]
167
+
~content_model:
168
+
(Content_model.Zero_or_more
169
+
(Content_model.Choice
170
+
[
171
+
Content_model.Elements [ "li" ];
172
+
Content_model.Elements [ "script" ];
173
+
Content_model.Elements [ "template" ];
174
+
]))
175
+
~implicit_aria_role:"list" ()
176
+
177
+
let li =
178
+
Element_spec.make ~name:"li" ~categories:[]
179
+
~content_model:(Content_model.Categories [ Flow ])
180
+
~permitted_parents:[ "ol"; "ul"; "menu" ] ~implicit_aria_role:"listitem" ()
181
+
182
+
let dl =
183
+
Element_spec.make ~name:"dl" ~categories:[ Flow; Palpable ]
184
+
~content_model:
185
+
(Content_model.Choice
186
+
[
187
+
Content_model.Zero_or_more
188
+
(Content_model.Sequence
189
+
[
190
+
Content_model.One_or_more (Content_model.Elements [ "dt" ]);
191
+
Content_model.One_or_more (Content_model.Elements [ "dd" ]);
192
+
]);
193
+
Content_model.Zero_or_more
194
+
(Content_model.Choice
195
+
[
196
+
Content_model.Elements [ "div" ];
197
+
Content_model.Elements [ "script" ];
198
+
Content_model.Elements [ "template" ];
199
+
]);
200
+
])
201
+
()
202
+
203
+
let dt =
204
+
Element_spec.make ~name:"dt" ~categories:[]
205
+
~content_model:
206
+
(Content_model.Except
207
+
( Content_model.Categories [ Flow ],
208
+
[ Heading; Sectioning ] (* no heading/sectioning *) ))
209
+
~permitted_parents:[ "dl"; "div" ] ()
210
+
211
+
let dd =
212
+
Element_spec.make ~name:"dd" ~categories:[]
213
+
~content_model:(Content_model.Categories [ Flow ])
214
+
~permitted_parents:[ "dl"; "div" ] ()
215
+
216
+
let figure =
217
+
Element_spec.make ~name:"figure" ~categories:[ Flow; Palpable ]
218
+
~content_model:
219
+
(Content_model.Choice
220
+
[
221
+
Content_model.Sequence
222
+
[
223
+
Content_model.Optional
224
+
(Content_model.Elements [ "figcaption" ]);
225
+
Content_model.Categories [ Flow ];
226
+
];
227
+
Content_model.Sequence
228
+
[
229
+
Content_model.Categories [ Flow ];
230
+
Content_model.Optional
231
+
(Content_model.Elements [ "figcaption" ]);
232
+
];
233
+
])
234
+
~implicit_aria_role:"figure" ()
235
+
236
+
let figcaption =
237
+
Element_spec.make ~name:"figcaption" ~categories:[]
238
+
~content_model:(Content_model.Categories [ Flow ])
239
+
~permitted_parents:[ "figure" ] ()
240
+
241
+
let div =
242
+
Element_spec.make ~name:"div" ~categories:[ Flow; Palpable ]
243
+
~content_model:(Content_model.Categories [ Flow ]) ()
244
+
245
+
(** Element registry *)
246
+
247
+
let all =
248
+
[
249
+
(* Document structure *)
250
+
html;
251
+
head;
252
+
body;
253
+
title;
254
+
base;
255
+
link;
256
+
meta;
257
+
style;
258
+
(* Sectioning *)
259
+
article;
260
+
section;
261
+
nav;
262
+
aside;
263
+
h1;
264
+
h2;
265
+
h3;
266
+
h4;
267
+
h5;
268
+
h6;
269
+
hgroup;
270
+
header;
271
+
footer;
272
+
address;
273
+
main;
274
+
(* Grouping *)
275
+
p;
276
+
hr;
277
+
pre;
278
+
blockquote;
279
+
ol;
280
+
ul;
281
+
menu;
282
+
li;
283
+
dl;
284
+
dt;
285
+
dd;
286
+
figure;
287
+
figcaption;
288
+
div;
289
+
]
+147
lib/html5_checker/content_model/elements_document.mli
+147
lib/html5_checker/content_model/elements_document.mli
···
1
+
(** HTML5 structural and document element specifications.
2
+
3
+
This module defines element specifications for HTML5 document structure,
4
+
sectioning, and grouping elements according to the WHATWG HTML specification.
5
+
6
+
@see <https://html.spec.whatwg.org/multipage/> WHATWG HTML Specification *)
7
+
8
+
(** {1 Document structure elements} *)
9
+
10
+
val html : Element_spec.t
11
+
(** The [html] element represents the root of an HTML document. *)
12
+
13
+
val head : Element_spec.t
14
+
(** The [head] element represents a collection of metadata for the document. *)
15
+
16
+
val body : Element_spec.t
17
+
(** The [body] element represents the contents of the document. *)
18
+
19
+
val title : Element_spec.t
20
+
(** The [title] element represents the document's title or name. *)
21
+
22
+
val base : Element_spec.t
23
+
(** The [base] element specifies the document base URL and/or default browsing
24
+
context for navigation. *)
25
+
26
+
val link : Element_spec.t
27
+
(** The [link] element specifies relationships between the current document and
28
+
external resources. *)
29
+
30
+
val meta : Element_spec.t
31
+
(** The [meta] element represents various kinds of metadata that cannot be
32
+
expressed using other metadata elements. *)
33
+
34
+
val style : Element_spec.t
35
+
(** The [style] element allows authors to embed CSS style sheets in their documents. *)
36
+
37
+
(** {1 Sectioning elements} *)
38
+
39
+
val article : Element_spec.t
40
+
(** The [article] element represents a complete, or self-contained, composition
41
+
in a document, page, application, or site. *)
42
+
43
+
val section : Element_spec.t
44
+
(** The [section] element represents a generic section of a document or application. *)
45
+
46
+
val nav : Element_spec.t
47
+
(** The [nav] element represents a section of a page that links to other pages
48
+
or to parts within the page. *)
49
+
50
+
val aside : Element_spec.t
51
+
(** The [aside] element represents a section of a page that consists of content
52
+
that is tangentially related to the content around it. *)
53
+
54
+
val h1 : Element_spec.t
55
+
(** The [h1] element represents a heading at level 1. *)
56
+
57
+
val h2 : Element_spec.t
58
+
(** The [h2] element represents a heading at level 2. *)
59
+
60
+
val h3 : Element_spec.t
61
+
(** The [h3] element represents a heading at level 3. *)
62
+
63
+
val h4 : Element_spec.t
64
+
(** The [h4] element represents a heading at level 4. *)
65
+
66
+
val h5 : Element_spec.t
67
+
(** The [h5] element represents a heading at level 5. *)
68
+
69
+
val h6 : Element_spec.t
70
+
(** The [h6] element represents a heading at level 6. *)
71
+
72
+
val hgroup : Element_spec.t
73
+
(** The [hgroup] element represents a heading and related content, such as
74
+
subheadings, an alternative title, or a tagline. *)
75
+
76
+
val header : Element_spec.t
77
+
(** The [header] element represents introductory content for its nearest ancestor
78
+
sectioning content or sectioning root element. *)
79
+
80
+
val footer : Element_spec.t
81
+
(** The [footer] element represents a footer for its nearest ancestor sectioning
82
+
content or sectioning root element. *)
83
+
84
+
val address : Element_spec.t
85
+
(** The [address] element represents contact information for its nearest [article]
86
+
or [body] element ancestor. *)
87
+
88
+
val main : Element_spec.t
89
+
(** The [main] element represents the dominant contents of the document. *)
90
+
91
+
(** {1 Grouping elements} *)
92
+
93
+
val p : Element_spec.t
94
+
(** The [p] element represents a paragraph. *)
95
+
96
+
val hr : Element_spec.t
97
+
(** The [hr] element represents a thematic break between paragraph-level elements. *)
98
+
99
+
val pre : Element_spec.t
100
+
(** The [pre] element represents a block of preformatted text. *)
101
+
102
+
val blockquote : Element_spec.t
103
+
(** The [blockquote] element represents a section that is quoted from another source. *)
104
+
105
+
val ol : Element_spec.t
106
+
(** The [ol] element represents a list of items, where the items have been
107
+
intentionally ordered. *)
108
+
109
+
val ul : Element_spec.t
110
+
(** The [ul] element represents a list of items, where the order of the items
111
+
is not important. *)
112
+
113
+
val menu : Element_spec.t
114
+
(** The [menu] element represents a toolbar consisting of its contents, in the
115
+
form of an unordered list of items. *)
116
+
117
+
val li : Element_spec.t
118
+
(** The [li] element represents a list item. *)
119
+
120
+
val dl : Element_spec.t
121
+
(** The [dl] element represents an association list consisting of zero or more
122
+
name-value groups (a description list). *)
123
+
124
+
val dt : Element_spec.t
125
+
(** The [dt] element represents the term, or name, part of a term-description
126
+
group in a description list. *)
127
+
128
+
val dd : Element_spec.t
129
+
(** The [dd] element represents the description, definition, or value, part of
130
+
a term-description group in a description list. *)
131
+
132
+
val figure : Element_spec.t
133
+
(** The [figure] element represents some flow content, optionally with a caption,
134
+
that is self-contained and is typically referenced as a single unit from
135
+
the main flow of the document. *)
136
+
137
+
val figcaption : Element_spec.t
138
+
(** The [figcaption] element represents a caption or legend for the rest of the
139
+
contents of the parent [figure] element. *)
140
+
141
+
val div : Element_spec.t
142
+
(** The [div] element has no special meaning at all. It represents its children. *)
143
+
144
+
(** {1 Element registry} *)
145
+
146
+
val all : Element_spec.t list
147
+
(** [all] contains all element specifications defined in this module. *)
+220
lib/html5_checker/content_model/elements_embedded.ml
+220
lib/html5_checker/content_model/elements_embedded.ml
···
1
+
open Content_category
2
+
open Content_model
3
+
open Attr_spec
4
+
5
+
let picture =
6
+
Element_spec.make ~name:"picture"
7
+
~categories:[ Flow; Phrasing; Embedded; Palpable ]
8
+
~content_model:
9
+
(Sequence
10
+
[
11
+
Zero_or_more (Elements [ "source" ]);
12
+
Elements [ "img" ];
13
+
Zero_or_more (Elements [ "script"; "template" ]);
14
+
])
15
+
~attrs:[] ()
16
+
17
+
let source =
18
+
Element_spec.make ~name:"source" ~void:true
19
+
~categories:[]
20
+
~content_model:Nothing
21
+
~attrs:
22
+
[
23
+
make "type" ~datatype:"mime" ();
24
+
make "media" ~datatype:"media_query" ();
25
+
make "src" ~datatype:"url" ();
26
+
make "srcset" ~datatype:"srcset" ();
27
+
make "sizes" ();
28
+
make "width" ~datatype:"integer" ();
29
+
make "height" ~datatype:"integer" ();
30
+
]
31
+
()
32
+
33
+
let img =
34
+
Element_spec.make ~name:"img" ~void:true
35
+
~categories:[ Flow; Phrasing; Embedded; Palpable; Interactive ]
36
+
~content_model:Nothing
37
+
~attrs:
38
+
[
39
+
make "src" ~required:true ~datatype:"url" ();
40
+
make "alt" ();
41
+
make "srcset" ~datatype:"srcset" ();
42
+
make "sizes" ();
43
+
make "crossorigin" ~datatype:"crossorigin" ();
44
+
make "usemap" ~datatype:"hash" ();
45
+
make "ismap" ~datatype:"boolean" ();
46
+
make "width" ~datatype:"integer" ();
47
+
make "height" ~datatype:"integer" ();
48
+
make "referrerpolicy" ~datatype:"referrer" ();
49
+
make "decoding" ~datatype:"decoding" ();
50
+
make "loading" ~datatype:"loading" ();
51
+
make "fetchpriority" ~datatype:"fetchpriority" ();
52
+
]
53
+
~implicit_aria_role:"img" ()
54
+
55
+
let iframe =
56
+
Element_spec.make ~name:"iframe"
57
+
~categories:[ Flow; Phrasing; Embedded; Interactive; Palpable ]
58
+
~content_model:Nothing
59
+
~attrs:
60
+
[
61
+
make "src" ~datatype:"url" ();
62
+
make "srcdoc" ();
63
+
make "name" ~datatype:"target" ();
64
+
make "sandbox" ~datatype:"sandbox" ();
65
+
make "allow" ();
66
+
make "allowfullscreen" ~datatype:"boolean" ();
67
+
make "width" ~datatype:"integer" ();
68
+
make "height" ~datatype:"integer" ();
69
+
make "referrerpolicy" ~datatype:"referrer" ();
70
+
make "loading" ~datatype:"loading" ();
71
+
]
72
+
()
73
+
74
+
let embed =
75
+
Element_spec.make ~name:"embed" ~void:true
76
+
~categories:[ Flow; Phrasing; Embedded; Interactive; Palpable ]
77
+
~content_model:Nothing
78
+
~attrs:
79
+
[
80
+
make "src" ~required:true ~datatype:"url" ();
81
+
make "type" ~datatype:"mime" ();
82
+
make "width" ~datatype:"integer" ();
83
+
make "height" ~datatype:"integer" ();
84
+
]
85
+
()
86
+
87
+
let object_ =
88
+
Element_spec.make ~name:"object"
89
+
~categories:[ Flow; Phrasing; Embedded; Palpable; Interactive ]
90
+
~content_model:(Mixed [ Flow ])
91
+
~attrs:
92
+
[
93
+
make "data" ~datatype:"url" ();
94
+
make "type" ~datatype:"mime" ();
95
+
make "name" ~datatype:"target" ();
96
+
make "form" ~datatype:"id" ();
97
+
make "width" ~datatype:"integer" ();
98
+
make "height" ~datatype:"integer" ();
99
+
]
100
+
()
101
+
102
+
let param =
103
+
Element_spec.make ~name:"param" ~void:true
104
+
~categories:[]
105
+
~content_model:Nothing
106
+
~attrs:[ make "name" ~required:true (); make "value" ~required:true () ]
107
+
()
108
+
109
+
let video =
110
+
Element_spec.make ~name:"video"
111
+
~categories:[ Flow; Phrasing; Embedded; Palpable; Interactive ]
112
+
~content_model:
113
+
(Choice
114
+
[
115
+
Sequence
116
+
[
117
+
Zero_or_more (Elements [ "source" ]);
118
+
Zero_or_more (Elements [ "track" ]);
119
+
Zero_or_more Transparent;
120
+
];
121
+
Sequence [ Zero_or_more Transparent; Zero_or_more (Elements [ "track" ]) ];
122
+
])
123
+
~prohibited_ancestors:[ "audio"; "video" ]
124
+
~attrs:
125
+
[
126
+
make "src" ~datatype:"url" ();
127
+
make "crossorigin" ~datatype:"crossorigin" ();
128
+
make "poster" ~datatype:"url" ();
129
+
make "preload" ~datatype:"preload" ();
130
+
make "autoplay" ~datatype:"boolean" ();
131
+
make "playsinline" ~datatype:"boolean" ();
132
+
make "loop" ~datatype:"boolean" ();
133
+
make "muted" ~datatype:"boolean" ();
134
+
make "controls" ~datatype:"boolean" ();
135
+
make "width" ~datatype:"integer" ();
136
+
make "height" ~datatype:"integer" ();
137
+
]
138
+
()
139
+
140
+
let audio =
141
+
Element_spec.make ~name:"audio"
142
+
~categories:[ Flow; Phrasing; Embedded; Palpable; Interactive ]
143
+
~content_model:
144
+
(Choice
145
+
[
146
+
Sequence
147
+
[
148
+
Zero_or_more (Elements [ "source" ]);
149
+
Zero_or_more (Elements [ "track" ]);
150
+
Zero_or_more Transparent;
151
+
];
152
+
Sequence [ Zero_or_more Transparent; Zero_or_more (Elements [ "track" ]) ];
153
+
])
154
+
~prohibited_ancestors:[ "audio"; "video" ]
155
+
~attrs:
156
+
[
157
+
make "src" ~datatype:"url" ();
158
+
make "crossorigin" ~datatype:"crossorigin" ();
159
+
make "preload" ~datatype:"preload" ();
160
+
make "autoplay" ~datatype:"boolean" ();
161
+
make "loop" ~datatype:"boolean" ();
162
+
make "muted" ~datatype:"boolean" ();
163
+
make "controls" ~datatype:"boolean" ();
164
+
]
165
+
()
166
+
167
+
let track =
168
+
Element_spec.make ~name:"track" ~void:true
169
+
~categories:[]
170
+
~content_model:Nothing
171
+
~permitted_parents:[ "audio"; "video" ]
172
+
~attrs:
173
+
[
174
+
make "kind" ~datatype:"kind" ();
175
+
make "src" ~required:true ~datatype:"url" ();
176
+
make "srclang" ~datatype:"language" ();
177
+
make "label" ();
178
+
make "default" ~datatype:"boolean" ();
179
+
]
180
+
()
181
+
182
+
let map =
183
+
Element_spec.make ~name:"map"
184
+
~categories:[ Flow; Phrasing; Palpable ]
185
+
~content_model:Transparent
186
+
~attrs:[ make "name" ~required:true () ] ()
187
+
188
+
let area =
189
+
Element_spec.make ~name:"area" ~void:true
190
+
~categories:[ Flow; Phrasing ]
191
+
~content_model:Nothing
192
+
~attrs:
193
+
[
194
+
make "alt" ();
195
+
make "coords" ~datatype:"coords" ();
196
+
make "shape" ~datatype:"shape" ();
197
+
make "href" ~datatype:"url" ();
198
+
make "target" ~datatype:"target" ();
199
+
make "download" ();
200
+
make "ping" ();
201
+
make "rel" ~datatype:"link_type" ();
202
+
make "referrerpolicy" ~datatype:"referrer" ();
203
+
]
204
+
~implicit_aria_role:"link" ()
205
+
206
+
let all =
207
+
[
208
+
picture;
209
+
source;
210
+
img;
211
+
iframe;
212
+
embed;
213
+
object_;
214
+
param;
215
+
video;
216
+
audio;
217
+
track;
218
+
map;
219
+
area;
220
+
]
+54
lib/html5_checker/content_model/elements_embedded.mli
+54
lib/html5_checker/content_model/elements_embedded.mli
···
1
+
(** HTML5 embedded content element specifications.
2
+
3
+
Embedded content elements import resources into the document.
4
+
See https://html.spec.whatwg.org/multipage/embedded-content.html *)
5
+
6
+
val picture : Element_spec.t
7
+
(** The picture element contains zero or more source elements followed by
8
+
one img element to offer alternative versions of an image for different
9
+
display scenarios. *)
10
+
11
+
val source : Element_spec.t
12
+
(** The source element specifies multiple media resources for picture, audio,
13
+
and video elements. It is a void element. *)
14
+
15
+
val img : Element_spec.t
16
+
(** The img element represents an image. It is a void element. *)
17
+
18
+
val iframe : Element_spec.t
19
+
(** The iframe element represents a nested browsing context. *)
20
+
21
+
val embed : Element_spec.t
22
+
(** The embed element provides an integration point for an external application
23
+
or interactive content. It is a void element. *)
24
+
25
+
val object_ : Element_spec.t
26
+
(** The object element can represent an external resource, which is treated as
27
+
an image, a nested browsing context, or a plugin. *)
28
+
29
+
val param : Element_spec.t
30
+
(** The param element defines parameters for plugins invoked by object elements.
31
+
Deprecated in favor of using data attributes. It is a void element. *)
32
+
33
+
val video : Element_spec.t
34
+
(** The video element is used for playing videos or movies, and audio files
35
+
with captions. *)
36
+
37
+
val audio : Element_spec.t
38
+
(** The audio element represents a sound or audio stream. *)
39
+
40
+
val track : Element_spec.t
41
+
(** The track element allows authors to specify explicit external timed text
42
+
tracks for media elements. It is a void element. *)
43
+
44
+
val map : Element_spec.t
45
+
(** The map element, in conjunction with img and area elements, defines an
46
+
image map. *)
47
+
48
+
val area : Element_spec.t
49
+
(** The area element represents either a hyperlink with some text and a
50
+
corresponding area on an image map, or a dead area on an image map.
51
+
It is a void element. *)
52
+
53
+
val all : Element_spec.t list
54
+
(** List of all embedded content element specifications. *)
+237
lib/html5_checker/content_model/elements_form.ml
+237
lib/html5_checker/content_model/elements_form.ml
···
1
+
open Content_category
2
+
open Content_model
3
+
4
+
let form =
5
+
Element_spec.make ~name:"form"
6
+
~categories:[Flow; Palpable]
7
+
~content_model:(Except (Categories [Flow], []))
8
+
~attrs:[
9
+
Attr_spec.make "accept-charset" ~datatype:"string" ();
10
+
Attr_spec.make "action" ~datatype:"url" ();
11
+
Attr_spec.make "autocomplete" ~datatype:"autocomplete" ();
12
+
Attr_spec.make "enctype" ~datatype:"encoding" ();
13
+
Attr_spec.make "method" ~datatype:"method" ();
14
+
Attr_spec.make "name" ~datatype:"string" ();
15
+
Attr_spec.make "novalidate" ~datatype:"boolean" ();
16
+
Attr_spec.make "target" ~datatype:"target" ();
17
+
Attr_spec.make "rel" ~datatype:"relationship" ();
18
+
]
19
+
~prohibited_ancestors:["form"]
20
+
()
21
+
22
+
let label =
23
+
Element_spec.make ~name:"label"
24
+
~categories:[Flow; Phrasing; Interactive; Palpable]
25
+
~content_model:(Categories [Phrasing])
26
+
~attrs:[
27
+
Attr_spec.make "for" ~datatype:"idref" ();
28
+
]
29
+
()
30
+
31
+
let input =
32
+
Element_spec.make ~name:"input"
33
+
~void:true
34
+
~categories:[Flow; Phrasing; Interactive; Palpable]
35
+
~content_model:Nothing
36
+
~attrs:[
37
+
Attr_spec.make "accept" ~datatype:"string" ();
38
+
Attr_spec.make "alt" ~datatype:"string" ();
39
+
Attr_spec.make "autocomplete" ~datatype:"autocomplete" ();
40
+
Attr_spec.make "checked" ~datatype:"boolean" ();
41
+
Attr_spec.make "dirname" ~datatype:"directionality" ();
42
+
Attr_spec.make "disabled" ~datatype:"boolean" ();
43
+
Attr_spec.make "form" ~datatype:"idref" ();
44
+
Attr_spec.make "formaction" ~datatype:"url" ();
45
+
Attr_spec.make "formenctype" ~datatype:"encoding" ();
46
+
Attr_spec.make "formmethod" ~datatype:"method" ();
47
+
Attr_spec.make "formnovalidate" ~datatype:"boolean" ();
48
+
Attr_spec.make "formtarget" ~datatype:"target" ();
49
+
Attr_spec.make "height" ~datatype:"integer" ();
50
+
Attr_spec.make "list" ~datatype:"idref" ();
51
+
Attr_spec.make "max" ~datatype:"string" ();
52
+
Attr_spec.make "maxlength" ~datatype:"integer" ();
53
+
Attr_spec.make "min" ~datatype:"string" ();
54
+
Attr_spec.make "minlength" ~datatype:"integer" ();
55
+
Attr_spec.make "multiple" ~datatype:"boolean" ();
56
+
Attr_spec.make "name" ~datatype:"string" ();
57
+
Attr_spec.make "pattern" ~datatype:"string" ();
58
+
Attr_spec.make "placeholder" ~datatype:"string" ();
59
+
Attr_spec.make "popovertarget" ~datatype:"string" ();
60
+
Attr_spec.make "popovertargetaction" ~datatype:"popovertargetaction" ();
61
+
Attr_spec.make "readonly" ~datatype:"boolean" ();
62
+
Attr_spec.make "required" ~datatype:"boolean" ();
63
+
Attr_spec.make "size" ~datatype:"integer" ();
64
+
Attr_spec.make "src" ~datatype:"url" ();
65
+
Attr_spec.make "step" ~datatype:"string" ();
66
+
Attr_spec.make "type" ~enum_values:[
67
+
"hidden"; "text"; "search"; "tel"; "url"; "email"; "password";
68
+
"date"; "month"; "week"; "time"; "datetime-local"; "number";
69
+
"range"; "color"; "checkbox"; "radio"; "file"; "submit"; "image";
70
+
"reset"; "button"
71
+
] ();
72
+
Attr_spec.make "value" ~datatype:"string" ();
73
+
Attr_spec.make "width" ~datatype:"integer" ();
74
+
]
75
+
()
76
+
77
+
let button =
78
+
Element_spec.make ~name:"button"
79
+
~categories:[Flow; Phrasing; Interactive; Palpable]
80
+
~content_model:(Except (Categories [Phrasing], [Interactive]))
81
+
~attrs:[
82
+
Attr_spec.make "disabled" ~datatype:"boolean" ();
83
+
Attr_spec.make "form" ~datatype:"idref" ();
84
+
Attr_spec.make "formaction" ~datatype:"url" ();
85
+
Attr_spec.make "formenctype" ~datatype:"encoding" ();
86
+
Attr_spec.make "formmethod" ~datatype:"method" ();
87
+
Attr_spec.make "formnovalidate" ~datatype:"boolean" ();
88
+
Attr_spec.make "formtarget" ~datatype:"target" ();
89
+
Attr_spec.make "name" ~datatype:"string" ();
90
+
Attr_spec.make "popovertarget" ~datatype:"string" ();
91
+
Attr_spec.make "popovertargetaction" ~datatype:"popovertargetaction" ();
92
+
Attr_spec.make "type" ~enum_values:["submit"; "reset"; "button"] ();
93
+
Attr_spec.make "value" ~datatype:"string" ();
94
+
]
95
+
()
96
+
97
+
let select =
98
+
Element_spec.make ~name:"select"
99
+
~categories:[Flow; Phrasing; Interactive; Palpable]
100
+
~content_model:(Elements ["option"; "optgroup"; "script"; "template"])
101
+
~attrs:[
102
+
Attr_spec.make "autocomplete" ~datatype:"autocomplete" ();
103
+
Attr_spec.make "disabled" ~datatype:"boolean" ();
104
+
Attr_spec.make "form" ~datatype:"idref" ();
105
+
Attr_spec.make "multiple" ~datatype:"boolean" ();
106
+
Attr_spec.make "name" ~datatype:"string" ();
107
+
Attr_spec.make "required" ~datatype:"boolean" ();
108
+
Attr_spec.make "size" ~datatype:"integer" ();
109
+
]
110
+
~implicit_aria_role:"combobox"
111
+
()
112
+
113
+
let datalist =
114
+
Element_spec.make ~name:"datalist"
115
+
~categories:[Flow; Phrasing]
116
+
~content_model:(Choice [
117
+
Categories [Phrasing];
118
+
Elements ["option"; "script"; "template"]
119
+
])
120
+
~implicit_aria_role:"listbox"
121
+
()
122
+
123
+
let optgroup =
124
+
Element_spec.make ~name:"optgroup"
125
+
~categories:[]
126
+
~content_model:(Elements ["option"; "script"; "template"])
127
+
~attrs:[
128
+
Attr_spec.make "disabled" ~datatype:"boolean" ();
129
+
Attr_spec.make "label" ~required:true ~datatype:"string" ();
130
+
]
131
+
~permitted_parents:["select"]
132
+
~implicit_aria_role:"group"
133
+
()
134
+
135
+
let option =
136
+
Element_spec.make ~name:"option"
137
+
~categories:[]
138
+
~content_model:Text
139
+
~attrs:[
140
+
Attr_spec.make "disabled" ~datatype:"boolean" ();
141
+
Attr_spec.make "label" ~datatype:"string" ();
142
+
Attr_spec.make "selected" ~datatype:"boolean" ();
143
+
Attr_spec.make "value" ~datatype:"string" ();
144
+
]
145
+
~permitted_parents:["select"; "datalist"; "optgroup"]
146
+
~implicit_aria_role:"option"
147
+
()
148
+
149
+
let textarea =
150
+
Element_spec.make ~name:"textarea"
151
+
~categories:[Flow; Phrasing; Interactive; Palpable]
152
+
~content_model:Text
153
+
~attrs:[
154
+
Attr_spec.make "autocomplete" ~datatype:"autocomplete" ();
155
+
Attr_spec.make "cols" ~datatype:"integer" ();
156
+
Attr_spec.make "dirname" ~datatype:"directionality" ();
157
+
Attr_spec.make "disabled" ~datatype:"boolean" ();
158
+
Attr_spec.make "form" ~datatype:"idref" ();
159
+
Attr_spec.make "maxlength" ~datatype:"integer" ();
160
+
Attr_spec.make "minlength" ~datatype:"integer" ();
161
+
Attr_spec.make "name" ~datatype:"string" ();
162
+
Attr_spec.make "placeholder" ~datatype:"string" ();
163
+
Attr_spec.make "readonly" ~datatype:"boolean" ();
164
+
Attr_spec.make "required" ~datatype:"boolean" ();
165
+
Attr_spec.make "rows" ~datatype:"integer" ();
166
+
Attr_spec.make "wrap" ~enum_values:["soft"; "hard"] ();
167
+
]
168
+
~implicit_aria_role:"textbox"
169
+
()
170
+
171
+
let output =
172
+
Element_spec.make ~name:"output"
173
+
~categories:[Flow; Phrasing; Palpable]
174
+
~content_model:(Categories [Phrasing])
175
+
~attrs:[
176
+
Attr_spec.make "for" ~datatype:"idrefs" ();
177
+
Attr_spec.make "form" ~datatype:"idref" ();
178
+
Attr_spec.make "name" ~datatype:"string" ();
179
+
]
180
+
~implicit_aria_role:"status"
181
+
()
182
+
183
+
let progress =
184
+
Element_spec.make ~name:"progress"
185
+
~categories:[Flow; Phrasing; Palpable]
186
+
~content_model:(Except (Categories [Phrasing], []))
187
+
~attrs:[
188
+
Attr_spec.make "value" ~datatype:"float" ();
189
+
Attr_spec.make "max" ~datatype:"float" ();
190
+
]
191
+
~prohibited_ancestors:["progress"]
192
+
()
193
+
194
+
let meter =
195
+
Element_spec.make ~name:"meter"
196
+
~categories:[Flow; Phrasing; Palpable]
197
+
~content_model:(Except (Categories [Phrasing], []))
198
+
~attrs:[
199
+
Attr_spec.make "value" ~required:true ~datatype:"float" ();
200
+
Attr_spec.make "min" ~datatype:"float" ();
201
+
Attr_spec.make "max" ~datatype:"float" ();
202
+
Attr_spec.make "low" ~datatype:"float" ();
203
+
Attr_spec.make "high" ~datatype:"float" ();
204
+
Attr_spec.make "optimum" ~datatype:"float" ();
205
+
]
206
+
~prohibited_ancestors:["meter"]
207
+
()
208
+
209
+
let fieldset =
210
+
Element_spec.make ~name:"fieldset"
211
+
~categories:[Flow; Palpable]
212
+
~content_model:(Sequence [
213
+
Optional (Elements ["legend"]);
214
+
Categories [Flow]
215
+
])
216
+
~attrs:[
217
+
Attr_spec.make "disabled" ~datatype:"boolean" ();
218
+
Attr_spec.make "form" ~datatype:"idref" ();
219
+
Attr_spec.make "name" ~datatype:"string" ();
220
+
]
221
+
~implicit_aria_role:"group"
222
+
()
223
+
224
+
let legend =
225
+
Element_spec.make ~name:"legend"
226
+
~categories:[]
227
+
~content_model:(Choice [
228
+
Categories [Phrasing];
229
+
Categories [Phrasing; Heading]
230
+
])
231
+
~permitted_parents:["fieldset"]
232
+
()
233
+
234
+
let all = [
235
+
form; label; input; button; select; datalist; optgroup; option;
236
+
textarea; output; progress; meter; fieldset; legend
237
+
]
+109
lib/html5_checker/content_model/elements_form.mli
+109
lib/html5_checker/content_model/elements_form.mli
···
1
+
(** HTML5 form element specifications.
2
+
3
+
Form-associated elements for user input and form submission.
4
+
See https://html.spec.whatwg.org/multipage/forms.html *)
5
+
6
+
(** {1 Form Container} *)
7
+
8
+
val form : Element_spec.t
9
+
(** The form element represents a hyperlink that can be manipulated through a
10
+
collection of form-associated elements, some of which can represent editable
11
+
values that can be submitted to a server for processing.
12
+
13
+
Content model: Flow content, but must not contain form element descendants. *)
14
+
15
+
(** {1 Form Controls} *)
16
+
17
+
val label : Element_spec.t
18
+
(** The label element represents a caption in a user interface. The caption can
19
+
be associated with a specific form control, known as the label element's
20
+
labeled control.
21
+
22
+
Content model: Phrasing content, but must not contain descendant label
23
+
elements, and must not contain form control descendants other than the
24
+
labeled control. *)
25
+
26
+
val input : Element_spec.t
27
+
(** The input element represents a typed data field, usually with a form control
28
+
to allow the user to edit the data. It is a void element.
29
+
30
+
Content model: Nothing (void element).
31
+
32
+
The type attribute controls the data type (and associated control) of the
33
+
element. *)
34
+
35
+
val button : Element_spec.t
36
+
(** The button element represents a button labeled by its contents.
37
+
38
+
Content model: Phrasing content, but must not contain interactive content
39
+
descendants. *)
40
+
41
+
val select : Element_spec.t
42
+
(** The select element represents a control for selecting amongst a set of
43
+
options.
44
+
45
+
Content model: Zero or more option, optgroup, and script-supporting elements. *)
46
+
47
+
val datalist : Element_spec.t
48
+
(** The datalist element represents a set of option elements that represent
49
+
predefined options for other controls. In the rendering, the datalist element
50
+
represents nothing and it, along with its children, should be hidden.
51
+
52
+
Content model: Either phrasing content or zero or more option and
53
+
script-supporting elements. *)
54
+
55
+
val optgroup : Element_spec.t
56
+
(** The optgroup element represents a group of option elements with a common
57
+
label.
58
+
59
+
Content model: Zero or more option and script-supporting elements. *)
60
+
61
+
val option : Element_spec.t
62
+
(** The option element represents an option in a select element or as part of a
63
+
list of suggestions in a datalist element.
64
+
65
+
Content model: Text, or empty if label attribute is present. *)
66
+
67
+
val textarea : Element_spec.t
68
+
(** The textarea element represents a multiline plain text edit control for the
69
+
element's raw value.
70
+
71
+
Content model: Text. *)
72
+
73
+
val output : Element_spec.t
74
+
(** The output element represents the result of a calculation performed by the
75
+
application, or the result of a user action.
76
+
77
+
Content model: Phrasing content. *)
78
+
79
+
val progress : Element_spec.t
80
+
(** The progress element represents the completion progress of a task.
81
+
82
+
Content model: Phrasing content, but must not contain progress element
83
+
descendants. *)
84
+
85
+
val meter : Element_spec.t
86
+
(** The meter element represents a scalar measurement within a known range, or a
87
+
fractional value; for example disk usage, the relevance of a query result, or
88
+
the fraction of a voting population to have selected a particular candidate.
89
+
90
+
Content model: Phrasing content, but must not contain meter element
91
+
descendants. *)
92
+
93
+
val fieldset : Element_spec.t
94
+
(** The fieldset element represents a set of form controls (or other content)
95
+
grouped together, optionally with a caption.
96
+
97
+
Content model: Optionally a legend element, followed by flow content. *)
98
+
99
+
val legend : Element_spec.t
100
+
(** The legend element represents a caption for the rest of the contents of the
101
+
legend element's parent fieldset element, if any.
102
+
103
+
Content model: Phrasing content, and optionally intermixed with heading
104
+
content. *)
105
+
106
+
(** {1 Element List} *)
107
+
108
+
val all : Element_spec.t list
109
+
(** List of all form element specifications. *)
+82
lib/html5_checker/content_model/elements_interactive.ml
+82
lib/html5_checker/content_model/elements_interactive.ml
···
1
+
open Content_category
2
+
open Content_model
3
+
open Attr_spec
4
+
5
+
let details =
6
+
Element_spec.make ~name:"details"
7
+
~categories:[ Flow; Sectioning; Palpable; Interactive ]
8
+
~content_model:
9
+
(Sequence [ Elements [ "summary" ]; Zero_or_more (Categories [ Flow ]) ])
10
+
~attrs:[ make "open" ~datatype:"boolean" () ] ()
11
+
12
+
let summary =
13
+
Element_spec.make ~name:"summary"
14
+
~categories:[]
15
+
~content_model:
16
+
(Choice [ Categories [ Phrasing ]; Categories [ Heading ] ])
17
+
~permitted_parents:[ "details" ]
18
+
~attrs:[] ()
19
+
20
+
let dialog =
21
+
Element_spec.make ~name:"dialog"
22
+
~categories:[ Flow; Sectioning ]
23
+
~content_model:(Categories [ Flow ])
24
+
~attrs:[ make "open" ~datatype:"boolean" () ] ~implicit_aria_role:"dialog" ()
25
+
26
+
let script =
27
+
Element_spec.make ~name:"script"
28
+
~categories:[ Metadata; Flow; Phrasing; Script_supporting ]
29
+
~content_model:Text
30
+
~attrs:
31
+
[
32
+
make "src" ~datatype:"url" ();
33
+
make "type" ();
34
+
make "nomodule" ~datatype:"boolean" ();
35
+
make "async" ~datatype:"boolean" ();
36
+
make "defer" ~datatype:"boolean" ();
37
+
make "crossorigin" ~datatype:"crossorigin" ();
38
+
make "integrity" ~datatype:"integrity" ();
39
+
make "referrerpolicy" ~datatype:"referrer" ();
40
+
make "blocking" ();
41
+
make "fetchpriority" ~datatype:"fetchpriority" ();
42
+
]
43
+
()
44
+
45
+
let noscript =
46
+
Element_spec.make ~name:"noscript"
47
+
~categories:[ Metadata; Flow; Phrasing ]
48
+
~content_model:
49
+
(Choice
50
+
[
51
+
(* In head: link, style, meta *)
52
+
Categories [ Metadata ];
53
+
(* In body: transparent, but no noscript descendants *)
54
+
Transparent;
55
+
])
56
+
~prohibited_ancestors:[ "noscript" ]
57
+
~attrs:[] ()
58
+
59
+
let template =
60
+
Element_spec.make ~name:"template"
61
+
~categories:[ Metadata; Flow; Phrasing; Script_supporting ]
62
+
~content_model:Nothing
63
+
~attrs:[] ()
64
+
65
+
let slot =
66
+
Element_spec.make ~name:"slot"
67
+
~categories:[ Flow; Phrasing ]
68
+
~content_model:Transparent
69
+
~attrs:[ make "name" () ] ()
70
+
71
+
let canvas =
72
+
Element_spec.make ~name:"canvas"
73
+
~categories:[ Flow; Phrasing; Embedded; Palpable ]
74
+
~content_model:Transparent
75
+
~attrs:
76
+
[
77
+
make "width" ~datatype:"integer" ();
78
+
make "height" ~datatype:"integer" ();
79
+
]
80
+
()
81
+
82
+
let all = [ details; summary; dialog; script; noscript; template; slot; canvas ]
+41
lib/html5_checker/content_model/elements_interactive.mli
+41
lib/html5_checker/content_model/elements_interactive.mli
···
1
+
(** HTML5 interactive and scripting element specifications.
2
+
3
+
Interactive elements are specifically intended for user interaction,
4
+
and scripting elements support scripts in the document.
5
+
See https://html.spec.whatwg.org/multipage/interactive-elements.html
6
+
and https://html.spec.whatwg.org/multipage/scripting.html *)
7
+
8
+
val details : Element_spec.t
9
+
(** The details element represents a disclosure widget from which the user can
10
+
obtain additional information or controls. *)
11
+
12
+
val summary : Element_spec.t
13
+
(** The summary element represents a summary, caption, or legend for the rest
14
+
of the contents of the summary element's parent details element. *)
15
+
16
+
val dialog : Element_spec.t
17
+
(** The dialog element represents a part of an application that a user
18
+
interacts with to perform a task, such as a dialog box or modal. *)
19
+
20
+
val script : Element_spec.t
21
+
(** The script element allows authors to include dynamic script and data blocks
22
+
in their documents. *)
23
+
24
+
val noscript : Element_spec.t
25
+
(** The noscript element represents fallback content for when scripting is
26
+
disabled or not supported. *)
27
+
28
+
val template : Element_spec.t
29
+
(** The template element is used to declare fragments of HTML that can be cloned
30
+
and inserted in the document by script. *)
31
+
32
+
val slot : Element_spec.t
33
+
(** The slot element is used as a placeholder inside a web component that users
34
+
can fill with their own markup. *)
35
+
36
+
val canvas : Element_spec.t
37
+
(** The canvas element provides scripts with a resolution-dependent bitmap
38
+
canvas for rendering graphs, game graphics, art, or other visual images. *)
39
+
40
+
val all : Element_spec.t list
41
+
(** List of all interactive and scripting element specifications. *)
+133
lib/html5_checker/content_model/elements_table.ml
+133
lib/html5_checker/content_model/elements_table.ml
···
1
+
open Content_category
2
+
open Content_model
3
+
open Attr_spec
4
+
5
+
let table =
6
+
Element_spec.make ~name:"table"
7
+
~categories:[ Flow; Palpable ]
8
+
~content_model:
9
+
(Sequence
10
+
[
11
+
Optional (Elements [ "caption" ]);
12
+
Zero_or_more (Elements [ "colgroup" ]);
13
+
Optional (Elements [ "thead" ]);
14
+
Choice
15
+
[
16
+
Sequence
17
+
[
18
+
Zero_or_more (Elements [ "tbody" ]);
19
+
Optional (Elements [ "tfoot" ]);
20
+
];
21
+
Sequence
22
+
[
23
+
Optional (Elements [ "tfoot" ]);
24
+
Zero_or_more (Elements [ "tbody" ]);
25
+
];
26
+
One_or_more (Elements [ "tr" ]);
27
+
];
28
+
Zero_or_more (Elements [ "script"; "template" ]);
29
+
])
30
+
~attrs:[] ~implicit_aria_role:"table" ()
31
+
32
+
let caption =
33
+
Element_spec.make ~name:"caption"
34
+
~categories:[]
35
+
~content_model:(Categories [ Flow ])
36
+
~permitted_parents:[ "table" ]
37
+
~prohibited_ancestors:[ "table" ]
38
+
~attrs:[] ()
39
+
40
+
let colgroup =
41
+
Element_spec.make ~name:"colgroup"
42
+
~categories:[]
43
+
~content_model:
44
+
(Choice
45
+
[
46
+
Zero_or_more (Elements [ "col" ]);
47
+
Zero_or_more (Elements [ "template" ]);
48
+
])
49
+
~permitted_parents:[ "table" ]
50
+
~attrs:[ make "span" ~datatype:"integer" () ] ()
51
+
52
+
let col =
53
+
Element_spec.make ~name:"col" ~void:true
54
+
~categories:[]
55
+
~content_model:Nothing
56
+
~permitted_parents:[ "colgroup" ]
57
+
~attrs:[ make "span" ~datatype:"integer" () ] ()
58
+
59
+
let tbody =
60
+
Element_spec.make ~name:"tbody"
61
+
~categories:[]
62
+
~content_model:
63
+
(Choice
64
+
[
65
+
One_or_more (Elements [ "tr" ]); Zero_or_more (Elements [ "script"; "template" ]);
66
+
])
67
+
~permitted_parents:[ "table" ]
68
+
~attrs:[] ~implicit_aria_role:"rowgroup" ()
69
+
70
+
let thead =
71
+
Element_spec.make ~name:"thead"
72
+
~categories:[]
73
+
~content_model:
74
+
(Choice
75
+
[
76
+
One_or_more (Elements [ "tr" ]); Zero_or_more (Elements [ "script"; "template" ]);
77
+
])
78
+
~permitted_parents:[ "table" ]
79
+
~attrs:[] ~implicit_aria_role:"rowgroup" ()
80
+
81
+
let tfoot =
82
+
Element_spec.make ~name:"tfoot"
83
+
~categories:[]
84
+
~content_model:
85
+
(Choice
86
+
[
87
+
One_or_more (Elements [ "tr" ]); Zero_or_more (Elements [ "script"; "template" ]);
88
+
])
89
+
~permitted_parents:[ "table" ]
90
+
~attrs:[] ~implicit_aria_role:"rowgroup" ()
91
+
92
+
let tr =
93
+
Element_spec.make ~name:"tr"
94
+
~categories:[]
95
+
~content_model:
96
+
(Choice
97
+
[
98
+
One_or_more (Elements [ "td"; "th" ]);
99
+
Zero_or_more (Elements [ "script"; "template" ]);
100
+
])
101
+
~permitted_parents:[ "table"; "thead"; "tbody"; "tfoot" ]
102
+
~attrs:[] ~implicit_aria_role:"row" ()
103
+
104
+
let td =
105
+
Element_spec.make ~name:"td"
106
+
~categories:[ Sectioning ]
107
+
~content_model:(Categories [ Flow ])
108
+
~permitted_parents:[ "tr" ]
109
+
~attrs:
110
+
[
111
+
make "colspan" ~datatype:"integer" ();
112
+
make "rowspan" ~datatype:"integer" ();
113
+
make "headers" ();
114
+
]
115
+
~implicit_aria_role:"cell" ()
116
+
117
+
let th =
118
+
Element_spec.make ~name:"th"
119
+
~categories:[]
120
+
~content_model:(Mixed [ Flow ])
121
+
~prohibited_ancestors:[ "header"; "footer" ]
122
+
~permitted_parents:[ "tr" ]
123
+
~attrs:
124
+
[
125
+
make "colspan" ~datatype:"integer" ();
126
+
make "rowspan" ~datatype:"integer" ();
127
+
make "headers" ();
128
+
make "scope" ~datatype:"scope" ();
129
+
make "abbr" ();
130
+
]
131
+
~implicit_aria_role:"columnheader" ()
132
+
133
+
let all = [ table; caption; colgroup; col; tbody; thead; tfoot; tr; td; th ]
+42
lib/html5_checker/content_model/elements_table.mli
+42
lib/html5_checker/content_model/elements_table.mli
···
1
+
(** HTML5 table element specifications.
2
+
3
+
Table elements represent data with more than one dimension.
4
+
See https://html.spec.whatwg.org/multipage/tables.html *)
5
+
6
+
val table : Element_spec.t
7
+
(** The table element represents data with more than one dimension in the form
8
+
of a table. *)
9
+
10
+
val caption : Element_spec.t
11
+
(** The caption element represents the title of the table. *)
12
+
13
+
val colgroup : Element_spec.t
14
+
(** The colgroup element represents a group of one or more columns in the table. *)
15
+
16
+
val col : Element_spec.t
17
+
(** The col element represents one or more columns in the table. It is a void
18
+
element. *)
19
+
20
+
val tbody : Element_spec.t
21
+
(** The tbody element represents a block of rows that consist of a body of data
22
+
for the table. *)
23
+
24
+
val thead : Element_spec.t
25
+
(** The thead element represents the block of rows that consist of the column
26
+
labels (headers) for the table. *)
27
+
28
+
val tfoot : Element_spec.t
29
+
(** The tfoot element represents the block of rows that consist of the column
30
+
summaries (footers) for the table. *)
31
+
32
+
val tr : Element_spec.t
33
+
(** The tr element represents a row of cells in a table. *)
34
+
35
+
val td : Element_spec.t
36
+
(** The td element represents a data cell in a table. *)
37
+
38
+
val th : Element_spec.t
39
+
(** The th element represents a header cell in a table. *)
40
+
41
+
val all : Element_spec.t list
42
+
(** List of all table element specifications. *)
+231
lib/html5_checker/content_model/elements_text.ml
+231
lib/html5_checker/content_model/elements_text.ml
···
1
+
open Content_category
2
+
open Content_model
3
+
4
+
let a =
5
+
Element_spec.make ~name:"a"
6
+
~categories:[Flow; Phrasing; Interactive; Palpable]
7
+
~content_model:(Except (Transparent, [Interactive]))
8
+
~attrs:[
9
+
Attr_spec.make "href" ~datatype:"url" ();
10
+
Attr_spec.make "target" ~datatype:"target" ();
11
+
Attr_spec.make "download" ~datatype:"string" ();
12
+
Attr_spec.make "ping" ~datatype:"string" ();
13
+
Attr_spec.make "referrerpolicy" ~datatype:"referrerpolicy" ();
14
+
Attr_spec.make "rel" ~datatype:"relationship" ();
15
+
Attr_spec.make "hreflang" ~datatype:"string" ();
16
+
Attr_spec.make "type" ~datatype:"mimetype" ();
17
+
]
18
+
~implicit_aria_role:"link"
19
+
()
20
+
21
+
let em =
22
+
Element_spec.make ~name:"em"
23
+
~categories:[Flow; Phrasing; Palpable]
24
+
~content_model:(Categories [Phrasing])
25
+
()
26
+
27
+
let strong =
28
+
Element_spec.make ~name:"strong"
29
+
~categories:[Flow; Phrasing; Palpable]
30
+
~content_model:(Categories [Phrasing])
31
+
()
32
+
33
+
let small =
34
+
Element_spec.make ~name:"small"
35
+
~categories:[Flow; Phrasing; Palpable]
36
+
~content_model:(Categories [Phrasing])
37
+
()
38
+
39
+
let s =
40
+
Element_spec.make ~name:"s"
41
+
~categories:[Flow; Phrasing; Palpable]
42
+
~content_model:(Categories [Phrasing])
43
+
()
44
+
45
+
let cite =
46
+
Element_spec.make ~name:"cite"
47
+
~categories:[Flow; Phrasing; Palpable]
48
+
~content_model:(Categories [Phrasing])
49
+
()
50
+
51
+
let q =
52
+
Element_spec.make ~name:"q"
53
+
~categories:[Flow; Phrasing; Palpable]
54
+
~content_model:(Categories [Phrasing])
55
+
~attrs:[
56
+
Attr_spec.make "cite" ~datatype:"url" ();
57
+
]
58
+
()
59
+
60
+
let dfn =
61
+
Element_spec.make ~name:"dfn"
62
+
~categories:[Flow; Phrasing; Palpable]
63
+
~content_model:(Except (Categories [Phrasing], []))
64
+
~prohibited_ancestors:["dfn"]
65
+
()
66
+
67
+
let abbr =
68
+
Element_spec.make ~name:"abbr"
69
+
~categories:[Flow; Phrasing; Palpable]
70
+
~content_model:(Categories [Phrasing])
71
+
()
72
+
73
+
let ruby =
74
+
Element_spec.make ~name:"ruby"
75
+
~categories:[Flow; Phrasing; Palpable]
76
+
~content_model:(Categories [Phrasing])
77
+
()
78
+
79
+
let rt =
80
+
Element_spec.make ~name:"rt"
81
+
~categories:[Phrasing]
82
+
~content_model:(Categories [Phrasing])
83
+
~permitted_parents:["ruby"; "rtc"]
84
+
()
85
+
86
+
let rp =
87
+
Element_spec.make ~name:"rp"
88
+
~categories:[]
89
+
~content_model:Text
90
+
~permitted_parents:["ruby"; "rtc"]
91
+
()
92
+
93
+
let data =
94
+
Element_spec.make ~name:"data"
95
+
~categories:[Flow; Phrasing; Palpable]
96
+
~content_model:(Categories [Phrasing])
97
+
~attrs:[
98
+
Attr_spec.make "value" ~required:true ~datatype:"string" ();
99
+
]
100
+
()
101
+
102
+
let time =
103
+
Element_spec.make ~name:"time"
104
+
~categories:[Flow; Phrasing; Palpable]
105
+
~content_model:(Categories [Phrasing])
106
+
~attrs:[
107
+
Attr_spec.make "datetime" ~datatype:"datetime" ();
108
+
]
109
+
~prohibited_ancestors:["time"]
110
+
()
111
+
112
+
let code =
113
+
Element_spec.make ~name:"code"
114
+
~categories:[Flow; Phrasing; Palpable]
115
+
~content_model:(Categories [Phrasing])
116
+
()
117
+
118
+
let var =
119
+
Element_spec.make ~name:"var"
120
+
~categories:[Flow; Phrasing; Palpable]
121
+
~content_model:(Categories [Phrasing])
122
+
()
123
+
124
+
let samp =
125
+
Element_spec.make ~name:"samp"
126
+
~categories:[Flow; Phrasing; Palpable]
127
+
~content_model:(Categories [Phrasing])
128
+
()
129
+
130
+
let kbd =
131
+
Element_spec.make ~name:"kbd"
132
+
~categories:[Flow; Phrasing; Palpable]
133
+
~content_model:(Categories [Phrasing])
134
+
()
135
+
136
+
let sub =
137
+
Element_spec.make ~name:"sub"
138
+
~categories:[Flow; Phrasing; Palpable]
139
+
~content_model:(Categories [Phrasing])
140
+
()
141
+
142
+
let sup =
143
+
Element_spec.make ~name:"sup"
144
+
~categories:[Flow; Phrasing; Palpable]
145
+
~content_model:(Categories [Phrasing])
146
+
()
147
+
148
+
let i =
149
+
Element_spec.make ~name:"i"
150
+
~categories:[Flow; Phrasing; Palpable]
151
+
~content_model:(Categories [Phrasing])
152
+
()
153
+
154
+
let b =
155
+
Element_spec.make ~name:"b"
156
+
~categories:[Flow; Phrasing; Palpable]
157
+
~content_model:(Categories [Phrasing])
158
+
()
159
+
160
+
let u =
161
+
Element_spec.make ~name:"u"
162
+
~categories:[Flow; Phrasing; Palpable]
163
+
~content_model:(Categories [Phrasing])
164
+
()
165
+
166
+
let mark =
167
+
Element_spec.make ~name:"mark"
168
+
~categories:[Flow; Phrasing; Palpable]
169
+
~content_model:(Categories [Phrasing])
170
+
()
171
+
172
+
let bdi =
173
+
Element_spec.make ~name:"bdi"
174
+
~categories:[Flow; Phrasing; Palpable]
175
+
~content_model:(Categories [Phrasing])
176
+
()
177
+
178
+
let bdo =
179
+
Element_spec.make ~name:"bdo"
180
+
~categories:[Flow; Phrasing; Palpable]
181
+
~content_model:(Categories [Phrasing])
182
+
~attrs:[
183
+
Attr_spec.make "dir" ~required:true ~datatype:"dir" ();
184
+
]
185
+
()
186
+
187
+
let span =
188
+
Element_spec.make ~name:"span"
189
+
~categories:[Flow; Phrasing; Palpable]
190
+
~content_model:(Categories [Phrasing])
191
+
()
192
+
193
+
let br =
194
+
Element_spec.make ~name:"br"
195
+
~void:true
196
+
~categories:[Flow; Phrasing]
197
+
~content_model:Nothing
198
+
()
199
+
200
+
let wbr =
201
+
Element_spec.make ~name:"wbr"
202
+
~void:true
203
+
~categories:[Flow; Phrasing]
204
+
~content_model:Nothing
205
+
()
206
+
207
+
let ins =
208
+
Element_spec.make ~name:"ins"
209
+
~categories:[Flow; Phrasing; Palpable]
210
+
~content_model:Transparent
211
+
~attrs:[
212
+
Attr_spec.make "cite" ~datatype:"url" ();
213
+
Attr_spec.make "datetime" ~datatype:"datetime" ();
214
+
]
215
+
()
216
+
217
+
let del =
218
+
Element_spec.make ~name:"del"
219
+
~categories:[Flow; Phrasing]
220
+
~content_model:Transparent
221
+
~attrs:[
222
+
Attr_spec.make "cite" ~datatype:"url" ();
223
+
Attr_spec.make "datetime" ~datatype:"datetime" ();
224
+
]
225
+
()
226
+
227
+
let all = [
228
+
a; em; strong; small; s; cite; q; dfn; abbr; ruby; rt; rp;
229
+
data; time; code; var; samp; kbd; sub; sup; i; b; u; mark;
230
+
bdi; bdo; span; br; wbr; ins; del
231
+
]
+204
lib/html5_checker/content_model/elements_text.mli
+204
lib/html5_checker/content_model/elements_text.mli
···
1
+
(** HTML5 text-level and edit element specifications.
2
+
3
+
Text-level semantic elements and edit tracking elements.
4
+
See https://html.spec.whatwg.org/multipage/text-level-semantics.html
5
+
and https://html.spec.whatwg.org/multipage/edits.html *)
6
+
7
+
(** {1 Hyperlinks} *)
8
+
9
+
val a : Element_spec.t
10
+
(** The a element represents a hyperlink. When it has an href attribute, it
11
+
represents a hyperlink (a hypertext anchor) labeled by its contents.
12
+
13
+
Content model: Transparent, but must not contain interactive content
14
+
descendants. *)
15
+
16
+
(** {1 Text-level Semantics} *)
17
+
18
+
val em : Element_spec.t
19
+
(** The em element represents stress emphasis of its contents. The level of
20
+
emphasis is given by its number of ancestor em elements.
21
+
22
+
Content model: Phrasing content. *)
23
+
24
+
val strong : Element_spec.t
25
+
(** The strong element represents strong importance, seriousness, or urgency
26
+
for its contents.
27
+
28
+
Content model: Phrasing content. *)
29
+
30
+
val small : Element_spec.t
31
+
(** The small element represents side comments such as small print. Small print
32
+
typically features disclaimers, caveats, legal restrictions, or copyrights.
33
+
34
+
Content model: Phrasing content. *)
35
+
36
+
val s : Element_spec.t
37
+
(** The s element represents contents that are no longer accurate or no longer
38
+
relevant.
39
+
40
+
Content model: Phrasing content. *)
41
+
42
+
val cite : Element_spec.t
43
+
(** The cite element represents the title of a work (e.g. a book, a paper,
44
+
an essay, a poem, a score, a song, a script, a film, a TV show, a game,
45
+
a sculpture, a painting, a theatre production, a play, an opera, a musical,
46
+
an exhibition, a legal case report, a computer program, etc).
47
+
48
+
Content model: Phrasing content. *)
49
+
50
+
val q : Element_spec.t
51
+
(** The q element represents some phrasing content quoted from another source.
52
+
53
+
Content model: Phrasing content. *)
54
+
55
+
val dfn : Element_spec.t
56
+
(** The dfn element represents the defining instance of a term.
57
+
58
+
Content model: Phrasing content, but must not contain dfn element descendants. *)
59
+
60
+
val abbr : Element_spec.t
61
+
(** The abbr element represents an abbreviation or acronym, optionally with its
62
+
expansion. The title attribute may be used to provide an expansion of the
63
+
abbreviation.
64
+
65
+
Content model: Phrasing content. *)
66
+
67
+
val ruby : Element_spec.t
68
+
(** The ruby element allows one or more spans of phrasing content to be marked
69
+
with ruby annotations. Ruby annotations are short runs of text presented
70
+
alongside base text, primarily used in East Asian typography.
71
+
72
+
Content model: Phrasing content, but must contain at least one rt or rp
73
+
element. *)
74
+
75
+
val rt : Element_spec.t
76
+
(** The rt element marks the ruby text component of a ruby annotation.
77
+
78
+
Content model: Phrasing content. *)
79
+
80
+
val rp : Element_spec.t
81
+
(** The rp element is used to provide fallback parentheses for browsers that
82
+
don't support ruby annotations.
83
+
84
+
Content model: Text, or phrasing content that represents what can be used
85
+
as fallback in annotations. *)
86
+
87
+
val data : Element_spec.t
88
+
(** The data element represents its contents, along with a machine-readable
89
+
form of those contents in the value attribute.
90
+
91
+
Content model: Phrasing content. *)
92
+
93
+
val time : Element_spec.t
94
+
(** The time element represents its contents, along with a machine-readable
95
+
form of those contents in the datetime attribute.
96
+
97
+
Content model: Phrasing content, but must not contain time element descendants. *)
98
+
99
+
val code : Element_spec.t
100
+
(** The code element represents a fragment of computer code.
101
+
102
+
Content model: Phrasing content. *)
103
+
104
+
val var : Element_spec.t
105
+
(** The var element represents a variable in a mathematical expression or a
106
+
programming context.
107
+
108
+
Content model: Phrasing content. *)
109
+
110
+
val samp : Element_spec.t
111
+
(** The samp element represents sample or quoted output from another program
112
+
or computing system.
113
+
114
+
Content model: Phrasing content. *)
115
+
116
+
val kbd : Element_spec.t
117
+
(** The kbd element represents user input (typically keyboard input, although
118
+
it may also be used to represent other input, such as voice commands).
119
+
120
+
Content model: Phrasing content. *)
121
+
122
+
val sub : Element_spec.t
123
+
(** The sub element represents a subscript.
124
+
125
+
Content model: Phrasing content. *)
126
+
127
+
val sup : Element_spec.t
128
+
(** The sup element represents a superscript.
129
+
130
+
Content model: Phrasing content. *)
131
+
132
+
val i : Element_spec.t
133
+
(** The i element represents a span of text in an alternate voice or mood, or
134
+
otherwise offset from the normal prose in a manner indicating a different
135
+
quality of text.
136
+
137
+
Content model: Phrasing content. *)
138
+
139
+
val b : Element_spec.t
140
+
(** The b element represents a span of text to which attention is being drawn
141
+
for utilitarian purposes without conveying any extra importance and with no
142
+
implication of an alternate voice or mood.
143
+
144
+
Content model: Phrasing content. *)
145
+
146
+
val u : Element_spec.t
147
+
(** The u element represents a span of text with an unarticulated, though
148
+
explicitly rendered, non-textual annotation, such as labeling the text as
149
+
being a proper name in Chinese text or labeling the text as being
150
+
misspelt.
151
+
152
+
Content model: Phrasing content. *)
153
+
154
+
val mark : Element_spec.t
155
+
(** The mark element represents a run of text in one document marked or
156
+
highlighted for reference purposes, due to its relevance in another context.
157
+
158
+
Content model: Phrasing content. *)
159
+
160
+
val bdi : Element_spec.t
161
+
(** The bdi element represents a span of text that is to be isolated from its
162
+
surroundings for the purposes of bidirectional text formatting.
163
+
164
+
Content model: Phrasing content. *)
165
+
166
+
val bdo : Element_spec.t
167
+
(** The bdo element represents explicit text directionality formatting control
168
+
for its children. It allows authors to override the Unicode bidirectional
169
+
algorithm.
170
+
171
+
Content model: Phrasing content. *)
172
+
173
+
val span : Element_spec.t
174
+
(** The span element doesn't mean anything on its own, but can be useful when
175
+
used together with the global attributes, e.g. class, lang, or dir.
176
+
177
+
Content model: Phrasing content. *)
178
+
179
+
val br : Element_spec.t
180
+
(** The br element represents a line break. It is a void element.
181
+
182
+
Content model: Nothing (void element). *)
183
+
184
+
val wbr : Element_spec.t
185
+
(** The wbr element represents a line break opportunity. It is a void element.
186
+
187
+
Content model: Nothing (void element). *)
188
+
189
+
(** {1 Edits} *)
190
+
191
+
val ins : Element_spec.t
192
+
(** The ins element represents an addition to the document.
193
+
194
+
Content model: Transparent. *)
195
+
196
+
val del : Element_spec.t
197
+
(** The del element represents a removal from the document.
198
+
199
+
Content model: Transparent. *)
200
+
201
+
(** {1 Element List} *)
202
+
203
+
val all : Element_spec.t list
204
+
(** List of all text-level and edit element specifications. *)
lib/html5_checker/datatype/datatype.cmi
lib/html5_checker/datatype/datatype.cmi
This is a binary file and will not be displayed.
+42
lib/html5_checker/datatype/datatype.ml
+42
lib/html5_checker/datatype/datatype.ml
···
1
+
module type S = sig
2
+
val name : string
3
+
val validate : string -> (unit, string) result
4
+
val is_valid : string -> bool
5
+
end
6
+
7
+
type t = (module S)
8
+
9
+
let name (module D : S) = D.name
10
+
let validate (module D : S) s = D.validate s
11
+
let is_valid (module D : S) s = D.is_valid s
12
+
13
+
(* Helper utilities *)
14
+
15
+
let is_whitespace = function
16
+
| ' ' | '\t' | '\n' | '\r' | '\012' (* FF *) -> true
17
+
| _ -> false
18
+
19
+
let is_ascii_digit = function '0' .. '9' -> true | _ -> false
20
+
21
+
let to_ascii_lowercase c =
22
+
match c with 'A' .. 'Z' -> Char.chr (Char.code c + 32) | _ -> c
23
+
24
+
let string_to_ascii_lowercase s =
25
+
String.map to_ascii_lowercase s
26
+
27
+
let trim_html_spaces s =
28
+
let len = String.length s in
29
+
let rec find_start i =
30
+
if i >= len then len
31
+
else if is_whitespace s.[i] then find_start (i + 1)
32
+
else i
33
+
in
34
+
let rec find_end i =
35
+
if i < 0 then -1
36
+
else if is_whitespace s.[i] then find_end (i - 1)
37
+
else i
38
+
in
39
+
let start = find_start 0 in
40
+
let end_pos = find_end (len - 1) in
41
+
if start > end_pos then ""
42
+
else String.sub s start (end_pos - start + 1)
+45
lib/html5_checker/datatype/datatype.mli
+45
lib/html5_checker/datatype/datatype.mli
···
1
+
(** HTML5 datatype validation.
2
+
3
+
This module provides the base interface for HTML5 attribute datatype
4
+
validators. Each datatype validates string values according to HTML5 spec. *)
5
+
6
+
(** A datatype validator *)
7
+
module type S = sig
8
+
(** Name of this datatype (e.g., "integer", "url") *)
9
+
val name : string
10
+
11
+
(** Validate a string value. Returns Ok () if valid, Error message otherwise *)
12
+
val validate : string -> (unit, string) result
13
+
14
+
(** Check if value is valid (convenience function) *)
15
+
val is_valid : string -> bool
16
+
end
17
+
18
+
(** A datatype packed as a first-class module *)
19
+
type t = (module S)
20
+
21
+
(** Get the name of a datatype *)
22
+
val name : t -> string
23
+
24
+
(** Validate a value with a datatype *)
25
+
val validate : t -> string -> (unit, string) result
26
+
27
+
(** Check if a value is valid *)
28
+
val is_valid : t -> string -> bool
29
+
30
+
(** Helper utilities for implementing datatype validators. *)
31
+
32
+
(** Check if a character is HTML5 whitespace (space, tab, LF, FF, or CR). *)
33
+
val is_whitespace : char -> bool
34
+
35
+
(** Check if a character is an ASCII digit (0-9). *)
36
+
val is_ascii_digit : char -> bool
37
+
38
+
(** Convert an ASCII character to lowercase. *)
39
+
val to_ascii_lowercase : char -> char
40
+
41
+
(** Convert an ASCII string to lowercase. *)
42
+
val string_to_ascii_lowercase : string -> string
43
+
44
+
(** Trim HTML5 whitespace from both ends of a string. *)
45
+
val trim_html_spaces : string -> string
+31
lib/html5_checker/datatype/datatype_registry.ml
+31
lib/html5_checker/datatype/datatype_registry.ml
···
1
+
type t = { datatypes : (string, Datatype.t) Hashtbl.t }
2
+
3
+
let create () = { datatypes = Hashtbl.create 16 }
4
+
5
+
let register t dt =
6
+
let name = Datatype.name dt in
7
+
Hashtbl.replace t.datatypes name dt
8
+
9
+
let get t name = Hashtbl.find_opt t.datatypes name
10
+
11
+
let list_names t =
12
+
Hashtbl.fold (fun name _ acc -> name :: acc) t.datatypes []
13
+
|> List.sort String.compare
14
+
15
+
let default =
16
+
let registry = ref None in
17
+
fun () ->
18
+
match !registry with
19
+
| Some r -> r
20
+
| None ->
21
+
let r = create () in
22
+
(* Register built-in datatypes *)
23
+
register r (module Dt_integer.Integer : Datatype.S);
24
+
register r (module Dt_integer.Integer_non_negative : Datatype.S);
25
+
register r (module Dt_integer.Integer_positive : Datatype.S);
26
+
register r (module Dt_float.Float_ : Datatype.S);
27
+
register r (module Dt_float.Float_non_negative : Datatype.S);
28
+
register r (module Dt_float.Float_positive : Datatype.S);
29
+
register r (module Dt_boolean.Boolean : Datatype.S);
30
+
registry := Some r;
31
+
r
+19
lib/html5_checker/datatype/datatype_registry.mli
+19
lib/html5_checker/datatype/datatype_registry.mli
···
1
+
(** Registry for HTML5 datatypes *)
2
+
3
+
(** Registry type that holds datatypes indexed by name *)
4
+
type t
5
+
6
+
(** Create a new empty registry *)
7
+
val create : unit -> t
8
+
9
+
(** Register a datatype in the registry *)
10
+
val register : t -> Datatype.t -> unit
11
+
12
+
(** Get a datatype by name. Returns None if not found *)
13
+
val get : t -> string -> Datatype.t option
14
+
15
+
(** List all registered datatype names *)
16
+
val list_names : t -> string list
17
+
18
+
(** Default registry with all built-in datatypes *)
19
+
val default : unit -> t
+229
lib/html5_checker/datatype/dt_autocomplete.ml
+229
lib/html5_checker/datatype/dt_autocomplete.ml
···
1
+
(** Autocomplete attribute validation based on HTML5 spec *)
2
+
3
+
(** Check if character is whitespace *)
4
+
let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r'
5
+
6
+
(** Convert character to ASCII lowercase *)
7
+
let to_ascii_lowercase c =
8
+
if c >= 'A' && c <= 'Z' then Char.chr (Char.code c + 32) else c
9
+
10
+
(** Trim whitespace from string *)
11
+
let trim_whitespace s =
12
+
let s = String.trim s in
13
+
(* Also collapse internal whitespace *)
14
+
let buf = Buffer.create (String.length s) in
15
+
let in_space = ref false in
16
+
String.iter
17
+
(fun c ->
18
+
if is_whitespace c then
19
+
if not !in_space then (
20
+
Buffer.add_char buf ' ';
21
+
in_space := true)
22
+
else (
23
+
Buffer.add_char buf (to_ascii_lowercase c);
24
+
in_space := false))
25
+
s;
26
+
Buffer.contents buf
27
+
28
+
(** Contact type tokens *)
29
+
let contact_types = [ "home"; "work"; "mobile"; "fax"; "pager" ]
30
+
31
+
(** All autofill field names *)
32
+
let all_field_names =
33
+
[
34
+
"name";
35
+
"honorific-prefix";
36
+
"given-name";
37
+
"additional-name";
38
+
"family-name";
39
+
"honorific-suffix";
40
+
"nickname";
41
+
"organization-title";
42
+
"username";
43
+
"new-password";
44
+
"current-password";
45
+
"one-time-code";
46
+
"organization";
47
+
"street-address";
48
+
"address-line1";
49
+
"address-line2";
50
+
"address-line3";
51
+
"address-level4";
52
+
"address-level3";
53
+
"address-level2";
54
+
"address-level1";
55
+
"country";
56
+
"country-name";
57
+
"postal-code";
58
+
"cc-name";
59
+
"cc-given-name";
60
+
"cc-additional-name";
61
+
"cc-family-name";
62
+
"cc-number";
63
+
"cc-exp";
64
+
"cc-exp-month";
65
+
"cc-exp-year";
66
+
"cc-csc";
67
+
"cc-type";
68
+
"transaction-currency";
69
+
"transaction-amount";
70
+
"language";
71
+
"bday";
72
+
"bday-day";
73
+
"bday-month";
74
+
"bday-year";
75
+
"sex";
76
+
"url";
77
+
"photo";
78
+
"tel";
79
+
"tel-country-code";
80
+
"tel-national";
81
+
"tel-area-code";
82
+
"tel-local";
83
+
"tel-local-prefix";
84
+
"tel-local-suffix";
85
+
"tel-extension";
86
+
"email";
87
+
"impp";
88
+
]
89
+
90
+
(** Contact field names (subset that can be used with contact types) *)
91
+
let contact_field_names =
92
+
[
93
+
"tel";
94
+
"tel-country-code";
95
+
"tel-national";
96
+
"tel-area-code";
97
+
"tel-local";
98
+
"tel-local-prefix";
99
+
"tel-local-suffix";
100
+
"tel-extension";
101
+
"email";
102
+
"impp";
103
+
]
104
+
105
+
(** Split string on whitespace *)
106
+
let split_on_whitespace s =
107
+
let rec split acc start i =
108
+
if i >= String.length s then
109
+
if start < i then List.rev (String.sub s start (i - start) :: acc)
110
+
else List.rev acc
111
+
else if is_whitespace s.[i] then
112
+
if start < i then
113
+
split (String.sub s start (i - start) :: acc) (i + 1) (i + 1)
114
+
else split acc (i + 1) (i + 1)
115
+
else split acc start (i + 1)
116
+
in
117
+
split [] 0 0
118
+
119
+
(** Check if string starts with prefix *)
120
+
let starts_with s prefix =
121
+
String.length s >= String.length prefix
122
+
&& String.sub s 0 (String.length prefix) = prefix
123
+
124
+
(** Validate detail tokens *)
125
+
let check_tokens tokens =
126
+
let tokens = ref tokens in
127
+
let is_contact_details = ref false in
128
+
129
+
(* Check for section-* *)
130
+
(match !tokens with
131
+
| token :: rest when starts_with token "section-" ->
132
+
tokens := rest
133
+
| _ -> ());
134
+
135
+
(* Check for shipping/billing *)
136
+
(match !tokens with
137
+
| "shipping" :: rest | "billing" :: rest ->
138
+
tokens := rest
139
+
| _ -> ());
140
+
141
+
(* Check for contact type *)
142
+
(match !tokens with
143
+
| token :: rest when List.mem token contact_types ->
144
+
tokens := rest;
145
+
is_contact_details := true
146
+
| _ -> ());
147
+
148
+
(* Process remaining tokens *)
149
+
let process_field_tokens = function
150
+
| [] -> Error "A list of autofill details tokens must contain an autofill field name"
151
+
| [ "webauthn" ] ->
152
+
Error
153
+
"The token \"webauthn\" must not be the only token in a list of \
154
+
autofill detail tokens"
155
+
| [ field_name ] ->
156
+
if not (List.mem field_name all_field_names) then
157
+
Error
158
+
(Printf.sprintf
159
+
"The string \"%s\" is not a valid autofill field name"
160
+
field_name)
161
+
else if !is_contact_details && not (List.mem field_name contact_field_names)
162
+
then
163
+
Error
164
+
(Printf.sprintf
165
+
"The autofill field name \"%s\" is not allowed in contact \
166
+
context"
167
+
field_name)
168
+
else Ok ()
169
+
| [ field_name; "webauthn" ] ->
170
+
if not (List.mem field_name all_field_names) then
171
+
Error
172
+
(Printf.sprintf
173
+
"The string \"%s\" is not a valid autofill field name"
174
+
field_name)
175
+
else if !is_contact_details && not (List.mem field_name contact_field_names)
176
+
then
177
+
Error
178
+
(Printf.sprintf
179
+
"The autofill field name \"%s\" is not allowed in contact \
180
+
context"
181
+
field_name)
182
+
else Ok ()
183
+
| token :: _ when List.mem token contact_types ->
184
+
Error
185
+
(Printf.sprintf
186
+
"The token \"%s\" must only appear before any autofill field names"
187
+
token)
188
+
| token :: _ when starts_with token "section-" ->
189
+
Error
190
+
"A \"section-*\" indicator must only appear as the first token in a \
191
+
list of autofill detail tokens"
192
+
| "shipping" :: _ | "billing" :: _ as toks ->
193
+
Error
194
+
(Printf.sprintf
195
+
"The token \"%s\" must only appear as either the first token in a \
196
+
list of autofill detail tokens, or, if the first token is a \
197
+
\"section-*\" indicator, as the second token"
198
+
(List.hd toks))
199
+
| _ :: "webauthn" :: _ :: _ ->
200
+
Error
201
+
"The token \"webauthn\" must only appear as the very last token in a \
202
+
list of autofill detail tokens"
203
+
| _ :: _ :: _ ->
204
+
Error
205
+
"A list of autofill details tokens must not contain more than one \
206
+
autofill field name"
207
+
in
208
+
process_field_tokens !tokens
209
+
210
+
(** Validate autocomplete value *)
211
+
let validate_autocomplete s =
212
+
let trimmed = trim_whitespace s in
213
+
if String.length trimmed = 0 then Error "Must not be empty"
214
+
else if trimmed = "on" || trimmed = "off" then Ok ()
215
+
else
216
+
let tokens = split_on_whitespace trimmed in
217
+
check_tokens tokens
218
+
219
+
module Autocomplete = struct
220
+
let name = "autocomplete"
221
+
let validate = validate_autocomplete
222
+
223
+
let is_valid s =
224
+
match validate s with
225
+
| Ok () -> true
226
+
| Error _ -> false
227
+
end
228
+
229
+
let datatypes = [ (module Autocomplete : Datatype.S) ]
+41
lib/html5_checker/datatype/dt_autocomplete.mli
+41
lib/html5_checker/datatype/dt_autocomplete.mli
···
1
+
(** Autocomplete attribute datatype validator.
2
+
3
+
This module provides a validator for the autocomplete attribute used on
4
+
form fields, as defined by the HTML5 specification. *)
5
+
6
+
(** Autocomplete attribute validator.
7
+
8
+
Validates autocomplete attribute values which can be:
9
+
- "on" or "off" (simple values)
10
+
- Autofill detail tokens in the format:
11
+
[section-*] [shipping|billing] [contact-type] field-name [webauthn]
12
+
13
+
Contact types: home, work, mobile, fax, pager
14
+
15
+
Field names include:
16
+
- Name fields: name, honorific-prefix, given-name, additional-name,
17
+
family-name, honorific-suffix, nickname, organization-title
18
+
- Authentication: username, new-password, current-password, one-time-code
19
+
- Organization: organization
20
+
- Address: street-address, address-line1, address-line2, address-line3,
21
+
address-level1, address-level2, address-level3, address-level4,
22
+
country, country-name, postal-code
23
+
- Credit card: cc-name, cc-given-name, cc-additional-name, cc-family-name,
24
+
cc-number, cc-exp, cc-exp-month, cc-exp-year, cc-csc, cc-type
25
+
- Transaction: transaction-currency, transaction-amount
26
+
- Other: language, bday, bday-day, bday-month, bday-year, sex, url, photo
27
+
- Contact: tel, tel-country-code, tel-national, tel-area-code, tel-local,
28
+
tel-local-prefix, tel-local-suffix, tel-extension, email, impp
29
+
30
+
Examples:
31
+
- "on"
32
+
- "off"
33
+
- "name"
34
+
- "email"
35
+
- "shipping street-address"
36
+
- "section-blue billing email"
37
+
- "work tel" *)
38
+
module Autocomplete : Datatype.S
39
+
40
+
(** List of all datatypes defined in this module *)
41
+
val datatypes : Datatype.t list
+38
lib/html5_checker/datatype/dt_boolean.ml
+38
lib/html5_checker/datatype/dt_boolean.ml
···
1
+
(** Boolean attribute validation for HTML5 *)
2
+
module Boolean = struct
3
+
let name = "boolean"
4
+
5
+
let validate s =
6
+
match s with
7
+
| "" | "true" | "false" -> Ok ()
8
+
| _ ->
9
+
Error
10
+
(Printf.sprintf
11
+
"The value '%s' is not a valid boolean. Expected empty string, \
12
+
'true', or 'false'."
13
+
s)
14
+
15
+
let is_valid s = Result.is_ok (validate s)
16
+
17
+
let with_name attr_name =
18
+
let module M = struct
19
+
let name = "boolean"
20
+
21
+
let validate s =
22
+
match s with
23
+
| "" | "true" | "false" -> Ok ()
24
+
| _ ->
25
+
let s_lower = Datatype.string_to_ascii_lowercase s in
26
+
let attr_lower = Datatype.string_to_ascii_lowercase attr_name in
27
+
if s_lower = attr_lower then Ok ()
28
+
else
29
+
Error
30
+
(Printf.sprintf
31
+
"The value '%s' is not a valid boolean. Expected empty \
32
+
string, 'true', 'false', or '%s'."
33
+
s attr_name)
34
+
35
+
let is_valid s = Result.is_ok (validate s)
36
+
end in
37
+
(module M : Datatype.S)
38
+
end
+19
lib/html5_checker/datatype/dt_boolean.mli
+19
lib/html5_checker/datatype/dt_boolean.mli
···
1
+
(** Boolean attribute datatype validator for HTML5 *)
2
+
3
+
(** Boolean attribute validation.
4
+
5
+
In HTML5, boolean attributes can have the following values:
6
+
- Empty string
7
+
- "true"
8
+
- "false"
9
+
- The attribute name itself (case-insensitive)
10
+
11
+
For attribute-name validation, use [Boolean.with_name]. *)
12
+
module Boolean : sig
13
+
include Datatype.S
14
+
15
+
(** Create a boolean validator that also accepts a specific attribute name.
16
+
For example, [with_name "disabled"] will accept "", "true", "false", or
17
+
"disabled" (case-insensitive). *)
18
+
val with_name : string -> (module Datatype.S)
19
+
end
+136
lib/html5_checker/datatype/dt_charset.ml
+136
lib/html5_checker/datatype/dt_charset.ml
···
1
+
(** Helper functions for charset validation *)
2
+
3
+
let is_valid_charset_char c =
4
+
(c >= '0' && c <= '9') ||
5
+
(c >= 'a' && c <= 'z') ||
6
+
(c >= 'A' && c <= 'Z') ||
7
+
c = '-' || c = '!' || c = '#' || c = '$' || c = '%' || c = '&' ||
8
+
c = '\'' || c = '+' || c = '_' || c = '`' || c = '{' || c = '}' ||
9
+
c = '~' || c = '^'
10
+
11
+
let to_lower s = String.lowercase_ascii s
12
+
13
+
(** Common encoding labels recognized by WHATWG Encoding Standard.
14
+
This is a subset of the full list. *)
15
+
let known_charsets = [
16
+
(* UTF-8 *)
17
+
"utf-8"; "utf8"; "unicode-1-1-utf-8";
18
+
(* Legacy single-byte encodings *)
19
+
"iso-8859-1"; "iso8859-1"; "latin1"; "iso-8859-2"; "iso-8859-3";
20
+
"iso-8859-4"; "iso-8859-5"; "iso-8859-6"; "iso-8859-7"; "iso-8859-8";
21
+
"iso-8859-9"; "iso-8859-10"; "iso-8859-13"; "iso-8859-14"; "iso-8859-15";
22
+
"iso-8859-16";
23
+
(* Windows code pages *)
24
+
"windows-1250"; "windows-1251"; "windows-1252"; "windows-1253";
25
+
"windows-1254"; "windows-1255"; "windows-1256"; "windows-1257";
26
+
"windows-1258";
27
+
(* Other common encodings *)
28
+
"us-ascii"; "ascii"; "utf-16"; "utf-16le"; "utf-16be";
29
+
"gb2312"; "gbk"; "gb18030"; "big5"; "euc-jp"; "iso-2022-jp";
30
+
"shift_jis"; "euc-kr"; "koi8-r"; "koi8-u";
31
+
(* Macintosh encodings *)
32
+
"macintosh"; "x-mac-roman";
33
+
]
34
+
35
+
(** Check if a charset name is recognized *)
36
+
let is_known_charset name =
37
+
let lower = to_lower name in
38
+
List.mem lower known_charsets
39
+
40
+
module Charset = struct
41
+
let name = "encoding name"
42
+
43
+
let validate s =
44
+
if String.length s = 0 then
45
+
Error "The empty string is not a valid character encoding name"
46
+
else
47
+
(* Check all characters are valid *)
48
+
let rec check_chars i =
49
+
if i >= String.length s then
50
+
Ok ()
51
+
else
52
+
let c = s.[i] in
53
+
if not (is_valid_charset_char c) then
54
+
Error (Printf.sprintf "Value contained '%c', which is not a valid character in an encoding name" c)
55
+
else
56
+
check_chars (i + 1)
57
+
in
58
+
match check_chars 0 with
59
+
| Error e -> Error e
60
+
| Ok () ->
61
+
let lower = to_lower s in
62
+
(* Reject "replacement" encoding *)
63
+
if lower = "replacement" then
64
+
Error (Printf.sprintf "'%s' is not a valid character encoding name" s)
65
+
(* Check if it's a known charset *)
66
+
else if not (is_known_charset lower) then
67
+
Error (Printf.sprintf "'%s' is not a valid character encoding name" s)
68
+
else
69
+
Ok ()
70
+
71
+
let is_valid s = Result.is_ok (validate s)
72
+
end
73
+
74
+
module Meta_charset = struct
75
+
let name = "legacy character encoding declaration"
76
+
77
+
let is_whitespace c =
78
+
c = ' ' || c = '\t' || c = '\n' || c = '\012' || c = '\r'
79
+
80
+
let validate s =
81
+
let lower = to_lower s in
82
+
if not (String.starts_with ~prefix:"text/html;" lower) then
83
+
Error "The legacy encoding declaration did not start with 'text/html;'"
84
+
else if String.length lower = 10 then
85
+
Error "The legacy encoding declaration ended prematurely"
86
+
else
87
+
(* Skip whitespace after semicolon *)
88
+
let rec skip_ws i =
89
+
if i >= String.length lower then
90
+
Error "The legacy encoding declaration did not contain 'charset=' after the semicolon"
91
+
else
92
+
let c = lower.[i] in
93
+
if is_whitespace c then
94
+
skip_ws (i + 1)
95
+
else if c = 'c' then
96
+
Ok i
97
+
else
98
+
Error (Printf.sprintf "The legacy encoding declaration did not start with space characters or 'charset=' after the semicolon. Found '%c' instead" c)
99
+
in
100
+
match skip_ws 10 with
101
+
| Error e -> Error e
102
+
| Ok offset ->
103
+
if not (String.sub lower offset (String.length lower - offset) |> String.starts_with ~prefix:"charset=") then
104
+
Error "The legacy encoding declaration did not contain 'charset=' after the semicolon"
105
+
else
106
+
let charset_offset = offset + 8 in
107
+
if charset_offset >= String.length lower then
108
+
Error "The empty string is not a valid character encoding name"
109
+
else
110
+
(* Validate remaining characters *)
111
+
let rec check_chars i =
112
+
if i >= String.length lower then
113
+
Ok ()
114
+
else
115
+
let c = lower.[i] in
116
+
if not (is_valid_charset_char c) then
117
+
Error (Printf.sprintf "The legacy encoding contained '%c', which is not a valid character in an encoding name" c)
118
+
else
119
+
check_chars (i + 1)
120
+
in
121
+
match check_chars charset_offset with
122
+
| Error e -> Error e
123
+
| Ok () ->
124
+
let encoding_name = String.sub lower charset_offset (String.length lower - charset_offset) in
125
+
if encoding_name <> "utf-8" then
126
+
Error "'charset=' must be followed by 'utf-8'"
127
+
else
128
+
Ok ()
129
+
130
+
let is_valid s = Result.is_ok (validate s)
131
+
end
132
+
133
+
let datatypes = [
134
+
(module Charset : Datatype.S);
135
+
(module Meta_charset : Datatype.S);
136
+
]
+37
lib/html5_checker/datatype/dt_charset.mli
+37
lib/html5_checker/datatype/dt_charset.mli
···
1
+
(** Character encoding datatype validators for HTML5.
2
+
3
+
This module provides validators for character encoding names as used in
4
+
HTML5. Encoding names must conform to the WHATWG Encoding Standard. *)
5
+
6
+
(** Character encoding name datatype.
7
+
8
+
Validates a character encoding name according to the WHATWG Encoding
9
+
Standard. Valid encoding names include:
10
+
- UTF-8 (and variants like "utf-8", "utf8")
11
+
- Legacy encodings (ISO-8859-1, windows-1252, etc.)
12
+
13
+
The validator checks:
14
+
- Non-empty string
15
+
- Valid characters (alphanumeric, hyphen, and special chars: ! # $ % & ' + _ ` \{ \} ~ ^)
16
+
- Recognizes common encoding labels
17
+
18
+
Note: This is a simplified validator that recognizes common encoding
19
+
names but does not include the full WHATWG encoding label table.
20
+
It accepts labels case-insensitively. *)
21
+
module Charset : Datatype.S
22
+
23
+
(** Meta charset datatype for legacy encoding declarations.
24
+
25
+
Validates the charset attribute value in legacy meta elements of the form:
26
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
27
+
28
+
The validator checks that:
29
+
- String starts with "text/html;"
30
+
- After optional whitespace, contains "charset="
31
+
- The charset value is "utf-8" (the only allowed value in modern HTML5)
32
+
33
+
Example valid value: "text/html; charset=utf-8" *)
34
+
module Meta_charset : Datatype.S
35
+
36
+
(** List of all charset datatypes *)
37
+
val datatypes : Datatype.t list
+243
lib/html5_checker/datatype/dt_color.ml
+243
lib/html5_checker/datatype/dt_color.ml
···
1
+
(** Color validation *)
2
+
3
+
(** Named CSS colors *)
4
+
let named_colors =
5
+
[
6
+
"aliceblue";
7
+
"antiquewhite";
8
+
"aqua";
9
+
"aquamarine";
10
+
"azure";
11
+
"beige";
12
+
"bisque";
13
+
"black";
14
+
"blanchedalmond";
15
+
"blue";
16
+
"blueviolet";
17
+
"brown";
18
+
"burlywood";
19
+
"cadetblue";
20
+
"chartreuse";
21
+
"chocolate";
22
+
"coral";
23
+
"cornflowerblue";
24
+
"cornsilk";
25
+
"crimson";
26
+
"cyan";
27
+
"darkblue";
28
+
"darkcyan";
29
+
"darkgoldenrod";
30
+
"darkgray";
31
+
"darkgrey";
32
+
"darkgreen";
33
+
"darkkhaki";
34
+
"darkmagenta";
35
+
"darkolivegreen";
36
+
"darkorange";
37
+
"darkorchid";
38
+
"darkred";
39
+
"darksalmon";
40
+
"darkseagreen";
41
+
"darkslateblue";
42
+
"darkslategray";
43
+
"darkslategrey";
44
+
"darkturquoise";
45
+
"darkviolet";
46
+
"deeppink";
47
+
"deepskyblue";
48
+
"dimgray";
49
+
"dimgrey";
50
+
"dodgerblue";
51
+
"firebrick";
52
+
"floralwhite";
53
+
"forestgreen";
54
+
"fuchsia";
55
+
"gainsboro";
56
+
"ghostwhite";
57
+
"gold";
58
+
"goldenrod";
59
+
"gray";
60
+
"grey";
61
+
"green";
62
+
"greenyellow";
63
+
"honeydew";
64
+
"hotpink";
65
+
"indianred";
66
+
"indigo";
67
+
"ivory";
68
+
"khaki";
69
+
"lavender";
70
+
"lavenderblush";
71
+
"lawngreen";
72
+
"lemonchiffon";
73
+
"lightblue";
74
+
"lightcoral";
75
+
"lightcyan";
76
+
"lightgoldenrodyellow";
77
+
"lightgray";
78
+
"lightgrey";
79
+
"lightgreen";
80
+
"lightpink";
81
+
"lightsalmon";
82
+
"lightseagreen";
83
+
"lightskyblue";
84
+
"lightslategray";
85
+
"lightslategrey";
86
+
"lightsteelblue";
87
+
"lightyellow";
88
+
"lime";
89
+
"limegreen";
90
+
"linen";
91
+
"magenta";
92
+
"maroon";
93
+
"mediumaquamarine";
94
+
"mediumblue";
95
+
"mediumorchid";
96
+
"mediumpurple";
97
+
"mediumseagreen";
98
+
"mediumslateblue";
99
+
"mediumspringgreen";
100
+
"mediumturquoise";
101
+
"mediumvioletred";
102
+
"midnightblue";
103
+
"mintcream";
104
+
"mistyrose";
105
+
"moccasin";
106
+
"navajowhite";
107
+
"navy";
108
+
"oldlace";
109
+
"olive";
110
+
"olivedrab";
111
+
"orange";
112
+
"orangered";
113
+
"orchid";
114
+
"palegoldenrod";
115
+
"palegreen";
116
+
"paleturquoise";
117
+
"palevioletred";
118
+
"papayawhip";
119
+
"peachpuff";
120
+
"peru";
121
+
"pink";
122
+
"plum";
123
+
"powderblue";
124
+
"purple";
125
+
"red";
126
+
"rosybrown";
127
+
"royalblue";
128
+
"saddlebrown";
129
+
"salmon";
130
+
"sandybrown";
131
+
"seagreen";
132
+
"seashell";
133
+
"sienna";
134
+
"silver";
135
+
"skyblue";
136
+
"slateblue";
137
+
"slategray";
138
+
"slategrey";
139
+
"snow";
140
+
"springgreen";
141
+
"steelblue";
142
+
"tan";
143
+
"teal";
144
+
"thistle";
145
+
"tomato";
146
+
"transparent";
147
+
"turquoise";
148
+
"violet";
149
+
"wheat";
150
+
"white";
151
+
"whitesmoke";
152
+
"yellow";
153
+
"yellowgreen";
154
+
]
155
+
156
+
(** Check if character is hex digit *)
157
+
let is_hex_digit c =
158
+
(c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
159
+
160
+
(** Validate hex color (#RGB or #RRGGBB) *)
161
+
let validate_hex_color s =
162
+
let len = String.length s in
163
+
if len <> 4 && len <> 7 then
164
+
Error "Hex color must be #RGB or #RRGGBB format"
165
+
else if s.[0] <> '#' then
166
+
Error "Hex color must start with '#'"
167
+
else
168
+
let rec check_hex i =
169
+
if i >= len then Ok ()
170
+
else if is_hex_digit s.[i] then check_hex (i + 1)
171
+
else
172
+
Error
173
+
(Printf.sprintf "Invalid hex digit '%c' at position %d" s.[i] i)
174
+
in
175
+
check_hex 1
176
+
177
+
(** Simple color validator - strict #RRGGBB format *)
178
+
module Simple_color = struct
179
+
let name = "simple color"
180
+
181
+
let validate s =
182
+
let s = String.trim s in
183
+
if String.length s <> 7 then
184
+
Error "Incorrect length for color string (must be 7 characters)"
185
+
else if s.[0] <> '#' then
186
+
Error
187
+
(Printf.sprintf
188
+
"Color starts with incorrect character '%c'. Expected the number \
189
+
sign '#'"
190
+
s.[0])
191
+
else
192
+
let rec check_hex i =
193
+
if i >= 7 then Ok ()
194
+
else if is_hex_digit s.[i] then check_hex (i + 1)
195
+
else
196
+
Error
197
+
(Printf.sprintf "'%c' is not a valid hexadecimal digit" s.[i])
198
+
in
199
+
check_hex 1
200
+
201
+
let is_valid s =
202
+
match validate s with
203
+
| Ok () -> true
204
+
| Error _ -> false
205
+
end
206
+
207
+
(** CSS color validator - supports multiple formats *)
208
+
module Color = struct
209
+
let name = "color"
210
+
211
+
let validate s =
212
+
let s = String.trim s |> String.lowercase_ascii in
213
+
if String.length s = 0 then Error "Color value must not be empty"
214
+
else if List.mem s named_colors then Ok ()
215
+
else if String.length s > 0 && s.[0] = '#' then validate_hex_color s
216
+
else if
217
+
String.length s > 4
218
+
&& (String.sub s 0 4 = "rgb(" || String.sub s 0 5 = "rgba(")
219
+
then
220
+
(* Basic validation for rgb/rgba - just check balanced parens *)
221
+
if s.[String.length s - 1] = ')' then Ok ()
222
+
else Error "rgb/rgba function must end with ')'"
223
+
else if
224
+
String.length s > 4
225
+
&& (String.sub s 0 4 = "hsl(" || String.sub s 0 5 = "hsla(")
226
+
then
227
+
(* Basic validation for hsl/hsla - just check balanced parens *)
228
+
if s.[String.length s - 1] = ')' then Ok ()
229
+
else Error "hsl/hsla function must end with ')'"
230
+
else
231
+
Error
232
+
(Printf.sprintf
233
+
"Unrecognized color format '%s' (expected named color, hex, rgb(), \
234
+
rgba(), hsl(), or hsla())"
235
+
s)
236
+
237
+
let is_valid s =
238
+
match validate s with
239
+
| Ok () -> true
240
+
| Error _ -> false
241
+
end
242
+
243
+
let datatypes = [ (module Color : Datatype.S); (module Simple_color : Datatype.S) ]
+29
lib/html5_checker/datatype/dt_color.mli
+29
lib/html5_checker/datatype/dt_color.mli
···
1
+
(** Color value datatype validators.
2
+
3
+
This module provides validators for CSS color values and simple colors
4
+
as defined by the HTML5 specification. *)
5
+
6
+
(** CSS color value validator.
7
+
8
+
Validates various CSS color formats:
9
+
- Named colors (e.g., "red", "blue", "transparent")
10
+
- Hex colors: #RGB or #RRGGBB
11
+
- rgb() and rgba() functional notation
12
+
- hsl() and hsla() functional notation
13
+
14
+
This is a simplified validator and does not validate all edge cases. *)
15
+
module Color : Datatype.S
16
+
17
+
(** Simple color validator (for input[type=color]).
18
+
19
+
Validates the simple color format:
20
+
- Must be exactly 7 characters
21
+
- Must start with '#'
22
+
- Followed by exactly 6 hexadecimal digits (0-9, a-f, A-F)
23
+
- Format: #RRGGBB
24
+
25
+
This is the strict format required for input[type=color]. *)
26
+
module Simple_color : Datatype.S
27
+
28
+
(** List of all datatypes defined in this module *)
29
+
val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_contenteditable.ml
+20
lib/html5_checker/datatype/dt_contenteditable.ml
···
1
+
(** Contenteditable attribute validation for HTML5 *)
2
+
3
+
module Contenteditable = struct
4
+
let name = "contenteditable"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "" | "true" | "false" | "plaintext-only" -> Ok ()
10
+
| _ ->
11
+
Error
12
+
(Printf.sprintf
13
+
"The value '%s' is not a valid contenteditable value. Expected \
14
+
'true', 'false', 'plaintext-only', or empty string."
15
+
s)
16
+
17
+
let is_valid s = Result.is_ok (validate s)
18
+
end
19
+
20
+
let datatypes = [ (module Contenteditable : Datatype.S) ]
+25
lib/html5_checker/datatype/dt_contenteditable.mli
+25
lib/html5_checker/datatype/dt_contenteditable.mli
···
1
+
(** Contenteditable attribute datatype validator for HTML5.
2
+
3
+
This module provides a validator for the contenteditable attribute, as
4
+
defined by the HTML5 specification. *)
5
+
6
+
(** Contenteditable attribute validator.
7
+
8
+
Validates contenteditable attribute values which can be:
9
+
- "true" - the element is editable
10
+
- "false" - the element is not editable
11
+
- "" (empty string) - equivalent to "true"
12
+
- "plaintext-only" - the element is editable, but rich text formatting is
13
+
disabled
14
+
15
+
Values are case-insensitive.
16
+
17
+
Examples:
18
+
- "true"
19
+
- "false"
20
+
- ""
21
+
- "plaintext-only" *)
22
+
module Contenteditable : Datatype.S
23
+
24
+
(** List of all datatypes defined in this module *)
25
+
val datatypes : Datatype.t list
+45
lib/html5_checker/datatype/dt_coords.ml
+45
lib/html5_checker/datatype/dt_coords.ml
···
1
+
(** Coordinates attribute validation for HTML5 *)
2
+
3
+
module Coords = struct
4
+
let name = "coords"
5
+
6
+
let validate s =
7
+
(* Empty string is valid for default shape *)
8
+
if s = "" then Ok ()
9
+
else
10
+
(* Split on comma and validate each part is an integer *)
11
+
let parts =
12
+
String.split_on_char ',' s
13
+
|> List.map (fun p ->
14
+
let trimmed = String.trim p in
15
+
(* Check if it's a valid integer *)
16
+
try
17
+
let _ = int_of_string trimmed in
18
+
Ok ()
19
+
with Failure _ ->
20
+
Error (Printf.sprintf "The value '%s' is not a valid integer" p))
21
+
in
22
+
(* Check if all parts are valid *)
23
+
let rec check = function
24
+
| [] -> Ok ()
25
+
| Ok () :: rest -> check rest
26
+
| (Error msg) :: _ -> Error msg
27
+
in
28
+
match check parts with
29
+
| Error msg ->
30
+
Error
31
+
(Printf.sprintf
32
+
"The coords value '%s' is not valid. %s. Expected a \
33
+
comma-separated list of integers."
34
+
s msg)
35
+
| Ok () -> (
36
+
(* Verify we have at least some values *)
37
+
let count = List.length parts in
38
+
match count with
39
+
| 0 -> Error "The coords value must not be empty unless for default shape"
40
+
| _ -> Ok ())
41
+
42
+
let is_valid s = Result.is_ok (validate s)
43
+
end
44
+
45
+
let datatypes = [ (module Coords : Datatype.S) ]
+25
lib/html5_checker/datatype/dt_coords.mli
+25
lib/html5_checker/datatype/dt_coords.mli
···
1
+
(** Coordinates attribute datatype validator for HTML5.
2
+
3
+
This module provides a validator for the coords attribute used on area
4
+
elements within image maps, as defined by the HTML5 specification. *)
5
+
6
+
(** Coordinates attribute validator.
7
+
8
+
Validates coords attribute values which must be a comma-separated list of
9
+
valid integers. The number of values depends on the shape:
10
+
- rect: exactly 4 values (x1,y1,x2,y2)
11
+
- circle: exactly 3 values (x,y,radius)
12
+
- poly: even number of values >= 6 (x1,y1,x2,y2,x3,y3,...)
13
+
- default: should be empty or not present
14
+
15
+
Note: This validator only checks that the value is a valid comma-separated
16
+
list of integers. Shape-specific validation should be done at a higher level.
17
+
18
+
Examples:
19
+
- "0,0,10,10" (rect)
20
+
- "50,50,25" (circle)
21
+
- "0,0,50,0,50,50,0,50" (poly) *)
22
+
module Coords : Datatype.S
23
+
24
+
(** List of all datatypes defined in this module *)
25
+
val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_crossorigin.ml
+20
lib/html5_checker/datatype/dt_crossorigin.ml
···
1
+
(** CORS crossorigin attribute validation for HTML5 *)
2
+
3
+
module Crossorigin = struct
4
+
let name = "crossorigin"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "" | "anonymous" | "use-credentials" -> Ok ()
10
+
| _ ->
11
+
Error
12
+
(Printf.sprintf
13
+
"The value '%s' is not a valid crossorigin value. Expected \
14
+
empty string, 'anonymous', or 'use-credentials'."
15
+
s)
16
+
17
+
let is_valid s = Result.is_ok (validate s)
18
+
end
19
+
20
+
let datatypes = [ (module Crossorigin : Datatype.S) ]
+23
lib/html5_checker/datatype/dt_crossorigin.mli
+23
lib/html5_checker/datatype/dt_crossorigin.mli
···
1
+
(** CORS crossorigin attribute datatype validator.
2
+
3
+
This module provides a validator for the crossorigin attribute used on
4
+
script, link, img, audio, video elements, as defined by the HTML5 spec. *)
5
+
6
+
(** Crossorigin attribute validator.
7
+
8
+
Validates crossorigin attribute values which can be:
9
+
- "" (empty string, equivalent to anonymous)
10
+
- "anonymous" (requests use CORS without credentials)
11
+
- "use-credentials" (requests use CORS with credentials)
12
+
13
+
Values are case-insensitive after ASCII lowercasing.
14
+
15
+
Examples:
16
+
- ""
17
+
- "anonymous"
18
+
- "use-credentials"
19
+
- "Anonymous" (equivalent to "anonymous") *)
20
+
module Crossorigin : Datatype.S
21
+
22
+
(** List of all datatypes defined in this module *)
23
+
val datatypes : Datatype.t list
+235
lib/html5_checker/datatype/dt_datetime.ml
+235
lib/html5_checker/datatype/dt_datetime.ml
···
1
+
(** Helper functions for datetime validation *)
2
+
3
+
let is_digit c = c >= '0' && c <= '9'
4
+
5
+
let is_all_digits s =
6
+
String.for_all is_digit s
7
+
8
+
let parse_int s =
9
+
try Some (int_of_string s)
10
+
with Failure _ -> None
11
+
12
+
(** Days in each month (non-leap year) *)
13
+
let days_in_month = [| 31; 28; 31; 30; 31; 30; 31; 31; 30; 31; 30; 31 |]
14
+
15
+
(** Check if a year is a leap year *)
16
+
let is_leap_year year =
17
+
(year mod 400 = 0) || (year mod 4 = 0 && year mod 100 <> 0)
18
+
19
+
(** Get maximum day for a given month/year *)
20
+
let max_day_for_month year month =
21
+
if month = 2 && is_leap_year year then 29
22
+
else days_in_month.(month - 1)
23
+
24
+
(** Years in the 400-year cycle that have 53 weeks *)
25
+
let special_years_mod_400 = [|
26
+
4; 9; 15; 20; 26; 32; 37; 43; 48; 54; 60; 65; 71; 76; 82; 88; 93; 99;
27
+
105; 111; 116; 122; 128; 133; 139; 144; 150; 156; 161; 167; 172; 178;
28
+
184; 189; 195; 201; 207; 212; 218; 224; 229; 235; 240; 246; 252; 257;
29
+
263; 268; 274; 280; 285; 291; 296; 303; 308; 314; 320; 325; 331; 336;
30
+
342; 348; 353; 359; 364; 370; 376; 381; 387; 392; 398
31
+
|]
32
+
33
+
(** Check if a year has 53 weeks *)
34
+
let has_53_weeks year =
35
+
let year_mod = year mod 400 in
36
+
Array.exists (fun y -> y = year_mod) special_years_mod_400
37
+
38
+
module Year = struct
39
+
let name = "year"
40
+
41
+
let validate s =
42
+
let len = String.length s in
43
+
if len < 4 then
44
+
Error "Year must be at least 4 digits"
45
+
else if not (is_all_digits s) then
46
+
Error "Year must contain only digits"
47
+
else
48
+
match parse_int s with
49
+
| None -> Error "Year value out of range"
50
+
| Some year ->
51
+
if year < 1 then
52
+
Error "Year cannot be less than 1"
53
+
else
54
+
Ok ()
55
+
56
+
let is_valid s = Result.is_ok (validate s)
57
+
end
58
+
59
+
module Month = struct
60
+
let name = "month"
61
+
62
+
let validate s =
63
+
if String.length s < 7 then
64
+
Error "Month must be in YYYY-MM format"
65
+
else
66
+
let pattern = Str.regexp "^\\([0-9]+\\)-\\([0-9][0-9]\\)$" in
67
+
if not (Str.string_match pattern s 0) then
68
+
Error "Month must be in YYYY-MM format"
69
+
else
70
+
let year_s = Str.matched_group 1 s in
71
+
let month_s = Str.matched_group 2 s in
72
+
match (parse_int year_s, parse_int month_s) with
73
+
| None, _ | _, None -> Error "Year or month out of range"
74
+
| Some year, Some month ->
75
+
if year < 1 then
76
+
Error "Year cannot be less than 1"
77
+
else if month < 1 then
78
+
Error "Month cannot be less than 1"
79
+
else if month > 12 then
80
+
Error "Month cannot be greater than 12"
81
+
else
82
+
Ok ()
83
+
84
+
let is_valid s = Result.is_ok (validate s)
85
+
end
86
+
87
+
module Week = struct
88
+
let name = "week"
89
+
90
+
let validate s =
91
+
let pattern = Str.regexp "^\\([0-9]+\\)-W\\([0-9][0-9]\\)$" in
92
+
if not (Str.string_match pattern s 0) then
93
+
Error "Week must be in YYYY-Www format"
94
+
else
95
+
let year_s = Str.matched_group 1 s in
96
+
let week_s = Str.matched_group 2 s in
97
+
match (parse_int year_s, parse_int week_s) with
98
+
| None, _ | _, None -> Error "Year or week out of range"
99
+
| Some year, Some week ->
100
+
if year < 1 then
101
+
Error "Year cannot be less than 1"
102
+
else if week < 1 then
103
+
Error "Week cannot be less than 1"
104
+
else if week > 53 then
105
+
Error "Week out of range"
106
+
else if week = 53 && not (has_53_weeks year) then
107
+
Error "Week out of range"
108
+
else
109
+
Ok ()
110
+
111
+
let is_valid s = Result.is_ok (validate s)
112
+
end
113
+
114
+
module Date = struct
115
+
let name = "date"
116
+
117
+
let validate s =
118
+
let pattern = Str.regexp "^\\([0-9]+\\)-\\([0-9][0-9]\\)-\\([0-9][0-9]\\)$" in
119
+
if not (Str.string_match pattern s 0) then
120
+
Error "Date must be in YYYY-MM-DD format"
121
+
else
122
+
let year_s = Str.matched_group 1 s in
123
+
let month_s = Str.matched_group 2 s in
124
+
let day_s = Str.matched_group 3 s in
125
+
if String.length year_s < 4 then
126
+
Error "Year must be at least 4 digits"
127
+
else
128
+
match (parse_int year_s, parse_int month_s, parse_int day_s) with
129
+
| None, _, _ | _, None, _ | _, _, None ->
130
+
Error "Year, month, or day out of range"
131
+
| Some year, Some month, Some day ->
132
+
if year < 1 then
133
+
Error "Year cannot be less than 1"
134
+
else if month < 1 then
135
+
Error "Month cannot be less than 1"
136
+
else if month > 12 then
137
+
Error "Month cannot be greater than 12"
138
+
else if day < 1 then
139
+
Error "Day cannot be less than 1"
140
+
else
141
+
let max_day = max_day_for_month year month in
142
+
if day > max_day then
143
+
Error "Day out of range"
144
+
else
145
+
Ok ()
146
+
147
+
let is_valid s = Result.is_ok (validate s)
148
+
end
149
+
150
+
module Time = struct
151
+
let name = "time"
152
+
153
+
let validate s =
154
+
let pattern = Str.regexp "^\\([0-9][0-9]\\):\\([0-9][0-9]\\)\\(:\\([0-9][0-9]\\)\\(\\.\\([0-9]+\\)\\)?\\)?$" in
155
+
if not (Str.string_match pattern s 0) then
156
+
Error "Time must be in HH:MM[:SS[.sss]] format"
157
+
else
158
+
let hour_s = Str.matched_group 1 s in
159
+
let minute_s = Str.matched_group 2 s in
160
+
let second_s = try Some (Str.matched_group 4 s) with Not_found -> None in
161
+
let millis_s = try Some (Str.matched_group 6 s) with Not_found -> None in
162
+
match (parse_int hour_s, parse_int minute_s) with
163
+
| None, _ | _, None -> Error "Hour or minute out of range"
164
+
| Some hour, Some minute ->
165
+
if hour > 23 then
166
+
Error "Hour cannot be greater than 23"
167
+
else if minute > 59 then
168
+
Error "Minute cannot be greater than 59"
169
+
else
170
+
match second_s with
171
+
| None -> Ok ()
172
+
| Some sec_s ->
173
+
match parse_int sec_s with
174
+
| None -> Error "Seconds out of range"
175
+
| Some second ->
176
+
if second > 59 then
177
+
Error "Second cannot be greater than 59"
178
+
else
179
+
match millis_s with
180
+
| None -> Ok ()
181
+
| Some ms ->
182
+
if String.length ms > 3 then
183
+
Error "A fraction of a second must be one, two, or three digits"
184
+
else if not (is_all_digits ms) then
185
+
Error "Invalid milliseconds"
186
+
else
187
+
Ok ()
188
+
189
+
let is_valid s = Result.is_ok (validate s)
190
+
end
191
+
192
+
module Datetime_local = struct
193
+
let name = "local datetime"
194
+
195
+
let validate s =
196
+
let pattern = Str.regexp "^\\(.+\\)[T ]\\(.+\\)$" in
197
+
if not (Str.string_match pattern s 0) then
198
+
Error "Datetime must be in YYYY-MM-DD[T ]HH:MM[:SS[.sss]] format"
199
+
else
200
+
let date_s = Str.matched_group 1 s in
201
+
let time_s = Str.matched_group 2 s in
202
+
match Date.validate date_s with
203
+
| Error e -> Error ("Invalid date: " ^ e)
204
+
| Ok () ->
205
+
match Time.validate time_s with
206
+
| Error e -> Error ("Invalid time: " ^ e)
207
+
| Ok () -> Ok ()
208
+
209
+
let is_valid s = Result.is_ok (validate s)
210
+
end
211
+
212
+
module Datetime = struct
213
+
let name = "datetime"
214
+
215
+
let validate s =
216
+
if not (String.ends_with ~suffix:"Z" s) then
217
+
Error "Global datetime must end with 'Z'"
218
+
else
219
+
let s_without_z = String.sub s 0 (String.length s - 1) in
220
+
match Datetime_local.validate s_without_z with
221
+
| Error e -> Error e
222
+
| Ok () -> Ok ()
223
+
224
+
let is_valid s = Result.is_ok (validate s)
225
+
end
226
+
227
+
let datatypes = [
228
+
(module Year : Datatype.S);
229
+
(module Month : Datatype.S);
230
+
(module Week : Datatype.S);
231
+
(module Date : Datatype.S);
232
+
(module Time : Datatype.S);
233
+
(module Datetime_local : Datatype.S);
234
+
(module Datetime : Datatype.S);
235
+
]
+74
lib/html5_checker/datatype/dt_datetime.mli
+74
lib/html5_checker/datatype/dt_datetime.mli
···
1
+
(** Date and time datatype validators for HTML5.
2
+
3
+
This module provides validators for various HTML5 date and time formats
4
+
as specified in the HTML5 standard. Each validator checks that strings
5
+
conform to the specific format and contain valid values. *)
6
+
7
+
(** Year datatype (YYYY format, minimum 1).
8
+
9
+
Validates a year string consisting of 4 or more ASCII digits.
10
+
The year must be at least 1. *)
11
+
module Year : Datatype.S
12
+
13
+
(** Month datatype (YYYY-MM format).
14
+
15
+
Validates a month string in the format YYYY-MM where:
16
+
- YYYY is a valid year (>= 1)
17
+
- MM is a month from 01 to 12 *)
18
+
module Month : Datatype.S
19
+
20
+
(** Week datatype (YYYY-Www format).
21
+
22
+
Validates a week string in the format YYYY-Www where:
23
+
- YYYY is a valid year (>= 1)
24
+
- W is the literal character 'W'
25
+
- ww is a week number from 01 to 53
26
+
27
+
Week 53 is only valid for years that have 53 weeks in the ISO 8601
28
+
week-numbering calendar. *)
29
+
module Week : Datatype.S
30
+
31
+
(** Date datatype (YYYY-MM-DD format).
32
+
33
+
Validates a date string in the format YYYY-MM-DD where:
34
+
- YYYY is a valid year (>= 1)
35
+
- MM is a month from 01 to 12
36
+
- DD is a day valid for the given month/year
37
+
38
+
Handles leap years correctly. *)
39
+
module Date : Datatype.S
40
+
41
+
(** Time datatype (HH:MM[:SS[.sss]] format).
42
+
43
+
Validates a time string where:
44
+
- HH is hours from 00 to 23
45
+
- MM is minutes from 00 to 59
46
+
- SS (optional) is seconds from 00 to 59
47
+
- sss (optional) is milliseconds (1-3 digits)
48
+
49
+
Valid formats:
50
+
- HH:MM
51
+
- HH:MM:SS
52
+
- HH:MM:SS.s
53
+
- HH:MM:SS.ss
54
+
- HH:MM:SS.sss *)
55
+
module Time : Datatype.S
56
+
57
+
(** Local datetime datatype (YYYY-MM-DD[T ]HH:MM[:SS[.sss]] format).
58
+
59
+
Validates a local datetime string combining date and time with 'T' or
60
+
space separator. The date must be valid and the time must be valid.
61
+
62
+
This format does not include timezone information. *)
63
+
module Datetime_local : Datatype.S
64
+
65
+
(** Global datetime datatype (YYYY-MM-DD[T ]HH:MM[:SS[.sss]]Z format).
66
+
67
+
Validates a global datetime string in UTC (ending with 'Z').
68
+
The date must be valid and the time must be valid.
69
+
70
+
This is the format for datetime values that include timezone (UTC). *)
71
+
module Datetime : Datatype.S
72
+
73
+
(** List of all datetime datatypes *)
74
+
val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_decoding.ml
+20
lib/html5_checker/datatype/dt_decoding.ml
···
1
+
(** Image decoding attribute validation for HTML5 *)
2
+
3
+
module Decoding = struct
4
+
let name = "decoding"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "" | "sync" | "async" | "auto" -> Ok ()
10
+
| _ ->
11
+
Error
12
+
(Printf.sprintf
13
+
"The value '%s' is not a valid decoding value. Expected empty \
14
+
string, 'sync', 'async', or 'auto'."
15
+
s)
16
+
17
+
let is_valid s = Result.is_ok (validate s)
18
+
end
19
+
20
+
let datatypes = [ (module Decoding : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_decoding.mli
+24
lib/html5_checker/datatype/dt_decoding.mli
···
1
+
(** Image decoding attribute datatype validator.
2
+
3
+
This module provides a validator for the decoding attribute used to provide
4
+
a hint for image decoding, as defined by the HTML5 spec. *)
5
+
6
+
(** Decoding attribute validator.
7
+
8
+
Validates decoding attribute values which can be:
9
+
- "" (empty string, default decoding behavior)
10
+
- "sync" (decode synchronously for atomic presentation)
11
+
- "async" (decode asynchronously to avoid delaying other content)
12
+
- "auto" (no preference for decoding mode)
13
+
14
+
Values are case-insensitive after ASCII lowercasing.
15
+
16
+
Examples:
17
+
- ""
18
+
- "sync"
19
+
- "async"
20
+
- "auto" *)
21
+
module Decoding : Datatype.S
22
+
23
+
(** List of all datatypes defined in this module *)
24
+
val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_dir.ml
+20
lib/html5_checker/datatype/dt_dir.ml
···
1
+
(** Text direction attribute validation for HTML5 *)
2
+
3
+
module Dir = struct
4
+
let name = "dir"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "" | "ltr" | "rtl" | "auto" -> Ok ()
10
+
| _ ->
11
+
Error
12
+
(Printf.sprintf
13
+
"The value '%s' is not a valid dir value. Expected empty \
14
+
string, 'ltr', 'rtl', or 'auto'."
15
+
s)
16
+
17
+
let is_valid s = Result.is_ok (validate s)
18
+
end
19
+
20
+
let datatypes = [ (module Dir : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_dir.mli
+24
lib/html5_checker/datatype/dt_dir.mli
···
1
+
(** Text direction attribute datatype validator.
2
+
3
+
This module provides a validator for the dir attribute used to specify
4
+
the text directionality of element content, as defined by the HTML5 spec. *)
5
+
6
+
(** Dir attribute validator.
7
+
8
+
Validates dir attribute values which can be:
9
+
- "" (empty string, inherits directionality)
10
+
- "ltr" (left-to-right text direction)
11
+
- "rtl" (right-to-left text direction)
12
+
- "auto" (directionality determined from content)
13
+
14
+
Values are case-insensitive after ASCII lowercasing.
15
+
16
+
Examples:
17
+
- ""
18
+
- "ltr"
19
+
- "rtl"
20
+
- "auto" *)
21
+
module Dir : Datatype.S
22
+
23
+
(** List of all datatypes defined in this module *)
24
+
val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_draggable.ml
+20
lib/html5_checker/datatype/dt_draggable.ml
···
1
+
(** Draggable attribute validation for HTML5 *)
2
+
3
+
module Draggable = struct
4
+
let name = "draggable"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "true" | "false" -> Ok ()
10
+
| _ ->
11
+
Error
12
+
(Printf.sprintf
13
+
"The value '%s' is not a valid draggable value. Expected 'true' \
14
+
or 'false'."
15
+
s)
16
+
17
+
let is_valid s = Result.is_ok (validate s)
18
+
end
19
+
20
+
let datatypes = [ (module Draggable : Datatype.S) ]
+20
lib/html5_checker/datatype/dt_draggable.mli
+20
lib/html5_checker/datatype/dt_draggable.mli
···
1
+
(** Draggable attribute datatype validator for HTML5.
2
+
3
+
This module provides a validator for the draggable attribute, as defined by
4
+
the HTML5 specification. *)
5
+
6
+
(** Draggable attribute validator.
7
+
8
+
Validates draggable attribute values which can be:
9
+
- "true" - the element is draggable
10
+
- "false" - the element is not draggable
11
+
12
+
Values are case-insensitive.
13
+
14
+
Examples:
15
+
- "true"
16
+
- "false" *)
17
+
module Draggable : Datatype.S
18
+
19
+
(** List of all datatypes defined in this module *)
20
+
val datatypes : Datatype.t list
+88
lib/html5_checker/datatype/dt_email.ml
+88
lib/html5_checker/datatype/dt_email.ml
···
1
+
(** Email address validation *)
2
+
3
+
(** Helper to check if a character is valid in email local/domain parts *)
4
+
let is_email_char c =
5
+
(c >= 'a' && c <= 'z')
6
+
|| (c >= 'A' && c <= 'Z')
7
+
|| (c >= '0' && c <= '9')
8
+
|| c = '.' || c = '-' || c = '_' || c = '+' || c = '='
9
+
10
+
(** Validate a single email address using simplified rules *)
11
+
let validate_email s =
12
+
let s = String.trim s in
13
+
if String.length s = 0 then Error "Email address must not be empty"
14
+
else
15
+
(* Check for exactly one @ symbol *)
16
+
let at_count = ref 0 in
17
+
let at_pos = ref (-1) in
18
+
String.iteri
19
+
(fun i c -> if c = '@' then (
20
+
incr at_count;
21
+
at_pos := i
22
+
))
23
+
s;
24
+
if !at_count = 0 then Error "Email address must contain an '@' character"
25
+
else if !at_count > 1 then
26
+
Error "Email address must contain exactly one '@' character"
27
+
else
28
+
let local = String.sub s 0 !at_pos in
29
+
let domain = String.sub s (!at_pos + 1) (String.length s - !at_pos - 1) in
30
+
31
+
(* Validate local part *)
32
+
if String.length local = 0 then
33
+
Error "Email address must have a local part before '@'"
34
+
else if local.[0] = '.' || local.[String.length local - 1] = '.' then
35
+
Error "Email local part must not start or end with '.'"
36
+
else if not (String.for_all is_email_char local) then
37
+
Error "Email local part contains invalid characters"
38
+
else (* Validate domain part *)
39
+
if String.length domain = 0 then
40
+
Error "Email address must have a domain part after '@'"
41
+
else if not (String.contains domain '.') then
42
+
Error "Email domain must contain at least one '.'"
43
+
else if domain.[0] = '.' || domain.[String.length domain - 1] = '.' then
44
+
Error "Email domain must not start or end with '.'"
45
+
else if
46
+
not
47
+
(String.for_all
48
+
(fun c -> is_email_char c || c = '.')
49
+
domain)
50
+
then Error "Email domain contains invalid characters"
51
+
else Ok ()
52
+
53
+
module Email = struct
54
+
let name = "email address"
55
+
let validate = validate_email
56
+
57
+
let is_valid s =
58
+
match validate s with
59
+
| Ok () -> true
60
+
| Error _ -> false
61
+
end
62
+
63
+
module Email_list = struct
64
+
let name = "email address list"
65
+
66
+
let validate s =
67
+
let s = String.trim s in
68
+
if String.length s = 0 then Error "Email list must not be empty"
69
+
else
70
+
(* Split on commas and validate each email *)
71
+
let emails = String.split_on_char ',' s in
72
+
let rec check_all = function
73
+
| [] -> Ok ()
74
+
| email :: rest -> (
75
+
match validate_email email with
76
+
| Ok () -> check_all rest
77
+
| Error msg ->
78
+
Error (Printf.sprintf "Invalid email in list: %s" msg))
79
+
in
80
+
check_all emails
81
+
82
+
let is_valid s =
83
+
match validate s with
84
+
| Ok () -> true
85
+
| Error _ -> false
86
+
end
87
+
88
+
let datatypes = [ (module Email : Datatype.S); (module Email_list : Datatype.S) ]
+23
lib/html5_checker/datatype/dt_email.mli
+23
lib/html5_checker/datatype/dt_email.mli
···
1
+
(** Email address datatype validators.
2
+
3
+
This module provides validators for email addresses and email address lists
4
+
as defined by the HTML5 specification. *)
5
+
6
+
(** Valid email address validator.
7
+
8
+
Validates a single email address. Uses simplified validation rules:
9
+
- Must contain exactly one '@' character
10
+
- Local part (before @) must be non-empty
11
+
- Domain part (after @) must be non-empty and contain at least one '.'
12
+
- Only ASCII characters allowed *)
13
+
module Email : Datatype.S
14
+
15
+
(** Comma-separated email address list validator.
16
+
17
+
Validates a comma-separated list of email addresses.
18
+
Each address in the list must be valid according to {!Email} rules.
19
+
Whitespace around commas is ignored. *)
20
+
module Email_list : Datatype.S
21
+
22
+
(** List of all datatypes defined in this module *)
23
+
val datatypes : Datatype.t list
+22
lib/html5_checker/datatype/dt_enterkeyhint.ml
+22
lib/html5_checker/datatype/dt_enterkeyhint.ml
···
1
+
(** Enter key hint attribute validation for HTML5 *)
2
+
3
+
module Enterkeyhint = struct
4
+
let name = "enterkeyhint"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "" | "enter" | "done" | "go" | "next" | "previous" | "search" | "send" ->
10
+
Ok ()
11
+
| _ ->
12
+
Error
13
+
(Printf.sprintf
14
+
"The value '%s' is not a valid enterkeyhint value. Expected \
15
+
one of: empty string, 'enter', 'done', 'go', 'next', \
16
+
'previous', 'search', or 'send'."
17
+
s)
18
+
19
+
let is_valid s = Result.is_ok (validate s)
20
+
end
21
+
22
+
let datatypes = [ (module Enterkeyhint : Datatype.S) ]
+28
lib/html5_checker/datatype/dt_enterkeyhint.mli
+28
lib/html5_checker/datatype/dt_enterkeyhint.mli
···
1
+
(** Enter key hint attribute datatype validator.
2
+
3
+
This module provides a validator for the enterkeyhint attribute used to
4
+
customize the enter key label on virtual keyboards, as defined by the HTML5 spec. *)
5
+
6
+
(** Enterkeyhint attribute validator.
7
+
8
+
Validates enterkeyhint attribute values which can be:
9
+
- "" (empty string, default enter key)
10
+
- "enter" (insert new line)
11
+
- "done" (close input method editor)
12
+
- "go" (navigate to target)
13
+
- "next" (advance to next field)
14
+
- "previous" (go back to previous field)
15
+
- "search" (perform search)
16
+
- "send" (submit or deliver)
17
+
18
+
Values are case-insensitive after ASCII lowercasing.
19
+
20
+
Examples:
21
+
- ""
22
+
- "done"
23
+
- "next"
24
+
- "search" *)
25
+
module Enterkeyhint : Datatype.S
26
+
27
+
(** List of all datatypes defined in this module *)
28
+
val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_fetchpriority.ml
+20
lib/html5_checker/datatype/dt_fetchpriority.ml
···
1
+
(** Fetch priority attribute validation for HTML5 *)
2
+
3
+
module Fetchpriority = struct
4
+
let name = "fetchpriority"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "" | "high" | "low" | "auto" -> Ok ()
10
+
| _ ->
11
+
Error
12
+
(Printf.sprintf
13
+
"The value '%s' is not a valid fetchpriority value. Expected \
14
+
empty string, 'high', 'low', or 'auto'."
15
+
s)
16
+
17
+
let is_valid s = Result.is_ok (validate s)
18
+
end
19
+
20
+
let datatypes = [ (module Fetchpriority : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_fetchpriority.mli
+24
lib/html5_checker/datatype/dt_fetchpriority.mli
···
1
+
(** Fetch priority attribute datatype validator.
2
+
3
+
This module provides a validator for the fetchpriority attribute used to
4
+
provide a hint for resource fetch priority, as defined by the HTML5 spec. *)
5
+
6
+
(** Fetchpriority attribute validator.
7
+
8
+
Validates fetchpriority attribute values which can be:
9
+
- "" (empty string, default fetch priority)
10
+
- "high" (fetch at high priority relative to other resources)
11
+
- "low" (fetch at low priority relative to other resources)
12
+
- "auto" (no preference for fetch priority)
13
+
14
+
Values are case-insensitive after ASCII lowercasing.
15
+
16
+
Examples:
17
+
- ""
18
+
- "high"
19
+
- "low"
20
+
- "auto" *)
21
+
module Fetchpriority : Datatype.S
22
+
23
+
(** List of all datatypes defined in this module *)
24
+
val datatypes : Datatype.t list
+254
lib/html5_checker/datatype/dt_float.ml
+254
lib/html5_checker/datatype/dt_float.ml
···
1
+
(** Valid HTML5 floating point number *)
2
+
module Float_ = struct
3
+
let name = "floating point number"
4
+
5
+
type state =
6
+
| At_start
7
+
| At_start_minus_seen
8
+
| In_integer_part_digits_seen
9
+
| Dot_seen
10
+
| E_seen
11
+
| In_decimal_part_digits_seen
12
+
| In_exponent_sign_seen
13
+
| In_exponent_digits_seen
14
+
15
+
let validate s =
16
+
let len = String.length s in
17
+
let rec parse i state =
18
+
if i >= len then
19
+
match state with
20
+
| In_integer_part_digits_seen | In_decimal_part_digits_seen
21
+
| In_exponent_digits_seen ->
22
+
Ok ()
23
+
| At_start -> Error "The empty string is not a valid floating point number."
24
+
| At_start_minus_seen ->
25
+
Error "The minus sign alone is not a valid floating point number."
26
+
| Dot_seen ->
27
+
Error "A floating point number must not end with the decimal point."
28
+
| E_seen ->
29
+
Error "A floating point number must not end with the exponent 'e'."
30
+
| In_exponent_sign_seen ->
31
+
Error
32
+
"A floating point number must not end with only a sign in the \
33
+
exponent."
34
+
else
35
+
let c = s.[i] in
36
+
match state with
37
+
| At_start ->
38
+
if c = '-' then parse (i + 1) At_start_minus_seen
39
+
else if c = '.' then parse (i + 1) Dot_seen
40
+
else if Datatype.is_ascii_digit c then
41
+
parse (i + 1) In_integer_part_digits_seen
42
+
else
43
+
Error
44
+
(Printf.sprintf
45
+
"Expected a minus sign or a digit but saw '%c' instead." c)
46
+
| At_start_minus_seen ->
47
+
if Datatype.is_ascii_digit c then
48
+
parse (i + 1) In_integer_part_digits_seen
49
+
else Error (Printf.sprintf "Expected a digit but saw '%c' instead." c)
50
+
| In_integer_part_digits_seen ->
51
+
if c = '.' then parse (i + 1) Dot_seen
52
+
else if c = 'e' || c = 'E' then parse (i + 1) E_seen
53
+
else if Datatype.is_ascii_digit c then
54
+
parse (i + 1) In_integer_part_digits_seen
55
+
else
56
+
Error
57
+
(Printf.sprintf
58
+
"Expected a decimal point, 'e', 'E' or a digit but saw '%c' \
59
+
instead."
60
+
c)
61
+
| Dot_seen ->
62
+
if Datatype.is_ascii_digit c then
63
+
parse (i + 1) In_decimal_part_digits_seen
64
+
else
65
+
Error
66
+
(Printf.sprintf
67
+
"Expected a digit after the decimal point but saw '%c' instead."
68
+
c)
69
+
| In_decimal_part_digits_seen ->
70
+
if Datatype.is_ascii_digit c then
71
+
parse (i + 1) In_decimal_part_digits_seen
72
+
else if c = 'e' || c = 'E' then parse (i + 1) E_seen
73
+
else
74
+
Error
75
+
(Printf.sprintf "Expected 'e', 'E' or a digit but saw '%c' instead."
76
+
c)
77
+
| E_seen ->
78
+
if c = '-' || c = '+' then parse (i + 1) In_exponent_sign_seen
79
+
else if Datatype.is_ascii_digit c then
80
+
parse (i + 1) In_exponent_digits_seen
81
+
else
82
+
Error
83
+
(Printf.sprintf
84
+
"Expected a minus sign, a plus sign or a digit but saw '%c' \
85
+
instead."
86
+
c)
87
+
| In_exponent_sign_seen ->
88
+
if Datatype.is_ascii_digit c then
89
+
parse (i + 1) In_exponent_digits_seen
90
+
else Error (Printf.sprintf "Expected a digit but saw '%c' instead." c)
91
+
| In_exponent_digits_seen ->
92
+
if Datatype.is_ascii_digit c then
93
+
parse (i + 1) In_exponent_digits_seen
94
+
else Error (Printf.sprintf "Expected a digit but saw '%c' instead." c)
95
+
in
96
+
parse 0 At_start
97
+
98
+
let is_valid s = Result.is_ok (validate s)
99
+
end
100
+
101
+
(** Non-negative floating point number (>= 0) *)
102
+
module Float_non_negative = struct
103
+
let name = "non-negative floating point number"
104
+
105
+
type state =
106
+
| At_start
107
+
| At_start_minus_seen
108
+
| In_integer_part_digits_seen
109
+
| In_integer_part_digits_seen_zero
110
+
| Dot_seen
111
+
| Dot_seen_zero
112
+
| E_seen
113
+
| In_decimal_part_digits_seen
114
+
| In_decimal_part_digits_seen_zero
115
+
| In_exponent_sign_seen
116
+
| In_exponent_digits_seen
117
+
118
+
let validate s =
119
+
let len = String.length s in
120
+
let rec parse i state =
121
+
if i >= len then
122
+
match state with
123
+
| In_integer_part_digits_seen | In_decimal_part_digits_seen
124
+
| In_integer_part_digits_seen_zero | In_decimal_part_digits_seen_zero
125
+
| In_exponent_digits_seen ->
126
+
Ok ()
127
+
| At_start ->
128
+
Error "The empty string is not a valid non-negative floating point number."
129
+
| At_start_minus_seen ->
130
+
Error
131
+
"The minus sign alone is not a valid non-negative floating point \
132
+
number."
133
+
| Dot_seen | Dot_seen_zero ->
134
+
Error
135
+
"A non-negative floating point number must not end with the \
136
+
decimal point."
137
+
| E_seen ->
138
+
Error
139
+
"A non-negative floating point number must not end with the \
140
+
exponent 'e'."
141
+
| In_exponent_sign_seen ->
142
+
Error
143
+
"A non-negative floating point number must not end with only a \
144
+
sign in the exponent."
145
+
else
146
+
let c = s.[i] in
147
+
match state with
148
+
| At_start ->
149
+
if c = '-' then parse (i + 1) At_start_minus_seen
150
+
else if c = '.' then parse (i + 1) Dot_seen
151
+
else if Datatype.is_ascii_digit c then
152
+
parse (i + 1) In_integer_part_digits_seen
153
+
else
154
+
Error
155
+
(Printf.sprintf
156
+
"Expected a minus sign or a digit but saw '%c' instead." c)
157
+
| At_start_minus_seen ->
158
+
if c = '0' then parse (i + 1) In_integer_part_digits_seen_zero
159
+
else Error (Printf.sprintf "Expected a zero but saw '%c' instead." c)
160
+
| In_integer_part_digits_seen ->
161
+
if c = '.' then parse (i + 1) Dot_seen
162
+
else if c = 'e' || c = 'E' then parse (i + 1) E_seen
163
+
else if Datatype.is_ascii_digit c then
164
+
parse (i + 1) In_integer_part_digits_seen
165
+
else
166
+
Error
167
+
(Printf.sprintf
168
+
"Expected a decimal point, 'e', 'E' or a digit but saw '%c' \
169
+
instead."
170
+
c)
171
+
| In_integer_part_digits_seen_zero ->
172
+
if c = '.' then parse (i + 1) Dot_seen_zero
173
+
else if c = 'e' || c = 'E' then parse (i + 1) E_seen
174
+
else if c = '0' then parse (i + 1) In_integer_part_digits_seen_zero
175
+
else
176
+
Error
177
+
(Printf.sprintf
178
+
"Expected a decimal point, 'e', 'E' or a zero but saw '%c' \
179
+
instead."
180
+
c)
181
+
| Dot_seen ->
182
+
if Datatype.is_ascii_digit c then
183
+
parse (i + 1) In_decimal_part_digits_seen
184
+
else
185
+
Error
186
+
(Printf.sprintf
187
+
"Expected a digit after the decimal point but saw '%c' instead."
188
+
c)
189
+
| Dot_seen_zero ->
190
+
if c = '0' then parse (i + 1) In_decimal_part_digits_seen_zero
191
+
else
192
+
Error
193
+
(Printf.sprintf
194
+
"Expected a zero after the decimal point but saw '%c' instead."
195
+
c)
196
+
| In_decimal_part_digits_seen ->
197
+
if Datatype.is_ascii_digit c then
198
+
parse (i + 1) In_decimal_part_digits_seen
199
+
else if c = 'e' || c = 'E' then parse (i + 1) E_seen
200
+
else
201
+
Error
202
+
(Printf.sprintf "Expected 'e', 'E' or a digit but saw '%c' instead."
203
+
c)
204
+
| In_decimal_part_digits_seen_zero ->
205
+
if c = '0' then parse (i + 1) In_decimal_part_digits_seen_zero
206
+
else if c = 'e' || c = 'E' then parse (i + 1) E_seen
207
+
else
208
+
Error
209
+
(Printf.sprintf "Expected 'e', 'E' or a zero but saw '%c' instead."
210
+
c)
211
+
| E_seen ->
212
+
if c = '-' || c = '+' then parse (i + 1) In_exponent_sign_seen
213
+
else if Datatype.is_ascii_digit c then
214
+
parse (i + 1) In_exponent_digits_seen
215
+
else
216
+
Error
217
+
(Printf.sprintf
218
+
"Expected a minus sign, a plus sign or a digit but saw '%c' \
219
+
instead."
220
+
c)
221
+
| In_exponent_sign_seen ->
222
+
if Datatype.is_ascii_digit c then
223
+
parse (i + 1) In_exponent_digits_seen
224
+
else Error (Printf.sprintf "Expected a digit but saw '%c' instead." c)
225
+
| In_exponent_digits_seen ->
226
+
if Datatype.is_ascii_digit c then
227
+
parse (i + 1) In_exponent_digits_seen
228
+
else Error (Printf.sprintf "Expected a digit but saw '%c' instead." c)
229
+
in
230
+
parse 0 At_start
231
+
232
+
let is_valid s = Result.is_ok (validate s)
233
+
end
234
+
235
+
(** Positive floating point number (> 0) *)
236
+
module Float_positive = struct
237
+
let name = "positive floating point number"
238
+
239
+
(* For positive floats, we validate it's a valid non-negative float,
240
+
then check it's not zero *)
241
+
let validate s =
242
+
match Float_non_negative.validate s with
243
+
| Error _ as e -> e
244
+
| Ok () -> (
245
+
(* Parse as float and check if it's positive *)
246
+
try
247
+
let f = float_of_string s in
248
+
if f > 0.0 then Ok ()
249
+
else Error "The value must be a positive floating point number."
250
+
with Failure _ ->
251
+
Error "Invalid floating point number format.")
252
+
253
+
let is_valid s = Result.is_ok (validate s)
254
+
end
+16
lib/html5_checker/datatype/dt_float.mli
+16
lib/html5_checker/datatype/dt_float.mli
···
1
+
(** Floating point datatype validators for HTML5 *)
2
+
3
+
(** Valid HTML5 floating point number *)
4
+
module Float_ : sig
5
+
include Datatype.S
6
+
end
7
+
8
+
(** Non-negative floating point number (>= 0) *)
9
+
module Float_non_negative : sig
10
+
include Datatype.S
11
+
end
12
+
13
+
(** Positive floating point number (> 0) *)
14
+
module Float_positive : sig
15
+
include Datatype.S
16
+
end
+28
lib/html5_checker/datatype/dt_form_enctype.ml
+28
lib/html5_checker/datatype/dt_form_enctype.ml
···
1
+
(** Form encoding type attribute validation based on HTML5 spec *)
2
+
3
+
(** Valid form enctype values *)
4
+
let valid_enctypes =
5
+
[
6
+
"application/x-www-form-urlencoded";
7
+
"multipart/form-data";
8
+
"text/plain";
9
+
]
10
+
11
+
module Form_enctype = struct
12
+
let name = "form-enctype"
13
+
14
+
let validate s =
15
+
let s_lower = Datatype.string_to_ascii_lowercase s in
16
+
if List.mem s_lower valid_enctypes then Ok ()
17
+
else
18
+
Error
19
+
(Printf.sprintf
20
+
"The value '%s' is not a valid form encoding type. Expected one of: \
21
+
%s."
22
+
s
23
+
(String.concat ", " valid_enctypes))
24
+
25
+
let is_valid s = Result.is_ok (validate s)
26
+
end
27
+
28
+
let datatypes = [ (module Form_enctype : Datatype.S) ]
+22
lib/html5_checker/datatype/dt_form_enctype.mli
+22
lib/html5_checker/datatype/dt_form_enctype.mli
···
1
+
(** Form encoding type attribute datatype validator.
2
+
3
+
This module provides a validator for the enctype and formenctype attributes
4
+
used on form and input/button elements, as defined by the HTML5 specification. *)
5
+
6
+
(** Form encoding type attribute validator.
7
+
8
+
Validates form enctype/formenctype attribute values which can be:
9
+
- application/x-www-form-urlencoded - Default encoding (form fields as name=value pairs)
10
+
- multipart/form-data - Multipart encoding (required for file uploads)
11
+
- text/plain - Plain text encoding (mostly for debugging)
12
+
13
+
Values are matched case-insensitively according to HTML5 spec.
14
+
15
+
Examples:
16
+
- "application/x-www-form-urlencoded"
17
+
- "multipart/form-data"
18
+
- "text/plain" *)
19
+
module Form_enctype : Datatype.S
20
+
21
+
(** List of all datatypes defined in this module *)
22
+
val datatypes : Datatype.t list
+22
lib/html5_checker/datatype/dt_form_method.ml
+22
lib/html5_checker/datatype/dt_form_method.ml
···
1
+
(** Form method attribute validation based on HTML5 spec *)
2
+
3
+
(** Valid form method values *)
4
+
let valid_methods = [ "get"; "post"; "dialog" ]
5
+
6
+
module Form_method = struct
7
+
let name = "form-method"
8
+
9
+
let validate s =
10
+
let s_lower = Datatype.string_to_ascii_lowercase s in
11
+
if List.mem s_lower valid_methods then Ok ()
12
+
else
13
+
Error
14
+
(Printf.sprintf
15
+
"The value '%s' is not a valid form method. Expected one of: %s."
16
+
s
17
+
(String.concat ", " valid_methods))
18
+
19
+
let is_valid s = Result.is_ok (validate s)
20
+
end
21
+
22
+
let datatypes = [ (module Form_method : Datatype.S) ]
+22
lib/html5_checker/datatype/dt_form_method.mli
+22
lib/html5_checker/datatype/dt_form_method.mli
···
1
+
(** Form method attribute datatype validator.
2
+
3
+
This module provides a validator for the method attribute used on
4
+
form elements, as defined by the HTML5 specification. *)
5
+
6
+
(** Form method attribute validator.
7
+
8
+
Validates form method attribute values which can be:
9
+
- get - GET method (form data submitted in URL query string)
10
+
- post - POST method (form data submitted in request body)
11
+
- dialog - Dialog method (closes the dialog containing the form)
12
+
13
+
Values are matched case-insensitively according to HTML5 spec.
14
+
15
+
Examples:
16
+
- "get"
17
+
- "post"
18
+
- "dialog" *)
19
+
module Form_method : Datatype.S
20
+
21
+
(** List of all datatypes defined in this module *)
22
+
val datatypes : Datatype.t list
+29
lib/html5_checker/datatype/dt_hash.ml
+29
lib/html5_checker/datatype/dt_hash.ml
···
1
+
(** Hash-name and fragment identifier datatype validators for HTML5. *)
2
+
3
+
module Hash_name : Datatype.S = struct
4
+
let name = "hash-name reference"
5
+
6
+
let validate s =
7
+
let len = String.length s in
8
+
if len = 0 then Error "The empty string is not a valid hash-name reference."
9
+
else if s.[0] <> '#' then
10
+
Error "A hash-name reference must start with \"#\"."
11
+
else if len = 1 then
12
+
Error "A hash-name reference must have at least one character after \"#\"."
13
+
else Ok ()
14
+
15
+
let is_valid s = Result.is_ok (validate s)
16
+
end
17
+
18
+
module Hash_or_empty : Datatype.S = struct
19
+
let name = "hash-name reference (potentially empty)"
20
+
21
+
let validate s =
22
+
if String.length s = 0 then Ok ()
23
+
else Hash_name.validate s
24
+
25
+
let is_valid s = Result.is_ok (validate s)
26
+
end
27
+
28
+
let datatypes =
29
+
[ (module Hash_name : Datatype.S); (module Hash_or_empty : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_hash.mli
+24
lib/html5_checker/datatype/dt_hash.mli
···
1
+
(** Hash-name and fragment identifier datatype validators for HTML5.
2
+
3
+
This module provides validators for fragment identifiers (hash-name
4
+
references) used in URLs to reference specific parts of a document. *)
5
+
6
+
(** Hash-name reference validator.
7
+
8
+
A hash-name reference is a fragment identifier that starts with '#'
9
+
followed by one or more characters.
10
+
11
+
Requirements:
12
+
- Must not be empty
13
+
- Must start with '#'
14
+
- Must have at least one character after '#' *)
15
+
module Hash_name : Datatype.S
16
+
17
+
(** Hash-name or empty validator.
18
+
19
+
Same as Hash_name but allows empty strings. This is used for attributes
20
+
where the hash-name reference is optional. *)
21
+
module Hash_or_empty : Datatype.S
22
+
23
+
(** List of all hash-related datatypes for registration. *)
24
+
val datatypes : Datatype.t list
+54
lib/html5_checker/datatype/dt_id.ml
+54
lib/html5_checker/datatype/dt_id.ml
···
1
+
(** ID-related datatype validators for HTML5. *)
2
+
3
+
module Id : Datatype.S = struct
4
+
let name = "id"
5
+
6
+
let validate s =
7
+
let len = String.length s in
8
+
if len = 0 then Error "An ID must not be the empty string."
9
+
else
10
+
match String.index_opt s ' ' with
11
+
| Some _ -> Error "An ID must not contain whitespace."
12
+
| None -> (
13
+
(* Check for other whitespace characters *)
14
+
let rec check_whitespace i =
15
+
if i >= len then Ok ()
16
+
else if Datatype.is_whitespace s.[i] then
17
+
Error "An ID must not contain whitespace."
18
+
else check_whitespace (i + 1)
19
+
in
20
+
check_whitespace 0)
21
+
22
+
let is_valid s = Result.is_ok (validate s)
23
+
end
24
+
25
+
module Idref : Datatype.S = struct
26
+
let name = "id reference"
27
+
28
+
(* An IDREF has the same validation rules as an ID *)
29
+
let validate = Id.validate
30
+
let is_valid s = Result.is_ok (validate s)
31
+
end
32
+
33
+
module Idrefs : Datatype.S = struct
34
+
let name = "id references"
35
+
36
+
let validate s =
37
+
(* IDREFS must contain at least one non-whitespace character *)
38
+
let len = String.length s in
39
+
let rec check_non_whitespace i =
40
+
if i >= len then
41
+
Error "An IDREFS value must contain at least one non-whitespace character."
42
+
else if not (Datatype.is_whitespace s.[i]) then Ok ()
43
+
else check_non_whitespace (i + 1)
44
+
in
45
+
check_non_whitespace 0
46
+
47
+
let is_valid s = Result.is_ok (validate s)
48
+
end
49
+
50
+
let datatypes =
51
+
[ (module Id : Datatype.S)
52
+
; (module Idref : Datatype.S)
53
+
; (module Idrefs : Datatype.S)
54
+
]
+35
lib/html5_checker/datatype/dt_id.mli
+35
lib/html5_checker/datatype/dt_id.mli
···
1
+
(** ID-related datatype validators for HTML5.
2
+
3
+
This module provides validators for HTML5 ID attributes and ID references
4
+
based on the Nu HTML Checker's implementation. *)
5
+
6
+
(** ID validator.
7
+
8
+
Accepts any string that consists of one or more characters and does not
9
+
contain any whitespace characters.
10
+
11
+
An ID must be:
12
+
- Non-empty
13
+
- Contain no whitespace characters (space, tab, LF, FF, CR) *)
14
+
module Id : Datatype.S
15
+
16
+
(** ID reference validator.
17
+
18
+
An IDREF has the same validation rules as an ID - it must be non-empty
19
+
and contain no whitespace. The semantic difference is that an IDREF
20
+
references an existing ID rather than defining one. *)
21
+
module Idref : Datatype.S
22
+
23
+
(** ID references validator.
24
+
25
+
Accepts a space-separated list of ID references. The value must contain
26
+
at least one non-whitespace character.
27
+
28
+
IDREFS values:
29
+
- Must not be empty or contain only whitespace
30
+
- Can contain multiple space-separated ID references
31
+
- Each individual reference follows IDREF rules *)
32
+
module Idrefs : Datatype.S
33
+
34
+
(** List of all ID-related datatypes for registration. *)
35
+
val datatypes : Datatype.t list
+46
lib/html5_checker/datatype/dt_input_type.ml
+46
lib/html5_checker/datatype/dt_input_type.ml
···
1
+
(** Input type attribute validation based on HTML5 spec *)
2
+
3
+
(** Valid input type values *)
4
+
let valid_types =
5
+
[
6
+
"hidden";
7
+
"text";
8
+
"search";
9
+
"tel";
10
+
"url";
11
+
"email";
12
+
"password";
13
+
"date";
14
+
"month";
15
+
"week";
16
+
"time";
17
+
"datetime-local";
18
+
"number";
19
+
"range";
20
+
"color";
21
+
"checkbox";
22
+
"radio";
23
+
"file";
24
+
"submit";
25
+
"image";
26
+
"reset";
27
+
"button";
28
+
]
29
+
30
+
module Input_type = struct
31
+
let name = "input-type"
32
+
33
+
let validate s =
34
+
let s_lower = Datatype.string_to_ascii_lowercase s in
35
+
if List.mem s_lower valid_types then Ok ()
36
+
else
37
+
Error
38
+
(Printf.sprintf
39
+
"The value '%s' is not a valid input type. Expected one of: %s."
40
+
s
41
+
(String.concat ", " valid_types))
42
+
43
+
let is_valid s = Result.is_ok (validate s)
44
+
end
45
+
46
+
let datatypes = [ (module Input_type : Datatype.S) ]
+42
lib/html5_checker/datatype/dt_input_type.mli
+42
lib/html5_checker/datatype/dt_input_type.mli
···
1
+
(** Input type attribute datatype validator.
2
+
3
+
This module provides a validator for the type attribute used on
4
+
input elements, as defined by the HTML5 specification. *)
5
+
6
+
(** Input type attribute validator.
7
+
8
+
Validates input type attribute values which can be:
9
+
- hidden - Hidden input field
10
+
- text - Single-line text field
11
+
- search - Search input field
12
+
- tel - Telephone number input field
13
+
- url - URL input field
14
+
- email - Email address input field
15
+
- password - Password input field
16
+
- date - Date input field (year, month, day)
17
+
- month - Month input field (year, month)
18
+
- week - Week input field (year, week)
19
+
- time - Time input field (hour, minute, seconds, fractional seconds)
20
+
- datetime-local - Local date and time input field
21
+
- number - Numeric input field
22
+
- range - Range control (slider)
23
+
- color - Color picker
24
+
- checkbox - Checkbox
25
+
- radio - Radio button
26
+
- file - File upload control
27
+
- submit - Submit button
28
+
- image - Image submit button
29
+
- reset - Reset button
30
+
- button - Push button
31
+
32
+
Values are matched case-insensitively according to HTML5 spec.
33
+
34
+
Examples:
35
+
- "text"
36
+
- "email"
37
+
- "datetime-local"
38
+
- "submit" *)
39
+
module Input_type : Datatype.S
40
+
41
+
(** List of all datatypes defined in this module *)
42
+
val datatypes : Datatype.t list
+23
lib/html5_checker/datatype/dt_inputmode.ml
+23
lib/html5_checker/datatype/dt_inputmode.ml
···
1
+
(** Input mode attribute validation for HTML5 *)
2
+
3
+
module Inputmode = struct
4
+
let name = "inputmode"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "" | "none" | "text" | "decimal" | "numeric" | "tel" | "search"
10
+
| "email" | "url" ->
11
+
Ok ()
12
+
| _ ->
13
+
Error
14
+
(Printf.sprintf
15
+
"The value '%s' is not a valid inputmode value. Expected one \
16
+
of: empty string, 'none', 'text', 'decimal', 'numeric', 'tel', \
17
+
'search', 'email', or 'url'."
18
+
s)
19
+
20
+
let is_valid s = Result.is_ok (validate s)
21
+
end
22
+
23
+
let datatypes = [ (module Inputmode : Datatype.S) ]
+29
lib/html5_checker/datatype/dt_inputmode.mli
+29
lib/html5_checker/datatype/dt_inputmode.mli
···
1
+
(** Input mode attribute datatype validator.
2
+
3
+
This module provides a validator for the inputmode attribute used to hint
4
+
at the type of data the user might enter, as defined by the HTML5 spec. *)
5
+
6
+
(** Inputmode attribute validator.
7
+
8
+
Validates inputmode attribute values which can be:
9
+
- "" (empty string, no specific input mode)
10
+
- "none" (no virtual keyboard)
11
+
- "text" (standard text input)
12
+
- "decimal" (decimal numeric input with locale-appropriate format)
13
+
- "numeric" (numeric input)
14
+
- "tel" (telephone number input)
15
+
- "search" (search input)
16
+
- "email" (email address input)
17
+
- "url" (URL input)
18
+
19
+
Values are case-insensitive after ASCII lowercasing.
20
+
21
+
Examples:
22
+
- ""
23
+
- "numeric"
24
+
- "email"
25
+
- "tel" *)
26
+
module Inputmode : Datatype.S
27
+
28
+
(** List of all datatypes defined in this module *)
29
+
val datatypes : Datatype.t list
+71
lib/html5_checker/datatype/dt_integer.ml
+71
lib/html5_checker/datatype/dt_integer.ml
···
1
+
(** Valid HTML5 integer (optional sign followed by digits) *)
2
+
module Integer = struct
3
+
let name = "integer"
4
+
5
+
let validate s =
6
+
let len = String.length s in
7
+
if len = 0 then Error "The empty string is not a valid integer."
8
+
else
9
+
let start_pos =
10
+
if s.[0] = '-' then
11
+
if len = 1 then failwith "unreachable"
12
+
else 1
13
+
else 0
14
+
in
15
+
(* First character must be minus or digit *)
16
+
if start_pos = 0 && not (Datatype.is_ascii_digit s.[0]) then
17
+
Error
18
+
(Printf.sprintf "Expected a minus sign or a digit but saw '%c' instead."
19
+
s.[0])
20
+
else
21
+
(* Rest must be digits *)
22
+
let rec check_digits i =
23
+
if i >= len then Ok ()
24
+
else if Datatype.is_ascii_digit s.[i] then check_digits (i + 1)
25
+
else
26
+
Error (Printf.sprintf "Expected a digit but saw '%c' instead." s.[i])
27
+
in
28
+
check_digits start_pos
29
+
30
+
let is_valid s = Result.is_ok (validate s)
31
+
end
32
+
33
+
(** Non-negative integer (>= 0) *)
34
+
module Integer_non_negative = struct
35
+
let name = "non-negative integer"
36
+
37
+
let validate s =
38
+
let len = String.length s in
39
+
if len = 0 then Error "The empty string is not a valid non-negative integer."
40
+
else
41
+
(* All characters must be digits *)
42
+
let rec check_digits i =
43
+
if i >= len then Ok ()
44
+
else if Datatype.is_ascii_digit s.[i] then check_digits (i + 1)
45
+
else Error (Printf.sprintf "Expected a digit but saw '%c' instead." s.[i])
46
+
in
47
+
check_digits 0
48
+
49
+
let is_valid s = Result.is_ok (validate s)
50
+
end
51
+
52
+
(** Positive integer (> 0) *)
53
+
module Integer_positive = struct
54
+
let name = "positive integer"
55
+
56
+
let validate s =
57
+
let len = String.length s in
58
+
if len = 0 then Error "The empty string is not a valid positive integer."
59
+
else
60
+
(* All characters must be digits *)
61
+
let rec check_digits i all_zeros =
62
+
if i >= len then
63
+
if all_zeros then Error "Zero is not a positive integer." else Ok ()
64
+
else if Datatype.is_ascii_digit s.[i] then
65
+
check_digits (i + 1) (all_zeros && s.[i] = '0')
66
+
else Error (Printf.sprintf "Expected a digit but saw '%c' instead." s.[i])
67
+
in
68
+
check_digits 0 true
69
+
70
+
let is_valid s = Result.is_ok (validate s)
71
+
end
+16
lib/html5_checker/datatype/dt_integer.mli
+16
lib/html5_checker/datatype/dt_integer.mli
···
1
+
(** Integer datatype validators for HTML5 *)
2
+
3
+
(** Valid HTML5 integer (optional sign followed by digits) *)
4
+
module Integer : sig
5
+
include Datatype.S
6
+
end
7
+
8
+
(** Non-negative integer (>= 0) *)
9
+
module Integer_non_negative : sig
10
+
include Datatype.S
11
+
end
12
+
13
+
(** Positive integer (> 0) *)
14
+
module Integer_positive : sig
15
+
include Datatype.S
16
+
end
+103
lib/html5_checker/datatype/dt_integrity.ml
+103
lib/html5_checker/datatype/dt_integrity.ml
···
1
+
(** Subresource integrity attribute validation *)
2
+
3
+
(** Valid hash algorithms *)
4
+
let valid_algorithms = [ "sha256"; "sha384"; "sha512" ]
5
+
6
+
(** Check if character is valid base64 character *)
7
+
let is_base64_char c =
8
+
(c >= 'A' && c <= 'Z')
9
+
|| (c >= 'a' && c <= 'z')
10
+
|| (c >= '0' && c <= '9')
11
+
|| c = '+' || c = '/' || c = '='
12
+
13
+
(** Validate base64 encoding *)
14
+
let validate_base64 s =
15
+
if String.length s = 0 then false
16
+
else
17
+
(* Check all characters are valid base64 *)
18
+
let all_valid = ref true in
19
+
for i = 0 to String.length s - 1 do
20
+
if not (is_base64_char s.[i]) then all_valid := false
21
+
done;
22
+
if not !all_valid then false
23
+
else
24
+
(* Check padding is at the end only *)
25
+
let has_padding = String.contains s '=' in
26
+
if not has_padding then true
27
+
else
28
+
(* Find first '=' *)
29
+
let first_eq = String.index s '=' in
30
+
(* All chars after first '=' must be '=' *)
31
+
let valid_padding = ref true in
32
+
for i = first_eq to String.length s - 1 do
33
+
if s.[i] <> '=' then valid_padding := false
34
+
done;
35
+
!valid_padding
36
+
&& (* At most 2 padding characters *)
37
+
String.length s - first_eq <= 2
38
+
39
+
(** Validate a single hash value *)
40
+
let validate_hash_value s =
41
+
let trimmed = Datatype.trim_html_spaces s in
42
+
if trimmed = "" then Error "Hash value must not be empty"
43
+
else
44
+
(* Split on '-' to get algorithm and hash *)
45
+
match String.index_opt trimmed '-' with
46
+
| None ->
47
+
Error
48
+
(Printf.sprintf
49
+
"Hash value '%s' must be in format 'algorithm-base64hash'" trimmed)
50
+
| Some dash_pos ->
51
+
let algorithm = String.sub trimmed 0 dash_pos in
52
+
let algorithm_lower = Datatype.string_to_ascii_lowercase algorithm in
53
+
if not (List.mem algorithm_lower valid_algorithms) then
54
+
Error
55
+
(Printf.sprintf
56
+
"Hash algorithm '%s' is not supported. Must be one of: %s"
57
+
algorithm (String.concat ", " valid_algorithms))
58
+
else
59
+
let rest = String.sub trimmed (dash_pos + 1) (String.length trimmed - dash_pos - 1) in
60
+
(* Split on '?' to separate hash from options *)
61
+
let hash_part =
62
+
match String.index_opt rest '?' with
63
+
| None -> rest
64
+
| Some q_pos -> String.sub rest 0 q_pos
65
+
in
66
+
if String.length hash_part = 0 then
67
+
Error "Hash value after algorithm must not be empty"
68
+
else if not (validate_base64 hash_part) then
69
+
Error
70
+
(Printf.sprintf
71
+
"Hash value '%s' is not valid base64 encoding" hash_part)
72
+
else Ok ()
73
+
74
+
(** Validate integrity attribute value *)
75
+
let validate_integrity s =
76
+
let trimmed = Datatype.trim_html_spaces s in
77
+
if trimmed = "" then Error "Integrity attribute must not be empty"
78
+
else
79
+
(* Split on whitespace *)
80
+
let hash_values = String.split_on_char ' ' trimmed in
81
+
let hash_values =
82
+
List.filter (fun h -> Datatype.trim_html_spaces h <> "") hash_values
83
+
in
84
+
if hash_values = [] then
85
+
Error "Integrity attribute must contain at least one hash value"
86
+
else
87
+
(* Validate each hash value *)
88
+
let rec check_hashes = function
89
+
| [] -> Ok ()
90
+
| h :: rest -> (
91
+
match validate_hash_value h with
92
+
| Error e -> Error e
93
+
| Ok () -> check_hashes rest)
94
+
in
95
+
check_hashes hash_values
96
+
97
+
module Integrity = struct
98
+
let name = "integrity"
99
+
let validate = validate_integrity
100
+
let is_valid s = Result.is_ok (validate s)
101
+
end
102
+
103
+
let datatypes = [ (module Integrity : Datatype.S) ]
+27
lib/html5_checker/datatype/dt_integrity.mli
+27
lib/html5_checker/datatype/dt_integrity.mli
···
1
+
(** Subresource integrity attribute validator.
2
+
3
+
This module provides a validator for the integrity attribute used on
4
+
script and link elements for subresource integrity checks, as defined
5
+
by the W3C Subresource Integrity specification. *)
6
+
7
+
(** Integrity attribute validator.
8
+
9
+
Validates integrity attribute values which contain space-separated hash
10
+
values. Each hash value consists of:
11
+
- An algorithm identifier (sha256, sha384, or sha512)
12
+
- A hyphen (-)
13
+
- The base64-encoded hash value
14
+
- Optional options preceded by '?'
15
+
16
+
Examples:
17
+
- "sha256-abc123..."
18
+
- "sha384-xyz789..."
19
+
- "sha256-abc123... sha512-def456..."
20
+
- "sha256-abc123...?ct=application/javascript"
21
+
22
+
The base64 encoding must be valid and the algorithm must be one of the
23
+
supported hash functions. *)
24
+
module Integrity : Datatype.S
25
+
26
+
(** List of all datatypes defined in this module *)
27
+
val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_kind.ml
+20
lib/html5_checker/datatype/dt_kind.ml
···
1
+
(** Kind attribute validation for HTML5 *)
2
+
3
+
module Kind = struct
4
+
let name = "kind"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "subtitles" | "captions" | "descriptions" | "chapters" | "metadata" -> Ok ()
10
+
| _ ->
11
+
Error
12
+
(Printf.sprintf
13
+
"The value '%s' is not a valid kind. Expected 'subtitles', \
14
+
'captions', 'descriptions', 'chapters', or 'metadata'."
15
+
s)
16
+
17
+
let is_valid s = Result.is_ok (validate s)
18
+
end
19
+
20
+
let datatypes = [ (module Kind : Datatype.S) ]
+29
lib/html5_checker/datatype/dt_kind.mli
+29
lib/html5_checker/datatype/dt_kind.mli
···
1
+
(** Kind attribute datatype validator for HTML5.
2
+
3
+
This module provides a validator for the kind attribute used on track
4
+
elements, as defined by the HTML5 specification. *)
5
+
6
+
(** Kind attribute validator.
7
+
8
+
Validates kind attribute values which can be:
9
+
- "subtitles" - Transcription or translation of the dialogue, suitable for
10
+
when the sound is available but not understood
11
+
- "captions" - Transcription or translation of the dialogue, sound effects,
12
+
relevant musical cues, and other relevant audio information, suitable for
13
+
when sound is unavailable or not clearly audible
14
+
- "descriptions" - Textual descriptions of the video component, suitable for
15
+
audio synthesis when the visual component is unavailable
16
+
- "chapters" - Chapter titles, for use in navigating the media resource
17
+
- "metadata" - Tracks intended for use from script, not displayed by the user
18
+
agent
19
+
20
+
Values are case-insensitive.
21
+
22
+
Examples:
23
+
- "subtitles"
24
+
- "captions"
25
+
- "descriptions" *)
26
+
module Kind : Datatype.S
27
+
28
+
(** List of all datatypes defined in this module *)
29
+
val datatypes : Datatype.t list
+110
lib/html5_checker/datatype/dt_language.ml
+110
lib/html5_checker/datatype/dt_language.ml
···
1
+
(** Helper functions for language tag validation *)
2
+
3
+
let is_lower_alpha c = c >= 'a' && c <= 'z'
4
+
let is_upper_alpha c = c >= 'A' && c <= 'Z'
5
+
let is_alpha c = is_lower_alpha c || is_upper_alpha c
6
+
let is_digit c = c >= '0' && c <= '9'
7
+
let is_alphanumeric c = is_alpha c || is_digit c
8
+
9
+
let is_all_alpha s =
10
+
String.for_all is_alpha s
11
+
12
+
let _is_all_digits s =
13
+
String.for_all is_digit s
14
+
15
+
let is_all_alphanumeric s =
16
+
String.for_all is_alphanumeric s
17
+
18
+
let to_lower s =
19
+
String.lowercase_ascii s
20
+
21
+
(** Validate language tag structure according to BCP 47.
22
+
This is a simplified validator that checks structural validity
23
+
but does not validate against the IANA registry. *)
24
+
let validate_language_structure s =
25
+
if String.length s = 0 then
26
+
Error "The empty string is not a valid language tag"
27
+
else if String.starts_with ~prefix:"-" s then
28
+
Error "Language tag must not start with HYPHEN-MINUS"
29
+
else if String.ends_with ~suffix:"-" s then
30
+
Error "Language tag must not end with HYPHEN-MINUS"
31
+
else
32
+
let subtags = String.split_on_char '-' s in
33
+
34
+
(* Check for empty subtags and length constraints *)
35
+
let rec check_subtag_constraints = function
36
+
| [] -> Ok ()
37
+
| subtag :: rest ->
38
+
let len = String.length subtag in
39
+
if len = 0 then
40
+
Error "Zero-length subtag"
41
+
else if len > 8 then
42
+
Error "Subtags must not exceed 8 characters in length"
43
+
else
44
+
check_subtag_constraints rest
45
+
in
46
+
47
+
match check_subtag_constraints subtags with
48
+
| Error e -> Error e
49
+
| Ok () ->
50
+
(* Primary language subtag validation *)
51
+
match subtags with
52
+
| [] -> Error "Language tag must have at least one subtag"
53
+
| first :: rest ->
54
+
let first_lower = to_lower first in
55
+
let len = String.length first_lower in
56
+
57
+
(* Check for private use tag *)
58
+
if first_lower = "x" then
59
+
if rest = [] then
60
+
Error "No subtags in private use sequence"
61
+
else
62
+
(* Private use subtags must be 1-8 alphanumeric *)
63
+
let rec check_private_use = function
64
+
| [] -> Ok ()
65
+
| subtag :: rest ->
66
+
let subtag_lower = to_lower subtag in
67
+
if String.length subtag_lower < 1 then
68
+
Error "Private use subtag is too short"
69
+
else if not (is_all_alphanumeric subtag_lower) then
70
+
Error "Bad character in private use subtag"
71
+
else
72
+
check_private_use rest
73
+
in
74
+
check_private_use rest
75
+
(* Primary language: 2-3 letters (ISO 639) *)
76
+
else if (len = 2 || len = 3) && is_all_alpha first_lower then
77
+
Ok ()
78
+
(* Reserved: 4 letters *)
79
+
else if len = 4 && is_all_alpha first_lower then
80
+
Error "Found reserved language tag"
81
+
(* Registered: 5+ letters *)
82
+
else if len >= 5 && is_all_alpha first_lower then
83
+
Ok ()
84
+
else
85
+
Error "Invalid language subtag format"
86
+
87
+
module Language = struct
88
+
let name = "language tag"
89
+
90
+
let validate s = validate_language_structure s
91
+
92
+
let is_valid s = Result.is_ok (validate s)
93
+
end
94
+
95
+
module Language_or_empty = struct
96
+
let name = "language tag or empty"
97
+
98
+
let validate s =
99
+
if String.length s = 0 then
100
+
Ok ()
101
+
else
102
+
validate_language_structure s
103
+
104
+
let is_valid s = Result.is_ok (validate s)
105
+
end
106
+
107
+
let datatypes = [
108
+
(module Language : Datatype.S);
109
+
(module Language_or_empty : Datatype.S);
110
+
]
+43
lib/html5_checker/datatype/dt_language.mli
+43
lib/html5_checker/datatype/dt_language.mli
···
1
+
(** Language tag datatype validators for HTML5.
2
+
3
+
This module provides validators for BCP 47 language tags as used in HTML5.
4
+
Language tags identify natural languages and consist of subtags separated
5
+
by hyphens, following the IETF BCP 47 standard. *)
6
+
7
+
(** Language tag datatype (BCP 47 format).
8
+
9
+
Validates a language tag according to BCP 47. A language tag consists of:
10
+
- Primary language subtag (2-3 letters, or 5+ letters for registered languages)
11
+
- Optional extended language subtag (3 letters)
12
+
- Optional script subtag (4 letters)
13
+
- Optional region subtag (2 letters or 3 digits)
14
+
- Optional variant subtags (5-8 alphanumeric characters, or 4 starting with digit)
15
+
- Optional extension subtags (single letter + subtags)
16
+
- Optional private use subtags (starting with 'x-')
17
+
18
+
Examples:
19
+
- "en" (English)
20
+
- "en-US" (US English)
21
+
- "zh-Hans" (Simplified Chinese)
22
+
- "zh-Hans-CN" (Simplified Chinese as used in China)
23
+
24
+
The validator performs basic structural validation:
25
+
- Tag cannot be empty
26
+
- Tag cannot start or end with hyphen
27
+
- Subtags cannot be empty
28
+
- Subtags cannot exceed 8 characters (except for registered values)
29
+
- Primary language subtag must be 2-3 letters (ISO 639) or 5+ letters (registered)
30
+
- 4-letter primary subtags are reserved
31
+
32
+
Note: This implementation does NOT validate against the IANA language
33
+
subtag registry. It only validates the structural format. *)
34
+
module Language : Datatype.S
35
+
36
+
(** Language tag or empty string.
37
+
38
+
Like Language but also accepts the empty string. This is used for cases
39
+
where lang="" is valid to indicate an unknown or unspecified language. *)
40
+
module Language_or_empty : Datatype.S
41
+
42
+
(** List of all language datatypes *)
43
+
val datatypes : Datatype.t list
+79
lib/html5_checker/datatype/dt_link_type.ml
+79
lib/html5_checker/datatype/dt_link_type.ml
···
1
+
(** Link relationship type validation *)
2
+
3
+
(** Valid link relationship types *)
4
+
let valid_link_types =
5
+
[
6
+
"alternate";
7
+
"author";
8
+
"bookmark";
9
+
"canonical";
10
+
"dns-prefetch";
11
+
"external";
12
+
"help";
13
+
"icon";
14
+
"license";
15
+
"manifest";
16
+
"modulepreload";
17
+
"next";
18
+
"nofollow";
19
+
"noopener";
20
+
"noreferrer";
21
+
"opener";
22
+
"pingback";
23
+
"preconnect";
24
+
"prefetch";
25
+
"preload";
26
+
"prerender";
27
+
"prev";
28
+
"search";
29
+
"stylesheet";
30
+
"tag";
31
+
]
32
+
33
+
(** Validate a single link type *)
34
+
let validate_link_type s =
35
+
let trimmed = Datatype.trim_html_spaces s in
36
+
if trimmed = "" then Error "Link type must not be empty"
37
+
else
38
+
let lower = Datatype.string_to_ascii_lowercase trimmed in
39
+
if List.mem lower valid_link_types then Ok ()
40
+
else
41
+
Error
42
+
(Printf.sprintf
43
+
"The value '%s' is not a valid link type. Valid link types are: %s"
44
+
s (String.concat ", " valid_link_types))
45
+
46
+
module Link_type = struct
47
+
let name = "link-type"
48
+
let validate = validate_link_type
49
+
let is_valid s = Result.is_ok (validate s)
50
+
end
51
+
52
+
(** Validate space-separated link types *)
53
+
let validate_link_types s =
54
+
let trimmed = Datatype.trim_html_spaces s in
55
+
if trimmed = "" then Error "Link types must not be empty"
56
+
else
57
+
(* Split on whitespace *)
58
+
let types = String.split_on_char ' ' trimmed in
59
+
let types = List.filter (fun t -> Datatype.trim_html_spaces t <> "") types in
60
+
if types = [] then Error "Link types must contain at least one link type"
61
+
else
62
+
(* Validate each link type *)
63
+
let rec check_types = function
64
+
| [] -> Ok ()
65
+
| t :: rest -> (
66
+
match validate_link_type t with
67
+
| Error e -> Error e
68
+
| Ok () -> check_types rest)
69
+
in
70
+
check_types types
71
+
72
+
module Link_types = struct
73
+
let name = "link-types"
74
+
let validate = validate_link_types
75
+
let is_valid s = Result.is_ok (validate s)
76
+
end
77
+
78
+
let datatypes =
79
+
[ (module Link_type : Datatype.S); (module Link_types : Datatype.S) ]
+59
lib/html5_checker/datatype/dt_link_type.mli
+59
lib/html5_checker/datatype/dt_link_type.mli
···
1
+
(** Link relationship type validators.
2
+
3
+
This module provides validators for link relationship types used in rel
4
+
attributes on a and link elements, as defined by the HTML5 specification. *)
5
+
6
+
(** Single link type validator.
7
+
8
+
Validates a single link relationship type value. Valid link types include:
9
+
10
+
- alternate: Alternate representation of the current document
11
+
- author: Link to author information
12
+
- bookmark: Permanent URL for nearest ancestor article
13
+
- canonical: Preferred URL for the current document
14
+
- dns-prefetch: Hint that the browser should prefetch DNS for the target
15
+
- external: Link to a different website
16
+
- help: Link to context-sensitive help
17
+
- icon: Icon representing the current document
18
+
- license: Copyright license for the current document
19
+
- manifest: Web app manifest
20
+
- modulepreload: Preload a JavaScript module
21
+
- next: Next document in a sequence
22
+
- nofollow: Do not follow this link for ranking
23
+
- noopener: Do not grant window.opener access
24
+
- noreferrer: Do not send Referer header
25
+
- opener: Grant window.opener access
26
+
- pingback: Pingback server address
27
+
- preconnect: Hint to preconnect to target origin
28
+
- prefetch: Hint to prefetch the target resource
29
+
- preload: Preload a resource
30
+
- prerender: Hint to prerender the target page
31
+
- prev: Previous document in a sequence
32
+
- search: Link to search tool for the current document
33
+
- stylesheet: External stylesheet
34
+
- tag: Tag (keyword) for the current document
35
+
36
+
Examples:
37
+
- "stylesheet"
38
+
- "icon"
39
+
- "preload"
40
+
41
+
Link types are case-insensitive. *)
42
+
module Link_type : Datatype.S
43
+
44
+
(** Space-separated link types validator.
45
+
46
+
Validates space-separated link relationship types. Multiple link types
47
+
can be specified separated by ASCII whitespace.
48
+
49
+
Examples:
50
+
- "stylesheet"
51
+
- "icon preload"
52
+
- "nofollow noopener noreferrer"
53
+
54
+
Each token must be a valid link type. Duplicate link types are allowed
55
+
but not recommended. *)
56
+
module Link_types : Datatype.S
57
+
58
+
(** List of all datatypes defined in this module *)
59
+
val datatypes : Datatype.t list
+42
lib/html5_checker/datatype/dt_list_type.ml
+42
lib/html5_checker/datatype/dt_list_type.ml
···
1
+
(** List type attribute validation based on HTML5 spec *)
2
+
3
+
(** Valid ol type values (case-sensitive) *)
4
+
let valid_ol_types = [ "1"; "a"; "A"; "i"; "I" ]
5
+
6
+
(** Valid ul type values (deprecated but still parsed) *)
7
+
let valid_ul_types = [ "disc"; "circle"; "square" ]
8
+
9
+
module Ol_type = struct
10
+
let name = "ol-type"
11
+
12
+
let validate s =
13
+
(* Note: ol type is case-sensitive *)
14
+
if List.mem s valid_ol_types then Ok ()
15
+
else
16
+
Error
17
+
(Printf.sprintf
18
+
"The value '%s' is not a valid ol type. Expected one of: %s."
19
+
s
20
+
(String.concat ", " valid_ol_types))
21
+
22
+
let is_valid s = Result.is_ok (validate s)
23
+
end
24
+
25
+
module Ul_type = struct
26
+
let name = "ul-type"
27
+
28
+
let validate s =
29
+
let s_lower = Datatype.string_to_ascii_lowercase s in
30
+
if List.mem s_lower valid_ul_types then Ok ()
31
+
else
32
+
Error
33
+
(Printf.sprintf
34
+
"The value '%s' is not a valid ul type. Expected one of: %s."
35
+
s
36
+
(String.concat ", " valid_ul_types))
37
+
38
+
let is_valid s = Result.is_ok (validate s)
39
+
end
40
+
41
+
let datatypes =
42
+
[ (module Ol_type : Datatype.S); (module Ul_type : Datatype.S) ]
+44
lib/html5_checker/datatype/dt_list_type.mli
+44
lib/html5_checker/datatype/dt_list_type.mli
···
1
+
(** List type attribute datatype validators.
2
+
3
+
This module provides validators for the type attribute used on
4
+
ol and ul elements, as defined by the HTML5 specification. *)
5
+
6
+
(** Ordered list type attribute validator.
7
+
8
+
Validates ol type attribute values which can be:
9
+
- 1 - Decimal numbers (1, 2, 3, ...)
10
+
- a - Lowercase Latin letters (a, b, c, ...)
11
+
- A - Uppercase Latin letters (A, B, C, ...)
12
+
- i - Lowercase Roman numerals (i, ii, iii, ...)
13
+
- I - Uppercase Roman numerals (I, II, III, ...)
14
+
15
+
Values are matched case-sensitively for ol type.
16
+
17
+
Examples:
18
+
- "1"
19
+
- "a"
20
+
- "A"
21
+
- "i"
22
+
- "I" *)
23
+
module Ol_type : Datatype.S
24
+
25
+
(** Unordered list type attribute validator.
26
+
27
+
Validates ul type attribute values which can be:
28
+
- disc - Filled circle (default)
29
+
- circle - Hollow circle
30
+
- square - Filled square
31
+
32
+
Note: The type attribute on ul is deprecated in HTML5 but may still
33
+
be parsed for backwards compatibility.
34
+
35
+
Values are matched case-insensitively according to HTML5 spec.
36
+
37
+
Examples:
38
+
- "disc"
39
+
- "circle"
40
+
- "square" *)
41
+
module Ul_type : Datatype.S
42
+
43
+
(** List of all datatypes defined in this module *)
44
+
val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_loading.ml
+20
lib/html5_checker/datatype/dt_loading.ml
···
1
+
(** Lazy loading attribute validation for HTML5 *)
2
+
3
+
module Loading = struct
4
+
let name = "loading"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "" | "lazy" | "eager" -> Ok ()
10
+
| _ ->
11
+
Error
12
+
(Printf.sprintf
13
+
"The value '%s' is not a valid loading value. Expected empty \
14
+
string, 'lazy', or 'eager'."
15
+
s)
16
+
17
+
let is_valid s = Result.is_ok (validate s)
18
+
end
19
+
20
+
let datatypes = [ (module Loading : Datatype.S) ]
+22
lib/html5_checker/datatype/dt_loading.mli
+22
lib/html5_checker/datatype/dt_loading.mli
···
1
+
(** Lazy loading attribute datatype validator.
2
+
3
+
This module provides a validator for the loading attribute used to control
4
+
lazy loading behavior for images and iframes, as defined by the HTML5 spec. *)
5
+
6
+
(** Loading attribute validator.
7
+
8
+
Validates loading attribute values which can be:
9
+
- "" (empty string, default loading behavior)
10
+
- "lazy" (defer loading until needed)
11
+
- "eager" (load immediately)
12
+
13
+
Values are case-insensitive after ASCII lowercasing.
14
+
15
+
Examples:
16
+
- ""
17
+
- "lazy"
18
+
- "eager" *)
19
+
module Loading : Datatype.S
20
+
21
+
(** List of all datatypes defined in this module *)
22
+
val datatypes : Datatype.t list
+144
lib/html5_checker/datatype/dt_media_query.ml
+144
lib/html5_checker/datatype/dt_media_query.ml
···
1
+
(** Media query validation - simplified implementation *)
2
+
3
+
(** Media types *)
4
+
let media_types =
5
+
[
6
+
"all";
7
+
"screen";
8
+
"print";
9
+
"speech";
10
+
"aural";
11
+
"braille";
12
+
"handheld";
13
+
"projection";
14
+
"tty";
15
+
"tv";
16
+
"embossed";
17
+
]
18
+
19
+
(** Media query keywords *)
20
+
let media_keywords = [ "and"; "or"; "not"; "only" ]
21
+
22
+
(** Check if character is whitespace *)
23
+
let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r'
24
+
25
+
(** Check if character can start an identifier *)
26
+
let is_ident_start c =
27
+
(c >= 'a' && c <= 'z')
28
+
|| (c >= 'A' && c <= 'Z')
29
+
|| c = '_' || c = '-' || Char.code c >= 128
30
+
31
+
(** Check if character can be in an identifier *)
32
+
let is_ident_char c =
33
+
is_ident_start c || (c >= '0' && c <= '9')
34
+
35
+
(** Check balanced parentheses *)
36
+
let check_balanced_parens s =
37
+
let rec check depth i =
38
+
if i >= String.length s then
39
+
if depth = 0 then Ok ()
40
+
else Error "Unbalanced parentheses: unclosed '('"
41
+
else
42
+
let c = s.[i] in
43
+
match c with
44
+
| '(' -> check (depth + 1) (i + 1)
45
+
| ')' ->
46
+
if depth = 0 then Error "Unbalanced parentheses: unexpected ')'"
47
+
else check (depth - 1) (i + 1)
48
+
| _ -> check depth (i + 1)
49
+
in
50
+
check 0 0
51
+
52
+
(** Extract words (identifiers and keywords) from media query *)
53
+
let extract_words s =
54
+
let words = ref [] in
55
+
let buf = Buffer.create 16 in
56
+
let in_parens = ref 0 in
57
+
58
+
for i = 0 to String.length s - 1 do
59
+
let c = s.[i] in
60
+
match c with
61
+
| '(' ->
62
+
if Buffer.length buf > 0 then (
63
+
words := Buffer.contents buf :: !words;
64
+
Buffer.clear buf);
65
+
incr in_parens
66
+
| ')' ->
67
+
if Buffer.length buf > 0 then (
68
+
words := Buffer.contents buf :: !words;
69
+
Buffer.clear buf);
70
+
decr in_parens
71
+
| _ ->
72
+
if !in_parens = 0 then
73
+
if is_ident_char c then Buffer.add_char buf c
74
+
else if is_whitespace c then
75
+
if Buffer.length buf > 0 then (
76
+
words := Buffer.contents buf :: !words;
77
+
Buffer.clear buf)
78
+
else ()
79
+
else if Buffer.length buf > 0 then (
80
+
words := Buffer.contents buf :: !words;
81
+
Buffer.clear buf)
82
+
done;
83
+
84
+
if Buffer.length buf > 0 then words := Buffer.contents buf :: !words;
85
+
List.rev !words
86
+
87
+
(** Validate media query structure *)
88
+
let validate_media_query s =
89
+
let s = String.trim s in
90
+
if String.length s = 0 then Error "Media query must not be empty"
91
+
else
92
+
(* Check balanced parentheses *)
93
+
match check_balanced_parens s with
94
+
| Error _ as e -> e
95
+
| Ok () ->
96
+
(* Extract and validate words *)
97
+
let words = extract_words s in
98
+
let words_lower = List.map String.lowercase_ascii words in
99
+
100
+
(* Basic validation: check for invalid keyword combinations *)
101
+
let rec validate_words prev = function
102
+
| [] -> Ok ()
103
+
| word :: rest -> (
104
+
let word_lower = String.lowercase_ascii word in
105
+
match (prev, word_lower) with
106
+
| None, "and" | None, "or" ->
107
+
Error
108
+
(Printf.sprintf
109
+
"Media query cannot start with keyword '%s'" word)
110
+
| Some "and", "and" | Some "or", "or" | Some "not", "not" ->
111
+
Error
112
+
(Printf.sprintf "Consecutive '%s' keywords are not allowed"
113
+
word)
114
+
| Some "only", "only" ->
115
+
Error "Consecutive 'only' keywords are not allowed"
116
+
| _, _ -> validate_words (Some word_lower) rest)
117
+
in
118
+
119
+
(* Check if query contains valid media types or features *)
120
+
let has_media_type =
121
+
List.exists
122
+
(fun w -> List.mem (String.lowercase_ascii w) media_types)
123
+
words
124
+
in
125
+
let has_features = String.contains s '(' in
126
+
127
+
if not (has_media_type || has_features) then
128
+
(* Only keywords, no actual media type or features *)
129
+
if List.for_all (fun w -> List.mem w media_keywords) words_lower then
130
+
Error "Media query contains only keywords without media type or features"
131
+
else Ok () (* Assume other identifiers are valid *)
132
+
else validate_words None words
133
+
134
+
module Media_query = struct
135
+
let name = "media query"
136
+
let validate = validate_media_query
137
+
138
+
let is_valid s =
139
+
match validate s with
140
+
| Ok () -> true
141
+
| Error _ -> false
142
+
end
143
+
144
+
let datatypes = [ (module Media_query : Datatype.S) ]
+28
lib/html5_checker/datatype/dt_media_query.mli
+28
lib/html5_checker/datatype/dt_media_query.mli
···
1
+
(** Media query datatype validator.
2
+
3
+
This module provides a validator for CSS media queries as used in HTML5. *)
4
+
5
+
(** Media query validator.
6
+
7
+
Validates CSS media queries used in media attributes and CSS @media rules.
8
+
9
+
Examples:
10
+
- "screen"
11
+
- "print"
12
+
- "(min-width: 600px)"
13
+
- "screen and (color)"
14
+
- "not screen and (color)"
15
+
- "(min-width: 600px) and (max-width: 800px)"
16
+
17
+
This is a simplified validator that checks:
18
+
- Balanced parentheses
19
+
- Basic media type keywords (all, screen, print, etc.)
20
+
- Basic logical operators (and, or, not, only)
21
+
- Valid feature queries in parentheses
22
+
23
+
Note: This does not perform full CSS media query parsing. For production
24
+
use, consider integrating with a full CSS parser. *)
25
+
module Media_query : Datatype.S
26
+
27
+
(** List of all datatypes defined in this module *)
28
+
val datatypes : Datatype.t list
+210
lib/html5_checker/datatype/dt_mime.ml
+210
lib/html5_checker/datatype/dt_mime.ml
···
1
+
(** MIME type validation based on RFC 2045 and HTML5 spec *)
2
+
3
+
(** Check if character is whitespace *)
4
+
let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r'
5
+
6
+
(** Check if character is a token character (RFC 2045) *)
7
+
let is_token_char c =
8
+
(c >= '\033' && c <= '\126')
9
+
&& not
10
+
(c = '(' || c = ')' || c = '<' || c = '>' || c = '@' || c = ','
11
+
|| c = ';' || c = ':' || c = '\\' || c = '"' || c = '/' || c = '['
12
+
|| c = ']' || c = '?' || c = '=' || c = '{' || c = '}')
13
+
14
+
(** Check if character is valid in quoted string (qdtext) *)
15
+
let is_qdtext_char c =
16
+
(c >= ' ' && c <= '\126') || c = '\n' || c = '\r' || c = '\t'
17
+
18
+
(** States for MIME type parser *)
19
+
type parse_state =
20
+
| At_start
21
+
| In_supertype
22
+
| At_subtype_start
23
+
| In_subtype
24
+
| Semicolon_seen
25
+
| Ws_before_semicolon
26
+
| In_param_name
27
+
| Equals_seen
28
+
| In_quoted_string
29
+
| In_unquoted_string
30
+
| In_quoted_pair
31
+
| Close_quote_seen
32
+
33
+
(** JavaScript MIME types that should not have parameters *)
34
+
let javascript_mime_types =
35
+
[
36
+
"application/ecmascript";
37
+
"application/javascript";
38
+
"application/x-ecmascript";
39
+
"application/x-javascript";
40
+
"text/ecmascript";
41
+
"text/javascript";
42
+
"text/javascript1.0";
43
+
"text/javascript1.1";
44
+
"text/javascript1.2";
45
+
"text/javascript1.3";
46
+
"text/javascript1.4";
47
+
"text/javascript1.5";
48
+
"text/jscript";
49
+
"text/livescript";
50
+
"text/x-ecmascript";
51
+
"text/x-javascript";
52
+
]
53
+
54
+
(** Validate a single MIME type *)
55
+
let validate_mime_type s =
56
+
let len = String.length s in
57
+
let rec parse state i =
58
+
if i >= len then
59
+
(* End of string - check final state *)
60
+
match state with
61
+
| In_subtype | In_unquoted_string | Close_quote_seen -> Ok ()
62
+
| At_start -> Error "Expected a MIME type but saw the empty string"
63
+
| In_supertype | At_subtype_start -> Error "Subtype missing"
64
+
| Equals_seen | In_param_name -> Error "Parameter value missing"
65
+
| In_quoted_pair | In_quoted_string -> Error "Unfinished quoted string"
66
+
| Semicolon_seen ->
67
+
Error "Semicolon seen but there was no parameter following it"
68
+
| Ws_before_semicolon -> Error "Extraneous trailing whitespace"
69
+
else
70
+
let c = s.[i] in
71
+
match state with
72
+
| At_start ->
73
+
if is_token_char c then parse In_supertype (i + 1)
74
+
else
75
+
Error
76
+
(Printf.sprintf
77
+
"Expected a token character but saw '%c' instead" c)
78
+
| In_supertype ->
79
+
if is_token_char c then parse In_supertype (i + 1)
80
+
else if c = '/' then parse At_subtype_start (i + 1)
81
+
else
82
+
Error
83
+
(Printf.sprintf
84
+
"Expected a token character or '/' but saw '%c' instead" c)
85
+
| At_subtype_start ->
86
+
if is_token_char c then parse In_subtype (i + 1)
87
+
else
88
+
Error
89
+
(Printf.sprintf
90
+
"Expected a token character but saw '%c' instead" c)
91
+
| In_subtype ->
92
+
if is_token_char c then parse In_subtype (i + 1)
93
+
else if c = ';' then
94
+
(* Check if this is a JavaScript MIME type *)
95
+
let mime_type = String.sub s 0 i |> String.lowercase_ascii in
96
+
if List.mem mime_type javascript_mime_types then
97
+
Error
98
+
"A JavaScript MIME type must not contain any characters after \
99
+
the subtype"
100
+
else parse Semicolon_seen (i + 1)
101
+
else if is_whitespace c then parse Ws_before_semicolon (i + 1)
102
+
else
103
+
Error
104
+
(Printf.sprintf
105
+
"Expected a token character, whitespace or a semicolon but saw \
106
+
'%c' instead"
107
+
c)
108
+
| Ws_before_semicolon ->
109
+
if is_whitespace c then parse Ws_before_semicolon (i + 1)
110
+
else if c = ';' then parse Semicolon_seen (i + 1)
111
+
else
112
+
Error
113
+
(Printf.sprintf
114
+
"Expected whitespace or a semicolon but saw '%c' instead" c)
115
+
| Semicolon_seen ->
116
+
if is_whitespace c then parse Semicolon_seen (i + 1)
117
+
else if is_token_char c then parse In_param_name (i + 1)
118
+
else
119
+
Error
120
+
(Printf.sprintf
121
+
"Expected whitespace or a token character but saw '%c' instead"
122
+
c)
123
+
| In_param_name ->
124
+
if is_token_char c then parse In_param_name (i + 1)
125
+
else if c = '=' then parse Equals_seen (i + 1)
126
+
else
127
+
Error
128
+
(Printf.sprintf "Expected a token character or '=' but saw '%c' instead"
129
+
c)
130
+
| Equals_seen ->
131
+
if c = '"' then parse In_quoted_string (i + 1)
132
+
else if is_token_char c then parse In_unquoted_string (i + 1)
133
+
else
134
+
Error
135
+
(Printf.sprintf
136
+
"Expected a double quote or a token character but saw '%c' \
137
+
instead"
138
+
c)
139
+
| In_quoted_string ->
140
+
if c = '\\' then parse In_quoted_pair (i + 1)
141
+
else if c = '"' then parse Close_quote_seen (i + 1)
142
+
else if is_qdtext_char c then parse In_quoted_string (i + 1)
143
+
else
144
+
Error
145
+
(Printf.sprintf
146
+
"Expected a non-control ASCII character but saw '%c' instead" c)
147
+
| In_quoted_pair ->
148
+
if Char.code c <= 127 then parse In_quoted_string (i + 1)
149
+
else
150
+
Error
151
+
(Printf.sprintf "Expected an ASCII character but saw '%c' instead"
152
+
c)
153
+
| Close_quote_seen ->
154
+
if c = ';' then parse Semicolon_seen (i + 1)
155
+
else if is_whitespace c then parse Ws_before_semicolon (i + 1)
156
+
else
157
+
Error
158
+
(Printf.sprintf
159
+
"Expected a semicolon or whitespace but saw '%c' instead" c)
160
+
| In_unquoted_string ->
161
+
if is_token_char c then parse In_unquoted_string (i + 1)
162
+
else if c = ';' then parse Semicolon_seen (i + 1)
163
+
else if is_whitespace c then parse Ws_before_semicolon (i + 1)
164
+
else
165
+
Error
166
+
(Printf.sprintf
167
+
"Expected a token character, whitespace or a semicolon but saw \
168
+
'%c' instead"
169
+
c)
170
+
in
171
+
parse At_start 0
172
+
173
+
module Mime_type = struct
174
+
let name = "MIME type"
175
+
let validate = validate_mime_type
176
+
177
+
let is_valid s =
178
+
match validate s with
179
+
| Ok () -> true
180
+
| Error _ -> false
181
+
end
182
+
183
+
module Mime_type_list = struct
184
+
let name = "MIME type list"
185
+
186
+
let validate s =
187
+
let s = String.trim s in
188
+
if String.length s = 0 then Error "MIME type list must not be empty"
189
+
else
190
+
(* Split on commas and validate each MIME type *)
191
+
let mime_types = String.split_on_char ',' s in
192
+
let rec check_all = function
193
+
| [] -> Ok ()
194
+
| mime :: rest -> (
195
+
let mime = String.trim mime in
196
+
match validate_mime_type mime with
197
+
| Ok () -> check_all rest
198
+
| Error msg ->
199
+
Error (Printf.sprintf "Invalid MIME type in list: %s" msg))
200
+
in
201
+
check_all mime_types
202
+
203
+
let is_valid s =
204
+
match validate s with
205
+
| Ok () -> true
206
+
| Error _ -> false
207
+
end
208
+
209
+
let datatypes =
210
+
[ (module Mime_type : Datatype.S); (module Mime_type_list : Datatype.S) ]
+32
lib/html5_checker/datatype/dt_mime.mli
+32
lib/html5_checker/datatype/dt_mime.mli
···
1
+
(** MIME type datatype validators.
2
+
3
+
This module provides validators for MIME types (media types) as defined
4
+
by RFC 2045 and used in HTML5. *)
5
+
6
+
(** MIME type validator.
7
+
8
+
Validates a MIME type in the format: type/subtype[; parameters]
9
+
10
+
Examples:
11
+
- text/html
12
+
- application/json
13
+
- image/png
14
+
- text/html; charset=utf-8
15
+
16
+
Validation rules:
17
+
- Must have a supertype (before /) and subtype (after /)
18
+
- Supertype and subtype must be token characters
19
+
- Optional semicolon-separated parameters
20
+
- Parameters must be name=value pairs
21
+
- Values can be quoted strings or tokens *)
22
+
module Mime_type : Datatype.S
23
+
24
+
(** MIME type list validator.
25
+
26
+
Validates a comma-separated list of MIME types.
27
+
Each MIME type in the list must be valid according to {!Mime_type} rules.
28
+
This is used for the 'accept' attribute on input elements. *)
29
+
module Mime_type_list : Datatype.S
30
+
31
+
(** List of all datatypes defined in this module *)
32
+
val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_popover.ml
+20
lib/html5_checker/datatype/dt_popover.ml
···
1
+
(** Popover attribute validation for HTML5 *)
2
+
3
+
module Popover = struct
4
+
let name = "popover"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "" | "auto" | "manual" -> Ok ()
10
+
| _ ->
11
+
Error
12
+
(Printf.sprintf
13
+
"The value '%s' is not a valid popover value. Expected 'auto', \
14
+
'manual', or empty string."
15
+
s)
16
+
17
+
let is_valid s = Result.is_ok (validate s)
18
+
end
19
+
20
+
let datatypes = [ (module Popover : Datatype.S) ]
+22
lib/html5_checker/datatype/dt_popover.mli
+22
lib/html5_checker/datatype/dt_popover.mli
···
1
+
(** Popover attribute datatype validator for HTML5.
2
+
3
+
This module provides a validator for the popover attribute, as defined by
4
+
the HTML5 specification. *)
5
+
6
+
(** Popover attribute validator.
7
+
8
+
Validates popover attribute values which can be:
9
+
- "auto" - the popover can be light-dismissed (closed by clicking outside)
10
+
- "manual" - the popover must be explicitly closed
11
+
- "" (empty string) - equivalent to "auto"
12
+
13
+
Values are case-insensitive.
14
+
15
+
Examples:
16
+
- "auto"
17
+
- "manual"
18
+
- "" *)
19
+
module Popover : Datatype.S
20
+
21
+
(** List of all datatypes defined in this module *)
22
+
val datatypes : Datatype.t list
+22
lib/html5_checker/datatype/dt_preload.ml
+22
lib/html5_checker/datatype/dt_preload.ml
···
1
+
(** Media preload attribute validation based on HTML5 spec *)
2
+
3
+
(** Valid preload values *)
4
+
let valid_preloads = [ "none"; "metadata"; "auto"; "" ]
5
+
6
+
module Preload = struct
7
+
let name = "preload"
8
+
9
+
let validate s =
10
+
let s_lower = Datatype.string_to_ascii_lowercase s in
11
+
if List.mem s_lower valid_preloads then Ok ()
12
+
else
13
+
Error
14
+
(Printf.sprintf
15
+
"The value '%s' is not a valid preload value. Expected one of: \
16
+
'none', 'metadata', 'auto', or empty string."
17
+
s)
18
+
19
+
let is_valid s = Result.is_ok (validate s)
20
+
end
21
+
22
+
let datatypes = [ (module Preload : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_preload.mli
+24
lib/html5_checker/datatype/dt_preload.mli
···
1
+
(** Media preload attribute datatype validator.
2
+
3
+
This module provides a validator for the preload attribute used on
4
+
audio and video elements, as defined by the HTML5 specification. *)
5
+
6
+
(** Media preload attribute validator.
7
+
8
+
Validates media preload attribute values which can be:
9
+
- none - No preloading (only load metadata when user starts playback)
10
+
- metadata - Preload metadata only (dimensions, duration, etc.)
11
+
- auto - Preload the entire resource if possible
12
+
- "" (empty string) - Equivalent to auto
13
+
14
+
Values are matched case-insensitively according to HTML5 spec.
15
+
16
+
Examples:
17
+
- "none"
18
+
- "metadata"
19
+
- "auto"
20
+
- "" *)
21
+
module Preload : Datatype.S
22
+
23
+
(** List of all datatypes defined in this module *)
24
+
val datatypes : Datatype.t list
+32
lib/html5_checker/datatype/dt_referrer.ml
+32
lib/html5_checker/datatype/dt_referrer.ml
···
1
+
(** Referrer policy attribute validation for HTML5 *)
2
+
3
+
module Referrer_policy = struct
4
+
let name = "referrerpolicy"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| ""
10
+
| "no-referrer"
11
+
| "no-referrer-when-downgrade"
12
+
| "origin"
13
+
| "origin-when-cross-origin"
14
+
| "same-origin"
15
+
| "strict-origin"
16
+
| "strict-origin-when-cross-origin"
17
+
| "unsafe-url" ->
18
+
Ok ()
19
+
| _ ->
20
+
Error
21
+
(Printf.sprintf
22
+
"The value '%s' is not a valid referrerpolicy value. Expected \
23
+
one of: empty string, 'no-referrer', \
24
+
'no-referrer-when-downgrade', 'origin', \
25
+
'origin-when-cross-origin', 'same-origin', 'strict-origin', \
26
+
'strict-origin-when-cross-origin', or 'unsafe-url'."
27
+
s)
28
+
29
+
let is_valid s = Result.is_ok (validate s)
30
+
end
31
+
32
+
let datatypes = [ (module Referrer_policy : Datatype.S) ]
+29
lib/html5_checker/datatype/dt_referrer.mli
+29
lib/html5_checker/datatype/dt_referrer.mli
···
1
+
(** Referrer policy attribute datatype validator.
2
+
3
+
This module provides a validator for the referrerpolicy attribute used to
4
+
control referrer information sent with requests, as defined by the HTML5 spec. *)
5
+
6
+
(** Referrer policy attribute validator.
7
+
8
+
Validates referrerpolicy attribute values which can be:
9
+
- "" (empty string, uses default policy)
10
+
- "no-referrer" (never send referrer)
11
+
- "no-referrer-when-downgrade" (send referrer to same security level)
12
+
- "origin" (send origin only)
13
+
- "origin-when-cross-origin" (full URL for same-origin, origin for cross-origin)
14
+
- "same-origin" (send referrer for same-origin only)
15
+
- "strict-origin" (send origin for same security level)
16
+
- "strict-origin-when-cross-origin" (full URL same-origin, origin cross-origin same security)
17
+
- "unsafe-url" (always send full URL)
18
+
19
+
Values are case-insensitive after ASCII lowercasing.
20
+
21
+
Examples:
22
+
- ""
23
+
- "no-referrer"
24
+
- "origin"
25
+
- "strict-origin-when-cross-origin" *)
26
+
module Referrer_policy : Datatype.S
27
+
28
+
(** List of all datatypes defined in this module *)
29
+
val datatypes : Datatype.t list
+53
lib/html5_checker/datatype/dt_sandbox.ml
+53
lib/html5_checker/datatype/dt_sandbox.ml
···
1
+
(** Sandbox tokens validation *)
2
+
3
+
(** Valid sandbox tokens (case-sensitive) *)
4
+
let valid_sandbox_tokens =
5
+
[
6
+
"allow-downloads";
7
+
"allow-forms";
8
+
"allow-modals";
9
+
"allow-orientation-lock";
10
+
"allow-pointer-lock";
11
+
"allow-popups";
12
+
"allow-popups-to-escape-sandbox";
13
+
"allow-presentation";
14
+
"allow-same-origin";
15
+
"allow-scripts";
16
+
"allow-top-navigation";
17
+
"allow-top-navigation-by-user-activation";
18
+
"allow-top-navigation-to-custom-protocols";
19
+
]
20
+
21
+
(** Validate sandbox attribute value *)
22
+
let validate_sandbox s =
23
+
let trimmed = Datatype.trim_html_spaces s in
24
+
(* Empty value is valid (maximum restrictions) *)
25
+
if trimmed = "" then Ok ()
26
+
else
27
+
(* Split on whitespace *)
28
+
let tokens = String.split_on_char ' ' trimmed in
29
+
let tokens = List.filter (fun t -> Datatype.trim_html_spaces t <> "") tokens in
30
+
if tokens = [] then Ok () (* All whitespace is like empty *)
31
+
else
32
+
(* Validate each token *)
33
+
let rec check_tokens = function
34
+
| [] -> Ok ()
35
+
| token :: rest ->
36
+
(* Sandbox tokens are case-sensitive *)
37
+
if List.mem token valid_sandbox_tokens then check_tokens rest
38
+
else
39
+
Error
40
+
(Printf.sprintf
41
+
"The value '%s' is not a valid sandbox token. Valid tokens \
42
+
are: %s"
43
+
token (String.concat ", " valid_sandbox_tokens))
44
+
in
45
+
check_tokens tokens
46
+
47
+
module Sandbox = struct
48
+
let name = "sandbox"
49
+
let validate = validate_sandbox
50
+
let is_valid s = Result.is_ok (validate s)
51
+
end
52
+
53
+
let datatypes = [ (module Sandbox : Datatype.S) ]
+37
lib/html5_checker/datatype/dt_sandbox.mli
+37
lib/html5_checker/datatype/dt_sandbox.mli
···
1
+
(** Sandbox tokens validator.
2
+
3
+
This module provides a validator for the sandbox attribute used on iframe
4
+
elements, as defined by the HTML5 specification. *)
5
+
6
+
(** Sandbox attribute validator.
7
+
8
+
Validates sandbox attribute values which contain space-separated sandbox
9
+
tokens. Each token enables a specific capability for the sandboxed iframe.
10
+
11
+
Valid tokens:
12
+
- allow-downloads: Allow downloads
13
+
- allow-forms: Allow form submission
14
+
- allow-modals: Allow modal dialogs (alert, confirm, etc.)
15
+
- allow-orientation-lock: Allow orientation lock
16
+
- allow-pointer-lock: Allow pointer lock
17
+
- allow-popups: Allow popups (window.open, target="_blank", etc.)
18
+
- allow-popups-to-escape-sandbox: Allow popups that don't inherit sandboxing
19
+
- allow-presentation: Allow presentation sessions
20
+
- allow-same-origin: Allow same-origin access
21
+
- allow-scripts: Allow script execution
22
+
- allow-top-navigation: Allow navigating top-level browsing context
23
+
- allow-top-navigation-by-user-activation: Allow top navigation with user gesture
24
+
- allow-top-navigation-to-custom-protocols: Allow top navigation to custom protocols
25
+
26
+
Examples:
27
+
- "" (empty = maximum restrictions)
28
+
- "allow-scripts"
29
+
- "allow-same-origin allow-scripts"
30
+
- "allow-forms allow-popups allow-scripts"
31
+
32
+
Tokens are case-sensitive and must match exactly. Duplicate tokens are
33
+
allowed but redundant. An empty value means maximum sandbox restrictions. *)
34
+
module Sandbox : Datatype.S
35
+
36
+
(** List of all datatypes defined in this module *)
37
+
val datatypes : Datatype.t list
+22
lib/html5_checker/datatype/dt_scope.ml
+22
lib/html5_checker/datatype/dt_scope.ml
···
1
+
(** Table header scope attribute validation based on HTML5 spec *)
2
+
3
+
(** Valid scope values *)
4
+
let valid_scopes = [ "row"; "col"; "rowgroup"; "colgroup" ]
5
+
6
+
module Scope = struct
7
+
let name = "scope"
8
+
9
+
let validate s =
10
+
let s_lower = Datatype.string_to_ascii_lowercase s in
11
+
if List.mem s_lower valid_scopes then Ok ()
12
+
else
13
+
Error
14
+
(Printf.sprintf
15
+
"The value '%s' is not a valid scope value. Expected one of: %s."
16
+
s
17
+
(String.concat ", " valid_scopes))
18
+
19
+
let is_valid s = Result.is_ok (validate s)
20
+
end
21
+
22
+
let datatypes = [ (module Scope : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_scope.mli
+24
lib/html5_checker/datatype/dt_scope.mli
···
1
+
(** Table header scope attribute datatype validator.
2
+
3
+
This module provides a validator for the scope attribute used on
4
+
th elements, as defined by the HTML5 specification. *)
5
+
6
+
(** Table header scope attribute validator.
7
+
8
+
Validates th scope attribute values which can be:
9
+
- row - Header cell applies to some of the subsequent cells in the same row(s)
10
+
- col - Header cell applies to some of the subsequent cells in the same column(s)
11
+
- rowgroup - Header cell applies to all remaining cells in the row group
12
+
- colgroup - Header cell applies to all remaining cells in the column group
13
+
14
+
Values are matched case-insensitively according to HTML5 spec.
15
+
16
+
Examples:
17
+
- "row"
18
+
- "col"
19
+
- "rowgroup"
20
+
- "colgroup" *)
21
+
module Scope : Datatype.S
22
+
23
+
(** List of all datatypes defined in this module *)
24
+
val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_shape.ml
+20
lib/html5_checker/datatype/dt_shape.ml
···
1
+
(** Shape attribute validation for HTML5 *)
2
+
3
+
module Shape = struct
4
+
let name = "shape"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "default" | "rect" | "circle" | "poly" -> Ok ()
10
+
| _ ->
11
+
Error
12
+
(Printf.sprintf
13
+
"The value '%s' is not a valid shape. Expected 'default', 'rect', \
14
+
'circle', or 'poly'."
15
+
s)
16
+
17
+
let is_valid s = Result.is_ok (validate s)
18
+
end
19
+
20
+
let datatypes = [ (module Shape : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_shape.mli
+24
lib/html5_checker/datatype/dt_shape.mli
···
1
+
(** Shape attribute datatype validator for HTML5.
2
+
3
+
This module provides a validator for the shape attribute used on area
4
+
elements within image maps, as defined by the HTML5 specification. *)
5
+
6
+
(** Shape attribute validator.
7
+
8
+
Validates shape attribute values which can be:
9
+
- "default" - entire region
10
+
- "rect" - rectangular region
11
+
- "circle" - circular region
12
+
- "poly" - polygonal region
13
+
14
+
Values are case-insensitive.
15
+
16
+
Examples:
17
+
- "rect"
18
+
- "circle"
19
+
- "poly"
20
+
- "default" *)
21
+
module Shape : Datatype.S
22
+
23
+
(** List of all datatypes defined in this module *)
24
+
val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_spellcheck.ml
+20
lib/html5_checker/datatype/dt_spellcheck.ml
···
1
+
(** Spellcheck attribute validation for HTML5 *)
2
+
3
+
module Spellcheck = struct
4
+
let name = "spellcheck"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "" | "true" | "false" -> Ok ()
10
+
| _ ->
11
+
Error
12
+
(Printf.sprintf
13
+
"The value '%s' is not a valid spellcheck value. Expected 'true', \
14
+
'false', or empty string."
15
+
s)
16
+
17
+
let is_valid s = Result.is_ok (validate s)
18
+
end
19
+
20
+
let datatypes = [ (module Spellcheck : Datatype.S) ]
+22
lib/html5_checker/datatype/dt_spellcheck.mli
+22
lib/html5_checker/datatype/dt_spellcheck.mli
···
1
+
(** Spellcheck attribute datatype validator for HTML5.
2
+
3
+
This module provides a validator for the spellcheck attribute, as defined by
4
+
the HTML5 specification. *)
5
+
6
+
(** Spellcheck attribute validator.
7
+
8
+
Validates spellcheck attribute values which can be:
9
+
- "true" - spelling and grammar checking is enabled
10
+
- "false" - spelling and grammar checking is disabled
11
+
- "" (empty string) - default behavior (typically inherits from parent)
12
+
13
+
Values are case-insensitive.
14
+
15
+
Examples:
16
+
- "true"
17
+
- "false"
18
+
- "" *)
19
+
module Spellcheck : Datatype.S
20
+
21
+
(** List of all datatypes defined in this module *)
22
+
val datatypes : Datatype.t list
+142
lib/html5_checker/datatype/dt_srcset.ml
+142
lib/html5_checker/datatype/dt_srcset.ml
···
1
+
(** Image source set and sizes attribute validation *)
2
+
3
+
(** Split string on commas, preserving parentheses groups *)
4
+
let split_on_commas s =
5
+
let len = String.length s in
6
+
let rec find_splits paren_depth start i acc =
7
+
if i >= len then
8
+
if start < len then List.rev (String.sub s start (len - start) :: acc)
9
+
else List.rev acc
10
+
else
11
+
match s.[i] with
12
+
| '(' -> find_splits (paren_depth + 1) start (i + 1) acc
13
+
| ')' -> find_splits (max 0 (paren_depth - 1)) start (i + 1) acc
14
+
| ',' when paren_depth = 0 ->
15
+
let part = String.sub s start (i - start) in
16
+
find_splits 0 (i + 1) (i + 1) (part :: acc)
17
+
| _ -> find_splits paren_depth start (i + 1) acc
18
+
in
19
+
find_splits 0 0 0 []
20
+
21
+
(** Parse a descriptor (width or pixel density) *)
22
+
let parse_descriptor s =
23
+
let trimmed = Datatype.trim_html_spaces s in
24
+
let len = String.length trimmed in
25
+
if len < 2 then None
26
+
else
27
+
let suffix = trimmed.[len - 1] in
28
+
let num_part = String.sub trimmed 0 (len - 1) in
29
+
match suffix with
30
+
| 'w' ->
31
+
(try
32
+
let n = int_of_string num_part in
33
+
if n > 0 then Some (`Width n) else None
34
+
with _ -> None)
35
+
| 'x' ->
36
+
(try
37
+
let f = float_of_string num_part in
38
+
if f > 0.0 then Some (`Density f) else None
39
+
with _ -> None)
40
+
| _ -> None
41
+
42
+
(** Validate a single image candidate *)
43
+
let validate_image_candidate s =
44
+
let trimmed = Datatype.trim_html_spaces s in
45
+
if trimmed = "" then Error "Image candidate must not be empty"
46
+
else
47
+
(* Split on whitespace to get URL and optional descriptor *)
48
+
let parts = String.split_on_char ' ' trimmed in
49
+
let parts = List.filter (fun p -> Datatype.trim_html_spaces p <> "") parts in
50
+
match parts with
51
+
| [] -> Error "Image candidate must not be empty"
52
+
| [ _url ] -> Ok None (* Just URL, no descriptor *)
53
+
| [ _url; desc ] -> (
54
+
match parse_descriptor desc with
55
+
| Some d -> Ok (Some d)
56
+
| None ->
57
+
Error
58
+
(Printf.sprintf
59
+
"Invalid descriptor '%s'. Must be a positive integer followed \
60
+
by 'w' or a positive number followed by 'x'"
61
+
desc))
62
+
| _ ->
63
+
Error
64
+
"Image candidate must be a URL optionally followed by one descriptor"
65
+
66
+
(** Validate srcset value *)
67
+
let validate_srcset s =
68
+
let trimmed = Datatype.trim_html_spaces s in
69
+
if trimmed = "" then Error "Srcset must not be empty" else
70
+
let candidates = split_on_commas trimmed in
71
+
let candidates = List.filter (fun c -> Datatype.trim_html_spaces c <> "") candidates in
72
+
if candidates = [] then Error "Srcset must contain at least one image candidate"
73
+
else
74
+
(* Validate each candidate and check for descriptor type consistency *)
75
+
let rec check_candidates has_width has_density = function
76
+
| [] -> Ok ()
77
+
| cand :: rest -> (
78
+
match validate_image_candidate cand with
79
+
| Error e -> Error e
80
+
| Ok None -> check_candidates has_width has_density rest
81
+
| Ok (Some (`Width _)) ->
82
+
if has_density then
83
+
Error
84
+
"Cannot mix width descriptors (w) and pixel density \
85
+
descriptors (x) in the same srcset"
86
+
else check_candidates true has_density rest
87
+
| Ok (Some (`Density _)) ->
88
+
if has_width then
89
+
Error
90
+
"Cannot mix width descriptors (w) and pixel density \
91
+
descriptors (x) in the same srcset"
92
+
else check_candidates has_width true rest)
93
+
in
94
+
check_candidates false false candidates
95
+
96
+
module Srcset = struct
97
+
let name = "srcset"
98
+
let validate = validate_srcset
99
+
let is_valid s = Result.is_ok (validate s)
100
+
end
101
+
102
+
(** Validate sizes attribute *)
103
+
let validate_sizes s =
104
+
let trimmed = Datatype.trim_html_spaces s in
105
+
if trimmed = "" then Error "Sizes attribute must not be empty"
106
+
else
107
+
(* Split on commas *)
108
+
let entries = split_on_commas trimmed in
109
+
let entries = List.filter (fun e -> Datatype.trim_html_spaces e <> "") entries in
110
+
if entries = [] then Error "Sizes attribute must contain at least one entry"
111
+
else
112
+
(* Each entry except the last should have a media condition
113
+
The last entry is just a size value
114
+
We do basic validation here *)
115
+
let rec check_entries = function
116
+
| [] -> Ok ()
117
+
| [ _last ] ->
118
+
(* Last entry - just a size value, accept anything non-empty *)
119
+
Ok ()
120
+
| entry :: rest ->
121
+
let entry_trimmed = Datatype.trim_html_spaces entry in
122
+
(* Check if it looks like it has a media condition (starts with '(') *)
123
+
if String.length entry_trimmed = 0 then
124
+
Error "Size entry must not be empty"
125
+
else if entry_trimmed.[0] <> '(' then
126
+
Error
127
+
(Printf.sprintf
128
+
"Size entry '%s' should start with a media condition in \
129
+
parentheses"
130
+
entry_trimmed)
131
+
else check_entries rest
132
+
in
133
+
check_entries entries
134
+
135
+
module Sizes = struct
136
+
let name = "sizes"
137
+
let validate = validate_sizes
138
+
let is_valid s = Result.is_ok (validate s)
139
+
end
140
+
141
+
let datatypes =
142
+
[ (module Srcset : Datatype.S); (module Sizes : Datatype.S) ]
+44
lib/html5_checker/datatype/dt_srcset.mli
+44
lib/html5_checker/datatype/dt_srcset.mli
···
1
+
(** Image source set and sizes attribute validators.
2
+
3
+
This module provides validators for srcset and sizes attributes used on
4
+
img and source elements, as defined by the HTML5 specification. *)
5
+
6
+
(** Srcset attribute validator.
7
+
8
+
Validates srcset attribute values which contain comma-separated image
9
+
candidates. Each image candidate consists of:
10
+
- A URL
11
+
- Optional whitespace followed by a width descriptor (e.g., "100w") or
12
+
pixel density descriptor (e.g., "2x")
13
+
14
+
Examples:
15
+
- "image.jpg"
16
+
- "image.jpg 1x"
17
+
- "image-320.jpg 320w, image-640.jpg 640w"
18
+
- "image-1x.jpg 1x, image-2x.jpg 2x"
19
+
20
+
Width descriptors must be positive integers followed by 'w'.
21
+
Pixel density descriptors must be positive numbers followed by 'x'.
22
+
Cannot mix width and density descriptors in the same srcset. *)
23
+
module Srcset : Datatype.S
24
+
25
+
(** Sizes attribute validator.
26
+
27
+
Validates sizes attribute values which contain comma-separated source size
28
+
entries. Each entry (except the last) consists of:
29
+
- A media condition
30
+
- Whitespace
31
+
- A source size value (length or "auto")
32
+
33
+
The last entry is just a source size value (without media condition).
34
+
35
+
Examples:
36
+
- "100vw"
37
+
- "(max-width: 600px) 100vw, 50vw"
38
+
- "(min-width: 800px) 800px, 100vw"
39
+
40
+
This validator performs basic syntax checking. *)
41
+
module Sizes : Datatype.S
42
+
43
+
(** List of all datatypes defined in this module *)
44
+
val datatypes : Datatype.t list
+42
lib/html5_checker/datatype/dt_target.ml
+42
lib/html5_checker/datatype/dt_target.ml
···
1
+
(** Browsing context and target attribute validation *)
2
+
3
+
(** Valid special target keywords (case-insensitive) *)
4
+
let special_keywords = [ "_blank"; "_self"; "_parent"; "_top" ]
5
+
6
+
(** Validate a browsing context name *)
7
+
let validate_browsing_context s =
8
+
if String.length s = 0 then Error "Browsing context name must not be empty"
9
+
else if s.[0] = '_' then
10
+
(* If starts with underscore, must be a special keyword *)
11
+
let lower = Datatype.string_to_ascii_lowercase s in
12
+
if List.mem lower special_keywords then Ok ()
13
+
else
14
+
Error
15
+
(Printf.sprintf
16
+
"Browsing context name '%s' starts with underscore but is not one \
17
+
of the special keywords: %s"
18
+
s (String.concat ", " special_keywords))
19
+
else Ok ()
20
+
21
+
module Browsing_context = struct
22
+
let name = "browsing-context"
23
+
let validate = validate_browsing_context
24
+
let is_valid s = Result.is_ok (validate s)
25
+
end
26
+
27
+
(** Validate a target attribute value
28
+
(For now, this is the same as browsing context validation) *)
29
+
let validate_target s =
30
+
if String.length s = 0 then Error "Target attribute must not be empty"
31
+
else validate_browsing_context s
32
+
33
+
module Target = struct
34
+
let name = "target"
35
+
let validate = validate_target
36
+
let is_valid s = Result.is_ok (validate s)
37
+
end
38
+
39
+
let datatypes =
40
+
[
41
+
(module Target : Datatype.S); (module Browsing_context : Datatype.S);
42
+
]
+50
lib/html5_checker/datatype/dt_target.mli
+50
lib/html5_checker/datatype/dt_target.mli
···
1
+
(** Browsing context and target attribute validators.
2
+
3
+
This module provides validators for browsing context names and target
4
+
attributes used on a, area, base, and form elements, as defined by the
5
+
HTML5 specification. *)
6
+
7
+
(** Target attribute validator.
8
+
9
+
Validates target attribute values which specify where to display linked
10
+
content or form responses. Valid values include:
11
+
12
+
Special keywords (case-insensitive):
13
+
- _blank: New window or tab
14
+
- _self: Same frame (default)
15
+
- _parent: Parent frame
16
+
- _top: Top-level window
17
+
18
+
Or a valid browsing context name:
19
+
- Non-empty string
20
+
- If starts with underscore, must be one of the special keywords above
21
+
22
+
Examples:
23
+
- "_blank"
24
+
- "_self"
25
+
- "myframe"
26
+
- "content-frame" *)
27
+
module Target : Datatype.S
28
+
29
+
(** Browsing context name validator.
30
+
31
+
Validates browsing context names used to identify frames and windows.
32
+
A valid browsing context name is:
33
+
- A non-empty string
34
+
- If it starts with an underscore (_), it must be one of the special
35
+
keywords: _blank, _self, _parent, or _top (case-insensitive)
36
+
- Otherwise, any non-empty string is valid
37
+
38
+
Examples:
39
+
- "myframe"
40
+
- "content"
41
+
- "navigation-frame"
42
+
- "_blank" (special keyword)
43
+
44
+
Invalid examples:
45
+
- "" (empty string)
46
+
- "_custom" (underscore prefix but not a special keyword) *)
47
+
module Browsing_context : Datatype.S
48
+
49
+
(** List of all datatypes defined in this module *)
50
+
val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_translate.ml
+20
lib/html5_checker/datatype/dt_translate.ml
···
1
+
(** Translate attribute validation for HTML5 *)
2
+
3
+
module Translate = struct
4
+
let name = "translate"
5
+
6
+
let validate s =
7
+
let s_lower = Datatype.string_to_ascii_lowercase s in
8
+
match s_lower with
9
+
| "" | "yes" | "no" -> Ok ()
10
+
| _ ->
11
+
Error
12
+
(Printf.sprintf
13
+
"The value '%s' is not a valid translate value. Expected 'yes', \
14
+
'no', or empty string."
15
+
s)
16
+
17
+
let is_valid s = Result.is_ok (validate s)
18
+
end
19
+
20
+
let datatypes = [ (module Translate : Datatype.S) ]
+22
lib/html5_checker/datatype/dt_translate.mli
+22
lib/html5_checker/datatype/dt_translate.mli
···
1
+
(** Translate attribute datatype validator for HTML5.
2
+
3
+
This module provides a validator for the translate attribute, as defined by
4
+
the HTML5 specification. *)
5
+
6
+
(** Translate attribute validator.
7
+
8
+
Validates translate attribute values which can be:
9
+
- "yes" - the element should be translated
10
+
- "no" - the element should not be translated
11
+
- "" (empty string) - equivalent to "yes"
12
+
13
+
Values are case-insensitive.
14
+
15
+
Examples:
16
+
- "yes"
17
+
- "no"
18
+
- "" *)
19
+
module Translate : Datatype.S
20
+
21
+
(** List of all datatypes defined in this module *)
22
+
val datatypes : Datatype.t list
+124
lib/html5_checker/datatype/dt_url.ml
+124
lib/html5_checker/datatype/dt_url.ml
···
1
+
(** URL and IRI datatype validators for HTML5. *)
2
+
3
+
(** Check if a character is valid in a URL scheme name.
4
+
Scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) *)
5
+
let is_scheme_char_initial = function 'a' .. 'z' | 'A' .. 'Z' -> true | _ -> false
6
+
7
+
let is_scheme_char_subsequent = function
8
+
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '+' | '-' | '.' -> true
9
+
| _ -> false
10
+
11
+
(** Split a URL into scheme and remainder.
12
+
Returns Some (scheme, rest) if a valid scheme is found, None otherwise.
13
+
Skips leading HTML whitespace before checking for scheme. *)
14
+
let split_scheme s =
15
+
let len = String.length s in
16
+
(* Skip leading HTML whitespace *)
17
+
let rec skip_whitespace i =
18
+
if i >= len then len
19
+
else if Datatype.is_whitespace s.[i] then skip_whitespace (i + 1)
20
+
else i
21
+
in
22
+
let start = skip_whitespace 0 in
23
+
if start >= len then None
24
+
else if not (is_scheme_char_initial s.[start]) then None
25
+
else
26
+
(* Look for scheme *)
27
+
let rec find_colon i =
28
+
if i >= len then None
29
+
else
30
+
match s.[i] with
31
+
| ':' ->
32
+
let scheme =
33
+
String.sub s start (i - start) |> Datatype.string_to_ascii_lowercase
34
+
in
35
+
let rest = String.sub s (i + 1) (len - i - 1) in
36
+
Some (scheme, rest)
37
+
| c when is_scheme_char_subsequent c -> find_colon (i + 1)
38
+
| _ -> None
39
+
in
40
+
find_colon (start + 1)
41
+
42
+
(** Check if a scheme is well-known (http, https, ftp, mailto, file) *)
43
+
let is_well_known_scheme = function
44
+
| "http" | "https" | "ftp" | "mailto" | "file" -> true
45
+
| _ -> false
46
+
47
+
module Url : Datatype.S = struct
48
+
let name = "URL"
49
+
50
+
let validate s =
51
+
let trimmed = Datatype.trim_html_spaces s in
52
+
if trimmed = "" then Error "Must be non-empty."
53
+
else
54
+
(* Basic validation - check for control characters *)
55
+
let len = String.length s in
56
+
let rec check_chars i =
57
+
if i >= len then Ok ()
58
+
else
59
+
match s.[i] with
60
+
| '\x00' .. '\x1F' | '\x7F' ->
61
+
Error "URLs must not contain control characters."
62
+
| _ -> check_chars (i + 1)
63
+
in
64
+
check_chars 0
65
+
66
+
let is_valid s = Result.is_ok (validate s)
67
+
end
68
+
69
+
module Url_potentially_empty : Datatype.S = struct
70
+
let name = "URL (potentially empty)"
71
+
72
+
let validate s =
73
+
let trimmed = Datatype.trim_html_spaces s in
74
+
if trimmed = "" then Ok ()
75
+
else
76
+
(* Use same validation as Url for non-empty values *)
77
+
Url.validate s
78
+
79
+
let is_valid s = Result.is_ok (validate s)
80
+
end
81
+
82
+
module Url_absolute : Datatype.S = struct
83
+
let name = "absolute URL"
84
+
85
+
let validate s =
86
+
let trimmed = Datatype.trim_html_spaces s in
87
+
if trimmed = "" then Error "Must be non-empty."
88
+
else
89
+
match split_scheme s with
90
+
| None ->
91
+
Error (Printf.sprintf "The string \"%s\" is not an absolute URL." s)
92
+
| Some (scheme, _rest) ->
93
+
if is_well_known_scheme scheme || String.length scheme > 0 then
94
+
(* For well-known schemes, we could do more validation,
95
+
but for now we just check that it has a scheme *)
96
+
Ok ()
97
+
else Error "The string is not an absolute URL."
98
+
99
+
let is_valid s = Result.is_ok (validate s)
100
+
end
101
+
102
+
module Iri : Datatype.S = struct
103
+
let name = "absolute URL"
104
+
105
+
(* IRI validation is the same as absolute URL validation *)
106
+
let validate = Url_absolute.validate
107
+
let is_valid s = Result.is_ok (validate s)
108
+
end
109
+
110
+
module Iri_ref : Datatype.S = struct
111
+
let name = "URL"
112
+
113
+
(* IRI reference validation is the same as URL validation *)
114
+
let validate = Url.validate
115
+
let is_valid s = Result.is_ok (validate s)
116
+
end
117
+
118
+
let datatypes =
119
+
[ (module Url : Datatype.S)
120
+
; (module Url_potentially_empty : Datatype.S)
121
+
; (module Url_absolute : Datatype.S)
122
+
; (module Iri : Datatype.S)
123
+
; (module Iri_ref : Datatype.S)
124
+
]
+48
lib/html5_checker/datatype/dt_url.mli
+48
lib/html5_checker/datatype/dt_url.mli
···
1
+
(** URL and IRI datatype validators for HTML5.
2
+
3
+
This module provides validators for URLs and Internationalized Resource
4
+
Identifiers (IRIs) based on the Nu HTML Checker's implementation.
5
+
6
+
The validators perform basic structural validation. Full URL parsing and
7
+
validation according to WHATWG URL spec would require a complete URL parser. *)
8
+
9
+
(** URL validator (IriRef in Nu validator).
10
+
11
+
Validates URL references which can be either absolute or relative URLs.
12
+
Basic validation ensures:
13
+
- Non-empty after trimming HTML whitespace
14
+
- No control characters
15
+
- Basic structural correctness
16
+
17
+
This corresponds to the general URL/IRI reference type. *)
18
+
module Url : Datatype.S
19
+
20
+
(** URL validator that allows empty values.
21
+
22
+
Same as Url but permits empty strings. This is used for optional URL
23
+
attributes. *)
24
+
module Url_potentially_empty : Datatype.S
25
+
26
+
(** Absolute URL validator (Iri in Nu validator).
27
+
28
+
Validates that a URL is absolute (has a scheme). An absolute URL must:
29
+
- Start with a valid scheme (e.g., http:, https:, ftp:, mailto:)
30
+
- Not be empty
31
+
- Follow URL structure rules
32
+
33
+
Scheme format: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) *)
34
+
module Url_absolute : Datatype.S
35
+
36
+
(** IRI (Internationalized Resource Identifier) validator.
37
+
38
+
Alias for Url_absolute. IRIs are the internationalized version of URIs,
39
+
allowing Unicode characters. *)
40
+
module Iri : Datatype.S
41
+
42
+
(** IRI reference validator.
43
+
44
+
Alias for Url. IRI references can be relative or absolute. *)
45
+
module Iri_ref : Datatype.S
46
+
47
+
(** List of all URL/IRI-related datatypes for registration. *)
48
+
val datatypes : Datatype.t list
+22
lib/html5_checker/datatype/dt_wrap.ml
+22
lib/html5_checker/datatype/dt_wrap.ml
···
1
+
(** Textarea wrap attribute validation based on HTML5 spec *)
2
+
3
+
(** Valid wrap values *)
4
+
let valid_wraps = [ "soft"; "hard" ]
5
+
6
+
module Wrap = struct
7
+
let name = "wrap"
8
+
9
+
let validate s =
10
+
let s_lower = Datatype.string_to_ascii_lowercase s in
11
+
if List.mem s_lower valid_wraps then Ok ()
12
+
else
13
+
Error
14
+
(Printf.sprintf
15
+
"The value '%s' is not a valid wrap value. Expected one of: %s."
16
+
s
17
+
(String.concat ", " valid_wraps))
18
+
19
+
let is_valid s = Result.is_ok (validate s)
20
+
end
21
+
22
+
let datatypes = [ (module Wrap : Datatype.S) ]
+20
lib/html5_checker/datatype/dt_wrap.mli
+20
lib/html5_checker/datatype/dt_wrap.mli
···
1
+
(** Textarea wrap attribute datatype validator.
2
+
3
+
This module provides a validator for the wrap attribute used on
4
+
textarea elements, as defined by the HTML5 specification. *)
5
+
6
+
(** Textarea wrap attribute validator.
7
+
8
+
Validates textarea wrap attribute values which can be:
9
+
- soft - Soft wrapping (line breaks not submitted, default)
10
+
- hard - Hard wrapping (line breaks are submitted)
11
+
12
+
Values are matched case-insensitively according to HTML5 spec.
13
+
14
+
Examples:
15
+
- "soft"
16
+
- "hard" *)
17
+
module Wrap : Datatype.S
18
+
19
+
(** List of all datatypes defined in this module *)
20
+
val datatypes : Datatype.t list
+94
lib/html5_checker/dom_walker.ml
+94
lib/html5_checker/dom_walker.ml
···
1
+
(** DOM tree traversal for HTML5 conformance checking. *)
2
+
3
+
(** Package a checker with its state for traversal. *)
4
+
type checker_state = {
5
+
start_element :
6
+
name:string ->
7
+
namespace:string option ->
8
+
attrs:(string * string) list ->
9
+
Message_collector.t ->
10
+
unit;
11
+
end_element :
12
+
name:string -> namespace:string option -> Message_collector.t -> unit;
13
+
characters : string -> Message_collector.t -> unit;
14
+
end_document : Message_collector.t -> unit;
15
+
}
16
+
17
+
(** Create a checker state package from a first-class module. *)
18
+
let make_checker_state (module C : Checker.S) =
19
+
let state = C.create () in
20
+
{
21
+
start_element = (fun ~name ~namespace ~attrs collector ->
22
+
C.start_element state ~name ~namespace ~attrs collector);
23
+
end_element = (fun ~name ~namespace collector ->
24
+
C.end_element state ~name ~namespace collector);
25
+
characters = (fun text collector ->
26
+
C.characters state text collector);
27
+
end_document = (fun collector ->
28
+
C.end_document state collector);
29
+
}
30
+
31
+
(** Walk a DOM node with a single checker state. *)
32
+
let rec walk_node_single cs collector node =
33
+
let open Html5rw.Dom in
34
+
match node.name with
35
+
| "#text" ->
36
+
(* Text node: emit characters event *)
37
+
cs.characters node.data collector
38
+
| "#comment" ->
39
+
(* Comment node: emit characters event with comment text *)
40
+
cs.characters node.data collector
41
+
| "#document" | "#document-fragment" ->
42
+
(* Document/fragment nodes: just traverse children *)
43
+
List.iter (walk_node_single cs collector) node.children
44
+
| "!doctype" ->
45
+
(* Doctype node: skip (no validation events for doctype) *)
46
+
()
47
+
| _ ->
48
+
(* Element node: emit start, traverse children, emit end *)
49
+
cs.start_element ~name:node.name ~namespace:node.namespace ~attrs:node.attrs collector;
50
+
List.iter (walk_node_single cs collector) node.children;
51
+
cs.end_element ~name:node.name ~namespace:node.namespace collector
52
+
53
+
let walk checker collector node =
54
+
let cs = make_checker_state checker in
55
+
walk_node_single cs collector node;
56
+
cs.end_document collector
57
+
58
+
(** Walk a DOM node with multiple checker states. *)
59
+
let rec walk_node_all css collector node =
60
+
let open Html5rw.Dom in
61
+
match node.name with
62
+
| "#text" ->
63
+
(* Text node: emit characters event to all checkers *)
64
+
List.iter (fun cs -> cs.characters node.data collector) css
65
+
| "#comment" ->
66
+
(* Comment node: emit characters event with comment text to all checkers *)
67
+
List.iter (fun cs -> cs.characters node.data collector) css
68
+
| "#document" | "#document-fragment" ->
69
+
(* Document/fragment nodes: just traverse children *)
70
+
List.iter (walk_node_all css collector) node.children
71
+
| "!doctype" ->
72
+
(* Doctype node: skip *)
73
+
()
74
+
| _ ->
75
+
(* Element node: emit start to all checkers, traverse children, emit end to all *)
76
+
List.iter (fun cs ->
77
+
cs.start_element ~name:node.name ~namespace:node.namespace ~attrs:node.attrs collector
78
+
) css;
79
+
List.iter (walk_node_all css collector) node.children;
80
+
List.iter (fun cs ->
81
+
cs.end_element ~name:node.name ~namespace:node.namespace collector
82
+
) css
83
+
84
+
let walk_all checkers collector node =
85
+
(* Create checker state packages *)
86
+
let css = List.map make_checker_state checkers in
87
+
(* Traverse with all checkers *)
88
+
walk_node_all css collector node;
89
+
(* Call end_document on all checkers *)
90
+
List.iter (fun cs -> cs.end_document collector) css
91
+
92
+
let walk_registry registry collector node =
93
+
let checkers = Checker_registry.all registry in
94
+
walk_all checkers collector node
+197
lib/html5_checker/dom_walker.mli
+197
lib/html5_checker/dom_walker.mli
···
1
+
(** DOM tree traversal for HTML5 conformance checking.
2
+
3
+
This module provides functions to traverse DOM trees and apply checkers
4
+
to validate HTML5 documents. It implements a depth-first, in-order
5
+
traversal that visits every node in the tree and notifies checkers
6
+
of traversal events.
7
+
8
+
{2 Traversal Model}
9
+
10
+
The walker follows a SAX-like event model, emitting events as it
11
+
encounters different node types during traversal:
12
+
13
+
{v
14
+
Document
15
+
└── html (start_element "html")
16
+
├── head (start_element "head")
17
+
│ └── title (start_element "title")
18
+
│ ├── #text "Page Title" (characters)
19
+
│ └── (end_element "title")
20
+
└── body (start_element "body")
21
+
└── p (start_element "p")
22
+
├── #text "Hello " (characters)
23
+
├── b (start_element "b")
24
+
│ ├── #text "world" (characters)
25
+
│ └── (end_element "b")
26
+
├── #text "!" (characters)
27
+
└── (end_element "p")
28
+
end_document
29
+
v}
30
+
31
+
{2 Event Sequence}
32
+
33
+
For each element node:
34
+
1. {!Checker.S.start_element} is called when entering the element
35
+
2. Children are recursively traversed
36
+
3. {!Checker.S.end_element} is called when exiting the element
37
+
38
+
For text and comment nodes:
39
+
- {!Checker.S.characters} is called with the text content
40
+
41
+
After the entire tree is traversed:
42
+
- {!Checker.S.end_document} is called on all checkers
43
+
44
+
{2 Checker Coordination}
45
+
46
+
When multiple checkers are used:
47
+
- All checkers receive the same event sequence
48
+
- Events are delivered to checkers in the order they appear in the list
49
+
- Each checker maintains independent state
50
+
- Messages from all checkers are collected together
51
+
52
+
This allows composing orthogonal validation rules without interference.
53
+
54
+
{2 Usage Examples}
55
+
56
+
{b Single checker:}
57
+
{[
58
+
let checker = Checker.noop () in
59
+
let collector = Message_collector.create () in
60
+
walk checker collector dom;
61
+
let messages = Message_collector.messages collector in
62
+
List.iter Message.pp messages
63
+
]}
64
+
65
+
{b Multiple checkers:}
66
+
{[
67
+
let checkers = [checker1; checker2; checker3] in
68
+
let collector = Message_collector.create () in
69
+
walk_all checkers collector dom;
70
+
(* Analyze messages from all checkers *)
71
+
]}
72
+
73
+
{b Registry of checkers:}
74
+
{[
75
+
let registry = Checker_registry.default () in
76
+
let collector = Message_collector.create () in
77
+
walk_registry registry collector dom;
78
+
(* All registered checkers have validated the DOM *)
79
+
]} *)
80
+
81
+
(** {1 Single Checker Traversal} *)
82
+
83
+
val walk : Checker.t -> Message_collector.t -> Html5rw.Dom.node -> unit
84
+
(** [walk checker collector node] traverses a DOM tree with a single checker.
85
+
86
+
@param checker The checker to apply during traversal
87
+
@param collector The message collector for validation messages
88
+
@param node The root node to start traversal from
89
+
90
+
The traversal is depth-first and in-order: for each element, the
91
+
checker receives a {!Checker.S.start_element} event, then children
92
+
are recursively traversed, then an {!Checker.S.end_element} event
93
+
is emitted.
94
+
95
+
After the entire tree is traversed, {!Checker.S.end_document} is
96
+
called to allow the checker to emit any final validation messages.
97
+
98
+
{b Example:}
99
+
{[
100
+
(* Validate a parsed HTML document *)
101
+
let checker = Checker.noop () in
102
+
let collector = Message_collector.create () in
103
+
walk checker collector document_node;
104
+
105
+
(* Check for errors *)
106
+
let messages = Message_collector.messages collector in
107
+
let errors = List.filter
108
+
(fun msg -> msg.Message.severity = Message.Error)
109
+
messages in
110
+
if errors <> [] then
111
+
Printf.printf "Found %d errors\n" (List.length errors)
112
+
]}
113
+
114
+
{b Notes:}
115
+
- Only element nodes trigger start/end events
116
+
- Text and comment nodes trigger character events
117
+
- Document and doctype nodes are silently skipped
118
+
- The traversal follows document order (parent before children,
119
+
earlier siblings before later ones) *)
120
+
121
+
(** {1 Multiple Checker Traversal} *)
122
+
123
+
val walk_all :
124
+
Checker.t list -> Message_collector.t -> Html5rw.Dom.node -> unit
125
+
(** [walk_all checkers collector node] traverses a DOM tree with multiple
126
+
checkers.
127
+
128
+
@param checkers List of checkers to apply during traversal
129
+
@param collector The message collector for validation messages
130
+
@param node The root node to start traversal from
131
+
132
+
This performs a single tree traversal, delivering each event to all
133
+
checkers in sequence. This is more efficient than calling {!walk}
134
+
multiple times.
135
+
136
+
All checkers receive events in the order they appear in the list.
137
+
Each checker maintains independent state, so validation rules can
138
+
be composed without interference.
139
+
140
+
{b Example:}
141
+
{[
142
+
(* Run multiple validation passes in one traversal *)
143
+
let structure_checker = (module StructureChecker : Checker.S) in
144
+
let attribute_checker = (module AttributeChecker : Checker.S) in
145
+
let obsolete_checker = (module ObsoleteChecker : Checker.S) in
146
+
147
+
let checkers = [structure_checker; attribute_checker; obsolete_checker] in
148
+
let collector = Message_collector.create () in
149
+
150
+
walk_all checkers collector document_node;
151
+
152
+
(* All three checkers have validated the document *)
153
+
let messages = Message_collector.messages collector in
154
+
Message_format.print_messages messages
155
+
]}
156
+
157
+
{b Empty list behavior:}
158
+
If the checkers list is empty, the tree is traversed but no validation
159
+
is performed. This is equivalent to calling [walk (Checker.noop ()) ...]. *)
160
+
161
+
(** {1 Registry-Based Traversal} *)
162
+
163
+
val walk_registry :
164
+
Checker_registry.t -> Message_collector.t -> Html5rw.Dom.node -> unit
165
+
(** [walk_registry registry collector node] traverses a DOM tree with all
166
+
checkers from a registry.
167
+
168
+
@param registry The registry containing checkers to apply
169
+
@param collector The message collector for validation messages
170
+
@param node The root node to start traversal from
171
+
172
+
This is equivalent to:
173
+
{[
174
+
let checkers = Checker_registry.all registry in
175
+
walk_all checkers collector node
176
+
]}
177
+
178
+
Use this when you want to run a pre-configured set of checkers
179
+
without manually extracting them from the registry.
180
+
181
+
{b Example:}
182
+
{[
183
+
(* Set up registry with desired checkers *)
184
+
let registry = Checker_registry.default () in
185
+
Checker_registry.register registry "custom" my_checker;
186
+
187
+
(* Validate multiple documents with same checker set *)
188
+
List.iter (fun doc ->
189
+
let collector = Message_collector.create () in
190
+
walk_registry registry collector doc;
191
+
report_results collector
192
+
) documents
193
+
]}
194
+
195
+
{b Empty registry behavior:}
196
+
If the registry is empty, the tree is traversed but no validation
197
+
is performed. *)
+7
lib/html5_checker/dune
+7
lib/html5_checker/dune
+78
lib/html5_checker/html5_checker.ml
+78
lib/html5_checker/html5_checker.ml
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
module Message = Message
7
+
module Message_collector = Message_collector
8
+
module Message_format = Message_format
9
+
module Parse_error_bridge = Parse_error_bridge
10
+
module Content_category = Content_category
11
+
module Content_model = Content_model
12
+
module Attr_spec = Attr_spec
13
+
module Element_spec = Element_spec
14
+
15
+
type t = {
16
+
doc : Html5rw.t;
17
+
msgs : Message.t list;
18
+
system_id : string option;
19
+
}
20
+
21
+
let check ?(collect_parse_errors = true) ?system_id reader =
22
+
let doc = Html5rw.parse ~collect_errors:collect_parse_errors reader in
23
+
let collector = Message_collector.create () in
24
+
25
+
(* Add parse errors if collected *)
26
+
if collect_parse_errors then begin
27
+
let parse_errors = Parse_error_bridge.collect_parse_errors ?system_id doc in
28
+
List.iter (Message_collector.add collector) parse_errors
29
+
end;
30
+
31
+
(* TODO: Run checkers via dom_walker when available *)
32
+
(* Dom_walker.walk_registry registry (Html5rw.root doc) collector; *)
33
+
34
+
{ doc; msgs = Message_collector.messages collector; system_id }
35
+
36
+
let check_dom ?(collect_parse_errors = true) ?system_id doc =
37
+
let collector = Message_collector.create () in
38
+
39
+
(* Add parse errors if requested *)
40
+
if collect_parse_errors then begin
41
+
let parse_errors = Parse_error_bridge.collect_parse_errors ?system_id doc in
42
+
List.iter (Message_collector.add collector) parse_errors
43
+
end;
44
+
45
+
(* TODO: Run checkers via dom_walker when available *)
46
+
(* Dom_walker.walk_registry registry (Html5rw.root doc) collector; *)
47
+
48
+
{ doc; msgs = Message_collector.messages collector; system_id }
49
+
50
+
let messages t = t.msgs
51
+
52
+
let errors t =
53
+
List.filter
54
+
(fun msg -> msg.Message.severity = Message.Error)
55
+
t.msgs
56
+
57
+
let warnings t =
58
+
List.filter
59
+
(fun msg -> msg.Message.severity = Message.Warning)
60
+
t.msgs
61
+
62
+
let has_errors t =
63
+
List.exists
64
+
(fun msg -> msg.Message.severity = Message.Error)
65
+
t.msgs
66
+
67
+
let document t = t.doc
68
+
69
+
let system_id t = t.system_id
70
+
71
+
let format_text t =
72
+
Message_format.format_text ?system_id:t.system_id t.msgs
73
+
74
+
let format_json t =
75
+
Message_format.format_json ?system_id:t.system_id t.msgs
76
+
77
+
let format_gnu t =
78
+
Message_format.format_gnu ?system_id:t.system_id t.msgs
+102
lib/html5_checker/html5_checker.mli
+102
lib/html5_checker/html5_checker.mli
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** HTML5 conformance checker.
7
+
8
+
This module provides HTML5 validation and conformance checking,
9
+
combining parse error detection with structural validation rules. *)
10
+
11
+
(** {1 Re-exported modules} *)
12
+
13
+
(** Validation message types and constructors. *)
14
+
module Message = Message
15
+
16
+
(** Message collection utilities. *)
17
+
module Message_collector = Message_collector
18
+
19
+
(** Message output formatters. *)
20
+
module Message_format = Message_format
21
+
22
+
(** Parse error bridge. *)
23
+
module Parse_error_bridge = Parse_error_bridge
24
+
25
+
(** {2 Content Model Framework} *)
26
+
27
+
(** HTML5 content categories. *)
28
+
module Content_category = Content_category
29
+
30
+
(** HTML5 element content models. *)
31
+
module Content_model = Content_model
32
+
33
+
(** HTML5 attribute specifications. *)
34
+
module Attr_spec = Attr_spec
35
+
36
+
(** HTML5 element specifications. *)
37
+
module Element_spec = Element_spec
38
+
39
+
(** {1 Core Types} *)
40
+
41
+
(** Result of checking an HTML document. *)
42
+
type t
43
+
44
+
(** {1 Checking Functions} *)
45
+
46
+
(** Parse and validate HTML from a reader.
47
+
48
+
This function parses the HTML input and optionally collects parse errors.
49
+
Future versions will also run conformance checkers on the resulting DOM.
50
+
51
+
@param collect_parse_errors If true, collect and include parse errors. Default: true.
52
+
@param system_id Optional file path or URL for error reporting.
53
+
@param reader Bytesrw reader containing HTML input. *)
54
+
val check :
55
+
?collect_parse_errors:bool ->
56
+
?system_id:string ->
57
+
Bytesrw.Bytes.Reader.t ->
58
+
t
59
+
60
+
(** Validate an already-parsed HTML document.
61
+
62
+
This function takes an existing Html5rw.t parse result and validates it.
63
+
64
+
@param collect_parse_errors If true, collect and include parse errors from the result. Default: true.
65
+
@param system_id Optional file path or URL for error reporting.
66
+
@param result Already-parsed HTML document. *)
67
+
val check_dom :
68
+
?collect_parse_errors:bool ->
69
+
?system_id:string ->
70
+
Html5rw.t ->
71
+
t
72
+
73
+
(** {1 Result Accessors} *)
74
+
75
+
(** Get all validation messages. *)
76
+
val messages : t -> Message.t list
77
+
78
+
(** Get only error messages. *)
79
+
val errors : t -> Message.t list
80
+
81
+
(** Get only warning messages. *)
82
+
val warnings : t -> Message.t list
83
+
84
+
(** Check if there are any errors. *)
85
+
val has_errors : t -> bool
86
+
87
+
(** Get the underlying parsed document. *)
88
+
val document : t -> Html5rw.t
89
+
90
+
(** Get the system identifier if set. *)
91
+
val system_id : t -> string option
92
+
93
+
(** {1 Formatting} *)
94
+
95
+
(** Format messages as human-readable text. *)
96
+
val format_text : t -> string
97
+
98
+
(** Format messages as JSON. *)
99
+
val format_json : t -> string
100
+
101
+
(** Format messages in GNU style. *)
102
+
val format_gnu : t -> string
lib/html5_checker/message.cmi
lib/html5_checker/message.cmi
This is a binary file and will not be displayed.
+80
lib/html5_checker/message.ml
+80
lib/html5_checker/message.ml
···
1
+
type severity = Error | Warning | Info
2
+
3
+
type location = {
4
+
line : int;
5
+
column : int;
6
+
end_line : int option;
7
+
end_column : int option;
8
+
system_id : string option;
9
+
}
10
+
11
+
type t = {
12
+
severity : severity;
13
+
message : string;
14
+
code : string option;
15
+
location : location option;
16
+
element : string option;
17
+
attribute : string option;
18
+
extract : string option;
19
+
}
20
+
21
+
let make ~severity ~message ?code ?location ?element ?attribute ?extract () =
22
+
{ severity; message; code; location; element; attribute; extract }
23
+
24
+
let error ~message ?code ?location ?element ?attribute ?extract () =
25
+
make ~severity:Error ~message ?code ?location ?element ?attribute ?extract ()
26
+
27
+
let warning ~message ?code ?location ?element ?attribute ?extract () =
28
+
make ~severity:Warning ~message ?code ?location ?element ?attribute ?extract
29
+
()
30
+
31
+
let info ~message ?code ?location ?element ?attribute ?extract () =
32
+
make ~severity:Info ~message ?code ?location ?element ?attribute ?extract ()
33
+
34
+
let make_location ~line ~column ?end_line ?end_column ?system_id () =
35
+
{ line; column; end_line; end_column; system_id }
36
+
37
+
let severity_to_string = function
38
+
| Error -> "error"
39
+
| Warning -> "warning"
40
+
| Info -> "info"
41
+
42
+
let pp_severity fmt severity =
43
+
Format.pp_print_string fmt (severity_to_string severity)
44
+
45
+
let pp_location fmt loc =
46
+
match loc.system_id with
47
+
| Some sid -> Format.fprintf fmt "%s:" sid
48
+
| None -> ();
49
+
Format.fprintf fmt "%d:%d" loc.line loc.column;
50
+
match (loc.end_line, loc.end_column) with
51
+
| Some el, Some ec when el = loc.line && ec > loc.column ->
52
+
Format.fprintf fmt "-%d" ec
53
+
| Some el, Some ec when el > loc.line ->
54
+
Format.fprintf fmt "-%d:%d" el ec
55
+
| _ -> ()
56
+
57
+
let pp fmt msg =
58
+
(match msg.location with
59
+
| Some loc ->
60
+
pp_location fmt loc;
61
+
Format.fprintf fmt ": "
62
+
| None -> ());
63
+
pp_severity fmt msg.severity;
64
+
(match msg.code with
65
+
| Some code -> Format.fprintf fmt " [%s]" code
66
+
| None -> ());
67
+
Format.fprintf fmt ": %s" msg.message;
68
+
(match msg.element with
69
+
| Some elem -> Format.fprintf fmt " (element: %s)" elem
70
+
| None -> ());
71
+
match msg.attribute with
72
+
| Some attr -> Format.fprintf fmt " (attribute: %s)" attr
73
+
| None -> ()
74
+
75
+
let to_string msg =
76
+
let buf = Buffer.create 256 in
77
+
let fmt = Format.formatter_of_buffer buf in
78
+
pp fmt msg;
79
+
Format.pp_print_flush fmt ();
80
+
Buffer.contents buf
+104
lib/html5_checker/message.mli
+104
lib/html5_checker/message.mli
···
1
+
(** HTML5 validation messages.
2
+
3
+
This module provides types for validation messages including errors,
4
+
warnings, and informational messages with source location tracking. *)
5
+
6
+
(** Message severity levels. *)
7
+
type severity =
8
+
| Error (** Conformance error - document is invalid *)
9
+
| Warning (** Conformance warning - likely problematic *)
10
+
| Info (** Informational - suggestions for improvement *)
11
+
12
+
(** Source location information. *)
13
+
type location = {
14
+
line : int; (** 1-indexed line number *)
15
+
column : int; (** 1-indexed column number *)
16
+
end_line : int option; (** Optional end line for ranges *)
17
+
end_column : int option; (** Optional end column *)
18
+
system_id : string option; (** File path or URL *)
19
+
}
20
+
21
+
(** A validation message. *)
22
+
type t = {
23
+
severity : severity;
24
+
message : string; (** Human-readable description *)
25
+
code : string option; (** Machine-readable error code *)
26
+
location : location option;
27
+
element : string option; (** Element name if relevant *)
28
+
attribute : string option; (** Attribute name if relevant *)
29
+
extract : string option; (** Source excerpt *)
30
+
}
31
+
32
+
(** {1 Constructors} *)
33
+
34
+
(** Create a validation message with specified severity. *)
35
+
val make :
36
+
severity:severity ->
37
+
message:string ->
38
+
?code:string ->
39
+
?location:location ->
40
+
?element:string ->
41
+
?attribute:string ->
42
+
?extract:string ->
43
+
unit ->
44
+
t
45
+
46
+
(** Create an error message. *)
47
+
val error :
48
+
message:string ->
49
+
?code:string ->
50
+
?location:location ->
51
+
?element:string ->
52
+
?attribute:string ->
53
+
?extract:string ->
54
+
unit ->
55
+
t
56
+
57
+
(** Create a warning message. *)
58
+
val warning :
59
+
message:string ->
60
+
?code:string ->
61
+
?location:location ->
62
+
?element:string ->
63
+
?attribute:string ->
64
+
?extract:string ->
65
+
unit ->
66
+
t
67
+
68
+
(** Create an informational message. *)
69
+
val info :
70
+
message:string ->
71
+
?code:string ->
72
+
?location:location ->
73
+
?element:string ->
74
+
?attribute:string ->
75
+
?extract:string ->
76
+
unit ->
77
+
t
78
+
79
+
(** Create a location record. *)
80
+
val make_location :
81
+
line:int ->
82
+
column:int ->
83
+
?end_line:int ->
84
+
?end_column:int ->
85
+
?system_id:string ->
86
+
unit ->
87
+
location
88
+
89
+
(** {1 Formatting} *)
90
+
91
+
(** Convert severity to string representation. *)
92
+
val severity_to_string : severity -> string
93
+
94
+
(** Pretty-print severity. *)
95
+
val pp_severity : Format.formatter -> severity -> unit
96
+
97
+
(** Pretty-print location. *)
98
+
val pp_location : Format.formatter -> location -> unit
99
+
100
+
(** Pretty-print a validation message. *)
101
+
val pp : Format.formatter -> t -> unit
102
+
103
+
(** Convert a validation message to a string. *)
104
+
val to_string : t -> string
+38
lib/html5_checker/message_collector.ml
+38
lib/html5_checker/message_collector.ml
···
1
+
type t = { mutable messages : Message.t list }
2
+
3
+
let create () = { messages = [] }
4
+
5
+
let add t msg = t.messages <- msg :: t.messages
6
+
7
+
let add_error t ~message ?code ?location ?element ?attribute ?extract () =
8
+
let msg =
9
+
Message.error ~message ?code ?location ?element ?attribute ?extract ()
10
+
in
11
+
add t msg
12
+
13
+
let add_warning t ~message ?code ?location ?element ?attribute ?extract () =
14
+
let msg =
15
+
Message.warning ~message ?code ?location ?element ?attribute ?extract ()
16
+
in
17
+
add t msg
18
+
19
+
let messages t = List.rev t.messages
20
+
21
+
let errors t =
22
+
List.filter (fun msg -> msg.Message.severity = Message.Error) (messages t)
23
+
24
+
let warnings t =
25
+
List.filter (fun msg -> msg.Message.severity = Message.Warning) (messages t)
26
+
27
+
let has_errors t =
28
+
List.exists (fun msg -> msg.Message.severity = Message.Error) t.messages
29
+
30
+
let count t = List.length t.messages
31
+
32
+
let error_count t =
33
+
List.fold_left
34
+
(fun acc msg ->
35
+
if msg.Message.severity = Message.Error then acc + 1 else acc)
36
+
0 t.messages
37
+
38
+
let clear t = t.messages <- []
+65
lib/html5_checker/message_collector.mli
+65
lib/html5_checker/message_collector.mli
···
1
+
(** Message collector for accumulating validation messages. *)
2
+
3
+
(** The type of a message collector. *)
4
+
type t
5
+
6
+
(** {1 Creation} *)
7
+
8
+
(** Create a new empty message collector. *)
9
+
val create : unit -> t
10
+
11
+
(** {1 Adding Messages} *)
12
+
13
+
(** Add a message to the collector. *)
14
+
val add : t -> Message.t -> unit
15
+
16
+
(** Add an error message to the collector. *)
17
+
val add_error :
18
+
t ->
19
+
message:string ->
20
+
?code:string ->
21
+
?location:Message.location ->
22
+
?element:string ->
23
+
?attribute:string ->
24
+
?extract:string ->
25
+
unit ->
26
+
unit
27
+
28
+
(** Add a warning message to the collector. *)
29
+
val add_warning :
30
+
t ->
31
+
message:string ->
32
+
?code:string ->
33
+
?location:Message.location ->
34
+
?element:string ->
35
+
?attribute:string ->
36
+
?extract:string ->
37
+
unit ->
38
+
unit
39
+
40
+
(** {1 Retrieving Messages} *)
41
+
42
+
(** Get all messages in the order they were added. *)
43
+
val messages : t -> Message.t list
44
+
45
+
(** Get only error messages. *)
46
+
val errors : t -> Message.t list
47
+
48
+
(** Get only warning messages. *)
49
+
val warnings : t -> Message.t list
50
+
51
+
(** {1 Status Queries} *)
52
+
53
+
(** Check if the collector contains any error messages. *)
54
+
val has_errors : t -> bool
55
+
56
+
(** Get the total number of messages. *)
57
+
val count : t -> int
58
+
59
+
(** Get the number of error messages. *)
60
+
val error_count : t -> int
61
+
62
+
(** {1 Modification} *)
63
+
64
+
(** Clear all messages from the collector. *)
65
+
val clear : t -> unit
+130
lib/html5_checker/message_format.ml
+130
lib/html5_checker/message_format.ml
···
1
+
let format_text ?system_id messages =
2
+
let buf = Buffer.create 1024 in
3
+
List.iter
4
+
(fun msg ->
5
+
let loc_str =
6
+
match msg.Message.location with
7
+
| Some loc -> (
8
+
let sid =
9
+
match loc.Message.system_id with
10
+
| Some s -> s
11
+
| None -> (
12
+
match system_id with Some s -> s | None -> "input")
13
+
in
14
+
let col_info =
15
+
match (loc.end_line, loc.end_column) with
16
+
| Some el, Some ec when el = loc.line && ec > loc.column ->
17
+
Printf.sprintf "%d.%d-%d" loc.line loc.column ec
18
+
| Some el, Some ec when el > loc.line ->
19
+
Printf.sprintf "%d.%d-%d.%d" loc.line loc.column el ec
20
+
| _ -> Printf.sprintf "%d.%d" loc.line loc.column
21
+
in
22
+
Printf.sprintf "%s:%s" sid col_info)
23
+
| None -> (
24
+
match system_id with Some s -> s | None -> "input")
25
+
in
26
+
let severity_str = Message.severity_to_string msg.Message.severity in
27
+
let code_str =
28
+
match msg.Message.code with Some c -> " [" ^ c ^ "]" | None -> ""
29
+
in
30
+
let elem_str =
31
+
match msg.Message.element with
32
+
| Some e -> " (element: " ^ e ^ ")"
33
+
| None -> ""
34
+
in
35
+
let attr_str =
36
+
match msg.Message.attribute with
37
+
| Some a -> " (attribute: " ^ a ^ ")"
38
+
| None -> ""
39
+
in
40
+
Buffer.add_string buf
41
+
(Printf.sprintf "%s: %s%s: %s%s%s\n" loc_str severity_str code_str
42
+
msg.Message.message elem_str attr_str))
43
+
messages;
44
+
Buffer.contents buf
45
+
46
+
let format_gnu ?system_id messages =
47
+
let buf = Buffer.create 1024 in
48
+
List.iter
49
+
(fun msg ->
50
+
let loc_str =
51
+
match msg.Message.location with
52
+
| Some loc -> (
53
+
let sid =
54
+
match loc.Message.system_id with
55
+
| Some s -> s
56
+
| None -> (
57
+
match system_id with Some s -> s | None -> "input")
58
+
in
59
+
Printf.sprintf "%s:%d:%d" sid loc.line loc.column)
60
+
| None -> (
61
+
match system_id with Some s -> s ^ ":0:0" | None -> "input:0:0")
62
+
in
63
+
let severity_str = Message.severity_to_string msg.Message.severity in
64
+
let code_str =
65
+
match msg.Message.code with Some c -> " [" ^ c ^ "]" | None -> ""
66
+
in
67
+
Buffer.add_string buf
68
+
(Printf.sprintf "%s: %s%s: %s\n" loc_str severity_str code_str
69
+
msg.Message.message))
70
+
messages;
71
+
Buffer.contents buf
72
+
73
+
let message_to_json ?system_id msg =
74
+
let open Jsont in
75
+
let severity = String (Message.severity_to_string msg.Message.severity, Meta.none) in
76
+
let message_text = String (msg.Message.message, Meta.none) in
77
+
let base = [ (("type", Meta.none), severity); (("message", Meta.none), message_text) ] in
78
+
let with_code =
79
+
match msg.Message.code with
80
+
| Some c -> (("subType", Meta.none), String (c, Meta.none)) :: base
81
+
| None -> base
82
+
in
83
+
let with_location =
84
+
match msg.Message.location with
85
+
| Some loc ->
86
+
let line = Number (float_of_int loc.Message.line, Meta.none) in
87
+
let column = Number (float_of_int loc.Message.column, Meta.none) in
88
+
let loc_fields =
89
+
[ (("firstLine", Meta.none), line); (("firstColumn", Meta.none), column) ]
90
+
in
91
+
let loc_fields =
92
+
match loc.Message.end_line with
93
+
| Some el ->
94
+
(("lastLine", Meta.none), Number (float_of_int el, Meta.none)) :: loc_fields
95
+
| None -> loc_fields
96
+
in
97
+
let loc_fields =
98
+
match loc.Message.end_column with
99
+
| Some ec ->
100
+
(("lastColumn", Meta.none), Number (float_of_int ec, Meta.none))
101
+
:: loc_fields
102
+
| None -> loc_fields
103
+
in
104
+
let url =
105
+
match loc.Message.system_id with
106
+
| Some s -> s
107
+
| None -> (
108
+
match system_id with Some s -> s | None -> "input")
109
+
in
110
+
(("url", Meta.none), String (url, Meta.none)) :: loc_fields @ with_code
111
+
| None ->
112
+
let url =
113
+
match system_id with Some s -> s | None -> "input"
114
+
in
115
+
(("url", Meta.none), String (url, Meta.none)) :: with_code
116
+
in
117
+
let with_extract =
118
+
match msg.Message.extract with
119
+
| Some e -> (("extract", Meta.none), String (e, Meta.none)) :: with_location
120
+
| None -> with_location
121
+
in
122
+
Object (with_extract, Meta.none)
123
+
124
+
let format_json ?system_id messages =
125
+
let open Jsont in
126
+
let msg_array = Array (List.map (message_to_json ?system_id) messages, Meta.none) in
127
+
let obj = Object ([ (("messages", Meta.none), msg_array) ], Meta.none) in
128
+
match Jsont_bytesrw.encode_string ~format:Minify json obj with
129
+
| Ok s -> s
130
+
| Error e -> failwith ("JSON encoding error: " ^ e)
+28
lib/html5_checker/message_format.mli
+28
lib/html5_checker/message_format.mli
···
1
+
(** Message output formatters.
2
+
3
+
This module provides various output formats for validation messages,
4
+
including text, JSON, and GNU-style formats for IDE integration. *)
5
+
6
+
(** {1 Formatters} *)
7
+
8
+
(** Format messages as human-readable text.
9
+
10
+
Output format: [file:line:col: severity: message]
11
+
12
+
@param system_id Optional default system identifier for messages without location. *)
13
+
val format_text : ?system_id:string -> Message.t list -> string
14
+
15
+
(** Format messages as JSON.
16
+
17
+
Produces output compatible with the Nu HTML Validator JSON format.
18
+
19
+
@param system_id Optional default system identifier for messages without location. *)
20
+
val format_json : ?system_id:string -> Message.t list -> string
21
+
22
+
(** Format messages in GNU style for IDE integration.
23
+
24
+
Output format follows GNU conventions for error messages, compatible
25
+
with most IDEs and editors.
26
+
27
+
@param system_id Optional default system identifier for messages without location. *)
28
+
val format_gnu : ?system_id:string -> Message.t list -> string
+22
lib/html5_checker/parse_error_bridge.ml
+22
lib/html5_checker/parse_error_bridge.ml
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
let of_parse_error ?system_id err =
7
+
let code = Html5rw.error_code err in
8
+
let line = Html5rw.error_line err in
9
+
let column = Html5rw.error_column err in
10
+
let location =
11
+
Message.make_location ~line ~column ?system_id ()
12
+
in
13
+
let code_str = Html5rw.Parse_error_code.to_string code in
14
+
Message.error
15
+
~message:(Printf.sprintf "Parse error: %s" code_str)
16
+
~code:code_str
17
+
~location
18
+
()
19
+
20
+
let collect_parse_errors ?system_id result =
21
+
let errors = Html5rw.errors result in
22
+
List.map (of_parse_error ?system_id) errors
+25
lib/html5_checker/parse_error_bridge.mli
+25
lib/html5_checker/parse_error_bridge.mli
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** Bridge between Html5rw parse errors and validation messages.
7
+
8
+
This module converts parse errors from the Html5rw parser into
9
+
standardized validation messages. *)
10
+
11
+
(** Convert a parse error to a validation message.
12
+
13
+
Extracts error code, line, column, and creates a Message.t with
14
+
severity set to Error.
15
+
16
+
@param system_id Optional file path or URL to include in location *)
17
+
val of_parse_error : ?system_id:string -> Html5rw.parse_error -> Message.t
18
+
19
+
(** Collect all parse errors from a parse result.
20
+
21
+
Extracts all parse errors from the Html5rw.t result and converts
22
+
them to validation messages.
23
+
24
+
@param system_id Optional file path or URL to include in locations *)
25
+
val collect_parse_errors : ?system_id:string -> Html5rw.t -> Message.t list
+246
lib/html5_checker/semantic/form_checker.ml
+246
lib/html5_checker/semantic/form_checker.ml
···
1
+
(** Form-related validation checker implementation. *)
2
+
3
+
type state = {
4
+
mutable in_form : bool;
5
+
(** Track if we're currently inside a <form> element *)
6
+
mutable form_ids : string list;
7
+
(** Stack of form IDs we're currently nested in *)
8
+
mutable label_for_refs : string list;
9
+
(** Collect all label[for] references to validate later *)
10
+
mutable element_ids : string list;
11
+
(** Collect all element IDs to validate label references *)
12
+
mutable unlabeled_controls : (string * string option) list;
13
+
(** Controls that might need labels: (type, id) *)
14
+
}
15
+
16
+
let create () =
17
+
{
18
+
in_form = false;
19
+
form_ids = [];
20
+
label_for_refs = [];
21
+
element_ids = [];
22
+
unlabeled_controls = [];
23
+
}
24
+
25
+
let reset state =
26
+
state.in_form <- false;
27
+
state.form_ids <- [];
28
+
state.label_for_refs <- [];
29
+
state.element_ids <- [];
30
+
state.unlabeled_controls <- []
31
+
32
+
(** Check if an attribute list contains a specific attribute. *)
33
+
let has_attr name attrs =
34
+
List.exists (fun (attr_name, _) -> String.equal attr_name name) attrs
35
+
36
+
(** Get the value of an attribute if present. *)
37
+
let get_attr name attrs =
38
+
List.find_map
39
+
(fun (attr_name, value) ->
40
+
if String.equal attr_name name then Some value else None)
41
+
attrs
42
+
43
+
(** Check if an element is labelable. *)
44
+
let _is_labelable_element name input_type =
45
+
match name with
46
+
| "button" | "meter" | "output" | "progress" | "select" | "textarea" -> true
47
+
| "input" -> (
48
+
match input_type with Some "hidden" -> false | _ -> true)
49
+
| _ -> false
50
+
51
+
(** Valid autocomplete tokens for various input types. *)
52
+
let valid_autocomplete_tokens =
53
+
[
54
+
"on";
55
+
"off";
56
+
"name";
57
+
"honorific-prefix";
58
+
"given-name";
59
+
"additional-name";
60
+
"family-name";
61
+
"honorific-suffix";
62
+
"nickname";
63
+
"email";
64
+
"username";
65
+
"new-password";
66
+
"current-password";
67
+
"one-time-code";
68
+
"organization-title";
69
+
"organization";
70
+
"street-address";
71
+
"address-line1";
72
+
"address-line2";
73
+
"address-line3";
74
+
"address-level4";
75
+
"address-level3";
76
+
"address-level2";
77
+
"address-level1";
78
+
"country";
79
+
"country-name";
80
+
"postal-code";
81
+
"cc-name";
82
+
"cc-given-name";
83
+
"cc-additional-name";
84
+
"cc-family-name";
85
+
"cc-number";
86
+
"cc-exp";
87
+
"cc-exp-month";
88
+
"cc-exp-year";
89
+
"cc-csc";
90
+
"cc-type";
91
+
"transaction-currency";
92
+
"transaction-amount";
93
+
"language";
94
+
"bday";
95
+
"bday-day";
96
+
"bday-month";
97
+
"bday-year";
98
+
"sex";
99
+
"tel";
100
+
"tel-country-code";
101
+
"tel-national";
102
+
"tel-area-code";
103
+
"tel-local";
104
+
"tel-extension";
105
+
"impp";
106
+
"url";
107
+
"photo";
108
+
]
109
+
110
+
let check_autocomplete_value value _input_type collector =
111
+
(* Parse autocomplete value - can be space-separated tokens *)
112
+
let tokens = String.split_on_char ' ' value |> List.map String.trim in
113
+
let tokens = List.filter (fun s -> String.length s > 0) tokens in
114
+
115
+
(* The last token should be a valid autocomplete token *)
116
+
match List.rev tokens with
117
+
| [] -> ()
118
+
| last_token :: _prefix_tokens ->
119
+
if not (List.mem last_token valid_autocomplete_tokens) then
120
+
Message_collector.add_warning collector
121
+
~message:
122
+
(Printf.sprintf "Unknown autocomplete value: %s" last_token)
123
+
~code:"invalid-autocomplete-value" ~element:"input"
124
+
~attribute:"autocomplete" ()
125
+
126
+
let check_input_element state attrs collector =
127
+
let input_type = get_attr "type" attrs in
128
+
let id = get_attr "id" attrs in
129
+
130
+
(* Track this input's ID if present *)
131
+
(match id with
132
+
| Some id_val -> state.element_ids <- id_val :: state.element_ids
133
+
| None -> ());
134
+
135
+
(* Check various input-specific rules *)
136
+
(match input_type with
137
+
| Some "radio" | Some "checkbox" ->
138
+
(* Radio and checkbox should have labels *)
139
+
state.unlabeled_controls <-
140
+
(Option.value input_type ~default:"text", id)
141
+
:: state.unlabeled_controls
142
+
| Some "submit" | Some "button" | Some "reset" ->
143
+
(* These don't need labels *)
144
+
()
145
+
| _ -> ());
146
+
147
+
(* Check autocomplete attribute *)
148
+
(match get_attr "autocomplete" attrs with
149
+
| Some autocomplete_value ->
150
+
check_autocomplete_value autocomplete_value input_type collector
151
+
| None -> ());
152
+
153
+
(* Check for select multiple with size=1 *)
154
+
()
155
+
156
+
let check_select_element attrs collector =
157
+
let multiple = has_attr "multiple" attrs in
158
+
let size = get_attr "size" attrs in
159
+
160
+
match (multiple, size) with
161
+
| true, Some "1" ->
162
+
Message_collector.add_warning collector
163
+
~message:"select element with multiple should not have size=\"1\""
164
+
~code:"contradictory-attributes" ~element:"select" ~attribute:"size"
165
+
()
166
+
| _ -> ()
167
+
168
+
let check_button_element state attrs collector =
169
+
(* button[type=submit] should be in form or have form attribute *)
170
+
let button_type = get_attr "type" attrs in
171
+
let has_form_attr = has_attr "form" attrs in
172
+
173
+
match button_type with
174
+
| Some "submit" | None ->
175
+
(* Default type is submit *)
176
+
if (not state.in_form) && not has_form_attr then
177
+
Message_collector.add_warning collector
178
+
~message:
179
+
"button element with type=\"submit\" should be inside a form or \
180
+
have form attribute"
181
+
~code:"submit-button-outside-form" ~element:"button" ()
182
+
| _ -> ()
183
+
184
+
let check_label_element state attrs _collector =
185
+
(* Collect label[for] references *)
186
+
match get_attr "for" attrs with
187
+
| Some for_id -> state.label_for_refs <- for_id :: state.label_for_refs
188
+
| None -> ()
189
+
190
+
let start_element state ~name ~namespace:_ ~attrs collector =
191
+
(* Track element IDs *)
192
+
(match get_attr "id" attrs with
193
+
| Some id_val -> state.element_ids <- id_val :: state.element_ids
194
+
| None -> ());
195
+
196
+
match name with
197
+
| "form" ->
198
+
state.in_form <- true;
199
+
(match get_attr "id" attrs with
200
+
| Some id -> state.form_ids <- id :: state.form_ids
201
+
| None -> ())
202
+
| "input" -> check_input_element state attrs collector
203
+
| "select" -> check_select_element attrs collector
204
+
| "button" -> check_button_element state attrs collector
205
+
| "label" -> check_label_element state attrs collector
206
+
| _ -> ()
207
+
208
+
let end_element state ~name ~namespace:_ _collector =
209
+
match name with
210
+
| "form" ->
211
+
state.in_form <- false;
212
+
(match state.form_ids with
213
+
| _ :: rest -> state.form_ids <- rest
214
+
| [] -> ())
215
+
| _ -> ()
216
+
217
+
let characters _state _text _collector = ()
218
+
219
+
let end_document state collector =
220
+
(* Validate label[for] references *)
221
+
List.iter
222
+
(fun for_id ->
223
+
if not (List.mem for_id state.element_ids) then
224
+
Message_collector.add_warning collector
225
+
~message:
226
+
(Printf.sprintf
227
+
"label element references non-existent ID: %s" for_id)
228
+
~code:"invalid-label-reference" ~element:"label" ~attribute:"for"
229
+
())
230
+
state.label_for_refs;
231
+
232
+
(* Note: We can't reliably detect unlabeled controls without tracking
233
+
label parent-child relationships, which would require more complex
234
+
state tracking. For now, we just validate explicit label[for] references. *)
235
+
()
236
+
237
+
let checker = (module struct
238
+
type nonrec state = state
239
+
240
+
let create = create
241
+
let reset = reset
242
+
let start_element = start_element
243
+
let end_element = end_element
244
+
let characters = characters
245
+
let end_document = end_document
246
+
end : Checker.S)
+62
lib/html5_checker/semantic/form_checker.mli
+62
lib/html5_checker/semantic/form_checker.mli
···
1
+
(** Form-related validation checker.
2
+
3
+
Validates form control associations, label references, and form structure
4
+
according to HTML5 accessibility and semantic requirements. This checker
5
+
ensures that:
6
+
7
+
- Form controls have proper labels
8
+
- Label associations are valid
9
+
- Form attributes are correctly configured
10
+
- Form structure follows HTML5 constraints
11
+
12
+
{2 Validation Rules}
13
+
14
+
{b Label Associations}
15
+
- [label] with [for] attribute should reference a labelable element ID
16
+
- [input\[type=radio\]] should have an associated visible label
17
+
- [input\[type=checkbox\]] should have an associated visible label
18
+
19
+
{b Form Control Validation}
20
+
- [button\[type=submit\]] should be inside a [form] or have [form] attribute
21
+
- [input\[type=image\]] requires [alt] attribute (validated by required_attr_checker)
22
+
- [input\[type=hidden\]] should not have [required] attribute (validated by required_attr_checker)
23
+
- [input\[type=file\]] should not have [value] attribute (validated by required_attr_checker)
24
+
25
+
{b Autocomplete}
26
+
- [autocomplete] values should be appropriate for the [input] type
27
+
- Common autocomplete values include: [on], [off], [name], [email],
28
+
[username], [current-password], [new-password], [street-address], etc.
29
+
30
+
{b Select Elements}
31
+
- [select\[multiple\]] should not have [size="1"] (contradictory)
32
+
- [select] should contain at least one [option] or [optgroup]
33
+
34
+
{b Accessibility}
35
+
- Form controls should be reachable and operable via keyboard
36
+
- Radio buttons with the same [name] should form a logical group
37
+
38
+
{3 Labelable Elements}
39
+
40
+
The following elements can be associated with a [label]:
41
+
- [button]
42
+
- [input] (except [type=hidden])
43
+
- [meter]
44
+
- [output]
45
+
- [progress]
46
+
- [select]
47
+
- [textarea]
48
+
49
+
@see <https://html.spec.whatwg.org/multipage/forms.html> WHATWG HTML: Forms
50
+
@see <https://www.w3.org/WAI/WCAG21/Understanding/labels-or-instructions.html>
51
+
WCAG: Labels or Instructions *)
52
+
53
+
include Checker.S
54
+
55
+
val checker : Checker.t
56
+
(** A first-class module instance of this checker.
57
+
58
+
{b Usage:}
59
+
{[
60
+
let checker = Form_checker.checker in
61
+
Checker_registry.register "form-validation" checker
62
+
]} *)
+219
lib/html5_checker/semantic/id_checker.ml
+219
lib/html5_checker/semantic/id_checker.ml
···
1
+
(** ID uniqueness and reference checker.
2
+
3
+
This checker validates that:
4
+
- ID attributes are unique within the document
5
+
- ID references point to existing IDs
6
+
- ID values conform to HTML5 requirements *)
7
+
8
+
(** Location information for ID occurrences. *)
9
+
type id_location = {
10
+
element : string;
11
+
location : Message.location option;
12
+
}
13
+
14
+
(** Information about an ID reference. *)
15
+
type id_reference = {
16
+
referring_element : string;
17
+
attribute : string;
18
+
referenced_id : string;
19
+
location : Message.location option;
20
+
}
21
+
22
+
(** Checker state tracking IDs and references. *)
23
+
type state = {
24
+
ids : (string, id_location) Hashtbl.t;
25
+
mutable references : id_reference list;
26
+
}
27
+
28
+
let create () =
29
+
{
30
+
ids = Hashtbl.create 64;
31
+
references = [];
32
+
}
33
+
34
+
let reset state =
35
+
Hashtbl.clear state.ids;
36
+
state.references <- []
37
+
38
+
(** Check if a string contains whitespace. *)
39
+
let contains_whitespace s =
40
+
String.contains s ' ' || String.contains s '\t' ||
41
+
String.contains s '\n' || String.contains s '\r'
42
+
43
+
(** Extract ID from a usemap value (removes leading #). *)
44
+
let extract_usemap_id value =
45
+
if String.length value > 0 && value.[0] = '#' then
46
+
Some (String.sub value 1 (String.length value - 1))
47
+
else
48
+
None
49
+
50
+
(** Split whitespace-separated ID references. *)
51
+
let split_ids value =
52
+
let rec split acc start i =
53
+
if i >= String.length value then
54
+
if i > start then
55
+
(String.sub value start (i - start)) :: acc
56
+
else
57
+
acc
58
+
else
59
+
match value.[i] with
60
+
| ' ' | '\t' | '\n' | '\r' ->
61
+
let acc' =
62
+
if i > start then
63
+
(String.sub value start (i - start)) :: acc
64
+
else
65
+
acc
66
+
in
67
+
split acc' (i + 1) (i + 1)
68
+
| _ ->
69
+
split acc start (i + 1)
70
+
in
71
+
List.rev (split [] 0 0)
72
+
73
+
(** Attributes that reference a single ID. *)
74
+
let single_id_ref_attrs = [
75
+
"for"; (* label *)
76
+
"form"; (* form-associated elements *)
77
+
"list"; (* input *)
78
+
"aria-activedescendant";
79
+
]
80
+
81
+
(** Attributes that reference multiple IDs (space-separated). *)
82
+
let multi_id_ref_attrs = [
83
+
"headers"; (* td, th *)
84
+
"aria-labelledby";
85
+
"aria-describedby";
86
+
"aria-controls";
87
+
"aria-flowto";
88
+
"aria-owns";
89
+
"itemref";
90
+
]
91
+
92
+
(** Check and store an ID attribute. *)
93
+
let check_id state ~element ~id ~location collector =
94
+
(* Check for empty ID *)
95
+
if String.length id = 0 then
96
+
Message_collector.add_error collector
97
+
~message:"ID attribute must not be empty"
98
+
~code:"empty-id"
99
+
?location
100
+
~element
101
+
~attribute:"id"
102
+
()
103
+
(* Check for whitespace in ID *)
104
+
else if contains_whitespace id then
105
+
Message_collector.add_error collector
106
+
~message:(Printf.sprintf "ID attribute value '%s' must not contain whitespace" id)
107
+
~code:"id-whitespace"
108
+
?location
109
+
~element
110
+
~attribute:"id"
111
+
()
112
+
(* Check for duplicate ID *)
113
+
else if Hashtbl.mem state.ids id then
114
+
let first_occurrence = Hashtbl.find state.ids id in
115
+
let first_loc_str = match first_occurrence.location with
116
+
| None -> ""
117
+
| Some loc -> Printf.sprintf " at line %d, column %d" loc.line loc.column
118
+
in
119
+
Message_collector.add_error collector
120
+
~message:(Printf.sprintf
121
+
"Duplicate ID '%s': first used on <%s>%s, now on <%s>"
122
+
id first_occurrence.element first_loc_str element)
123
+
~code:"duplicate-id"
124
+
?location
125
+
~element
126
+
~attribute:"id"
127
+
()
128
+
else
129
+
(* Store the ID *)
130
+
Hashtbl.add state.ids id { element; location }
131
+
132
+
(** Record a single ID reference. *)
133
+
let add_reference state ~referring_element ~attribute ~referenced_id ~location =
134
+
if String.length referenced_id > 0 then
135
+
state.references <- {
136
+
referring_element;
137
+
attribute;
138
+
referenced_id;
139
+
location;
140
+
} :: state.references
141
+
142
+
(** Process attributes to check IDs and collect references. *)
143
+
let process_attrs state ~element ~attrs ~location collector =
144
+
List.iter (fun (name, value) ->
145
+
match name with
146
+
| "id" ->
147
+
check_id state ~element ~id:value ~location collector
148
+
149
+
| "usemap" ->
150
+
(* usemap references a map name, which is like an ID reference *)
151
+
begin match extract_usemap_id value with
152
+
| Some id ->
153
+
add_reference state ~referring_element:element
154
+
~attribute:name ~referenced_id:id ~location
155
+
| None ->
156
+
if String.length value > 0 then
157
+
Message_collector.add_error collector
158
+
~message:(Printf.sprintf
159
+
"usemap attribute value '%s' must start with '#'" value)
160
+
~code:"invalid-usemap"
161
+
?location
162
+
~element
163
+
~attribute:name
164
+
()
165
+
end
166
+
167
+
| attr when List.mem attr single_id_ref_attrs ->
168
+
add_reference state ~referring_element:element
169
+
~attribute:attr ~referenced_id:value ~location
170
+
171
+
| attr when List.mem attr multi_id_ref_attrs ->
172
+
(* Split space-separated IDs and add each as a reference *)
173
+
let ids = split_ids value in
174
+
List.iter (fun id ->
175
+
add_reference state ~referring_element:element
176
+
~attribute:attr ~referenced_id:id ~location
177
+
) ids
178
+
179
+
| _ -> ()
180
+
) attrs
181
+
182
+
let start_element state ~name ~namespace:_ ~attrs collector =
183
+
(* For now, we don't have location information from the DOM walker,
184
+
so we pass None. In a full implementation, this would be passed
185
+
from the parser. *)
186
+
let location = None in
187
+
process_attrs state ~element:name ~attrs ~location collector
188
+
189
+
let end_element _state ~name:_ ~namespace:_ _collector =
190
+
()
191
+
192
+
let characters _state _text _collector =
193
+
()
194
+
195
+
let end_document state collector =
196
+
(* Check all references point to existing IDs *)
197
+
List.iter (fun ref ->
198
+
if not (Hashtbl.mem state.ids ref.referenced_id) then
199
+
Message_collector.add_error collector
200
+
~message:(Printf.sprintf
201
+
"The '%s' attribute on <%s> refers to ID '%s' which does not exist"
202
+
ref.attribute ref.referring_element ref.referenced_id)
203
+
~code:"dangling-id-reference"
204
+
?location:ref.location
205
+
~element:ref.referring_element
206
+
~attribute:ref.attribute
207
+
()
208
+
) state.references
209
+
210
+
let checker = (module struct
211
+
type nonrec state = state
212
+
213
+
let create = create
214
+
let reset = reset
215
+
let start_element = start_element
216
+
let end_element = end_element
217
+
let characters = characters
218
+
let end_document = end_document
219
+
end : Checker.S)
+11
lib/html5_checker/semantic/id_checker.mli
+11
lib/html5_checker/semantic/id_checker.mli
···
1
+
(** ID uniqueness and reference checker.
2
+
3
+
Validates:
4
+
- ID attributes are unique within the document
5
+
- ID references (for, headers, aria-*, etc.) point to existing IDs
6
+
- ID values conform to HTML5 requirements *)
7
+
8
+
include Checker.S
9
+
10
+
val checker : Checker.t
11
+
(** [checker] is a checker instance for validating ID uniqueness and references. *)
+296
lib/html5_checker/semantic/nesting_checker.ml
+296
lib/html5_checker/semantic/nesting_checker.ml
···
1
+
(** Interactive element nesting checker implementation. *)
2
+
3
+
(** Special ancestors that need tracking for nesting validation.
4
+
5
+
This array defines the elements whose presence in the ancestor chain
6
+
affects validation of descendant elements. The order is significant
7
+
as it determines bit positions in the ancestor bitmask. *)
8
+
let special_ancestors =
9
+
[| "a"; "address"; "body"; "button"; "caption"; "dfn"; "dt"; "figcaption";
10
+
"figure"; "footer"; "form"; "header"; "label"; "map"; "noscript"; "th";
11
+
"time"; "progress"; "meter"; "article"; "section"; "aside"; "nav"; "h1";
12
+
"h2"; "h3"; "h4"; "h5"; "h6" |]
13
+
14
+
(** Get the bit position for a special ancestor element.
15
+
Returns [-1] if the element is not a special ancestor. *)
16
+
let special_ancestor_number name =
17
+
let rec find i =
18
+
if i >= Array.length special_ancestors then -1
19
+
else if special_ancestors.(i) = name then i
20
+
else find (i + 1)
21
+
in
22
+
find 0
23
+
24
+
(** Interactive elements that cannot be nested inside [a] or [button]. *)
25
+
let interactive_elements =
26
+
[| "a"; "button"; "details"; "embed"; "iframe"; "label"; "select";
27
+
"textarea" |]
28
+
29
+
(** Map from descendant element name to bitmask of prohibited ancestors. *)
30
+
let ancestor_mask_by_descendant : (string, int) Hashtbl.t =
31
+
Hashtbl.create 64
32
+
33
+
(** Register that [ancestor] is prohibited for [descendant]. *)
34
+
let register_prohibited_ancestor ancestor descendant =
35
+
let number = special_ancestor_number ancestor in
36
+
if number = -1 then
37
+
failwith ("Ancestor not found in array: " ^ ancestor);
38
+
let mask =
39
+
match Hashtbl.find_opt ancestor_mask_by_descendant descendant with
40
+
| None -> 0
41
+
| Some m -> m
42
+
in
43
+
let new_mask = mask lor (1 lsl number) in
44
+
Hashtbl.replace ancestor_mask_by_descendant descendant new_mask
45
+
46
+
(** Initialize the prohibited ancestor map. *)
47
+
let () =
48
+
(* Self-nesting restrictions *)
49
+
register_prohibited_ancestor "form" "form";
50
+
register_prohibited_ancestor "progress" "progress";
51
+
register_prohibited_ancestor "meter" "meter";
52
+
register_prohibited_ancestor "dfn" "dfn";
53
+
register_prohibited_ancestor "noscript" "noscript";
54
+
register_prohibited_ancestor "label" "label";
55
+
56
+
(* Address restrictions *)
57
+
register_prohibited_ancestor "address" "address";
58
+
register_prohibited_ancestor "address" "section";
59
+
register_prohibited_ancestor "address" "nav";
60
+
register_prohibited_ancestor "address" "article";
61
+
register_prohibited_ancestor "address" "header";
62
+
register_prohibited_ancestor "address" "footer";
63
+
register_prohibited_ancestor "address" "h1";
64
+
register_prohibited_ancestor "address" "h2";
65
+
register_prohibited_ancestor "address" "h3";
66
+
register_prohibited_ancestor "address" "h4";
67
+
register_prohibited_ancestor "address" "h5";
68
+
register_prohibited_ancestor "address" "h6";
69
+
70
+
(* Header/footer restrictions *)
71
+
register_prohibited_ancestor "header" "header";
72
+
register_prohibited_ancestor "footer" "header";
73
+
register_prohibited_ancestor "header" "footer";
74
+
register_prohibited_ancestor "footer" "footer";
75
+
76
+
(* dt restrictions *)
77
+
register_prohibited_ancestor "dt" "header";
78
+
register_prohibited_ancestor "dt" "footer";
79
+
register_prohibited_ancestor "dt" "article";
80
+
register_prohibited_ancestor "dt" "nav";
81
+
register_prohibited_ancestor "dt" "section";
82
+
register_prohibited_ancestor "dt" "h1";
83
+
register_prohibited_ancestor "dt" "h2";
84
+
register_prohibited_ancestor "dt" "h3";
85
+
register_prohibited_ancestor "dt" "h4";
86
+
register_prohibited_ancestor "dt" "h5";
87
+
register_prohibited_ancestor "dt" "h6";
88
+
register_prohibited_ancestor "dt" "hgroup";
89
+
90
+
(* th restrictions *)
91
+
register_prohibited_ancestor "th" "header";
92
+
register_prohibited_ancestor "th" "footer";
93
+
register_prohibited_ancestor "th" "article";
94
+
register_prohibited_ancestor "th" "nav";
95
+
register_prohibited_ancestor "th" "section";
96
+
register_prohibited_ancestor "th" "h1";
97
+
register_prohibited_ancestor "th" "h2";
98
+
register_prohibited_ancestor "th" "h3";
99
+
register_prohibited_ancestor "th" "h4";
100
+
register_prohibited_ancestor "th" "h5";
101
+
register_prohibited_ancestor "th" "h6";
102
+
register_prohibited_ancestor "th" "hgroup";
103
+
104
+
(* Caption restriction *)
105
+
register_prohibited_ancestor "caption" "table";
106
+
107
+
(* Interactive element restrictions: cannot be inside a or button *)
108
+
Array.iter (fun elem ->
109
+
register_prohibited_ancestor "a" elem;
110
+
register_prohibited_ancestor "button" elem
111
+
) interactive_elements
112
+
113
+
(** Bitmask constants for common checks. *)
114
+
let a_button_mask =
115
+
let a_num = special_ancestor_number "a" in
116
+
let button_num = special_ancestor_number "button" in
117
+
(1 lsl a_num) lor (1 lsl button_num)
118
+
119
+
let map_mask =
120
+
let map_num = special_ancestor_number "map" in
121
+
1 lsl map_num
122
+
123
+
(** Stack node representing an element's context. *)
124
+
type stack_node = {
125
+
ancestor_mask : int;
126
+
_name : string; [@warning "-69"]
127
+
}
128
+
129
+
(** Checker state. *)
130
+
type state = {
131
+
mutable stack : stack_node list;
132
+
mutable ancestor_mask : int;
133
+
}
134
+
135
+
let create () =
136
+
{ stack = []; ancestor_mask = 0 }
137
+
138
+
let reset state =
139
+
state.stack <- [];
140
+
state.ancestor_mask <- 0
141
+
142
+
(** Get attribute value by name from attribute list. *)
143
+
let get_attr attrs name =
144
+
List.assoc_opt name attrs
145
+
146
+
(** Check if an attribute exists. *)
147
+
let has_attr attrs name =
148
+
get_attr attrs name <> None
149
+
150
+
(** Check if element is interactive based on its attributes. *)
151
+
let is_interactive_element name attrs =
152
+
match name with
153
+
| "a" ->
154
+
has_attr attrs "href"
155
+
| "audio" | "video" ->
156
+
has_attr attrs "controls"
157
+
| "img" | "object" ->
158
+
has_attr attrs "usemap"
159
+
| "input" ->
160
+
begin match get_attr attrs "type" with
161
+
| Some "hidden" -> false
162
+
| _ -> true
163
+
end
164
+
| "button" | "details" | "embed" | "iframe" | "label" | "select"
165
+
| "textarea" ->
166
+
true
167
+
| _ ->
168
+
false
169
+
170
+
(** Get a human-readable description of an element for error messages. *)
171
+
let element_description name attrs =
172
+
match name with
173
+
| "a" when has_attr attrs "href" ->
174
+
"The element \"a\" with the attribute \"href\""
175
+
| "audio" when has_attr attrs "controls" ->
176
+
"The element \"audio\" with the attribute \"controls\""
177
+
| "video" when has_attr attrs "controls" ->
178
+
"The element \"video\" with the attribute \"controls\""
179
+
| "img" when has_attr attrs "usemap" ->
180
+
"The element \"img\" with the attribute \"usemap\""
181
+
| "object" when has_attr attrs "usemap" ->
182
+
"The element \"object\" with the attribute \"usemap\""
183
+
| _ ->
184
+
Printf.sprintf "The element \"%s\"" name
185
+
186
+
(** Report nesting violations. *)
187
+
let check_nesting state name attrs collector =
188
+
(* Compute the prohibited ancestor mask for this element *)
189
+
let base_mask =
190
+
match Hashtbl.find_opt ancestor_mask_by_descendant name with
191
+
| Some m -> m
192
+
| None -> 0
193
+
in
194
+
195
+
(* Add interactive element restrictions if applicable *)
196
+
let mask =
197
+
if is_interactive_element name attrs then
198
+
base_mask lor a_button_mask
199
+
else
200
+
base_mask
201
+
in
202
+
203
+
(* Check for violations *)
204
+
if mask <> 0 then begin
205
+
let mask_hit = state.ancestor_mask land mask in
206
+
if mask_hit <> 0 then begin
207
+
let desc = element_description name attrs in
208
+
(* Find which ancestors are violated *)
209
+
Array.iteri (fun i ancestor ->
210
+
let bit = 1 lsl i in
211
+
if (mask_hit land bit) <> 0 then
212
+
Message_collector.add_error collector
213
+
~message:(Printf.sprintf
214
+
"%s must not appear as a descendant of the \"%s\" element."
215
+
desc ancestor)
216
+
~element:name
217
+
()
218
+
) special_ancestors
219
+
end
220
+
end
221
+
222
+
(** Check for required ancestors. *)
223
+
let check_required_ancestors state name collector =
224
+
match name with
225
+
| "area" ->
226
+
if (state.ancestor_mask land map_mask) = 0 then
227
+
Message_collector.add_error collector
228
+
~message:"The \"area\" element must have a \"map\" ancestor."
229
+
~element:name
230
+
()
231
+
| _ -> ()
232
+
233
+
let start_element state ~name ~namespace ~attrs collector =
234
+
(* Only check HTML elements, not SVG or MathML *)
235
+
match namespace with
236
+
| Some _ -> ()
237
+
| None ->
238
+
(* Check for nesting violations *)
239
+
check_nesting state name attrs collector;
240
+
check_required_ancestors state name collector;
241
+
242
+
(* Update ancestor mask if this is a special ancestor *)
243
+
let new_mask = state.ancestor_mask in
244
+
let number = special_ancestor_number name in
245
+
let new_mask =
246
+
if number >= 0 then
247
+
new_mask lor (1 lsl number)
248
+
else
249
+
new_mask
250
+
in
251
+
252
+
(* Add href tracking for <a> elements *)
253
+
let new_mask =
254
+
if name = "a" && has_attr attrs "href" then
255
+
let a_num = special_ancestor_number "a" in
256
+
new_mask lor (1 lsl a_num)
257
+
else
258
+
new_mask
259
+
in
260
+
261
+
(* Push onto stack *)
262
+
let node = { ancestor_mask = state.ancestor_mask; _name = name } in
263
+
state.stack <- node :: state.stack;
264
+
state.ancestor_mask <- new_mask
265
+
266
+
let end_element state ~name:_ ~namespace _collector =
267
+
(* Only track HTML elements *)
268
+
match namespace with
269
+
| Some _ -> ()
270
+
| None ->
271
+
(* Pop from stack and restore ancestor mask *)
272
+
begin match state.stack with
273
+
| [] -> () (* Should not happen in well-formed documents *)
274
+
| node :: rest ->
275
+
state.stack <- rest;
276
+
state.ancestor_mask <- node.ancestor_mask
277
+
end
278
+
279
+
let characters _state _text _collector =
280
+
() (* No text-specific nesting checks *)
281
+
282
+
let end_document _state _collector =
283
+
() (* No document-level checks needed *)
284
+
285
+
(** Create the checker as a first-class module. *)
286
+
let checker =
287
+
let module M = struct
288
+
type nonrec state = state
289
+
let create = create
290
+
let reset = reset
291
+
let start_element = start_element
292
+
let end_element = end_element
293
+
let characters = characters
294
+
let end_document = end_document
295
+
end in
296
+
(module M : Checker.S)
+79
lib/html5_checker/semantic/nesting_checker.mli
+79
lib/html5_checker/semantic/nesting_checker.mli
···
1
+
(** Interactive element nesting checker.
2
+
3
+
Validates that interactive elements are not nested in ways that violate
4
+
HTML5 specifications. This checker tracks ancestor elements and ensures
5
+
that prohibited nesting patterns are detected and reported.
6
+
7
+
{2 Validation Rules}
8
+
9
+
The checker enforces the following prohibited nesting relationships:
10
+
11
+
{3 Self-nesting Restrictions}
12
+
13
+
These elements cannot be nested inside themselves:
14
+
- [form] cannot contain [form]
15
+
- [progress] cannot contain [progress]
16
+
- [meter] cannot contain [meter]
17
+
- [dfn] cannot contain [dfn]
18
+
- [noscript] cannot contain [noscript]
19
+
- [label] cannot contain [label]
20
+
21
+
{3 Structural Element Restrictions}
22
+
23
+
- [header] cannot be inside [header], [footer], or [address]
24
+
- [footer] cannot be inside [header], [footer], or [address]
25
+
- [address] cannot contain [header], [footer], [article], [section],
26
+
[nav], or heading elements ([h1]-[h6])
27
+
28
+
{3 Interactive Content Restrictions}
29
+
30
+
Interactive elements cannot be descendants of [a] (with [href]) or
31
+
[button]:
32
+
33
+
- [a] (when it has [href]) cannot be inside [a] or [button]
34
+
- [button] cannot be inside [a] or [button]
35
+
- [details] cannot be inside [a] or [button]
36
+
- [embed] cannot be inside [a] or [button]
37
+
- [iframe] cannot be inside [a] or [button]
38
+
- [label] cannot be inside [a] or [button]
39
+
- [select] cannot be inside [a] or [button]
40
+
- [textarea] cannot be inside [a] or [button]
41
+
- [audio] (with [controls]) cannot be inside [a] or [button]
42
+
- [video] (with [controls]) cannot be inside [a] or [button]
43
+
- [input] (except [type=hidden]) cannot be inside [a] or [button]
44
+
- [img] (with [usemap]) cannot be inside [a] or [button]
45
+
- [object] (with [usemap]) cannot be inside [a] or [button]
46
+
47
+
{3 Table Cell Restrictions}
48
+
49
+
- [dt] and [th] cannot contain [header], [footer], [article], [section],
50
+
[nav], heading elements ([h1]-[h6]), or [hgroup]
51
+
52
+
{3 Other Restrictions}
53
+
54
+
- [caption] cannot contain [table]
55
+
- [area] must have a [map] ancestor
56
+
57
+
{2 Implementation Details}
58
+
59
+
The checker uses a bitmask-based approach to efficiently track ancestor
60
+
elements. Each "special" ancestor element has a corresponding bit in the
61
+
ancestor mask. As elements are opened and closed during traversal, the
62
+
mask is updated to reflect the current ancestor context.
63
+
64
+
When an element is encountered, the checker:
65
+
1. Computes which ancestors would be prohibited for this element
66
+
2. Checks if any of those prohibited ancestors are present in the
67
+
current ancestor mask
68
+
3. Reports errors for any violations found
69
+
4. Updates the ancestor mask to include the current element (if it's
70
+
a special ancestor)
71
+
72
+
@see <https://html.spec.whatwg.org/multipage/dom.html#content-models>
73
+
HTML5 specification: Content models
74
+
*)
75
+
76
+
include Checker.S
77
+
78
+
val checker : Checker.t
79
+
(** [checker] is a checker instance for validating element nesting rules. *)
+339
lib/html5_checker/semantic/obsolete_checker.ml
+339
lib/html5_checker/semantic/obsolete_checker.ml
···
1
+
(** Obsolete elements map: element name -> suggestion message *)
2
+
let obsolete_elements =
3
+
let tbl = Hashtbl.create 32 in
4
+
Hashtbl.add tbl "applet" "Use \"embed\" or \"object\" element instead.";
5
+
Hashtbl.add tbl "acronym" "Use the \"abbr\" element instead.";
6
+
Hashtbl.add tbl "bgsound" "Use the \"audio\" element instead.";
7
+
Hashtbl.add tbl "dir" "Use the \"ul\" element instead.";
8
+
Hashtbl.add tbl "frame" "Use the \"iframe\" element and CSS instead, or use server-side includes.";
9
+
Hashtbl.add tbl "frameset" "Use the \"iframe\" element and CSS instead, or use server-side includes.";
10
+
Hashtbl.add tbl "noframes" "Use the \"iframe\" element and CSS instead, or use server-side includes.";
11
+
Hashtbl.add tbl "isindex" "Use the \"form\" element containing \"input\" element of type \"text\" instead.";
12
+
Hashtbl.add tbl "keygen" "";
13
+
Hashtbl.add tbl "listing" "Use \"pre\" or \"code\" element instead.";
14
+
Hashtbl.add tbl "menuitem" "Use script to handle \"contextmenu\" event instead.";
15
+
Hashtbl.add tbl "nextid" "Use GUIDs instead.";
16
+
Hashtbl.add tbl "noembed" "Use the \"object\" element instead.";
17
+
Hashtbl.add tbl "param" "Use the \"data\" attribute of the \"object\" element to set the URL of the external resource.";
18
+
Hashtbl.add tbl "plaintext" "Use the \"text/plain\" MIME type instead.";
19
+
Hashtbl.add tbl "rb" "";
20
+
Hashtbl.add tbl "rtc" "";
21
+
Hashtbl.add tbl "strike" "Use \"del\" or \"s\" element instead.";
22
+
Hashtbl.add tbl "xmp" "Use \"pre\" or \"code\" element instead.";
23
+
Hashtbl.add tbl "basefont" "Use CSS instead.";
24
+
Hashtbl.add tbl "big" "Use CSS instead.";
25
+
Hashtbl.add tbl "blink" "Use CSS instead.";
26
+
Hashtbl.add tbl "center" "Use CSS instead.";
27
+
Hashtbl.add tbl "font" "Use CSS instead.";
28
+
Hashtbl.add tbl "marquee" "Use CSS instead.";
29
+
Hashtbl.add tbl "multicol" "Use CSS instead.";
30
+
Hashtbl.add tbl "nobr" "Use CSS instead.";
31
+
Hashtbl.add tbl "spacer" "Use CSS instead.";
32
+
Hashtbl.add tbl "tt" "Use CSS instead.";
33
+
tbl
34
+
35
+
(** Obsolete attributes map: attr_name -> (element_name -> suggestion message) *)
36
+
let obsolete_attributes =
37
+
let tbl = Hashtbl.create 64 in
38
+
39
+
(* Helper to register an attribute for multiple elements *)
40
+
let register attr_name elements suggestion =
41
+
let element_map =
42
+
match Hashtbl.find_opt tbl attr_name with
43
+
| Some m -> m
44
+
| None ->
45
+
let m = Hashtbl.create 16 in
46
+
Hashtbl.add tbl attr_name m;
47
+
m
48
+
in
49
+
List.iter (fun elem -> Hashtbl.add element_map elem suggestion) elements
50
+
in
51
+
52
+
register "abbr" ["td"]
53
+
"Consider instead beginning the cell contents with concise text, followed by further elaboration if needed.";
54
+
55
+
register "accept" ["form"]
56
+
"Use the \"accept\" attribute directly on the \"input\" elements instead.";
57
+
58
+
register "archive" ["object"]
59
+
"Use the \"data\" and \"type\" attributes to invoke plugins.";
60
+
61
+
register "a" ["object"]
62
+
"Use the \"data\" and \"type\" attributes to invoke plugins.";
63
+
64
+
register "axis" ["td"; "th"]
65
+
"Use the \"scope\" attribute.";
66
+
67
+
register "border" ["input"; "img"; "object"; "table"]
68
+
"Consider specifying \"img { border: 0; }\" in CSS instead.";
69
+
70
+
register "charset" ["a"; "link"]
71
+
"Use an HTTP Content-Type header on the linked resource instead.";
72
+
73
+
register "classid" ["object"]
74
+
"Use the \"data\" and \"type\" attributes to invoke plugins.";
75
+
76
+
register "code" ["object"]
77
+
"Use the \"data\" and \"type\" attributes to invoke plugins.";
78
+
79
+
register "codebase" ["object"]
80
+
"Use the \"data\" and \"type\" attributes to invoke plugins.";
81
+
82
+
register "codetype" ["object"]
83
+
"Use the \"data\" and \"type\" attributes to invoke plugins.";
84
+
85
+
register "coords" ["a"]
86
+
"Use \"area\" instead of \"a\" for image maps.";
87
+
88
+
register "datafld" ["a"; "button"; "div"; "fieldset"; "iframe"; "img"; "input"; "label"; "legend"; "object"; "select"; "span"; "textarea"]
89
+
"Use script and a mechanism such as XMLHttpRequest to populate the page dynamically";
90
+
91
+
register "dataformatas" ["button"; "div"; "input"; "label"; "legend"; "object"; "option"; "select"; "span"; "table"]
92
+
"Use script and a mechanism such as XMLHttpRequest to populate the page dynamically";
93
+
94
+
register "datapagesize" ["table"]
95
+
"You can safely omit it.";
96
+
97
+
register "datasrc" ["a"; "button"; "div"; "iframe"; "img"; "input"; "label"; "legend"; "object"; "option"; "select"; "span"; "table"; "textarea"]
98
+
"Use script and a mechanism such as XMLHttpRequest to populate the page dynamically";
99
+
100
+
register "declare" ["object"]
101
+
"Repeat the \"object\" element completely each time the resource is to be reused.";
102
+
103
+
register "event" ["script"]
104
+
"Use DOM Events mechanisms to register event listeners.";
105
+
106
+
register "for" ["script"]
107
+
"Use DOM Events mechanisms to register event listeners.";
108
+
109
+
register "hreflang" ["area"]
110
+
"You can safely omit it.";
111
+
112
+
register "ismap" ["input"]
113
+
"You can safely omit it.";
114
+
115
+
register "label" ["menu"]
116
+
"Use script to handle \"contextmenu\" event instead.";
117
+
118
+
register "language" ["script"]
119
+
"Use the \"type\" attribute instead.";
120
+
121
+
register "longdesc" ["iframe"; "img"]
122
+
"Use a regular \"a\" element to link to the description.";
123
+
124
+
register "lowsrc" ["img"]
125
+
"Use a progressive JPEG image instead.";
126
+
127
+
register "manifest" ["html"]
128
+
"Use service workers instead.";
129
+
130
+
register "methods" ["a"; "link"]
131
+
"Use the HTTP OPTIONS feature instead.";
132
+
133
+
register "name" ["a"; "embed"; "img"; "option"]
134
+
"Use the \"id\" attribute instead.";
135
+
136
+
register "nohref" ["area"]
137
+
"Omitting the \"href\" attribute is sufficient.";
138
+
139
+
register "profile" ["head"]
140
+
"To declare which \"meta\" terms are used in the document, instead register the names as meta extensions. To trigger specific UA behaviors, use a \"link\" element instead.";
141
+
142
+
register "scheme" ["meta"]
143
+
"Use only one scheme per field, or make the scheme declaration part of the value.";
144
+
145
+
register "scope" ["td"]
146
+
"Use the \"scope\" attribute on a \"th\" element instead.";
147
+
148
+
register "shape" ["a"]
149
+
"Use \"area\" instead of \"a\" for image maps.";
150
+
151
+
register "standby" ["object"]
152
+
"Optimise the linked resource so that it loads quickly or, at least, incrementally.";
153
+
154
+
register "summary" ["table"]
155
+
"Consider describing the structure of the \"table\" in a \"caption\" element or in a \"figure\" element containing the \"table\"; or, simplify the structure of the \"table\" so that no description is needed.";
156
+
157
+
register "target" ["link"]
158
+
"You can safely omit it.";
159
+
160
+
register "type" ["param"; "area"; "menu"]
161
+
"You can safely omit it.";
162
+
163
+
register "typemustmatch" ["object"]
164
+
"Avoid using \"object\" elements with untrusted resources.";
165
+
166
+
register "urn" ["a"; "link"]
167
+
"Specify the preferred persistent identifier using the \"href\" attribute instead.";
168
+
169
+
register "usemap" ["input"; "object"]
170
+
"Use the \"img\" element instead.";
171
+
172
+
register "valuetype" ["param"]
173
+
"Use the \"name\" and \"value\" attributes without declaring value types.";
174
+
175
+
register "version" ["html"]
176
+
"You can safely omit it.";
177
+
178
+
tbl
179
+
180
+
(** Obsolete style attributes map: attr_name -> element_name list *)
181
+
let obsolete_style_attrs =
182
+
let tbl = Hashtbl.create 64 in
183
+
184
+
let register attr_name elements =
185
+
Hashtbl.add tbl attr_name elements
186
+
in
187
+
188
+
register "align" ["caption"; "iframe"; "img"; "input"; "object"; "embed"; "legend"; "table"; "hr"; "div"; "h1"; "h2"; "h3"; "h4"; "h5"; "h6"; "p"; "col"; "colgroup"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"];
189
+
register "alink" ["body"];
190
+
register "allowtransparency" ["iframe"];
191
+
register "background" ["body"; "table"; "thead"; "tbody"; "tfoot"; "tr"; "td"; "th"];
192
+
register "bgcolor" ["table"; "tr"; "td"; "th"; "body"];
193
+
register "bordercolor" ["table"];
194
+
register "cellpadding" ["table"];
195
+
register "cellspacing" ["table"];
196
+
register "char" ["col"; "colgroup"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"];
197
+
register "charoff" ["col"; "colgroup"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"];
198
+
register "clear" ["br"];
199
+
register "color" ["hr"];
200
+
register "compact" ["dl"; "menu"; "ol"; "ul"];
201
+
register "frameborder" ["iframe"];
202
+
register "framespacing" ["iframe"];
203
+
register "frame" ["table"];
204
+
register "height" ["table"; "thead"; "tbody"; "tfoot"; "tr"; "td"; "th"];
205
+
register "hspace" ["embed"; "iframe"; "input"; "img"; "object"];
206
+
register "link" ["body"];
207
+
register "bottommargin" ["body"];
208
+
register "marginheight" ["iframe"; "body"];
209
+
register "leftmargin" ["body"];
210
+
register "rightmargin" ["body"];
211
+
register "topmargin" ["body"];
212
+
register "marginwidth" ["iframe"; "body"];
213
+
register "noshade" ["hr"];
214
+
register "nowrap" ["td"; "th"];
215
+
register "rules" ["table"];
216
+
register "scrolling" ["iframe"];
217
+
register "size" ["hr"];
218
+
register "text" ["body"];
219
+
register "type" ["li"; "ul"];
220
+
register "valign" ["col"; "colgroup"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"];
221
+
register "vlink" ["body"];
222
+
register "vspace" ["embed"; "iframe"; "input"; "img"; "object"];
223
+
register "width" ["hr"; "table"; "td"; "th"; "col"; "colgroup"; "pre"];
224
+
225
+
tbl
226
+
227
+
(** Obsolete global attributes map: attr_name -> suggestion message *)
228
+
let obsolete_global_attrs =
229
+
let tbl = Hashtbl.create 8 in
230
+
Hashtbl.add tbl "contextmenu" "Use script to handle \"contextmenu\" event instead.";
231
+
Hashtbl.add tbl "dropzone" "Use script to handle the \"dragenter\" and \"dragover\" events instead.";
232
+
Hashtbl.add tbl "onshow" "Use script to handle \"contextmenu\" event instead.";
233
+
tbl
234
+
235
+
(** Checker state *)
236
+
type state = unit
237
+
238
+
let create () = ()
239
+
240
+
let reset _state = ()
241
+
242
+
let start_element _state ~name ~namespace ~attrs collector =
243
+
(* Only check HTML elements (no namespace or explicit HTML namespace) *)
244
+
let is_html = match namespace with
245
+
| None -> true
246
+
| Some ns -> String.equal (String.lowercase_ascii ns) "html"
247
+
in
248
+
249
+
if not is_html then ()
250
+
else begin
251
+
let name_lower = String.lowercase_ascii name in
252
+
253
+
(* Check for obsolete element *)
254
+
(match Hashtbl.find_opt obsolete_elements name_lower with
255
+
| None -> ()
256
+
| Some suggestion ->
257
+
let message =
258
+
if String.length suggestion = 0 then
259
+
Printf.sprintf "The \"%s\" element is obsolete." name
260
+
else
261
+
Printf.sprintf "The \"%s\" element is obsolete. %s" name suggestion
262
+
in
263
+
Message_collector.add_error collector
264
+
~message
265
+
~code:"obsolete-element"
266
+
~element:name
267
+
());
268
+
269
+
(* Check for obsolete attributes *)
270
+
List.iter (fun (attr_name, _attr_value) ->
271
+
let attr_lower = String.lowercase_ascii attr_name in
272
+
273
+
(* Check specific obsolete attributes for this element *)
274
+
(match Hashtbl.find_opt obsolete_attributes attr_lower with
275
+
| None -> ()
276
+
| Some element_map ->
277
+
(match Hashtbl.find_opt element_map name_lower with
278
+
| None -> ()
279
+
| Some suggestion ->
280
+
let message =
281
+
Printf.sprintf "The \"%s\" attribute on the \"%s\" element is obsolete. %s"
282
+
attr_name name suggestion
283
+
in
284
+
Message_collector.add_error collector
285
+
~message
286
+
~code:"obsolete-attribute"
287
+
~element:name
288
+
~attribute:attr_name
289
+
()));
290
+
291
+
(* Check obsolete style attributes *)
292
+
(match Hashtbl.find_opt obsolete_style_attrs attr_lower with
293
+
| None -> ()
294
+
| Some elements ->
295
+
if List.mem name_lower elements then
296
+
let message =
297
+
Printf.sprintf "The \"%s\" attribute on the \"%s\" element is obsolete. Use CSS instead."
298
+
attr_name name
299
+
in
300
+
Message_collector.add_error collector
301
+
~message
302
+
~code:"obsolete-style-attribute"
303
+
~element:name
304
+
~attribute:attr_name
305
+
());
306
+
307
+
(* Check obsolete global attributes *)
308
+
(match Hashtbl.find_opt obsolete_global_attrs attr_lower with
309
+
| None -> ()
310
+
| Some suggestion ->
311
+
let message =
312
+
Printf.sprintf "The \"%s\" attribute is obsolete. %s" attr_name suggestion
313
+
in
314
+
Message_collector.add_error collector
315
+
~message
316
+
~code:"obsolete-global-attribute"
317
+
~element:name
318
+
~attribute:attr_name
319
+
())
320
+
) attrs
321
+
end
322
+
323
+
let end_element _state ~name:_ ~namespace:_ _collector = ()
324
+
325
+
let characters _state _text _collector = ()
326
+
327
+
let end_document _state _collector = ()
328
+
329
+
let checker =
330
+
let module M = struct
331
+
type nonrec state = state
332
+
let create = create
333
+
let reset = reset
334
+
let start_element = start_element
335
+
let end_element = end_element
336
+
let characters = characters
337
+
let end_document = end_document
338
+
end in
339
+
(module M : Checker.S)
+67
lib/html5_checker/semantic/obsolete_checker.mli
+67
lib/html5_checker/semantic/obsolete_checker.mli
···
1
+
(** Obsolete element and attribute checker.
2
+
3
+
Reports errors for obsolete HTML elements and attributes that should
4
+
not be used in modern HTML5 documents.
5
+
6
+
This checker validates that documents do not use deprecated elements
7
+
or attributes from earlier HTML versions. It reports:
8
+
9
+
- {b Obsolete elements}: Elements like [<applet>], [<font>], [<center>]
10
+
that have been removed from HTML5
11
+
- {b Obsolete attributes}: Attributes like [align], [bgcolor], [border]
12
+
that should be replaced with CSS
13
+
- {b Obsolete global attributes}: Global attributes like [contextmenu]
14
+
that are no longer supported
15
+
16
+
{2 Obsolete Elements}
17
+
18
+
Elements that are flagged as obsolete include:
19
+
- Presentational elements: [<basefont>], [<big>], [<center>], [<font>],
20
+
[<strike>], [<tt>]
21
+
- Frame elements: [<frame>], [<frameset>], [<noframes>]
22
+
- Deprecated interactive elements: [<applet>], [<bgsound>], [<keygen>]
23
+
- Deprecated text elements: [<acronym>], [<dir>], [<listing>], [<xmp>]
24
+
- And many others
25
+
26
+
{2 Obsolete Attributes}
27
+
28
+
The checker validates against hundreds of obsolete attributes, including:
29
+
- Presentational attributes: [align], [bgcolor], [border], [color],
30
+
[height], [width] (on certain elements)
31
+
- Data binding attributes: [datafld], [dataformatas], [datasrc]
32
+
- Navigation attributes: [longdesc], [methods], [urn]
33
+
- And many element-specific obsolete attributes
34
+
35
+
{2 Example}
36
+
37
+
{[
38
+
let checker = Obsolete_checker.checker in
39
+
let module C = (val checker : Checker.S) in
40
+
let state = C.create () in
41
+
42
+
(* This will emit an error *)
43
+
C.start_element state ~name:"center" ~namespace:None ~attrs:[] collector;
44
+
(* Error: Element "center" is obsolete. Use CSS instead. *)
45
+
46
+
(* This will also emit an error *)
47
+
C.start_element state ~name:"div"
48
+
~namespace:None
49
+
~attrs:[("align", "center")]
50
+
collector;
51
+
(* Error: Attribute "align" on element "div" is obsolete. Use CSS instead. *)
52
+
]}
53
+
*)
54
+
55
+
(** Include the standard checker signature. *)
56
+
include Checker.S
57
+
58
+
(** {1 Checker Instance} *)
59
+
60
+
val checker : Checker.t
61
+
(** [checker] is a pre-configured obsolete checker instance that can be
62
+
registered with the checker registry.
63
+
64
+
{b Example:}
65
+
{[
66
+
Checker_registry.register registry "obsolete" Obsolete_checker.checker
67
+
]} *)
+143
lib/html5_checker/semantic/required_attr_checker.ml
+143
lib/html5_checker/semantic/required_attr_checker.ml
···
1
+
(** Required attribute checker implementation. *)
2
+
3
+
type state = {
4
+
mutable _in_figure : bool;
5
+
(** Track if we're inside a <figure> element (alt is more critical there) *)
6
+
}
7
+
8
+
let create () = { _in_figure = false }
9
+
10
+
let reset state = state._in_figure <- false
11
+
12
+
(** Check if an attribute list contains a specific attribute. *)
13
+
let has_attr name attrs =
14
+
List.exists (fun (attr_name, _) -> String.equal attr_name name) attrs
15
+
16
+
(** Get the value of an attribute if present. *)
17
+
let get_attr name attrs =
18
+
List.find_map
19
+
(fun (attr_name, value) ->
20
+
if String.equal attr_name name then Some value else None)
21
+
attrs
22
+
23
+
let check_img_element attrs collector =
24
+
(* Check for required src attribute *)
25
+
if not (has_attr "src" attrs) then
26
+
Message_collector.add_error collector ~message:"img element requires src attribute"
27
+
~code:"missing-required-attribute" ~element:"img" ~attribute:"src" ();
28
+
29
+
(* Check for alt attribute - always required *)
30
+
if not (has_attr "alt" attrs) then
31
+
Message_collector.add_error collector
32
+
~message:"img element requires alt attribute for accessibility"
33
+
~code:"missing-required-attribute" ~element:"img" ~attribute:"alt" ()
34
+
35
+
let check_area_element attrs collector =
36
+
(* area with href requires alt *)
37
+
if has_attr "href" attrs && not (has_attr "alt" attrs) then
38
+
Message_collector.add_error collector
39
+
~message:"area element with href requires alt attribute" ~code:"missing-required-attribute"
40
+
~element:"area" ~attribute:"alt" ()
41
+
42
+
let check_input_element attrs collector =
43
+
match get_attr "type" attrs with
44
+
| Some "image" ->
45
+
(* input[type=image] requires alt *)
46
+
if not (has_attr "alt" attrs) then
47
+
Message_collector.add_error collector
48
+
~message:"input element with type=\"image\" requires alt attribute"
49
+
~code:"missing-required-attribute" ~element:"input" ~attribute:"alt" ()
50
+
| Some "hidden" ->
51
+
(* input[type=hidden] should not have required attribute *)
52
+
if has_attr "required" attrs then
53
+
Message_collector.add_error collector
54
+
~message:"input element with type=\"hidden\" cannot have required attribute"
55
+
~code:"invalid-attribute-combination" ~element:"input" ~attribute:"required" ()
56
+
| Some "file" ->
57
+
(* input[type=file] should not have value attribute *)
58
+
if has_attr "value" attrs then
59
+
Message_collector.add_warning collector
60
+
~message:"input element with type=\"file\" should not have value attribute"
61
+
~code:"invalid-attribute-combination" ~element:"input" ~attribute:"value" ()
62
+
| _ -> ()
63
+
64
+
let check_script_element attrs _collector =
65
+
(* script requires src OR text content *)
66
+
if not (has_attr "src" attrs) then
67
+
(* We can't check for text content here; that would need to be done
68
+
in end_element or with state tracking *)
69
+
()
70
+
71
+
let check_meta_element attrs collector =
72
+
(* meta requires charset OR (name AND content) OR (http-equiv AND content) *)
73
+
let has_charset = has_attr "charset" attrs in
74
+
let has_name = has_attr "name" attrs in
75
+
let has_content = has_attr "content" attrs in
76
+
let has_http_equiv = has_attr "http-equiv" attrs in
77
+
78
+
let valid =
79
+
has_charset
80
+
|| (has_name && has_content)
81
+
|| (has_http_equiv && has_content)
82
+
in
83
+
84
+
if not valid then
85
+
Message_collector.add_error collector
86
+
~message:
87
+
"meta element requires either charset, or name+content, or http-equiv+content"
88
+
~code:"missing-required-attribute" ~element:"meta" ()
89
+
90
+
let check_link_element attrs collector =
91
+
(* link[rel="stylesheet"] requires href *)
92
+
match get_attr "rel" attrs with
93
+
| Some rel when String.equal rel "stylesheet" ->
94
+
if not (has_attr "href" attrs) then
95
+
Message_collector.add_error collector
96
+
~message:"link element with rel=\"stylesheet\" requires href attribute"
97
+
~code:"missing-required-attribute" ~element:"link" ~attribute:"href" ()
98
+
| _ -> ()
99
+
100
+
let check_a_element attrs collector =
101
+
(* a[download] requires href *)
102
+
if has_attr "download" attrs && not (has_attr "href" attrs) then
103
+
Message_collector.add_error collector
104
+
~message:"a element with download attribute requires href attribute"
105
+
~code:"missing-required-attribute" ~element:"a" ~attribute:"href" ()
106
+
107
+
let check_map_element attrs collector =
108
+
(* map requires name *)
109
+
if not (has_attr "name" attrs) then
110
+
Message_collector.add_error collector
111
+
~message:"map element requires name attribute" ~code:"missing-required-attribute"
112
+
~element:"map" ~attribute:"name" ()
113
+
114
+
let start_element state ~name ~namespace:_ ~attrs collector =
115
+
match name with
116
+
| "img" -> check_img_element attrs collector
117
+
| "area" -> check_area_element attrs collector
118
+
| "input" -> check_input_element attrs collector
119
+
| "script" -> check_script_element attrs collector
120
+
| "meta" -> check_meta_element attrs collector
121
+
| "link" -> check_link_element attrs collector
122
+
| "a" -> check_a_element attrs collector
123
+
| "map" -> check_map_element attrs collector
124
+
| "figure" -> state._in_figure <- true
125
+
| _ -> ()
126
+
127
+
let end_element state ~name ~namespace:_ _collector =
128
+
match name with "figure" -> state._in_figure <- false | _ -> ()
129
+
130
+
let characters _state _text _collector = ()
131
+
132
+
let end_document _state _collector = ()
133
+
134
+
let checker = (module struct
135
+
type nonrec state = state
136
+
137
+
let create = create
138
+
let reset = reset
139
+
let start_element = start_element
140
+
let end_element = end_element
141
+
let characters = characters
142
+
let end_document = end_document
143
+
end : Checker.S)
+50
lib/html5_checker/semantic/required_attr_checker.mli
+50
lib/html5_checker/semantic/required_attr_checker.mli
···
1
+
(** Required attribute checker.
2
+
3
+
Validates that elements have their required attributes according to the
4
+
HTML5 specification. This checker ensures that:
5
+
6
+
- Elements have all mandatory attributes
7
+
- Conditional attributes are present when required by context
8
+
- Attributes that must appear together are all present
9
+
10
+
{2 Validation Rules}
11
+
12
+
The checker validates these common required attributes:
13
+
14
+
{b Images and Media}
15
+
- [img] requires [src] attribute
16
+
- [img] requires [alt] attribute (error in most contexts, warning otherwise)
17
+
- [area] with [href] requires [alt] attribute
18
+
- [input\[type=image\]] requires [alt] attribute
19
+
20
+
{b Forms}
21
+
- [input] defaults to [type="text"] if [type] is omitted
22
+
- [map] requires [name] attribute
23
+
24
+
{b Scripts and Styles}
25
+
- [script] requires either [src] attribute OR text content
26
+
- [style] with [scoped] requires appropriate positioning
27
+
28
+
{b Metadata}
29
+
- [meta] requires one of:
30
+
- [charset] attribute, OR
31
+
- [name] and [content] attributes, OR
32
+
- [http-equiv] and [content] attributes
33
+
- [link\[rel="stylesheet"\]] requires [href] attribute
34
+
35
+
{b Links}
36
+
- [a] with [download] attribute requires [href] attribute
37
+
38
+
@see <https://html.spec.whatwg.org/multipage/indices.html#attributes-3>
39
+
WHATWG HTML: Attributes *)
40
+
41
+
include Checker.S
42
+
43
+
val checker : Checker.t
44
+
(** A first-class module instance of this checker.
45
+
46
+
{b Usage:}
47
+
{[
48
+
let checker = Required_attr_checker.checker in
49
+
Checker_registry.register "required-attributes" checker
50
+
]} *)
+404
lib/html5_checker/specialized/aria_checker.ml
+404
lib/html5_checker/specialized/aria_checker.ml
···
1
+
(** ARIA validation checker implementation. *)
2
+
3
+
(** Valid WAI-ARIA 1.2 roles.
4
+
5
+
These are all the valid role values according to the WAI-ARIA 1.2
6
+
specification. Abstract roles are included but should not be used
7
+
in HTML content. *)
8
+
let valid_aria_roles =
9
+
let roles = [
10
+
(* Document structure roles *)
11
+
"article"; "associationlist"; "associationlistitemkey";
12
+
"associationlistitemvalue"; "blockquote"; "caption"; "cell"; "code";
13
+
"definition"; "deletion"; "directory"; "document"; "emphasis"; "feed";
14
+
"figure"; "generic"; "group"; "heading"; "img"; "insertion"; "list";
15
+
"listitem"; "mark"; "math"; "meter"; "none"; "note"; "paragraph";
16
+
"presentation"; "row"; "rowgroup"; "strong"; "subscript"; "suggestion";
17
+
"superscript"; "table"; "term"; "time"; "toolbar"; "tooltip";
18
+
19
+
(* Widget roles *)
20
+
"button"; "checkbox"; "combobox"; "dialog"; "grid"; "gridcell"; "link";
21
+
"listbox"; "menu"; "menubar"; "menuitem"; "menuitemcheckbox";
22
+
"menuitemradio"; "option"; "progressbar"; "radio"; "radiogroup";
23
+
"scrollbar"; "searchbox"; "separator"; "slider"; "spinbutton"; "switch";
24
+
"tab"; "tablist"; "tabpanel"; "textbox"; "tree"; "treegrid"; "treeitem";
25
+
26
+
(* Landmark roles *)
27
+
"banner"; "complementary"; "contentinfo"; "form"; "main"; "navigation";
28
+
"region"; "search";
29
+
30
+
(* Live region roles *)
31
+
"alert"; "log"; "marquee"; "status"; "timer";
32
+
33
+
(* Window roles *)
34
+
"alertdialog";
35
+
36
+
(* Abstract roles - not for use in HTML content *)
37
+
"command"; "comment"; "composite"; "input"; "landmark"; "range";
38
+
"roletype"; "section"; "sectionhead"; "select"; "structure"; "widget";
39
+
"window";
40
+
41
+
(* Additional roles *)
42
+
"application"; "columnheader"; "rowheader";
43
+
] in
44
+
let tbl = Hashtbl.create (List.length roles) in
45
+
List.iter (fun role -> Hashtbl.add tbl role ()) roles;
46
+
tbl
47
+
48
+
(** Roles that cannot have accessible names.
49
+
50
+
These roles must not have aria-label or aria-labelledby attributes. *)
51
+
let roles_which_cannot_be_named =
52
+
let roles = [
53
+
"caption"; "code"; "deletion"; "emphasis"; "generic"; "insertion";
54
+
"paragraph"; "presentation"; "strong"; "subscript"; "superscript"
55
+
] in
56
+
let tbl = Hashtbl.create (List.length roles) in
57
+
List.iter (fun role -> Hashtbl.add tbl role ()) roles;
58
+
tbl
59
+
60
+
(** Map from descendant role to set of required ancestor roles. *)
61
+
let required_role_ancestor_by_descendant : (string, string list) Hashtbl.t =
62
+
Hashtbl.create 32
63
+
64
+
(** Register that a descendant role requires one of the given ancestor roles. *)
65
+
let register_required_ancestor_role parents child =
66
+
Hashtbl.add required_role_ancestor_by_descendant child parents
67
+
68
+
(** Initialize required ancestor role relationships. *)
69
+
let () =
70
+
register_required_ancestor_role ["listbox"] "option";
71
+
register_required_ancestor_role ["menu"; "menubar"] "menuitem";
72
+
register_required_ancestor_role ["menu"; "menubar"] "menuitemcheckbox";
73
+
register_required_ancestor_role ["menu"; "menubar"] "menuitemradio";
74
+
register_required_ancestor_role ["tablist"] "tab";
75
+
register_required_ancestor_role ["tree"; "group"] "treeitem";
76
+
register_required_ancestor_role ["list"; "group"] "listitem";
77
+
register_required_ancestor_role ["row"] "cell";
78
+
register_required_ancestor_role ["row"] "gridcell";
79
+
register_required_ancestor_role ["row"] "columnheader";
80
+
register_required_ancestor_role ["row"] "rowheader";
81
+
register_required_ancestor_role ["grid"; "rowgroup"; "table"; "treegrid"] "row";
82
+
register_required_ancestor_role ["grid"; "table"; "treegrid"] "rowgroup"
83
+
84
+
(** Map from HTML element name to implicit ARIA role. *)
85
+
let elements_with_implicit_role : (string, string) Hashtbl.t =
86
+
let tbl = Hashtbl.create 64 in
87
+
Hashtbl.add tbl "article" "article";
88
+
Hashtbl.add tbl "aside" "complementary";
89
+
Hashtbl.add tbl "body" "document";
90
+
Hashtbl.add tbl "button" "button";
91
+
Hashtbl.add tbl "datalist" "listbox";
92
+
Hashtbl.add tbl "dd" "definition";
93
+
Hashtbl.add tbl "details" "group";
94
+
Hashtbl.add tbl "dialog" "dialog";
95
+
Hashtbl.add tbl "dfn" "term";
96
+
Hashtbl.add tbl "dt" "term";
97
+
Hashtbl.add tbl "fieldset" "group";
98
+
Hashtbl.add tbl "figure" "figure";
99
+
Hashtbl.add tbl "form" "form";
100
+
Hashtbl.add tbl "footer" "contentinfo";
101
+
Hashtbl.add tbl "h1" "heading";
102
+
Hashtbl.add tbl "h2" "heading";
103
+
Hashtbl.add tbl "h3" "heading";
104
+
Hashtbl.add tbl "h4" "heading";
105
+
Hashtbl.add tbl "h5" "heading";
106
+
Hashtbl.add tbl "h6" "heading";
107
+
Hashtbl.add tbl "hr" "separator";
108
+
Hashtbl.add tbl "header" "banner";
109
+
Hashtbl.add tbl "img" "img";
110
+
Hashtbl.add tbl "li" "listitem";
111
+
Hashtbl.add tbl "link" "link";
112
+
Hashtbl.add tbl "main" "main";
113
+
Hashtbl.add tbl "nav" "navigation";
114
+
Hashtbl.add tbl "ol" "list";
115
+
Hashtbl.add tbl "output" "status";
116
+
Hashtbl.add tbl "progress" "progressbar";
117
+
Hashtbl.add tbl "section" "region";
118
+
Hashtbl.add tbl "summary" "button";
119
+
Hashtbl.add tbl "s" "deletion";
120
+
Hashtbl.add tbl "table" "table";
121
+
Hashtbl.add tbl "tbody" "rowgroup";
122
+
Hashtbl.add tbl "textarea" "textbox";
123
+
Hashtbl.add tbl "tfoot" "rowgroup";
124
+
Hashtbl.add tbl "thead" "rowgroup";
125
+
Hashtbl.add tbl "td" "cell";
126
+
Hashtbl.add tbl "tr" "row";
127
+
Hashtbl.add tbl "ul" "list";
128
+
tbl
129
+
130
+
(** Map from HTML element name to multiple possible implicit roles.
131
+
132
+
Some elements like 'th' can have different implicit roles depending
133
+
on context (columnheader or rowheader). *)
134
+
let elements_with_implicit_roles : (string, string array) Hashtbl.t =
135
+
let tbl = Hashtbl.create 4 in
136
+
Hashtbl.add tbl "th" [| "columnheader"; "rowheader" |];
137
+
tbl
138
+
139
+
(** Map from input type to implicit ARIA role. *)
140
+
let input_types_with_implicit_role : (string, string) Hashtbl.t =
141
+
let tbl = Hashtbl.create 8 in
142
+
Hashtbl.add tbl "button" "button";
143
+
Hashtbl.add tbl "checkbox" "checkbox";
144
+
Hashtbl.add tbl "image" "button";
145
+
Hashtbl.add tbl "number" "spinbutton";
146
+
Hashtbl.add tbl "radio" "radio";
147
+
Hashtbl.add tbl "range" "slider";
148
+
Hashtbl.add tbl "reset" "button";
149
+
Hashtbl.add tbl "submit" "button";
150
+
tbl
151
+
152
+
(** Map from ARIA attribute to array of roles for which it is deprecated.
153
+
154
+
These attributes should not be used on elements with these roles. *)
155
+
let aria_deprecated_attributes_by_role : (string, string array) Hashtbl.t =
156
+
let tbl = Hashtbl.create 8 in
157
+
158
+
(* aria-disabled deprecated for many roles *)
159
+
Hashtbl.add tbl "aria-disabled" [|
160
+
"alert"; "alertdialog"; "article"; "associationlist";
161
+
"associationlistitemkey"; "associationlistitemvalue"; "banner";
162
+
"blockquote"; "caption"; "cell"; "code"; "command"; "comment";
163
+
"complementary"; "contentinfo"; "definition"; "deletion";
164
+
"dialog"; "directory"; "document"; "emphasis"; "feed"; "figure";
165
+
"form"; "generic"; "heading"; "img"; "insertion"; "landmark";
166
+
"list"; "listitem"; "log"; "main"; "mark"; "marquee"; "math";
167
+
"meter"; "navigation"; "note"; "paragraph"; "presentation";
168
+
"progressbar"; "range"; "region"; "rowgroup"; "search";
169
+
"section"; "sectionhead"; "status"; "strong"; "structure";
170
+
"subscript"; "suggestion"; "superscript"; "table"; "tabpanel";
171
+
"term"; "time"; "timer"; "tooltip"; "widget"; "window"
172
+
|];
173
+
174
+
(* aria-errormessage deprecated for many roles *)
175
+
Hashtbl.add tbl "aria-errormessage" [|
176
+
"alert"; "alertdialog"; "article"; "associationlist";
177
+
"associationlistitemkey"; "associationlistitemvalue"; "banner";
178
+
"blockquote"; "button"; "caption"; "cell"; "code"; "command";
179
+
"comment"; "complementary"; "composite"; "contentinfo";
180
+
"definition"; "deletion"; "dialog"; "directory"; "document";
181
+
"emphasis"; "feed"; "figure"; "form"; "generic"; "grid"; "group";
182
+
"heading"; "img"; "input"; "insertion"; "landmark"; "link"; "list";
183
+
"listitem"; "log"; "main"; "mark"; "marquee"; "math"; "menu";
184
+
"menubar"; "menuitem"; "menuitemcheckbox"; "menuitemradio";
185
+
"meter"; "navigation"; "note"; "option"; "paragraph";
186
+
"presentation"; "progressbar"; "radio"; "range"; "region"; "row";
187
+
"rowgroup"; "scrollbar"; "search"; "section"; "sectionhead";
188
+
"select"; "separator"; "status"; "strong"; "structure";
189
+
"subscript"; "suggestion"; "superscript"; "tab"; "table";
190
+
"tablist"; "tabpanel"; "term"; "time"; "timer"; "toolbar";
191
+
"tooltip"; "treeitem"; "widget"; "window"
192
+
|];
193
+
194
+
(* aria-haspopup deprecated for many roles *)
195
+
Hashtbl.add tbl "aria-haspopup" [|
196
+
"alert"; "alertdialog"; "article"; "associationlist";
197
+
"associationlistitemkey"; "associationlistitemvalue"; "banner";
198
+
"blockquote"; "caption"; "cell"; "checkbox"; "code"; "command";
199
+
"comment"; "complementary"; "composite"; "contentinfo";
200
+
"definition"; "deletion"; "dialog"; "directory"; "document";
201
+
"emphasis"; "feed"; "figure"; "form"; "generic"; "grid"; "group";
202
+
"heading"; "img"; "input"; "insertion"; "landmark"; "list";
203
+
"listbox"; "listitem"; "log"; "main"; "mark"; "marquee"; "math";
204
+
"menu"; "menubar"; "meter"; "navigation"; "note"; "option";
205
+
"paragraph"; "presentation"; "progressbar"; "radio"; "radiogroup";
206
+
"range"; "region"; "row"; "rowgroup"; "scrollbar"; "search";
207
+
"section"; "sectionhead"; "select"; "separator"; "spinbutton";
208
+
"status"; "strong"; "structure"; "subscript"; "suggestion";
209
+
"superscript"; "switch"; "table"; "tablist"; "tabpanel"; "term";
210
+
"time"; "timer"; "toolbar"; "tooltip"; "tree"; "treegrid";
211
+
"widget"; "window"
212
+
|];
213
+
214
+
(* aria-invalid deprecated for many roles *)
215
+
Hashtbl.add tbl "aria-invalid" [|
216
+
"alert"; "alertdialog"; "article"; "associationlist";
217
+
"associationlistitemkey"; "associationlistitemvalue"; "banner";
218
+
"blockquote"; "button"; "caption"; "cell"; "code"; "command";
219
+
"comment"; "complementary"; "composite"; "contentinfo";
220
+
"definition"; "deletion"; "dialog"; "directory"; "document";
221
+
"emphasis"; "feed"; "figure"; "form"; "generic"; "grid"; "group";
222
+
"heading"; "img"; "input"; "insertion"; "landmark"; "link"; "list";
223
+
"listitem"; "log"; "main"; "mark"; "marquee"; "math"; "menu";
224
+
"menubar"; "menuitem"; "menuitemcheckbox"; "menuitemradio";
225
+
"meter"; "navigation"; "note"; "option"; "paragraph";
226
+
"presentation"; "progressbar"; "radio"; "range"; "region"; "row";
227
+
"rowgroup"; "scrollbar"; "search"; "section"; "sectionhead";
228
+
"select"; "separator"; "status"; "strong"; "structure";
229
+
"subscript"; "suggestion"; "superscript"; "tab"; "table";
230
+
"tablist"; "tabpanel"; "term"; "time"; "timer"; "toolbar";
231
+
"tooltip"; "treeitem"; "widget"; "window"
232
+
|];
233
+
234
+
(* aria-level deprecated for listitem *)
235
+
Hashtbl.add tbl "aria-level" [| "listitem" |];
236
+
237
+
tbl
238
+
239
+
(** Split a role attribute value into individual roles.
240
+
241
+
The role attribute can contain multiple space-separated role tokens. *)
242
+
let split_roles role_value =
243
+
let trimmed = String.trim role_value in
244
+
if trimmed = "" then []
245
+
else
246
+
String.split_on_char ' ' trimmed
247
+
|> List.filter (fun s -> String.trim s <> "")
248
+
|> List.map String.lowercase_ascii
249
+
250
+
(** Get the implicit role for an HTML element. *)
251
+
let get_implicit_role element_name attrs =
252
+
(* Check for input element with type attribute *)
253
+
if element_name = "input" then begin
254
+
match List.assoc_opt "type" attrs with
255
+
| Some input_type ->
256
+
let input_type = String.lowercase_ascii input_type in
257
+
Hashtbl.find_opt input_types_with_implicit_role input_type
258
+
| None -> Some "textbox" (* default input type is text *)
259
+
end
260
+
else
261
+
Hashtbl.find_opt elements_with_implicit_role element_name
262
+
263
+
(** Get all possible implicit roles for an element (for elements like 'th').
264
+
265
+
Note: This function is not currently used but is provided for completeness. *)
266
+
let _get_implicit_roles element_name =
267
+
match Hashtbl.find_opt elements_with_implicit_roles element_name with
268
+
| Some roles -> Array.to_list roles
269
+
| None ->
270
+
match get_implicit_role element_name [] with
271
+
| Some role -> [role]
272
+
| None -> []
273
+
274
+
(** Stack node representing an element in the ancestor chain. *)
275
+
type stack_node = {
276
+
explicit_roles : string list;
277
+
implicit_role : string option;
278
+
}
279
+
280
+
(** Checker state. *)
281
+
type state = {
282
+
mutable stack : stack_node list;
283
+
}
284
+
285
+
let create () = { stack = [] }
286
+
287
+
let reset state = state.stack <- []
288
+
289
+
(** Check if any ancestor has one of the required roles. *)
290
+
let has_required_ancestor_role state required_roles =
291
+
List.exists (fun ancestor ->
292
+
(* Check explicit roles *)
293
+
List.exists (fun role ->
294
+
List.mem role required_roles
295
+
) ancestor.explicit_roles
296
+
||
297
+
(* Check implicit role *)
298
+
match ancestor.implicit_role with
299
+
| Some implicit_role -> List.mem implicit_role required_roles
300
+
| None -> false
301
+
) state.stack
302
+
303
+
(** Render a list of roles as a human-readable string. *)
304
+
let render_role_set roles =
305
+
match roles with
306
+
| [] -> ""
307
+
| [role] -> "\"" ^ role ^ "\""
308
+
| _ ->
309
+
let quoted = List.map (fun r -> "\"" ^ r ^ "\"") roles in
310
+
String.concat " or " quoted
311
+
312
+
let start_element state ~name ~namespace ~attrs collector =
313
+
(* Only process HTML elements *)
314
+
match namespace with
315
+
| Some _ -> () (* Skip non-HTML elements *)
316
+
| None ->
317
+
let role_attr = List.assoc_opt "role" attrs in
318
+
let aria_label = List.assoc_opt "aria-label" attrs in
319
+
let aria_labelledby = List.assoc_opt "aria-labelledby" attrs in
320
+
let has_accessible_name =
321
+
(match aria_label with Some v -> String.trim v <> "" | None -> false) ||
322
+
(match aria_labelledby with Some v -> String.trim v <> "" | None -> false)
323
+
in
324
+
325
+
(* Parse explicit roles from role attribute *)
326
+
let explicit_roles = match role_attr with
327
+
| Some role_value -> split_roles role_value
328
+
| None -> []
329
+
in
330
+
331
+
(* Get implicit role for this element *)
332
+
let implicit_role = get_implicit_role name attrs in
333
+
334
+
(* Validate explicit roles *)
335
+
List.iter (fun role ->
336
+
(* Check if role is valid *)
337
+
if not (Hashtbl.mem valid_aria_roles role) then
338
+
Message_collector.add_error collector
339
+
~message:(Printf.sprintf "Invalid ARIA role \"%s\"." role) ();
340
+
341
+
(* Check if role cannot be named *)
342
+
if Hashtbl.mem roles_which_cannot_be_named role && has_accessible_name then
343
+
Message_collector.add_error collector
344
+
~message:(Printf.sprintf
345
+
"Elements with role=\"%s\" must not have accessible names (via aria-label or aria-labelledby)."
346
+
role) ();
347
+
348
+
(* Check for required ancestor roles *)
349
+
begin match Hashtbl.find_opt required_role_ancestor_by_descendant role with
350
+
| Some required_ancestors ->
351
+
if not (has_required_ancestor_role state required_ancestors) then
352
+
Message_collector.add_error collector
353
+
~message:(Printf.sprintf
354
+
"An element with \"role=%s\" must be contained in, or owned by, an element with the \"role\" value %s."
355
+
role
356
+
(render_role_set required_ancestors)) ()
357
+
| None -> ()
358
+
end;
359
+
360
+
(* Check for deprecated ARIA attributes for this role *)
361
+
List.iter (fun (attr_name, _attr_value) ->
362
+
if String.starts_with ~prefix:"aria-" attr_name then
363
+
match Hashtbl.find_opt aria_deprecated_attributes_by_role attr_name with
364
+
| Some deprecated_for_roles ->
365
+
(* Check if current role is in the deprecated list *)
366
+
if Array.mem role deprecated_for_roles then
367
+
Message_collector.add_warning collector
368
+
~message:(Printf.sprintf
369
+
"The \"%s\" attribute should not be used on any element which has \"role=%s\"."
370
+
attr_name role) ()
371
+
| None -> ()
372
+
) attrs
373
+
) explicit_roles;
374
+
375
+
(* Push current element onto stack *)
376
+
let node = {
377
+
explicit_roles;
378
+
implicit_role;
379
+
} in
380
+
state.stack <- node :: state.stack
381
+
382
+
let end_element state ~name:_ ~namespace _collector =
383
+
(* Only process HTML elements *)
384
+
match namespace with
385
+
| Some _ -> () (* Skip non-HTML elements *)
386
+
| None ->
387
+
(* Pop from stack *)
388
+
match state.stack with
389
+
| _ :: rest -> state.stack <- rest
390
+
| [] -> () (* Stack underflow - shouldn't happen in well-formed docs *)
391
+
392
+
let characters _state _text _collector = ()
393
+
394
+
let end_document _state _collector = ()
395
+
396
+
let checker = (module struct
397
+
type nonrec state = state
398
+
let create = create
399
+
let reset = reset
400
+
let start_element = start_element
401
+
let end_element = end_element
402
+
let characters = characters
403
+
let end_document = end_document
404
+
end : Checker.S)
+102
lib/html5_checker/specialized/aria_checker.mli
+102
lib/html5_checker/specialized/aria_checker.mli
···
1
+
(** ARIA (Accessible Rich Internet Applications) validation checker.
2
+
3
+
Validates ARIA roles, required ancestor roles, implicit roles, and
4
+
deprecated ARIA attributes according to the WAI-ARIA specification.
5
+
6
+
{2 Validation Rules}
7
+
8
+
The checker enforces the following ARIA validation rules:
9
+
10
+
{3 Valid ARIA Roles}
11
+
12
+
All valid WAI-ARIA 1.2 roles are recognized:
13
+
14
+
- {b Document structure roles}: article, definition, directory, document,
15
+
feed, figure, group, heading, img, list, listitem, math, none, note,
16
+
presentation, region, separator, table, term, toolbar, tooltip
17
+
- {b Widget roles}: button, checkbox, combobox, dialog, grid, gridcell,
18
+
link, listbox, menu, menubar, menuitem, menuitemcheckbox,
19
+
menuitemradio, option, progressbar, radio, radiogroup, scrollbar,
20
+
slider, spinbutton, switch, tab, tablist, tabpanel, textbox, tree,
21
+
treegrid, treeitem
22
+
- {b Landmark roles}: banner, complementary, contentinfo, form, main,
23
+
navigation, search
24
+
- {b Live region roles}: alert, log, marquee, status, timer
25
+
- {b Window roles}: alertdialog
26
+
- {b Abstract roles} (not for use in content): command, composite, input,
27
+
landmark, range, roletype, section, sectionhead, select, structure,
28
+
widget, window
29
+
30
+
{3 Required Ancestor Roles}
31
+
32
+
Certain ARIA roles require specific ancestor roles:
33
+
34
+
- [option] requires [listbox]
35
+
- [menuitem], [menuitemcheckbox], [menuitemradio] require [menu] or
36
+
[menubar]
37
+
- [tab] requires [tablist]
38
+
- [treeitem] requires [tree] or [group]
39
+
- [listitem] requires [list] or [group]
40
+
- [cell], [gridcell], [columnheader], [rowheader] require [row]
41
+
- [row] requires [grid], [rowgroup], [table], or [treegrid]
42
+
- [rowgroup] requires [grid], [table], or [treegrid]
43
+
44
+
{3 Roles That Cannot Be Named}
45
+
46
+
These roles must not have accessible names (via aria-label or
47
+
aria-labelledby):
48
+
49
+
- caption, code, deletion, emphasis, generic, insertion, paragraph,
50
+
presentation, strong, subscript, superscript
51
+
52
+
{3 Implicit ARIA Roles}
53
+
54
+
HTML elements have implicit ARIA roles:
55
+
56
+
- [<article>] has implicit role [article]
57
+
- [<aside>] has implicit role [complementary]
58
+
- [<button>] has implicit role [button]
59
+
- [<dialog>] has implicit role [dialog]
60
+
- [<footer>] has implicit role [contentinfo]
61
+
- [<header>] has implicit role [banner]
62
+
- [<main>] has implicit role [main]
63
+
- [<nav>] has implicit role [navigation]
64
+
- And many more...
65
+
66
+
{3 Deprecated ARIA Attributes}
67
+
68
+
Certain ARIA attributes are deprecated for specific roles:
69
+
70
+
- [aria-disabled] is deprecated for: alert, article, banner, cell,
71
+
document, feed, figure, heading, img, list, listitem, main, navigation,
72
+
region, and many others
73
+
- [aria-errormessage] is deprecated for: alert, article, banner, button,
74
+
cell, document, link, and many others
75
+
- [aria-haspopup] is deprecated for: alert, article, checkbox, listbox,
76
+
and many others
77
+
- [aria-invalid] is deprecated for: alert, article, button, cell,
78
+
document, link, and many others
79
+
- [aria-level] is deprecated for: listitem
80
+
81
+
{2 Implementation Details}
82
+
83
+
The checker maintains a stack of ancestor elements with their roles
84
+
(explicit and implicit) to validate required ancestor relationships.
85
+
When an element with a role attribute is encountered, the checker:
86
+
87
+
1. Parses the role attribute value (space-separated tokens)
88
+
2. Validates each role against the list of valid roles
89
+
3. Checks if the role requires an ancestor role
90
+
4. Verifies the required ancestor is present in the ancestor stack
91
+
5. Checks for deprecated ARIA attributes on elements with specific roles
92
+
6. Validates that roles which cannot be named do not have aria-label or
93
+
aria-labelledby attributes
94
+
95
+
@see <https://www.w3.org/TR/wai-aria-1.2/>
96
+
WAI-ARIA 1.2 specification
97
+
*)
98
+
99
+
include Checker.S
100
+
101
+
val checker : Checker.t
102
+
(** [checker] is a checker instance for validating ARIA roles and attributes. *)
+157
lib/html5_checker/specialized/heading_checker.ml
+157
lib/html5_checker/specialized/heading_checker.ml
···
1
+
(** Heading structure validation checker.
2
+
3
+
This checker validates that:
4
+
- Heading levels don't skip (e.g., h1 to h3)
5
+
- Documents have at least one heading
6
+
- Multiple h1 usage is noted
7
+
- Headings are not empty *)
8
+
9
+
(** Checker state tracking heading structure. *)
10
+
type state = {
11
+
mutable current_level : int option;
12
+
mutable h1_count : int;
13
+
mutable has_any_heading : bool;
14
+
mutable first_heading_checked : bool;
15
+
mutable in_heading : string option;
16
+
mutable heading_has_text : bool;
17
+
}
18
+
19
+
let create () =
20
+
{
21
+
current_level = None;
22
+
h1_count = 0;
23
+
has_any_heading = false;
24
+
first_heading_checked = false;
25
+
in_heading = None;
26
+
heading_has_text = false;
27
+
}
28
+
29
+
let reset state =
30
+
state.current_level <- None;
31
+
state.h1_count <- 0;
32
+
state.has_any_heading <- false;
33
+
state.first_heading_checked <- false;
34
+
state.in_heading <- None;
35
+
state.heading_has_text <- false
36
+
37
+
(** Extract heading level from tag name (e.g., "h1" -> 1). *)
38
+
let heading_level name =
39
+
match String.lowercase_ascii name with
40
+
| "h1" -> Some 1
41
+
| "h2" -> Some 2
42
+
| "h3" -> Some 3
43
+
| "h4" -> Some 4
44
+
| "h5" -> Some 5
45
+
| "h6" -> Some 6
46
+
| _ -> None
47
+
48
+
(** Check if text is effectively empty (only whitespace). *)
49
+
let is_empty_text text =
50
+
let rec check i =
51
+
if i >= String.length text then
52
+
true
53
+
else
54
+
match text.[i] with
55
+
| ' ' | '\t' | '\n' | '\r' -> check (i + 1)
56
+
| _ -> false
57
+
in
58
+
check 0
59
+
60
+
let start_element state ~name ~namespace:_ ~attrs:_ collector =
61
+
match heading_level name with
62
+
| Some level ->
63
+
state.has_any_heading <- true;
64
+
65
+
(* Check if this is the first heading *)
66
+
if not state.first_heading_checked then begin
67
+
state.first_heading_checked <- true;
68
+
if level <> 1 then
69
+
Message_collector.add_warning collector
70
+
~message:(Printf.sprintf
71
+
"First heading in document is <%s>, should typically be <h1>"
72
+
name)
73
+
~code:"first-heading-not-h1"
74
+
~element:name
75
+
()
76
+
end;
77
+
78
+
(* Track h1 count *)
79
+
if level = 1 then begin
80
+
state.h1_count <- state.h1_count + 1;
81
+
if state.h1_count > 1 then
82
+
Message_collector.add_warning collector
83
+
~message:"Multiple <h1> elements detected. While valid in HTML5 sectioning content, traditional advice suggests one <h1> per page"
84
+
~code:"multiple-h1"
85
+
~element:name
86
+
()
87
+
end;
88
+
89
+
(* Check for skipped levels *)
90
+
begin match state.current_level with
91
+
| None ->
92
+
state.current_level <- Some level
93
+
| Some prev_level ->
94
+
let diff = level - prev_level in
95
+
if diff > 1 then
96
+
Message_collector.add_warning collector
97
+
~message:(Printf.sprintf
98
+
"Heading level skipped: <%s> follows <h%d>, skipping %d level%s. This can confuse screen reader users"
99
+
name prev_level (diff - 1) (if diff > 2 then "s" else ""))
100
+
~code:"heading-level-skipped"
101
+
~element:name
102
+
();
103
+
state.current_level <- Some level
104
+
end;
105
+
106
+
(* Track that we're in a heading to check for empty content *)
107
+
state.in_heading <- Some name;
108
+
state.heading_has_text <- false
109
+
110
+
| None ->
111
+
(* Not a heading element *)
112
+
()
113
+
114
+
let end_element state ~name ~namespace:_ collector =
115
+
match state.in_heading with
116
+
| Some heading when heading = name ->
117
+
(* Exiting the heading we're tracking *)
118
+
if not state.heading_has_text then
119
+
Message_collector.add_error collector
120
+
~message:(Printf.sprintf
121
+
"Heading <%s> is empty or contains only whitespace. Empty headings are problematic for screen readers"
122
+
name)
123
+
~code:"empty-heading"
124
+
~element:name
125
+
();
126
+
state.in_heading <- None;
127
+
state.heading_has_text <- false
128
+
| _ ->
129
+
()
130
+
131
+
let characters state text _collector =
132
+
(* If we're inside a heading, check if this text is non-whitespace *)
133
+
match state.in_heading with
134
+
| Some _ ->
135
+
if not (is_empty_text text) then
136
+
state.heading_has_text <- true
137
+
| None ->
138
+
()
139
+
140
+
let end_document state collector =
141
+
(* Check if document has any headings *)
142
+
if not state.has_any_heading then
143
+
Message_collector.add_warning collector
144
+
~message:"Document contains no heading elements (h1-h6). Headings provide important document structure for accessibility"
145
+
~code:"no-headings"
146
+
()
147
+
148
+
let checker = (module struct
149
+
type nonrec state = state
150
+
151
+
let create = create
152
+
let reset = reset
153
+
let start_element = start_element
154
+
let end_element = end_element
155
+
let characters = characters
156
+
let end_document = end_document
157
+
end : Checker.S)
+12
lib/html5_checker/specialized/heading_checker.mli
+12
lib/html5_checker/specialized/heading_checker.mli
···
1
+
(** Heading structure validation checker.
2
+
3
+
Validates:
4
+
- Proper heading level hierarchy (no skipped levels)
5
+
- Document should have at least one heading
6
+
- Multiple h1 usage patterns
7
+
- Headings should not be empty *)
8
+
9
+
include Checker.S
10
+
11
+
val checker : Checker.t
12
+
(** [checker] is a checker instance for validating heading structure. *)
+111
lib/html5_checker/specialized/language_checker.ml
+111
lib/html5_checker/specialized/language_checker.ml
···
1
+
(** Language attribute validation checker.
2
+
3
+
Validates language attributes. *)
4
+
5
+
(** Checker state tracking language attributes. *)
6
+
type state = {
7
+
mutable html_element_seen : bool;
8
+
mutable html_has_lang : bool;
9
+
}
10
+
11
+
let create () =
12
+
{
13
+
html_element_seen = false;
14
+
html_has_lang = false;
15
+
}
16
+
17
+
let reset state =
18
+
state.html_element_seen <- false;
19
+
state.html_has_lang <- false
20
+
21
+
(** Get attribute value from attribute list. *)
22
+
let get_attr attrs name =
23
+
try Some (List.assoc name attrs)
24
+
with Not_found -> None
25
+
26
+
(** Validate language attribute. *)
27
+
let validate_lang_attr value ~location ~element collector =
28
+
match Dt_language.Language_or_empty.validate value with
29
+
| Ok () -> ()
30
+
| Error msg ->
31
+
Message_collector.add_error collector
32
+
~message:(Printf.sprintf "Invalid lang attribute: %s" msg)
33
+
~code:"invalid-lang"
34
+
?location
35
+
~element
36
+
~attribute:"lang"
37
+
()
38
+
39
+
(** Check if lang and xml:lang match. *)
40
+
let check_lang_xmllang_match ~lang ~xmllang ~location ~element collector =
41
+
if lang <> xmllang then
42
+
Message_collector.add_warning collector
43
+
~message:(Printf.sprintf
44
+
"lang attribute '%s' does not match xml:lang attribute '%s'" lang xmllang)
45
+
~code:"lang-xmllang-mismatch"
46
+
?location
47
+
~element
48
+
()
49
+
50
+
(** Process language attributes. *)
51
+
let process_language_attrs state ~element ~namespace ~attrs ~location collector =
52
+
let lang_opt = get_attr attrs "lang" in
53
+
let xmllang_opt = get_attr attrs "xml:lang" in
54
+
55
+
(* Check if this is the html element *)
56
+
if element = "html" && namespace = None then begin
57
+
state.html_element_seen <- true;
58
+
state.html_has_lang <- lang_opt <> None
59
+
end;
60
+
61
+
(* Validate lang attribute *)
62
+
begin match lang_opt with
63
+
| Some lang ->
64
+
validate_lang_attr lang ~location ~element collector
65
+
| None -> ()
66
+
end;
67
+
68
+
(* Validate xml:lang attribute *)
69
+
begin match xmllang_opt with
70
+
| Some xmllang ->
71
+
validate_lang_attr xmllang ~location ~element collector
72
+
| None -> ()
73
+
end;
74
+
75
+
(* Check that lang and xml:lang match if both present *)
76
+
begin match lang_opt, xmllang_opt with
77
+
| Some lang, Some xmllang ->
78
+
check_lang_xmllang_match ~lang ~xmllang ~location ~element collector
79
+
| _ -> ()
80
+
end
81
+
82
+
let start_element state ~name ~namespace ~attrs collector =
83
+
let location = None in
84
+
process_language_attrs state ~element:name ~namespace ~attrs ~location collector
85
+
86
+
let end_element _state ~name:_ ~namespace:_ _collector =
87
+
()
88
+
89
+
let characters _state _text _collector =
90
+
()
91
+
92
+
let end_document state collector =
93
+
(* Warn if html element lacks lang attribute *)
94
+
if state.html_element_seen && not state.html_has_lang then
95
+
Message_collector.add_warning collector
96
+
~message:"The <html> element should have a lang attribute to specify \
97
+
the document's primary language"
98
+
~code:"missing-lang-on-html"
99
+
~element:"html"
100
+
()
101
+
102
+
let checker = (module struct
103
+
type nonrec state = state
104
+
105
+
let create = create
106
+
let reset = reset
107
+
let start_element = start_element
108
+
let end_element = end_element
109
+
let characters = characters
110
+
let end_document = end_document
111
+
end : Checker.S)
+18
lib/html5_checker/specialized/language_checker.mli
+18
lib/html5_checker/specialized/language_checker.mli
···
1
+
(** Language attribute validation checker.
2
+
3
+
Validates:
4
+
- lang attribute values are valid BCP 47 tags
5
+
- xml:lang matches lang when both present
6
+
- Document has a lang attribute on root element
7
+
8
+
This checker ensures proper language markup:
9
+
- lang attribute values are validated using BCP 47 format
10
+
- When both lang and xml:lang are present, they must match
11
+
- Warning if <html> element lacks lang attribute
12
+
- Empty lang="" is valid (indicates unknown language)
13
+
- Primary language subtag should be valid *)
14
+
15
+
include Checker.S
16
+
17
+
val checker : Checker.t
18
+
(** [checker] is a checker instance for validating language attributes. *)
+304
lib/html5_checker/specialized/microdata_checker.ml
+304
lib/html5_checker/specialized/microdata_checker.ml
···
1
+
(** Microdata validation checker.
2
+
3
+
Validates HTML5 microdata attributes. *)
4
+
5
+
(** Information about an itemscope. *)
6
+
type item_scope = {
7
+
element : string;
8
+
location : Message.location option; [@warning "-69"]
9
+
itemtype : string option; [@warning "-69"]
10
+
itemid : string option; [@warning "-69"]
11
+
itemref : string list; [@warning "-69"]
12
+
}
13
+
14
+
(** Information about an itemref reference. *)
15
+
type itemref_reference = {
16
+
referring_element : string;
17
+
referenced_ids : string list;
18
+
location : Message.location option;
19
+
}
20
+
21
+
(** Checker state tracking microdata. *)
22
+
type state = {
23
+
mutable scope_stack : item_scope list;
24
+
mutable itemref_references : itemref_reference list;
25
+
mutable all_ids : (string, unit) Hashtbl.t; [@warning "-69"]
26
+
mutable html_element_seen : bool; [@warning "-69"]
27
+
}
28
+
29
+
let create () =
30
+
{
31
+
scope_stack = [];
32
+
itemref_references = [];
33
+
all_ids = Hashtbl.create 64;
34
+
html_element_seen = false;
35
+
}
36
+
37
+
let reset state =
38
+
state.scope_stack <- [];
39
+
state.itemref_references <- [];
40
+
Hashtbl.clear state.all_ids;
41
+
state.html_element_seen <- false
42
+
43
+
(** Split whitespace-separated values. *)
44
+
let split_whitespace value =
45
+
let rec split acc start i =
46
+
if i >= String.length value then
47
+
if i > start then
48
+
(String.sub value start (i - start)) :: acc
49
+
else
50
+
acc
51
+
else
52
+
match value.[i] with
53
+
| ' ' | '\t' | '\n' | '\r' ->
54
+
let acc' =
55
+
if i > start then
56
+
(String.sub value start (i - start)) :: acc
57
+
else
58
+
acc
59
+
in
60
+
split acc' (i + 1) (i + 1)
61
+
| _ ->
62
+
split acc start (i + 1)
63
+
in
64
+
List.rev (split [] 0 0)
65
+
66
+
(** Check if a string is a valid URL (contains a colon). *)
67
+
let is_url s =
68
+
String.contains s ':'
69
+
70
+
(** Check if itemprop value is valid. *)
71
+
let validate_itemprop_value value =
72
+
if String.length value = 0 then
73
+
Error "itemprop value must not be empty"
74
+
else if not (is_url value) && String.contains value ':' then
75
+
Error (Printf.sprintf
76
+
"itemprop value '%s' contains a colon but is not a URL" value)
77
+
else
78
+
Ok ()
79
+
80
+
(** Check if element is inside an itemscope or referenced by itemref. *)
81
+
let is_property_element state =
82
+
state.scope_stack <> []
83
+
84
+
(** Get attributes from attribute list. *)
85
+
let get_attr attrs name =
86
+
try Some (List.assoc name attrs)
87
+
with Not_found -> None
88
+
89
+
(** Process microdata attributes. *)
90
+
let process_microdata_attrs state ~element ~attrs ~location collector =
91
+
let has_itemscope = List.mem_assoc "itemscope" attrs in
92
+
let itemtype_opt = get_attr attrs "itemtype" in
93
+
let itemid_opt = get_attr attrs "itemid" in
94
+
let itemref_opt = get_attr attrs "itemref" in
95
+
let itemprop_opt = get_attr attrs "itemprop" in
96
+
97
+
(* Check itemid requires itemscope and itemtype *)
98
+
begin match itemid_opt with
99
+
| Some _itemid ->
100
+
if not has_itemscope then
101
+
Message_collector.add_error collector
102
+
~message:"itemid attribute requires itemscope attribute"
103
+
~code:"microdata-itemid-without-itemscope"
104
+
?location
105
+
~element
106
+
~attribute:"itemid"
107
+
();
108
+
if itemtype_opt = None then
109
+
Message_collector.add_error collector
110
+
~message:"itemid attribute requires itemtype attribute"
111
+
~code:"microdata-itemid-without-itemtype"
112
+
?location
113
+
~element
114
+
~attribute:"itemid"
115
+
()
116
+
| None -> ()
117
+
end;
118
+
119
+
(* Check itemref requires itemscope *)
120
+
begin match itemref_opt with
121
+
| Some itemref_value ->
122
+
if not has_itemscope then
123
+
Message_collector.add_error collector
124
+
~message:"itemref attribute requires itemscope attribute"
125
+
~code:"microdata-itemref-without-itemscope"
126
+
?location
127
+
~element
128
+
~attribute:"itemref"
129
+
()
130
+
else begin
131
+
(* Collect itemref references for later validation *)
132
+
let ids = split_whitespace itemref_value in
133
+
state.itemref_references <- {
134
+
referring_element = element;
135
+
referenced_ids = ids;
136
+
location;
137
+
} :: state.itemref_references
138
+
end
139
+
| None -> ()
140
+
end;
141
+
142
+
(* Check itemtype requires itemscope *)
143
+
begin match itemtype_opt with
144
+
| Some _itemtype ->
145
+
if not has_itemscope then
146
+
Message_collector.add_error collector
147
+
~message:"itemtype attribute requires itemscope attribute"
148
+
~code:"microdata-itemtype-without-itemscope"
149
+
?location
150
+
~element
151
+
~attribute:"itemtype"
152
+
()
153
+
| None -> ()
154
+
end;
155
+
156
+
(* Check itemprop value validity *)
157
+
begin match itemprop_opt with
158
+
| Some itemprop_value ->
159
+
let props = split_whitespace itemprop_value in
160
+
List.iter (fun prop ->
161
+
match validate_itemprop_value prop with
162
+
| Ok () -> ()
163
+
| Error msg ->
164
+
Message_collector.add_error collector
165
+
~message:msg
166
+
~code:"microdata-invalid-itemprop"
167
+
?location
168
+
~element
169
+
~attribute:"itemprop"
170
+
()
171
+
) props;
172
+
173
+
(* Check itemprop can only appear on property elements *)
174
+
if not (is_property_element state) then
175
+
Message_collector.add_error collector
176
+
~message:"itemprop attribute can only appear on elements that are \
177
+
properties of an item (descendant of itemscope or referenced by itemref)"
178
+
~code:"microdata-itemprop-outside-scope"
179
+
?location
180
+
~element
181
+
~attribute:"itemprop"
182
+
()
183
+
| None -> ()
184
+
end;
185
+
186
+
(* If this element has itemscope, push it onto the stack *)
187
+
if has_itemscope then begin
188
+
let itemref = match itemref_opt with
189
+
| Some v -> split_whitespace v
190
+
| None -> []
191
+
in
192
+
let scope = {
193
+
element;
194
+
location;
195
+
itemtype = itemtype_opt;
196
+
itemid = itemid_opt;
197
+
itemref;
198
+
} in
199
+
state.scope_stack <- scope :: state.scope_stack
200
+
end
201
+
202
+
(** Track IDs for itemref validation. *)
203
+
let track_id state attrs =
204
+
match get_attr attrs "id" with
205
+
| Some id ->
206
+
if String.length id > 0 then
207
+
Hashtbl.replace state.all_ids id ()
208
+
| None -> ()
209
+
210
+
(** Detect itemref cycles using depth-first search. *)
211
+
let detect_itemref_cycles state collector =
212
+
(* Build adjacency list from itemref references *)
213
+
let graph = Hashtbl.create 32 in
214
+
List.iter (fun ref ->
215
+
Hashtbl.replace graph ref.referring_element ref.referenced_ids
216
+
) state.itemref_references;
217
+
218
+
(* DFS to detect cycles *)
219
+
let rec visit visited stack node =
220
+
if List.mem node stack then
221
+
(* Found a cycle *)
222
+
Some (node :: stack)
223
+
else if List.mem node visited then
224
+
None
225
+
else
226
+
match Hashtbl.find_opt graph node with
227
+
| None -> None
228
+
| Some neighbors ->
229
+
let stack' = node :: stack in
230
+
let rec check_neighbors = function
231
+
| [] -> None
232
+
| neighbor :: rest ->
233
+
match visit visited stack' neighbor with
234
+
| Some cycle -> Some cycle
235
+
| None -> check_neighbors rest
236
+
in
237
+
check_neighbors neighbors
238
+
in
239
+
240
+
(* Check all nodes *)
241
+
let rec check_all_nodes visited nodes =
242
+
match nodes with
243
+
| [] -> ()
244
+
| node :: rest ->
245
+
begin match visit visited [] node with
246
+
| Some cycle ->
247
+
let cycle_str = String.concat " -> " (List.rev cycle) in
248
+
Message_collector.add_error collector
249
+
~message:(Printf.sprintf "itemref cycle detected: %s" cycle_str)
250
+
~code:"microdata-itemref-cycle"
251
+
()
252
+
| None -> ()
253
+
end;
254
+
check_all_nodes (node :: visited) rest
255
+
in
256
+
257
+
let all_nodes = Hashtbl.to_seq_keys graph |> List.of_seq in
258
+
check_all_nodes [] all_nodes
259
+
260
+
let start_element state ~name ~namespace:_ ~attrs collector =
261
+
let location = None in
262
+
track_id state attrs;
263
+
process_microdata_attrs state ~element:name ~attrs ~location collector
264
+
265
+
let end_element state ~name ~namespace:_ _collector =
266
+
(* Pop itemscope from stack if this element had one *)
267
+
match state.scope_stack with
268
+
| scope :: rest when scope.element = name ->
269
+
state.scope_stack <- rest
270
+
| _ -> ()
271
+
272
+
let characters _state _text _collector =
273
+
()
274
+
275
+
let end_document state collector =
276
+
(* Check all itemref references point to existing IDs *)
277
+
List.iter (fun ref ->
278
+
List.iter (fun id ->
279
+
if not (Hashtbl.mem state.all_ids id) then
280
+
Message_collector.add_error collector
281
+
~message:(Printf.sprintf
282
+
"itemref on <%s> refers to ID '%s' which does not exist"
283
+
ref.referring_element id)
284
+
~code:"microdata-itemref-dangling"
285
+
?location:ref.location
286
+
~element:ref.referring_element
287
+
~attribute:"itemref"
288
+
()
289
+
) ref.referenced_ids
290
+
) state.itemref_references;
291
+
292
+
(* Detect itemref cycles *)
293
+
detect_itemref_cycles state collector
294
+
295
+
let checker = (module struct
296
+
type nonrec state = state
297
+
298
+
let create = create
299
+
let reset = reset
300
+
let start_element = start_element
301
+
let end_element = end_element
302
+
let characters = characters
303
+
let end_document = end_document
304
+
end : Checker.S)
+18
lib/html5_checker/specialized/microdata_checker.mli
+18
lib/html5_checker/specialized/microdata_checker.mli
···
1
+
(** Microdata validation checker.
2
+
3
+
Validates HTML5 microdata (itemscope, itemtype, itemprop, itemid, itemref).
4
+
5
+
This checker verifies that microdata attributes are used correctly:
6
+
- itemprop can only appear on elements that are properties of an item
7
+
(descendant of itemscope or referenced by itemref)
8
+
- itemid requires both itemscope and itemtype
9
+
- itemref requires itemscope
10
+
- itemtype requires itemscope
11
+
- itemref values must reference existing IDs
12
+
- Detects itemref cycles (A references B, B references A)
13
+
- itemprop values must be valid property names (no colons unless URL) *)
14
+
15
+
include Checker.S
16
+
17
+
val checker : Checker.t
18
+
(** [checker] is a checker instance for validating microdata. *)
+833
lib/html5_checker/specialized/table_checker.ml
+833
lib/html5_checker/specialized/table_checker.ml
···
1
+
(** Table structure validation checker implementation.
2
+
3
+
This module implements comprehensive table structure validation including
4
+
cell overlap detection, span validation, and structural integrity checks. *)
5
+
6
+
(** HTML namespace constant *)
7
+
let html_ns = "http://www.w3.org/1999/xhtml"
8
+
9
+
(** Maximum allowed colspan value per HTML5 spec *)
10
+
let max_colspan = 1000
11
+
12
+
(** Maximum allowed rowspan value per HTML5 spec *)
13
+
let max_rowspan = 65534
14
+
15
+
(** Special rowspan value meaning "span to end of row group" *)
16
+
let rowspan_zero_magic = max_rowspan
17
+
18
+
(** {1 Cell Representation} *)
19
+
20
+
(** A table cell with positioning information *)
21
+
type cell = {
22
+
mutable left : int;
23
+
(** Column in which this cell starts (zero-indexed) *)
24
+
mutable right : int;
25
+
(** First column into which this cell does not span *)
26
+
mutable bottom : int;
27
+
(** First row onto which this cell does not span (or rowspan_zero_magic) *)
28
+
headers : string list;
29
+
(** IDs referenced by the headers attribute *)
30
+
is_header : bool;
31
+
(** Whether this is a th element *)
32
+
element_name : string;
33
+
(** "td" or "th" *)
34
+
}
35
+
36
+
(** Create a cell from colspan and rowspan values *)
37
+
let make_cell ~colspan ~rowspan ~headers ~is_header collector =
38
+
let colspan =
39
+
if colspan > max_colspan then (
40
+
Message_collector.add_error collector
41
+
~message:
42
+
(Printf.sprintf
43
+
{|The value of the "colspan" attribute must be less than or equal to %d.|}
44
+
max_colspan)
45
+
();
46
+
max_colspan)
47
+
else colspan
48
+
in
49
+
let rowspan =
50
+
if rowspan > max_rowspan then (
51
+
Message_collector.add_error collector
52
+
~message:
53
+
(Printf.sprintf
54
+
{|The value of the "rowspan" attribute must be less than or equal to %d.|}
55
+
max_rowspan)
56
+
();
57
+
max_rowspan)
58
+
else rowspan
59
+
in
60
+
{
61
+
left = 0;
62
+
right = colspan;
63
+
bottom = (if rowspan = 0 then rowspan_zero_magic else rowspan);
64
+
headers;
65
+
is_header;
66
+
element_name = (if is_header then "th" else "td");
67
+
}
68
+
69
+
(** Set the absolute position of a cell *)
70
+
let set_cell_position cell ~row ~col =
71
+
cell.left <- col;
72
+
cell.right <- cell.right + col;
73
+
if cell.bottom <> rowspan_zero_magic then cell.bottom <- cell.bottom + row
74
+
75
+
(** Check if a cell should be removed from the active set *)
76
+
let should_cull_cell cell ~row = row >= cell.bottom
77
+
78
+
(** Check if two cells overlap horizontally *)
79
+
let cells_overlap_horizontally cell1 cell2 =
80
+
not (cell2.right <= cell1.left || cell1.right <= cell2.left)
81
+
82
+
(** Emit error for horizontal cell overlap *)
83
+
let err_on_horizontal_overlap cell1 cell2 collector =
84
+
if cells_overlap_horizontally cell1 cell2 then (
85
+
Message_collector.add_error collector
86
+
~message:"Table cell is overlapped by later table cell." ();
87
+
Message_collector.add_error collector
88
+
~message:"Table cell overlaps an earlier table cell." ())
89
+
90
+
(** Check if cell spans past end of row group *)
91
+
let err_if_not_rowspan_zero cell ~row_group_type collector =
92
+
if cell.bottom <> rowspan_zero_magic then
93
+
let group_desc =
94
+
match row_group_type with
95
+
| None -> "implicit row group"
96
+
| Some t -> Printf.sprintf {|row group established by a "%s" element|} t
97
+
in
98
+
Message_collector.add_error collector
99
+
~message:
100
+
(Printf.sprintf
101
+
"Table cell spans past the end of its %s; clipped to the end of \
102
+
the row group."
103
+
group_desc)
104
+
()
105
+
106
+
(** {1 Column Range Tracking} *)
107
+
108
+
(** A contiguous range of columns without cells *)
109
+
type column_range = {
110
+
element : string;
111
+
(** Element that established this range (col/colgroup/td/th) *)
112
+
mutable left : int;
113
+
(** Leftmost column in range *)
114
+
mutable right : int;
115
+
(** First column to right not in range *)
116
+
mutable next : column_range option;
117
+
(** Next range in linked list *)
118
+
}
119
+
120
+
(** Create a column range *)
121
+
let make_column_range ~element ~left ~right =
122
+
{ element; left; right; next = None }
123
+
124
+
(** Check if column range contains a single column *)
125
+
let is_single_col range = range.left + 1 = range.right
126
+
127
+
(** Test if a column hits a range (-1=left, 0=in, 1=right) *)
128
+
let hits_column range column =
129
+
if column < range.left then -1
130
+
else if column >= range.right then 1
131
+
else 0
132
+
133
+
(** Remove a column from a range, returning the new range(s) *)
134
+
let remove_column range column =
135
+
if is_single_col range then None
136
+
else if column = range.left then (
137
+
range.left <- range.left + 1;
138
+
Some range)
139
+
else if column + 1 = range.right then (
140
+
range.right <- range.right - 1;
141
+
Some range)
142
+
else
143
+
(* Split into two ranges *)
144
+
let created = make_column_range ~element:range.element ~left:(column + 1) ~right:range.right in
145
+
created.next <- range.next;
146
+
range.next <- Some created;
147
+
range.right <- column;
148
+
Some created
149
+
150
+
(** {1 Row Group State} *)
151
+
152
+
(** State for a row group (explicit or implicit) *)
153
+
type row_group = {
154
+
mutable current_row : int;
155
+
(** Current row index within this group *)
156
+
mutable insertion_point : int;
157
+
(** Column position for next cell insertion *)
158
+
mutable next_old_cell : int;
159
+
(** Index into cells_on_current_row *)
160
+
mutable row_had_cells : bool;
161
+
(** Whether current row has any cells *)
162
+
cells_in_effect : ((int * int), cell) Hashtbl.t;
163
+
(** Cells from previous rows still spanning down, keyed by (bottom, left) *)
164
+
mutable cells_on_current_row : cell array;
165
+
(** Cells from previous rows affecting current row, sorted by left *)
166
+
row_group_type : string option;
167
+
(** Name of row group element (thead/tbody/tfoot) or None for implicit *)
168
+
}
169
+
170
+
(** Create a new row group *)
171
+
let make_row_group ~row_group_type =
172
+
{
173
+
current_row = -1;
174
+
insertion_point = 0;
175
+
next_old_cell = 0;
176
+
row_had_cells = false;
177
+
cells_in_effect = Hashtbl.create 16;
178
+
cells_on_current_row = [||];
179
+
row_group_type;
180
+
}
181
+
182
+
(** Start a new row in the row group *)
183
+
let start_row_in_group group =
184
+
group.current_row <- group.current_row + 1;
185
+
group.insertion_point <- 0;
186
+
group.next_old_cell <- 0;
187
+
group.row_had_cells <- false;
188
+
(* Collect cells still in effect and sort by left column *)
189
+
let active_cells : cell list =
190
+
Hashtbl.fold
191
+
(fun _ (cell : cell) acc -> if not (should_cull_cell cell ~row:group.current_row) then cell :: acc else acc)
192
+
group.cells_in_effect []
193
+
in
194
+
let sorted = List.sort (fun (c1 : cell) (c2 : cell) -> Int.compare c1.left c2.left) active_cells in
195
+
group.cells_on_current_row <- Array.of_list sorted
196
+
197
+
(** Find the next available insertion point *)
198
+
let rec find_insertion_point group =
199
+
if group.next_old_cell < Array.length group.cells_on_current_row then
200
+
let other = group.cells_on_current_row.(group.next_old_cell) in
201
+
if group.insertion_point < other.left then ()
202
+
else (
203
+
let right = other.right in
204
+
if right > group.insertion_point then group.insertion_point <- right;
205
+
group.next_old_cell <- group.next_old_cell + 1;
206
+
find_insertion_point group)
207
+
208
+
(** Add a cell to the row group *)
209
+
let add_cell_to_group group cell collector =
210
+
group.row_had_cells <- true;
211
+
find_insertion_point group;
212
+
set_cell_position cell ~row:group.current_row ~col:group.insertion_point;
213
+
214
+
(* Check for overlaps with cells from previous rows *)
215
+
for i = group.next_old_cell to Array.length group.cells_on_current_row - 1 do
216
+
err_on_horizontal_overlap group.cells_on_current_row.(i) cell collector
217
+
done;
218
+
219
+
(* Add to cells in effect if it spans beyond current row *)
220
+
if cell.bottom > group.current_row + 1 then
221
+
Hashtbl.add group.cells_in_effect (cell.bottom, cell.left) cell;
222
+
223
+
group.insertion_point <- cell.right
224
+
225
+
(** End the current row *)
226
+
let end_row_in_group group collector =
227
+
(if not group.row_had_cells then
228
+
let group_desc =
229
+
match group.row_group_type with
230
+
| None -> "an implicit row group"
231
+
| Some t -> Printf.sprintf {|a row group established by a "%s" element|} t
232
+
in
233
+
Message_collector.add_error collector
234
+
~message:
235
+
(Printf.sprintf {|Row %d of %s has no cells beginning on it.|}
236
+
(group.current_row + 1) group_desc)
237
+
());
238
+
239
+
find_insertion_point group;
240
+
group.cells_on_current_row <- [||];
241
+
242
+
(* Cull cells that don't span to next row *)
243
+
let to_remove = ref [] in
244
+
Hashtbl.iter
245
+
(fun key cell ->
246
+
if should_cull_cell cell ~row:(group.current_row + 1) then to_remove := key :: !to_remove)
247
+
group.cells_in_effect;
248
+
List.iter (Hashtbl.remove group.cells_in_effect) !to_remove;
249
+
250
+
(* Return the final insertion point (row width) *)
251
+
group.insertion_point
252
+
253
+
(** End the row group *)
254
+
let end_row_group group collector =
255
+
Hashtbl.iter
256
+
(fun _ cell -> err_if_not_rowspan_zero cell ~row_group_type:group.row_group_type collector)
257
+
group.cells_in_effect
258
+
259
+
(** {1 Table State} *)
260
+
261
+
(** Parser state within a table *)
262
+
type table_state =
263
+
| InTableAtStart
264
+
| InTableAtPotentialRowGroupStart
265
+
| InColgroup
266
+
| InColInColgroup
267
+
| InColInImplicitGroup
268
+
| InRowGroup
269
+
| InRowInRowGroup
270
+
| InCellInRowGroup
271
+
| InRowInImplicitRowGroup
272
+
| InImplicitRowGroup
273
+
| InCellInImplicitRowGroup
274
+
| InTableColsSeen
275
+
276
+
(** State for a single table *)
277
+
type table = {
278
+
mutable state : table_state;
279
+
mutable suppressed_starts : int;
280
+
(** Count of nested suppressed elements *)
281
+
mutable hard_width : bool;
282
+
(** Whether column count was set by col/colgroup *)
283
+
mutable column_count : int;
284
+
(** Established column count (-1 if not set) *)
285
+
mutable real_column_count : int;
286
+
(** Actual maximum column count seen *)
287
+
mutable pending_colgroup_span : int;
288
+
(** Span for colgroup without col children *)
289
+
header_ids : (string, unit) Hashtbl.t;
290
+
(** IDs of th elements *)
291
+
cells_with_headers : cell list ref;
292
+
(** Cells with headers attribute *)
293
+
mutable current_row_group : row_group option;
294
+
(** Current row group *)
295
+
mutable first_col_range : column_range option;
296
+
(** Head of column range list *)
297
+
mutable last_col_range : column_range option;
298
+
(** Tail of column range list *)
299
+
mutable current_col_range : column_range option;
300
+
(** Current range being inspected *)
301
+
mutable previous_col_range : column_range option;
302
+
(** Previous range inspected *)
303
+
}
304
+
305
+
(** Create a new table *)
306
+
let make_table () =
307
+
{
308
+
state = InTableAtStart;
309
+
suppressed_starts = 0;
310
+
hard_width = false;
311
+
column_count = -1;
312
+
real_column_count = 0;
313
+
pending_colgroup_span = 0;
314
+
header_ids = Hashtbl.create 16;
315
+
cells_with_headers = ref [];
316
+
current_row_group = None;
317
+
first_col_range = None;
318
+
last_col_range = None;
319
+
current_col_range = None;
320
+
previous_col_range = None;
321
+
}
322
+
323
+
(** Append a column range to the list *)
324
+
let append_column_range table range =
325
+
match table.last_col_range with
326
+
| None ->
327
+
table.first_col_range <- Some range;
328
+
table.last_col_range <- Some range
329
+
| Some last ->
330
+
last.next <- Some range;
331
+
table.last_col_range <- Some range
332
+
333
+
(** Report a cell back to table for column tracking *)
334
+
let report_cell_to_table table (cell : cell) =
335
+
let left = cell.left in
336
+
let right = cell.right in
337
+
338
+
(* Check if cell extends past known columns *)
339
+
if right > table.real_column_count then (
340
+
if left = table.real_column_count then (
341
+
(* Entirely past existing columns *)
342
+
if left + 1 <> right then
343
+
append_column_range table
344
+
(make_column_range ~element:cell.element_name ~left:(left + 1) ~right);
345
+
table.real_column_count <- right)
346
+
else (
347
+
(* Partially past existing columns *)
348
+
append_column_range table
349
+
(make_column_range ~element:cell.element_name ~left:table.real_column_count ~right);
350
+
table.real_column_count <- right));
351
+
352
+
(* Track column usage *)
353
+
let rec process_ranges () =
354
+
match table.current_col_range with
355
+
| None -> ()
356
+
| Some range ->
357
+
let hit = hits_column range left in
358
+
if hit = 0 then (
359
+
(* Column hits this range - remove it *)
360
+
match remove_column range left with
361
+
| None ->
362
+
(* Range destroyed *)
363
+
if Option.is_some table.previous_col_range then
364
+
(Option.get table.previous_col_range).next <- range.next;
365
+
if table.first_col_range = Some range then table.first_col_range <- range.next;
366
+
if table.last_col_range = Some range then table.last_col_range <- table.previous_col_range;
367
+
table.current_col_range <- range.next
368
+
| Some new_range ->
369
+
if table.last_col_range = Some range then table.last_col_range <- Some new_range;
370
+
table.current_col_range <- Some new_range)
371
+
else if hit = -1 then
372
+
()
373
+
else (
374
+
(* hit = 1, try next range *)
375
+
table.previous_col_range <- Some range;
376
+
table.current_col_range <- range.next;
377
+
process_ranges ())
378
+
in
379
+
process_ranges ()
380
+
381
+
(** {1 Attribute Parsing} *)
382
+
383
+
(** Parse a non-negative integer attribute, returning 1 if absent or invalid *)
384
+
let parse_non_negative_int attrs name =
385
+
match List.assoc_opt name attrs with
386
+
| None -> 1
387
+
| Some v -> (
388
+
try
389
+
let n = int_of_string v in
390
+
if n >= 0 then n else 1
391
+
with Failure _ -> 1)
392
+
393
+
(** Parse a positive integer attribute, returning 1 if absent or invalid *)
394
+
let parse_positive_int attrs name =
395
+
match List.assoc_opt name attrs with
396
+
| None -> 1
397
+
| Some v -> (
398
+
try
399
+
let n = int_of_string v in
400
+
if n > 0 then n else 1
401
+
with Failure _ -> 1)
402
+
403
+
(** Parse the headers attribute into a list of IDs *)
404
+
let parse_headers attrs =
405
+
match List.assoc_opt "headers" attrs with
406
+
| None -> []
407
+
| Some v ->
408
+
let parts = String.split_on_char ' ' v in
409
+
List.filter (fun s -> String.length s > 0) parts
410
+
411
+
(** Parse span attribute, clamping to max_colspan *)
412
+
let parse_span attrs collector =
413
+
let span = parse_non_negative_int attrs "span" in
414
+
if span > max_colspan then (
415
+
Message_collector.add_error collector
416
+
~message:
417
+
(Printf.sprintf {|The value of the "span" attribute must be less than or equal to %d.|}
418
+
max_colspan)
419
+
();
420
+
max_colspan)
421
+
else span
422
+
423
+
(** {1 Table Event Handlers} *)
424
+
425
+
(** Check if we should suppress the start event *)
426
+
let need_suppress_start table =
427
+
if table.suppressed_starts > 0 then (
428
+
table.suppressed_starts <- table.suppressed_starts + 1;
429
+
true)
430
+
else false
431
+
432
+
(** Check if we should suppress the end event *)
433
+
let need_suppress_end table =
434
+
if table.suppressed_starts > 0 then (
435
+
table.suppressed_starts <- table.suppressed_starts - 1;
436
+
true)
437
+
else false
438
+
439
+
(** Start a row group *)
440
+
let start_row_group table local_name collector =
441
+
if need_suppress_start table then ()
442
+
else
443
+
match table.state with
444
+
| InImplicitRowGroup -> (
445
+
match table.current_row_group with
446
+
| Some group ->
447
+
end_row_group group collector;
448
+
table.current_row_group <- Some (make_row_group ~row_group_type:(Some local_name));
449
+
table.state <- InRowGroup
450
+
| None -> failwith "Bug: InImplicitRowGroup but no current row group")
451
+
| InTableAtStart | InTableColsSeen | InTableAtPotentialRowGroupStart ->
452
+
table.current_row_group <- Some (make_row_group ~row_group_type:(Some local_name));
453
+
table.state <- InRowGroup
454
+
| _ -> table.suppressed_starts <- 1
455
+
456
+
(** End a row group *)
457
+
let end_row_group_handler table collector =
458
+
if need_suppress_end table then ()
459
+
else
460
+
match table.state with
461
+
| InRowGroup -> (
462
+
match table.current_row_group with
463
+
| Some group ->
464
+
end_row_group group collector;
465
+
table.current_row_group <- None;
466
+
table.state <- InTableAtPotentialRowGroupStart
467
+
| None -> failwith "Bug: InRowGroup but no current row group")
468
+
| _ -> failwith "Bug: end_row_group in wrong state"
469
+
470
+
(** Start a row *)
471
+
let start_row table collector =
472
+
if need_suppress_start table then ()
473
+
else
474
+
match table.state with
475
+
| InTableAtStart | InTableColsSeen | InTableAtPotentialRowGroupStart ->
476
+
table.current_row_group <- Some (make_row_group ~row_group_type:None);
477
+
table.state <- InRowInImplicitRowGroup;
478
+
table.current_col_range <- table.first_col_range;
479
+
table.previous_col_range <- None;
480
+
(match table.current_row_group with
481
+
| Some group -> start_row_in_group group
482
+
| None -> failwith "Bug: just created row group")
483
+
| InImplicitRowGroup ->
484
+
table.state <- InRowInImplicitRowGroup;
485
+
table.current_col_range <- table.first_col_range;
486
+
table.previous_col_range <- None;
487
+
(match table.current_row_group with
488
+
| Some group -> start_row_in_group group
489
+
| None -> failwith "Bug: InImplicitRowGroup but no row group")
490
+
| InRowGroup ->
491
+
table.state <- InRowInRowGroup;
492
+
table.current_col_range <- table.first_col_range;
493
+
table.previous_col_range <- None;
494
+
(match table.current_row_group with
495
+
| Some group -> start_row_in_group group
496
+
| None -> failwith "Bug: InRowGroup but no row group")
497
+
| _ -> table.suppressed_starts <- 1
498
+
499
+
(** End a row *)
500
+
let end_row table collector =
501
+
if need_suppress_end table then ()
502
+
else
503
+
match table.state with
504
+
| InRowInRowGroup ->
505
+
table.state <- InRowGroup;
506
+
(match table.current_row_group with
507
+
| Some group ->
508
+
let row_width = end_row_in_group group collector in
509
+
(* Check row width against column count *)
510
+
if table.hard_width then (
511
+
if row_width > table.column_count then
512
+
Message_collector.add_error collector
513
+
~message:
514
+
(Printf.sprintf
515
+
{|A table row was %d columns wide and exceeded the column count established using column markup (%d).|}
516
+
row_width table.column_count)
517
+
()
518
+
else if row_width < table.column_count then
519
+
Message_collector.add_error collector
520
+
~message:
521
+
(Printf.sprintf
522
+
{|A table row was %d columns wide, which is less than the column count established using column markup (%d).|}
523
+
row_width table.column_count)
524
+
())
525
+
else if table.column_count = -1 then
526
+
table.column_count <- row_width
527
+
else (
528
+
if row_width > table.column_count then
529
+
Message_collector.add_warning collector
530
+
~message:
531
+
(Printf.sprintf
532
+
{|A table row was %d columns wide and exceeded the column count established by the first row (%d).|}
533
+
row_width table.column_count)
534
+
()
535
+
else if row_width < table.column_count then
536
+
Message_collector.add_warning collector
537
+
~message:
538
+
(Printf.sprintf
539
+
{|A table row was %d columns wide, which is less than the column count established by the first row (%d).|}
540
+
row_width table.column_count)
541
+
())
542
+
| None -> failwith "Bug: InRowInRowGroup but no row group")
543
+
| InRowInImplicitRowGroup ->
544
+
table.state <- InImplicitRowGroup;
545
+
(match table.current_row_group with
546
+
| Some group ->
547
+
let row_width = end_row_in_group group collector in
548
+
(* Same column count checking as above *)
549
+
if table.hard_width then (
550
+
if row_width > table.column_count then
551
+
Message_collector.add_error collector
552
+
~message:
553
+
(Printf.sprintf
554
+
{|A table row was %d columns wide and exceeded the column count established using column markup (%d).|}
555
+
row_width table.column_count)
556
+
()
557
+
else if row_width < table.column_count then
558
+
Message_collector.add_error collector
559
+
~message:
560
+
(Printf.sprintf
561
+
{|A table row was %d columns wide, which is less than the column count established using column markup (%d).|}
562
+
row_width table.column_count)
563
+
())
564
+
else if table.column_count = -1 then
565
+
table.column_count <- row_width
566
+
else (
567
+
if row_width > table.column_count then
568
+
Message_collector.add_warning collector
569
+
~message:
570
+
(Printf.sprintf
571
+
{|A table row was %d columns wide and exceeded the column count established by the first row (%d).|}
572
+
row_width table.column_count)
573
+
()
574
+
else if row_width < table.column_count then
575
+
Message_collector.add_warning collector
576
+
~message:
577
+
(Printf.sprintf
578
+
{|A table row was %d columns wide, which is less than the column count established by the first row (%d).|}
579
+
row_width table.column_count)
580
+
())
581
+
| None -> failwith "Bug: InRowInImplicitRowGroup but no row group")
582
+
| _ -> failwith "Bug: end_row in wrong state"
583
+
584
+
(** Start a cell *)
585
+
let start_cell table is_header attrs collector =
586
+
if need_suppress_start table then ()
587
+
else
588
+
match table.state with
589
+
| InRowInRowGroup ->
590
+
table.state <- InCellInRowGroup;
591
+
(* Record header ID if present *)
592
+
if is_header then (
593
+
match List.assoc_opt "id" attrs with
594
+
| Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id ()
595
+
| _ -> ());
596
+
(* Parse cell attributes *)
597
+
let colspan = abs (parse_positive_int attrs "colspan") in
598
+
let rowspan = abs (parse_non_negative_int attrs "rowspan") in
599
+
let headers = parse_headers attrs in
600
+
let cell = make_cell ~colspan ~rowspan ~headers ~is_header collector in
601
+
if List.length headers > 0 then table.cells_with_headers := cell :: !(table.cells_with_headers);
602
+
(match table.current_row_group with
603
+
| Some group ->
604
+
add_cell_to_group group cell collector;
605
+
report_cell_to_table table cell
606
+
| None -> failwith "Bug: InRowInRowGroup but no row group")
607
+
| InRowInImplicitRowGroup ->
608
+
table.state <- InCellInImplicitRowGroup;
609
+
(* Same logic as above *)
610
+
if is_header then (
611
+
match List.assoc_opt "id" attrs with
612
+
| Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id ()
613
+
| _ -> ());
614
+
let colspan = abs (parse_positive_int attrs "colspan") in
615
+
let rowspan = abs (parse_non_negative_int attrs "rowspan") in
616
+
let headers = parse_headers attrs in
617
+
let cell = make_cell ~colspan ~rowspan ~headers ~is_header collector in
618
+
if List.length headers > 0 then table.cells_with_headers := cell :: !(table.cells_with_headers);
619
+
(match table.current_row_group with
620
+
| Some group ->
621
+
add_cell_to_group group cell collector;
622
+
report_cell_to_table table cell
623
+
| None -> failwith "Bug: InRowInImplicitRowGroup but no row group")
624
+
| _ -> table.suppressed_starts <- 1
625
+
626
+
(** End a cell *)
627
+
let end_cell table =
628
+
if need_suppress_end table then ()
629
+
else
630
+
match table.state with
631
+
| InCellInRowGroup -> table.state <- InRowInRowGroup
632
+
| InCellInImplicitRowGroup -> table.state <- InRowInImplicitRowGroup
633
+
| _ -> failwith "Bug: end_cell in wrong state"
634
+
635
+
(** Start a colgroup *)
636
+
let start_colgroup table attrs collector =
637
+
if need_suppress_start table then ()
638
+
else
639
+
match table.state with
640
+
| InTableAtStart ->
641
+
table.hard_width <- true;
642
+
table.column_count <- 0;
643
+
table.pending_colgroup_span <- parse_span attrs collector;
644
+
table.state <- InColgroup
645
+
| InTableColsSeen ->
646
+
table.pending_colgroup_span <- parse_span attrs collector;
647
+
table.state <- InColgroup
648
+
| _ -> table.suppressed_starts <- 1
649
+
650
+
(** End a colgroup *)
651
+
let end_colgroup table =
652
+
if need_suppress_end table then ()
653
+
else
654
+
match table.state with
655
+
| InColgroup ->
656
+
if table.pending_colgroup_span <> 0 then (
657
+
let right = table.column_count + abs table.pending_colgroup_span in
658
+
let range = make_column_range ~element:"colgroup" ~left:table.column_count ~right in
659
+
append_column_range table range;
660
+
table.column_count <- right);
661
+
table.real_column_count <- table.column_count;
662
+
table.state <- InTableColsSeen
663
+
| _ -> failwith "Bug: end_colgroup in wrong state"
664
+
665
+
(** Start a col *)
666
+
let start_col table attrs collector =
667
+
if need_suppress_start table then ()
668
+
else
669
+
match table.state with
670
+
| InTableAtStart ->
671
+
table.hard_width <- true;
672
+
table.column_count <- 0;
673
+
table.state <- InColInImplicitGroup;
674
+
let span = abs (parse_span attrs collector) in
675
+
let right = table.column_count + span in
676
+
let range = make_column_range ~element:"col" ~left:table.column_count ~right in
677
+
append_column_range table range;
678
+
table.column_count <- right;
679
+
table.real_column_count <- table.column_count
680
+
| InTableColsSeen ->
681
+
table.state <- InColInImplicitGroup;
682
+
let span = abs (parse_span attrs collector) in
683
+
let right = table.column_count + span in
684
+
let range = make_column_range ~element:"col" ~left:table.column_count ~right in
685
+
append_column_range table range;
686
+
table.column_count <- right;
687
+
table.real_column_count <- table.column_count
688
+
| InColgroup ->
689
+
if table.pending_colgroup_span > 0 then
690
+
Message_collector.add_warning collector
691
+
~message:
692
+
(Printf.sprintf
693
+
"A col element causes a span attribute with value %d to be ignored on the \
694
+
parent colgroup."
695
+
table.pending_colgroup_span)
696
+
();
697
+
table.pending_colgroup_span <- 0;
698
+
table.state <- InColInColgroup;
699
+
let span = abs (parse_span attrs collector) in
700
+
let right = table.column_count + span in
701
+
let range = make_column_range ~element:"col" ~left:table.column_count ~right in
702
+
append_column_range table range;
703
+
table.column_count <- right;
704
+
table.real_column_count <- table.column_count
705
+
| _ -> table.suppressed_starts <- 1
706
+
707
+
(** End a col *)
708
+
let end_col table =
709
+
if need_suppress_end table then ()
710
+
else
711
+
match table.state with
712
+
| InColInImplicitGroup -> table.state <- InTableColsSeen
713
+
| InColInColgroup -> table.state <- InColgroup
714
+
| _ -> failwith "Bug: end_col in wrong state"
715
+
716
+
(** End a table *)
717
+
let end_table table collector =
718
+
(match table.state with
719
+
| InImplicitRowGroup -> (
720
+
match table.current_row_group with
721
+
| Some group ->
722
+
end_row_group group collector;
723
+
table.current_row_group <- None
724
+
| None -> failwith "Bug: InImplicitRowGroup but no row group")
725
+
| InTableAtStart | InTableAtPotentialRowGroupStart | InTableColsSeen -> ()
726
+
| _ -> failwith "Bug: end_table in wrong state");
727
+
728
+
(* Check header reference integrity *)
729
+
List.iter
730
+
(fun cell ->
731
+
List.iter
732
+
(fun heading ->
733
+
if not (Hashtbl.mem table.header_ids heading) then
734
+
Message_collector.add_error collector
735
+
~message:
736
+
(Printf.sprintf
737
+
{|The "headers" attribute on the element "%s" refers to the ID "%s", but there is no "th" element with that ID in the same table.|}
738
+
cell.element_name heading)
739
+
())
740
+
cell.headers)
741
+
!(table.cells_with_headers);
742
+
743
+
(* Check that each column established by col/colgroup has cells *)
744
+
let rec check_ranges range =
745
+
match range with
746
+
| None -> ()
747
+
| Some r ->
748
+
if is_single_col r then
749
+
Message_collector.add_error collector
750
+
~message:
751
+
(Printf.sprintf {|Table column %d established by element "%s" has no cells beginning in it.|}
752
+
r.right r.element)
753
+
()
754
+
else
755
+
Message_collector.add_error collector
756
+
~message:
757
+
(Printf.sprintf
758
+
{|Table columns in range %d…%d established by element "%s" have no cells beginning in them.|}
759
+
(r.left + 1) r.right r.element)
760
+
();
761
+
check_ranges r.next
762
+
in
763
+
check_ranges table.first_col_range
764
+
765
+
(** {1 Checker State} *)
766
+
767
+
type state = { tables : table list ref (* Stack of nested tables *) }
768
+
769
+
let create () = { tables = ref [] }
770
+
771
+
let reset state = state.tables := []
772
+
773
+
let start_element state ~name ~namespace ~attrs collector =
774
+
match namespace with
775
+
| Some ns when ns = html_ns -> (
776
+
match name with
777
+
| "table" ->
778
+
(* Push a new table onto the stack *)
779
+
state.tables := make_table () :: !(state.tables)
780
+
| _ -> (
781
+
match !(state.tables) with
782
+
| [] -> ()
783
+
| table :: _ -> (
784
+
match name with
785
+
| "td" -> start_cell table false attrs collector
786
+
| "th" -> start_cell table true attrs collector
787
+
| "tr" -> start_row table collector
788
+
| "tbody" | "thead" | "tfoot" -> start_row_group table name collector
789
+
| "col" -> start_col table attrs collector
790
+
| "colgroup" -> start_colgroup table attrs collector
791
+
| _ -> ())))
792
+
| _ -> ()
793
+
794
+
let end_element state ~name ~namespace collector =
795
+
match namespace with
796
+
| Some ns when ns = html_ns -> (
797
+
match name with
798
+
| "table" -> (
799
+
match !(state.tables) with
800
+
| [] -> failwith "Bug: end table but no table on stack"
801
+
| table :: rest ->
802
+
end_table table collector;
803
+
state.tables := rest)
804
+
| _ -> (
805
+
match !(state.tables) with
806
+
| [] -> ()
807
+
| table :: _ -> (
808
+
match name with
809
+
| "td" | "th" -> end_cell table
810
+
| "tr" -> end_row table collector
811
+
| "tbody" | "thead" | "tfoot" -> end_row_group_handler table collector
812
+
| "col" -> end_col table
813
+
| "colgroup" -> end_colgroup table
814
+
| _ -> ())))
815
+
| _ -> ()
816
+
817
+
let characters _state _text _collector = ()
818
+
819
+
let end_document state collector =
820
+
if !(state.tables) <> [] then
821
+
Message_collector.add_error collector ~message:"Unclosed table element at end of document." ()
822
+
823
+
let checker =
824
+
(module struct
825
+
type nonrec state = state
826
+
827
+
let create = create
828
+
let reset = reset
829
+
let start_element = start_element
830
+
let end_element = end_element
831
+
let characters = characters
832
+
let end_document = end_document
833
+
end : Checker.S)
+83
lib/html5_checker/specialized/table_checker.mli
+83
lib/html5_checker/specialized/table_checker.mli
···
1
+
(** Table structure validation checker.
2
+
3
+
Validates HTML table integrity including:
4
+
- Cell overlap detection (rowspan/colspan causing overlap)
5
+
- Spanning past table boundaries
6
+
- Proper table structure (thead/tbody/tfoot ordering)
7
+
- Maximum colspan limit (1000 per HTML spec)
8
+
- Maximum rowspan limit (65534 per HTML spec)
9
+
10
+
{2 Validation Rules}
11
+
12
+
{b Cell Positioning}
13
+
- Detects when two cells claim the same grid position
14
+
- Validates that rowspan/colspan don't cause cells to extend past boundaries
15
+
- Tracks cell positions accounting for rowspan/colspan from previous rows
16
+
17
+
{b Span Limits}
18
+
- [colspan] must be positive and <= 1000
19
+
- [rowspan] must be non-negative and <= 65534
20
+
- [rowspan=0] is a special value meaning "span to end of row group"
21
+
22
+
{b Table Structure}
23
+
- [caption] must be first child of [table] if present
24
+
- [thead] must come before [tbody] and [tfoot]
25
+
- Only one [thead] and one [tfoot] allowed per table
26
+
- [col] elements can establish explicit column count
27
+
- [colgroup] elements can group columns
28
+
29
+
{b Row Validation}
30
+
- Each row must have at least one cell
31
+
- Row widths should match the established column count
32
+
- Cells cannot overlap horizontally in the same row
33
+
34
+
{b Header References}
35
+
- [headers] attribute on [td]/[th] must reference valid [th] IDs
36
+
- All referenced header IDs must exist in the same table
37
+
38
+
{3 Example Valid Table}
39
+
40
+
{v
41
+
<table>
42
+
<thead>
43
+
<tr>
44
+
<th id="h1">Header 1</th>
45
+
<th id="h2">Header 2</th>
46
+
</tr>
47
+
</thead>
48
+
<tbody>
49
+
<tr>
50
+
<td headers="h1">Cell 1</td>
51
+
<td headers="h2">Cell 2</td>
52
+
</tr>
53
+
</tbody>
54
+
</table>
55
+
v}
56
+
57
+
{3 Example Invalid Table (Overlapping Cells)}
58
+
59
+
{v
60
+
<table>
61
+
<tr>
62
+
<td rowspan="2">A</td>
63
+
<td>B</td>
64
+
</tr>
65
+
<tr>
66
+
<td>C</td> <!-- Would overlap with A's rowspan -->
67
+
</tr>
68
+
</table>
69
+
v}
70
+
71
+
@see <https://html.spec.whatwg.org/multipage/tables.html> WHATWG HTML: Tables
72
+
@see <https://www.w3.org/TR/html52/tabular-data.html> W3C HTML 5.2: Tables *)
73
+
74
+
include Checker.S
75
+
76
+
val checker : Checker.t
77
+
(** A first-class module instance of this checker.
78
+
79
+
{b Usage:}
80
+
{[
81
+
let checker = Table_checker.checker in
82
+
Checker_registry.register "table-structure" checker
83
+
]} *)
+2
-1
lib/html5rw/parser/parser_impl.ml
+2
-1
lib/html5rw/parser/parser_impl.ml
···
25
25
module TreeBuilderSink = struct
26
26
type t = Parser_tree_builder.t
27
27
28
-
let process tb token =
28
+
let process tb token ~line ~column =
29
+
Parser_tree_builder.set_position tb ~line ~column;
29
30
Parser_tree_builder.process_token tb token;
30
31
(* Check if we need to switch tokenizer state based on current element *)
31
32
(* Only switch for HTML namespace elements - SVG/MathML use different rules *)
+10
-1
lib/html5rw/parser/parser_tree_builder.ml
+10
-1
lib/html5rw/parser/parser_tree_builder.ml
···
42
42
fragment_context : fragment_context option;
43
43
mutable fragment_context_element : Dom.node option;
44
44
iframe_srcdoc : bool;
45
+
mutable current_line : int;
46
+
mutable current_column : int;
45
47
}
46
48
47
49
let create ?(collect_errors=false) ?fragment_context ?(iframe_srcdoc=false) () =
···
66
68
fragment_context;
67
69
fragment_context_element = None;
68
70
iframe_srcdoc;
71
+
current_line = 1;
72
+
current_column = 1;
69
73
} in
70
74
(* Initialize fragment parsing *)
71
75
(match fragment_context with
···
110
114
| None -> ());
111
115
t
112
116
117
+
(* Position tracking for error reporting *)
118
+
let set_position t ~line ~column =
119
+
t.current_line <- line;
120
+
t.current_column <- column
121
+
113
122
(* Error handling *)
114
123
let parse_error t code =
115
124
if t.collect_errors then
116
-
t.errors <- { code = Parse_error_code.of_string code; line = 0; column = 0 } :: t.errors
125
+
t.errors <- { code = Parse_error_code.of_string code; line = t.current_line; column = t.current_column } :: t.errors
117
126
118
127
(* Stack helpers *)
119
128
let current_node t =
+1
-1
lib/html5rw/tokenizer/tokenizer.mli
+1
-1
lib/html5rw/tokenizer/tokenizer.mli
···
194
194
*)
195
195
module type SINK = sig
196
196
type t
197
-
val process : t -> Tokenizer_token.t -> [ `Continue | `SwitchTo of Tokenizer_state.t ]
197
+
val process : t -> Tokenizer_token.t -> line:int -> column:int -> [ `Continue | `SwitchTo of Tokenizer_state.t ]
198
198
val adjusted_current_node_in_html_namespace : t -> bool
199
199
end
200
200
+41
-38
lib/html5rw/tokenizer/tokenizer_impl.ml
+41
-38
lib/html5rw/tokenizer/tokenizer_impl.ml
···
11
11
(* Token sink interface *)
12
12
module type SINK = sig
13
13
type t
14
-
val process : t -> Tokenizer_token.t -> [ `Continue | `SwitchTo of Tokenizer_state.t ]
14
+
val process : t -> Tokenizer_token.t -> line:int -> column:int -> [ `Continue | `SwitchTo of Tokenizer_state.t ]
15
15
val adjusted_current_node_in_html_namespace : t -> bool
16
16
end
17
17
···
184
184
let data = Buffer.contents t.pending_chars in
185
185
Buffer.clear t.pending_chars;
186
186
let data = if t.xml_mode then transform_xml_chars data else data in
187
-
ignore (S.process t.sink (Tokenizer_token.Character data))
187
+
let line, column = Tokenizer_stream.position t.stream in
188
+
ignore (S.process t.sink (Tokenizer_token.Character data) ~line ~column)
188
189
end
189
190
in
190
191
191
192
let emit token =
192
193
emit_pending_chars ();
193
-
match S.process t.sink token with
194
+
let line, column = Tokenizer_stream.position t.stream in
195
+
match S.process t.sink token ~line ~column with
194
196
| `Continue -> ()
195
197
| `SwitchTo new_state -> t.state <- new_state
196
198
in
···
278
280
handle_eof ()
279
281
end else if Tokenizer_stream.is_eof t.stream then begin
280
282
emit_pending_chars ();
281
-
ignore (S.process t.sink Tokenizer_token.EOF)
283
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
282
284
end else begin
283
285
step ();
284
286
process_state ()
···
288
290
match t.state with
289
291
| Tokenizer_state.Data ->
290
292
emit_pending_chars ();
291
-
ignore (S.process t.sink Tokenizer_token.EOF)
293
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
292
294
| Tokenizer_state.Tag_open ->
293
295
error t "eof-before-tag-name";
294
296
emit_char t '<';
295
297
emit_pending_chars ();
296
-
ignore (S.process t.sink Tokenizer_token.EOF)
298
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
297
299
| Tokenizer_state.End_tag_open ->
298
300
error t "eof-before-tag-name";
299
301
emit_str t "</";
300
302
emit_pending_chars ();
301
-
ignore (S.process t.sink Tokenizer_token.EOF)
303
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
302
304
| Tokenizer_state.Tag_name
303
305
| Tokenizer_state.Before_attribute_name
304
306
| Tokenizer_state.Attribute_name
···
311
313
| Tokenizer_state.Self_closing_start_tag ->
312
314
error t "eof-in-tag";
313
315
emit_pending_chars ();
314
-
ignore (S.process t.sink Tokenizer_token.EOF)
316
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
315
317
| Tokenizer_state.Rawtext ->
316
318
emit_pending_chars ();
317
-
ignore (S.process t.sink Tokenizer_token.EOF)
319
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
318
320
| Tokenizer_state.Rawtext_less_than_sign ->
319
321
emit_char t '<';
320
322
emit_pending_chars ();
321
-
ignore (S.process t.sink Tokenizer_token.EOF)
323
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
322
324
| Tokenizer_state.Rawtext_end_tag_open ->
323
325
emit_str t "</";
324
326
emit_pending_chars ();
325
-
ignore (S.process t.sink Tokenizer_token.EOF)
327
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
326
328
| Tokenizer_state.Rawtext_end_tag_name ->
327
329
emit_str t "</";
328
330
emit_str t (Buffer.contents t.temp_buffer);
329
331
emit_pending_chars ();
330
-
ignore (S.process t.sink Tokenizer_token.EOF)
332
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
331
333
| Tokenizer_state.Rcdata ->
332
334
emit_pending_chars ();
333
-
ignore (S.process t.sink Tokenizer_token.EOF)
335
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
334
336
| Tokenizer_state.Rcdata_less_than_sign ->
335
337
emit_char t '<';
336
338
emit_pending_chars ();
337
-
ignore (S.process t.sink Tokenizer_token.EOF)
339
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
338
340
| Tokenizer_state.Rcdata_end_tag_open ->
339
341
emit_str t "</";
340
342
emit_pending_chars ();
341
-
ignore (S.process t.sink Tokenizer_token.EOF)
343
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
342
344
| Tokenizer_state.Rcdata_end_tag_name ->
343
345
emit_str t "</";
344
346
emit_str t (Buffer.contents t.temp_buffer);
345
347
emit_pending_chars ();
346
-
ignore (S.process t.sink Tokenizer_token.EOF)
348
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
347
349
| Tokenizer_state.Script_data ->
348
350
emit_pending_chars ();
349
-
ignore (S.process t.sink Tokenizer_token.EOF)
351
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
350
352
| Tokenizer_state.Script_data_less_than_sign ->
351
353
emit_char t '<';
352
354
emit_pending_chars ();
353
-
ignore (S.process t.sink Tokenizer_token.EOF)
355
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
354
356
| Tokenizer_state.Script_data_end_tag_open ->
355
357
emit_str t "</";
356
358
emit_pending_chars ();
357
-
ignore (S.process t.sink Tokenizer_token.EOF)
359
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
358
360
| Tokenizer_state.Script_data_end_tag_name ->
359
361
emit_str t "</";
360
362
emit_str t (Buffer.contents t.temp_buffer);
361
363
emit_pending_chars ();
362
-
ignore (S.process t.sink Tokenizer_token.EOF)
364
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
363
365
| Tokenizer_state.Script_data_escape_start
364
366
| Tokenizer_state.Script_data_escape_start_dash
365
367
| Tokenizer_state.Script_data_escaped
···
367
369
| Tokenizer_state.Script_data_escaped_dash_dash ->
368
370
error t "eof-in-script-html-comment-like-text";
369
371
emit_pending_chars ();
370
-
ignore (S.process t.sink Tokenizer_token.EOF)
372
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
371
373
| Tokenizer_state.Script_data_escaped_less_than_sign ->
372
374
emit_char t '<';
373
375
emit_pending_chars ();
374
-
ignore (S.process t.sink Tokenizer_token.EOF)
376
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
375
377
| Tokenizer_state.Script_data_escaped_end_tag_open ->
376
378
emit_str t "</";
377
379
emit_pending_chars ();
378
-
ignore (S.process t.sink Tokenizer_token.EOF)
380
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
379
381
| Tokenizer_state.Script_data_escaped_end_tag_name ->
380
382
emit_str t "</";
381
383
emit_str t (Buffer.contents t.temp_buffer);
382
384
emit_pending_chars ();
383
-
ignore (S.process t.sink Tokenizer_token.EOF)
385
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
384
386
| Tokenizer_state.Script_data_double_escape_start
385
387
| Tokenizer_state.Script_data_double_escaped
386
388
| Tokenizer_state.Script_data_double_escaped_dash
387
389
| Tokenizer_state.Script_data_double_escaped_dash_dash ->
388
390
error t "eof-in-script-html-comment-like-text";
389
391
emit_pending_chars ();
390
-
ignore (S.process t.sink Tokenizer_token.EOF)
392
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
391
393
| Tokenizer_state.Script_data_double_escaped_less_than_sign ->
392
394
(* '<' was already emitted when entering this state from Script_data_double_escaped *)
393
395
emit_pending_chars ();
394
-
ignore (S.process t.sink Tokenizer_token.EOF)
396
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
395
397
| Tokenizer_state.Script_data_double_escape_end ->
396
398
emit_pending_chars ();
397
-
ignore (S.process t.sink Tokenizer_token.EOF)
399
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
398
400
| Tokenizer_state.Plaintext ->
399
401
emit_pending_chars ();
400
-
ignore (S.process t.sink Tokenizer_token.EOF)
402
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
401
403
| Tokenizer_state.Comment_start
402
404
| Tokenizer_state.Comment_start_dash
403
405
| Tokenizer_state.Comment
···
411
413
error t "eof-in-comment";
412
414
emit_current_comment ();
413
415
emit_pending_chars ();
414
-
ignore (S.process t.sink Tokenizer_token.EOF)
416
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
415
417
| Tokenizer_state.Bogus_comment ->
416
418
emit_current_comment ();
417
419
emit_pending_chars ();
418
-
ignore (S.process t.sink Tokenizer_token.EOF)
420
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
419
421
| Tokenizer_state.Markup_declaration_open ->
420
422
error t "incorrectly-opened-comment";
421
423
Buffer.clear t.current_comment;
422
424
emit_current_comment ();
423
425
emit_pending_chars ();
424
-
ignore (S.process t.sink Tokenizer_token.EOF)
426
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
425
427
| Tokenizer_state.Doctype
426
428
| Tokenizer_state.Before_doctype_name ->
427
429
error t "eof-in-doctype";
···
429
431
t.current_doctype_force_quirks <- true;
430
432
emit_current_doctype ();
431
433
emit_pending_chars ();
432
-
ignore (S.process t.sink Tokenizer_token.EOF)
434
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
433
435
| Tokenizer_state.Doctype_name
434
436
| Tokenizer_state.After_doctype_name
435
437
| Tokenizer_state.After_doctype_public_keyword
···
447
449
t.current_doctype_force_quirks <- true;
448
450
emit_current_doctype ();
449
451
emit_pending_chars ();
450
-
ignore (S.process t.sink Tokenizer_token.EOF)
452
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
451
453
| Tokenizer_state.Bogus_doctype ->
452
454
emit_current_doctype ();
453
455
emit_pending_chars ();
454
-
ignore (S.process t.sink Tokenizer_token.EOF)
456
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
455
457
| Tokenizer_state.Cdata_section ->
456
458
error t "eof-in-cdata";
457
459
emit_pending_chars ();
458
-
ignore (S.process t.sink Tokenizer_token.EOF)
460
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
459
461
| Tokenizer_state.Cdata_section_bracket ->
460
462
error t "eof-in-cdata";
461
463
emit_char t ']';
462
464
emit_pending_chars ();
463
-
ignore (S.process t.sink Tokenizer_token.EOF)
465
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
464
466
| Tokenizer_state.Cdata_section_end ->
465
467
error t "eof-in-cdata";
466
468
emit_str t "]]";
467
469
emit_pending_chars ();
468
-
ignore (S.process t.sink Tokenizer_token.EOF)
470
+
let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column)
469
471
| Tokenizer_state.Character_reference ->
470
472
(* state_character_reference never ran, so initialize temp_buffer with & *)
471
473
Buffer.clear t.temp_buffer;
···
617
619
(* Emit pending chars first, then emit null separately for proper tree builder handling *)
618
620
emit_pending_chars ();
619
621
error t "unexpected-null-character";
620
-
ignore (S.process t.sink (Tokenizer_token.Character "\x00"))
622
+
let line, column = Tokenizer_stream.position t.stream in
623
+
ignore (S.process t.sink (Tokenizer_token.Character "\x00") ~line ~column)
621
624
| Some c ->
622
625
emit_char_checked c
623
626
| None -> ()
+15
test/dune
+15
test/dune
···
54
54
(glob_files ../html5lib-tests/serializer/*.test))
55
55
(action
56
56
(run %{exe:test_serializer.exe} ../html5lib-tests/serializer)))
57
+
58
+
(executable
59
+
(name test_nesting_checker)
60
+
(modules test_nesting_checker)
61
+
(libraries html5rw.checker))
62
+
63
+
(executable
64
+
(name test_html5_checker)
65
+
(modules test_html5_checker)
66
+
(libraries bytesrw html5rw html5rw.checker str))
67
+
68
+
(rule
69
+
(alias runtest)
70
+
(action
71
+
(run %{exe:test_html5_checker.exe})))
+298
test/test_html5_checker.ml
+298
test/test_html5_checker.ml
···
1
+
(** Tests for the html5_checker library *)
2
+
3
+
(** Helper to create a reader from a string *)
4
+
let reader_of_string s = Bytesrw.Bytes.Reader.of_string s
5
+
6
+
(** Helper to check if a message contains a substring *)
7
+
let message_contains msg substring =
8
+
String.lowercase_ascii msg.Html5_checker.Message.message
9
+
|> fun s -> String.length s >= String.length substring &&
10
+
try
11
+
ignore (Str.search_forward (Str.regexp_case_fold (Str.quote substring)) s 0);
12
+
true
13
+
with Not_found -> false
14
+
15
+
(** Test that valid HTML5 produces no errors *)
16
+
let test_valid_html5 () =
17
+
Printf.printf "Test 1: Valid HTML5 document\n";
18
+
let html = {|<!DOCTYPE html>
19
+
<html lang="en">
20
+
<head><title>Test</title></head>
21
+
<body><p>Hello world</p></body>
22
+
</html>|} in
23
+
let reader = reader_of_string html in
24
+
let result = Html5_checker.check reader in
25
+
let errors = Html5_checker.errors result in
26
+
Printf.printf " Found %d error(s)\n" (List.length errors);
27
+
if List.length errors > 0 then begin
28
+
List.iter (fun msg ->
29
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
30
+
) errors;
31
+
end else
32
+
Printf.printf " OK: No errors as expected\n"
33
+
34
+
(** Test that missing DOCTYPE is detected *)
35
+
let test_missing_doctype () =
36
+
Printf.printf "\nTest 2: Missing DOCTYPE\n";
37
+
let html = "<html><body>Hello</body></html>" in
38
+
let reader = reader_of_string html in
39
+
let result = Html5_checker.check reader in
40
+
let errors = Html5_checker.errors result in
41
+
Printf.printf " Found %d error(s)\n" (List.length errors);
42
+
if List.length errors = 0 then
43
+
Printf.printf " Warning: Expected parse errors for missing DOCTYPE\n"
44
+
else begin
45
+
List.iter (fun msg ->
46
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
47
+
) errors;
48
+
end
49
+
50
+
(** Test that obsolete elements are detected *)
51
+
let test_obsolete_element () =
52
+
Printf.printf "\nTest 3: Obsolete <center> element\n";
53
+
let html = "<!DOCTYPE html><html><body><center>Centered</center></body></html>" in
54
+
let reader = reader_of_string html in
55
+
let result = Html5_checker.check reader in
56
+
let all_msgs = Html5_checker.messages result in
57
+
Printf.printf " Found %d message(s)\n" (List.length all_msgs);
58
+
let obsolete_msgs = List.filter (fun m ->
59
+
message_contains m "obsolete" || message_contains m "center"
60
+
) all_msgs in
61
+
if List.length obsolete_msgs > 0 then begin
62
+
Printf.printf " Found obsolete-related messages:\n";
63
+
List.iter (fun msg ->
64
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
65
+
) obsolete_msgs;
66
+
end else
67
+
Printf.printf " Note: No obsolete element warnings found (checker may not be enabled)\n"
68
+
69
+
(** Test duplicate IDs *)
70
+
let test_duplicate_id () =
71
+
Printf.printf "\nTest 4: Duplicate ID attributes\n";
72
+
let html = {|<!DOCTYPE html><html><body>
73
+
<div id="foo">First</div>
74
+
<div id="foo">Second</div>
75
+
</body></html>|} in
76
+
let reader = reader_of_string html in
77
+
let result = Html5_checker.check reader in
78
+
let all_msgs = Html5_checker.messages result in
79
+
Printf.printf " Found %d message(s)\n" (List.length all_msgs);
80
+
let id_msgs = List.filter (fun m ->
81
+
message_contains m "duplicate" || message_contains m "id"
82
+
) all_msgs in
83
+
if List.length id_msgs > 0 then begin
84
+
Printf.printf " Found ID-related messages:\n";
85
+
List.iter (fun msg ->
86
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
87
+
) id_msgs;
88
+
end else
89
+
Printf.printf " Note: No duplicate ID errors found (checker may not be enabled)\n"
90
+
91
+
(** Test heading structure *)
92
+
let test_heading_skip () =
93
+
Printf.printf "\nTest 5: Skipped heading level\n";
94
+
let html = {|<!DOCTYPE html><html><body>
95
+
<h1>Title</h1>
96
+
<h3>Skipped h2</h3>
97
+
</body></html>|} in
98
+
let reader = reader_of_string html in
99
+
let result = Html5_checker.check reader in
100
+
let all_msgs = Html5_checker.messages result in
101
+
Printf.printf " Found %d message(s)\n" (List.length all_msgs);
102
+
let heading_msgs = List.filter (fun m ->
103
+
message_contains m "heading" || message_contains m "skip"
104
+
) all_msgs in
105
+
if List.length heading_msgs > 0 then begin
106
+
Printf.printf " Found heading-related messages:\n";
107
+
List.iter (fun msg ->
108
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
109
+
) heading_msgs;
110
+
end else
111
+
Printf.printf " Note: No heading structure warnings found (checker may not be enabled)\n"
112
+
113
+
(** Test img without alt *)
114
+
let test_img_without_alt () =
115
+
Printf.printf "\nTest 6: Image without alt attribute\n";
116
+
let html = {|<!DOCTYPE html><html><body>
117
+
<img src="test.jpg">
118
+
</body></html>|} in
119
+
let reader = reader_of_string html in
120
+
let result = Html5_checker.check reader in
121
+
let all_msgs = Html5_checker.messages result in
122
+
Printf.printf " Found %d message(s)\n" (List.length all_msgs);
123
+
let img_msgs = List.filter (fun m ->
124
+
message_contains m "alt" || (message_contains m "img" && message_contains m "attribute")
125
+
) all_msgs in
126
+
if List.length img_msgs > 0 then begin
127
+
Printf.printf " Found img/alt-related messages:\n";
128
+
List.iter (fun msg ->
129
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
130
+
) img_msgs;
131
+
end else
132
+
Printf.printf " Note: No missing alt attribute errors found (checker may not be enabled)\n"
133
+
134
+
(** Test invalid nesting *)
135
+
let test_invalid_nesting () =
136
+
Printf.printf "\nTest 7: Invalid nesting - <a> inside <a>\n";
137
+
let html = {|<!DOCTYPE html><html><body>
138
+
<a href="#">Link <a href="#">Nested</a></a>
139
+
</body></html>|} in
140
+
let reader = reader_of_string html in
141
+
let result = Html5_checker.check reader in
142
+
let all_msgs = Html5_checker.messages result in
143
+
Printf.printf " Found %d message(s)\n" (List.length all_msgs);
144
+
let nesting_msgs = List.filter (fun m ->
145
+
message_contains m "nesting" || message_contains m "nested" || message_contains m "ancestor"
146
+
) all_msgs in
147
+
if List.length nesting_msgs > 0 then begin
148
+
Printf.printf " Found nesting-related messages:\n";
149
+
List.iter (fun msg ->
150
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
151
+
) nesting_msgs;
152
+
end else
153
+
Printf.printf " Note: No nesting errors found (checker may not be enabled)\n"
154
+
155
+
(** Test form inside form *)
156
+
let test_form_nesting () =
157
+
Printf.printf "\nTest 8: Invalid nesting - <form> inside <form>\n";
158
+
let html = {|<!DOCTYPE html><html><body>
159
+
<form><form></form></form>
160
+
</body></html>|} in
161
+
let reader = reader_of_string html in
162
+
let result = Html5_checker.check reader in
163
+
let all_msgs = Html5_checker.messages result in
164
+
Printf.printf " Found %d message(s)\n" (List.length all_msgs);
165
+
let form_msgs = List.filter (fun m ->
166
+
message_contains m "form"
167
+
) all_msgs in
168
+
if List.length form_msgs > 0 then begin
169
+
Printf.printf " Found form-related messages:\n";
170
+
List.iter (fun msg ->
171
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
172
+
) form_msgs;
173
+
end else
174
+
Printf.printf " Note: No form nesting errors found (checker may not be enabled)\n"
175
+
176
+
(** Test output formatting *)
177
+
let test_output_formats () =
178
+
Printf.printf "\nTest 9: Output format testing\n";
179
+
let html = {|<!DOCTYPE html><html><body><p>Test</p></body></html>|} in
180
+
let reader = reader_of_string html in
181
+
let result = Html5_checker.check reader in
182
+
183
+
Printf.printf " Testing text format:\n";
184
+
let text_output = Html5_checker.format_text result in
185
+
Printf.printf " Length: %d chars\n" (String.length text_output);
186
+
187
+
Printf.printf " Testing JSON format:\n";
188
+
let json_output = Html5_checker.format_json result in
189
+
Printf.printf " Length: %d chars\n" (String.length json_output);
190
+
191
+
Printf.printf " Testing GNU format:\n";
192
+
let gnu_output = Html5_checker.format_gnu result in
193
+
Printf.printf " Length: %d chars\n" (String.length gnu_output)
194
+
195
+
(** Test has_errors function *)
196
+
let test_has_errors () =
197
+
Printf.printf "\nTest 10: has_errors function\n";
198
+
199
+
(* Valid document should have no errors *)
200
+
let valid_html = "<!DOCTYPE html><html><body><p>Valid</p></body></html>" in
201
+
let result1 = Html5_checker.check (reader_of_string valid_html) in
202
+
Printf.printf " Valid document has_errors: %b\n" (Html5_checker.has_errors result1);
203
+
204
+
(* Document with likely parse errors *)
205
+
let invalid_html = "<html><body><p>Unclosed" in
206
+
let result2 = Html5_checker.check (reader_of_string invalid_html) in
207
+
Printf.printf " Invalid document has_errors: %b\n" (Html5_checker.has_errors result2)
208
+
209
+
(** Test check_dom with pre-parsed document *)
210
+
let test_check_dom () =
211
+
Printf.printf "\nTest 11: check_dom with pre-parsed document\n";
212
+
let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
213
+
let reader = reader_of_string html in
214
+
let parsed = Html5rw.parse reader in
215
+
let result = Html5_checker.check_dom parsed in
216
+
let all_msgs = Html5_checker.messages result in
217
+
Printf.printf " check_dom found %d message(s)\n" (List.length all_msgs);
218
+
Printf.printf " OK: check_dom completed successfully\n"
219
+
220
+
(** Test system_id parameter *)
221
+
let test_system_id () =
222
+
Printf.printf "\nTest 12: system_id parameter\n";
223
+
let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
224
+
let reader = reader_of_string html in
225
+
let result = Html5_checker.check ~system_id:"test.html" reader in
226
+
match Html5_checker.system_id result with
227
+
| Some id -> Printf.printf " system_id: %s\n" id
228
+
| None -> Printf.printf " Warning: system_id not set\n"
229
+
230
+
(** Test collect_parse_errors flag *)
231
+
let test_collect_parse_errors_flag () =
232
+
Printf.printf "\nTest 13: collect_parse_errors flag\n";
233
+
let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
234
+
235
+
let result_with = Html5_checker.check ~collect_parse_errors:true (reader_of_string html) in
236
+
let msgs_with = Html5_checker.messages result_with in
237
+
Printf.printf " With parse errors: %d message(s)\n" (List.length msgs_with);
238
+
239
+
let result_without = Html5_checker.check ~collect_parse_errors:false (reader_of_string html) in
240
+
let msgs_without = Html5_checker.messages result_without in
241
+
Printf.printf " Without parse errors: %d message(s)\n" (List.length msgs_without)
242
+
243
+
(** Test document accessor *)
244
+
let test_document_accessor () =
245
+
Printf.printf "\nTest 14: document accessor\n";
246
+
let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
247
+
let reader = reader_of_string html in
248
+
let result = Html5_checker.check reader in
249
+
let _doc = Html5_checker.document result in
250
+
Printf.printf " OK: document accessor works\n"
251
+
252
+
(** Test message severity filtering *)
253
+
let test_severity_filtering () =
254
+
Printf.printf "\nTest 15: Message severity filtering\n";
255
+
let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
256
+
let reader = reader_of_string html in
257
+
let result = Html5_checker.check reader in
258
+
259
+
let all_msgs = Html5_checker.messages result in
260
+
let errors = Html5_checker.errors result in
261
+
let warnings = Html5_checker.warnings result in
262
+
263
+
Printf.printf " Total messages: %d\n" (List.length all_msgs);
264
+
Printf.printf " Errors: %d\n" (List.length errors);
265
+
Printf.printf " Warnings: %d\n" (List.length warnings);
266
+
267
+
(* Verify that errors + warnings <= all messages *)
268
+
if List.length errors + List.length warnings <= List.length all_msgs then
269
+
Printf.printf " OK: Message counts are consistent\n"
270
+
else
271
+
Printf.printf " Warning: Message counts inconsistent\n"
272
+
273
+
(** Run all tests *)
274
+
let () =
275
+
Printf.printf "Running html5_checker tests...\n";
276
+
Printf.printf "========================================\n\n";
277
+
278
+
test_valid_html5 ();
279
+
test_missing_doctype ();
280
+
test_obsolete_element ();
281
+
test_duplicate_id ();
282
+
test_heading_skip ();
283
+
test_img_without_alt ();
284
+
test_invalid_nesting ();
285
+
test_form_nesting ();
286
+
test_output_formats ();
287
+
test_has_errors ();
288
+
test_check_dom ();
289
+
test_system_id ();
290
+
test_collect_parse_errors_flag ();
291
+
test_document_accessor ();
292
+
test_severity_filtering ();
293
+
294
+
Printf.printf "\n========================================\n";
295
+
Printf.printf "All tests completed!\n";
296
+
Printf.printf "\nNote: Some checkers may not be enabled yet.\n";
297
+
Printf.printf "Tests marked with 'Note:' indicate features that may be\n";
298
+
Printf.printf "implemented in future versions.\n"
+126
test/test_nesting_checker.ml
+126
test/test_nesting_checker.ml
···
1
+
(** Simple test for nesting_checker functionality *)
2
+
3
+
let () =
4
+
(* Create a message collector *)
5
+
let collector = Html5_checker.Message_collector.create () in
6
+
7
+
(* Get the nesting checker *)
8
+
let module C = (val Html5_checker__Nesting_checker.checker : Html5_checker__Checker.S) in
9
+
let state = C.create () in
10
+
11
+
(* Test 1: <a> cannot contain another <a> *)
12
+
Printf.printf "Test 1: Checking <a href> inside <a href>\n";
13
+
C.start_element state ~name:"a" ~namespace:None ~attrs:[("href", "#")] collector;
14
+
C.start_element state ~name:"a" ~namespace:None ~attrs:[("href", "#")] collector;
15
+
16
+
let errors1 = Html5_checker.Message_collector.errors collector in
17
+
Printf.printf " Found %d error(s)\n" (List.length errors1);
18
+
List.iter (fun msg ->
19
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
20
+
) errors1;
21
+
22
+
C.end_element state ~name:"a" ~namespace:None collector;
23
+
C.end_element state ~name:"a" ~namespace:None collector;
24
+
Html5_checker.Message_collector.clear collector;
25
+
26
+
(* Test 2: <button> inside <a> *)
27
+
Printf.printf "\nTest 2: Checking <button> inside <a href>\n";
28
+
C.start_element state ~name:"a" ~namespace:None ~attrs:[("href", "#")] collector;
29
+
C.start_element state ~name:"button" ~namespace:None ~attrs:[] collector;
30
+
31
+
let errors2 = Html5_checker.Message_collector.errors collector in
32
+
Printf.printf " Found %d error(s)\n" (List.length errors2);
33
+
List.iter (fun msg ->
34
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
35
+
) errors2;
36
+
37
+
C.end_element state ~name:"button" ~namespace:None collector;
38
+
C.end_element state ~name:"a" ~namespace:None collector;
39
+
Html5_checker.Message_collector.clear collector;
40
+
41
+
(* Test 3: form inside form *)
42
+
Printf.printf "\nTest 3: Checking <form> inside <form>\n";
43
+
C.start_element state ~name:"form" ~namespace:None ~attrs:[] collector;
44
+
C.start_element state ~name:"form" ~namespace:None ~attrs:[] collector;
45
+
46
+
let errors3 = Html5_checker.Message_collector.errors collector in
47
+
Printf.printf " Found %d error(s)\n" (List.length errors3);
48
+
List.iter (fun msg ->
49
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
50
+
) errors3;
51
+
52
+
C.end_element state ~name:"form" ~namespace:None collector;
53
+
C.end_element state ~name:"form" ~namespace:None collector;
54
+
Html5_checker.Message_collector.clear collector;
55
+
56
+
(* Test 4: header inside footer *)
57
+
Printf.printf "\nTest 4: Checking <header> inside <footer>\n";
58
+
C.start_element state ~name:"footer" ~namespace:None ~attrs:[] collector;
59
+
C.start_element state ~name:"header" ~namespace:None ~attrs:[] collector;
60
+
61
+
let errors4 = Html5_checker.Message_collector.errors collector in
62
+
Printf.printf " Found %d error(s)\n" (List.length errors4);
63
+
List.iter (fun msg ->
64
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
65
+
) errors4;
66
+
67
+
C.end_element state ~name:"header" ~namespace:None collector;
68
+
C.end_element state ~name:"footer" ~namespace:None collector;
69
+
Html5_checker.Message_collector.clear collector;
70
+
71
+
(* Test 5: input (not hidden) inside button *)
72
+
Printf.printf "\nTest 5: Checking <input type=text> inside <button>\n";
73
+
C.start_element state ~name:"button" ~namespace:None ~attrs:[] collector;
74
+
C.start_element state ~name:"input" ~namespace:None ~attrs:[("type", "text")] collector;
75
+
76
+
let errors5 = Html5_checker.Message_collector.errors collector in
77
+
Printf.printf " Found %d error(s)\n" (List.length errors5);
78
+
List.iter (fun msg ->
79
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
80
+
) errors5;
81
+
82
+
C.end_element state ~name:"input" ~namespace:None collector;
83
+
C.end_element state ~name:"button" ~namespace:None collector;
84
+
Html5_checker.Message_collector.clear collector;
85
+
86
+
(* Test 6: valid nesting - should not error *)
87
+
Printf.printf "\nTest 6: Checking valid nesting: <div> inside <div>\n";
88
+
C.start_element state ~name:"div" ~namespace:None ~attrs:[] collector;
89
+
C.start_element state ~name:"div" ~namespace:None ~attrs:[] collector;
90
+
91
+
let errors6 = Html5_checker.Message_collector.errors collector in
92
+
Printf.printf " Found %d error(s)\n" (List.length errors6);
93
+
if List.length errors6 = 0 then
94
+
Printf.printf " OK: No errors as expected\n";
95
+
96
+
C.end_element state ~name:"div" ~namespace:None collector;
97
+
C.end_element state ~name:"div" ~namespace:None collector;
98
+
Html5_checker.Message_collector.clear collector;
99
+
100
+
(* Test 7: area without map ancestor *)
101
+
Printf.printf "\nTest 7: Checking <area> without <map> ancestor\n";
102
+
C.start_element state ~name:"area" ~namespace:None ~attrs:[] collector;
103
+
104
+
let errors7 = Html5_checker.Message_collector.errors collector in
105
+
Printf.printf " Found %d error(s)\n" (List.length errors7);
106
+
List.iter (fun msg ->
107
+
Printf.printf " - %s\n" msg.Html5_checker.Message.message
108
+
) errors7;
109
+
110
+
C.end_element state ~name:"area" ~namespace:None collector;
111
+
Html5_checker.Message_collector.clear collector;
112
+
113
+
(* Test 8: area with map ancestor (valid) *)
114
+
Printf.printf "\nTest 8: Checking <area> with <map> ancestor (valid)\n";
115
+
C.start_element state ~name:"map" ~namespace:None ~attrs:[] collector;
116
+
C.start_element state ~name:"area" ~namespace:None ~attrs:[] collector;
117
+
118
+
let errors8 = Html5_checker.Message_collector.errors collector in
119
+
Printf.printf " Found %d error(s)\n" (List.length errors8);
120
+
if List.length errors8 = 0 then
121
+
Printf.printf " OK: No errors as expected\n";
122
+
123
+
C.end_element state ~name:"area" ~namespace:None collector;
124
+
C.end_element state ~name:"map" ~namespace:None collector;
125
+
126
+
Printf.printf "\nAll tests completed!\n"