+29
lib/htmlrw_check/checker.ml
+29
lib/htmlrw_check/checker.ml
···
38
38
end
39
39
40
40
let noop () = (module Noop : S)
41
+
42
+
(** Input signature for Make functor *)
43
+
module type Input = sig
44
+
type state
45
+
val create : unit -> state
46
+
val reset : state -> unit
47
+
val start_element : state -> element:Element.t -> Message_collector.t -> unit
48
+
val end_element : state -> tag:Tag.element_tag -> Message_collector.t -> unit
49
+
val characters : (state -> string -> Message_collector.t -> unit) option
50
+
val end_document : (state -> Message_collector.t -> unit) option
51
+
end
52
+
53
+
(** Functor to create a checker with default implementations for optional callbacks *)
54
+
module Make (I : Input) : S with type state = I.state = struct
55
+
type state = I.state
56
+
57
+
let create = I.create
58
+
let reset = I.reset
59
+
let start_element = I.start_element
60
+
let end_element = I.end_element
61
+
62
+
let characters = match I.characters with
63
+
| Some f -> f
64
+
| None -> fun _ _ _ -> ()
65
+
66
+
let end_document = match I.end_document with
67
+
| Some f -> f
68
+
| None -> fun _ _ -> ()
69
+
end
+42
lib/htmlrw_check/checker.mli
+42
lib/htmlrw_check/checker.mli
···
172
172
(* Does nothing when walked over a DOM tree *)
173
173
]}
174
174
*)
175
+
176
+
(** {1 Checker Construction Helpers} *)
177
+
178
+
(** Input signature for {!Make} functor.
179
+
180
+
Only the required callbacks need to be provided. Optional callbacks
181
+
(characters, end_document) default to no-op implementations. *)
182
+
module type Input = sig
183
+
type state
184
+
val create : unit -> state
185
+
val reset : state -> unit
186
+
val start_element : state -> element:Element.t -> Message_collector.t -> unit
187
+
val end_element : state -> tag:Tag.element_tag -> Message_collector.t -> unit
188
+
189
+
(** Optional: called for text content. Default: no-op. *)
190
+
val characters : (state -> string -> Message_collector.t -> unit) option
191
+
192
+
(** Optional: called at document end. Default: no-op. *)
193
+
val end_document : (state -> Message_collector.t -> unit) option
194
+
end
195
+
196
+
(** Functor to create a checker from an {!Input} module.
197
+
198
+
This reduces boilerplate when creating checkers that don't need
199
+
to handle all events. The characters and end_document callbacks
200
+
default to no-ops if not provided.
201
+
202
+
{b Example:}
203
+
{[
204
+
let checker = Checker.Make(struct
205
+
type state = { mutable count : int }
206
+
let create () = { count = 0 }
207
+
let reset s = s.count <- 0
208
+
let start_element s ~element collector =
209
+
s.count <- s.count + 1
210
+
let end_element _ ~tag:_ _ = ()
211
+
let characters = None (* Use default no-op *)
212
+
let end_document = None (* Use default no-op *)
213
+
end)
214
+
]}
215
+
*)
216
+
module Make : functor (I : Input) -> S with type state = I.state
+141
lib/htmlrw_check/context_tracker.ml
+141
lib/htmlrw_check/context_tracker.ml
···
1
+
(** Reusable context/ancestor tracking for checkers.
2
+
3
+
Many checkers need to track element ancestors, depth, or maintain
4
+
context stacks during DOM traversal. This module provides common
5
+
utilities to reduce duplication. *)
6
+
7
+
(** Generic stack-based context tracker. *)
8
+
module Stack : sig
9
+
type 'a t
10
+
11
+
(** Create an empty context stack. *)
12
+
val create : unit -> 'a t
13
+
14
+
(** Reset the stack to empty. *)
15
+
val reset : 'a t -> unit
16
+
17
+
(** Push a context onto the stack. *)
18
+
val push : 'a t -> 'a -> unit
19
+
20
+
(** Pop a context from the stack. Returns None if empty. *)
21
+
val pop : 'a t -> 'a option
22
+
23
+
(** Get the current (top) context without removing it. *)
24
+
val current : 'a t -> 'a option
25
+
26
+
(** Get current depth (number of items on stack). *)
27
+
val depth : 'a t -> int
28
+
29
+
(** Check if stack is empty. *)
30
+
val is_empty : 'a t -> bool
31
+
32
+
(** Get all ancestors (bottom to top). *)
33
+
val to_list : 'a t -> 'a list
34
+
35
+
(** Check if any ancestor satisfies predicate. *)
36
+
val exists : 'a t -> ('a -> bool) -> bool
37
+
38
+
(** Find first ancestor satisfying predicate (top to bottom). *)
39
+
val find : 'a t -> ('a -> bool) -> 'a option
40
+
41
+
(** Iterate over all contexts (top to bottom). *)
42
+
val iter : 'a t -> ('a -> unit) -> unit
43
+
end = struct
44
+
type 'a t = { mutable stack : 'a list }
45
+
46
+
let create () = { stack = [] }
47
+
let reset t = t.stack <- []
48
+
let push t x = t.stack <- x :: t.stack
49
+
let pop t = match t.stack with
50
+
| [] -> None
51
+
| x :: rest -> t.stack <- rest; Some x
52
+
let current t = match t.stack with
53
+
| [] -> None
54
+
| x :: _ -> Some x
55
+
let depth t = List.length t.stack
56
+
let is_empty t = t.stack = []
57
+
let to_list t = List.rev t.stack
58
+
let exists t f = List.exists f t.stack
59
+
let find t f = List.find_opt f t.stack
60
+
let iter t f = List.iter f t.stack
61
+
end
62
+
63
+
(** Simple depth counter for tracking nesting level. *)
64
+
module Depth : sig
65
+
type t
66
+
67
+
(** Create a depth counter starting at 0. *)
68
+
val create : unit -> t
69
+
70
+
(** Reset depth to 0. *)
71
+
val reset : t -> unit
72
+
73
+
(** Increment depth (entering element). *)
74
+
val enter : t -> unit
75
+
76
+
(** Decrement depth (leaving element). Returns false if was already 0. *)
77
+
val leave : t -> bool
78
+
79
+
(** Get current depth. *)
80
+
val get : t -> int
81
+
82
+
(** Check if inside (depth > 0). *)
83
+
val is_inside : t -> bool
84
+
end = struct
85
+
type t = { mutable depth : int }
86
+
87
+
let create () = { depth = 0 }
88
+
let reset t = t.depth <- 0
89
+
let enter t = t.depth <- t.depth + 1
90
+
let leave t =
91
+
if t.depth > 0 then begin
92
+
t.depth <- t.depth - 1;
93
+
true
94
+
end else false
95
+
let get t = t.depth
96
+
let is_inside t = t.depth > 0
97
+
end
98
+
99
+
(** Element name stack for tracking ancestors by name. *)
100
+
module Ancestors : sig
101
+
type t
102
+
103
+
(** Create an empty ancestor tracker. *)
104
+
val create : unit -> t
105
+
106
+
(** Reset to empty. *)
107
+
val reset : t -> unit
108
+
109
+
(** Push an element name onto the ancestor stack. *)
110
+
val push : t -> string -> unit
111
+
112
+
(** Pop an element from the ancestor stack. *)
113
+
val pop : t -> unit
114
+
115
+
(** Get the immediate parent element name. *)
116
+
val parent : t -> string option
117
+
118
+
(** Check if an element name is an ancestor. *)
119
+
val has_ancestor : t -> string -> bool
120
+
121
+
(** Get depth (number of ancestors). *)
122
+
val depth : t -> int
123
+
124
+
(** Get all ancestor names (outermost first). *)
125
+
val to_list : t -> string list
126
+
end = struct
127
+
type t = { mutable stack : string list }
128
+
129
+
let create () = { stack = [] }
130
+
let reset t = t.stack <- []
131
+
let push t name = t.stack <- name :: t.stack
132
+
let pop t = match t.stack with
133
+
| _ :: rest -> t.stack <- rest
134
+
| [] -> ()
135
+
let parent t = match t.stack with
136
+
| x :: _ -> Some x
137
+
| [] -> None
138
+
let has_ancestor t name = List.mem name t.stack
139
+
let depth t = List.length t.stack
140
+
let to_list t = List.rev t.stack
141
+
end
+116
lib/htmlrw_check/context_tracker.mli
+116
lib/htmlrw_check/context_tracker.mli
···
1
+
(** Reusable context/ancestor tracking for checkers.
2
+
3
+
Many checkers need to track element ancestors, depth, or maintain
4
+
context stacks during DOM traversal. This module provides common
5
+
utilities to reduce duplication across checkers.
6
+
7
+
{2 Available Trackers}
8
+
9
+
- {!Stack}: Generic stack for any context type
10
+
- {!Depth}: Simple integer depth counter
11
+
- {!Ancestors}: String-based element ancestor tracking *)
12
+
13
+
(** {2 Generic Stack} *)
14
+
15
+
(** Generic stack-based context tracker.
16
+
17
+
Use this when you need to track complex state at each nesting level.
18
+
For example, tracking whether each ancestor has certain attributes. *)
19
+
module Stack : sig
20
+
type 'a t
21
+
22
+
(** Create an empty context stack. *)
23
+
val create : unit -> 'a t
24
+
25
+
(** Reset the stack to empty. *)
26
+
val reset : 'a t -> unit
27
+
28
+
(** Push a context onto the stack. *)
29
+
val push : 'a t -> 'a -> unit
30
+
31
+
(** Pop a context from the stack. Returns None if empty. *)
32
+
val pop : 'a t -> 'a option
33
+
34
+
(** Get the current (top) context without removing it. *)
35
+
val current : 'a t -> 'a option
36
+
37
+
(** Get current depth (number of items on stack). *)
38
+
val depth : 'a t -> int
39
+
40
+
(** Check if stack is empty. *)
41
+
val is_empty : 'a t -> bool
42
+
43
+
(** Get all ancestors (bottom to top). *)
44
+
val to_list : 'a t -> 'a list
45
+
46
+
(** Check if any ancestor satisfies predicate. *)
47
+
val exists : 'a t -> ('a -> bool) -> bool
48
+
49
+
(** Find first ancestor satisfying predicate (top to bottom). *)
50
+
val find : 'a t -> ('a -> bool) -> 'a option
51
+
52
+
(** Iterate over all contexts (top to bottom). *)
53
+
val iter : 'a t -> ('a -> unit) -> unit
54
+
end
55
+
56
+
(** {2 Depth Counter} *)
57
+
58
+
(** Simple depth counter for tracking nesting level.
59
+
60
+
Use this when you only need to know if you're inside a certain
61
+
element type, not the full context at each level. *)
62
+
module Depth : sig
63
+
type t
64
+
65
+
(** Create a depth counter starting at 0. *)
66
+
val create : unit -> t
67
+
68
+
(** Reset depth to 0. *)
69
+
val reset : t -> unit
70
+
71
+
(** Increment depth (entering element). *)
72
+
val enter : t -> unit
73
+
74
+
(** Decrement depth (leaving element). Returns false if was already 0. *)
75
+
val leave : t -> bool
76
+
77
+
(** Get current depth. *)
78
+
val get : t -> int
79
+
80
+
(** Check if inside (depth > 0). *)
81
+
val is_inside : t -> bool
82
+
end
83
+
84
+
(** {2 Ancestor Tracker} *)
85
+
86
+
(** Element name stack for tracking ancestors by name.
87
+
88
+
Use this when you need to check if certain elements are ancestors,
89
+
but don't need complex context at each level. *)
90
+
module Ancestors : sig
91
+
type t
92
+
93
+
(** Create an empty ancestor tracker. *)
94
+
val create : unit -> t
95
+
96
+
(** Reset to empty. *)
97
+
val reset : t -> unit
98
+
99
+
(** Push an element name onto the ancestor stack. *)
100
+
val push : t -> string -> unit
101
+
102
+
(** Pop an element from the ancestor stack. *)
103
+
val pop : t -> unit
104
+
105
+
(** Get the immediate parent element name. *)
106
+
val parent : t -> string option
107
+
108
+
(** Check if an element name is an ancestor. *)
109
+
val has_ancestor : t -> string -> bool
110
+
111
+
(** Get depth (number of ancestors). *)
112
+
val depth : t -> int
113
+
114
+
(** Get all ancestor names (outermost first). *)
115
+
val to_list : t -> string list
116
+
end
+15
lib/htmlrw_check/datatype/datatype.ml
+15
lib/htmlrw_check/datatype/datatype.ml
···
40
40
let end_pos = find_end (len - 1) in
41
41
if start > end_pos then ""
42
42
else String.sub s start (end_pos - start + 1)
43
+
44
+
(** Factory for creating enum-based validators.
45
+
Many HTML attributes accept a fixed set of keyword values. *)
46
+
let make_enum ~name ~values ?(allow_empty = true) () : t =
47
+
let values_set = List.map String.lowercase_ascii values in
48
+
let values_str = String.concat ", " (List.map (Printf.sprintf "'%s'") values) in
49
+
(module struct
50
+
let name = name
51
+
let validate s =
52
+
let s_lower = string_to_ascii_lowercase s in
53
+
if (allow_empty && s = "") || List.mem s_lower values_set then Ok ()
54
+
else Error (Printf.sprintf "The value '%s' is not a valid %s value. Expected %s%s."
55
+
s name (if allow_empty then "empty string, " else "") values_str)
56
+
let is_valid s = Result.is_ok (validate s)
57
+
end : S)
+8
lib/htmlrw_check/datatype/datatype.mli
+8
lib/htmlrw_check/datatype/datatype.mli
···
43
43
44
44
(** Trim HTML5 whitespace from both ends of a string. *)
45
45
val trim_html_spaces : string -> string
46
+
47
+
(** {2 Datatype Factories} *)
48
+
49
+
(** Create an enum-based validator for attributes with fixed keyword values.
50
+
@param name The datatype name (e.g., "loading", "crossorigin")
51
+
@param values List of valid keyword values (case-insensitive)
52
+
@param allow_empty Whether empty string is valid (default: true) *)
53
+
val make_enum : name:string -> values:string list -> ?allow_empty:bool -> unit -> t
+26
lib/htmlrw_check/error_code.ml
+26
lib/htmlrw_check/error_code.ml
···
726
726
727
727
(* Generic *)
728
728
| `Generic message -> message
729
+
730
+
(** {2 Error Construction Helpers} *)
731
+
732
+
(** Create a bad attribute value error with element, attribute, value, and reason. *)
733
+
let bad_value ~element ~attr ~value ~reason : t =
734
+
`Attr (`Bad_value (`Elem element, `Attr attr, `Value value, `Reason reason))
735
+
736
+
(** Create a bad attribute value error with just a message. *)
737
+
let bad_value_msg msg : t =
738
+
`Attr (`Bad_value_generic (`Message msg))
739
+
740
+
(** Create a missing required attribute error. *)
741
+
let missing_attr ~element ~attr : t =
742
+
`Attr (`Missing (`Elem element, `Attr attr))
743
+
744
+
(** Create an attribute not allowed error. *)
745
+
let attr_not_allowed ~element ~attr : t =
746
+
`Attr (`Not_allowed (`Attr attr, `Elem element))
747
+
748
+
(** Create an element not allowed as child error. *)
749
+
let not_allowed_as_child ~child ~parent : t =
750
+
`Element (`Not_allowed_as_child (`Child child, `Parent parent))
751
+
752
+
(** Create a must not be empty error. *)
753
+
let must_not_be_empty ~element : t =
754
+
`Element (`Must_not_be_empty (`Elem element))
+28
lib/htmlrw_check/error_code.mli
+28
lib/htmlrw_check/error_code.mli
···
748
748
(** Format a string with Unicode curly quotes.
749
749
Wraps the string in U+201C and U+201D ("..."). *)
750
750
val q : string -> string
751
+
752
+
(** {1 Error Construction Helpers}
753
+
754
+
These functions simplify creating common error types. *)
755
+
756
+
(** Create a bad attribute value error.
757
+
Example: [bad_value ~element:"img" ~attr:"src" ~value:"" ~reason:"URL cannot be empty"] *)
758
+
val bad_value : element:string -> attr:string -> value:string -> reason:string -> t
759
+
760
+
(** Create a bad attribute value error with just a message.
761
+
Example: [bad_value_msg "The value must be a valid URL"] *)
762
+
val bad_value_msg : string -> t
763
+
764
+
(** Create a missing required attribute error.
765
+
Example: [missing_attr ~element:"img" ~attr:"alt"] *)
766
+
val missing_attr : element:string -> attr:string -> t
767
+
768
+
(** Create an attribute not allowed error.
769
+
Example: [attr_not_allowed ~element:"span" ~attr:"href"] *)
770
+
val attr_not_allowed : element:string -> attr:string -> t
771
+
772
+
(** Create an element not allowed as child error.
773
+
Example: [not_allowed_as_child ~child:"div" ~parent:"p"] *)
774
+
val not_allowed_as_child : child:string -> parent:string -> t
775
+
776
+
(** Create a must not be empty error.
777
+
Example: [must_not_be_empty ~element:"title"] *)
778
+
val must_not_be_empty : element:string -> t
+8
-27
lib/htmlrw_check/specialized/h1_checker.mli
+8
-27
lib/htmlrw_check/specialized/h1_checker.mli
···
1
-
(*---------------------------------------------------------------------------
2
-
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
-
SPDX-License-Identifier: MIT
4
-
---------------------------------------------------------------------------*)
1
+
(** H1 element counter checker.
5
2
6
-
(** H1 element counter and validator.
3
+
This checker validates that documents don't have multiple h1 elements,
4
+
which can confuse document structure and accessibility tools.
7
5
8
-
This checker warns about multiple [<h1>] elements in a document.
9
-
While HTML5 technically allows multiple [<h1>] elements when using
10
-
the document outline algorithm, this algorithm was never implemented
11
-
by browsers and has been removed from the specification.
6
+
{2 Validation Rules}
12
7
13
-
{2 Best Practice}
14
-
15
-
Documents should have exactly one [<h1>] element that represents the
16
-
main heading of the page. Multiple [<h1>] elements can confuse users
17
-
and assistive technologies about the document's structure.
18
-
19
-
{2 Special Cases}
20
-
21
-
- [<h1>] elements inside [<svg>] content (e.g., in [<foreignObject>])
22
-
are not counted, as they may represent different content contexts
23
-
- The checker reports a warning after the second [<h1>] is encountered
8
+
- Documents should have at most one [<h1>] element
9
+
- [<h1>] elements inside SVG content (foreignObject, desc) are not counted
24
10
25
11
{2 Error Messages}
26
12
27
-
Reports [Multiple_h1] when more than one [<h1>] element is found
28
-
in the document.
29
-
30
-
@see <https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements>
31
-
HTML Standard: The h1-h6 elements
32
-
*)
13
+
- [Multiple_h1]: Document contains more than one h1 element *)
33
14
34
15
val checker : Checker.t
35
-
(** The h1 element counter/validator instance. *)
16
+
(** The H1 checker instance. *)
+14
-2
lib/htmlrw_check/specialized/heading_checker.ml
+14
-2
lib/htmlrw_check/specialized/heading_checker.ml
···
14
14
mutable first_heading_checked : bool;
15
15
mutable in_heading : Tag.html_tag option;
16
16
mutable heading_has_text : bool;
17
+
mutable svg_depth : int; (* Track depth inside SVG - headings in SVG don't count *)
17
18
}
18
19
19
20
let create () =
···
24
25
first_heading_checked = false;
25
26
in_heading = None;
26
27
heading_has_text = false;
28
+
svg_depth = 0;
27
29
}
28
30
29
31
let reset state =
···
32
34
state.has_any_heading <- false;
33
35
state.first_heading_checked <- false;
34
36
state.in_heading <- None;
35
-
state.heading_has_text <- false
37
+
state.heading_has_text <- false;
38
+
state.svg_depth <- 0
36
39
37
40
(** Check if text is effectively empty (only whitespace). *)
38
41
let is_empty_text text =
···
48
51
49
52
let start_element state ~element collector =
50
53
match element.Element.tag with
51
-
| Tag.Html (#Tag.heading_tag as h) ->
54
+
| Tag.Svg _ ->
55
+
(* Track SVG depth - headings inside SVG (foreignObject, desc) don't count *)
56
+
state.svg_depth <- state.svg_depth + 1
57
+
| Tag.Html (#Tag.heading_tag as h) when state.svg_depth = 0 ->
52
58
let level = match Tag.heading_level h with Some l -> l | None -> 0 in
53
59
let name = Tag.html_tag_to_string h in
54
60
state.has_any_heading <- true;
···
89
95
| _ -> ()
90
96
91
97
let end_element state ~tag collector =
98
+
(* Track SVG depth *)
99
+
(match tag with
100
+
| Tag.Svg _ when state.svg_depth > 0 ->
101
+
state.svg_depth <- state.svg_depth - 1
102
+
| _ -> ());
103
+
(* Check for empty headings *)
92
104
match state.in_heading, tag with
93
105
| Some h, Tag.Html h2 when h = h2 ->
94
106
if not state.heading_has_text then