(** DOM tree traversal for HTML5 conformance checking.

This module provides functions to traverse DOM trees and apply checkers
to validate HTML5 documents. It implements a depth-first traversal in
document order that visits every node in the tree and notifies checkers
of traversal events.

{2 Traversal Model}
The walker follows a SAX-like event model, emitting events as it
encounters different node types during traversal:
{v
Document
└── html (start_element "html")
    ├── head (start_element "head")
    │   ├── title (start_element "title")
    │   │   ├── #text "Page Title" (characters)
    │   │   └── (end_element "title")
    │   └── (end_element "head")
    ├── body (start_element "body")
    │   ├── p (start_element "p")
    │   │   ├── #text "Hello " (characters)
    │   │   ├── b (start_element "b")
    │   │   │   ├── #text "world" (characters)
    │   │   │   └── (end_element "b")
    │   │   ├── #text "!" (characters)
    │   │   └── (end_element "p")
    │   └── (end_element "body")
    └── (end_element "html")
end_document
v}

{2 Event Sequence}

For each element node:
+ {!Checker.S.start_element} is called when entering the element
+ Children are recursively traversed
+ {!Checker.S.end_element} is called when exiting the element

For text and comment nodes:
- {!Checker.S.characters} is called with the text content

After the entire tree is traversed:
- {!Checker.S.end_document} is called on all checkers

{2 Checker Coordination}
When multiple checkers are used:
- All checkers receive the same event sequence
- Events are delivered to checkers in the order they appear in the list
- Each checker maintains independent state
- Messages from all checkers are collected together
This allows composing orthogonal validation rules without interference.
{2 Usage Examples}
{b Single checker:}
{[
let checker = Checker.noop () in
let collector = Message_collector.create () in
walk checker collector dom;
let messages = Message_collector.messages collector in
List.iter Message.pp messages
]}
{b Multiple checkers:}
{[
let checkers = [checker1; checker2; checker3] in
let collector = Message_collector.create () in
walk_all checkers collector dom;
(* Analyze messages from all checkers *)
]}
{b Registry of checkers:}
{[
let registry = Checker_registry.default () in
let collector = Message_collector.create () in
walk_registry registry collector dom;
(* All registered checkers have validated the DOM *)
]} *)
(** {1 Single Checker Traversal} *)
val walk : Checker.t -> Message_collector.t -> Html5rw.Dom.node -> unit
(** [walk checker collector node] traverses the DOM tree rooted at [node]
    with a single checker.

    The traversal is depth-first and follows document order: for each
    element, the checker receives a {!Checker.S.start_element} event, then
    the element's children are recursively traversed, then a
    {!Checker.S.end_element} event is emitted.

    After the entire tree is traversed, {!Checker.S.end_document} is
    called to allow the checker to emit any final validation messages.

    {b Example:}
    {[
      (* Validate a parsed HTML document *)
      let checker = Checker.noop () in
      let collector = Message_collector.create () in
      walk checker collector document_node;
      (* Check for errors *)
      let messages = Message_collector.messages collector in
      let errors =
        List.filter
          (fun msg -> msg.Message.severity = Message.Error)
          messages
      in
      if errors <> [] then
        Printf.printf "Found %d errors\n" (List.length errors)
    ]}

    {b Notes:}
    - Only element nodes trigger start/end events
    - Text and comment nodes trigger character events
    - Document and doctype nodes are silently skipped
    - The traversal follows document order (parent before children,
      earlier siblings before later ones)

    @param checker The checker to apply during traversal
    @param collector The message collector for validation messages
    @param node The root node to start traversal from *)
(** {1 Multiple Checker Traversal} *)
val walk_all :
Checker.t list -> Message_collector.t -> Html5rw.Dom.node -> unit
(** [walk_all checkers collector node] traverses the DOM tree rooted at
    [node] with multiple checkers.

    This performs a single tree traversal, delivering each event to all
    checkers in sequence. This is more efficient than calling {!walk}
    multiple times.

    All checkers receive events in the order they appear in the list.
    Each checker maintains independent state, so validation rules can
    be composed without interference.

    {b Example:}
    {[
      (* Run multiple validation passes in one traversal *)
      let structure_checker = (module StructureChecker : Checker.S) in
      let attribute_checker = (module AttributeChecker : Checker.S) in
      let obsolete_checker = (module ObsoleteChecker : Checker.S) in
      let checkers =
        [structure_checker; attribute_checker; obsolete_checker]
      in
      let collector = Message_collector.create () in
      walk_all checkers collector document_node;
      (* All three checkers have validated the document *)
      let messages = Message_collector.messages collector in
      Message_format.print_messages messages
    ]}

    {b Empty list behavior:}
    If the checkers list is empty, the tree is traversed but no validation
    is performed. This is equivalent to calling
    [walk (Checker.noop ()) ...].

    @param checkers List of checkers to apply during traversal
    @param collector The message collector for validation messages
    @param node The root node to start traversal from *)
(** {1 Registry-Based Traversal} *)
val walk_registry :
Checker_registry.t -> Message_collector.t -> Html5rw.Dom.node -> unit
(** [walk_registry registry collector node] traverses the DOM tree rooted
    at [node] with all checkers from a registry.

    This is equivalent to:
    {[
      let checkers = Checker_registry.all registry in
      walk_all checkers collector node
    ]}

    Use this when you want to run a pre-configured set of checkers
    without manually extracting them from the registry.

    {b Example:}
    {[
      (* Set up registry with desired checkers *)
      let registry = Checker_registry.default () in
      Checker_registry.register registry "custom" my_checker;
      (* Validate multiple documents with same checker set *)
      List.iter (fun doc ->
        let collector = Message_collector.create () in
        walk_registry registry collector doc;
        report_results collector
      ) documents
    ]}

    {b Empty registry behavior:}
    If the registry is empty, the tree is traversed but no validation
    is performed.

    @param registry The registry containing checkers to apply
    @param collector The message collector for validation messages
    @param node The root node to start traversal from *)