(** DOM tree traversal for HTML5 conformance checking.

    This module provides functions to traverse DOM trees and apply checkers
    to validate HTML5 documents. It implements a depth-first traversal in
    document order that visits every node in the tree and notifies checkers
    of traversal events.

    {2 Traversal Model}

    The walker follows a SAX-like event model, emitting events as it
    encounters different node types during traversal:

    {v
    Document
    └── html (start_element "html")
        ├── head (start_element "head")
        │   └── title (start_element "title")
        │       ├── #text "Page Title" (characters)
        │       └── (end_element "title")
        └── body (start_element "body")
            └── p (start_element "p")
                ├── #text "Hello " (characters)
                ├── b (start_element "b")
                │   ├── #text "world" (characters)
                │   └── (end_element "b")
                ├── #text "!" (characters)
                └── (end_element "p")
    end_document
    v}

    The diagram is abbreviated: the [end_element] events for [head], [body]
    and [html] are not drawn, but they are emitted like those of every other
    element (see the event sequence below).

    {2 Event Sequence}

    For each element node:

    + {!Checker.S.start_element} is called when entering the element
    + Children are recursively traversed
    + {!Checker.S.end_element} is called when exiting the element

    For text and comment nodes:

    - {!Checker.S.characters} is called with the text content

    After the entire tree is traversed:

    - {!Checker.S.end_document} is called on all checkers

    {2 Checker Coordination}

    When multiple checkers are used:

    - All checkers receive the same event sequence
    - Events are delivered to checkers in the order they appear in the list
    - Each checker maintains independent state
    - Messages from all checkers are collected together

    This allows composing orthogonal validation rules without interference.

    {2 Usage Examples}

    {b Single checker:}
    {[
      let checker = Checker.noop () in
      let collector = Message_collector.create () in
      walk checker collector dom;
      let messages = Message_collector.messages collector in
      List.iter Message.pp messages
    ]}

    {b Multiple checkers:}
    {[
      let checkers = [checker1; checker2; checker3] in
      let collector = Message_collector.create () in
      walk_all checkers collector dom;
      (* Analyze messages from all checkers *)
    ]}

    {b Registry of checkers:}
    {[
      let registry = Checker_registry.default () in
      let collector = Message_collector.create () in
      walk_registry registry collector dom;
      (* All registered checkers have validated the DOM *)
    ]} *)

(** {1 Single Checker Traversal} *)

val walk : Checker.t -> Message_collector.t -> Html5rw.Dom.node -> unit
(** [walk checker collector node] traverses a DOM tree with a single checker.

    The traversal is depth-first and follows document order: for each
    element, the checker receives a {!Checker.S.start_element} event, then
    the children are recursively traversed, then a {!Checker.S.end_element}
    event is emitted.

    After the entire tree is traversed, {!Checker.S.end_document} is called
    to allow the checker to emit any final validation messages.

    {b Example:}
    {[
      (* Validate a parsed HTML document *)
      let checker = Checker.noop () in
      let collector = Message_collector.create () in
      walk checker collector document_node;

      (* Check for errors *)
      let messages = Message_collector.messages collector in
      let errors =
        List.filter
          (fun msg -> msg.Message.severity = Message.Error)
          messages
      in
      if errors <> [] then
        Printf.printf "Found %d errors\n" (List.length errors)
    ]}

    {b Notes:}

    - Only element nodes trigger start/end events
    - Text and comment nodes trigger character events
    - Document and doctype nodes are silently skipped
    - The traversal follows document order (parent before children, earlier
      siblings before later ones)

    @param checker The checker to apply during traversal
    @param collector The message collector for validation messages
    @param node The root node to start traversal from *)

(** {1 Multiple Checker Traversal} *)

val walk_all : Checker.t list -> Message_collector.t -> Html5rw.Dom.node -> unit
(** [walk_all checkers collector node] traverses a DOM tree with multiple
    checkers.

    This performs a single tree traversal, delivering each event to all
    checkers in sequence. This is more efficient than calling {!walk}
    multiple times.

    All checkers receive events in the order they appear in the list. Each
    checker maintains independent state, so validation rules can be composed
    without interference.

    {b Example:}
    {[
      (* Run multiple validation passes in one traversal *)
      let structure_checker = (module StructureChecker : Checker.S) in
      let attribute_checker = (module AttributeChecker : Checker.S) in
      let obsolete_checker = (module ObsoleteChecker : Checker.S) in
      let checkers =
        [structure_checker; attribute_checker; obsolete_checker]
      in
      let collector = Message_collector.create () in
      walk_all checkers collector document_node;

      (* All three checkers have validated the document *)
      let messages = Message_collector.messages collector in
      Message_format.print_messages messages
    ]}

    {b Empty list behavior:} If the checkers list is empty, the tree is
    traversed but no validation is performed. This is equivalent to calling
    [walk (Checker.noop ()) ...].

    @param checkers List of checkers to apply during traversal
    @param collector The message collector for validation messages
    @param node The root node to start traversal from *)

(** {1 Registry-Based Traversal} *)

val walk_registry : Checker_registry.t -> Message_collector.t -> Html5rw.Dom.node -> unit
(** [walk_registry registry collector node] traverses a DOM tree with all
    checkers from a registry.

    This is equivalent to:
    {[
      let checkers = Checker_registry.all registry in
      walk_all checkers collector node
    ]}

    Use this when you want to run a pre-configured set of checkers without
    manually extracting them from the registry.

    {b Example:}
    {[
      (* Set up registry with desired checkers *)
      let registry = Checker_registry.default () in
      Checker_registry.register registry "custom" my_checker;

      (* Validate multiple documents with same checker set *)
      List.iter
        (fun doc ->
          let collector = Message_collector.create () in
          walk_registry registry collector doc;
          report_results collector)
        documents
    ]}

    {b Empty registry behavior:} If the registry is empty, the tree is
    traversed but no validation is performed.

    @param registry The registry containing checkers to apply
    @param collector The message collector for validation messages
    @param node The root node to start traversal from *)