(* OCaml HTML5 parser/serialiser based on Python's JustHTML *)
(*---------------------------------------------------------------------------
   Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
   SPDX-License-Identifier: MIT
  ---------------------------------------------------------------------------*)

(** Browser DOM utilities for mapping validation results to live elements.

    This module bridges the gap between HTML string validation (which produces
    line/column locations) and live DOM manipulation (which needs element
    references). It builds mappings between source positions and DOM elements
    by walking both the serialized HTML and the DOM tree in parallel. *)

(** {1 Element Mapping}

    When we validate [element.outerHTML], we get messages with line/column
    positions. To annotate the original DOM, we need to map those positions
    back to the live elements. *)

(** An element map associating source locations with DOM elements.

    The representation is abstract; values are obtained from {!create}. *)
type t

(** [create root] builds an element map by walking the DOM element [root]
    and its serialization.

    This function:
    {ol
    {- Serializes the element to HTML via [outerHTML].}
    {- Parses that HTML with Html5rw to get the parse tree with locations.}
    {- Walks both trees in parallel to build a bidirectional mapping.}}

    @param root The DOM element to map.
    @return The element map and the HTML source string. *)
val create : Brr.El.t -> t * string

(** [find_by_location t ~line ~column] finds the DOM element corresponding
    to a source location.

    @param line 1-indexed line number.
    @param column 1-indexed column number.
    @return The element at or containing that position, or [None]. *)
val find_by_location : t -> line:int -> column:int -> Brr.El.t option

(** [find_by_location_and_tag t ~line ~column ~tag] finds the DOM element
    corresponding to an element name at a location.

    More precise than {!find_by_location} when the validator provides
    the element name along with the location.

    @param line 1-indexed line number.
    @param column 1-indexed column number.
    @param tag Element tag name (lowercase).
    @return The matching element, or [None]. *)
val find_by_location_and_tag :
  t -> line:int -> column:int -> tag:string -> Brr.El.t option

(** [find_for_message t msg] finds the DOM element for a validation message.

    Uses the message's location and element fields to find the best match.
    This is the primary function used by the annotation system. *)
val find_for_message : t -> Htmlrw_check.message -> Brr.El.t option

(** [html_source t] is the HTML source string that was used to build [t]
    (the string also returned by {!create}). *)
val html_source : t -> string

(** [root_element t] is the root element that [t] was built from. *)
val root_element : t -> Brr.El.t


(** {1 CSS Selector Generation} *)

(** [selector_path ?root el] builds a CSS selector path that uniquely
    identifies [el].

    The selector uses child combinators and [:nth-child] to be specific:
    ["body > div.main:nth-child(2) > p > img:nth-child(1)"]

    @param root Optional root element; the selector will be relative to it.
    Defaults to [document.body].
    @param el The element to build a selector for.
    @return A CSS selector string. *)
val selector_path : ?root:Brr.El.t -> Brr.El.t -> string

(** [short_selector ?root el] builds a shorter selector for [el], using IDs
    and classes when available.

    Tries to find the shortest unique selector:
    {ol
    {- If the element has an ID: ["#myId"].}
    {- If the parent has an ID: ["#parentId > .myClass"].}
    {- Otherwise falls back to the full path from {!selector_path}.}}

    @param root Optional root element.
    @param el The element to build a selector for.
    @return A CSS selector string. *)
val short_selector : ?root:Brr.El.t -> Brr.El.t -> string


(** {1 DOM Iteration} *)

(** [iter_elements f el] iterates [f] over all elements in document order
    (depth-first pre-order). *)
val iter_elements : (Brr.El.t -> unit) -> Brr.El.t -> unit

(** [fold_elements f init el] folds [f] over all elements in document order,
    threading an accumulator that starts at [init]. *)
val fold_elements : ('a -> Brr.El.t -> 'a) -> 'a -> Brr.El.t -> 'a

(** [filter_elements p el] finds all elements matching the predicate [p]. *)
val filter_elements : (Brr.El.t -> bool) -> Brr.El.t -> Brr.El.t list


(** {1 Serialization} *)

(** [outer_html el] gets the outer HTML of [el].

    This is a wrapper around the browser's [outerHTML] property. *)
val outer_html : Brr.El.t -> string

(** [inner_html el] gets the inner HTML of [el]. *)
val inner_html : Brr.El.t -> string