(*--------------------------------------------------------------------------- Copyright (c) 2025 Anil Madhavapeddy . All rights reserved. SPDX-License-Identifier: MIT ---------------------------------------------------------------------------*) open Brr (* Helper to compare elements using JavaScript strict equality *) let el_equal a b = Jv.strict_equal (El.to_jv a) (El.to_jv b) (* A location-keyed map for finding elements by line/column *) module LocMap = Map.Make(struct type t = int * int let compare = compare end) type t = { root : El.t; html_source : string; loc_to_el : El.t LocMap.t; (* Mapping from (line, column) to browser elements *) } let outer_html el = Jstr.to_string (Jv.get (El.to_jv el) "outerHTML" |> Jv.to_jstr) let inner_html el = Jstr.to_string (Jv.get (El.to_jv el) "innerHTML" |> Jv.to_jstr) let iter_elements f root = let rec walk el = f el; List.iter walk (El.children ~only_els:true el) in walk root let fold_elements f acc root = let rec walk acc el = let acc = f acc el in List.fold_left walk acc (El.children ~only_els:true el) in walk acc root let filter_elements pred root = fold_elements (fun acc el -> if pred el then el :: acc else acc ) [] root |> List.rev (* Build element map by walking browser DOM and parsed DOM in parallel *) let create root = let raw_html = outer_html root in (* Prepend DOCTYPE if not present - outerHTML doesn't include it *) let html = let lower = String.lowercase_ascii raw_html in if String.length lower >= 9 && String.sub lower 0 9 = "" ^ raw_html in (* Parse the HTML to get a tree with locations *) let reader = Bytesrw.Bytes.Reader.of_string html in let parsed = Html5rw.parse ~collect_errors:false reader in (* Walk both trees in parallel to build the mapping. Browser elements are in document order, and so are Html5rw nodes. *) let browser_elements = fold_elements (fun acc el -> el :: acc) [] root |> List.rev in (* Extract elements from Html5rw DOM in document order *) let rec extract_html5rw_elements acc node = if Html5rw.is_element node then let children = node.Html5rw.Dom.children in let acc = node :: acc in List.fold_left extract_html5rw_elements acc children else let children = node.Html5rw.Dom.children in List.fold_left extract_html5rw_elements acc children in let html5rw_elements = extract_html5rw_elements [] (Html5rw.root parsed) |> List.rev in (* Build the location map by matching elements *) let loc_to_el = (* Find the starting point in parsed elements that matches the root tag *) let root_tag = String.lowercase_ascii (Jstr.to_string (El.tag_name root)) in let rec find_start = function | [] -> [] | h_el :: rest -> if String.lowercase_ascii h_el.Html5rw.Dom.name = root_tag then h_el :: rest else find_start rest in let html5rw_elements_aligned = find_start html5rw_elements in let rec match_elements loc_map browser_els html5rw_els = match browser_els, html5rw_els with | [], _ | _, [] -> loc_map | b_el :: b_rest, h_el :: h_rest -> let b_tag = String.lowercase_ascii (Jstr.to_string (El.tag_name b_el)) in let h_tag = String.lowercase_ascii h_el.Html5rw.Dom.name in if b_tag = h_tag then (* Tags match - record the mapping if we have a location *) let loc_map = match h_el.Html5rw.Dom.location with | Some loc -> LocMap.add (loc.line, loc.column) b_el loc_map | None -> loc_map in match_elements loc_map b_rest h_rest else (* Tags don't match - try skipping the parsed element first *) (* This handles cases where parser creates implicit elements *) match_elements loc_map browser_els h_rest in match_elements LocMap.empty browser_elements html5rw_elements_aligned in { root; html_source = html; loc_to_el }, html let find_by_location t ~line ~column = LocMap.find_opt (line, column) t.loc_to_el let find_by_location_and_tag t ~line ~column ~tag = match LocMap.find_opt (line, column) t.loc_to_el with | Some el when String.lowercase_ascii (Jstr.to_string (El.tag_name el)) = String.lowercase_ascii tag -> Some el | _ -> None let find_for_message t msg = (* Try to find element by location first *) match msg.Htmlrw_check.location with | Some loc -> (match msg.Htmlrw_check.element with | Some tag -> find_by_location_and_tag t ~line:loc.line ~column:loc.column ~tag | None -> find_by_location t ~line:loc.line ~column:loc.column) | None -> (* No location - try to find by element name if we have one *) match msg.Htmlrw_check.element with | Some tag -> (* Find first element with this tag *) let matches = filter_elements (fun el -> String.lowercase_ascii (Jstr.to_string (El.tag_name el)) = String.lowercase_ascii tag ) t.root in (match matches with | el :: _ -> Some el | [] -> None) | None -> None let html_source t = t.html_source let root_element t = t.root let selector_path ?root el = let stop_at = match root with | Some r -> Some r | None -> None in let rec build_path el acc = (* Stop if we've reached the root *) let should_stop = match stop_at with | Some r -> el_equal el r | None -> String.lowercase_ascii (Jstr.to_string (El.tag_name el)) = "body" in if should_stop then acc else let tag = String.lowercase_ascii (Jstr.to_string (El.tag_name el)) in let segment = match El.parent el with | None -> tag | Some parent -> let siblings = El.children ~only_els:true parent in let same_tag = List.filter (fun sib -> String.lowercase_ascii (Jstr.to_string (El.tag_name sib)) = tag ) siblings in if List.length same_tag <= 1 then tag else let idx = let rec find_idx i = function | [] -> 1 | sib :: rest -> if el_equal sib el then i else find_idx (i + 1) rest in find_idx 1 same_tag in Printf.sprintf "%s:nth-of-type(%d)" tag idx in let new_acc = segment :: acc in match El.parent el with | None -> new_acc | Some parent -> build_path parent new_acc in String.concat " > " (build_path el []) let short_selector ?root el = (* Try ID first *) match El.at (Jstr.v "id") el with | Some id when not (Jstr.is_empty id) -> "#" ^ Jstr.to_string id | _ -> (* Try parent ID + short path *) let rec find_id_ancestor el depth = if depth > 3 then None else match El.parent el with | None -> None | Some parent -> match El.at (Jstr.v "id") parent with | Some id when not (Jstr.is_empty id) -> Some (parent, id) | _ -> find_id_ancestor parent (depth + 1) in match find_id_ancestor el 0 with | Some (ancestor, id) -> let path = selector_path ~root:ancestor el in "#" ^ Jstr.to_string id ^ " > " ^ path | None -> selector_path ?root el