(* Test runner for html5lib-tests tree construction tests *) open Bytesrw module Parser = Html5rw.Parser module Dom = Html5rw.Dom module Report = Test_report type test_case = { input : string; expected_tree : string; expected_errors : string list; script_on : bool; fragment_context : string option; raw_lines : string; (* Original test data from .dat file *) } let _is_blank s = String.trim s = "" (* Parse a single test case from lines *) let parse_test_case lines = let raw_lines = String.concat "\n" lines in let rec parse acc = function | [] -> acc | line :: rest when String.length line > 0 && line.[0] = '#' -> let section = String.trim line in let content, remaining = collect_section rest in parse ((section, content) :: acc) remaining | _ :: rest -> parse acc rest and collect_section lines = let rec loop acc = function | [] -> (List.rev acc, []) | line :: rest when String.length line > 0 && line.[0] = '#' -> (List.rev acc, line :: rest) | line :: rest -> loop (line :: acc) rest in loop [] lines in let sections = parse [] lines in let get_section name = match List.assoc_opt name sections with | Some lines -> String.concat "\n" lines | None -> "" in let data = get_section "#data" in let document = get_section "#document" in let errors_text = get_section "#errors" in let errors = String.split_on_char '\n' errors_text |> List.filter (fun s -> String.trim s <> "") in let script_on = List.mem_assoc "#script-on" sections in let fragment = if List.mem_assoc "#document-fragment" sections then Some (get_section "#document-fragment" |> String.trim) else None in { input = data; expected_tree = document; expected_errors = errors; script_on; fragment_context = fragment; raw_lines; } (* Parse a .dat file into test cases *) let parse_dat_file content = let lines = String.split_on_char '\n' content in (* Split on empty lines followed by #data *) let rec split_tests current acc = function | [] -> if current = [] then List.rev acc else List.rev (List.rev current :: acc) | "" :: "#data" :: rest -> (* End of current test, start new one *) let new_acc = if current = [] then acc else (List.rev current :: acc) in split_tests ["#data"] new_acc rest | line :: rest -> split_tests (line :: current) acc rest in let test_groups = split_tests [] [] lines in List.filter_map (fun lines -> if List.exists (fun l -> l = "#data") lines then Some (parse_test_case lines) else None ) test_groups (* Strip "| " prefix from each line *) let strip_tree_prefix s = let lines = String.split_on_char '\n' s in let stripped = List.filter_map (fun line -> if String.length line >= 2 && String.sub line 0 2 = "| " then Some (String.sub line 2 (String.length line - 2)) else if String.trim line = "" then None else Some line ) lines in String.concat "\n" stripped (* Normalize tree output for comparison *) let normalize_tree s = let lines = String.split_on_char '\n' s in let non_empty = List.filter (fun l -> String.trim l <> "") lines in String.concat "\n" non_empty let run_test test = try let result = match test.fragment_context with | Some ctx_str -> (* Parse "namespace element" or just "element" *) let (namespace, tag_name) = match String.split_on_char ' ' ctx_str with | [ns; tag] when ns = "svg" -> (Some "svg", tag) | [ns; tag] when ns = "math" -> (Some "mathml", tag) | [tag] -> (None, tag) | _ -> (None, ctx_str) in let context = Html5rw.Parser.make_fragment_context ~tag_name ~namespace () in let reader = Bytes.Reader.of_string test.input in Html5rw.Parser.parse ~collect_errors:true ~fragment_context:context reader | None -> let reader = Bytes.Reader.of_string test.input in Html5rw.Parser.parse ~collect_errors:true reader in let actual_tree = Html5rw.Dom.to_test_format (Html5rw.Parser.root result) in let expected = normalize_tree (strip_tree_prefix test.expected_tree) in let actual = normalize_tree (strip_tree_prefix actual_tree) in let error_count = List.length (Html5rw.Parser.errors result) in let expected_error_count = List.length test.expected_errors in (expected = actual, expected, actual, error_count, expected_error_count) with e -> let expected = normalize_tree (strip_tree_prefix test.expected_tree) in (false, expected, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), 0, 0) let run_file path = let ic = open_in path in let content = really_input_string ic (in_channel_length ic) in close_in ic; let tests = parse_dat_file content in let filename = Filename.basename path in let passed = ref 0 in let failed = ref 0 in let results = ref [] in List.iteri (fun i test -> (* Skip script-on tests since we don't support scripting *) if test.script_on then () (* Skip this test *) else begin let (success, expected, actual, actual_error_count, expected_error_count) = run_test test in let description = let input_preview = String.sub test.input 0 (min 60 (String.length test.input)) in if test.fragment_context <> None then Printf.sprintf "Fragment (%s): %s" (Option.get test.fragment_context) input_preview else input_preview in let details = [ ("Fragment Context", Option.value test.fragment_context ~default:"(none)"); ("Expected Errors", string_of_int expected_error_count); ("Actual Errors", string_of_int actual_error_count); ] in let result : Report.test_result = { test_num = i + 1; description; input = test.input; expected; actual; success; details; raw_test_data = Some test.raw_lines; } in results := result :: !results; if success then incr passed else incr failed end ) tests; let file_result : Report.file_result = { filename; test_type = "Tree Construction"; passed_count = !passed; failed_count = !failed; tests = List.rev !results; } in (file_result, !passed, !failed) let () = let test_dir = Sys.argv.(1) in let files = Sys.readdir test_dir |> Array.to_list in let dat_files = List.filter (fun f -> Filename.check_suffix f ".dat" && not (String.contains f '/') (* Skip subdirectories *) ) files in let total_passed = ref 0 in let total_failed = ref 0 in let file_results = ref [] in List.iter (fun file -> let path = Filename.concat test_dir file in if Sys.is_directory path then () else begin let (file_result, passed, failed) = run_file path in total_passed := !total_passed + passed; total_failed := !total_failed + failed; file_results := file_result :: !file_results; Printf.printf "%s: %d passed, %d failed\n" file passed failed end ) (List.sort String.compare dat_files); Printf.printf "\n=== Summary ===\n"; Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed; (* Generate HTML report *) let report : Report.report = { title = "HTML5 Tree Construction Tests"; test_type = "tree-construction"; description = "These tests validate the HTML5 tree construction algorithm as specified in the WHATWG HTML Standard. \ Each test provides HTML input and the expected DOM tree structure. The parser processes the HTML and \ builds a document tree, which is then serialized and compared against the expected output. \ Tests cover various edge cases including malformed HTML, implicit element creation, foster parenting, \ adoption agency algorithm, and foreign content (SVG/MathML). Fragment parsing tests verify parsing \ in the context of specific elements."; files = List.rev !file_results; total_passed = !total_passed; total_failed = !total_failed; match_quality = None; test_type_breakdown = None; strictness_mode = None; run_timestamp = None; } in Report.generate_report report "test_html5lib_report.html"; exit (if !total_failed > 0 then 1 else 0)