(* Test runner for html5lib-tests tree construction tests *)
open Bytesrw
module Parser = Html5rw.Parser
module Dom = Html5rw.Dom
module Report = Test_report
type test_case = {
input : string;
expected_tree : string;
expected_errors : string list;
script_on : bool;
fragment_context : string option;
raw_lines : string; (* Original test data from .dat file *)
}
let _is_blank s = String.trim s = ""
(* Parse a single test case from lines *)
let parse_test_case lines =
let raw_lines = String.concat "\n" lines in
let rec parse acc = function
| [] -> acc
| line :: rest when String.length line > 0 && line.[0] = '#' ->
let section = String.trim line in
let content, remaining = collect_section rest in
parse ((section, content) :: acc) remaining
| _ :: rest -> parse acc rest
and collect_section lines =
let rec loop acc = function
| [] -> (List.rev acc, [])
| line :: rest when String.length line > 0 && line.[0] = '#' ->
(List.rev acc, line :: rest)
| line :: rest -> loop (line :: acc) rest
in
loop [] lines
in
let sections = parse [] lines in
let get_section name =
match List.assoc_opt name sections with
| Some lines -> String.concat "\n" lines
| None -> ""
in
let data = get_section "#data" in
let document = get_section "#document" in
let errors_text = get_section "#errors" in
let errors =
String.split_on_char '\n' errors_text
|> List.filter (fun s -> String.trim s <> "")
in
let script_on = List.mem_assoc "#script-on" sections in
let fragment =
if List.mem_assoc "#document-fragment" sections then
Some (get_section "#document-fragment" |> String.trim)
else None
in
{
input = data;
expected_tree = document;
expected_errors = errors;
script_on;
fragment_context = fragment;
raw_lines;
}
(* Parse a .dat file into test cases *)
let parse_dat_file content =
let lines = String.split_on_char '\n' content in
(* Split on empty lines followed by #data *)
let rec split_tests current acc = function
| [] ->
if current = [] then List.rev acc
else List.rev (List.rev current :: acc)
| "" :: "#data" :: rest ->
(* End of current test, start new one *)
let new_acc = if current = [] then acc else (List.rev current :: acc) in
split_tests ["#data"] new_acc rest
| line :: rest ->
split_tests (line :: current) acc rest
in
let test_groups = split_tests [] [] lines in
List.filter_map (fun lines ->
if List.exists (fun l -> l = "#data") lines then
Some (parse_test_case lines)
else None
) test_groups
(* Strip "| " prefix from each line *)
let strip_tree_prefix s =
let lines = String.split_on_char '\n' s in
let stripped = List.filter_map (fun line ->
if String.length line >= 2 && String.sub line 0 2 = "| " then
Some (String.sub line 2 (String.length line - 2))
else if String.trim line = "" then None
else Some line
) lines in
String.concat "\n" stripped
(* Normalize tree output for comparison *)
let normalize_tree s =
let lines = String.split_on_char '\n' s in
let non_empty = List.filter (fun l -> String.trim l <> "") lines in
String.concat "\n" non_empty
let run_test test =
try
let result =
match test.fragment_context with
| Some ctx_str ->
(* Parse "namespace element" or just "element" *)
let (namespace, tag_name) =
match String.split_on_char ' ' ctx_str with
| [ns; tag] when ns = "svg" -> (Some "svg", tag)
| [ns; tag] when ns = "math" -> (Some "mathml", tag)
| [tag] -> (None, tag)
| _ -> (None, ctx_str)
in
let context = Html5rw.Parser.make_fragment_context ~tag_name ~namespace () in
let reader = Bytes.Reader.of_string test.input in
Html5rw.Parser.parse ~collect_errors:true ~fragment_context:context reader
| None ->
let reader = Bytes.Reader.of_string test.input in
Html5rw.Parser.parse ~collect_errors:true reader
in
let actual_tree = Html5rw.Dom.to_test_format (Html5rw.Parser.root result) in
let expected = normalize_tree (strip_tree_prefix test.expected_tree) in
let actual = normalize_tree (strip_tree_prefix actual_tree) in
let error_count = List.length (Html5rw.Parser.errors result) in
let expected_error_count = List.length test.expected_errors in
(expected = actual, expected, actual, error_count, expected_error_count)
with e ->
let expected = normalize_tree (strip_tree_prefix test.expected_tree) in
(false, expected, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), 0, 0)
let run_file path =
let ic = open_in path in
let content = really_input_string ic (in_channel_length ic) in
close_in ic;
let tests = parse_dat_file content in
let filename = Filename.basename path in
let passed = ref 0 in
let failed = ref 0 in
let results = ref [] in
List.iteri (fun i test ->
(* Skip script-on tests since we don't support scripting *)
if test.script_on then
() (* Skip this test *)
else begin
let (success, expected, actual, actual_error_count, expected_error_count) = run_test test in
let description =
let input_preview = String.sub test.input 0 (min 60 (String.length test.input)) in
if test.fragment_context <> None then
Printf.sprintf "Fragment (%s): %s" (Option.get test.fragment_context) input_preview
else
input_preview
in
let details = [
("Fragment Context", Option.value test.fragment_context ~default:"(none)");
("Expected Errors", string_of_int expected_error_count);
("Actual Errors", string_of_int actual_error_count);
] in
let result : Report.test_result = {
test_num = i + 1;
description;
input = test.input;
expected;
actual;
success;
details;
raw_test_data = Some test.raw_lines;
} in
results := result :: !results;
if success then incr passed else incr failed
end
) tests;
let file_result : Report.file_result = {
filename;
test_type = "Tree Construction";
passed_count = !passed;
failed_count = !failed;
tests = List.rev !results;
} in
(file_result, !passed, !failed)
let () =
let test_dir = Sys.argv.(1) in
let files = Sys.readdir test_dir |> Array.to_list in
let dat_files = List.filter (fun f ->
Filename.check_suffix f ".dat" &&
not (String.contains f '/') (* Skip subdirectories *)
) files in
let total_passed = ref 0 in
let total_failed = ref 0 in
let file_results = ref [] in
List.iter (fun file ->
let path = Filename.concat test_dir file in
if Sys.is_directory path then () else begin
let (file_result, passed, failed) = run_file path in
total_passed := !total_passed + passed;
total_failed := !total_failed + failed;
file_results := file_result :: !file_results;
Printf.printf "%s: %d passed, %d failed\n" file passed failed
end
) (List.sort String.compare dat_files);
Printf.printf "\n=== Summary ===\n";
Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed;
(* Generate HTML report *)
let report : Report.report = {
title = "HTML5 Tree Construction Tests";
test_type = "tree-construction";
description = "These tests validate the HTML5 tree construction algorithm as specified in the WHATWG HTML Standard. \
Each test provides HTML input and the expected DOM tree structure. The parser processes the HTML and \
builds a document tree, which is then serialized and compared against the expected output. \
Tests cover various edge cases including malformed HTML, implicit element creation, foster parenting, \
adoption agency algorithm, and foreign content (SVG/MathML). Fragment parsing tests verify parsing \
in the context of specific elements.";
files = List.rev !file_results;
total_passed = !total_passed;
total_failed = !total_failed;
match_quality = None;
test_type_breakdown = None;
strictness_mode = None;
run_timestamp = None;
} in
Report.generate_report report "test_html5lib_report.html";
exit (if !total_failed > 0 then 1 else 0)