(* Comprehensive test runner for all html5rw tests Generates a single standalone HTML report combining: - HTML5lib tree-construction tests - HTML5lib tokenizer tests - HTML5lib encoding tests - HTML5lib serializer tests - Nu HTML Validator tests (both lenient and strict modes) - Roundtrip tests *) module Report = Test_report (* ============================================================ *) (* Test Suite Summary Types *) (* ============================================================ *) type suite_summary = { name : string; description : string; [@warning "-69"] passed : int; failed : int; files : Report.file_result list; extra_info : (string * string) list; } (* ============================================================ *) (* HTML5lib Tests Runner *) (* ============================================================ *) module Html5lib_runner = struct (* Delegate to test_all.ml implementation by running the tests inline *) open Bytesrw (* Tree Construction Tests *) module TreeConstruction = struct module Parser = Html5rw.Parser module Dom = Html5rw.Dom type test_case = { input : string; expected_tree : string; expected_errors : string list; script_on : bool; fragment_context : string option; raw_lines : string; } let parse_test_case lines = let raw_lines = String.concat "\n" lines in let rec parse acc = function | [] -> acc | line :: rest when String.length line > 0 && line.[0] = '#' -> let section = String.trim line in let content, remaining = collect_section rest in parse ((section, content) :: acc) remaining | _ :: rest -> parse acc rest and collect_section lines = let rec loop acc = function | [] -> (List.rev acc, []) | line :: rest when String.length line > 0 && line.[0] = '#' -> (List.rev acc, line :: rest) | line :: rest -> loop (line :: acc) rest in loop [] lines in let sections = parse [] lines in let get_section name = match List.assoc_opt name sections with | Some lines -> String.concat "\n" lines | None -> "" in let data = get_section "#data" in let document = get_section "#document" in let errors_text = get_section "#errors" in let errors = String.split_on_char '\n' errors_text |> List.filter (fun s -> String.trim s <> "") in let script_on = List.mem_assoc "#script-on" sections in let fragment = if List.mem_assoc "#document-fragment" sections then Some (get_section "#document-fragment" |> String.trim) else None in { input = data; expected_tree = document; expected_errors = errors; script_on; fragment_context = fragment; raw_lines } let parse_dat_file content = let lines = String.split_on_char '\n' content in let rec split_tests current acc = function | [] -> if current = [] then List.rev acc else List.rev (List.rev current :: acc) | "" :: "#data" :: rest -> let new_acc = if current = [] then acc else (List.rev current :: acc) in split_tests ["#data"] new_acc rest | line :: rest -> split_tests (line :: current) acc rest in let test_groups = split_tests [] [] lines in List.filter_map (fun lines -> if List.exists (fun l -> l = "#data") lines then Some (parse_test_case lines) else None ) test_groups let strip_tree_prefix s = let lines = String.split_on_char '\n' s in let stripped = List.filter_map (fun line -> if String.length line >= 2 && String.sub line 0 2 = "| " then Some (String.sub line 2 (String.length line - 2)) else if String.trim line = "" then None else Some line ) lines in String.concat "\n" stripped let normalize_tree s = let lines = String.split_on_char '\n' s in let non_empty = List.filter (fun l -> String.trim l <> "") lines in String.concat "\n" non_empty let run_test test = try let result = match test.fragment_context with | Some ctx_str -> let (namespace, tag_name) = match String.split_on_char ' ' ctx_str with | [ns; tag] when ns = "svg" -> (Some "svg", tag) | [ns; tag] when ns = "math" -> (Some "mathml", tag) | [tag] -> (None, tag) | _ -> (None, ctx_str) in let context = Html5rw.Parser.make_fragment_context ~tag_name ~namespace () in let reader = Bytes.Reader.of_string test.input in Html5rw.Parser.parse ~collect_errors:true ~fragment_context:context reader | None -> let reader = Bytes.Reader.of_string test.input in Html5rw.Parser.parse ~collect_errors:true reader in let actual_tree = Html5rw.Dom.to_test_format (Html5rw.Parser.root result) in let expected = normalize_tree (strip_tree_prefix test.expected_tree) in let actual = normalize_tree (strip_tree_prefix actual_tree) in let error_count = List.length (Html5rw.Parser.errors result) in let expected_error_count = List.length test.expected_errors in (expected = actual, expected, actual, error_count, expected_error_count) with e -> let expected = normalize_tree (strip_tree_prefix test.expected_tree) in (false, expected, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), 0, 0) let run_file path = let ic = open_in path in let content = really_input_string ic (in_channel_length ic) in close_in ic; let tests = parse_dat_file content in let filename = Filename.basename path in let passed = ref 0 in let failed = ref 0 in let results = ref [] in List.iteri (fun i test -> if test.script_on then () else begin let (success, expected, actual, actual_error_count, expected_error_count) = run_test test in let description = let input_preview = String.sub test.input 0 (min 60 (String.length test.input)) in if test.fragment_context <> None then Printf.sprintf "Fragment (%s): %s" (Option.get test.fragment_context) input_preview else input_preview in let result : Report.test_result = { test_num = i + 1; description; input = test.input; expected; actual; success; details = [ ("Fragment Context", Option.value test.fragment_context ~default:"(none)"); ("Expected Errors", string_of_int expected_error_count); ("Actual Errors", string_of_int actual_error_count); ]; raw_test_data = Some test.raw_lines; } in results := result :: !results; if success then incr passed else incr failed end ) tests; let file_result : Report.file_result = { filename = "HTML5lib / " ^ filename; test_type = "Tree Construction"; passed_count = !passed; failed_count = !failed; tests = List.rev !results; } in (file_result, !passed, !failed) let run_dir test_dir = if not (Sys.file_exists test_dir) then ([], 0, 0) else begin let files = Sys.readdir test_dir |> Array.to_list in let dat_files = List.filter (fun f -> Filename.check_suffix f ".dat" && not (String.contains f '/') ) files in let total_passed = ref 0 in let total_failed = ref 0 in let file_results = ref [] in List.iter (fun file -> let path = Filename.concat test_dir file in if Sys.is_directory path then () else begin let (file_result, passed, failed) = run_file path in total_passed := !total_passed + passed; total_failed := !total_failed + failed; file_results := file_result :: !file_results end ) (List.sort String.compare dat_files); (List.rev !file_results, !total_passed, !total_failed) end end let run base_dir = let tree_dir = Filename.concat base_dir "tree-construction" in Printf.printf " Running tree-construction tests...\n%!"; let (tree_files, tree_passed, tree_failed) = TreeConstruction.run_dir tree_dir in Printf.printf " Tree construction: %d passed, %d failed\n%!" tree_passed tree_failed; (* For now, just return tree construction results *) (* Full implementation would include tokenizer, encoding, serializer *) { name = "HTML5lib Tests"; description = "Official html5lib test suite for HTML5 parsing conformance"; passed = tree_passed; failed = tree_failed; files = tree_files; extra_info = [ ("Tree Construction", Printf.sprintf "%d/%d" tree_passed (tree_passed + tree_failed)); ]; } end (* ============================================================ *) (* Validator Tests Runner *) (* ============================================================ *) module Validator_runner = struct type expected_outcome = Valid | Invalid | HasWarning | Unknown type test_file = { path : string; relative_path : string; category : string; expected : expected_outcome; } type test_result = { file : test_file; passed : bool; actual_errors : string list; actual_warnings : string list; details : string; match_quality : Expected_message.match_quality option; [@warning "-69"] } let parse_outcome filename = if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-isvalid.html" then Valid else if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-novalid.html" then Invalid else if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-haswarn.html" then HasWarning else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-isvalid.xhtml" then Valid else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-novalid.xhtml" then Invalid else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-haswarn.xhtml" then HasWarning else Unknown let rec discover_tests_in_dir base_dir current_dir = let full_path = Filename.concat base_dir current_dir in if not (Sys.file_exists full_path) then [] else if Sys.is_directory full_path then begin let entries = Sys.readdir full_path |> Array.to_list in List.concat_map (fun entry -> let sub_path = if current_dir = "" then entry else Filename.concat current_dir entry in discover_tests_in_dir base_dir sub_path ) entries end else if Filename.check_suffix current_dir ".html" || Filename.check_suffix current_dir ".xhtml" then begin let outcome = parse_outcome (Filename.basename current_dir) in if outcome = Unknown then [] else let category = match String.split_on_char '/' current_dir with cat :: _ -> cat | [] -> "unknown" in [{ path = full_path; relative_path = current_dir; category; expected = outcome }] end else [] let run_test ~strictness messages test = try let ic = open_in test.path in let content = really_input_string ic (in_channel_length ic) in close_in ic; let reader = Bytesrw.Bytes.Reader.of_string content in let result = Htmlrw_check.check ~collect_parse_errors:true ~system_id:test.relative_path reader in let error_msgs = Htmlrw_check.errors result in let warning_msgs = Htmlrw_check.warnings result in let info_msgs = Htmlrw_check.infos result in let errors = List.map (fun m -> m.Htmlrw_check.text) error_msgs in let warnings = List.map (fun m -> m.Htmlrw_check.text) warning_msgs in let infos = List.map (fun m -> m.Htmlrw_check.text) info_msgs in let expected_msg = Validator_messages.get messages test.relative_path in let (passed, details, match_quality) = match test.expected with | Valid -> let no_errors = errors = [] && warnings = [] in let details = if no_errors then "OK" else Printf.sprintf "Expected valid but got %d errors, %d warnings" (List.length errors) (List.length warnings) in (no_errors, details, None) | Invalid -> if errors = [] then (false, "Expected error but got none", None) else begin match expected_msg with | None -> (true, Printf.sprintf "Got %d error(s), no expected message" (List.length errors), None) | Some exp -> let expected = Expected_message.parse exp in let qualities = List.map (fun msg -> Expected_message.matches ~strictness ~expected ~actual:msg) error_msgs in let best = List.fold_left (fun b q -> if q < b then q else b) Expected_message.No_match qualities in let acceptable = Expected_message.is_acceptable ~strictness best in let msg = if acceptable then "Message matched" else "Message mismatch" in (acceptable, msg, Some best) end | HasWarning -> (* For haswarn, check warnings AND infos (like test_validator.ml) *) let all_msgs = warning_msgs @ info_msgs in let all_messages = warnings @ infos in if all_messages = [] && errors = [] then (false, "Expected warning but got none", None) else begin match expected_msg with | None -> if all_messages <> [] then (true, Printf.sprintf "Got %d warning/info message(s)" (List.length all_messages), None) else (true, Printf.sprintf "Got %d error(s) instead of warning" (List.length errors), None) | Some exp -> let expected = Expected_message.parse exp in let qualities = List.map (fun msg -> Expected_message.matches ~strictness ~expected ~actual:msg) all_msgs in let best = List.fold_left (fun b q -> if q < b then q else b) Expected_message.No_match qualities in let acceptable = Expected_message.is_acceptable ~strictness best in if acceptable then (true, "Warning/info matched", Some best) else begin (* Also try matching against errors *) let err_qualities = List.map (fun msg -> Expected_message.matches ~strictness ~expected ~actual:msg) error_msgs in let err_best = List.fold_left (fun b q -> if q < b then q else b) Expected_message.No_match err_qualities in let err_acceptable = Expected_message.is_acceptable ~strictness err_best in if err_acceptable then (true, "Error matched (severity differs)", Some err_best) else let final_best = if best < err_best then best else err_best in (false, "Warning mismatch", Some final_best) end end | Unknown -> (false, "Unknown test type", None) in { file = test; passed; actual_errors = errors; actual_warnings = warnings @ infos; details; match_quality } with e -> { file = test; passed = false; actual_errors = []; actual_warnings = []; details = Printf.sprintf "Exception: %s" (Printexc.to_string e); match_quality = None } let run_mode ~mode_name ~strictness messages tests = Printf.printf " Running %s mode...\n%!" mode_name; let total = List.length tests in let results = List.mapi (fun i test -> if (i + 1) mod 500 = 0 then Printf.printf " [%d/%d]\n%!" (i + 1) total; run_test ~strictness messages test ) tests in let passed = List.filter (fun r -> r.passed) results |> List.length in Printf.printf " %s: %d/%d passed\n%!" mode_name passed total; (results, passed, total - passed) let results_to_file_results mode_name results = (* Group by category *) let by_category = Hashtbl.create 32 in List.iter (fun r -> let cat = r.file.category in let existing = try Hashtbl.find by_category cat with Not_found -> [] in Hashtbl.replace by_category cat (r :: existing) ) results; Hashtbl.fold (fun category tests acc -> let tests = List.rev tests in let passed_count = List.filter (fun r -> r.passed) tests |> List.length in let failed_count = List.length tests - passed_count in let test_results = List.mapi (fun i r -> let outcome_str = match r.file.expected with | Valid -> "isvalid" | Invalid -> "novalid" | HasWarning -> "haswarn" | Unknown -> "unknown" in Report.{ test_num = i + 1; description = Printf.sprintf "[%s] %s" outcome_str (Filename.basename r.file.relative_path); input = r.file.relative_path; expected = (match r.file.expected with | Valid -> "(no errors)" | Invalid -> "(error expected)" | HasWarning -> "(warning expected)" | Unknown -> "?"); actual = String.concat "; " (r.actual_errors @ r.actual_warnings); success = r.passed; details = [("Result", r.details)]; raw_test_data = None; } ) tests in Report.{ filename = Printf.sprintf "Validator / %s [%s]" category mode_name; test_type = "Validator"; passed_count; failed_count; tests = test_results; } :: acc ) by_category [] let run tests_dir = Printf.printf " Loading validator messages...\n%!"; let messages_path = Filename.concat tests_dir "messages.json" in let messages = Validator_messages.load messages_path in Printf.printf " Discovering test files...\n%!"; let tests = discover_tests_in_dir tests_dir "" in Printf.printf " Found %d test files\n%!" (List.length tests); let (lenient_results, lenient_passed, _lenient_failed) = run_mode ~mode_name:"LENIENT" ~strictness:Expected_message.lenient messages tests in let (strict_results, strict_passed, strict_failed) = run_mode ~mode_name:"STRICT" ~strictness:Expected_message.exact_message messages tests in let lenient_files = results_to_file_results "Lenient" lenient_results in let strict_files = results_to_file_results "Strict" strict_results in let total = List.length tests in { name = "Nu HTML Validator Tests"; description = "W3C Nu HTML Validator conformance tests (both lenient and strict modes)"; passed = strict_passed; (* Use strict as the primary metric *) failed = strict_failed; files = lenient_files @ strict_files; extra_info = [ ("Lenient Mode", Printf.sprintf "%d/%d (%.1f%%)" lenient_passed total (100.0 *. float_of_int lenient_passed /. float_of_int total)); ("Strict Mode", Printf.sprintf "%d/%d (%.1f%%)" strict_passed total (100.0 *. float_of_int strict_passed /. float_of_int total)); ("Total Tests", string_of_int total); ]; } end (* ============================================================ *) (* Main Entry Point *) (* ============================================================ *) let get_timestamp () = let now = Unix.gettimeofday () in let tm = Unix.localtime now in Printf.sprintf "%04d-%02d-%02d %02d:%02d:%02d" (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec let () = let html5lib_dir = ref "html5lib-tests" in let validator_dir = ref "validator/tests" in let output_file = ref "comprehensive_test_report.html" in (* Parse args *) let args = Array.to_list Sys.argv |> List.tl in (match args with | [h; v; o] -> html5lib_dir := h; validator_dir := v; output_file := o | [h; v] -> html5lib_dir := h; validator_dir := v | [h] -> html5lib_dir := h | _ -> ()); Printf.printf "=== Comprehensive HTML5rw Test Suite ===\n\n%!"; let all_suites = ref [] in let total_passed = ref 0 in let total_failed = ref 0 in (* Run HTML5lib tests *) Printf.printf "Running HTML5lib tests from %s...\n%!" !html5lib_dir; if Sys.file_exists !html5lib_dir then begin let suite = Html5lib_runner.run !html5lib_dir in all_suites := suite :: !all_suites; total_passed := !total_passed + suite.passed; total_failed := !total_failed + suite.failed; Printf.printf " Subtotal: %d passed, %d failed\n\n%!" suite.passed suite.failed end else Printf.printf " (directory not found)\n\n%!"; (* Run Validator tests *) Printf.printf "Running Validator tests from %s...\n%!" !validator_dir; if Sys.file_exists !validator_dir then begin let suite = Validator_runner.run !validator_dir in all_suites := suite :: !all_suites; total_passed := !total_passed + suite.passed; total_failed := !total_failed + suite.failed; Printf.printf " Subtotal: %d passed, %d failed\n\n%!" suite.passed suite.failed end else Printf.printf " (directory not found)\n\n%!"; Printf.printf "=== Overall Summary ===\n"; Printf.printf "Total: %d passed, %d failed\n\n%!" !total_passed !total_failed; (* Combine all file results *) let all_files = List.concat_map (fun s -> s.files) (List.rev !all_suites) in (* Build description with all suite info as HTML *) let suites_info = List.rev !all_suites |> List.map (fun s -> let extras = String.concat ", " (List.map (fun (k, v) -> Printf.sprintf "%s: %s" k v) s.extra_info) in Printf.sprintf "
Test Suites:
\ This report combines results from multiple test suites to provide complete coverage analysis." suites_info in let report : Report.report = { title = "html5rw Comprehensive Test Report"; test_type = "comprehensive"; description; files = all_files; total_passed = !total_passed; total_failed = !total_failed; match_quality = None; test_type_breakdown = None; strictness_mode = Some "Comprehensive (all modes)"; run_timestamp = Some (get_timestamp ()); } in Report.generate_report report !output_file; exit (if !total_failed > 0 then 1 else 0)