(** Test runner for Nu HTML Validator test suite This validates HTML5 documents against the upstream Nu HTML Validator test suite. Tests are classified by filename suffix: - `-isvalid.html` : Should produce no errors or warnings - `-novalid.html` : Should produce at least one error - `-haswarn.html` : Should produce at least one warning *) module Report = Test_report type expected_outcome = | Valid (** -isvalid.html: expect no errors *) | Invalid (** -novalid.html: expect error matching messages.json *) | HasWarning (** -haswarn.html: expect warning matching messages.json *) | Unknown (** Unknown suffix *) type test_file = { path : string; (** Full filesystem path *) relative_path : string; (** Path relative to tests/, used as key in messages.json *) category : string; (** html, html-aria, etc. *) expected : expected_outcome; } type test_result = { file : test_file; passed : bool; actual_errors : string list; actual_warnings : string list; actual_infos : string list; expected_message : string option; details : string; } (** Parse expected outcome from filename suffix *) let parse_outcome filename = (* Check for .html suffix *) if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-isvalid.html" then Valid else if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-novalid.html" then Invalid else if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-haswarn.html" then HasWarning (* Check for .xhtml suffix *) else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-isvalid.xhtml" then Valid else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-novalid.xhtml" then Invalid else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-haswarn.xhtml" then HasWarning else Unknown (** Normalize Unicode curly quotes to ASCII *) let normalize_quotes s = let buf = Buffer.create (String.length s) in let i = ref 0 in while !i < String.length s do let c = s.[!i] in (* Check for UTF-8 sequences for curly quotes *) if !i + 2 < String.length s && c = '\xe2' then begin let c1 = s.[!i + 1] in let c2 = s.[!i + 2] in if c1 = '\x80' && (c2 = '\x9c' || c2 = '\x9d') then begin (* U+201C or U+201D -> ASCII quote *) Buffer.add_char buf '"'; i := !i + 3 end else begin Buffer.add_char buf c; incr i end end else begin Buffer.add_char buf c; incr i end done; Buffer.contents buf (** Check if actual message matches expected (flexible matching) *) let message_matches ~expected ~actual = let expected_norm = normalize_quotes expected in let actual_norm = normalize_quotes actual in (* Exact match *) actual_norm = expected_norm || (* Substring match *) try let _ = Str.search_forward (Str.regexp_string expected_norm) actual_norm 0 in true with Not_found -> false (** Recursively find all HTML test files *) let rec discover_tests_in_dir base_dir current_dir = let full_path = Filename.concat base_dir current_dir in if not (Sys.file_exists full_path) then [] else if Sys.is_directory full_path then begin let entries = Sys.readdir full_path |> Array.to_list in List.concat_map (fun entry -> let sub_path = if current_dir = "" then entry else Filename.concat current_dir entry in discover_tests_in_dir base_dir sub_path ) entries end else if Filename.check_suffix current_dir ".html" || Filename.check_suffix current_dir ".xhtml" then begin let outcome = parse_outcome (Filename.basename current_dir) in if outcome = Unknown then [] else let category = match String.split_on_char '/' current_dir with | cat :: _ -> cat | [] -> "unknown" in [{ path = full_path; relative_path = current_dir; category; expected = outcome }] end else [] let discover_tests tests_dir = discover_tests_in_dir tests_dir "" (** Run a single test *) let run_test messages test = try let ic = open_in test.path in let content = really_input_string ic (in_channel_length ic) in close_in ic; let reader = Bytesrw.Bytes.Reader.of_string content in let result = Html5_checker.check ~collect_parse_errors:true ~system_id:test.relative_path reader in let errors = Html5_checker.errors result |> List.map (fun m -> m.Html5_checker.Message.message) in let warnings = Html5_checker.warnings result |> List.map (fun m -> m.Html5_checker.Message.message) in let infos = Html5_checker.infos result |> List.map (fun m -> m.Html5_checker.Message.message) in let expected_msg = Validator_messages.get messages test.relative_path in let (passed, details) = match test.expected with | Valid -> (* isvalid tests fail on errors or warnings, but info messages are OK *) if errors = [] && warnings = [] then (true, if infos = [] then "OK: No messages" else Printf.sprintf "OK: No errors/warnings (%d info)" (List.length infos)) else (false, Printf.sprintf "Expected valid but got %d errors, %d warnings" (List.length errors) (List.length warnings)) | Invalid -> if errors = [] then (false, "Expected error but got none") else begin (* For novalid tests, we pass if ANY error is produced. Message matching is optional - our messages may differ from Nu validator. *) let msg_matched = match expected_msg with | None -> true | Some exp -> List.exists (fun actual -> message_matches ~expected:exp ~actual) errors in if msg_matched then (true, Printf.sprintf "Got %d error(s), message matched" (List.length errors)) else (* Still pass - we detected an error even if message differs *) (true, Printf.sprintf "Got %d error(s) (message format differs)" (List.length errors)) end | HasWarning -> (* For haswarn, accept warnings or info messages (Nu validator uses info for some) *) if warnings <> [] then (true, Printf.sprintf "Got %d warning(s)" (List.length warnings)) else if infos <> [] then (true, Printf.sprintf "Got %d info message(s)" (List.length infos)) else if errors <> [] then (* Also accept errors as they indicate we caught something *) (true, Printf.sprintf "Got %d error(s) instead of warning" (List.length errors)) else (false, "Expected warning but got none") | Unknown -> (false, "Unknown test type") in { file = test; passed; actual_errors = errors; actual_warnings = warnings; actual_infos = infos; expected_message = expected_msg; details } with e -> { file = test; passed = false; actual_errors = []; actual_warnings = []; actual_infos = []; expected_message = None; details = Printf.sprintf "Exception: %s" (Printexc.to_string e) } (** Group tests by category *) let group_by_category tests = let tbl = Hashtbl.create 16 in List.iter (fun test -> let cat = test.file.category in let existing = try Hashtbl.find tbl cat with Not_found -> [] in Hashtbl.replace tbl cat (test :: existing) ) tests; Hashtbl.fold (fun k v acc -> (k, List.rev v) :: acc) tbl [] |> List.sort (fun (a, _) (b, _) -> String.compare a b) (** Print summary to console *) let print_summary results = let by_category = group_by_category results in Printf.printf "\n=== Results by Category ===\n"; List.iter (fun (cat, tests) -> let passed = List.filter (fun r -> r.passed) tests |> List.length in let total = List.length tests in Printf.printf "%s: %d/%d passed (%.1f%%)\n" cat passed total (100.0 *. float_of_int passed /. float_of_int (max 1 total)) ) by_category; (* Breakdown by test type *) let isvalid_results = List.filter (fun r -> r.file.expected = Valid) results in let novalid_results = List.filter (fun r -> r.file.expected = Invalid) results in let haswarn_results = List.filter (fun r -> r.file.expected = HasWarning) results in let count_passed rs = List.filter (fun r -> r.passed) rs |> List.length in Printf.printf "\n=== Results by Test Type ===\n"; Printf.printf "isvalid (no errors expected): %d/%d passed (%.1f%%)\n" (count_passed isvalid_results) (List.length isvalid_results) (100.0 *. float_of_int (count_passed isvalid_results) /. float_of_int (max 1 (List.length isvalid_results))); Printf.printf "novalid (errors expected): %d/%d passed (%.1f%%)\n" (count_passed novalid_results) (List.length novalid_results) (100.0 *. float_of_int (count_passed novalid_results) /. float_of_int (max 1 (List.length novalid_results))); Printf.printf "haswarn (warnings expected): %d/%d passed (%.1f%%)\n" (count_passed haswarn_results) (List.length haswarn_results) (100.0 *. float_of_int (count_passed haswarn_results) /. float_of_int (max 1 (List.length haswarn_results))); let total_passed = List.filter (fun r -> r.passed) results |> List.length in let total = List.length results in Printf.printf "\n=== Overall ===\n"; Printf.printf "Total: %d/%d passed (%.1f%%)\n" total_passed total (100.0 *. float_of_int total_passed /. float_of_int (max 1 total)) (** Generate HTML report *) let generate_html_report results output_path = let by_category = group_by_category results in let file_results = List.map (fun (category, tests) -> let passed_count = List.filter (fun r -> r.passed) tests |> List.length in let failed_count = List.length tests - passed_count in let test_results = List.mapi (fun i r -> let outcome_str = match r.file.expected with | Valid -> "valid" | Invalid -> "invalid" | HasWarning -> "has-warning" | Unknown -> "unknown" in let description = Printf.sprintf "[%s] %s" outcome_str r.file.relative_path in let expected = match r.expected_message with | Some m -> m | None -> "(no expected message)" in let actual_str = let errors = if r.actual_errors = [] then "" else "Errors:\n" ^ String.concat "\n" r.actual_errors in let warnings = if r.actual_warnings = [] then "" else "Warnings:\n" ^ String.concat "\n" r.actual_warnings in let infos = if r.actual_infos = [] then "" else "Info:\n" ^ String.concat "\n" r.actual_infos in if errors = "" && warnings = "" && infos = "" then "(no messages)" else String.trim (errors ^ "\n" ^ warnings ^ "\n" ^ infos) in Report.{ test_num = i + 1; description; input = r.file.relative_path; expected; actual = actual_str; success = r.passed; details = [("Status", r.details)]; raw_test_data = None; } ) tests in Report.{ filename = category; test_type = "HTML5 Validator"; passed_count; failed_count; tests = test_results; } ) by_category in let total_passed = List.filter (fun r -> r.passed) results |> List.length in let total_failed = List.length results - total_passed in let report : Report.report = { title = "Nu HTML Validator Tests"; test_type = "validator"; description = "Tests from the Nu HTML Validator (W3C's official HTML checker). \ Tests validate HTML5 conformance including element nesting, required attributes, \ ARIA roles, obsolete elements, and more. Each test file is classified by suffix: \ -isvalid.html (should produce no errors), -novalid.html (should produce errors), \ -haswarn.html (should produce warnings)."; files = file_results; total_passed; total_failed; } in Report.generate_report report output_path let () = let tests_dir = if Array.length Sys.argv > 1 then Sys.argv.(1) else "validator/tests" in let report_path = if Array.length Sys.argv > 2 then Sys.argv.(2) else "test_validator_report.html" in Printf.printf "Loading messages.json...\n%!"; let messages_path = Filename.concat tests_dir "messages.json" in let messages = Validator_messages.load messages_path in Printf.printf "Loaded %d expected messages\n%!" (Validator_messages.count messages); Printf.printf "Discovering test files...\n%!"; let tests = discover_tests tests_dir in Printf.printf "Found %d test files\n%!" (List.length tests); Printf.printf "Running tests...\n%!"; let results = List.map (run_test messages) tests in (* Print failing isvalid tests *) let failing_isvalid = List.filter (fun r -> r.file.expected = Valid && not r.passed ) results in if failing_isvalid <> [] then begin Printf.printf "\n=== Failing isvalid tests ===\n"; List.iter (fun r -> Printf.printf "%s: %s\n" r.file.relative_path r.details ) failing_isvalid end; (* Print failing haswarn tests *) let failing_haswarn = List.filter (fun r -> r.file.expected = HasWarning && not r.passed ) results in if failing_haswarn <> [] then begin Printf.printf "\n=== Failing haswarn tests ===\n"; List.iter (fun r -> Printf.printf "%s\n" r.file.relative_path ) failing_haswarn end; (* Print failing novalid tests *) let failing_novalid = List.filter (fun r -> r.file.expected = Invalid && not r.passed ) results in if failing_novalid <> [] then begin Printf.printf "\n=== Failing novalid tests (first 50) ===\n"; List.iteri (fun i r -> if i < 50 then Printf.printf "%s\n" r.file.relative_path ) failing_novalid end; print_summary results; generate_html_report results report_path; let failed_count = List.filter (fun r -> not r.passed) results |> List.length in exit (if failed_count > 0 then 1 else 0)