(** Test runner for Nu HTML Validator test suite This validates HTML5 documents against the upstream Nu HTML Validator test suite. Tests are classified by filename suffix: - `-isvalid.html` : Should produce no errors or warnings - `-novalid.html` : Should produce at least one error - `-haswarn.html` : Should produce at least one warning *) module Report = Test_report type expected_outcome = | Valid (** -isvalid.html: expect no errors *) | Invalid (** -novalid.html: expect error matching messages.json *) | HasWarning (** -haswarn.html: expect warning matching messages.json *) | Unknown (** Unknown suffix *) type test_file = { path : string; (** Full filesystem path *) relative_path : string; (** Path relative to tests/, used as key in messages.json *) category : string; (** html, html-aria, etc. *) expected : expected_outcome; } type test_result = { file : test_file; passed : bool; actual_errors : string list; actual_warnings : string list; actual_infos : string list; expected_message : string option; match_quality : Expected_message.match_quality option; (** How well did message match? *) details : string; } (** Parse expected outcome from filename suffix *) let parse_outcome filename = (* Check for .html suffix *) if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-isvalid.html" then Valid else if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-novalid.html" then Invalid else if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-haswarn.html" then HasWarning (* Check for .xhtml suffix *) else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-isvalid.xhtml" then Valid else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-novalid.xhtml" then Invalid else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-haswarn.xhtml" then HasWarning else Unknown (** Current strictness setting - can be set via --strict flag *) let strictness = ref Expected_message.lenient (** Find best matching message and return (found_acceptable, best_quality) *) let find_best_match ~expected_str ~actual_msgs = let expected = Expected_message.parse expected_str in let qualities = List.map (fun msg -> Expected_message.matches ~strictness:!strictness ~expected ~actual:msg ) actual_msgs in let best_quality = List.fold_left (fun best q -> (* Lower variant = better match in our type definition *) if q < best then q else best ) Expected_message.No_match qualities in let acceptable = Expected_message.is_acceptable ~strictness:!strictness best_quality in (acceptable, best_quality) (** Recursively find all HTML test files *) let rec discover_tests_in_dir base_dir current_dir = let full_path = Filename.concat base_dir current_dir in if not (Sys.file_exists full_path) then [] else if Sys.is_directory full_path then begin let entries = Sys.readdir full_path |> Array.to_list in List.concat_map (fun entry -> let sub_path = if current_dir = "" then entry else Filename.concat current_dir entry in discover_tests_in_dir base_dir sub_path ) entries end else if Filename.check_suffix current_dir ".html" || Filename.check_suffix current_dir ".xhtml" then begin let outcome = parse_outcome (Filename.basename current_dir) in if outcome = Unknown then [] else let category = match String.split_on_char '/' current_dir with | cat :: _ -> cat | [] -> "unknown" in [{ path = full_path; relative_path = current_dir; category; expected = outcome }] end else [] let discover_tests tests_dir = discover_tests_in_dir tests_dir "" (** Run a single test *) let run_test messages test = try let ic = open_in test.path in let content = really_input_string ic (in_channel_length ic) in close_in ic; let reader = Bytesrw.Bytes.Reader.of_string content in let result = Htmlrw_check.check ~collect_parse_errors:true ~system_id:test.relative_path reader in (* Keep full message objects for proper matching *) let error_msgs = Htmlrw_check.errors result in let warning_msgs = Htmlrw_check.warnings result in let info_msgs = Htmlrw_check.infos result in (* Extract text for reporting *) let errors = List.map (fun m -> m.Htmlrw_check.text) error_msgs in let warnings = List.map (fun m -> m.Htmlrw_check.text) warning_msgs in let infos = List.map (fun m -> m.Htmlrw_check.text) info_msgs in let expected_msg = Validator_messages.get messages test.relative_path in let (passed, match_quality, details) = match test.expected with | Valid -> (* isvalid tests fail on errors or warnings, but info messages are OK *) if errors = [] && warnings = [] then (true, None, if infos = [] then "OK: No messages" else Printf.sprintf "OK: No errors/warnings (%d info)" (List.length infos)) else (false, None, Printf.sprintf "Expected valid but got %d errors, %d warnings" (List.length errors) (List.length warnings)) | Invalid -> if errors = [] then (false, None, "Expected error but got none") else begin (* For novalid tests, require message match when expected message is provided *) match expected_msg with | None -> (* No expected message - pass if any error detected *) (true, None, Printf.sprintf "Got %d error(s), no expected message to match" (List.length errors)) | Some exp -> let (matched, quality) = find_best_match ~expected_str:exp ~actual_msgs:error_msgs in if matched then (true, Some quality, Printf.sprintf "Got %d error(s), match: %s" (List.length errors) (Expected_message.match_quality_to_string quality)) else (* FAIL if message doesn't match *) (false, Some quality, Printf.sprintf "Message mismatch (quality: %s).\n Expected: %s\n Got: %s" (Expected_message.match_quality_to_string quality) exp (String.concat "\n " errors)) end | HasWarning -> (* For haswarn, require message match against warnings or infos *) let all_msgs = warning_msgs @ info_msgs in let all_messages = warnings @ infos in if all_messages = [] && errors = [] then (false, None, "Expected warning but got none") else begin match expected_msg with | None -> if all_messages <> [] then (true, None, Printf.sprintf "Got %d warning/info message(s)" (List.length all_messages)) else (true, None, Printf.sprintf "Got %d error(s) instead of warning" (List.length errors)) | Some exp -> let (warn_matched, warn_quality) = find_best_match ~expected_str:exp ~actual_msgs:all_msgs in if warn_matched then (true, Some warn_quality, Printf.sprintf "Got %d warning/info message(s), match: %s" (List.length all_messages) (Expected_message.match_quality_to_string warn_quality)) else begin let (err_matched, err_quality) = find_best_match ~expected_str:exp ~actual_msgs:error_msgs in if err_matched then (* Accept error if message matches (severity might differ) *) (true, Some err_quality, Printf.sprintf "Got error instead of warning, match: %s" (Expected_message.match_quality_to_string err_quality)) else let best = if warn_quality < err_quality then warn_quality else err_quality in (false, Some best, Printf.sprintf "Message mismatch (quality: %s).\n Expected: %s\n Got warnings: %s\n Got errors: %s" (Expected_message.match_quality_to_string best) exp (String.concat "\n " (if all_messages = [] then ["(none)"] else all_messages)) (String.concat "\n " (if errors = [] then ["(none)"] else errors))) end end | Unknown -> (false, None, "Unknown test type") in { file = test; passed; actual_errors = errors; actual_warnings = warnings; actual_infos = infos; expected_message = expected_msg; match_quality; details } with e -> { file = test; passed = false; actual_errors = []; actual_warnings = []; actual_infos = []; expected_message = None; match_quality = None; details = Printf.sprintf "Exception: %s" (Printexc.to_string e) } (** Group tests by category *) let group_by_category tests = let tbl = Hashtbl.create 16 in List.iter (fun test -> let cat = test.file.category in let existing = try Hashtbl.find tbl cat with Not_found -> [] in Hashtbl.replace tbl cat (test :: existing) ) tests; Hashtbl.fold (fun k v acc -> (k, List.rev v) :: acc) tbl [] |> List.sort (fun (a, _) (b, _) -> String.compare a b) (** Print summary to console *) let print_summary results = let by_category = group_by_category results in Printf.printf "\n=== Results by Category ===\n"; List.iter (fun (cat, tests) -> let passed = List.filter (fun r -> r.passed) tests |> List.length in let total = List.length tests in Printf.printf "%s: %d/%d passed (%.1f%%)\n" cat passed total (100.0 *. float_of_int passed /. float_of_int (max 1 total)) ) by_category; (* Breakdown by test type *) let isvalid_results = List.filter (fun r -> r.file.expected = Valid) results in let novalid_results = List.filter (fun r -> r.file.expected = Invalid) results in let haswarn_results = List.filter (fun r -> r.file.expected = HasWarning) results in let count_passed rs = List.filter (fun r -> r.passed) rs |> List.length in Printf.printf "\n=== Results by Test Type ===\n"; Printf.printf "isvalid (no errors expected): %d/%d passed (%.1f%%)\n" (count_passed isvalid_results) (List.length isvalid_results) (100.0 *. float_of_int (count_passed isvalid_results) /. float_of_int (max 1 (List.length isvalid_results))); Printf.printf "novalid (errors expected): %d/%d passed (%.1f%%)\n" (count_passed novalid_results) (List.length novalid_results) (100.0 *. float_of_int (count_passed novalid_results) /. float_of_int (max 1 (List.length novalid_results))); Printf.printf "haswarn (warnings expected): %d/%d passed (%.1f%%)\n" (count_passed haswarn_results) (List.length haswarn_results) (100.0 *. float_of_int (count_passed haswarn_results) /. float_of_int (max 1 (List.length haswarn_results))); let total_passed = List.filter (fun r -> r.passed) results |> List.length in let total = List.length results in Printf.printf "\n=== Overall ===\n"; Printf.printf "Total: %d/%d passed (%.1f%%)\n" total_passed total (100.0 *. float_of_int total_passed /. float_of_int (max 1 total)); (* Match quality breakdown *) let count_quality q = List.filter (fun r -> match r.match_quality with Some mq -> mq = q | None -> false ) results |> List.length in let exact = count_quality Expected_message.Exact_match in let code_match = count_quality Expected_message.Code_match in let msg_match = count_quality Expected_message.Message_match in let substring = count_quality Expected_message.Substring_match in let sev_mismatch = count_quality Expected_message.Severity_mismatch in let no_match = count_quality Expected_message.No_match in let no_quality = List.filter (fun r -> r.match_quality = None) results |> List.length in Printf.printf "\n=== Match Quality ===\n"; let mode_name = if !strictness = Expected_message.strict then "STRICT (full)" else if !strictness = Expected_message.exact_message then "STRICT (exact message)" else "lenient" in Printf.printf "Mode: %s\n" mode_name; Printf.printf "Exact matches: %d\n" exact; Printf.printf "Code matches: %d\n" code_match; Printf.printf "Message matches: %d\n" msg_match; Printf.printf "Substring matches: %d\n" substring; Printf.printf "Severity mismatches: %d\n" sev_mismatch; Printf.printf "No matches: %d\n" no_match; Printf.printf "N/A (isvalid or no expected): %d\n" no_quality (** Read HTML source file for display in report *) let read_html_source path = try let ic = open_in path in let content = really_input_string ic (in_channel_length ic) in close_in ic; Some content with _ -> None (** Generate HTML report *) let generate_html_report results output_path = let by_category = group_by_category results in let file_results = List.map (fun (category, tests) -> let passed_count = List.filter (fun r -> r.passed) tests |> List.length in let failed_count = List.length tests - passed_count in let test_results = List.mapi (fun i r -> let outcome_str = match r.file.expected with | Valid -> "isvalid" | Invalid -> "novalid" | HasWarning -> "haswarn" | Unknown -> "unknown" in let description = Printf.sprintf "[%s] %s" outcome_str (Filename.basename r.file.relative_path) in let expected = match r.expected_message with | Some m -> m | None -> match r.file.expected with | Valid -> "(should produce no errors or warnings)" | Invalid -> "(should produce at least one error)" | HasWarning -> "(should produce at least one warning)" | Unknown -> "(unknown test type)" in let actual_str = let errors = if r.actual_errors = [] then "" else "Errors:\n • " ^ String.concat "\n • " r.actual_errors in let warnings = if r.actual_warnings = [] then "" else "Warnings:\n • " ^ String.concat "\n • " r.actual_warnings in let infos = if r.actual_infos = [] then "" else "Info:\n • " ^ String.concat "\n • " r.actual_infos in if errors = "" && warnings = "" && infos = "" then "(no messages produced)" else String.trim (errors ^ (if errors <> "" && warnings <> "" then "\n\n" else "") ^ warnings ^ (if (errors <> "" || warnings <> "") && infos <> "" then "\n\n" else "") ^ infos) in let match_quality_str = match r.match_quality with | Some q -> Expected_message.match_quality_to_string q | None -> "N/A" in Report.{ test_num = i + 1; description; input = r.file.relative_path; expected; actual = actual_str; success = r.passed; details = [ ("Result", r.details); ("Match Quality", match_quality_str); ]; raw_test_data = read_html_source r.file.path; } ) tests in Report.{ filename = category; test_type = "HTML5 Validator"; passed_count; failed_count; tests = test_results; } ) by_category in let total_passed = List.filter (fun r -> r.passed) results |> List.length in let total_failed = List.length results - total_passed in (* Compute match quality stats *) let count_quality q = List.filter (fun r -> match r.match_quality with Some mq -> mq = q | None -> false ) results |> List.length in let match_quality_stats : Report.match_quality_stats = { exact_matches = count_quality Expected_message.Exact_match; code_matches = count_quality Expected_message.Code_match; message_matches = count_quality Expected_message.Message_match; substring_matches = count_quality Expected_message.Substring_match; severity_mismatches = count_quality Expected_message.Severity_mismatch; no_matches = count_quality Expected_message.No_match; not_applicable = List.filter (fun r -> r.match_quality = None) results |> List.length; } in (* Compute test type stats *) let isvalid_results = List.filter (fun r -> r.file.expected = Valid) results in let novalid_results = List.filter (fun r -> r.file.expected = Invalid) results in let haswarn_results = List.filter (fun r -> r.file.expected = HasWarning) results in let count_passed rs = List.filter (fun r -> r.passed) rs |> List.length in let test_type_stats : Report.test_type_stats = { isvalid_passed = count_passed isvalid_results; isvalid_total = List.length isvalid_results; novalid_passed = count_passed novalid_results; novalid_total = List.length novalid_results; haswarn_passed = count_passed haswarn_results; haswarn_total = List.length haswarn_results; } in let mode_name = if !strictness = Expected_message.strict then "STRICT (full)" else if !strictness = Expected_message.exact_message then "STRICT (exact message)" else "lenient" in (* Get current timestamp *) let now = Unix.gettimeofday () in let tm = Unix.localtime now in let timestamp = Printf.sprintf "%04d-%02d-%02d %02d:%02d:%02d" (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec in let report : Report.report = { title = "Nu HTML Validator Tests"; test_type = "validator"; description = "Tests from the Nu HTML Validator (W3C's official HTML checker). \ Tests validate HTML5 conformance including element nesting, required attributes, \ ARIA roles, obsolete elements, and more. Each test file is classified by suffix: \ -isvalid.html (should produce no errors), -novalid.html (should produce errors), \ -haswarn.html (should produce warnings)."; files = file_results; total_passed; total_failed; match_quality = Some match_quality_stats; test_type_breakdown = Some test_type_stats; strictness_mode = Some mode_name; run_timestamp = Some timestamp; } in Report.generate_report report output_path (** Run tests with a given strictness and return results *) let run_all_tests ~mode_name ~strictness_setting messages tests = strictness := strictness_setting; Printf.printf "\n=== Running in %s mode ===\n%!" mode_name; let total = List.length tests in let results = List.mapi (fun i test -> Printf.printf "\r[%d/%d] %s%!" (i + 1) total test.relative_path; run_test messages test ) tests in Printf.printf "\n%!"; results (** Print failures for a test run *) let print_failures mode_name results = Printf.printf "\n--- %s mode results ---\n" mode_name; let failing_isvalid = List.filter (fun r -> r.file.expected = Valid && not r.passed ) results in if failing_isvalid <> [] then begin Printf.printf "Failing isvalid tests:\n"; List.iter (fun r -> Printf.printf " %s: %s\n" r.file.relative_path r.details ) failing_isvalid end; let failing_haswarn = List.filter (fun r -> r.file.expected = HasWarning && not r.passed ) results in if failing_haswarn <> [] then begin Printf.printf "Failing haswarn tests:\n"; List.iter (fun r -> Printf.printf " %s\n" r.file.relative_path ) failing_haswarn end; let failing_novalid = List.filter (fun r -> r.file.expected = Invalid && not r.passed ) results in if failing_novalid <> [] then begin Printf.printf "Failing novalid tests (first 20):\n"; List.iteri (fun i r -> if i < 20 then Printf.printf " %s\n" r.file.relative_path ) failing_novalid end; let passed = List.filter (fun r -> r.passed) results |> List.length in let total = List.length results in Printf.printf "%s: %d/%d passed (%.1f%%)\n%!" mode_name passed total (100.0 *. float_of_int passed /. float_of_int total) (** Generate combined HTML report for both modes *) let generate_combined_html_report ~lenient_results ~strict_results output_path = (* Helper to build file results from a set of results *) let build_file_results results = let by_category = group_by_category results in List.map (fun (category, tests) -> let passed_count = List.filter (fun r -> r.passed) tests |> List.length in let failed_count = List.length tests - passed_count in let test_results = List.mapi (fun i r -> let outcome_str = match r.file.expected with | Valid -> "isvalid" | Invalid -> "novalid" | HasWarning -> "haswarn" | Unknown -> "unknown" in let description = Printf.sprintf "[%s] %s" outcome_str (Filename.basename r.file.relative_path) in let expected = match r.expected_message with | Some m -> m | None -> match r.file.expected with | Valid -> "(should produce no errors or warnings)" | Invalid -> "(should produce at least one error)" | HasWarning -> "(should produce at least one warning)" | Unknown -> "(unknown test type)" in let actual_str = let errors = if r.actual_errors = [] then "" else "Errors:\n • " ^ String.concat "\n • " r.actual_errors in let warnings = if r.actual_warnings = [] then "" else "Warnings:\n • " ^ String.concat "\n • " r.actual_warnings in let infos = if r.actual_infos = [] then "" else "Info:\n • " ^ String.concat "\n • " r.actual_infos in if errors = "" && warnings = "" && infos = "" then "(no messages produced)" else String.trim (errors ^ (if errors <> "" && warnings <> "" then "\n\n" else "") ^ warnings ^ (if (errors <> "" || warnings <> "") && infos <> "" then "\n\n" else "") ^ infos) in let match_quality_str = match r.match_quality with | Some q -> Expected_message.match_quality_to_string q | None -> "N/A" in Report.{ test_num = i + 1; description; input = r.file.relative_path; expected; actual = actual_str; success = r.passed; details = [ ("Result", r.details); ("Match Quality", match_quality_str); ]; raw_test_data = read_html_source r.file.path; } ) tests in Report.{ filename = category; test_type = "HTML5 Validator"; passed_count; failed_count; tests = test_results; } ) by_category in let compute_stats results mode_name = let total_passed = List.filter (fun r -> r.passed) results |> List.length in let total_failed = List.length results - total_passed in let count_quality q = List.filter (fun r -> match r.match_quality with Some mq -> mq = q | None -> false ) results |> List.length in let match_quality_stats : Report.match_quality_stats = { exact_matches = count_quality Expected_message.Exact_match; code_matches = count_quality Expected_message.Code_match; message_matches = count_quality Expected_message.Message_match; substring_matches = count_quality Expected_message.Substring_match; severity_mismatches = count_quality Expected_message.Severity_mismatch; no_matches = count_quality Expected_message.No_match; not_applicable = List.filter (fun r -> r.match_quality = None) results |> List.length; } in let isvalid_results = List.filter (fun r -> r.file.expected = Valid) results in let novalid_results = List.filter (fun r -> r.file.expected = Invalid) results in let haswarn_results = List.filter (fun r -> r.file.expected = HasWarning) results in let count_passed rs = List.filter (fun r -> r.passed) rs |> List.length in let test_type_stats : Report.test_type_stats = { isvalid_passed = count_passed isvalid_results; isvalid_total = List.length isvalid_results; novalid_passed = count_passed novalid_results; novalid_total = List.length novalid_results; haswarn_passed = count_passed haswarn_results; haswarn_total = List.length haswarn_results; } in (total_passed, total_failed, match_quality_stats, test_type_stats, mode_name) in let lenient_stats = compute_stats lenient_results "lenient" in let strict_stats = compute_stats strict_results "strict" in (* Use strict results for the main report, but include both in description *) let (strict_passed, strict_failed, strict_mq, strict_tt, _) = strict_stats in let (lenient_passed, _lenient_failed, _, _, _) = lenient_stats in let now = Unix.gettimeofday () in let tm = Unix.localtime now in let timestamp = Printf.sprintf "%04d-%02d-%02d %02d:%02d:%02d" (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec in let total = List.length strict_results in let description = Printf.sprintf "Tests from the Nu HTML Validator (W3C's official HTML checker). \ Tests validate HTML5 conformance including element nesting, required attributes, \ ARIA roles, obsolete elements, and more.\n\n\ LENIENT mode: %d/%d passed (%.1f%%) - allows substring matching\n\ STRICT mode: %d/%d passed (%.1f%%) - requires exact message matching" lenient_passed total (100.0 *. float_of_int lenient_passed /. float_of_int total) strict_passed total (100.0 *. float_of_int strict_passed /. float_of_int total) in let report : Report.report = { title = "Nu HTML Validator Tests (Lenient + Strict)"; test_type = "validator"; description; files = build_file_results strict_results; (* Show strict results in detail *) total_passed = strict_passed; total_failed = strict_failed; match_quality = Some strict_mq; test_type_breakdown = Some strict_tt; strictness_mode = Some (Printf.sprintf "BOTH (Lenient: %d/%d, Strict: %d/%d)" lenient_passed total strict_passed total); run_timestamp = Some timestamp; } in Report.generate_report report output_path let () = (* Parse command line arguments *) let args = Array.to_list Sys.argv |> List.tl in let is_strict = List.mem "--strict" args in let is_both = List.mem "--both" args in let non_flag_args = List.filter (fun s -> not (String.length s > 0 && s.[0] = '-')) args in let tests_dir = match non_flag_args with x :: _ -> x | [] -> "validator/tests" in let report_path = match non_flag_args with _ :: x :: _ -> x | _ -> "test_validator_report.html" in Printf.printf "Loading messages.json...\n%!"; let messages_path = Filename.concat tests_dir "messages.json" in let messages = Validator_messages.load messages_path in Printf.printf "Loaded %d expected messages\n%!" (Validator_messages.count messages); Printf.printf "Discovering test files...\n%!"; let tests = discover_tests tests_dir in Printf.printf "Found %d test files\n%!" (List.length tests); if is_both then begin (* Run both modes *) let lenient_results = run_all_tests ~mode_name:"LENIENT" ~strictness_setting:Expected_message.lenient messages tests in let strict_results = run_all_tests ~mode_name:"STRICT" ~strictness_setting:Expected_message.exact_message messages tests in print_failures "LENIENT" lenient_results; print_failures "STRICT" strict_results; Printf.printf "\n=== Summary ===\n"; let lenient_passed = List.filter (fun r -> r.passed) lenient_results |> List.length in let strict_passed = List.filter (fun r -> r.passed) strict_results |> List.length in let total = List.length tests in Printf.printf "LENIENT: %d/%d (%.1f%%)\n" lenient_passed total (100.0 *. float_of_int lenient_passed /. float_of_int total); Printf.printf "STRICT: %d/%d (%.1f%%)\n" strict_passed total (100.0 *. float_of_int strict_passed /. float_of_int total); generate_combined_html_report ~lenient_results ~strict_results report_path; (* Exit with error if strict mode has failures *) let strict_failed = List.filter (fun r -> not r.passed) strict_results |> List.length in exit (if strict_failed > 0 then 1 else 0) end else begin (* Single mode (original behavior) *) if is_strict then begin strictness := Expected_message.exact_message; Printf.printf "Running in STRICT mode (exact message matching required)\n%!" end; Printf.printf "Running tests...\n%!"; let total = List.length tests in let results = List.mapi (fun i test -> Printf.printf "\r[%d/%d] %s%!" (i + 1) total test.relative_path; run_test messages test ) tests in Printf.printf "\n%!"; (* Print failing isvalid tests *) let failing_isvalid = List.filter (fun r -> r.file.expected = Valid && not r.passed ) results in if failing_isvalid <> [] then begin Printf.printf "\n=== Failing isvalid tests ===\n"; List.iter (fun r -> Printf.printf "%s: %s\n" r.file.relative_path r.details ) failing_isvalid end; (* Print failing haswarn tests *) let failing_haswarn = List.filter (fun r -> r.file.expected = HasWarning && not r.passed ) results in if failing_haswarn <> [] then begin Printf.printf "\n=== Failing haswarn tests ===\n"; List.iter (fun r -> Printf.printf "%s\n" r.file.relative_path ) failing_haswarn end; (* Print failing novalid tests *) let failing_novalid = List.filter (fun r -> r.file.expected = Invalid && not r.passed ) results in if failing_novalid <> [] then begin Printf.printf "\n=== Failing novalid tests (first 50) ===\n"; List.iteri (fun i r -> if i < 50 then Printf.printf "%s\n" r.file.relative_path ) failing_novalid end; print_summary results; generate_html_report results report_path; let failed_count = List.filter (fun r -> not r.passed) results |> List.length in exit (if failed_count > 0 then 1 else 0) end