(* Roundtrip test: Parse -> Serialize -> Re-parse -> Validate This test validates that the HTML5 serializer produces valid HTML5 by roundtripping the validator test suite files through: 1. Parse with HTML5 parser 2. Serialize DOM back to HTML 3. Re-parse the serialized HTML 4. Validate the result For "isvalid" tests: the roundtripped document should still be valid For "novalid/haswarn" tests: we just verify the roundtrip works without crashes *) module Report = Test_report (* Test result type *) type test_result = { filename : string; test_type : string; (* isvalid, novalid, haswarn *) original_valid : bool; (* Did original pass validation? *) roundtrip_valid : bool; (* Did roundtripped doc pass validation? *) roundtrip_ok : bool; (* Did roundtrip work without errors? *) original_errors : int; roundtrip_errors : int; parse_error : string option; } (* Get test type from filename *) let get_test_type filename = if Astring.String.is_suffix ~affix:"-isvalid.html" filename || Astring.String.is_suffix ~affix:"-isvalid.xhtml" filename then "isvalid" else if Astring.String.is_suffix ~affix:"-novalid.html" filename || Astring.String.is_suffix ~affix:"-novalid.xhtml" filename then "novalid" else if Astring.String.is_suffix ~affix:"-haswarn.html" filename || Astring.String.is_suffix ~affix:"-haswarn.xhtml" filename then "haswarn" else "unknown" (* Count errors in validation result *) let count_errors messages = List.length (List.filter (fun (m : Htmlrw_check.message) -> m.severity = Htmlrw_check.Error ) messages) (* Serialize a document to HTML string *) let serialize_document doc = Html5rw.Dom.to_html ~pretty:false doc (* Run roundtrip test on a single file *) let test_file path = let filename = Filename.basename path in let test_type = get_test_type filename in try (* Read file content *) let content = let ic = open_in path in let n = in_channel_length ic in let s = really_input_string ic n in close_in ic; s in (* Parse original *) let original_result = Html5rw.parse_bytes (Bytes.of_string content) in let original_doc = Html5rw.root original_result in (* Validate original *) let checker_result = Htmlrw_check.check_parsed ~system_id:path original_result in let original_messages = Htmlrw_check.messages checker_result in let original_errors = count_errors original_messages in let original_valid = original_errors = 0 in (* Serialize to HTML *) let serialized = serialize_document original_doc in (* Re-parse serialized HTML *) let roundtrip_result = Html5rw.parse_bytes (Bytes.of_string serialized) in (* Validate roundtripped document *) let roundtrip_checker = Htmlrw_check.check_parsed ~system_id:path roundtrip_result in let roundtrip_messages = Htmlrw_check.messages roundtrip_checker in let roundtrip_errors = count_errors roundtrip_messages in let roundtrip_valid = roundtrip_errors = 0 in { filename; test_type; original_valid; roundtrip_valid; roundtrip_ok = true; original_errors; roundtrip_errors; parse_error = None; } with e -> { filename; test_type; original_valid = false; roundtrip_valid = false; roundtrip_ok = false; original_errors = 0; roundtrip_errors = 0; parse_error = Some (Printexc.to_string e); } (* Recursively find all test files *) let rec find_test_files dir = let files = Sys.readdir dir |> Array.to_list in List.concat_map (fun f -> let path = Filename.concat dir f in if Sys.is_directory path then find_test_files path else if Astring.String.is_suffix ~affix:"-isvalid.html" f || Astring.String.is_suffix ~affix:"-novalid.html" f || Astring.String.is_suffix ~affix:"-haswarn.html" f then [path] else [] ) files let () = let test_dir = Sys.argv.(1) in Printf.printf "Discovering test files...\n%!"; let test_files = find_test_files test_dir in Printf.printf "Found %d test files\n%!" (List.length test_files); Printf.printf "Running roundtrip tests...\n%!"; (* Run tests *) let total = List.length test_files in let results = List.mapi (fun i path -> Printf.printf "\r[%d/%d] %s%!" (i + 1) total (Filename.basename path); test_file path ) test_files in Printf.printf "\n%!"; (* Categorize results *) let isvalid_tests = List.filter (fun r -> r.test_type = "isvalid") results in let novalid_tests = List.filter (fun r -> r.test_type = "novalid") results in let haswarn_tests = List.filter (fun r -> r.test_type = "haswarn") results in (* For isvalid tests: check that roundtripped document is still valid *) let isvalid_passed = List.filter (fun r -> r.roundtrip_ok && r.roundtrip_valid ) isvalid_tests in (* For novalid/haswarn tests: just check roundtrip works *) let novalid_passed = List.filter (fun r -> r.roundtrip_ok) novalid_tests in let haswarn_passed = List.filter (fun r -> r.roundtrip_ok) haswarn_tests in (* Print failures for isvalid tests *) let isvalid_failed = List.filter (fun r -> not r.roundtrip_ok || not r.roundtrip_valid ) isvalid_tests in if List.length isvalid_failed > 0 then begin Printf.printf "\n=== Failing isvalid roundtrip tests (first 20) ===\n"; List.iteri (fun i r -> if i < 20 then begin match r.parse_error with | Some err -> Printf.printf "%s: %s\n" r.filename err | None -> Printf.printf "%s: original_valid=%b, roundtrip_valid=%b (errors: %d -> %d)\n" r.filename r.original_valid r.roundtrip_valid r.original_errors r.roundtrip_errors end ) isvalid_failed end; (* Print roundtrip failures for all tests *) let roundtrip_failures = List.filter (fun r -> not r.roundtrip_ok) results in if List.length roundtrip_failures > 0 then begin Printf.printf "\n=== Roundtrip failures (first 20) ===\n"; List.iteri (fun i r -> if i < 20 then Printf.printf "%s: %s\n" r.filename (Option.value ~default:"unknown error" r.parse_error) ) roundtrip_failures end; (* Summary *) Printf.printf "\n=== Roundtrip Test Results ===\n"; Printf.printf "isvalid tests: %d/%d passed (roundtripped and still valid)\n" (List.length isvalid_passed) (List.length isvalid_tests); Printf.printf "novalid tests: %d/%d roundtripped successfully\n" (List.length novalid_passed) (List.length novalid_tests); Printf.printf "haswarn tests: %d/%d roundtripped successfully\n" (List.length haswarn_passed) (List.length haswarn_tests); let total_roundtrip_ok = List.length (List.filter (fun r -> r.roundtrip_ok) results) in Printf.printf "\nTotal: %d/%d roundtripped without errors\n" total_roundtrip_ok (List.length results); Printf.printf "isvalid preservation: %d/%d still valid after roundtrip\n" (List.length isvalid_passed) (List.length isvalid_tests); (* Exit with error if isvalid tests fail validation after roundtrip *) let exit_code = if List.length isvalid_failed > 0 then 1 else 0 in exit exit_code