(* Combined test runner for all html5lib-tests *)
(* Generates a single standalone HTML report *)
open Bytesrw
module Report = Test_report
(* ============================================================ *)
(* Tree Construction Tests *)
(* ============================================================ *)
module TreeConstruction = struct
module Parser = Html5rw.Parser
module Dom = Html5rw.Dom
type test_case = {
input : string;
expected_tree : string;
expected_errors : string list;
script_on : bool;
fragment_context : string option;
raw_lines : string;
}
let parse_test_case lines =
let raw_lines = String.concat "\n" lines in
let rec parse acc = function
| [] -> acc
| line :: rest when String.length line > 0 && line.[0] = '#' ->
let section = String.trim line in
let content, remaining = collect_section rest in
parse ((section, content) :: acc) remaining
| _ :: rest -> parse acc rest
and collect_section lines =
let rec loop acc = function
| [] -> (List.rev acc, [])
| line :: rest when String.length line > 0 && line.[0] = '#' ->
(List.rev acc, line :: rest)
| line :: rest -> loop (line :: acc) rest
in
loop [] lines
in
let sections = parse [] lines in
let get_section name =
match List.assoc_opt name sections with
| Some lines -> String.concat "\n" lines
| None -> ""
in
let data = get_section "#data" in
let document = get_section "#document" in
let errors_text = get_section "#errors" in
let errors =
String.split_on_char '\n' errors_text
|> List.filter (fun s -> String.trim s <> "")
in
let script_on = List.mem_assoc "#script-on" sections in
let fragment =
if List.mem_assoc "#document-fragment" sections then
Some (get_section "#document-fragment" |> String.trim)
else None
in
{ input = data; expected_tree = document; expected_errors = errors;
script_on; fragment_context = fragment; raw_lines }
let parse_dat_file content =
let lines = String.split_on_char '\n' content in
let rec split_tests current acc = function
| [] ->
if current = [] then List.rev acc
else List.rev (List.rev current :: acc)
| "" :: "#data" :: rest ->
let new_acc = if current = [] then acc else (List.rev current :: acc) in
split_tests ["#data"] new_acc rest
| line :: rest ->
split_tests (line :: current) acc rest
in
let test_groups = split_tests [] [] lines in
List.filter_map (fun lines ->
if List.exists (fun l -> l = "#data") lines then
Some (parse_test_case lines)
else None
) test_groups
let strip_tree_prefix s =
let lines = String.split_on_char '\n' s in
let stripped = List.filter_map (fun line ->
if String.length line >= 2 && String.sub line 0 2 = "| " then
Some (String.sub line 2 (String.length line - 2))
else if String.trim line = "" then None
else Some line
) lines in
String.concat "\n" stripped
let normalize_tree s =
let lines = String.split_on_char '\n' s in
let non_empty = List.filter (fun l -> String.trim l <> "") lines in
String.concat "\n" non_empty
let run_test test =
try
let result =
match test.fragment_context with
| Some ctx_str ->
let (namespace, tag_name) =
match String.split_on_char ' ' ctx_str with
| [ns; tag] when ns = "svg" -> (Some "svg", tag)
| [ns; tag] when ns = "math" -> (Some "mathml", tag)
| [tag] -> (None, tag)
| _ -> (None, ctx_str)
in
let context = Html5rw.Parser.make_fragment_context ~tag_name ~namespace () in
let reader = Bytes.Reader.of_string test.input in
Html5rw.Parser.parse ~collect_errors:true ~fragment_context:context reader
| None ->
let reader = Bytes.Reader.of_string test.input in
Html5rw.Parser.parse ~collect_errors:true reader
in
let actual_tree = Html5rw.Dom.to_test_format (Html5rw.Parser.root result) in
let expected = normalize_tree (strip_tree_prefix test.expected_tree) in
let actual = normalize_tree (strip_tree_prefix actual_tree) in
let error_count = List.length (Html5rw.Parser.errors result) in
let expected_error_count = List.length test.expected_errors in
(expected = actual, expected, actual, error_count, expected_error_count)
with e ->
let expected = normalize_tree (strip_tree_prefix test.expected_tree) in
(false, expected, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), 0, 0)
let run_file path =
let ic = open_in path in
let content = really_input_string ic (in_channel_length ic) in
close_in ic;
let tests = parse_dat_file content in
let filename = Filename.basename path in
let passed = ref 0 in
let failed = ref 0 in
let results = ref [] in
List.iteri (fun i test ->
if test.script_on then ()
else begin
let (success, expected, actual, actual_error_count, expected_error_count) = run_test test in
let description =
let input_preview = String.sub test.input 0 (min 60 (String.length test.input)) in
if test.fragment_context <> None then
Printf.sprintf "Fragment (%s): %s" (Option.get test.fragment_context) input_preview
else input_preview
in
let result : Report.test_result = {
test_num = i + 1; description; input = test.input; expected; actual; success;
details = [
("Fragment Context", Option.value test.fragment_context ~default:"(none)");
("Expected Errors", string_of_int expected_error_count);
("Actual Errors", string_of_int actual_error_count);
];
raw_test_data = Some test.raw_lines;
} in
results := result :: !results;
if success then incr passed else incr failed
end
) tests;
let file_result : Report.file_result = {
filename; test_type = "Tree Construction";
passed_count = !passed; failed_count = !failed;
tests = List.rev !results;
} in
(file_result, !passed, !failed)
let run_dir test_dir =
let files = Sys.readdir test_dir |> Array.to_list in
let dat_files = List.filter (fun f ->
Filename.check_suffix f ".dat" && not (String.contains f '/')
) files in
let total_passed = ref 0 in
let total_failed = ref 0 in
let file_results = ref [] in
List.iter (fun file ->
let path = Filename.concat test_dir file in
if Sys.is_directory path then () else begin
let (file_result, passed, failed) = run_file path in
total_passed := !total_passed + passed;
total_failed := !total_failed + failed;
file_results := file_result :: !file_results;
Printf.printf " %s: %d passed, %d failed\n" file passed failed
end
) (List.sort String.compare dat_files);
(List.rev !file_results, !total_passed, !total_failed)
end
(* ============================================================ *)
(* Tokenizer Tests *)
(* ============================================================ *)
module Tokenizer_tests = struct
module Tokenizer = Html5rw.Tokenizer
module TokenCollector = struct
type t = { mutable tokens : Html5rw.Tokenizer.Token.t list }
let create () = { tokens = [] }
let process t token ~line:_ ~column:_ = t.tokens <- token :: t.tokens; `Continue
let adjusted_current_node_in_html_namespace _ = true
let get_tokens t = List.rev t.tokens
end
type test_case = {
description : string;
input : string;
output : Jsont.json list;
expected_error_count : int;
initial_states : string list;
last_start_tag : string option;
double_escaped : bool;
xml_mode : bool;
raw_json : string;
}
let unescape_double s =
let b = Buffer.create (String.length s) in
let i = ref 0 in
while !i < String.length s do
if !i + 1 < String.length s && s.[!i] = '\\' then begin
match s.[!i + 1] with
| 'u' when !i + 5 < String.length s ->
let hex = String.sub s (!i + 2) 4 in
(try
let code = int_of_string ("0x" ^ hex) in
if code < 128 then Buffer.add_char b (Char.chr code)
else begin
if code < 0x800 then begin
Buffer.add_char b (Char.chr (0xC0 lor (code lsr 6)));
Buffer.add_char b (Char.chr (0x80 lor (code land 0x3F)))
end else begin
Buffer.add_char b (Char.chr (0xE0 lor (code lsr 12)));
Buffer.add_char b (Char.chr (0x80 lor ((code lsr 6) land 0x3F)));
Buffer.add_char b (Char.chr (0x80 lor (code land 0x3F)))
end
end;
i := !i + 6
with _ -> Buffer.add_char b s.[!i]; incr i)
| 'n' -> Buffer.add_char b '\n'; i := !i + 2
| 'r' -> Buffer.add_char b '\r'; i := !i + 2
| 't' -> Buffer.add_char b '\t'; i := !i + 2
| '\\' -> Buffer.add_char b '\\'; i := !i + 2
| _ -> Buffer.add_char b s.[!i]; incr i
end else begin
Buffer.add_char b s.[!i]; incr i
end
done;
Buffer.contents b
let json_string = function Jsont.String (s, _) -> s | _ -> failwith "Expected string"
let json_bool = function Jsont.Bool (b, _) -> b | _ -> failwith "Expected bool"
let json_array = function Jsont.Array (arr, _) -> arr | _ -> failwith "Expected array"
let json_object = function Jsont.Object (obj, _) -> obj | _ -> failwith "Expected object"
let json_mem name obj =
match List.find_opt (fun ((n, _), _) -> n = name) obj with
| Some (_, v) -> Some v | None -> None
let json_mem_exn name obj =
match json_mem name obj with Some v -> v | None -> failwith ("Missing member: " ^ name)
let rec json_to_string = function
| Jsont.Null _ -> "null"
| Jsont.Bool (b, _) -> string_of_bool b
| Jsont.Number (n, _) -> Printf.sprintf "%g" n
| Jsont.String (s, _) -> Printf.sprintf "%S" s
| Jsont.Array (arr, _) -> "[" ^ String.concat ", " (List.map json_to_string arr) ^ "]"
| Jsont.Object (obj, _) ->
"{" ^ String.concat ", " (List.map (fun ((n, _), v) -> Printf.sprintf "%S: %s" n (json_to_string v)) obj) ^ "}"
let parse_test_case ~xml_mode json =
let raw_json = json_to_string json in
let obj = json_object json in
let description = json_string (json_mem_exn "description" obj) in
let input = json_string (json_mem_exn "input" obj) in
let output = json_array (json_mem_exn "output" obj) in
let expected_error_count = match json_mem "errors" obj with
| Some e -> List.length (json_array e) | None -> 0
in
let initial_states = match json_mem "initialStates" obj with
| Some s -> List.map json_string (json_array s) | None -> ["Data state"]
in
let last_start_tag = match json_mem "lastStartTag" obj with
| Some s -> Some (json_string s) | None -> None
in
let double_escaped = match json_mem "doubleEscaped" obj with
| Some b -> json_bool b | None -> false
in
{ description; input; output; expected_error_count; initial_states;
last_start_tag; double_escaped; xml_mode; raw_json }
let state_of_string = function
| "Data state" -> Html5rw.Tokenizer.State.Data
| "PLAINTEXT state" -> Html5rw.Tokenizer.State.Plaintext
| "RCDATA state" -> Html5rw.Tokenizer.State.Rcdata
| "RAWTEXT state" -> Html5rw.Tokenizer.State.Rawtext
| "Script data state" -> Html5rw.Tokenizer.State.Script_data
| "CDATA section state" -> Html5rw.Tokenizer.State.Cdata_section
| s -> failwith ("Unknown state: " ^ s)
let token_to_test_json (tok : Html5rw.Tokenizer.Token.t) : Jsont.json list =
let str s = Jsont.String (s, Jsont.Meta.none) in
let arr l = Jsont.Array (l, Jsont.Meta.none) in
match tok with
| Html5rw.Tokenizer.Token.Doctype { name; public_id; system_id; force_quirks } ->
let name_json = match name with Some n -> str n | None -> Jsont.Null ((), Jsont.Meta.none) in
let public_json = match public_id with Some p -> str p | None -> Jsont.Null ((), Jsont.Meta.none) in
let system_json = match system_id with Some s -> str s | None -> Jsont.Null ((), Jsont.Meta.none) in
let correctness = Jsont.Bool (not force_quirks, Jsont.Meta.none) in
[arr [str "DOCTYPE"; name_json; public_json; system_json; correctness]]
| Html5rw.Tokenizer.Token.Tag { kind = Start; name; attrs; self_closing } ->
let attrs_obj = Jsont.Object (
List.map (fun (n, v) -> ((n, Jsont.Meta.none), str v)) (List.rev attrs),
Jsont.Meta.none
) in
if self_closing then [arr [str "StartTag"; str name; attrs_obj; Jsont.Bool (true, Jsont.Meta.none)]]
else [arr [str "StartTag"; str name; attrs_obj]]
| Html5rw.Tokenizer.Token.Tag { kind = End; name; _ } -> [arr [str "EndTag"; str name]]
| Html5rw.Tokenizer.Token.Comment data -> [arr [str "Comment"; str data]]
| Html5rw.Tokenizer.Token.Character data -> [arr [str "Character"; str data]]
| Html5rw.Tokenizer.Token.EOF -> []
let rec json_equal a b =
match a, b with
| Jsont.Null _, Jsont.Null _ -> true
| Jsont.Bool (a, _), Jsont.Bool (b, _) -> a = b
| Jsont.Number (a, _), Jsont.Number (b, _) -> a = b
| Jsont.String (a, _), Jsont.String (b, _) -> a = b
| Jsont.Array (a, _), Jsont.Array (b, _) ->
List.length a = List.length b && List.for_all2 json_equal a b
| Jsont.Object (a, _), Jsont.Object (b, _) ->
let a_sorted = List.sort (fun ((n1, _), _) ((n2, _), _) -> String.compare n1 n2) a in
let b_sorted = List.sort (fun ((n1, _), _) ((n2, _), _) -> String.compare n1 n2) b in
List.length a_sorted = List.length b_sorted &&
List.for_all2 (fun ((n1, _), v1) ((n2, _), v2) -> n1 = n2 && json_equal v1 v2) a_sorted b_sorted
| _ -> false
let merge_character_tokens tokens =
let rec loop acc = function
| [] -> List.rev acc
| Html5rw.Tokenizer.Token.Character s1 :: Html5rw.Tokenizer.Token.Character s2 :: rest ->
loop acc (Html5rw.Tokenizer.Token.Character (s1 ^ s2) :: rest)
| tok :: rest -> loop (tok :: acc) rest
in loop [] tokens
let run_test test initial_state =
let input = if test.double_escaped then unescape_double test.input else test.input in
let collector = TokenCollector.create () in
let tokenizer = Html5rw.Tokenizer.create (module TokenCollector) collector ~collect_errors:true ~xml_mode:test.xml_mode () in
Html5rw.Tokenizer.set_state tokenizer initial_state;
(match test.last_start_tag with Some tag -> Html5rw.Tokenizer.set_last_start_tag tokenizer tag | None -> ());
let reader = Bytes.Reader.of_string input in
Html5rw.Tokenizer.run tokenizer (module TokenCollector) reader;
let tokens = merge_character_tokens (TokenCollector.get_tokens collector) in
let actual_tokens = List.concat_map token_to_test_json tokens in
let expected_output = if test.double_escaped then
let rec unescape_json = function
| Jsont.String (s, m) -> Jsont.String (unescape_double s, m)
| Jsont.Array (arr, m) -> Jsont.Array (List.map unescape_json arr, m)
| Jsont.Object (obj, m) -> Jsont.Object (List.map (fun (n, v) -> (n, unescape_json v)) obj, m)
| other -> other
in List.map unescape_json test.output
else test.output in
let rec merge_expected = function
| [] -> []
| [x] -> [x]
| Jsont.Array ([Jsont.String ("Character", _); Jsont.String (s1, m1)], am1) ::
Jsont.Array ([Jsont.String ("Character", _); Jsont.String (s2, _)], _) :: rest ->
merge_expected (Jsont.Array ([Jsont.String ("Character", Jsont.Meta.none); Jsont.String (s1 ^ s2, m1)], am1) :: rest)
| x :: rest -> x :: merge_expected rest
in
let expected = merge_expected expected_output in
let tokens_match =
List.length actual_tokens = List.length expected &&
List.for_all2 json_equal actual_tokens expected
in
let actual_error_count = List.length (Html5rw.Tokenizer.get_errors tokenizer) in
let errors_count_match = actual_error_count = test.expected_error_count in
(tokens_match && errors_count_match, actual_tokens, expected, actual_error_count, test.expected_error_count)
let run_file path =
let content =
let ic = open_in path in
let n = in_channel_length ic in
let s = really_input_string ic n in
close_in ic; s
in
let json = match Jsont_bytesrw.decode_string Jsont.json content with
| Ok j -> j
| Error e -> failwith (Printf.sprintf "JSON parse error in %s: %s" path e)
in
let obj = json_object json in
let regular_tests = match json_mem "tests" obj with
| Some t -> List.map (parse_test_case ~xml_mode:false) (json_array t) | None -> []
in
let xml_tests = match json_mem "xmlViolationTests" obj with
| Some t -> List.map (parse_test_case ~xml_mode:true) (json_array t) | None -> []
in
let all_tests = regular_tests @ xml_tests in
let filename = Filename.basename path in
let passed = ref 0 in
let failed = ref 0 in
let results = ref [] in
List.iteri (fun i test ->
List.iter (fun state_name ->
try
let state = state_of_string state_name in
let (success, actual, expected, actual_err_count, expected_err_count) = run_test test state in
let description = Printf.sprintf "[%s] %s" state_name test.description in
let result : Report.test_result = {
test_num = i + 1; description; input = test.input;
expected = String.concat "\n" (List.map json_to_string expected);
actual = String.concat "\n" (List.map json_to_string actual);
success;
details = [
("Initial State", state_name);
("Last Start Tag", Option.value test.last_start_tag ~default:"(none)");
("Double Escaped", string_of_bool test.double_escaped);
("XML Mode", string_of_bool test.xml_mode);
("Expected Errors", string_of_int expected_err_count);
("Actual Errors", string_of_int actual_err_count);
];
raw_test_data = Some test.raw_json;
} in
results := result :: !results;
if success then incr passed else incr failed
with e ->
incr failed;
let result : Report.test_result = {
test_num = i + 1;
description = Printf.sprintf "[%s] %s" state_name test.description;
input = test.input; expected = "";
actual = Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e);
success = false; details = [];
raw_test_data = Some test.raw_json;
} in
results := result :: !results
) test.initial_states
) all_tests;
let file_result : Report.file_result = {
filename; test_type = "Tokenizer";
passed_count = !passed; failed_count = !failed;
tests = List.rev !results;
} in
(file_result, !passed, !failed)
let run_dir test_dir =
let files = Sys.readdir test_dir |> Array.to_list in
let test_files = List.filter (fun f -> Filename.check_suffix f ".test") files in
let total_passed = ref 0 in
let total_failed = ref 0 in
let file_results = ref [] in
List.iter (fun file ->
let path = Filename.concat test_dir file in
let (file_result, passed, failed) = run_file path in
total_passed := !total_passed + passed;
total_failed := !total_failed + failed;
file_results := file_result :: !file_results;
Printf.printf " %s: %d passed, %d failed\n" file passed failed
) (List.sort String.compare test_files);
(List.rev !file_results, !total_passed, !total_failed)
end
(* ============================================================ *)
(* Encoding Tests *)
(* ============================================================ *)
module Encoding_tests = struct
module Encoding = Html5rw.Encoding
type test_case = {
input : string;
expected_encoding : string;
raw_lines : string;
}
let normalize_encoding_name s = String.lowercase_ascii (String.trim s)
let encoding_to_test_name = function
| Html5rw.Encoding.Utf8 -> "utf-8"
| Html5rw.Encoding.Utf16le -> "utf-16le"
| Html5rw.Encoding.Utf16be -> "utf-16be"
| Html5rw.Encoding.Windows_1252 -> "windows-1252"
| Html5rw.Encoding.Iso_8859_2 -> "iso-8859-2"
| Html5rw.Encoding.Euc_jp -> "euc-jp"
let parse_test_case lines =
let raw_lines = String.concat "\n" lines in
let rec parse acc = function
| [] -> acc
| line :: rest when String.length line > 0 && line.[0] = '#' ->
let section = String.trim line in
let content, remaining = collect_section rest in
parse ((section, content) :: acc) remaining
| _ :: rest -> parse acc rest
and collect_section lines =
let rec loop acc = function
| [] -> (List.rev acc, [])
| line :: rest when String.length line > 0 && line.[0] = '#' ->
(List.rev acc, line :: rest)
| line :: rest -> loop (line :: acc) rest
in loop [] lines
in
let sections = parse [] lines in
let get_section name =
match List.assoc_opt name sections with
| Some lines -> String.concat "\n" lines | None -> ""
in
let data = get_section "#data" in
let encoding = get_section "#encoding" in
{ input = data; expected_encoding = String.trim encoding; raw_lines }
let parse_dat_file content =
let lines = String.split_on_char '\n' content in
let rec split_tests current acc = function
| [] -> if current = [] then List.rev acc else List.rev (List.rev current :: acc)
| "" :: "#data" :: rest ->
let new_acc = if current = [] then acc else (List.rev current :: acc) in
split_tests ["#data"] new_acc rest
| line :: rest -> split_tests (line :: current) acc rest
in
let test_groups = split_tests [] [] lines in
List.filter_map (fun lines ->
if List.exists (fun l -> l = "#data") lines then Some (parse_test_case lines)
else None
) test_groups
let run_test test =
try
let (_, detected_encoding) = Html5rw.Encoding.decode (Bytes.of_string test.input) () in
let detected_name = encoding_to_test_name detected_encoding in
let expected_name = normalize_encoding_name test.expected_encoding in
let match_encoding det exp =
det = exp ||
(det = "windows-1252" && (exp = "windows-1252" || exp = "cp1252" || exp = "iso-8859-1")) ||
(det = "iso-8859-2" && (exp = "iso-8859-2" || exp = "iso8859-2" || exp = "latin2")) ||
(det = "utf-8" && (exp = "utf-8" || exp = "utf8")) ||
(det = "euc-jp" && (exp = "euc-jp" || exp = "eucjp"))
in
(match_encoding detected_name expected_name, detected_name, expected_name)
with e ->
(false, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), test.expected_encoding)
let run_file path =
let ic = open_in path in
let content = really_input_string ic (in_channel_length ic) in
close_in ic;
let tests = parse_dat_file content in
let filename = Filename.basename path in
let passed = ref 0 in
let failed = ref 0 in
let results = ref [] in
List.iteri (fun i test ->
if String.trim test.expected_encoding = "" then ()
else begin
let (success, detected, expected) = run_test test in
let result : Report.test_result = {
test_num = i + 1;
description = Printf.sprintf "Detect %s encoding" expected;
input = String.escaped test.input;
expected; actual = detected; success;
details = [
("Input Length", string_of_int (String.length test.input));
("Has BOM", string_of_bool (String.length test.input >= 3 &&
(String.sub test.input 0 3 = "\xEF\xBB\xBF" ||
String.sub test.input 0 2 = "\xFF\xFE" ||
String.sub test.input 0 2 = "\xFE\xFF")));
];
raw_test_data = Some test.raw_lines;
} in
results := result :: !results;
if success then incr passed else incr failed
end
) tests;
let file_result : Report.file_result = {
filename; test_type = "Encoding Detection";
passed_count = !passed; failed_count = !failed;
tests = List.rev !results;
} in
(file_result, !passed, !failed)
let run_dir test_dir =
let files = Sys.readdir test_dir |> Array.to_list in
let dat_files = List.filter (fun f ->
Filename.check_suffix f ".dat" && not (String.contains f '/')
) files in
let total_passed = ref 0 in
let total_failed = ref 0 in
let file_results = ref [] in
List.iter (fun file ->
let path = Filename.concat test_dir file in
if Sys.is_directory path then () else begin
let (file_result, passed, failed) = run_file path in
total_passed := !total_passed + passed;
total_failed := !total_failed + failed;
file_results := file_result :: !file_results;
Printf.printf " %s: %d passed, %d failed\n" file passed failed
end
) (List.sort String.compare dat_files);
(List.rev !file_results, !total_passed, !total_failed)
end
(* ============================================================ *)
(* Main Entry Point *)
(* ============================================================ *)
let () =
let base_dir = if Array.length Sys.argv > 1 then Sys.argv.(1) else "html5lib-tests" in
let all_files = ref [] in
let total_passed = ref 0 in
let total_failed = ref 0 in
(* Run Tree Construction Tests *)
Printf.printf "\n=== Tree Construction Tests ===\n";
let tree_dir = Filename.concat base_dir "tree-construction" in
if Sys.file_exists tree_dir then begin
let (files, passed, failed) = TreeConstruction.run_dir tree_dir in
all_files := !all_files @ files;
total_passed := !total_passed + passed;
total_failed := !total_failed + failed;
Printf.printf " Subtotal: %d passed, %d failed\n" passed failed
end else
Printf.printf " (directory not found: %s)\n" tree_dir;
(* Run Tokenizer Tests *)
Printf.printf "\n=== Tokenizer Tests ===\n";
let tok_dir = Filename.concat base_dir "tokenizer" in
if Sys.file_exists tok_dir then begin
let (files, passed, failed) = Tokenizer_tests.run_dir tok_dir in
all_files := !all_files @ files;
total_passed := !total_passed + passed;
total_failed := !total_failed + failed;
Printf.printf " Subtotal: %d passed, %d failed\n" passed failed
end else
Printf.printf " (directory not found: %s)\n" tok_dir;
(* Run Encoding Tests *)
Printf.printf "\n=== Encoding Detection Tests ===\n";
let enc_dir = Filename.concat base_dir "encoding" in
if Sys.file_exists enc_dir then begin
let (files, passed, failed) = Encoding_tests.run_dir enc_dir in
all_files := !all_files @ files;
total_passed := !total_passed + passed;
total_failed := !total_failed + failed;
Printf.printf " Subtotal: %d passed, %d failed\n" passed failed
end else
Printf.printf " (directory not found: %s)\n" enc_dir;
(* Note: Serializer tests use the standalone test_serializer.exe for full implementation *)
Printf.printf "\n=== Overall Summary ===\n";
Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed;
(* Generate combined HTML report *)
let report : Report.report = {
title = "HTML5 Parser Test Suite";
test_type = "combined";
description = "This is a comprehensive test report for the html5rw OCaml HTML5 parser library, \
validating conformance against the official html5lib-tests test suite. \
Tests cover: (1) Tree Construction - validating the DOM tree building algorithm; \
(2) Tokenization - validating the HTML tokenizer state machine; \
(3) Encoding Detection - validating character encoding sniffing. \
Each test shows the input, expected output, actual output, and original test data.";
files = !all_files;
total_passed = !total_passed;
total_failed = !total_failed;
match_quality = None;
test_type_breakdown = None;
strictness_mode = None;
run_timestamp = None;
} in
Report.generate_report report "html5lib_test_report.html";
exit (if !total_failed > 0 then 1 else 0)