(* Test runner for html5lib-tests serializer tests *) module Dom = Html5rw.Dom module Report = Test_report (* Extract values from JSON *) let json_string = function | Jsont.String (s, _) -> s | _ -> failwith "Expected string" let json_string_opt = function | Jsont.Null _ -> None | Jsont.String (s, _) -> Some s | _ -> failwith "Expected string or null" let json_bool = function | Jsont.Bool (b, _) -> b | _ -> failwith "Expected bool" let json_array = function | Jsont.Array (arr, _) -> arr | _ -> failwith "Expected array" let json_object = function | Jsont.Object (obj, _) -> obj | _ -> failwith "Expected object" let json_mem name obj = match List.find_opt (fun ((n, _), _) -> n = name) obj with | Some (_, v) -> Some v | None -> None let json_mem_exn name obj = match json_mem name obj with | Some v -> v | None -> failwith ("Missing member: " ^ name) let rec json_to_string = function | Jsont.Null _ -> "null" | Jsont.Bool (b, _) -> string_of_bool b | Jsont.Number (n, _) -> Printf.sprintf "%g" n | Jsont.String (s, _) -> Printf.sprintf "%S" s | Jsont.Array (arr, _) -> "[" ^ String.concat ", " (List.map json_to_string arr) ^ "]" | Jsont.Object (obj, _) -> "{" ^ String.concat ", " (List.map (fun ((n, _), v) -> Printf.sprintf "%S: %s" n (json_to_string v)) obj) ^ "}" (* Serialization options *) type serialize_options = { quote_char : char; quote_char_explicit : bool; (* Was quote_char explicitly set? *) minimize_boolean_attributes : bool; use_trailing_solidus : bool; escape_lt_in_attrs : bool; escape_rcdata : bool; strip_whitespace : bool; inject_meta_charset : bool; encoding : string option; omit_optional_tags : bool; } let default_options = { quote_char = '"'; quote_char_explicit = false; minimize_boolean_attributes = true; use_trailing_solidus = false; escape_lt_in_attrs = false; escape_rcdata = false; strip_whitespace = false; inject_meta_charset = false; encoding = None; omit_optional_tags = true; (* HTML5 default *) } (* Parse options from JSON *) let parse_options json_opt = match json_opt with | None -> default_options | Some json -> let obj = json_object json in let get_bool name default = match json_mem name obj with | Some j -> (try json_bool j with _ -> default) | None -> default in let get_string name = match json_mem name obj with | Some (Jsont.String (s, _)) -> Some s | _ -> None in let quote_char_opt = match json_mem "quote_char" obj with | Some (Jsont.String (s, _)) when String.length s = 1 -> Some s.[0] | _ -> None in { quote_char = Option.value ~default:'"' quote_char_opt; quote_char_explicit = Option.is_some quote_char_opt; minimize_boolean_attributes = get_bool "minimize_boolean_attributes" (get_bool "quote_attr_values" true); use_trailing_solidus = get_bool "use_trailing_solidus" false; escape_lt_in_attrs = get_bool "escape_lt_in_attrs" false; escape_rcdata = get_bool "escape_rcdata" false; strip_whitespace = get_bool "strip_whitespace" false; inject_meta_charset = get_bool "inject_meta_charset" false; encoding = get_string "encoding"; omit_optional_tags = get_bool "omit_optional_tags" true; } (* Test case *) type test_case = { description : string; input : Jsont.json list; expected : string list; options : serialize_options; raw_json : string; (* Original JSON representation of this test *) } let parse_test_case json = let raw_json = json_to_string json in let obj = json_object json in let description = json_string (json_mem_exn "description" obj) in let input = json_array (json_mem_exn "input" obj) in let expected = List.map json_string (json_array (json_mem_exn "expected" obj)) in let options = parse_options (json_mem "options" obj) in { description; input; expected; options; raw_json } (* Parse attrs that can be either array [{name, value}] or object {name: value} or empty {} *) let parse_attrs attrs_json = match attrs_json with | Jsont.Array (arr, _) -> List.map (fun attr_json -> let attr_obj = json_object attr_json in let attr_name = json_string (json_mem_exn "name" attr_obj) in let value = json_string (json_mem_exn "value" attr_obj) in (attr_name, value) ) arr | Jsont.Object (obj, _) -> List.map (fun ((n, _), v) -> (n, json_string v)) obj | _ -> [] (* Void elements that don't need end tags *) let is_void_element name = List.mem (String.lowercase_ascii name) ["area"; "base"; "br"; "col"; "embed"; "hr"; "img"; "input"; "link"; "meta"; "param"; "source"; "track"; "wbr"] (* Raw text elements whose content should not be escaped *) let is_raw_text_element name = List.mem (String.lowercase_ascii name) ["script"; "style"] (* Elements where whitespace should be preserved *) let is_whitespace_preserving_element name = List.mem (String.lowercase_ascii name) ["pre"; "textarea"; "script"; "style"] (* Block elements that close a p tag *) let p_closing_elements = [ "address"; "article"; "aside"; "blockquote"; "datagrid"; "dialog"; "dir"; "div"; "dl"; "fieldset"; "footer"; "form"; "h1"; "h2"; "h3"; "h4"; "h5"; "h6"; "header"; "hgroup"; "hr"; "main"; "menu"; "nav"; "ol"; "p"; "pre"; "section"; "table"; "ul" ] let is_p_closing_element name = List.mem (String.lowercase_ascii name) p_closing_elements (* Collapse runs of whitespace to single space *) let collapse_whitespace text = let len = String.length text in let buf = Buffer.create len in let in_whitespace = ref false in for i = 0 to len - 1 do let c = text.[i] in if c = '\t' || c = '\r' || c = '\n' || c = '\x0C' || c = ' ' then begin if not !in_whitespace then begin Buffer.add_char buf ' '; in_whitespace := true end end else begin Buffer.add_char buf c; in_whitespace := false end done; Buffer.contents buf (* Token types for stream-based serialization *) type token_type = | StartTag of string * (string * string) list (* name, attrs *) | EndTag of string (* name *) | EmptyTag of string * (string * string) list (* name, attrs *) | TextNode of string | CommentNode of string | DoctypeNode of Html5rw.Dom.node type token_info = { token : token_type option; } let build_token_info token = let arr = json_array token in match arr with | [] -> { token = None } | type_json :: rest -> let token_type_str = json_string type_json in match token_type_str, rest with | "StartTag", [_ns_json; name_json; attrs_json] -> let name = json_string name_json in let attrs = parse_attrs attrs_json in { token = Some (StartTag (name, attrs)) } | "StartTag", [name_json; attrs_json] -> let name = json_string name_json in let attrs = parse_attrs attrs_json in { token = Some (StartTag (name, attrs)) } | "EmptyTag", [name_json; attrs_json] -> let name = json_string name_json in let attrs = parse_attrs attrs_json in { token = Some (EmptyTag (name, attrs)) } | "EndTag", [_ns_json; name_json] -> let name = json_string name_json in { token = Some (EndTag name) } | "EndTag", [name_json] -> let name = json_string name_json in { token = Some (EndTag name) } | "Characters", [text_json] -> let text = json_string text_json in { token = Some (TextNode text) } | "Comment", [text_json] -> let text = json_string text_json in { token = Some (CommentNode text) } | "Doctype", [name_json] -> let name = json_string name_json in let node = Html5rw.Dom.create_doctype ~name () in { token = Some (DoctypeNode node) } | "Doctype", [name_json; public_json] -> let name = json_string name_json in let public_id = json_string_opt public_json in let node = match public_id with | Some pub -> Html5rw.Dom.create_doctype ~name ~public_id:pub () | None -> Html5rw.Dom.create_doctype ~name () in { token = Some (DoctypeNode node) } | "Doctype", [name_json; public_json; system_json] -> let name = json_string name_json in let public_id = json_string_opt public_json in let system_id = json_string_opt system_json in let node = match public_id, system_id with | Some pub, Some sys -> Html5rw.Dom.create_doctype ~name ~public_id:pub ~system_id:sys () | Some pub, None -> Html5rw.Dom.create_doctype ~name ~public_id:pub () | None, Some sys -> Html5rw.Dom.create_doctype ~name ~system_id:sys () | None, None -> Html5rw.Dom.create_doctype ~name () in { token = Some (DoctypeNode node) } | _ -> { token = None } (* Serialize a single node to HTML with options *) let escape_text text = let buf = Buffer.create (String.length text) in String.iter (fun c -> match c with | '&' -> Buffer.add_string buf "&" | '<' -> Buffer.add_string buf "<" | '>' -> Buffer.add_string buf ">" | c -> Buffer.add_char buf c ) text; Buffer.contents buf let can_unquote_attr_value value = if String.length value = 0 then false else let valid = ref true in String.iter (fun c -> if c = '>' || c = '"' || c = '\'' || c = '=' || c = '`' || c = ' ' || c = '\t' || c = '\n' || c = '\x0C' || c = '\r' then valid := false ) value; !valid let escape_attr_value value quote_char escape_lt = let buf = Buffer.create (String.length value) in String.iter (fun c -> match c with | '&' -> Buffer.add_string buf "&" | '"' when quote_char = '"' -> Buffer.add_string buf """ | '\'' when quote_char = '\'' -> Buffer.add_string buf "'" | '<' when escape_lt -> Buffer.add_string buf "<" | c -> Buffer.add_char buf c ) value; Buffer.contents buf let serialize_node opts ~in_raw_text node = match node.Dom.name with | "#text" -> if in_raw_text && not opts.escape_rcdata then node.Dom.data else escape_text node.Dom.data | "#comment" -> "" | "!doctype" -> let buf = Buffer.create 64 in Buffer.add_string buf " Buffer.add_string buf (Option.value ~default:"html" dt.Dom.name); (match dt.Dom.public_id with | Some pub when pub <> "" -> Buffer.add_string buf " PUBLIC \""; Buffer.add_string buf pub; Buffer.add_char buf '"'; (match dt.Dom.system_id with | Some sys -> Buffer.add_string buf " \""; Buffer.add_string buf sys; Buffer.add_char buf '"' | None -> ()) | _ -> match dt.Dom.system_id with | Some sys when sys <> "" -> Buffer.add_string buf " SYSTEM \""; Buffer.add_string buf sys; Buffer.add_char buf '"' | _ -> ()) | None -> Buffer.add_string buf "html"); Buffer.add_char buf '>'; Buffer.contents buf | _ -> failwith "serialize_node called with element" let choose_quote value default_quote explicit = (* If quote_char was explicitly set, always use it *) if explicit then default_quote else (* Otherwise, if value contains the default quote but not the other, use the other *) let has_double = String.contains value '"' in let has_single = String.contains value '\'' in if has_double && not has_single then '\'' else if has_single && not has_double then '"' else default_quote (* Serialize an element tag (start tag) *) let serialize_start_tag opts ~is_empty_tag name attrs = let buf = Buffer.create 64 in Buffer.add_char buf '<'; Buffer.add_string buf name; (* Sort attributes alphabetically for consistent output *) let sorted_attrs = List.sort (fun (a, _) (b, _) -> String.compare a b) attrs in List.iter (fun (key, value) -> Buffer.add_char buf ' '; Buffer.add_string buf key; let should_minimize = opts.minimize_boolean_attributes && String.lowercase_ascii key = String.lowercase_ascii value in if should_minimize then () else if String.length value = 0 then begin Buffer.add_char buf '='; Buffer.add_char buf opts.quote_char; Buffer.add_char buf opts.quote_char end else if can_unquote_attr_value value then begin Buffer.add_char buf '='; Buffer.add_string buf value end else begin let quote = choose_quote value opts.quote_char opts.quote_char_explicit in Buffer.add_char buf '='; Buffer.add_char buf quote; Buffer.add_string buf (escape_attr_value value quote opts.escape_lt_in_attrs); Buffer.add_char buf quote end ) sorted_attrs; if opts.use_trailing_solidus && (is_empty_tag || is_void_element name) then Buffer.add_string buf " /"; Buffer.add_char buf '>'; Buffer.contents buf (* Check if text starts with ASCII whitespace *) let text_starts_with_space text = String.length text > 0 && let c = text.[0] in c = '\t' || c = '\n' || c = '\x0C' || c = '\r' || c = ' ' (* Optional tag omission helpers *) type next_token = | NTComment | NTSpace (* Text starting with space *) | NTText (* Text not starting with space *) | NTStartTag of string | NTEmptyTag of string | NTEndTag of string | NTEOF let classify_next tokens idx = if idx >= Array.length tokens then NTEOF else match tokens.(idx).token with | None -> NTEOF | Some (CommentNode _) -> NTComment | Some (TextNode text) -> if text_starts_with_space text then NTSpace else NTText | Some (StartTag (name, _)) -> NTStartTag (String.lowercase_ascii name) | Some (EmptyTag (name, _)) -> NTEmptyTag (String.lowercase_ascii name) | Some (EndTag name) -> NTEndTag (String.lowercase_ascii name) | Some (DoctypeNode _) -> NTEOF (* Treat doctype as if nothing follows *) (* Should we omit a start tag? *) let should_omit_start_tag opts name attrs next = if not opts.omit_optional_tags then false else let name = String.lowercase_ascii name in match name, next with (* html start tag: omit if not followed by comment or space, AND has no attributes *) | "html", NTComment -> false | "html", NTSpace -> false | "html", _ -> attrs = [] (* only omit if no attributes *) (* head start tag: omit if followed by element (start/empty tag) *) | "head", NTStartTag _ -> true | "head", NTEmptyTag _ -> true | "head", NTEndTag "head" -> true (* empty head *) | "head", NTEOF -> true | "head", _ -> false (* body start tag: omit if not followed by comment or space, AND has no attributes *) | "body", NTComment -> false | "body", NTSpace -> false | "body", _ -> attrs = [] (* only omit if no attributes *) (* colgroup start tag: omit if followed by col element *) | "colgroup", NTStartTag "col" -> true | "colgroup", NTEmptyTag "col" -> true | "colgroup", _ -> false (* tbody start tag: omit if first child is tr *) | "tbody", NTStartTag "tr" -> true | "tbody", _ -> false | _ -> false (* Should we omit an end tag? *) let should_omit_end_tag opts name next = if not opts.omit_optional_tags then false else let name = String.lowercase_ascii name in match name, next with (* html end tag: omit if not followed by comment or space *) | "html", NTComment -> false | "html", NTSpace -> false | "html", _ -> true (* head end tag: omit if not followed by comment or space *) | "head", NTComment -> false | "head", NTSpace -> false | "head", _ -> true (* body end tag: omit if not followed by comment or space *) | "body", NTComment -> false | "body", NTSpace -> false | "body", _ -> true (* li end tag: omit if followed by li start tag or parent end tag *) | "li", NTStartTag "li" -> true | "li", NTEndTag _ -> true | "li", NTEOF -> true | "li", _ -> false (* dt end tag: omit if followed by dt or dd start tag (NOT end tag or EOF!) *) | "dt", NTStartTag "dt" -> true | "dt", NTStartTag "dd" -> true | "dt", _ -> false (* dd end tag: omit if followed by dd or dt start tag, or end tag, or EOF *) | "dd", NTStartTag "dd" -> true | "dd", NTStartTag "dt" -> true | "dd", NTEndTag _ -> true | "dd", NTEOF -> true | "dd", _ -> false (* p end tag: omit if followed by block element (start or empty tag), end tag, or EOF *) | "p", NTStartTag next_name when is_p_closing_element next_name -> true | "p", NTEmptyTag next_name when is_p_closing_element next_name -> true | "p", NTEndTag _ -> true | "p", NTEOF -> true | "p", _ -> false (* optgroup end tag: omit if followed by optgroup start tag, end tag, or EOF *) | "optgroup", NTStartTag "optgroup" -> true | "optgroup", NTEndTag _ -> true | "optgroup", NTEOF -> true | "optgroup", _ -> false (* option end tag: omit if followed by option/optgroup start tag, end tag, or EOF *) | "option", NTStartTag "option" -> true | "option", NTStartTag "optgroup" -> true | "option", NTEndTag _ -> true | "option", NTEOF -> true | "option", _ -> false (* colgroup end tag: omit if not followed by comment, space, or another colgroup *) | "colgroup", NTComment -> false | "colgroup", NTSpace -> false | "colgroup", NTStartTag "colgroup" -> false (* keep end tag when another colgroup follows *) | "colgroup", _ -> true (* thead end tag: omit if followed by tbody or tfoot start tag *) | "thead", NTStartTag "tbody" -> true | "thead", NTStartTag "tfoot" -> true | "thead", _ -> false (* tbody end tag: omit if followed by tbody/tfoot start tag, end tag, or EOF *) | "tbody", NTStartTag "tbody" -> true | "tbody", NTStartTag "tfoot" -> true | "tbody", NTEndTag _ -> true | "tbody", NTEOF -> true | "tbody", _ -> false (* tfoot end tag: omit if followed by tbody start tag, end tag, or EOF *) | "tfoot", NTStartTag "tbody" -> true | "tfoot", NTEndTag _ -> true | "tfoot", NTEOF -> true | "tfoot", _ -> false (* tr end tag: omit if followed by tr start tag, end tag, or EOF *) | "tr", NTStartTag "tr" -> true | "tr", NTEndTag _ -> true | "tr", NTEOF -> true | "tr", _ -> false (* td end tag: omit if followed by td/th start tag, end tag, or EOF *) | "td", NTStartTag "td" -> true | "td", NTStartTag "th" -> true | "td", NTEndTag _ -> true | "td", NTEOF -> true | "td", _ -> false (* th end tag: omit if followed by th/td start tag, end tag, or EOF *) | "th", NTStartTag "th" -> true | "th", NTStartTag "td" -> true | "th", NTEndTag _ -> true | "th", NTEOF -> true | "th", _ -> false | _ -> false (* Run a single test *) let run_test test = try (* Build token infos from input *) let token_infos = Array.of_list (List.map build_token_info test.input) in let num_tokens = Array.length token_infos in (* Handle inject_meta_charset option *) let inject_meta = test.options.inject_meta_charset in let encoding = test.options.encoding in (* Serialize with context tracking *) let buf = Buffer.create 256 in let in_raw_text = ref false in let preserve_whitespace = ref false in let element_stack : string list ref = ref [] in let in_head = ref false in let meta_charset_injected = ref false in let prev_was_section_end = ref false in (* Track if prev token was thead/tbody/tfoot end *) for i = 0 to num_tokens - 1 do let info = token_infos.(i) in let next = classify_next token_infos (i + 1) in match info.token with | None -> () | Some (StartTag (name, attrs)) -> let name_lower = String.lowercase_ascii name in (* Track head element *) if name_lower = "head" then in_head := true; (* For inject_meta_charset, we need to check if there's any charset meta coming up *) (* If yes, don't inject at head start; if no, inject at head start *) let should_inject_at_head = if not inject_meta || name_lower <> "head" then false else match encoding with | None -> false | Some _ -> (* Look ahead to see if there's a charset meta or http-equiv content-type *) let has_charset_meta = ref false in for j = i + 1 to num_tokens - 1 do match token_infos.(j).token with | Some (EmptyTag (n, a)) when String.lowercase_ascii n = "meta" -> let has_charset = List.exists (fun (k, _) -> String.lowercase_ascii k = "charset") a in let has_http_equiv_ct = List.exists (fun (k, v) -> String.lowercase_ascii k = "http-equiv" && String.lowercase_ascii v = "content-type") a in if has_charset || has_http_equiv_ct then has_charset_meta := true | Some (EndTag n) when String.lowercase_ascii n = "head" -> () | _ -> () done; not !has_charset_meta in (* Special case: tbody start tag cannot be omitted if preceded by section end tag *) let can_omit_start = if name_lower = "tbody" && !prev_was_section_end then false else should_omit_start_tag test.options name attrs next in prev_was_section_end := false; (* Reset for next iteration *) if should_inject_at_head then begin meta_charset_injected := true; (* Don't output head start tag if we should omit it *) if not can_omit_start then Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:false name attrs); Buffer.add_string buf (Printf.sprintf "" (Option.get encoding)); element_stack := name :: !element_stack; if is_raw_text_element name then in_raw_text := true; if is_whitespace_preserving_element name then preserve_whitespace := true end else if not can_omit_start then begin Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:false name attrs); element_stack := name :: !element_stack; if is_raw_text_element name then in_raw_text := true; if is_whitespace_preserving_element name then preserve_whitespace := true end else begin element_stack := name :: !element_stack; if is_raw_text_element name then in_raw_text := true; if is_whitespace_preserving_element name then preserve_whitespace := true end | Some (EmptyTag (name, attrs)) -> let name_lower = String.lowercase_ascii name in prev_was_section_end := false; (* Reset for next iteration *) (* Handle meta charset replacement *) if inject_meta && !in_head && name_lower = "meta" then begin let has_charset = List.exists (fun (k, _) -> String.lowercase_ascii k = "charset") attrs in let has_http_equiv_ct = List.exists (fun (k, v) -> String.lowercase_ascii k = "http-equiv" && String.lowercase_ascii v = "content-type" ) attrs in if has_charset then begin (* Replace charset value *) match encoding with | Some enc -> Buffer.add_string buf (Printf.sprintf "" enc) | None -> () (* No encoding, skip the meta tag *) end else if has_http_equiv_ct then begin (* Replace charset in content value *) match encoding with | Some enc -> let new_attrs = List.map (fun (k, v) -> if String.lowercase_ascii k = "content" then let new_content = Printf.sprintf "text/html; charset=%s" enc in (k, new_content) else (k, v) ) attrs in Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:true name new_attrs) | None -> Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:true name attrs) end else begin (* Regular meta tag, output as normal *) Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:true name attrs) end end else Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:true name attrs) | Some (EndTag name) -> let name_lower = String.lowercase_ascii name in (* Track head element *) if name_lower = "head" then in_head := false; (* Pop from element stack *) (match !element_stack with | top :: rest when String.lowercase_ascii top = name_lower -> element_stack := rest; if is_raw_text_element name then in_raw_text := false; if is_whitespace_preserving_element name then preserve_whitespace := false | _ -> ()); let is_section_end = List.mem name_lower ["thead"; "tbody"; "tfoot"] in let omit = should_omit_end_tag test.options name next in if not omit then begin Buffer.add_string buf "' end; (* Track if we omitted a section end tag - next tbody can't be omitted *) prev_was_section_end := is_section_end && omit | Some (TextNode text) -> prev_was_section_end := false; let processed_text = if !in_raw_text && not test.options.escape_rcdata then text else if test.options.strip_whitespace && not !preserve_whitespace then escape_text (collapse_whitespace text) else escape_text text in Buffer.add_string buf processed_text | Some (CommentNode text) -> prev_was_section_end := false; Buffer.add_string buf "" | Some (DoctypeNode node) -> prev_was_section_end := false; Buffer.add_string buf (serialize_node test.options ~in_raw_text:false node) done; let serialized = Buffer.contents buf in (* Check if it matches any expected output *) let matches = List.exists (fun exp -> serialized = exp) test.expected in (matches, serialized, test.expected) with e -> (false, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), test.expected) (* Run all tests in a file *) let format_options opts = let parts = [] in let parts = if opts.quote_char_explicit then Printf.sprintf "quote_char='%c'" opts.quote_char :: parts else parts in let parts = if not opts.minimize_boolean_attributes then "minimize_bool=false" :: parts else parts in let parts = if opts.use_trailing_solidus then "trailing_solidus=true" :: parts else parts in let parts = if opts.escape_lt_in_attrs then "escape_lt=true" :: parts else parts in let parts = if opts.escape_rcdata then "escape_rcdata=true" :: parts else parts in let parts = if opts.strip_whitespace then "strip_ws=true" :: parts else parts in let parts = if opts.inject_meta_charset then "inject_charset=true" :: parts else parts in let parts = if not opts.omit_optional_tags then "omit_tags=false" :: parts else parts in if parts = [] then "(defaults)" else String.concat ", " (List.rev parts) let run_file path = let content = let ic = open_in path in let n = in_channel_length ic in let s = really_input_string ic n in close_in ic; s in let json = match Jsont_bytesrw.decode_string Jsont.json content with | Ok j -> j | Error e -> failwith (Printf.sprintf "JSON parse error in %s: %s" path e) in let obj = json_object json in let tests_json = match json_mem "tests" obj with | Some t -> json_array t | None -> [] in let filename = Filename.basename path in let passed = ref 0 in let failed = ref 0 in let results = ref [] in List.iteri (fun i test_json -> try let test = parse_test_case test_json in let (success, actual, expected) = run_test test in let result : Report.test_result = { test_num = i + 1; description = test.description; input = String.concat "\n" (List.map (fun tok -> (* Simplified token representation *) match tok with | Jsont.Array (arr, _) -> (match arr with | Jsont.String (ty, _) :: rest -> Printf.sprintf "%s: %s" ty (String.concat ", " (List.map (function | Jsont.String (s, _) -> Printf.sprintf "%S" s | Jsont.Object _ -> "{...}" | Jsont.Null _ -> "null" | _ -> "?" ) rest)) | _ -> "?") | _ -> "?" ) test.input); expected = String.concat " | " expected; actual; success; details = [ ("Options", format_options test.options); ("Expected Variants", string_of_int (List.length expected)); ]; raw_test_data = Some test.raw_json; } in results := result :: !results; if success then incr passed else incr failed with e -> incr failed; let result : Report.test_result = { test_num = i + 1; description = Printf.sprintf "Test %d" (i + 1); input = ""; expected = ""; actual = Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e); success = false; details = []; raw_test_data = Some (json_to_string test_json); } in results := result :: !results; Printf.eprintf "Exception parsing test %d: %s\n" (i + 1) (Printexc.to_string e) ) tests_json; let file_result : Report.file_result = { filename; test_type = "Serializer"; passed_count = !passed; failed_count = !failed; tests = List.rev !results; } in (file_result, !passed, !failed) let () = let test_dir = Sys.argv.(1) in let files = Sys.readdir test_dir |> Array.to_list in let test_files = List.filter (fun f -> Filename.check_suffix f ".test") files in let total_passed = ref 0 in let total_failed = ref 0 in let file_results = ref [] in List.iter (fun file -> let path = Filename.concat test_dir file in let (file_result, passed, failed) = run_file path in total_passed := !total_passed + passed; total_failed := !total_failed + failed; file_results := file_result :: !file_results; Printf.printf "%s: %d passed, %d failed\n" file passed failed ) (List.sort String.compare test_files); Printf.printf "\n=== Summary ===\n"; Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed; (* Generate HTML report *) let report : Report.report = { title = "HTML5 Serializer Tests"; test_type = "serializer"; description = "These tests validate the HTML serialization algorithm for converting DOM trees back to HTML text. \ Each test provides a sequence of tokens (start tags, end tags, text, comments, doctypes) and one \ or more valid serialized outputs. Tests cover attribute quoting, boolean attribute minimization, \ self-closing tag syntax (trailing solidus), entity escaping, whitespace handling, meta charset \ injection, and optional tag omission rules as specified in the HTML Standard. Multiple expected \ outputs allow for valid variations in serialization style."; files = List.rev !file_results; total_passed = !total_passed; total_failed = !total_failed; match_quality = None; test_type_breakdown = None; strictness_mode = None; run_timestamp = None; } in Report.generate_report report "test_serializer_report.html"; exit (if !total_failed > 0 then 1 else 0)