(* Test runner for html5lib-tests serializer tests *)
module Dom = Html5rw.Dom
module Report = Test_report
(* Extract values from JSON *)
let json_string = function
| Jsont.String (s, _) -> s
| _ -> failwith "Expected string"
let json_string_opt = function
| Jsont.Null _ -> None
| Jsont.String (s, _) -> Some s
| _ -> failwith "Expected string or null"
let json_bool = function
| Jsont.Bool (b, _) -> b
| _ -> failwith "Expected bool"
let json_array = function
| Jsont.Array (arr, _) -> arr
| _ -> failwith "Expected array"
let json_object = function
| Jsont.Object (obj, _) -> obj
| _ -> failwith "Expected object"
let json_mem name obj =
match List.find_opt (fun ((n, _), _) -> n = name) obj with
| Some (_, v) -> Some v
| None -> None
let json_mem_exn name obj =
match json_mem name obj with
| Some v -> v
| None -> failwith ("Missing member: " ^ name)
let rec json_to_string = function
| Jsont.Null _ -> "null"
| Jsont.Bool (b, _) -> string_of_bool b
| Jsont.Number (n, _) -> Printf.sprintf "%g" n
| Jsont.String (s, _) -> Printf.sprintf "%S" s
| Jsont.Array (arr, _) ->
"[" ^ String.concat ", " (List.map json_to_string arr) ^ "]"
| Jsont.Object (obj, _) ->
"{" ^ String.concat ", " (List.map (fun ((n, _), v) -> Printf.sprintf "%S: %s" n (json_to_string v)) obj) ^ "}"
(* Serialization options *)
type serialize_options = {
quote_char : char;
quote_char_explicit : bool; (* Was quote_char explicitly set? *)
minimize_boolean_attributes : bool;
use_trailing_solidus : bool;
escape_lt_in_attrs : bool;
escape_rcdata : bool;
strip_whitespace : bool;
inject_meta_charset : bool;
encoding : string option;
omit_optional_tags : bool;
}
let default_options = {
quote_char = '"';
quote_char_explicit = false;
minimize_boolean_attributes = true;
use_trailing_solidus = false;
escape_lt_in_attrs = false;
escape_rcdata = false;
strip_whitespace = false;
inject_meta_charset = false;
encoding = None;
omit_optional_tags = true; (* HTML5 default *)
}
(* Parse options from JSON *)
let parse_options json_opt =
match json_opt with
| None -> default_options
| Some json ->
let obj = json_object json in
let get_bool name default =
match json_mem name obj with
| Some j -> (try json_bool j with _ -> default)
| None -> default
in
let get_string name =
match json_mem name obj with
| Some (Jsont.String (s, _)) -> Some s
| _ -> None
in
let quote_char_opt =
match json_mem "quote_char" obj with
| Some (Jsont.String (s, _)) when String.length s = 1 -> Some s.[0]
| _ -> None
in
{
quote_char = Option.value ~default:'"' quote_char_opt;
quote_char_explicit = Option.is_some quote_char_opt;
minimize_boolean_attributes = get_bool "minimize_boolean_attributes" (get_bool "quote_attr_values" true);
use_trailing_solidus = get_bool "use_trailing_solidus" false;
escape_lt_in_attrs = get_bool "escape_lt_in_attrs" false;
escape_rcdata = get_bool "escape_rcdata" false;
strip_whitespace = get_bool "strip_whitespace" false;
inject_meta_charset = get_bool "inject_meta_charset" false;
encoding = get_string "encoding";
omit_optional_tags = get_bool "omit_optional_tags" true;
}
(* Test case *)
type test_case = {
description : string;
input : Jsont.json list;
expected : string list;
options : serialize_options;
raw_json : string; (* Original JSON representation of this test *)
}
let parse_test_case json =
let raw_json = json_to_string json in
let obj = json_object json in
let description = json_string (json_mem_exn "description" obj) in
let input = json_array (json_mem_exn "input" obj) in
let expected = List.map json_string (json_array (json_mem_exn "expected" obj)) in
let options = parse_options (json_mem "options" obj) in
{ description; input; expected; options; raw_json }
(* Parse attrs that can be either array [{name, value}] or object {name: value} or empty {} *)
let parse_attrs attrs_json =
match attrs_json with
| Jsont.Array (arr, _) ->
List.map (fun attr_json ->
let attr_obj = json_object attr_json in
let attr_name = json_string (json_mem_exn "name" attr_obj) in
let value = json_string (json_mem_exn "value" attr_obj) in
(attr_name, value)
) arr
| Jsont.Object (obj, _) ->
List.map (fun ((n, _), v) -> (n, json_string v)) obj
| _ -> []
(* Void elements that don't need end tags *)
let is_void_element name =
List.mem (String.lowercase_ascii name)
["area"; "base"; "br"; "col"; "embed"; "hr"; "img"; "input";
"link"; "meta"; "param"; "source"; "track"; "wbr"]
(* Raw text elements whose content should not be escaped *)
let is_raw_text_element name =
List.mem (String.lowercase_ascii name) ["script"; "style"]
(* Elements where whitespace should be preserved *)
let is_whitespace_preserving_element name =
List.mem (String.lowercase_ascii name) ["pre"; "textarea"; "script"; "style"]
(* Block elements that close a p tag *)
let p_closing_elements = [
"address"; "article"; "aside"; "blockquote"; "datagrid"; "dialog"; "dir";
"div"; "dl"; "fieldset"; "footer"; "form"; "h1"; "h2"; "h3"; "h4"; "h5"; "h6";
"header"; "hgroup"; "hr"; "main"; "menu"; "nav"; "ol"; "p"; "pre"; "section";
"table"; "ul"
]
let is_p_closing_element name =
List.mem (String.lowercase_ascii name) p_closing_elements
(* Collapse runs of whitespace to single space *)
let collapse_whitespace text =
let len = String.length text in
let buf = Buffer.create len in
let in_whitespace = ref false in
for i = 0 to len - 1 do
let c = text.[i] in
if c = '\t' || c = '\r' || c = '\n' || c = '\x0C' || c = ' ' then begin
if not !in_whitespace then begin
Buffer.add_char buf ' ';
in_whitespace := true
end
end else begin
Buffer.add_char buf c;
in_whitespace := false
end
done;
Buffer.contents buf
(* Token types for stream-based serialization *)
type token_type =
| StartTag of string * (string * string) list (* name, attrs *)
| EndTag of string (* name *)
| EmptyTag of string * (string * string) list (* name, attrs *)
| TextNode of string
| CommentNode of string
| DoctypeNode of Html5rw.Dom.node
type token_info = {
token : token_type option;
}
let build_token_info token =
let arr = json_array token in
match arr with
| [] -> { token = None }
| type_json :: rest ->
let token_type_str = json_string type_json in
match token_type_str, rest with
| "StartTag", [_ns_json; name_json; attrs_json] ->
let name = json_string name_json in
let attrs = parse_attrs attrs_json in
{ token = Some (StartTag (name, attrs)) }
| "StartTag", [name_json; attrs_json] ->
let name = json_string name_json in
let attrs = parse_attrs attrs_json in
{ token = Some (StartTag (name, attrs)) }
| "EmptyTag", [name_json; attrs_json] ->
let name = json_string name_json in
let attrs = parse_attrs attrs_json in
{ token = Some (EmptyTag (name, attrs)) }
| "EndTag", [_ns_json; name_json] ->
let name = json_string name_json in
{ token = Some (EndTag name) }
| "EndTag", [name_json] ->
let name = json_string name_json in
{ token = Some (EndTag name) }
| "Characters", [text_json] ->
let text = json_string text_json in
{ token = Some (TextNode text) }
| "Comment", [text_json] ->
let text = json_string text_json in
{ token = Some (CommentNode text) }
| "Doctype", [name_json] ->
let name = json_string name_json in
let node = Html5rw.Dom.create_doctype ~name () in
{ token = Some (DoctypeNode node) }
| "Doctype", [name_json; public_json] ->
let name = json_string name_json in
let public_id = json_string_opt public_json in
let node = match public_id with
| Some pub -> Html5rw.Dom.create_doctype ~name ~public_id:pub ()
| None -> Html5rw.Dom.create_doctype ~name ()
in
{ token = Some (DoctypeNode node) }
| "Doctype", [name_json; public_json; system_json] ->
let name = json_string name_json in
let public_id = json_string_opt public_json in
let system_id = json_string_opt system_json in
let node = match public_id, system_id with
| Some pub, Some sys -> Html5rw.Dom.create_doctype ~name ~public_id:pub ~system_id:sys ()
| Some pub, None -> Html5rw.Dom.create_doctype ~name ~public_id:pub ()
| None, Some sys -> Html5rw.Dom.create_doctype ~name ~system_id:sys ()
| None, None -> Html5rw.Dom.create_doctype ~name ()
in
{ token = Some (DoctypeNode node) }
| _ -> { token = None }
(* Serialize a single node to HTML with options *)
let escape_text text =
let buf = Buffer.create (String.length text) in
String.iter (fun c ->
match c with
| '&' -> Buffer.add_string buf "&"
| '<' -> Buffer.add_string buf "<"
| '>' -> Buffer.add_string buf ">"
| c -> Buffer.add_char buf c
) text;
Buffer.contents buf
let can_unquote_attr_value value =
if String.length value = 0 then false
else
let valid = ref true in
String.iter (fun c ->
if c = '>' || c = '"' || c = '\'' || c = '=' || c = '`' ||
c = ' ' || c = '\t' || c = '\n' || c = '\x0C' || c = '\r' then
valid := false
) value;
!valid
let escape_attr_value value quote_char escape_lt =
let buf = Buffer.create (String.length value) in
String.iter (fun c ->
match c with
| '&' -> Buffer.add_string buf "&"
| '"' when quote_char = '"' -> Buffer.add_string buf """
| '\'' when quote_char = '\'' -> Buffer.add_string buf "'"
| '<' when escape_lt -> Buffer.add_string buf "<"
| c -> Buffer.add_char buf c
) value;
Buffer.contents buf
let serialize_node opts ~in_raw_text node =
match node.Dom.name with
| "#text" ->
if in_raw_text && not opts.escape_rcdata then
node.Dom.data
else
escape_text node.Dom.data
| "#comment" ->
""
| "!doctype" ->
let buf = Buffer.create 64 in
Buffer.add_string buf "
Buffer.add_string buf (Option.value ~default:"html" dt.Dom.name);
(match dt.Dom.public_id with
| Some pub when pub <> "" ->
Buffer.add_string buf " PUBLIC \"";
Buffer.add_string buf pub;
Buffer.add_char buf '"';
(match dt.Dom.system_id with
| Some sys ->
Buffer.add_string buf " \"";
Buffer.add_string buf sys;
Buffer.add_char buf '"'
| None -> ())
| _ ->
match dt.Dom.system_id with
| Some sys when sys <> "" ->
Buffer.add_string buf " SYSTEM \"";
Buffer.add_string buf sys;
Buffer.add_char buf '"'
| _ -> ())
| None -> Buffer.add_string buf "html");
Buffer.add_char buf '>';
Buffer.contents buf
| _ -> failwith "serialize_node called with element"
let choose_quote value default_quote explicit =
(* If quote_char was explicitly set, always use it *)
if explicit then default_quote
else
(* Otherwise, if value contains the default quote but not the other, use the other *)
let has_double = String.contains value '"' in
let has_single = String.contains value '\'' in
if has_double && not has_single then '\''
else if has_single && not has_double then '"'
else default_quote
(* Serialize an element tag (start tag) *)
let serialize_start_tag opts ~is_empty_tag name attrs =
let buf = Buffer.create 64 in
Buffer.add_char buf '<';
Buffer.add_string buf name;
(* Sort attributes alphabetically for consistent output *)
let sorted_attrs = List.sort (fun (a, _) (b, _) -> String.compare a b) attrs in
List.iter (fun (key, value) ->
Buffer.add_char buf ' ';
Buffer.add_string buf key;
let should_minimize =
opts.minimize_boolean_attributes &&
String.lowercase_ascii key = String.lowercase_ascii value
in
if should_minimize then
()
else if String.length value = 0 then begin
Buffer.add_char buf '=';
Buffer.add_char buf opts.quote_char;
Buffer.add_char buf opts.quote_char
end else if can_unquote_attr_value value then begin
Buffer.add_char buf '=';
Buffer.add_string buf value
end else begin
let quote = choose_quote value opts.quote_char opts.quote_char_explicit in
Buffer.add_char buf '=';
Buffer.add_char buf quote;
Buffer.add_string buf (escape_attr_value value quote opts.escape_lt_in_attrs);
Buffer.add_char buf quote
end
) sorted_attrs;
if opts.use_trailing_solidus && (is_empty_tag || is_void_element name) then
Buffer.add_string buf " /";
Buffer.add_char buf '>';
Buffer.contents buf
(* Check if text starts with ASCII whitespace *)
let text_starts_with_space text =
String.length text > 0 &&
let c = text.[0] in
c = '\t' || c = '\n' || c = '\x0C' || c = '\r' || c = ' '
(* Optional tag omission helpers *)
type next_token =
| NTComment
| NTSpace (* Text starting with space *)
| NTText (* Text not starting with space *)
| NTStartTag of string
| NTEmptyTag of string
| NTEndTag of string
| NTEOF
let classify_next tokens idx =
if idx >= Array.length tokens then NTEOF
else match tokens.(idx).token with
| None -> NTEOF
| Some (CommentNode _) -> NTComment
| Some (TextNode text) ->
if text_starts_with_space text then NTSpace else NTText
| Some (StartTag (name, _)) -> NTStartTag (String.lowercase_ascii name)
| Some (EmptyTag (name, _)) -> NTEmptyTag (String.lowercase_ascii name)
| Some (EndTag name) -> NTEndTag (String.lowercase_ascii name)
| Some (DoctypeNode _) -> NTEOF (* Treat doctype as if nothing follows *)
(* Should we omit a start tag? *)
let should_omit_start_tag opts name attrs next =
if not opts.omit_optional_tags then false
else
let name = String.lowercase_ascii name in
match name, next with
(* html start tag: omit if not followed by comment or space, AND has no attributes *)
| "html", NTComment -> false
| "html", NTSpace -> false
| "html", _ -> attrs = [] (* only omit if no attributes *)
(* head start tag: omit if followed by element (start/empty tag) *)
| "head", NTStartTag _ -> true
| "head", NTEmptyTag _ -> true
| "head", NTEndTag "head" -> true (* empty head *)
| "head", NTEOF -> true
| "head", _ -> false
(* body start tag: omit if not followed by comment or space, AND has no attributes *)
| "body", NTComment -> false
| "body", NTSpace -> false
| "body", _ -> attrs = [] (* only omit if no attributes *)
(* colgroup start tag: omit if followed by col element *)
| "colgroup", NTStartTag "col" -> true
| "colgroup", NTEmptyTag "col" -> true
| "colgroup", _ -> false
(* tbody start tag: omit if first child is tr *)
| "tbody", NTStartTag "tr" -> true
| "tbody", _ -> false
| _ -> false
(* Should we omit an end tag? *)
let should_omit_end_tag opts name next =
if not opts.omit_optional_tags then false
else
let name = String.lowercase_ascii name in
match name, next with
(* html end tag: omit if not followed by comment or space *)
| "html", NTComment -> false
| "html", NTSpace -> false
| "html", _ -> true
(* head end tag: omit if not followed by comment or space *)
| "head", NTComment -> false
| "head", NTSpace -> false
| "head", _ -> true
(* body end tag: omit if not followed by comment or space *)
| "body", NTComment -> false
| "body", NTSpace -> false
| "body", _ -> true
(* li end tag: omit if followed by li start tag or parent end tag *)
| "li", NTStartTag "li" -> true
| "li", NTEndTag _ -> true
| "li", NTEOF -> true
| "li", _ -> false
(* dt end tag: omit if followed by dt or dd start tag (NOT end tag or EOF!) *)
| "dt", NTStartTag "dt" -> true
| "dt", NTStartTag "dd" -> true
| "dt", _ -> false
(* dd end tag: omit if followed by dd or dt start tag, or end tag, or EOF *)
| "dd", NTStartTag "dd" -> true
| "dd", NTStartTag "dt" -> true
| "dd", NTEndTag _ -> true
| "dd", NTEOF -> true
| "dd", _ -> false
(* p end tag: omit if followed by block element (start or empty tag), end tag, or EOF *)
| "p", NTStartTag next_name when is_p_closing_element next_name -> true
| "p", NTEmptyTag next_name when is_p_closing_element next_name -> true
| "p", NTEndTag _ -> true
| "p", NTEOF -> true
| "p", _ -> false
(* optgroup end tag: omit if followed by optgroup start tag, end tag, or EOF *)
| "optgroup", NTStartTag "optgroup" -> true
| "optgroup", NTEndTag _ -> true
| "optgroup", NTEOF -> true
| "optgroup", _ -> false
(* option end tag: omit if followed by option/optgroup start tag, end tag, or EOF *)
| "option", NTStartTag "option" -> true
| "option", NTStartTag "optgroup" -> true
| "option", NTEndTag _ -> true
| "option", NTEOF -> true
| "option", _ -> false
(* colgroup end tag: omit if not followed by comment, space, or another colgroup *)
| "colgroup", NTComment -> false
| "colgroup", NTSpace -> false
| "colgroup", NTStartTag "colgroup" -> false (* keep end tag when another colgroup follows *)
| "colgroup", _ -> true
(* thead end tag: omit if followed by tbody or tfoot start tag *)
| "thead", NTStartTag "tbody" -> true
| "thead", NTStartTag "tfoot" -> true
| "thead", _ -> false
(* tbody end tag: omit if followed by tbody/tfoot start tag, end tag, or EOF *)
| "tbody", NTStartTag "tbody" -> true
| "tbody", NTStartTag "tfoot" -> true
| "tbody", NTEndTag _ -> true
| "tbody", NTEOF -> true
| "tbody", _ -> false
(* tfoot end tag: omit if followed by tbody start tag, end tag, or EOF *)
| "tfoot", NTStartTag "tbody" -> true
| "tfoot", NTEndTag _ -> true
| "tfoot", NTEOF -> true
| "tfoot", _ -> false
(* tr end tag: omit if followed by tr start tag, end tag, or EOF *)
| "tr", NTStartTag "tr" -> true
| "tr", NTEndTag _ -> true
| "tr", NTEOF -> true
| "tr", _ -> false
(* td end tag: omit if followed by td/th start tag, end tag, or EOF *)
| "td", NTStartTag "td" -> true
| "td", NTStartTag "th" -> true
| "td", NTEndTag _ -> true
| "td", NTEOF -> true
| "td", _ -> false
(* th end tag: omit if followed by th/td start tag, end tag, or EOF *)
| "th", NTStartTag "th" -> true
| "th", NTStartTag "td" -> true
| "th", NTEndTag _ -> true
| "th", NTEOF -> true
| "th", _ -> false
| _ -> false
(* Run a single test *)
let run_test test =
try
(* Build token infos from input *)
let token_infos = Array.of_list (List.map build_token_info test.input) in
let num_tokens = Array.length token_infos in
(* Handle inject_meta_charset option *)
let inject_meta = test.options.inject_meta_charset in
let encoding = test.options.encoding in
(* Serialize with context tracking *)
let buf = Buffer.create 256 in
let in_raw_text = ref false in
let preserve_whitespace = ref false in
let element_stack : string list ref = ref [] in
let in_head = ref false in
let meta_charset_injected = ref false in
let prev_was_section_end = ref false in (* Track if prev token was thead/tbody/tfoot end *)
for i = 0 to num_tokens - 1 do
let info = token_infos.(i) in
let next = classify_next token_infos (i + 1) in
match info.token with
| None -> ()
| Some (StartTag (name, attrs)) ->
let name_lower = String.lowercase_ascii name in
(* Track head element *)
if name_lower = "head" then in_head := true;
(* For inject_meta_charset, we need to check if there's any charset meta coming up *)
(* If yes, don't inject at head start; if no, inject at head start *)
let should_inject_at_head =
if not inject_meta || name_lower <> "head" then false
else match encoding with
| None -> false
| Some _ ->
(* Look ahead to see if there's a charset meta or http-equiv content-type *)
let has_charset_meta = ref false in
for j = i + 1 to num_tokens - 1 do
match token_infos.(j).token with
| Some (EmptyTag (n, a)) when String.lowercase_ascii n = "meta" ->
let has_charset = List.exists (fun (k, _) -> String.lowercase_ascii k = "charset") a in
let has_http_equiv_ct = List.exists (fun (k, v) ->
String.lowercase_ascii k = "http-equiv" && String.lowercase_ascii v = "content-type") a in
if has_charset || has_http_equiv_ct then has_charset_meta := true
| Some (EndTag n) when String.lowercase_ascii n = "head" -> ()
| _ -> ()
done;
not !has_charset_meta
in
(* Special case: tbody start tag cannot be omitted if preceded by section end tag *)
let can_omit_start =
if name_lower = "tbody" && !prev_was_section_end then false
else should_omit_start_tag test.options name attrs next
in
prev_was_section_end := false; (* Reset for next iteration *)
if should_inject_at_head then begin
meta_charset_injected := true;
(* Don't output head start tag if we should omit it *)
if not can_omit_start then
Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:false name attrs);
Buffer.add_string buf (Printf.sprintf "" (Option.get encoding));
element_stack := name :: !element_stack;
if is_raw_text_element name then in_raw_text := true;
if is_whitespace_preserving_element name then preserve_whitespace := true
end else if not can_omit_start then begin
Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:false name attrs);
element_stack := name :: !element_stack;
if is_raw_text_element name then in_raw_text := true;
if is_whitespace_preserving_element name then preserve_whitespace := true
end else begin
element_stack := name :: !element_stack;
if is_raw_text_element name then in_raw_text := true;
if is_whitespace_preserving_element name then preserve_whitespace := true
end
| Some (EmptyTag (name, attrs)) ->
let name_lower = String.lowercase_ascii name in
prev_was_section_end := false; (* Reset for next iteration *)
(* Handle meta charset replacement *)
if inject_meta && !in_head && name_lower = "meta" then begin
let has_charset = List.exists (fun (k, _) -> String.lowercase_ascii k = "charset") attrs in
let has_http_equiv_ct =
List.exists (fun (k, v) ->
String.lowercase_ascii k = "http-equiv" &&
String.lowercase_ascii v = "content-type"
) attrs
in
if has_charset then begin
(* Replace charset value *)
match encoding with
| Some enc ->
Buffer.add_string buf (Printf.sprintf "" enc)
| None -> () (* No encoding, skip the meta tag *)
end else if has_http_equiv_ct then begin
(* Replace charset in content value *)
match encoding with
| Some enc ->
let new_attrs = List.map (fun (k, v) ->
if String.lowercase_ascii k = "content" then
let new_content = Printf.sprintf "text/html; charset=%s" enc in
(k, new_content)
else (k, v)
) attrs in
Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:true name new_attrs)
| None ->
Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:true name attrs)
end else begin
(* Regular meta tag, output as normal *)
Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:true name attrs)
end
end else
Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:true name attrs)
| Some (EndTag name) ->
let name_lower = String.lowercase_ascii name in
(* Track head element *)
if name_lower = "head" then in_head := false;
(* Pop from element stack *)
(match !element_stack with
| top :: rest when String.lowercase_ascii top = name_lower ->
element_stack := rest;
if is_raw_text_element name then in_raw_text := false;
if is_whitespace_preserving_element name then preserve_whitespace := false
| _ -> ());
let is_section_end = List.mem name_lower ["thead"; "tbody"; "tfoot"] in
let omit = should_omit_end_tag test.options name next in
if not omit then begin
Buffer.add_string buf "";
Buffer.add_string buf name;
Buffer.add_char buf '>'
end;
(* Track if we omitted a section end tag - next tbody can't be omitted *)
prev_was_section_end := is_section_end && omit
| Some (TextNode text) ->
prev_was_section_end := false;
let processed_text =
if !in_raw_text && not test.options.escape_rcdata then
text
else if test.options.strip_whitespace && not !preserve_whitespace then
escape_text (collapse_whitespace text)
else
escape_text text
in
Buffer.add_string buf processed_text
| Some (CommentNode text) ->
prev_was_section_end := false;
Buffer.add_string buf ""
| Some (DoctypeNode node) ->
prev_was_section_end := false;
Buffer.add_string buf (serialize_node test.options ~in_raw_text:false node)
done;
let serialized = Buffer.contents buf in
(* Check if it matches any expected output *)
let matches = List.exists (fun exp -> serialized = exp) test.expected in
(matches, serialized, test.expected)
with e ->
(false, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), test.expected)
(* Run all tests in a file *)
let format_options opts =
let parts = [] in
let parts = if opts.quote_char_explicit then
Printf.sprintf "quote_char='%c'" opts.quote_char :: parts else parts in
let parts = if not opts.minimize_boolean_attributes then
"minimize_bool=false" :: parts else parts in
let parts = if opts.use_trailing_solidus then
"trailing_solidus=true" :: parts else parts in
let parts = if opts.escape_lt_in_attrs then
"escape_lt=true" :: parts else parts in
let parts = if opts.escape_rcdata then
"escape_rcdata=true" :: parts else parts in
let parts = if opts.strip_whitespace then
"strip_ws=true" :: parts else parts in
let parts = if opts.inject_meta_charset then
"inject_charset=true" :: parts else parts in
let parts = if not opts.omit_optional_tags then
"omit_tags=false" :: parts else parts in
if parts = [] then "(defaults)" else String.concat ", " (List.rev parts)
let run_file path =
let content =
let ic = open_in path in
let n = in_channel_length ic in
let s = really_input_string ic n in
close_in ic;
s
in
let json = match Jsont_bytesrw.decode_string Jsont.json content with
| Ok j -> j
| Error e -> failwith (Printf.sprintf "JSON parse error in %s: %s" path e)
in
let obj = json_object json in
let tests_json = match json_mem "tests" obj with
| Some t -> json_array t
| None -> []
in
let filename = Filename.basename path in
let passed = ref 0 in
let failed = ref 0 in
let results = ref [] in
List.iteri (fun i test_json ->
try
let test = parse_test_case test_json in
let (success, actual, expected) = run_test test in
let result : Report.test_result = {
test_num = i + 1;
description = test.description;
input = String.concat "\n" (List.map (fun tok ->
(* Simplified token representation *)
match tok with
| Jsont.Array (arr, _) ->
(match arr with
| Jsont.String (ty, _) :: rest ->
Printf.sprintf "%s: %s" ty (String.concat ", " (List.map (function
| Jsont.String (s, _) -> Printf.sprintf "%S" s
| Jsont.Object _ -> "{...}"
| Jsont.Null _ -> "null"
| _ -> "?"
) rest))
| _ -> "?")
| _ -> "?"
) test.input);
expected = String.concat " | " expected;
actual;
success;
details = [
("Options", format_options test.options);
("Expected Variants", string_of_int (List.length expected));
];
raw_test_data = Some test.raw_json;
} in
results := result :: !results;
if success then incr passed else incr failed
with e ->
incr failed;
let result : Report.test_result = {
test_num = i + 1;
description = Printf.sprintf "Test %d" (i + 1);
input = "";
expected = "";
actual = Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e);
success = false;
details = [];
raw_test_data = Some (json_to_string test_json);
} in
results := result :: !results;
Printf.eprintf "Exception parsing test %d: %s\n" (i + 1) (Printexc.to_string e)
) tests_json;
let file_result : Report.file_result = {
filename;
test_type = "Serializer";
passed_count = !passed;
failed_count = !failed;
tests = List.rev !results;
} in
(file_result, !passed, !failed)
let () =
let test_dir = Sys.argv.(1) in
let files = Sys.readdir test_dir |> Array.to_list in
let test_files = List.filter (fun f -> Filename.check_suffix f ".test") files in
let total_passed = ref 0 in
let total_failed = ref 0 in
let file_results = ref [] in
List.iter (fun file ->
let path = Filename.concat test_dir file in
let (file_result, passed, failed) = run_file path in
total_passed := !total_passed + passed;
total_failed := !total_failed + failed;
file_results := file_result :: !file_results;
Printf.printf "%s: %d passed, %d failed\n" file passed failed
) (List.sort String.compare test_files);
Printf.printf "\n=== Summary ===\n";
Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed;
(* Generate HTML report *)
let report : Report.report = {
title = "HTML5 Serializer Tests";
test_type = "serializer";
description = "These tests validate the HTML serialization algorithm for converting DOM trees back to HTML text. \
Each test provides a sequence of tokens (start tags, end tags, text, comments, doctypes) and one \
or more valid serialized outputs. Tests cover attribute quoting, boolean attribute minimization, \
self-closing tag syntax (trailing solidus), entity escaping, whitespace handling, meta charset \
injection, and optional tag omission rules as specified in the HTML Standard. Multiple expected \
outputs allow for valid variations in serialization style.";
files = List.rev !file_results;
total_passed = !total_passed;
total_failed = !total_failed;
} in
Report.generate_report report "test_serializer_report.html";
exit (if !total_failed > 0 then 1 else 0)