(* Test runner for html5lib-tests serializer tests *)

module Dom = Html5rw.Dom
module Report = Test_report

(* Extract values from JSON *)
let json_string = function
  | Jsont.String (s, _) -> s
  | _ -> failwith "Expected string"

let json_string_opt = function
  | Jsont.Null _ -> None
  | Jsont.String (s, _) -> Some s
  | _ -> failwith "Expected string or null"

let json_bool = function
  | Jsont.Bool (b, _) -> b
  | _ -> failwith "Expected bool"

let json_array = function
  | Jsont.Array (arr, _) -> arr
  | _ -> failwith "Expected array"

let json_object = function
  | Jsont.Object (obj, _) -> obj
  | _ -> failwith "Expected object"

let json_mem name obj =
  match List.find_opt (fun ((n, _), _) -> n = name) obj with
  | Some (_, v) -> Some v
  | None -> None

let json_mem_exn name obj =
  match json_mem name obj with
  | Some v -> v
  | None -> failwith ("Missing member: " ^ name)

let rec json_to_string = function
  | Jsont.Null _ -> "null"
  | Jsont.Bool (b, _) -> string_of_bool b
  | Jsont.Number (n, _) -> Printf.sprintf "%g" n
  | Jsont.String (s, _) -> Printf.sprintf "%S" s
  | Jsont.Array (arr, _) ->
    "[" ^ String.concat ", " (List.map json_to_string arr) ^ "]"
  | Jsont.Object (obj, _) ->
    "{" ^ String.concat ", " (List.map (fun ((n, _), v) -> Printf.sprintf "%S: %s" n (json_to_string v)) obj) ^ "}"

(* Serialization options *)
type serialize_options = {
  quote_char : char;
  quote_char_explicit : bool;  (* Was quote_char explicitly set? *)
  minimize_boolean_attributes : bool;
  use_trailing_solidus : bool;
  escape_lt_in_attrs : bool;
  escape_rcdata : bool;
  strip_whitespace : bool;
  inject_meta_charset : bool;
  encoding : string option;
  omit_optional_tags : bool;
}

let default_options = {
  quote_char = '"';
  quote_char_explicit = false;
  minimize_boolean_attributes = true;
  use_trailing_solidus = false;
  escape_lt_in_attrs = false;
  escape_rcdata = false;
  strip_whitespace = false;
  inject_meta_charset = false;
  encoding = None;
  omit_optional_tags = true;  (* HTML5 default *)
}

(* Parse options from JSON *)
let parse_options json_opt =
  match json_opt with
  | None -> default_options
  | Some json ->
    let obj = json_object json in
    let get_bool name default =
      match json_mem name obj with
      | Some j -> (try json_bool j with _ -> default)
      | None -> default
    in
    let get_string name =
      match json_mem name obj with
      | Some (Jsont.String (s, _)) -> Some s
      | _ -> None
    in
    let quote_char_opt =
      match json_mem "quote_char" obj with
      | Some (Jsont.String (s, _)) when String.length s = 1 -> Some s.[0]
      | _ -> None
    in
    {
      quote_char = Option.value ~default:'"' quote_char_opt;
      quote_char_explicit = Option.is_some quote_char_opt;
      minimize_boolean_attributes = get_bool "minimize_boolean_attributes" (get_bool "quote_attr_values" true);
      use_trailing_solidus = get_bool "use_trailing_solidus" false;
      escape_lt_in_attrs = get_bool "escape_lt_in_attrs" false;
      escape_rcdata = get_bool "escape_rcdata" false;
      strip_whitespace = get_bool "strip_whitespace" false;
      inject_meta_charset = get_bool "inject_meta_charset" false;
      encoding = get_string "encoding";
      omit_optional_tags = get_bool "omit_optional_tags" true;
    }

(* Test case *)
type test_case = {
  description : string;
  input : Jsont.json list;
  expected : string list;
  options : serialize_options;
  raw_json : string;  (* Original JSON representation of this test *)
}

let parse_test_case json =
  let raw_json = json_to_string json in
  let obj = json_object json in
  let description = json_string (json_mem_exn "description" obj) in
  let input = json_array (json_mem_exn "input" obj) in
  let expected = List.map json_string (json_array (json_mem_exn "expected" obj)) in
  let options = parse_options (json_mem "options" obj) in
  { description; input; expected; options; raw_json }

(* Parse attrs that can be either array [{name, value}] or object {name: value} or empty {} *)
let parse_attrs attrs_json =
  match attrs_json with
  | Jsont.Array (arr, _) ->
    List.map (fun attr_json ->
      let attr_obj = json_object attr_json in
      let attr_name = json_string (json_mem_exn "name" attr_obj) in
      let value = json_string (json_mem_exn "value" attr_obj) in
      (attr_name, value)
    ) arr
  | Jsont.Object (obj, _) ->
    List.map (fun ((n, _), v) -> (n, json_string v)) obj
  | _ -> []

(* Void elements that don't need end tags *)
let is_void_element name =
  List.mem (String.lowercase_ascii name)
    ["area"; "base"; "br"; "col"; "embed"; "hr"; "img"; "input";
     "link"; "meta"; "param"; "source"; "track"; "wbr"]

(* Raw text elements whose content should not be escaped *)
let is_raw_text_element name =
  List.mem (String.lowercase_ascii name) ["script"; "style"]

(* Elements where whitespace should be preserved *)
let is_whitespace_preserving_element name =
  List.mem (String.lowercase_ascii name) ["pre"; "textarea"; "script"; "style"]

(* Block elements that close a p tag *)
let p_closing_elements = [
  "address"; "article"; "aside"; "blockquote"; "datagrid"; "dialog"; "dir";
  "div"; "dl"; "fieldset"; "footer"; "form"; "h1"; "h2"; "h3"; "h4"; "h5"; "h6";
  "header"; "hgroup"; "hr"; "main"; "menu"; "nav"; "ol"; "p"; "pre"; "section";
  "table"; "ul"
]

let is_p_closing_element name =
  List.mem (String.lowercase_ascii name) p_closing_elements

(* Collapse runs of whitespace to single space *)
let collapse_whitespace text =
  let len = String.length text in
  let buf = Buffer.create len in
  let in_whitespace = ref false in
  for i = 0 to len - 1 do
    let c = text.[i] in
    if c = '\t' || c = '\r' || c = '\n' || c = '\x0C' || c = ' ' then begin
      if not !in_whitespace then begin
        Buffer.add_char buf ' ';
        in_whitespace := true
      end
    end else begin
      Buffer.add_char buf c;
      in_whitespace := false
    end
  done;
  Buffer.contents buf

(* Token types for stream-based serialization *)
type token_type =
  | StartTag of string * (string * string) list  (* name, attrs *)
  | EndTag of string  (* name *)
  | EmptyTag of string * (string * string) list  (* name, attrs *)
  | TextNode of string
  | CommentNode of string
  | DoctypeNode of Html5rw.Dom.node

type token_info = {
  token : token_type option;
}

let build_token_info token =
  let arr = json_array token in
  match arr with
  | [] -> { token = None }
  | type_json :: rest ->
    let token_type_str = json_string type_json in
    match token_type_str, rest with
    | "StartTag", [_ns_json; name_json; attrs_json] ->
      let name = json_string name_json in
      let attrs = parse_attrs attrs_json in
      { token = Some (StartTag (name, attrs)) }

    | "StartTag", [name_json; attrs_json] ->
      let name = json_string name_json in
      let attrs = parse_attrs attrs_json in
      { token = Some (StartTag (name, attrs)) }

    | "EmptyTag", [name_json; attrs_json] ->
      let name = json_string name_json in
      let attrs = parse_attrs attrs_json in
      { token = Some (EmptyTag (name, attrs)) }

    | "EndTag", [_ns_json; name_json] ->
      let name = json_string name_json in
      { token = Some (EndTag name) }

    | "EndTag", [name_json] ->
      let name = json_string name_json in
      { token = Some (EndTag name) }

    | "Characters", [text_json] ->
      let text = json_string text_json in
      { token = Some (TextNode text) }

    | "Comment", [text_json] ->
      let text = json_string text_json in
      { token = Some (CommentNode text) }

    | "Doctype", [name_json] ->
      let name = json_string name_json in
      let node = Html5rw.Dom.create_doctype ~name () in
      { token = Some (DoctypeNode node) }

    | "Doctype", [name_json; public_json] ->
      let name = json_string name_json in
      let public_id = json_string_opt public_json in
      let node = match public_id with
       | Some pub -> Html5rw.Dom.create_doctype ~name ~public_id:pub ()
       | None -> Html5rw.Dom.create_doctype ~name ()
      in
      { token = Some (DoctypeNode node) }

    | "Doctype", [name_json; public_json; system_json] ->
      let name = json_string name_json in
      let public_id = json_string_opt public_json in
      let system_id = json_string_opt system_json in
      let node = match public_id, system_id with
       | Some pub, Some sys -> Html5rw.Dom.create_doctype ~name ~public_id:pub ~system_id:sys ()
       | Some pub, None -> Html5rw.Dom.create_doctype ~name ~public_id:pub ()
       | None, Some sys -> Html5rw.Dom.create_doctype ~name ~system_id:sys ()
       | None, None -> Html5rw.Dom.create_doctype ~name ()
      in
      { token = Some (DoctypeNode node) }

    | _ -> { token = None }

(* Serialize a single node to HTML with options *)
let escape_text text =
  let buf = Buffer.create (String.length text) in
  String.iter (fun c ->
    match c with
    | '&' -> Buffer.add_string buf "&amp;"
    | '<' -> Buffer.add_string buf "&lt;"
    | '>' -> Buffer.add_string buf "&gt;"
    | c -> Buffer.add_char buf c
  ) text;
  Buffer.contents buf

let can_unquote_attr_value value =
  if String.length value = 0 then false
  else
    let valid = ref true in
    String.iter (fun c ->
      if c = '>' || c = '"' || c = '\'' || c = '=' || c = '`' ||
         c = ' ' || c = '\t' || c = '\n' || c = '\x0C' || c = '\r' then
        valid := false
    ) value;
    !valid

let escape_attr_value value quote_char escape_lt =
  let buf = Buffer.create (String.length value) in
  String.iter (fun c ->
    match c with
    | '&' -> Buffer.add_string buf "&amp;"
    | '"' when quote_char = '"' -> Buffer.add_string buf "&quot;"
    | '\'' when quote_char = '\'' -> Buffer.add_string buf "&#39;"
    | '<' when escape_lt -> Buffer.add_string buf "&lt;"
    | c -> Buffer.add_char buf c
  ) value;
  Buffer.contents buf

let serialize_node opts ~in_raw_text node =
  match node.Dom.name with
  | "#text" ->
    if in_raw_text && not opts.escape_rcdata then
      node.Dom.data
    else
      escape_text node.Dom.data
  | "#comment" ->
    "<!--" ^ node.Dom.data ^ "-->"
  | "!doctype" ->
    let buf = Buffer.create 64 in
    Buffer.add_string buf "<!DOCTYPE ";
    (match node.Dom.doctype with
     | Some dt ->
       Buffer.add_string buf (Option.value ~default:"html" dt.Dom.name);
       (match dt.Dom.public_id with
        | Some pub when pub <> "" ->
          Buffer.add_string buf " PUBLIC \"";
          Buffer.add_string buf pub;
          Buffer.add_char buf '"';
          (match dt.Dom.system_id with
           | Some sys ->
             Buffer.add_string buf " \"";
             Buffer.add_string buf sys;
             Buffer.add_char buf '"'
           | None -> ())
        | _ ->
          match dt.Dom.system_id with
          | Some sys when sys <> "" ->
            Buffer.add_string buf " SYSTEM \"";
            Buffer.add_string buf sys;
            Buffer.add_char buf '"'
          | _ -> ())
     | None -> Buffer.add_string buf "html");
    Buffer.add_char buf '>';
    Buffer.contents buf
  | _ -> failwith "serialize_node called with element"

let choose_quote value default_quote explicit =
  (* If quote_char was explicitly set, always use it *)
  if explicit then default_quote
  else
    (* Otherwise, if value contains the default quote but not the other, use the other *)
    let has_double = String.contains value '"' in
    let has_single = String.contains value '\'' in
    if has_double && not has_single then '\''
    else if has_single && not has_double then '"'
    else default_quote

(* Serialize an element tag (start tag) *)
let serialize_start_tag opts ~is_empty_tag name attrs =
  let buf = Buffer.create 64 in
  Buffer.add_char buf '<';
  Buffer.add_string buf name;
  (* Sort attributes alphabetically for consistent output *)
  let sorted_attrs = List.sort (fun (a, _) (b, _) -> String.compare a b) attrs in
  List.iter (fun (key, value) ->
    Buffer.add_char buf ' ';
    Buffer.add_string buf key;
    let should_minimize =
      opts.minimize_boolean_attributes &&
      String.lowercase_ascii key = String.lowercase_ascii value
    in
    if should_minimize then
      ()
    else if String.length value = 0 then begin
      Buffer.add_char buf '=';
      Buffer.add_char buf opts.quote_char;
      Buffer.add_char buf opts.quote_char
    end else if can_unquote_attr_value value then begin
      Buffer.add_char buf '=';
      Buffer.add_string buf value
    end else begin
      let quote = choose_quote value opts.quote_char opts.quote_char_explicit in
      Buffer.add_char buf '=';
      Buffer.add_char buf quote;
      Buffer.add_string buf (escape_attr_value value quote opts.escape_lt_in_attrs);
      Buffer.add_char buf quote
    end
  ) sorted_attrs;
  if opts.use_trailing_solidus && (is_empty_tag || is_void_element name) then
    Buffer.add_string buf " /";
  Buffer.add_char buf '>';
  Buffer.contents buf

(* Check if text starts with ASCII whitespace *)
let text_starts_with_space text =
  String.length text > 0 &&
  let c = text.[0] in
  c = '\t' || c = '\n' || c = '\x0C' || c = '\r' || c = ' '

(* Optional tag omission helpers *)
type next_token =
  | NTComment
  | NTSpace  (* Text starting with space *)
  | NTText   (* Text not starting with space *)
  | NTStartTag of string
  | NTEmptyTag of string
  | NTEndTag of string
  | NTEOF

let classify_next tokens idx =
  if idx >= Array.length tokens then NTEOF
  else match tokens.(idx).token with
    | None -> NTEOF
    | Some (CommentNode _) -> NTComment
    | Some (TextNode text) ->
      if text_starts_with_space text then NTSpace else NTText
    | Some (StartTag (name, _)) -> NTStartTag (String.lowercase_ascii name)
    | Some (EmptyTag (name, _)) -> NTEmptyTag (String.lowercase_ascii name)
    | Some (EndTag name) -> NTEndTag (String.lowercase_ascii name)
    | Some (DoctypeNode _) -> NTEOF  (* Treat doctype as if nothing follows *)

(* Should we omit a start tag? *)
let should_omit_start_tag opts name attrs next =
  if not opts.omit_optional_tags then false
  else
    let name = String.lowercase_ascii name in
    match name, next with
    (* html start tag: omit if not followed by comment or space, AND has no attributes *)
    | "html", NTComment -> false
    | "html", NTSpace -> false
    | "html", _ -> attrs = []  (* only omit if no attributes *)
    (* head start tag: omit if followed by element (start/empty tag) *)
    | "head", NTStartTag _ -> true
    | "head", NTEmptyTag _ -> true
    | "head", NTEndTag "head" -> true  (* empty head *)
    | "head", NTEOF -> true
    | "head", _ -> false
    (* body start tag: omit if not followed by comment or space, AND has no attributes *)
    | "body", NTComment -> false
    | "body", NTSpace -> false
    | "body", _ -> attrs = []  (* only omit if no attributes *)
    (* colgroup start tag: omit if followed by col element *)
    | "colgroup", NTStartTag "col" -> true
    | "colgroup", NTEmptyTag "col" -> true
    | "colgroup", _ -> false
    (* tbody start tag: omit if first child is tr *)
    | "tbody", NTStartTag "tr" -> true
    | "tbody", _ -> false
    | _ -> false

(* Should we omit an end tag? *)
let should_omit_end_tag opts name next =
  if not opts.omit_optional_tags then false
  else
    let name = String.lowercase_ascii name in
    match name, next with
    (* html end tag: omit if not followed by comment or space *)
    | "html", NTComment -> false
    | "html", NTSpace -> false
    | "html", _ -> true
    (* head end tag: omit if not followed by comment or space *)
    | "head", NTComment -> false
    | "head", NTSpace -> false
    | "head", _ -> true
    (* body end tag: omit if not followed by comment or space *)
    | "body", NTComment -> false
    | "body", NTSpace -> false
    | "body", _ -> true
    (* li end tag: omit if followed by li start tag or parent end tag *)
    | "li", NTStartTag "li" -> true
    | "li", NTEndTag _ -> true
    | "li", NTEOF -> true
    | "li", _ -> false
    (* dt end tag: omit if followed by dt or dd start tag (NOT end tag or EOF!) *)
    | "dt", NTStartTag "dt" -> true
    | "dt", NTStartTag "dd" -> true
    | "dt", _ -> false
    (* dd end tag: omit if followed by dd or dt start tag, or end tag, or EOF *)
    | "dd", NTStartTag "dd" -> true
    | "dd", NTStartTag "dt" -> true
    | "dd", NTEndTag _ -> true
    | "dd", NTEOF -> true
    | "dd", _ -> false
    (* p end tag: omit if followed by block element (start or empty tag), end tag, or EOF *)
    | "p", NTStartTag next_name when is_p_closing_element next_name -> true
    | "p", NTEmptyTag next_name when is_p_closing_element next_name -> true
    | "p", NTEndTag _ -> true
    | "p", NTEOF -> true
    | "p", _ -> false
    (* optgroup end tag: omit if followed by optgroup start tag, end tag, or EOF *)
    | "optgroup", NTStartTag "optgroup" -> true
    | "optgroup", NTEndTag _ -> true
    | "optgroup", NTEOF -> true
    | "optgroup", _ -> false
    (* option end tag: omit if followed by option/optgroup start tag, end tag, or EOF *)
    | "option", NTStartTag "option" -> true
    | "option", NTStartTag "optgroup" -> true
    | "option", NTEndTag _ -> true
    | "option", NTEOF -> true
    | "option", _ -> false
    (* colgroup end tag: omit if not followed by comment, space, or another colgroup *)
    | "colgroup", NTComment -> false
    | "colgroup", NTSpace -> false
    | "colgroup", NTStartTag "colgroup" -> false  (* keep end tag when another colgroup follows *)
    | "colgroup", _ -> true
    (* thead end tag: omit if followed by tbody or tfoot start tag *)
    | "thead", NTStartTag "tbody" -> true
    | "thead", NTStartTag "tfoot" -> true
    | "thead", _ -> false
    (* tbody end tag: omit if followed by tbody/tfoot start tag, end tag, or EOF *)
    | "tbody", NTStartTag "tbody" -> true
    | "tbody", NTStartTag "tfoot" -> true
    | "tbody", NTEndTag _ -> true
    | "tbody", NTEOF -> true
    | "tbody", _ -> false
    (* tfoot end tag: omit if followed by tbody start tag, end tag, or EOF *)
    | "tfoot", NTStartTag "tbody" -> true
    | "tfoot", NTEndTag _ -> true
    | "tfoot", NTEOF -> true
    | "tfoot", _ -> false
    (* tr end tag: omit if followed by tr start tag, end tag, or EOF *)
    | "tr", NTStartTag "tr" -> true
    | "tr", NTEndTag _ -> true
    | "tr", NTEOF -> true
    | "tr", _ -> false
    (* td end tag: omit if followed by td/th start tag, end tag, or EOF *)
    | "td", NTStartTag "td" -> true
    | "td", NTStartTag "th" -> true
    | "td", NTEndTag _ -> true
    | "td", NTEOF -> true
    | "td", _ -> false
    (* th end tag: omit if followed by th/td start tag, end tag, or EOF *)
    | "th", NTStartTag "th" -> true
    | "th", NTStartTag "td" -> true
    | "th", NTEndTag _ -> true
    | "th", NTEOF -> true
    | "th", _ -> false
    | _ -> false

(* Run a single test *)
let run_test test =
  try
    (* Build token infos from input *)
    let token_infos = Array.of_list (List.map build_token_info test.input) in
    let num_tokens = Array.length token_infos in

    (* Handle inject_meta_charset option *)
    let inject_meta = test.options.inject_meta_charset in
    let encoding = test.options.encoding in

    (* Serialize with context tracking *)
    let buf = Buffer.create 256 in
    let in_raw_text = ref false in
    let preserve_whitespace = ref false in
    let element_stack : string list ref = ref [] in
    let in_head = ref false in
    let meta_charset_injected = ref false in
    let prev_was_section_end = ref false in  (* Track if prev token was thead/tbody/tfoot end *)

    for i = 0 to num_tokens - 1 do
      let info = token_infos.(i) in
      let next = classify_next token_infos (i + 1) in

      match info.token with
      | None -> ()

      | Some (StartTag (name, attrs)) ->
        let name_lower = String.lowercase_ascii name in

        (* Track head element *)
        if name_lower = "head" then in_head := true;

        (* For inject_meta_charset, we need to check if there's any charset meta coming up *)
        (* If yes, don't inject at head start; if no, inject at head start *)
        let should_inject_at_head =
          if not inject_meta || name_lower <> "head" then false
          else match encoding with
            | None -> false
            | Some _ ->
              (* Look ahead to see if there's a charset meta or http-equiv content-type *)
              let has_charset_meta = ref false in
              for j = i + 1 to num_tokens - 1 do
                match token_infos.(j).token with
                | Some (EmptyTag (n, a)) when String.lowercase_ascii n = "meta" ->
                  let has_charset = List.exists (fun (k, _) -> String.lowercase_ascii k = "charset") a in
                  let has_http_equiv_ct = List.exists (fun (k, v) ->
                    String.lowercase_ascii k = "http-equiv" && String.lowercase_ascii v = "content-type") a in
                  if has_charset || has_http_equiv_ct then has_charset_meta := true
                | Some (EndTag n) when String.lowercase_ascii n = "head" -> ()
                | _ -> ()
              done;
              not !has_charset_meta
        in

        (* Special case: tbody start tag cannot be omitted if preceded by section end tag *)
        let can_omit_start =
          if name_lower = "tbody" && !prev_was_section_end then false
          else should_omit_start_tag test.options name attrs next
        in
        prev_was_section_end := false;  (* Reset for next iteration *)

        if should_inject_at_head then begin
          meta_charset_injected := true;
          (* Don't output head start tag if we should omit it *)
          if not can_omit_start then
            Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:false name attrs);
          Buffer.add_string buf (Printf.sprintf "<meta charset=%s>" (Option.get encoding));
          element_stack := name :: !element_stack;
          if is_raw_text_element name then in_raw_text := true;
          if is_whitespace_preserving_element name then preserve_whitespace := true
        end else if not can_omit_start then begin
          Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:false name attrs);
          element_stack := name :: !element_stack;
          if is_raw_text_element name then in_raw_text := true;
          if is_whitespace_preserving_element name then preserve_whitespace := true
        end else begin
          element_stack := name :: !element_stack;
          if is_raw_text_element name then in_raw_text := true;
          if is_whitespace_preserving_element name then preserve_whitespace := true
        end

      | Some (EmptyTag (name, attrs)) ->
        let name_lower = String.lowercase_ascii name in
        prev_was_section_end := false;  (* Reset for next iteration *)

        (* Handle meta charset replacement *)
        if inject_meta && !in_head && name_lower = "meta" then begin
          let has_charset = List.exists (fun (k, _) -> String.lowercase_ascii k = "charset") attrs in
          let has_http_equiv_ct =
            List.exists (fun (k, v) ->
              String.lowercase_ascii k = "http-equiv" &&
              String.lowercase_ascii v = "content-type"
            ) attrs
          in
          if has_charset then begin
            (* Replace charset value *)
            match encoding with
            | Some enc ->
              Buffer.add_string buf (Printf.sprintf "<meta charset=%s>" enc)
            | None -> ()  (* No encoding, skip the meta tag *)
          end else if has_http_equiv_ct then begin
            (* Replace charset in content value *)
            match encoding with
            | Some enc ->
              let new_attrs = List.map (fun (k, v) ->
                if String.lowercase_ascii k = "content" then
                  let new_content = Printf.sprintf "text/html; charset=%s" enc in
                  (k, new_content)
                else (k, v)
              ) attrs in
              Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:true name new_attrs)
            | None ->
              Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:true name attrs)
          end else begin
            (* Regular meta tag, output as normal *)
            Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:true name attrs)
          end
        end else
          Buffer.add_string buf (serialize_start_tag test.options ~is_empty_tag:true name attrs)

      | Some (EndTag name) ->
        let name_lower = String.lowercase_ascii name in

        (* Track head element *)
        if name_lower = "head" then in_head := false;

        (* Pop from element stack *)
        (match !element_stack with
         | top :: rest when String.lowercase_ascii top = name_lower ->
           element_stack := rest;
           if is_raw_text_element name then in_raw_text := false;
           if is_whitespace_preserving_element name then preserve_whitespace := false
         | _ -> ());

        let is_section_end = List.mem name_lower ["thead"; "tbody"; "tfoot"] in
        let omit = should_omit_end_tag test.options name next in

        if not omit then begin
          Buffer.add_string buf "</";
          Buffer.add_string buf name;
          Buffer.add_char buf '>'
        end;

        (* Track if we omitted a section end tag - next tbody can't be omitted *)
        prev_was_section_end := is_section_end && omit

      | Some (TextNode text) ->
        prev_was_section_end := false;
        let processed_text =
          if !in_raw_text && not test.options.escape_rcdata then
            text
          else if test.options.strip_whitespace && not !preserve_whitespace then
            escape_text (collapse_whitespace text)
          else
            escape_text text
        in
        Buffer.add_string buf processed_text

      | Some (CommentNode text) ->
        prev_was_section_end := false;
        Buffer.add_string buf "<!--";
        Buffer.add_string buf text;
        Buffer.add_string buf "-->"

      | Some (DoctypeNode node) ->
        prev_was_section_end := false;
        Buffer.add_string buf (serialize_node test.options ~in_raw_text:false node)
    done;

    let serialized = Buffer.contents buf in

    (* Check if it matches any expected output *)
    let matches = List.exists (fun exp -> serialized = exp) test.expected in

    (matches, serialized, test.expected)
  with e ->
    (false, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), test.expected)

(* Run all tests in a file *)
let format_options opts =
  let parts = [] in
  let parts = if opts.quote_char_explicit then
    Printf.sprintf "quote_char='%c'" opts.quote_char :: parts else parts in
  let parts = if not opts.minimize_boolean_attributes then
    "minimize_bool=false" :: parts else parts in
  let parts = if opts.use_trailing_solidus then
    "trailing_solidus=true" :: parts else parts in
  let parts = if opts.escape_lt_in_attrs then
    "escape_lt=true" :: parts else parts in
  let parts = if opts.escape_rcdata then
    "escape_rcdata=true" :: parts else parts in
  let parts = if opts.strip_whitespace then
    "strip_ws=true" :: parts else parts in
  let parts = if opts.inject_meta_charset then
    "inject_charset=true" :: parts else parts in
  let parts = if not opts.omit_optional_tags then
    "omit_tags=false" :: parts else parts in
  if parts = [] then "(defaults)" else String.concat ", " (List.rev parts)

let run_file path =
  let content =
    let ic = open_in path in
    let n = in_channel_length ic in
    let s = really_input_string ic n in
    close_in ic;
    s
  in

  let json = match Jsont_bytesrw.decode_string Jsont.json content with
    | Ok j -> j
    | Error e -> failwith (Printf.sprintf "JSON parse error in %s: %s" path e)
  in

  let obj = json_object json in
  let tests_json = match json_mem "tests" obj with
    | Some t -> json_array t
    | None -> []
  in

  let filename = Filename.basename path in
  let passed = ref 0 in
  let failed = ref 0 in
  let results = ref [] in

  List.iteri (fun i test_json ->
    try
      let test = parse_test_case test_json in
      let (success, actual, expected) = run_test test in

      let result : Report.test_result = {
        test_num = i + 1;
        description = test.description;
        input = String.concat "\n" (List.map (fun tok ->
          (* Simplified token representation *)
          match tok with
          | Jsont.Array (arr, _) ->
            (match arr with
             | Jsont.String (ty, _) :: rest ->
               Printf.sprintf "%s: %s" ty (String.concat ", " (List.map (function
                 | Jsont.String (s, _) -> Printf.sprintf "%S" s
                 | Jsont.Object _ -> "{...}"
                 | Jsont.Null _ -> "null"
                 | _ -> "?"
               ) rest))
             | _ -> "?")
          | _ -> "?"
        ) test.input);
        expected = String.concat " | " expected;
        actual;
        success;
        details = [
          ("Options", format_options test.options);
          ("Expected Variants", string_of_int (List.length expected));
        ];
        raw_test_data = Some test.raw_json;
      } in
      results := result :: !results;

      if success then incr passed else incr failed
    with e ->
      incr failed;
      let result : Report.test_result = {
        test_num = i + 1;
        description = Printf.sprintf "Test %d" (i + 1);
        input = "";
        expected = "";
        actual = Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e);
        success = false;
        details = [];
        raw_test_data = Some (json_to_string test_json);
      } in
      results := result :: !results;
      Printf.eprintf "Exception parsing test %d: %s\n" (i + 1) (Printexc.to_string e)
  ) tests_json;

  let file_result : Report.file_result = {
    filename;
    test_type = "Serializer";
    passed_count = !passed;
    failed_count = !failed;
    tests = List.rev !results;
  } in
  (file_result, !passed, !failed)

let () =
  let test_dir = Sys.argv.(1) in
  let files = Sys.readdir test_dir |> Array.to_list in
  let test_files = List.filter (fun f -> Filename.check_suffix f ".test") files in

  let total_passed = ref 0 in
  let total_failed = ref 0 in
  let file_results = ref [] in

  List.iter (fun file ->
    let path = Filename.concat test_dir file in
    let (file_result, passed, failed) = run_file path in
    total_passed := !total_passed + passed;
    total_failed := !total_failed + failed;
    file_results := file_result :: !file_results;
    Printf.printf "%s: %d passed, %d failed\n" file passed failed
  ) (List.sort String.compare test_files);

  Printf.printf "\n=== Summary ===\n";
  Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed;

  (* Generate HTML report *)
  let report : Report.report = {
    title = "HTML5 Serializer Tests";
    test_type = "serializer";
    description = "These tests validate the HTML serialization algorithm for converting DOM trees back to HTML text. \
                   Each test provides a sequence of tokens (start tags, end tags, text, comments, doctypes) and one \
                   or more valid serialized outputs. Tests cover attribute quoting, boolean attribute minimization, \
                   self-closing tag syntax (trailing solidus), entity escaping, whitespace handling, meta charset \
                   injection, and optional tag omission rules as specified in the HTML Standard. Multiple expected \
                   outputs allow for valid variations in serialization style.";
    files = List.rev !file_results;
    total_passed = !total_passed;
    total_failed = !total_failed;
    match_quality = None;
    test_type_breakdown = None;
    strictness_mode = None;
    run_timestamp = None;
  } in
  Report.generate_report report "test_serializer_report.html";

  exit (if !total_failed > 0 then 1 else 0)