OCaml HTML5 parser/serialiser based on Python's JustHTML

html tests

+1
.gitignore
··· 15 15 16 16 # Opam local switch 17 17 _opam/ 18 + *.html
+17 -4
test/dune
··· 1 + (library 2 + (name test_report) 3 + (modules test_report)) 4 + 5 + (executable 6 + (name test_all) 7 + (modules test_all) 8 + (libraries bytesrw html5rw.parser html5rw.dom html5rw.tokenizer html5rw.encoding jsont jsont.bytesrw test_report)) 9 + 1 10 (executable 2 11 (name test_html5lib) 3 - (libraries bytesrw html5rw.parser html5rw.dom)) 12 + (modules test_html5lib) 13 + (libraries bytesrw html5rw.parser html5rw.dom test_report)) 4 14 5 15 (rule 6 16 (alias runtest) ··· 11 21 12 22 (executable 13 23 (name test_tokenizer) 14 - (libraries bytesrw html5rw.tokenizer jsont jsont.bytesrw)) 24 + (modules test_tokenizer) 25 + (libraries bytesrw html5rw.tokenizer jsont jsont.bytesrw test_report)) 15 26 16 27 (rule 17 28 (alias runtest) ··· 22 33 23 34 (executable 24 35 (name test_encoding) 25 - (libraries html5rw.encoding)) 36 + (modules test_encoding) 37 + (libraries html5rw.encoding test_report)) 26 38 27 39 (rule 28 40 (alias runtest) ··· 33 45 34 46 (executable 35 47 (name test_serializer) 36 - (libraries html5rw.dom jsont jsont.bytesrw)) 48 + (modules test_serializer) 49 + (libraries html5rw.dom jsont jsont.bytesrw test_report)) 37 50 38 51 (rule 39 52 (alias runtest)
+670
test/test_all.ml
··· 1 + (* Combined test runner for all html5lib-tests *) 2 + (* Generates a single standalone HTML report *) 3 + 4 + open Bytesrw 5 + 6 + module Report = Test_report 7 + 8 + (* ============================================================ *) 9 + (* Tree Construction Tests *) 10 + (* ============================================================ *) 11 + 12 + module TreeConstruction = struct 13 + module Parser = Html5rw_parser 14 + module Dom = Html5rw_dom 15 + 16 + type test_case = { 17 + input : string; 18 + expected_tree : string; 19 + expected_errors : string list; 20 + script_on : bool; 21 + fragment_context : string option; 22 + raw_lines : string; 23 + } 24 + 25 + let parse_test_case lines = 26 + let raw_lines = String.concat "\n" lines in 27 + let rec parse acc = function 28 + | [] -> acc 29 + | line :: rest when String.length line > 0 && line.[0] = '#' -> 30 + let section = String.trim line in 31 + let content, remaining = collect_section rest in 32 + parse ((section, content) :: acc) remaining 33 + | _ :: rest -> parse acc rest 34 + and collect_section lines = 35 + let rec loop acc = function 36 + | [] -> (List.rev acc, []) 37 + | line :: rest when String.length line > 0 && line.[0] = '#' -> 38 + (List.rev acc, line :: rest) 39 + | line :: rest -> loop (line :: acc) rest 40 + in 41 + loop [] lines 42 + in 43 + let sections = parse [] lines in 44 + let get_section name = 45 + match List.assoc_opt name sections with 46 + | Some lines -> String.concat "\n" lines 47 + | None -> "" 48 + in 49 + let data = get_section "#data" in 50 + let document = get_section "#document" in 51 + let errors_text = get_section "#errors" in 52 + let errors = 53 + String.split_on_char '\n' errors_text 54 + |> List.filter (fun s -> String.trim s <> "") 55 + in 56 + let script_on = List.mem_assoc "#script-on" sections in 57 + let fragment = 58 + if List.mem_assoc "#document-fragment" sections then 59 + Some (get_section "#document-fragment" |> String.trim) 60 + else None 61 + in 62 + { input = data; expected_tree = document; expected_errors = errors; 63 + script_on; fragment_context = fragment; raw_lines } 64 + 65 + let parse_dat_file content = 66 + let lines = String.split_on_char '\n' content in 67 + let rec split_tests current acc = function 68 + | [] -> 69 + if current = [] then List.rev acc 70 + else List.rev (List.rev current :: acc) 71 + | "" :: "#data" :: rest -> 72 + let new_acc = if current = [] then acc else (List.rev current :: acc) in 73 + split_tests ["#data"] new_acc rest 74 + | line :: rest -> 75 + split_tests (line :: current) acc rest 76 + in 77 + let test_groups = split_tests [] [] lines in 78 + List.filter_map (fun lines -> 79 + if List.exists (fun l -> l = "#data") lines then 80 + Some (parse_test_case lines) 81 + else None 82 + ) test_groups 83 + 84 + let strip_tree_prefix s = 85 + let lines = String.split_on_char '\n' s in 86 + let stripped = List.filter_map (fun line -> 87 + if String.length line >= 2 && String.sub line 0 2 = "| " then 88 + Some (String.sub line 2 (String.length line - 2)) 89 + else if String.trim line = "" then None 90 + else Some line 91 + ) lines in 92 + String.concat "\n" stripped 93 + 94 + let normalize_tree s = 95 + let lines = String.split_on_char '\n' s in 96 + let non_empty = List.filter (fun l -> String.trim l <> "") lines in 97 + String.concat "\n" non_empty 98 + 99 + let run_test test = 100 + try 101 + let result = 102 + match test.fragment_context with 103 + | Some ctx_str -> 104 + let (namespace, tag_name) = 105 + match String.split_on_char ' ' ctx_str with 106 + | [ns; tag] when ns = "svg" -> (Some "svg", tag) 107 + | [ns; tag] when ns = "math" -> (Some "mathml", tag) 108 + | [tag] -> (None, tag) 109 + | _ -> (None, ctx_str) 110 + in 111 + let context = Parser.make_fragment_context ~tag_name ~namespace () in 112 + let reader = Bytes.Reader.of_string test.input in 113 + Parser.parse ~collect_errors:true ~fragment_context:context reader 114 + | None -> 115 + let reader = Bytes.Reader.of_string test.input in 116 + Parser.parse ~collect_errors:true reader 117 + in 118 + let actual_tree = Dom.to_test_format (Parser.root result) in 119 + let expected = normalize_tree (strip_tree_prefix test.expected_tree) in 120 + let actual = normalize_tree (strip_tree_prefix actual_tree) in 121 + let error_count = List.length (Parser.errors result) in 122 + let expected_error_count = List.length test.expected_errors in 123 + (expected = actual, expected, actual, error_count, expected_error_count) 124 + with e -> 125 + let expected = normalize_tree (strip_tree_prefix test.expected_tree) in 126 + (false, expected, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), 0, 0) 127 + 128 + let run_file path = 129 + let ic = open_in path in 130 + let content = really_input_string ic (in_channel_length ic) in 131 + close_in ic; 132 + let tests = parse_dat_file content in 133 + let filename = Filename.basename path in 134 + let passed = ref 0 in 135 + let failed = ref 0 in 136 + let results = ref [] in 137 + List.iteri (fun i test -> 138 + if test.script_on then () 139 + else begin 140 + let (success, expected, actual, actual_error_count, expected_error_count) = run_test test in 141 + let description = 142 + let input_preview = String.sub test.input 0 (min 60 (String.length test.input)) in 143 + if test.fragment_context <> None then 144 + Printf.sprintf "Fragment (%s): %s" (Option.get test.fragment_context) input_preview 145 + else input_preview 146 + in 147 + let result : Report.test_result = { 148 + test_num = i + 1; description; input = test.input; expected; actual; success; 149 + details = [ 150 + ("Fragment Context", Option.value test.fragment_context ~default:"(none)"); 151 + ("Expected Errors", string_of_int expected_error_count); 152 + ("Actual Errors", string_of_int actual_error_count); 153 + ]; 154 + raw_test_data = Some test.raw_lines; 155 + } in 156 + results := result :: !results; 157 + if success then incr passed else incr failed 158 + end 159 + ) tests; 160 + let file_result : Report.file_result = { 161 + filename; test_type = "Tree Construction"; 162 + passed_count = !passed; failed_count = !failed; 163 + tests = List.rev !results; 164 + } in 165 + (file_result, !passed, !failed) 166 + 167 + let run_dir test_dir = 168 + let files = Sys.readdir test_dir |> Array.to_list in 169 + let dat_files = List.filter (fun f -> 170 + Filename.check_suffix f ".dat" && not (String.contains f '/') 171 + ) files in 172 + let total_passed = ref 0 in 173 + let total_failed = ref 0 in 174 + let file_results = ref [] in 175 + List.iter (fun file -> 176 + let path = Filename.concat test_dir file in 177 + if Sys.is_directory path then () else begin 178 + let (file_result, passed, failed) = run_file path in 179 + total_passed := !total_passed + passed; 180 + total_failed := !total_failed + failed; 181 + file_results := file_result :: !file_results; 182 + Printf.printf " %s: %d passed, %d failed\n" file passed failed 183 + end 184 + ) (List.sort String.compare dat_files); 185 + (List.rev !file_results, !total_passed, !total_failed) 186 + end 187 + 188 + (* ============================================================ *) 189 + (* Tokenizer Tests *) 190 + (* ============================================================ *) 191 + 192 + module Tokenizer_tests = struct 193 + module Tokenizer = Html5rw_tokenizer 194 + 195 + module TokenCollector = struct 196 + type t = { mutable tokens : Tokenizer.Token.t list } 197 + let create () = { tokens = [] } 198 + let process t token = t.tokens <- token :: t.tokens; `Continue 199 + let adjusted_current_node_in_html_namespace _ = true 200 + let get_tokens t = List.rev t.tokens 201 + end 202 + 203 + type test_case = { 204 + description : string; 205 + input : string; 206 + output : Jsont.json list; 207 + expected_error_count : int; 208 + initial_states : string list; 209 + last_start_tag : string option; 210 + double_escaped : bool; 211 + xml_mode : bool; 212 + raw_json : string; 213 + } 214 + 215 + let unescape_double s = 216 + let b = Buffer.create (String.length s) in 217 + let i = ref 0 in 218 + while !i < String.length s do 219 + if !i + 1 < String.length s && s.[!i] = '\\' then begin 220 + match s.[!i + 1] with 221 + | 'u' when !i + 5 < String.length s -> 222 + let hex = String.sub s (!i + 2) 4 in 223 + (try 224 + let code = int_of_string ("0x" ^ hex) in 225 + if code < 128 then Buffer.add_char b (Char.chr code) 226 + else begin 227 + if code < 0x800 then begin 228 + Buffer.add_char b (Char.chr (0xC0 lor (code lsr 6))); 229 + Buffer.add_char b (Char.chr (0x80 lor (code land 0x3F))) 230 + end else begin 231 + Buffer.add_char b (Char.chr (0xE0 lor (code lsr 12))); 232 + Buffer.add_char b (Char.chr (0x80 lor ((code lsr 6) land 0x3F))); 233 + Buffer.add_char b (Char.chr (0x80 lor (code land 0x3F))) 234 + end 235 + end; 236 + i := !i + 6 237 + with _ -> Buffer.add_char b s.[!i]; incr i) 238 + | 'n' -> Buffer.add_char b '\n'; i := !i + 2 239 + | 'r' -> Buffer.add_char b '\r'; i := !i + 2 240 + | 't' -> Buffer.add_char b '\t'; i := !i + 2 241 + | '\\' -> Buffer.add_char b '\\'; i := !i + 2 242 + | _ -> Buffer.add_char b s.[!i]; incr i 243 + end else begin 244 + Buffer.add_char b s.[!i]; incr i 245 + end 246 + done; 247 + Buffer.contents b 248 + 249 + let json_string = function Jsont.String (s, _) -> s | _ -> failwith "Expected string" 250 + let json_bool = function Jsont.Bool (b, _) -> b | _ -> failwith "Expected bool" 251 + let json_array = function Jsont.Array (arr, _) -> arr | _ -> failwith "Expected array" 252 + let json_object = function Jsont.Object (obj, _) -> obj | _ -> failwith "Expected object" 253 + 254 + let json_mem name obj = 255 + match List.find_opt (fun ((n, _), _) -> n = name) obj with 256 + | Some (_, v) -> Some v | None -> None 257 + 258 + let json_mem_exn name obj = 259 + match json_mem name obj with Some v -> v | None -> failwith ("Missing member: " ^ name) 260 + 261 + let rec json_to_string = function 262 + | Jsont.Null _ -> "null" 263 + | Jsont.Bool (b, _) -> string_of_bool b 264 + | Jsont.Number (n, _) -> Printf.sprintf "%g" n 265 + | Jsont.String (s, _) -> Printf.sprintf "%S" s 266 + | Jsont.Array (arr, _) -> "[" ^ String.concat ", " (List.map json_to_string arr) ^ "]" 267 + | Jsont.Object (obj, _) -> 268 + "{" ^ String.concat ", " (List.map (fun ((n, _), v) -> Printf.sprintf "%S: %s" n (json_to_string v)) obj) ^ "}" 269 + 270 + let parse_test_case ~xml_mode json = 271 + let raw_json = json_to_string json in 272 + let obj = json_object json in 273 + let description = json_string (json_mem_exn "description" obj) in 274 + let input = json_string (json_mem_exn "input" obj) in 275 + let output = json_array (json_mem_exn "output" obj) in 276 + let expected_error_count = match json_mem "errors" obj with 277 + | Some e -> List.length (json_array e) | None -> 0 278 + in 279 + let initial_states = match json_mem "initialStates" obj with 280 + | Some s -> List.map json_string (json_array s) | None -> ["Data state"] 281 + in 282 + let last_start_tag = match json_mem "lastStartTag" obj with 283 + | Some s -> Some (json_string s) | None -> None 284 + in 285 + let double_escaped = match json_mem "doubleEscaped" obj with 286 + | Some b -> json_bool b | None -> false 287 + in 288 + { description; input; output; expected_error_count; initial_states; 289 + last_start_tag; double_escaped; xml_mode; raw_json } 290 + 291 + let state_of_string = function 292 + | "Data state" -> Tokenizer.State.Data 293 + | "PLAINTEXT state" -> Tokenizer.State.Plaintext 294 + | "RCDATA state" -> Tokenizer.State.Rcdata 295 + | "RAWTEXT state" -> Tokenizer.State.Rawtext 296 + | "Script data state" -> Tokenizer.State.Script_data 297 + | "CDATA section state" -> Tokenizer.State.Cdata_section 298 + | s -> failwith ("Unknown state: " ^ s) 299 + 300 + let token_to_test_json (tok : Tokenizer.Token.t) : Jsont.json list = 301 + let str s = Jsont.String (s, Jsont.Meta.none) in 302 + let arr l = Jsont.Array (l, Jsont.Meta.none) in 303 + match tok with 304 + | Tokenizer.Token.Doctype { name; public_id; system_id; force_quirks } -> 305 + let name_json = match name with Some n -> str n | None -> Jsont.Null ((), Jsont.Meta.none) in 306 + let public_json = match public_id with Some p -> str p | None -> Jsont.Null ((), Jsont.Meta.none) in 307 + let system_json = match system_id with Some s -> str s | None -> Jsont.Null ((), Jsont.Meta.none) in 308 + let correctness = Jsont.Bool (not force_quirks, Jsont.Meta.none) in 309 + [arr [str "DOCTYPE"; name_json; public_json; system_json; correctness]] 310 + | Tokenizer.Token.Tag { kind = Start; name; attrs; self_closing } -> 311 + let attrs_obj = Jsont.Object ( 312 + List.map (fun (n, v) -> ((n, Jsont.Meta.none), str v)) (List.rev attrs), 313 + Jsont.Meta.none 314 + ) in 315 + if self_closing then [arr [str "StartTag"; str name; attrs_obj; Jsont.Bool (true, Jsont.Meta.none)]] 316 + else [arr [str "StartTag"; str name; attrs_obj]] 317 + | Tokenizer.Token.Tag { kind = End; name; _ } -> [arr [str "EndTag"; str name]] 318 + | Tokenizer.Token.Comment data -> [arr [str "Comment"; str data]] 319 + | Tokenizer.Token.Character data -> [arr [str "Character"; str data]] 320 + | Tokenizer.Token.EOF -> [] 321 + 322 + let rec json_equal a b = 323 + match a, b with 324 + | Jsont.Null _, Jsont.Null _ -> true 325 + | Jsont.Bool (a, _), Jsont.Bool (b, _) -> a = b 326 + | Jsont.Number (a, _), Jsont.Number (b, _) -> a = b 327 + | Jsont.String (a, _), Jsont.String (b, _) -> a = b 328 + | Jsont.Array (a, _), Jsont.Array (b, _) -> 329 + List.length a = List.length b && List.for_all2 json_equal a b 330 + | Jsont.Object (a, _), Jsont.Object (b, _) -> 331 + let a_sorted = List.sort (fun ((n1, _), _) ((n2, _), _) -> String.compare n1 n2) a in 332 + let b_sorted = List.sort (fun ((n1, _), _) ((n2, _), _) -> String.compare n1 n2) b in 333 + List.length a_sorted = List.length b_sorted && 334 + List.for_all2 (fun ((n1, _), v1) ((n2, _), v2) -> n1 = n2 && json_equal v1 v2) a_sorted b_sorted 335 + | _ -> false 336 + 337 + let merge_character_tokens tokens = 338 + let rec loop acc = function 339 + | [] -> List.rev acc 340 + | Tokenizer.Token.Character s1 :: Tokenizer.Token.Character s2 :: rest -> 341 + loop acc (Tokenizer.Token.Character (s1 ^ s2) :: rest) 342 + | tok :: rest -> loop (tok :: acc) rest 343 + in loop [] tokens 344 + 345 + let run_test test initial_state = 346 + let input = if test.double_escaped then unescape_double test.input else test.input in 347 + let collector = TokenCollector.create () in 348 + let tokenizer = Tokenizer.create (module TokenCollector) collector ~collect_errors:true ~xml_mode:test.xml_mode () in 349 + Tokenizer.set_state tokenizer initial_state; 350 + (match test.last_start_tag with Some tag -> Tokenizer.set_last_start_tag tokenizer tag | None -> ()); 351 + let reader = Bytes.Reader.of_string input in 352 + Tokenizer.run tokenizer (module TokenCollector) reader; 353 + let tokens = merge_character_tokens (TokenCollector.get_tokens collector) in 354 + let actual_tokens = List.concat_map token_to_test_json tokens in 355 + let expected_output = if test.double_escaped then 356 + let rec unescape_json = function 357 + | Jsont.String (s, m) -> Jsont.String (unescape_double s, m) 358 + | Jsont.Array (arr, m) -> Jsont.Array (List.map unescape_json arr, m) 359 + | Jsont.Object (obj, m) -> Jsont.Object (List.map (fun (n, v) -> (n, unescape_json v)) obj, m) 360 + | other -> other 361 + in List.map unescape_json test.output 362 + else test.output in 363 + let rec merge_expected = function 364 + | [] -> [] 365 + | [x] -> [x] 366 + | Jsont.Array ([Jsont.String ("Character", _); Jsont.String (s1, m1)], am1) :: 367 + Jsont.Array ([Jsont.String ("Character", _); Jsont.String (s2, _)], _) :: rest -> 368 + merge_expected (Jsont.Array ([Jsont.String ("Character", Jsont.Meta.none); Jsont.String (s1 ^ s2, m1)], am1) :: rest) 369 + | x :: rest -> x :: merge_expected rest 370 + in 371 + let expected = merge_expected expected_output in 372 + let tokens_match = 373 + List.length actual_tokens = List.length expected && 374 + List.for_all2 json_equal actual_tokens expected 375 + in 376 + let actual_error_count = List.length (Tokenizer.get_errors tokenizer) in 377 + let errors_count_match = actual_error_count = test.expected_error_count in 378 + (tokens_match && errors_count_match, actual_tokens, expected, actual_error_count, test.expected_error_count) 379 + 380 + let run_file path = 381 + let content = 382 + let ic = open_in path in 383 + let n = in_channel_length ic in 384 + let s = really_input_string ic n in 385 + close_in ic; s 386 + in 387 + let json = match Jsont_bytesrw.decode_string Jsont.json content with 388 + | Ok j -> j 389 + | Error e -> failwith (Printf.sprintf "JSON parse error in %s: %s" path e) 390 + in 391 + let obj = json_object json in 392 + let regular_tests = match json_mem "tests" obj with 393 + | Some t -> List.map (parse_test_case ~xml_mode:false) (json_array t) | None -> [] 394 + in 395 + let xml_tests = match json_mem "xmlViolationTests" obj with 396 + | Some t -> List.map (parse_test_case ~xml_mode:true) (json_array t) | None -> [] 397 + in 398 + let all_tests = regular_tests @ xml_tests in 399 + let filename = Filename.basename path in 400 + let passed = ref 0 in 401 + let failed = ref 0 in 402 + let results = ref [] in 403 + List.iteri (fun i test -> 404 + List.iter (fun state_name -> 405 + try 406 + let state = state_of_string state_name in 407 + let (success, actual, expected, actual_err_count, expected_err_count) = run_test test state in 408 + let description = Printf.sprintf "[%s] %s" state_name test.description in 409 + let result : Report.test_result = { 410 + test_num = i + 1; description; input = test.input; 411 + expected = String.concat "\n" (List.map json_to_string expected); 412 + actual = String.concat "\n" (List.map json_to_string actual); 413 + success; 414 + details = [ 415 + ("Initial State", state_name); 416 + ("Last Start Tag", Option.value test.last_start_tag ~default:"(none)"); 417 + ("Double Escaped", string_of_bool test.double_escaped); 418 + ("XML Mode", string_of_bool test.xml_mode); 419 + ("Expected Errors", string_of_int expected_err_count); 420 + ("Actual Errors", string_of_int actual_err_count); 421 + ]; 422 + raw_test_data = Some test.raw_json; 423 + } in 424 + results := result :: !results; 425 + if success then incr passed else incr failed 426 + with e -> 427 + incr failed; 428 + let result : Report.test_result = { 429 + test_num = i + 1; 430 + description = Printf.sprintf "[%s] %s" state_name test.description; 431 + input = test.input; expected = ""; 432 + actual = Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e); 433 + success = false; details = []; 434 + raw_test_data = Some test.raw_json; 435 + } in 436 + results := result :: !results 437 + ) test.initial_states 438 + ) all_tests; 439 + let file_result : Report.file_result = { 440 + filename; test_type = "Tokenizer"; 441 + passed_count = !passed; failed_count = !failed; 442 + tests = List.rev !results; 443 + } in 444 + (file_result, !passed, !failed) 445 + 446 + let run_dir test_dir = 447 + let files = Sys.readdir test_dir |> Array.to_list in 448 + let test_files = List.filter (fun f -> Filename.check_suffix f ".test") files in 449 + let total_passed = ref 0 in 450 + let total_failed = ref 0 in 451 + let file_results = ref [] in 452 + List.iter (fun file -> 453 + let path = Filename.concat test_dir file in 454 + let (file_result, passed, failed) = run_file path in 455 + total_passed := !total_passed + passed; 456 + total_failed := !total_failed + failed; 457 + file_results := file_result :: !file_results; 458 + Printf.printf " %s: %d passed, %d failed\n" file passed failed 459 + ) (List.sort String.compare test_files); 460 + (List.rev !file_results, !total_passed, !total_failed) 461 + end 462 + 463 + (* ============================================================ *) 464 + (* Encoding Tests *) 465 + (* ============================================================ *) 466 + 467 + module Encoding_tests = struct 468 + module Encoding = Html5rw_encoding 469 + 470 + type test_case = { 471 + input : string; 472 + expected_encoding : string; 473 + raw_lines : string; 474 + } 475 + 476 + let normalize_encoding_name s = String.lowercase_ascii (String.trim s) 477 + 478 + let encoding_to_test_name = function 479 + | Encoding.Utf8 -> "utf-8" 480 + | Encoding.Utf16le -> "utf-16le" 481 + | Encoding.Utf16be -> "utf-16be" 482 + | Encoding.Windows_1252 -> "windows-1252" 483 + | Encoding.Iso_8859_2 -> "iso-8859-2" 484 + | Encoding.Euc_jp -> "euc-jp" 485 + 486 + let parse_test_case lines = 487 + let raw_lines = String.concat "\n" lines in 488 + let rec parse acc = function 489 + | [] -> acc 490 + | line :: rest when String.length line > 0 && line.[0] = '#' -> 491 + let section = String.trim line in 492 + let content, remaining = collect_section rest in 493 + parse ((section, content) :: acc) remaining 494 + | _ :: rest -> parse acc rest 495 + and collect_section lines = 496 + let rec loop acc = function 497 + | [] -> (List.rev acc, []) 498 + | line :: rest when String.length line > 0 && line.[0] = '#' -> 499 + (List.rev acc, line :: rest) 500 + | line :: rest -> loop (line :: acc) rest 501 + in loop [] lines 502 + in 503 + let sections = parse [] lines in 504 + let get_section name = 505 + match List.assoc_opt name sections with 506 + | Some lines -> String.concat "\n" lines | None -> "" 507 + in 508 + let data = get_section "#data" in 509 + let encoding = get_section "#encoding" in 510 + { input = data; expected_encoding = String.trim encoding; raw_lines } 511 + 512 + let parse_dat_file content = 513 + let lines = String.split_on_char '\n' content in 514 + let rec split_tests current acc = function 515 + | [] -> if current = [] then List.rev acc else List.rev (List.rev current :: acc) 516 + | "" :: "#data" :: rest -> 517 + let new_acc = if current = [] then acc else (List.rev current :: acc) in 518 + split_tests ["#data"] new_acc rest 519 + | line :: rest -> split_tests (line :: current) acc rest 520 + in 521 + let test_groups = split_tests [] [] lines in 522 + List.filter_map (fun lines -> 523 + if List.exists (fun l -> l = "#data") lines then Some (parse_test_case lines) 524 + else None 525 + ) test_groups 526 + 527 + let run_test test = 528 + try 529 + let (_, detected_encoding) = Encoding.decode (Bytes.of_string test.input) () in 530 + let detected_name = encoding_to_test_name detected_encoding in 531 + let expected_name = normalize_encoding_name test.expected_encoding in 532 + let match_encoding det exp = 533 + det = exp || 534 + (det = "windows-1252" && (exp = "windows-1252" || exp = "cp1252" || exp = "iso-8859-1")) || 535 + (det = "iso-8859-2" && (exp = "iso-8859-2" || exp = "iso8859-2" || exp = "latin2")) || 536 + (det = "utf-8" && (exp = "utf-8" || exp = "utf8")) || 537 + (det = "euc-jp" && (exp = "euc-jp" || exp = "eucjp")) 538 + in 539 + (match_encoding detected_name expected_name, detected_name, expected_name) 540 + with e -> 541 + (false, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), test.expected_encoding) 542 + 543 + let run_file path = 544 + let ic = open_in path in 545 + let content = really_input_string ic (in_channel_length ic) in 546 + close_in ic; 547 + let tests = parse_dat_file content in 548 + let filename = Filename.basename path in 549 + let passed = ref 0 in 550 + let failed = ref 0 in 551 + let results = ref [] in 552 + List.iteri (fun i test -> 553 + if String.trim test.expected_encoding = "" then () 554 + else begin 555 + let (success, detected, expected) = run_test test in 556 + let result : Report.test_result = { 557 + test_num = i + 1; 558 + description = Printf.sprintf "Detect %s encoding" expected; 559 + input = String.escaped test.input; 560 + expected; actual = detected; success; 561 + details = [ 562 + ("Input Length", string_of_int (String.length test.input)); 563 + ("Has BOM", string_of_bool (String.length test.input >= 3 && 564 + (String.sub test.input 0 3 = "\xEF\xBB\xBF" || 565 + String.sub test.input 0 2 = "\xFF\xFE" || 566 + String.sub test.input 0 2 = "\xFE\xFF"))); 567 + ]; 568 + raw_test_data = Some test.raw_lines; 569 + } in 570 + results := result :: !results; 571 + if success then incr passed else incr failed 572 + end 573 + ) tests; 574 + let file_result : Report.file_result = { 575 + filename; test_type = "Encoding Detection"; 576 + passed_count = !passed; failed_count = !failed; 577 + tests = List.rev !results; 578 + } in 579 + (file_result, !passed, !failed) 580 + 581 + let run_dir test_dir = 582 + let files = Sys.readdir test_dir |> Array.to_list in 583 + let dat_files = List.filter (fun f -> 584 + Filename.check_suffix f ".dat" && not (String.contains f '/') 585 + ) files in 586 + let total_passed = ref 0 in 587 + let total_failed = ref 0 in 588 + let file_results = ref [] in 589 + List.iter (fun file -> 590 + let path = Filename.concat test_dir file in 591 + if Sys.is_directory path then () else begin 592 + let (file_result, passed, failed) = run_file path in 593 + total_passed := !total_passed + passed; 594 + total_failed := !total_failed + failed; 595 + file_results := file_result :: !file_results; 596 + Printf.printf " %s: %d passed, %d failed\n" file passed failed 597 + end 598 + ) (List.sort String.compare dat_files); 599 + (List.rev !file_results, !total_passed, !total_failed) 600 + end 601 + 602 + (* ============================================================ *) 603 + (* Main Entry Point *) 604 + (* ============================================================ *) 605 + 606 + let () = 607 + let base_dir = if Array.length Sys.argv > 1 then Sys.argv.(1) else "html5lib-tests" in 608 + 609 + let all_files = ref [] in 610 + let total_passed = ref 0 in 611 + let total_failed = ref 0 in 612 + 613 + (* Run Tree Construction Tests *) 614 + Printf.printf "\n=== Tree Construction Tests ===\n"; 615 + let tree_dir = Filename.concat base_dir "tree-construction" in 616 + if Sys.file_exists tree_dir then begin 617 + let (files, passed, failed) = TreeConstruction.run_dir tree_dir in 618 + all_files := !all_files @ files; 619 + total_passed := !total_passed + passed; 620 + total_failed := !total_failed + failed; 621 + Printf.printf " Subtotal: %d passed, %d failed\n" passed failed 622 + end else 623 + Printf.printf " (directory not found: %s)\n" tree_dir; 624 + 625 + (* Run Tokenizer Tests *) 626 + Printf.printf "\n=== Tokenizer Tests ===\n"; 627 + let tok_dir = Filename.concat base_dir "tokenizer" in 628 + if Sys.file_exists tok_dir then begin 629 + let (files, passed, failed) = Tokenizer_tests.run_dir tok_dir in 630 + all_files := !all_files @ files; 631 + total_passed := !total_passed + passed; 632 + total_failed := !total_failed + failed; 633 + Printf.printf " Subtotal: %d passed, %d failed\n" passed failed 634 + end else 635 + Printf.printf " (directory not found: %s)\n" tok_dir; 636 + 637 + (* Run Encoding Tests *) 638 + Printf.printf "\n=== Encoding Detection Tests ===\n"; 639 + let enc_dir = Filename.concat base_dir "encoding" in 640 + if Sys.file_exists enc_dir then begin 641 + let (files, passed, failed) = Encoding_tests.run_dir enc_dir in 642 + all_files := !all_files @ files; 643 + total_passed := !total_passed + passed; 644 + total_failed := !total_failed + failed; 645 + Printf.printf " Subtotal: %d passed, %d failed\n" passed failed 646 + end else 647 + Printf.printf " (directory not found: %s)\n" enc_dir; 648 + 649 + (* Note: Serializer tests use the standalone test_serializer.exe for full implementation *) 650 + 651 + Printf.printf "\n=== Overall Summary ===\n"; 652 + Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed; 653 + 654 + (* Generate combined HTML report *) 655 + let report : Report.report = { 656 + title = "HTML5 Parser Test Suite"; 657 + test_type = "combined"; 658 + description = "This is a comprehensive test report for the html5rw OCaml HTML5 parser library, \ 659 + validating conformance against the official html5lib-tests test suite. \ 660 + Tests cover: (1) Tree Construction - validating the DOM tree building algorithm; \ 661 + (2) Tokenization - validating the HTML tokenizer state machine; \ 662 + (3) Encoding Detection - validating character encoding sniffing. \ 663 + Each test shows the input, expected output, actual output, and original test data."; 664 + files = !all_files; 665 + total_passed = !total_passed; 666 + total_failed = !total_failed; 667 + } in 668 + Report.generate_report report "html5lib_test_report.html"; 669 + 670 + exit (if !total_failed > 0 then 1 else 0)
+50 -27
test/test_encoding.ml
··· 1 1 (* Test runner for html5lib-tests encoding tests *) 2 2 3 3 module Encoding = Html5rw_encoding 4 + module Report = Test_report 4 5 5 6 type test_case = { 6 7 input : string; 7 8 expected_encoding : string; 9 + raw_lines : string; (* Original test data from .dat file *) 8 10 } 9 11 10 12 (* Normalize encoding name for comparison *) ··· 22 24 23 25 (* Parse a single test case from lines *) 24 26 let parse_test_case lines = 27 + let raw_lines = String.concat "\n" lines in 25 28 let rec parse acc = function 26 29 | [] -> acc 27 30 | line :: rest when String.length line > 0 && line.[0] = '#' -> ··· 49 52 let data = get_section "#data" in 50 53 let encoding = get_section "#encoding" in 51 54 52 - { input = data; expected_encoding = String.trim encoding } 55 + { input = data; expected_encoding = String.trim encoding; raw_lines } 53 56 54 57 (* Parse a .dat file into test cases *) 55 58 let parse_dat_file content = ··· 104 107 105 108 let passed = ref 0 in 106 109 let failed = ref 0 in 107 - let errors = ref [] in 110 + let results = ref [] in 108 111 109 112 List.iteri (fun i test -> 110 113 if String.trim test.expected_encoding = "" then ··· 112 115 () 113 116 else begin 114 117 let (success, detected, expected) = run_test test in 115 - if success then 116 - incr passed 117 - else begin 118 - incr failed; 119 - if List.length !errors < 5 then 120 - errors := (i + 1, test.input, detected, expected) :: !errors 121 - end 118 + let result : Report.test_result = { 119 + test_num = i + 1; 120 + description = Printf.sprintf "Detect %s encoding" expected; 121 + input = String.escaped test.input; (* Show escaped version of full input *) 122 + expected; 123 + actual = detected; 124 + success; 125 + details = [ 126 + ("Input Length", string_of_int (String.length test.input)); 127 + ("Has BOM", string_of_bool (String.length test.input >= 3 && 128 + (String.sub test.input 0 3 = "\xEF\xBB\xBF" || (* UTF-8 BOM *) 129 + String.sub test.input 0 2 = "\xFF\xFE" || (* UTF-16 LE BOM *) 130 + String.sub test.input 0 2 = "\xFE\xFF"))); (* UTF-16 BE BOM *) 131 + ]; 132 + raw_test_data = Some test.raw_lines; 133 + } in 134 + results := result :: !results; 135 + if success then incr passed else incr failed 122 136 end 123 137 ) tests; 124 138 125 - (!passed, !failed, List.rev !errors, filename) 139 + let file_result : Report.file_result = { 140 + filename; 141 + test_type = "Encoding Detection"; 142 + passed_count = !passed; 143 + failed_count = !failed; 144 + tests = List.rev !results; 145 + } in 146 + (file_result, !passed, !failed) 126 147 127 148 let () = 128 149 let test_dir = Sys.argv.(1) in ··· 134 155 135 156 let total_passed = ref 0 in 136 157 let total_failed = ref 0 in 137 - let all_errors = ref [] in 158 + let file_results = ref [] in 138 159 139 160 List.iter (fun file -> 140 161 let path = Filename.concat test_dir file in 141 162 if Sys.is_directory path then () else begin 142 - let (passed, failed, errors, filename) = run_file path in 163 + let (file_result, passed, failed) = run_file path in 143 164 total_passed := !total_passed + passed; 144 165 total_failed := !total_failed + failed; 145 - if errors <> [] then 146 - all_errors := (filename, errors) :: !all_errors; 147 - Printf.printf "%s: %d passed, %d failed\n" filename passed failed 166 + file_results := file_result :: !file_results; 167 + Printf.printf "%s: %d passed, %d failed\n" file passed failed 148 168 end 149 169 ) (List.sort String.compare dat_files); 150 170 151 171 Printf.printf "\n=== Summary ===\n"; 152 172 Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed; 153 173 154 - if !all_errors <> [] then begin 155 - Printf.printf "\n=== First failures ===\n"; 156 - List.iter (fun (filename, errors) -> 157 - List.iter (fun (test_num, input, detected, expected) -> 158 - Printf.printf "\n--- %s test %d ---\n" filename test_num; 159 - Printf.printf "Input (first 200 chars): %s\n" 160 - (String.escaped (String.sub input 0 (min 200 (String.length input)))); 161 - Printf.printf "Expected encoding: %s\n" expected; 162 - Printf.printf "Detected encoding: %s\n" detected 163 - ) errors 164 - ) (List.rev !all_errors) 165 - end; 174 + (* Generate HTML report *) 175 + let report : Report.report = { 176 + title = "HTML5 Encoding Detection Tests"; 177 + test_type = "encoding"; 178 + description = "These tests validate the character encoding detection algorithm as specified in the WHATWG \ 179 + Encoding Standard. The parser must determine the document's character encoding from byte order \ 180 + marks (BOM), meta charset declarations, or content sniffing. Tests cover UTF-8, UTF-16 \ 181 + (big/little endian), Windows-1252, ISO-8859-2, EUC-JP, and other encodings. The algorithm \ 182 + examines initial bytes for BOM signatures and scans the first 1024 bytes for meta elements \ 183 + declaring charset or http-equiv content-type."; 184 + files = List.rev !file_results; 185 + total_passed = !total_passed; 186 + total_failed = !total_failed; 187 + } in 188 + Report.generate_report report "test_encoding_report.html"; 166 189 167 190 exit (if !total_failed > 0 then 1 else 0)
+60 -26
test/test_html5lib.ml
··· 4 4 5 5 module Parser = Html5rw_parser 6 6 module Dom = Html5rw_dom 7 + module Report = Test_report 7 8 8 9 type test_case = { 9 10 input : string; ··· 11 12 expected_errors : string list; 12 13 script_on : bool; 13 14 fragment_context : string option; 15 + raw_lines : string; (* Original test data from .dat file *) 14 16 } 15 17 16 18 let _is_blank s = String.trim s = "" 17 19 18 20 (* Parse a single test case from lines *) 19 21 let parse_test_case lines = 22 + let raw_lines = String.concat "\n" lines in 20 23 let rec parse acc = function 21 24 | [] -> acc 22 25 | line :: rest when String.length line > 0 && line.[0] = '#' -> ··· 61 64 expected_errors = errors; 62 65 script_on; 63 66 fragment_context = fragment; 67 + raw_lines; 64 68 } 65 69 66 70 (* Parse a .dat file into test cases *) ··· 125 129 let actual_tree = Dom.to_test_format (Parser.root result) in 126 130 let expected = normalize_tree (strip_tree_prefix test.expected_tree) in 127 131 let actual = normalize_tree (strip_tree_prefix actual_tree) in 128 - (expected = actual, expected, actual, List.length (Parser.errors result), List.length test.expected_errors) 132 + let error_count = List.length (Parser.errors result) in 133 + let expected_error_count = List.length test.expected_errors in 134 + (expected = actual, expected, actual, error_count, expected_error_count) 129 135 with e -> 130 136 let expected = normalize_tree (strip_tree_prefix test.expected_tree) in 131 137 (false, expected, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), 0, 0) ··· 140 146 141 147 let passed = ref 0 in 142 148 let failed = ref 0 in 143 - let errors = ref [] in 149 + let results = ref [] in 144 150 145 151 List.iteri (fun i test -> 146 152 (* Skip script-on tests since we don't support scripting *) 147 153 if test.script_on then 148 154 () (* Skip this test *) 149 155 else begin 150 - let (success, expected, actual, _actual_error_count, _expected_error_count) = run_test test in 151 - if success then 152 - incr passed 153 - else begin 154 - incr failed; 155 - errors := (i + 1, test.input, expected, actual) :: !errors 156 - end 156 + let (success, expected, actual, actual_error_count, expected_error_count) = run_test test in 157 + let description = 158 + let input_preview = String.sub test.input 0 (min 60 (String.length test.input)) in 159 + if test.fragment_context <> None then 160 + Printf.sprintf "Fragment (%s): %s" (Option.get test.fragment_context) input_preview 161 + else 162 + input_preview 163 + in 164 + let details = [ 165 + ("Fragment Context", Option.value test.fragment_context ~default:"(none)"); 166 + ("Expected Errors", string_of_int expected_error_count); 167 + ("Actual Errors", string_of_int actual_error_count); 168 + ] in 169 + let result : Report.test_result = { 170 + test_num = i + 1; 171 + description; 172 + input = test.input; 173 + expected; 174 + actual; 175 + success; 176 + details; 177 + raw_test_data = Some test.raw_lines; 178 + } in 179 + results := result :: !results; 180 + if success then incr passed else incr failed 157 181 end 158 182 ) tests; 159 183 160 - (!passed, !failed, List.rev !errors, filename) 184 + let file_result : Report.file_result = { 185 + filename; 186 + test_type = "Tree Construction"; 187 + passed_count = !passed; 188 + failed_count = !failed; 189 + tests = List.rev !results; 190 + } in 191 + (file_result, !passed, !failed) 161 192 162 193 let () = 163 194 let test_dir = Sys.argv.(1) in ··· 169 200 170 201 let total_passed = ref 0 in 171 202 let total_failed = ref 0 in 172 - let all_errors = ref [] in 203 + let file_results = ref [] in 173 204 174 205 List.iter (fun file -> 175 206 let path = Filename.concat test_dir file in 176 207 if Sys.is_directory path then () else begin 177 - let (passed, failed, errors, filename) = run_file path in 208 + let (file_result, passed, failed) = run_file path in 178 209 total_passed := !total_passed + passed; 179 210 total_failed := !total_failed + failed; 180 - if errors <> [] then 181 - all_errors := (filename, errors) :: !all_errors; 182 - Printf.printf "%s: %d passed, %d failed\n" filename passed failed 211 + file_results := file_result :: !file_results; 212 + Printf.printf "%s: %d passed, %d failed\n" file passed failed 183 213 end 184 214 ) (List.sort String.compare dat_files); 185 215 186 216 Printf.printf "\n=== Summary ===\n"; 187 217 Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed; 188 218 189 - if !all_errors <> [] then begin 190 - Printf.printf "\n=== First failures ===\n"; 191 - List.iter (fun (filename, errors) -> 192 - List.iter (fun (test_num, input, expected, actual) -> 193 - Printf.printf "\n--- %s test %d ---\n" filename test_num; 194 - Printf.printf "Input: %s\n" (String.escaped input); 195 - Printf.printf "Expected:\n%s\n" expected; 196 - Printf.printf "Actual:\n%s\n" actual 197 - ) (List.filteri (fun i _ -> i < 3) errors) 198 - ) (List.filteri (fun i _ -> i < 10) !all_errors) 199 - end; 219 + (* Generate HTML report *) 220 + let report : Report.report = { 221 + title = "HTML5 Tree Construction Tests"; 222 + test_type = "tree-construction"; 223 + description = "These tests validate the HTML5 tree construction algorithm as specified in the WHATWG HTML Standard. \ 224 + Each test provides HTML input and the expected DOM tree structure. The parser processes the HTML and \ 225 + builds a document tree, which is then serialized and compared against the expected output. \ 226 + Tests cover various edge cases including malformed HTML, implicit element creation, foster parenting, \ 227 + adoption agency algorithm, and foreign content (SVG/MathML). Fragment parsing tests verify parsing \ 228 + in the context of specific elements."; 229 + files = List.rev !file_results; 230 + total_passed = !total_passed; 231 + total_failed = !total_failed; 232 + } in 233 + Report.generate_report report "test_html5lib_report.html"; 200 234 201 235 exit (if !total_failed > 0 then 1 else 0)
+587
test/test_report.ml
··· 1 + (* HTML Test Report Generator *) 2 + 3 + type test_result = { 4 + test_num : int; 5 + description : string; 6 + input : string; 7 + expected : string; 8 + actual : string; 9 + success : bool; 10 + details : (string * string) list; (* Additional key-value pairs *) 11 + raw_test_data : string option; (* Original test file content for context *) 12 + } 13 + 14 + type file_result = { 15 + filename : string; 16 + test_type : string; 17 + passed_count : int; 18 + failed_count : int; 19 + tests : test_result list; 20 + } 21 + 22 + type report = { 23 + title : string; 24 + test_type : string; 25 + description : string; (* Explanation of what this test suite validates *) 26 + files : file_result list; 27 + total_passed : int; 28 + total_failed : int; 29 + } 30 + 31 + let html_escape s = 32 + let buf = Buffer.create (String.length s * 2) in 33 + String.iter (fun c -> 34 + match c with 35 + | '&' -> Buffer.add_string buf "&amp;" 36 + | '<' -> Buffer.add_string buf "&lt;" 37 + | '>' -> Buffer.add_string buf "&gt;" 38 + | '"' -> Buffer.add_string buf "&quot;" 39 + | '\'' -> Buffer.add_string buf "&#x27;" 40 + | c -> Buffer.add_char buf c 41 + ) s; 42 + Buffer.contents buf 43 + 44 + (* No truncation - show full content for standalone reports *) 45 + let truncate_string ?(max_len=10000) s = 46 + if String.length s <= max_len then s 47 + else String.sub s 0 max_len ^ "\n... (truncated at " ^ string_of_int max_len ^ " chars)" 48 + 49 + let css = {| 50 + :root { 51 + --bg-primary: #1a1a2e; 52 + --bg-secondary: #16213e; 53 + --bg-tertiary: #0f3460; 54 + --text-primary: #eee; 55 + --text-secondary: #aaa; 56 + --accent: #e94560; 57 + --success: #4ade80; 58 + --failure: #f87171; 59 + --border: #333; 60 + } 61 + 62 + * { box-sizing: border-box; margin: 0; padding: 0; } 63 + 64 + body { 65 + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; 66 + background: var(--bg-primary); 67 + color: var(--text-primary); 68 + line-height: 1.6; 69 + } 70 + 71 + .container { 72 + max-width: 1400px; 73 + margin: 0 auto; 74 + padding: 20px; 75 + } 76 + 77 + header { 78 + background: var(--bg-secondary); 79 + padding: 20px; 80 + border-radius: 8px; 81 + margin-bottom: 20px; 82 + } 83 + 84 + header h1 { 85 + font-size: 1.5rem; 86 + margin-bottom: 10px; 87 + color: var(--accent); 88 + } 89 + 90 + .summary { 91 + display: flex; 92 + gap: 20px; 93 + flex-wrap: wrap; 94 + align-items: center; 95 + } 96 + 97 + .stat { 98 + padding: 8px 16px; 99 + border-radius: 6px; 100 + font-weight: 600; 101 + } 102 + 103 + .stat.total { background: var(--bg-tertiary); } 104 + .stat.passed { background: rgba(74, 222, 128, 0.2); color: var(--success); } 105 + .stat.failed { background: rgba(248, 113, 113, 0.2); color: var(--failure); } 106 + 107 + .controls { 108 + display: flex; 109 + gap: 10px; 110 + margin-top: 10px; 111 + flex-wrap: wrap; 112 + } 113 + 114 + input[type="search"], select { 115 + padding: 8px 12px; 116 + border: 1px solid var(--border); 117 + border-radius: 6px; 118 + background: var(--bg-primary); 119 + color: var(--text-primary); 120 + font-size: 14px; 121 + } 122 + 123 + input[type="search"] { width: 300px; } 124 + 125 + button { 126 + padding: 8px 16px; 127 + border: none; 128 + border-radius: 6px; 129 + background: var(--accent); 130 + color: white; 131 + cursor: pointer; 132 + font-size: 14px; 133 + } 134 + 135 + button:hover { opacity: 0.9; } 136 + 137 + .sidebar { 138 + position: fixed; 139 + left: 0; 140 + top: 0; 141 + bottom: 0; 142 + width: 280px; 143 + background: var(--bg-secondary); 144 + border-right: 1px solid var(--border); 145 + overflow-y: auto; 146 + padding: 10px; 147 + padding-top: 20px; 148 + } 149 + 150 + .sidebar-item { 151 + padding: 8px 12px; 152 + border-radius: 6px; 153 + cursor: pointer; 154 + display: flex; 155 + justify-content: space-between; 156 + align-items: center; 157 + margin-bottom: 4px; 158 + font-size: 14px; 159 + } 160 + 161 + .sidebar-item:hover { background: var(--bg-tertiary); } 162 + .sidebar-item.active { background: var(--accent); } 163 + 164 + .sidebar-item .count { 165 + font-size: 12px; 166 + padding: 2px 8px; 167 + border-radius: 10px; 168 + background: var(--bg-primary); 169 + } 170 + 171 + .sidebar-item .count.all-passed { color: var(--success); } 172 + .sidebar-item .count.has-failed { color: var(--failure); } 173 + 174 + main { 175 + margin-left: 300px; 176 + padding: 20px; 177 + padding-top: 30px; 178 + } 179 + 180 + .intro { 181 + background: var(--bg-secondary); 182 + padding: 20px; 183 + border-radius: 8px; 184 + margin-bottom: 20px; 185 + } 186 + 187 + .file-section { 188 + margin-bottom: 30px; 189 + background: var(--bg-secondary); 190 + border-radius: 8px; 191 + overflow: hidden; 192 + } 193 + 194 + .file-header { 195 + padding: 15px 20px; 196 + background: var(--bg-tertiary); 197 + cursor: pointer; 198 + display: flex; 199 + justify-content: space-between; 200 + align-items: center; 201 + } 202 + 203 + .file-header h2 { 204 + font-size: 1.1rem; 205 + display: flex; 206 + align-items: center; 207 + gap: 10px; 208 + } 209 + 210 + .file-header .toggle { 211 + font-size: 1.2rem; 212 + transition: transform 0.2s; 213 + } 214 + 215 + .file-header.collapsed .toggle { transform: rotate(-90deg); } 216 + 217 + .file-stats { 218 + display: flex; 219 + gap: 15px; 220 + font-size: 14px; 221 + } 222 + 223 + .file-stats .passed { color: var(--success); } 224 + .file-stats .failed { color: var(--failure); } 225 + 226 + .tests-container { 227 + padding: 10px; 228 + } 229 + 230 + .tests-container.hidden { display: none; } 231 + 232 + .test-item { 233 + margin: 8px 0; 234 + border: 1px solid var(--border); 235 + border-radius: 6px; 236 + overflow: hidden; 237 + } 238 + 239 + .test-header { 240 + padding: 10px 15px; 241 + cursor: pointer; 242 + display: flex; 243 + justify-content: space-between; 244 + align-items: center; 245 + background: var(--bg-primary); 246 + } 247 + 248 + .test-header:hover { background: var(--bg-tertiary); } 249 + 250 + .test-header .status { 251 + width: 10px; 252 + height: 10px; 253 + border-radius: 50%; 254 + margin-right: 10px; 255 + } 256 + 257 + .test-header .status.passed { background: var(--success); } 258 + .test-header .status.failed { background: var(--failure); } 259 + 260 + .test-header .test-info { 261 + flex: 1; 262 + display: flex; 263 + align-items: center; 264 + } 265 + 266 + .test-header .test-num { 267 + font-weight: 600; 268 + margin-right: 10px; 269 + color: var(--text-secondary); 270 + } 271 + 272 + .test-header .test-desc { 273 + font-size: 14px; 274 + color: var(--text-primary); 275 + white-space: nowrap; 276 + overflow: hidden; 277 + text-overflow: ellipsis; 278 + max-width: 600px; 279 + } 280 + 281 + .test-details { 282 + padding: 15px; 283 + background: var(--bg-primary); 284 + border-top: 1px solid var(--border); 285 + display: none; 286 + } 287 + 288 + .test-details.visible { display: block; } 289 + 290 + .detail-section { 291 + margin-bottom: 15px; 292 + } 293 + 294 + .detail-section h4 { 295 + font-size: 12px; 296 + text-transform: uppercase; 297 + color: var(--text-secondary); 298 + margin-bottom: 8px; 299 + letter-spacing: 0.5px; 300 + } 301 + 302 + .detail-section pre { 303 + background: var(--bg-secondary); 304 + padding: 12px; 305 + border-radius: 6px; 306 + overflow-x: auto; 307 + font-family: 'Monaco', 'Menlo', monospace; 308 + font-size: 13px; 309 + white-space: pre-wrap; 310 + word-break: break-all; 311 + max-height: 300px; 312 + overflow-y: auto; 313 + } 314 + 315 + .detail-row { 316 + display: grid; 317 + grid-template-columns: 1fr 1fr; 318 + gap: 15px; 319 + } 320 + 321 + .detail-row.single { grid-template-columns: 1fr; } 322 + 323 + .meta-info { 324 + display: flex; 325 + gap: 20px; 326 + flex-wrap: wrap; 327 + font-size: 13px; 328 + color: var(--text-secondary); 329 + margin-bottom: 15px; 330 + } 331 + 332 + .meta-info span { 333 + background: var(--bg-secondary); 334 + padding: 4px 10px; 335 + border-radius: 4px; 336 + } 337 + 338 + .diff-indicator { 339 + color: var(--failure); 340 + font-weight: bold; 341 + margin-left: 5px; 342 + } 343 + 344 + @media (max-width: 900px) { 345 + .sidebar { display: none; } 346 + main { margin-left: 0; } 347 + .detail-row { grid-template-columns: 1fr; } 348 + } 349 + |} 350 + 351 + let js = {| 352 + document.addEventListener('DOMContentLoaded', function() { 353 + // File section toggling 354 + document.querySelectorAll('.file-header').forEach(header => { 355 + header.addEventListener('click', function() { 356 + this.classList.toggle('collapsed'); 357 + const container = this.nextElementSibling; 358 + container.classList.toggle('hidden'); 359 + }); 360 + }); 361 + 362 + // Test details toggling 363 + document.querySelectorAll('.test-header').forEach(header => { 364 + header.addEventListener('click', function(e) { 365 + e.stopPropagation(); 366 + const details = this.nextElementSibling; 367 + details.classList.toggle('visible'); 368 + }); 369 + }); 370 + 371 + // Sidebar navigation 372 + document.querySelectorAll('.sidebar-item').forEach(item => { 373 + item.addEventListener('click', function() { 374 + const fileId = this.dataset.file; 375 + const section = document.getElementById(fileId); 376 + if (section) { 377 + section.scrollIntoView({ behavior: 'smooth' }); 378 + // Expand if collapsed 379 + const header = section.querySelector('.file-header'); 380 + if (header.classList.contains('collapsed')) { 381 + header.click(); 382 + } 383 + } 384 + // Update active state 385 + document.querySelectorAll('.sidebar-item').forEach(i => i.classList.remove('active')); 386 + this.classList.add('active'); 387 + }); 388 + }); 389 + 390 + // Search functionality 391 + const searchInput = document.getElementById('search'); 392 + if (searchInput) { 393 + searchInput.addEventListener('input', function() { 394 + const query = this.value.toLowerCase(); 395 + document.querySelectorAll('.test-item').forEach(item => { 396 + const text = item.textContent.toLowerCase(); 397 + item.style.display = text.includes(query) ? '' : 'none'; 398 + }); 399 + }); 400 + } 401 + 402 + // Filter functionality 403 + const filterSelect = document.getElementById('filter'); 404 + if (filterSelect) { 405 + filterSelect.addEventListener('change', function() { 406 + const filter = this.value; 407 + document.querySelectorAll('.test-item').forEach(item => { 408 + const passed = item.querySelector('.status.passed') !== null; 409 + if (filter === 'all') { 410 + item.style.display = ''; 411 + } else if (filter === 'passed') { 412 + item.style.display = passed ? '' : 'none'; 413 + } else if (filter === 'failed') { 414 + item.style.display = passed ? 'none' : ''; 415 + } 416 + }); 417 + }); 418 + } 419 + 420 + // Expand/Collapse all 421 + document.getElementById('expand-all')?.addEventListener('click', function() { 422 + document.querySelectorAll('.file-header.collapsed').forEach(h => h.click()); 423 + }); 424 + 425 + document.getElementById('collapse-all')?.addEventListener('click', function() { 426 + document.querySelectorAll('.file-header:not(.collapsed)').forEach(h => h.click()); 427 + }); 428 + }); 429 + |} 430 + 431 + let generate_test_html test = 432 + let status_class = if test.success then "passed" else "failed" in 433 + let desc_escaped = html_escape test.description in 434 + let input_escaped = html_escape (truncate_string test.input) in 435 + let expected_escaped = html_escape (truncate_string test.expected) in 436 + let actual_escaped = html_escape (truncate_string test.actual) in 437 + 438 + let details_html = String.concat "" (List.map (fun (key, value) -> 439 + Printf.sprintf {| 440 + <div class="detail-section"> 441 + <h4>%s</h4> 442 + <pre>%s</pre> 443 + </div> 444 + |} (html_escape key) (html_escape value) 445 + ) test.details) in 446 + 447 + let raw_data_html = match test.raw_test_data with 448 + | Some data -> 449 + Printf.sprintf {| 450 + <div class="detail-section"> 451 + <h4>Original Test Data (from .dat/.test file)</h4> 452 + <pre>%s</pre> 453 + </div> 454 + |} (html_escape (truncate_string data)) 455 + | None -> "" 456 + in 457 + 458 + let diff_indicator = if test.success then "" else {|<span class="diff-indicator">✗</span>|} in 459 + 460 + Printf.sprintf {| 461 + <div class="test-item" data-passed="%b"> 462 + <div class="test-header"> 463 + <div class="test-info"> 464 + <span class="status %s"></span> 465 + <span class="test-num">#%d</span> 466 + <span class="test-desc">%s</span> 467 + </div> 468 + <span>▼</span> 469 + </div> 470 + <div class="test-details"> 471 + %s 472 + <div class="detail-section"> 473 + <h4>Input (HTML to parse)</h4> 474 + <pre>%s</pre> 475 + </div> 476 + <div class="detail-row"> 477 + <div class="detail-section"> 478 + <h4>Expected Output%s</h4> 479 + <pre>%s</pre> 480 + </div> 481 + <div class="detail-section"> 482 + <h4>Actual Output%s</h4> 483 + <pre>%s</pre> 484 + </div> 485 + </div> 486 + %s 487 + </div> 488 + </div> 489 + |} test.success status_class test.test_num desc_escaped 490 + raw_data_html input_escaped diff_indicator expected_escaped diff_indicator actual_escaped details_html 491 + 492 + let generate_file_html file = 493 + let file_id = String.map (fun c -> if c = '.' then '-' else c) file.filename in 494 + let tests_html = String.concat "\n" (List.map generate_test_html file.tests) in 495 + let collapsed = if file.failed_count = 0 then "collapsed" else "" in 496 + let hidden = if file.failed_count = 0 then "hidden" else "" in 497 + 498 + Printf.sprintf {| 499 + <div class="file-section" id="file-%s"> 500 + <div class="file-header %s"> 501 + <h2> 502 + <span class="toggle">▼</span> 503 + %s 504 + <span style="font-weight: normal; font-size: 0.9em; color: var(--text-secondary)">(%s)</span> 505 + </h2> 506 + <div class="file-stats"> 507 + <span class="passed">✓ %d passed</span> 508 + <span class="failed">✗ %d failed</span> 509 + </div> 510 + </div> 511 + <div class="tests-container %s"> 512 + %s 513 + </div> 514 + </div> 515 + |} file_id collapsed file.filename file.test_type file.passed_count file.failed_count hidden tests_html 516 + 517 + let generate_sidebar_html files = 518 + String.concat "\n" (List.map (fun file -> 519 + let file_id = String.map (fun c -> if c = '.' then '-' else c) file.filename in 520 + let count_class = if file.failed_count = 0 then "all-passed" else "has-failed" in 521 + Printf.sprintf {| 522 + <div class="sidebar-item" data-file="file-%s"> 523 + <span>%s</span> 524 + <span class="count %s">%d/%d</span> 525 + </div> 526 + |} file_id file.filename count_class file.passed_count (file.passed_count + file.failed_count) 527 + ) files) 528 + 529 + let generate_report report output_path = 530 + let files_html = String.concat "\n" (List.map generate_file_html report.files) in 531 + let sidebar_html = generate_sidebar_html report.files in 532 + 533 + let html = Printf.sprintf {|<!DOCTYPE html> 534 + <html lang="en"> 535 + <head> 536 + <meta charset="UTF-8"> 537 + <meta name="viewport" content="width=device-width, initial-scale=1.0"> 538 + <title>%s - Test Report</title> 539 + <style>%s</style> 540 + </head> 541 + <body> 542 + <div class="sidebar"> 543 + <h3 style="padding: 10px; color: var(--text-secondary); font-size: 12px; text-transform: uppercase;">Files</h3> 544 + %s 545 + </div> 546 + 547 + <main> 548 + <header> 549 + <h1>%s</h1> 550 + <p style="color: var(--text-secondary); margin: 10px 0; max-width: 900px;">%s</p> 551 + <div class="summary"> 552 + <span class="stat total">%d tests</span> 553 + <span class="stat passed">✓ %d passed</span> 554 + <span class="stat failed">✗ %d failed</span> 555 + <span class="stat total">%.1f%% pass rate</span> 556 + </div> 557 + <div class="controls"> 558 + <input type="search" id="search" placeholder="Search tests..."> 559 + <select id="filter"> 560 + <option value="all">All tests</option> 561 + <option value="passed">Passed only</option> 562 + <option value="failed">Failed only</option> 563 + </select> 564 + <button id="expand-all">Expand All</button> 565 + <button id="collapse-all">Collapse All</button> 566 + </div> 567 + </header> 568 + %s 569 + </main> 570 + 571 + <script>%s</script> 572 + </body> 573 + </html> 574 + |} report.title css 575 + sidebar_html 576 + report.title (html_escape report.description) 577 + (report.total_passed + report.total_failed) 578 + report.total_passed 579 + report.total_failed 580 + (100.0 *. float_of_int report.total_passed /. float_of_int (max 1 (report.total_passed + report.total_failed))) 581 + files_html js 582 + in 583 + 584 + let oc = open_out output_path in 585 + output_string oc html; 586 + close_out oc; 587 + Printf.printf "HTML report written to: %s\n" output_path
+103 -26
test/test_serializer.ml
··· 1 1 (* Test runner for html5lib-tests serializer tests *) 2 2 3 3 module Dom = Html5rw_dom 4 + module Report = Test_report 4 5 5 6 (* Extract values from JSON *) 6 7 let json_string = function ··· 33 34 match json_mem name obj with 34 35 | Some v -> v 35 36 | None -> failwith ("Missing member: " ^ name) 37 + 38 + let rec json_to_string = function 39 + | Jsont.Null _ -> "null" 40 + | Jsont.Bool (b, _) -> string_of_bool b 41 + | Jsont.Number (n, _) -> Printf.sprintf "%g" n 42 + | Jsont.String (s, _) -> Printf.sprintf "%S" s 43 + | Jsont.Array (arr, _) -> 44 + "[" ^ String.concat ", " (List.map json_to_string arr) ^ "]" 45 + | Jsont.Object (obj, _) -> 46 + "{" ^ String.concat ", " (List.map (fun ((n, _), v) -> Printf.sprintf "%S: %s" n (json_to_string v)) obj) ^ "}" 36 47 37 48 (* Serialization options *) 38 49 type serialize_options = { ··· 101 112 input : Jsont.json list; 102 113 expected : string list; 103 114 options : serialize_options; 115 + raw_json : string; (* Original JSON representation of this test *) 104 116 } 105 117 106 118 let parse_test_case json = 119 + let raw_json = json_to_string json in 107 120 let obj = json_object json in 108 121 let description = json_string (json_mem_exn "description" obj) in 109 122 let input = json_array (json_mem_exn "input" obj) in 110 123 let expected = List.map json_string (json_array (json_mem_exn "expected" obj)) in 111 124 let options = parse_options (json_mem "options" obj) in 112 - { description; input; expected; options } 125 + { description; input; expected; options; raw_json } 113 126 114 127 (* Parse attrs that can be either array [{name, value}] or object {name: value} or empty {} *) 115 128 let parse_attrs attrs_json = ··· 694 707 (false, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), test.expected) 695 708 696 709 (* Run all tests in a file *) 710 + let format_options opts = 711 + let parts = [] in 712 + let parts = if opts.quote_char_explicit then 713 + Printf.sprintf "quote_char='%c'" opts.quote_char :: parts else parts in 714 + let parts = if not opts.minimize_boolean_attributes then 715 + "minimize_bool=false" :: parts else parts in 716 + let parts = if opts.use_trailing_solidus then 717 + "trailing_solidus=true" :: parts else parts in 718 + let parts = if opts.escape_lt_in_attrs then 719 + "escape_lt=true" :: parts else parts in 720 + let parts = if opts.escape_rcdata then 721 + "escape_rcdata=true" :: parts else parts in 722 + let parts = if opts.strip_whitespace then 723 + "strip_ws=true" :: parts else parts in 724 + let parts = if opts.inject_meta_charset then 725 + "inject_charset=true" :: parts else parts in 726 + let parts = if not opts.omit_optional_tags then 727 + "omit_tags=false" :: parts else parts in 728 + if parts = [] then "(defaults)" else String.concat ", " (List.rev parts) 729 + 697 730 let run_file path = 698 731 let content = 699 732 let ic = open_in path in ··· 717 750 let filename = Filename.basename path in 718 751 let passed = ref 0 in 719 752 let failed = ref 0 in 720 - let first_failures = ref [] in 753 + let results = ref [] in 721 754 722 755 List.iteri (fun i test_json -> 723 756 try 724 757 let test = parse_test_case test_json in 725 758 let (success, actual, expected) = run_test test in 726 759 727 - if success then 728 - incr passed 729 - else begin 730 - incr failed; 731 - if List.length !first_failures < 3 then 732 - first_failures := (i + 1, test.description, actual, expected) :: !first_failures 733 - end 760 + let result : Report.test_result = { 761 + test_num = i + 1; 762 + description = test.description; 763 + input = String.concat "\n" (List.map (fun tok -> 764 + (* Simplified token representation *) 765 + match tok with 766 + | Jsont.Array (arr, _) -> 767 + (match arr with 768 + | Jsont.String (ty, _) :: rest -> 769 + Printf.sprintf "%s: %s" ty (String.concat ", " (List.map (function 770 + | Jsont.String (s, _) -> Printf.sprintf "%S" s 771 + | Jsont.Object _ -> "{...}" 772 + | Jsont.Null _ -> "null" 773 + | _ -> "?" 774 + ) rest)) 775 + | _ -> "?") 776 + | _ -> "?" 777 + ) test.input); 778 + expected = String.concat " | " expected; 779 + actual; 780 + success; 781 + details = [ 782 + ("Options", format_options test.options); 783 + ("Expected Variants", string_of_int (List.length expected)); 784 + ]; 785 + raw_test_data = Some test.raw_json; 786 + } in 787 + results := result :: !results; 788 + 789 + if success then incr passed else incr failed 734 790 with e -> 735 791 incr failed; 792 + let result : Report.test_result = { 793 + test_num = i + 1; 794 + description = Printf.sprintf "Test %d" (i + 1); 795 + input = ""; 796 + expected = ""; 797 + actual = Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e); 798 + success = false; 799 + details = []; 800 + raw_test_data = Some (json_to_string test_json); 801 + } in 802 + results := result :: !results; 736 803 Printf.eprintf "Exception parsing test %d: %s\n" (i + 1) (Printexc.to_string e) 737 804 ) tests_json; 738 805 739 - (!passed, !failed, List.rev !first_failures, filename) 806 + let file_result : Report.file_result = { 807 + filename; 808 + test_type = "Serializer"; 809 + passed_count = !passed; 810 + failed_count = !failed; 811 + tests = List.rev !results; 812 + } in 813 + (file_result, !passed, !failed) 740 814 741 815 let () = 742 816 let test_dir = Sys.argv.(1) in ··· 745 819 746 820 let total_passed = ref 0 in 747 821 let total_failed = ref 0 in 748 - let all_failures = ref [] in 822 + let file_results = ref [] in 749 823 750 824 List.iter (fun file -> 751 825 let path = Filename.concat test_dir file in 752 - let (passed, failed, failures, filename) = run_file path in 826 + let (file_result, passed, failed) = run_file path in 753 827 total_passed := !total_passed + passed; 754 828 total_failed := !total_failed + failed; 755 - if failures <> [] then 756 - all_failures := (filename, failures) :: !all_failures; 757 - Printf.printf "%s: %d passed, %d failed\n" filename passed failed 829 + file_results := file_result :: !file_results; 830 + Printf.printf "%s: %d passed, %d failed\n" file passed failed 758 831 ) (List.sort String.compare test_files); 759 832 760 833 Printf.printf "\n=== Summary ===\n"; 761 834 Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed; 762 835 763 - if !all_failures <> [] then begin 764 - Printf.printf "\n=== First failures ===\n"; 765 - List.iter (fun (filename, failures) -> 766 - List.iter (fun (test_num, desc, actual, expected) -> 767 - Printf.printf "\n--- %s test %d ---\n" filename test_num; 768 - Printf.printf "Description: %s\n" desc; 769 - Printf.printf "Expected: %s\n" (String.concat " | " expected); 770 - Printf.printf "Actual: %s\n" actual 771 - ) failures 772 - ) (List.rev !all_failures) 773 - end; 836 + (* Generate HTML report *) 837 + let report : Report.report = { 838 + title = "HTML5 Serializer Tests"; 839 + test_type = "serializer"; 840 + description = "These tests validate the HTML serialization algorithm for converting DOM trees back to HTML text. \ 841 + Each test provides a sequence of tokens (start tags, end tags, text, comments, doctypes) and one \ 842 + or more valid serialized outputs. Tests cover attribute quoting, boolean attribute minimization, \ 843 + self-closing tag syntax (trailing solidus), entity escaping, whitespace handling, meta charset \ 844 + injection, and optional tag omission rules as specified in the HTML Standard. Multiple expected \ 845 + outputs allow for valid variations in serialization style."; 846 + files = List.rev !file_results; 847 + total_passed = !total_passed; 848 + total_failed = !total_failed; 849 + } in 850 + Report.generate_report report "test_serializer_report.html"; 774 851 775 852 exit (if !total_failed > 0 then 1 else 0)
+75 -41
test/test_tokenizer.ml
··· 3 3 open Bytesrw 4 4 5 5 module Tokenizer = Html5rw_tokenizer 6 + module Report = Test_report 6 7 7 8 (* Token collector sink - collects all tokens into a list *) 8 9 module TokenCollector = struct ··· 31 32 last_start_tag : string option; 32 33 double_escaped : bool; 33 34 xml_mode : bool; 35 + raw_json : string; (* Original JSON representation of this test *) 34 36 } 35 37 36 38 (* Unescape double-escaped strings from tests *) ··· 99 101 | Some v -> v 100 102 | None -> failwith ("Missing member: " ^ name) 101 103 104 + (* Format JSON for display *) 105 + let rec json_to_string = function 106 + | Jsont.Null _ -> "null" 107 + | Jsont.Bool (b, _) -> string_of_bool b 108 + | Jsont.Number (n, _) -> Printf.sprintf "%g" n 109 + | Jsont.String (s, _) -> Printf.sprintf "%S" s 110 + | Jsont.Array (arr, _) -> 111 + "[" ^ String.concat ", " (List.map json_to_string arr) ^ "]" 112 + | Jsont.Object (obj, _) -> 113 + "{" ^ String.concat ", " (List.map (fun ((n, _), v) -> Printf.sprintf "%S: %s" n (json_to_string v)) obj) ^ "}" 114 + 102 115 (* Parse a single test case from JSON *) 103 116 let parse_test_case ~xml_mode json = 117 + let raw_json = json_to_string json in 104 118 let obj = json_object json in 105 119 let description = json_string (json_mem_exn "description" obj) in 106 120 let input = json_string (json_mem_exn "input" obj) in ··· 121 135 | Some b -> json_bool b 122 136 | None -> false 123 137 in 124 - { description; input; output; expected_error_count; initial_states; last_start_tag; double_escaped; xml_mode } 138 + { description; input; output; expected_error_count; initial_states; last_start_tag; double_escaped; xml_mode; raw_json } 125 139 126 140 (* Convert state name to State.t *) 127 141 let state_of_string = function ··· 258 272 259 273 (tokens_match && errors_count_match, actual_tokens, expected, actual_error_count, test.expected_error_count) 260 274 261 - (* Format JSON for display *) 262 - let rec json_to_string = function 263 - | Jsont.Null _ -> "null" 264 - | Jsont.Bool (b, _) -> string_of_bool b 265 - | Jsont.Number (n, _) -> Printf.sprintf "%g" n 266 - | Jsont.String (s, _) -> Printf.sprintf "%S" s 267 - | Jsont.Array (arr, _) -> 268 - "[" ^ String.concat ", " (List.map json_to_string arr) ^ "]" 269 - | Jsont.Object (obj, _) -> 270 - "{" ^ String.concat ", " (List.map (fun ((n, _), v) -> Printf.sprintf "%S: %s" n (json_to_string v)) obj) ^ "}" 271 - 272 275 (* Run all tests in a file *) 273 276 let run_file path = 274 277 let content = ··· 303 306 let filename = Filename.basename path in 304 307 let passed = ref 0 in 305 308 let failed = ref 0 in 306 - let first_failures = ref [] in 309 + let results = ref [] in 307 310 308 311 List.iteri (fun i test -> 309 - (* test is already parsed *) 310 - 311 312 (* Run for each initial state *) 312 313 List.iter (fun state_name -> 313 314 try 314 315 let state = state_of_string state_name in 315 316 let (success, actual, expected, actual_err_count, expected_err_count) = run_test test state in 316 317 317 - if success then 318 - incr passed 319 - else begin 320 - incr failed; 321 - if List.length !first_failures < 3 then 322 - first_failures := (i + 1, test.description, state_name, actual, expected, actual_err_count, expected_err_count) :: !first_failures 323 - end 318 + let description = Printf.sprintf "[%s] %s" state_name test.description in 319 + let result : Report.test_result = { 320 + test_num = i + 1; 321 + description; 322 + input = test.input; 323 + expected = String.concat "\n" (List.map json_to_string expected); 324 + actual = String.concat "\n" (List.map json_to_string actual); 325 + success; 326 + details = [ 327 + ("Initial State", state_name); 328 + ("Last Start Tag", Option.value test.last_start_tag ~default:"(none)"); 329 + ("Double Escaped", string_of_bool test.double_escaped); 330 + ("XML Mode", string_of_bool test.xml_mode); 331 + ("Expected Errors", string_of_int expected_err_count); 332 + ("Actual Errors", string_of_int actual_err_count); 333 + ]; 334 + raw_test_data = Some test.raw_json; 335 + } in 336 + results := result :: !results; 337 + 338 + if success then incr passed else incr failed 324 339 with e -> 325 340 incr failed; 326 - if List.length !first_failures < 3 then 327 - first_failures := (i + 1, test.description, state_name, [], [], 0, 0) :: !first_failures; 341 + let result : Report.test_result = { 342 + test_num = i + 1; 343 + description = Printf.sprintf "[%s] %s" state_name test.description; 344 + input = test.input; 345 + expected = ""; 346 + actual = Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e); 347 + success = false; 348 + details = []; 349 + raw_test_data = Some test.raw_json; 350 + } in 351 + results := result :: !results; 328 352 Printf.eprintf "Exception in test %d (%s): %s\n" (i + 1) test.description (Printexc.to_string e) 329 353 ) test.initial_states 330 354 ) all_tests; 331 355 332 - (!passed, !failed, List.rev !first_failures, filename) 356 + let file_result : Report.file_result = { 357 + filename; 358 + test_type = "Tokenizer"; 359 + passed_count = !passed; 360 + failed_count = !failed; 361 + tests = List.rev !results; 362 + } in 363 + (file_result, !passed, !failed) 333 364 334 365 let () = 335 366 let test_dir = Sys.argv.(1) in ··· 338 369 339 370 let total_passed = ref 0 in 340 371 let total_failed = ref 0 in 341 - let all_failures = ref [] in 372 + let file_results = ref [] in 342 373 343 374 List.iter (fun file -> 344 375 let path = Filename.concat test_dir file in 345 - let (passed, failed, failures, filename) = run_file path in 376 + let (file_result, passed, failed) = run_file path in 346 377 total_passed := !total_passed + passed; 347 378 total_failed := !total_failed + failed; 348 - if failures <> [] then 349 - all_failures := (filename, failures) :: !all_failures; 350 - Printf.printf "%s: %d passed, %d failed\n" filename passed failed 379 + file_results := file_result :: !file_results; 380 + Printf.printf "%s: %d passed, %d failed\n" file passed failed 351 381 ) (List.sort String.compare test_files); 352 382 353 383 Printf.printf "\n=== Summary ===\n"; 354 384 Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed; 355 385 356 - if !all_failures <> [] then begin 357 - Printf.printf "\n=== First failures ===\n"; 358 - List.iter (fun (filename, failures) -> 359 - List.iter (fun (test_num, desc, state, actual, expected, actual_err_count, expected_err_count) -> 360 - Printf.printf "\n--- %s test %d (%s) in %s ---\n" filename test_num state desc; 361 - Printf.printf "Expected tokens: [%s]\n" (String.concat "; " (List.map json_to_string expected)); 362 - Printf.printf "Actual tokens: [%s]\n" (String.concat "; " (List.map json_to_string actual)); 363 - Printf.printf "Expected %d errors, got %d\n" expected_err_count actual_err_count 364 - ) failures 365 - ) (List.rev !all_failures) 366 - end; 386 + (* Generate HTML report *) 387 + let report : Report.report = { 388 + title = "HTML5 Tokenizer Tests"; 389 + test_type = "tokenizer"; 390 + description = "These tests validate the HTML5 tokenization algorithm as specified in the WHATWG HTML Standard. \ 391 + The tokenizer converts HTML input into a stream of tokens (DOCTYPE, start tags, end tags, comments, \ 392 + character data, and EOF). Each test specifies input HTML, expected tokens in JSON array format, \ 393 + and the initial tokenizer state. Tests cover normal parsing, RCDATA/RAWTEXT/PLAINTEXT states, \ 394 + script data parsing, CDATA sections, and various error conditions. Some tests are double-escaped \ 395 + to represent special characters. XML violation tests check behavior differences from XML mode."; 396 + files = List.rev !file_results; 397 + total_passed = !total_passed; 398 + total_failed = !total_failed; 399 + } in 400 + Report.generate_report report "test_tokenizer_report.html"; 367 401 368 402 exit (if !total_failed > 0 then 1 else 0)