OCaml HTML5 parser/serialiser based on Python's JustHTML

Add browser-based regression test runner

Add JS/WASM compilation rules for running html5lib conformance tests
in the browser:

- lib/js/htmlrw_js_tests.ml: Browser-compatible test runner that can
run tree-construction and encoding detection tests
- lib/js/dune: Updated with test runner executable for js and wasm modes
- test-regression.html: Interactive test page that loads test data files
and runs the full regression suite with progress and filtering

The test runner exposes a JavaScript API (html5rwTests) that can:
- Run individual test files
- Run all tests from a file list
- Quick parse test for simple validation

Build with: opam exec -- dune build lib/js/htmlrw-tests.js

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

+20
lib/js/dune
··· 37 (modes js wasm) 38 (modules htmlrw_js_worker)) 39 40 ; Copy to nice filenames (JS) 41 (rule 42 (targets htmlrw.js) ··· 48 (deps htmlrw_js_worker.bc.js) 49 (action (copy %{deps} %{targets}))) 50 51 ; Copy to nice filenames (WASM) 52 ; Note: requires wasm_of_ocaml-compiler to be installed 53 (rule ··· 59 (targets htmlrw-worker.wasm.js) 60 (deps htmlrw_js_worker.bc.wasm.js) 61 (action (copy %{deps} %{targets})))
··· 37 (modes js wasm) 38 (modules htmlrw_js_worker)) 39 40 + ; Test runner for browser-based regression testing 41 + ; Runs html5lib conformance tests in the browser 42 + (executable 43 + (name htmlrw_js_tests_main) 44 + (libraries html5rw bytesrw brr) 45 + (js_of_ocaml 46 + (javascript_files)) 47 + (modes js wasm) 48 + (modules htmlrw_js_tests htmlrw_js_tests_main)) 49 + 50 ; Copy to nice filenames (JS) 51 (rule 52 (targets htmlrw.js) ··· 58 (deps htmlrw_js_worker.bc.js) 59 (action (copy %{deps} %{targets}))) 60 61 + (rule 62 + (targets htmlrw-tests.js) 63 + (deps htmlrw_js_tests_main.bc.js) 64 + (action (copy %{deps} %{targets}))) 65 + 66 ; Copy to nice filenames (WASM) 67 ; Note: requires wasm_of_ocaml-compiler to be installed 68 (rule ··· 74 (targets htmlrw-worker.wasm.js) 75 (deps htmlrw_js_worker.bc.wasm.js) 76 (action (copy %{deps} %{targets}))) 77 + 78 + (rule 79 + (targets htmlrw-tests.wasm.js) 80 + (deps htmlrw_js_tests_main.bc.wasm.js) 81 + (action (copy %{deps} %{targets})))
+407
lib/js/htmlrw_js_tests.ml
···
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Browser-compatible test runner for html5rw regression tests. 7 + 8 + This module provides functions to run html5lib conformance tests 9 + in the browser, receiving test data as strings and returning results 10 + as JavaScript-accessible objects. *) 11 + 12 + [@@@warning "-69-33"] (* Silence unused-field and unused-open warnings *) 13 + 14 + open Brr 15 + 16 + (* ============================================================ *) 17 + (* Test Result Types *) 18 + (* ============================================================ *) 19 + 20 + type test_result = { 21 + test_num : int; 22 + description : string; 23 + input : string; 24 + expected : string; 25 + actual : string; 26 + success : bool; 27 + } 28 + 29 + type file_result = { 30 + filename : string; 31 + test_type : string; 32 + passed_count : int; 33 + failed_count : int; 34 + tests : test_result list; 35 + } 36 + 37 + type suite_result = { 38 + name : string; 39 + total_passed : int; 40 + total_failed : int; 41 + files : file_result list; 42 + } 43 + 44 + (* ============================================================ *) 45 + (* Tree Construction Tests *) 46 + (* ============================================================ *) 47 + 48 + module TreeConstruction = struct 49 + type test_case = { 50 + input : string; 51 + expected_tree : string; 52 + expected_errors : string list; 53 + script_on : bool; 54 + fragment_context : string option; 55 + } 56 + 57 + let parse_test_case lines = 58 + let rec parse acc = function 59 + | [] -> acc 60 + | line :: rest when String.length line > 0 && line.[0] = '#' -> 61 + let section = String.trim line in 62 + let content, remaining = collect_section rest in 63 + parse ((section, content) :: acc) remaining 64 + | _ :: rest -> parse acc rest 65 + and collect_section lines = 66 + let rec loop acc = function 67 + | [] -> (List.rev acc, []) 68 + | line :: rest when String.length line > 0 && line.[0] = '#' -> 69 + (List.rev acc, line :: rest) 70 + | line :: rest -> loop (line :: acc) rest 71 + in 72 + loop [] lines 73 + in 74 + let sections = parse [] lines in 75 + let get_section name = 76 + match List.assoc_opt name sections with 77 + | Some lines -> String.concat "\n" lines 78 + | None -> "" 79 + in 80 + let data = get_section "#data" in 81 + let document = get_section "#document" in 82 + let errors_text = get_section "#errors" in 83 + let errors = 84 + String.split_on_char '\n' errors_text 85 + |> List.filter (fun s -> String.trim s <> "") 86 + in 87 + let script_on = List.mem_assoc "#script-on" sections in 88 + let fragment = 89 + if List.mem_assoc "#document-fragment" sections then 90 + Some (get_section "#document-fragment" |> String.trim) 91 + else None 92 + in 93 + { input = data; expected_tree = document; expected_errors = errors; 94 + script_on; fragment_context = fragment } 95 + 96 + let parse_dat_content content = 97 + let lines = String.split_on_char '\n' content in 98 + let rec split_tests current acc = function 99 + | [] -> 100 + if current = [] then List.rev acc 101 + else List.rev (List.rev current :: acc) 102 + | "" :: "#data" :: rest -> 103 + let new_acc = if current = [] then acc else (List.rev current :: acc) in 104 + split_tests ["#data"] new_acc rest 105 + | line :: rest -> 106 + split_tests (line :: current) acc rest 107 + in 108 + let test_groups = split_tests [] [] lines in 109 + List.filter_map (fun lines -> 110 + if List.exists (fun l -> l = "#data") lines then 111 + Some (parse_test_case lines) 112 + else None 113 + ) test_groups 114 + 115 + let strip_tree_prefix s = 116 + let lines = String.split_on_char '\n' s in 117 + let stripped = List.filter_map (fun line -> 118 + if String.length line >= 2 && String.sub line 0 2 = "| " then 119 + Some (String.sub line 2 (String.length line - 2)) 120 + else if String.trim line = "" then None 121 + else Some line 122 + ) lines in 123 + String.concat "\n" stripped 124 + 125 + let normalize_tree s = 126 + let lines = String.split_on_char '\n' s in 127 + let non_empty = List.filter (fun l -> String.trim l <> "") lines in 128 + String.concat "\n" non_empty 129 + 130 + let run_test test = 131 + try 132 + let result = 133 + match test.fragment_context with 134 + | Some ctx_str -> 135 + let (namespace, tag_name) = 136 + match String.split_on_char ' ' ctx_str with 137 + | [ns; tag] when ns = "svg" -> (Some "svg", tag) 138 + | [ns; tag] when ns = "math" -> (Some "mathml", tag) 139 + | [tag] -> (None, tag) 140 + | _ -> (None, ctx_str) 141 + in 142 + let context = Html5rw.Parser.make_fragment_context ~tag_name ~namespace () in 143 + let reader = Bytesrw.Bytes.Reader.of_string test.input in 144 + Html5rw.Parser.parse ~collect_errors:true ~fragment_context:context reader 145 + | None -> 146 + let reader = Bytesrw.Bytes.Reader.of_string test.input in 147 + Html5rw.Parser.parse ~collect_errors:true reader 148 + in 149 + let actual_tree = Html5rw.Dom.to_test_format (Html5rw.Parser.root result) in 150 + let expected = normalize_tree (strip_tree_prefix test.expected_tree) in 151 + let actual = normalize_tree (strip_tree_prefix actual_tree) in 152 + (expected = actual, expected, actual) 153 + with e -> 154 + let expected = normalize_tree (strip_tree_prefix test.expected_tree) in 155 + (false, expected, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e)) 156 + 157 + let run_content ~filename content = 158 + let tests = parse_dat_content content in 159 + let passed = ref 0 in 160 + let failed = ref 0 in 161 + let results = ref [] in 162 + List.iteri (fun i test -> 163 + if test.script_on then () 164 + else begin 165 + let (success, expected, actual) = run_test test in 166 + let description = 167 + let input_preview = String.sub test.input 0 (min 60 (String.length test.input)) in 168 + if test.fragment_context <> None then 169 + Printf.sprintf "Fragment (%s): %s" (Option.get test.fragment_context) input_preview 170 + else input_preview 171 + in 172 + let result = { 173 + test_num = i + 1; description; input = test.input; expected; actual; success; 174 + } in 175 + results := result :: !results; 176 + if success then incr passed else incr failed 177 + end 178 + ) tests; 179 + { 180 + filename; test_type = "Tree Construction"; 181 + passed_count = !passed; failed_count = !failed; 182 + tests = List.rev !results; 183 + } 184 + end 185 + 186 + (* ============================================================ *) 187 + (* Encoding Tests *) 188 + (* ============================================================ *) 189 + 190 + module EncodingTests = struct 191 + type test_case = { 192 + input : string; 193 + expected_encoding : string; 194 + } 195 + 196 + let normalize_encoding_name s = String.lowercase_ascii (String.trim s) 197 + 198 + let encoding_to_test_name = function 199 + | Html5rw.Encoding.Utf8 -> "utf-8" 200 + | Html5rw.Encoding.Utf16le -> "utf-16le" 201 + | Html5rw.Encoding.Utf16be -> "utf-16be" 202 + | Html5rw.Encoding.Windows_1252 -> "windows-1252" 203 + | Html5rw.Encoding.Iso_8859_2 -> "iso-8859-2" 204 + | Html5rw.Encoding.Euc_jp -> "euc-jp" 205 + 206 + let parse_test_case lines = 207 + let rec parse acc = function 208 + | [] -> acc 209 + | line :: rest when String.length line > 0 && line.[0] = '#' -> 210 + let section = String.trim line in 211 + let content, remaining = collect_section rest in 212 + parse ((section, content) :: acc) remaining 213 + | _ :: rest -> parse acc rest 214 + and collect_section lines = 215 + let rec loop acc = function 216 + | [] -> (List.rev acc, []) 217 + | line :: rest when String.length line > 0 && line.[0] = '#' -> 218 + (List.rev acc, line :: rest) 219 + | line :: rest -> loop (line :: acc) rest 220 + in loop [] lines 221 + in 222 + let sections = parse [] lines in 223 + let get_section name = 224 + match List.assoc_opt name sections with 225 + | Some lines -> String.concat "\n" lines | None -> "" 226 + in 227 + let data = get_section "#data" in 228 + let encoding = get_section "#encoding" in 229 + { input = data; expected_encoding = String.trim encoding } 230 + 231 + let parse_dat_content content = 232 + let lines = String.split_on_char '\n' content in 233 + let rec split_tests current acc = function 234 + | [] -> if current = [] then List.rev acc else List.rev (List.rev current :: acc) 235 + | "" :: "#data" :: rest -> 236 + let new_acc = if current = [] then acc else (List.rev current :: acc) in 237 + split_tests ["#data"] new_acc rest 238 + | line :: rest -> split_tests (line :: current) acc rest 239 + in 240 + let test_groups = split_tests [] [] lines in 241 + List.filter_map (fun lines -> 242 + if List.exists (fun l -> l = "#data") lines then Some (parse_test_case lines) 243 + else None 244 + ) test_groups 245 + 246 + let run_test test = 247 + try 248 + let (_, detected_encoding) = Html5rw.Encoding.decode (Bytes.of_string test.input) () in 249 + let detected_name = encoding_to_test_name detected_encoding in 250 + let expected_name = normalize_encoding_name test.expected_encoding in 251 + let match_encoding det exp = 252 + det = exp || 253 + (det = "windows-1252" && (exp = "windows-1252" || exp = "cp1252" || exp = "iso-8859-1")) || 254 + (det = "iso-8859-2" && (exp = "iso-8859-2" || exp = "iso8859-2" || exp = "latin2")) || 255 + (det = "utf-8" && (exp = "utf-8" || exp = "utf8")) || 256 + (det = "euc-jp" && (exp = "euc-jp" || exp = "eucjp")) 257 + in 258 + (match_encoding detected_name expected_name, detected_name, expected_name) 259 + with e -> 260 + (false, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), test.expected_encoding) 261 + 262 + let run_content ~filename content = 263 + let tests = parse_dat_content content in 264 + let passed = ref 0 in 265 + let failed = ref 0 in 266 + let results = ref [] in 267 + List.iteri (fun i test -> 268 + if String.trim test.expected_encoding = "" then () 269 + else begin 270 + let (success, detected, expected) = run_test test in 271 + let result = { 272 + test_num = i + 1; 273 + description = Printf.sprintf "Detect %s encoding" expected; 274 + input = String.escaped test.input; 275 + expected; actual = detected; success; 276 + } in 277 + results := result :: !results; 278 + if success then incr passed else incr failed 279 + end 280 + ) tests; 281 + { 282 + filename; test_type = "Encoding Detection"; 283 + passed_count = !passed; failed_count = !failed; 284 + tests = List.rev !results; 285 + } 286 + end 287 + 288 + (* ============================================================ *) 289 + (* JavaScript API *) 290 + (* ============================================================ *) 291 + 292 + let test_result_to_jv (r : test_result) = 293 + Jv.obj [| 294 + "testNum", Jv.of_int r.test_num; 295 + "description", Jv.of_string r.description; 296 + "input", Jv.of_string r.input; 297 + "expected", Jv.of_string r.expected; 298 + "actual", Jv.of_string r.actual; 299 + "success", Jv.of_bool r.success; 300 + |] 301 + 302 + let file_result_to_jv (r : file_result) = 303 + Jv.obj [| 304 + "filename", Jv.of_string r.filename; 305 + "testType", Jv.of_string r.test_type; 306 + "passedCount", Jv.of_int r.passed_count; 307 + "failedCount", Jv.of_int r.failed_count; 308 + "tests", Jv.of_list test_result_to_jv r.tests; 309 + |] 310 + 311 + let suite_result_to_jv (r : suite_result) = 312 + Jv.obj [| 313 + "name", Jv.of_string r.name; 314 + "totalPassed", Jv.of_int r.total_passed; 315 + "totalFailed", Jv.of_int r.total_failed; 316 + "files", Jv.of_list file_result_to_jv r.files; 317 + |] 318 + 319 + (** Run tree construction tests on a single file's content *) 320 + let run_tree_construction_test filename content = 321 + let result = TreeConstruction.run_content ~filename content in 322 + file_result_to_jv result 323 + 324 + (** Run encoding detection tests on a single file's content *) 325 + let run_encoding_test filename content = 326 + let result = EncodingTests.run_content ~filename content in 327 + file_result_to_jv result 328 + 329 + (** Run all tests from provided test data *) 330 + let run_all_tests (test_files : (string * string * string) list) = 331 + let tree_files = ref [] in 332 + let encoding_files = ref [] in 333 + let total_passed = ref 0 in 334 + let total_failed = ref 0 in 335 + 336 + List.iter (fun (test_type, filename, content) -> 337 + let result = match test_type with 338 + | "tree-construction" -> 339 + let r = TreeConstruction.run_content ~filename content in 340 + tree_files := r :: !tree_files; 341 + r 342 + | "encoding" -> 343 + let r = EncodingTests.run_content ~filename content in 344 + encoding_files := r :: !encoding_files; 345 + r 346 + | _ -> failwith ("Unknown test type: " ^ test_type) 347 + in 348 + total_passed := !total_passed + result.passed_count; 349 + total_failed := !total_failed + result.failed_count 350 + ) test_files; 351 + 352 + let all_files = List.rev !tree_files @ List.rev !encoding_files in 353 + let suite = { 354 + name = "HTML5lib Regression Tests"; 355 + total_passed = !total_passed; 356 + total_failed = !total_failed; 357 + files = all_files; 358 + } in 359 + suite_result_to_jv suite 360 + 361 + (* ============================================================ *) 362 + (* Simple Parser Test for Quick Validation *) 363 + (* ============================================================ *) 364 + 365 + let quick_parse_test html = 366 + try 367 + let reader = Bytesrw.Bytes.Reader.of_string html in 368 + let result = Html5rw.Parser.parse ~collect_errors:true reader in 369 + let root = Html5rw.Parser.root result in 370 + let serialized = Html5rw.Dom.to_html root in 371 + let errors = Html5rw.Parser.errors result in 372 + let error_to_string e = Format.asprintf "%a" Html5rw.pp_parse_error e in 373 + Jv.obj [| 374 + "success", Jv.of_bool true; 375 + "html", Jv.of_string serialized; 376 + "errorCount", Jv.of_int (List.length errors); 377 + "errors", Jv.of_list (fun e -> Jv.of_string (error_to_string e)) errors; 378 + |] 379 + with e -> 380 + Jv.obj [| 381 + "success", Jv.of_bool false; 382 + "error", Jv.of_string (Printexc.to_string e); 383 + |] 384 + 385 + (* ============================================================ *) 386 + (* Export to JavaScript *) 387 + (* ============================================================ *) 388 + 389 + let () = 390 + let html5rw_tests = Jv.obj [| 391 + "runTreeConstructionTest", Jv.callback ~arity:2 (fun filename content -> 392 + run_tree_construction_test (Jv.to_string filename) (Jv.to_string content)); 393 + "runEncodingTest", Jv.callback ~arity:2 (fun filename content -> 394 + run_encoding_test (Jv.to_string filename) (Jv.to_string content)); 395 + "runAllTests", Jv.callback ~arity:1 (fun files_jv -> 396 + let files = Jv.to_list (fun item -> 397 + let test_type = Jv.to_string (Jv.get item "type") in 398 + let filename = Jv.to_string (Jv.get item "filename") in 399 + let content = Jv.to_string (Jv.get item "content") in 400 + (test_type, filename, content) 401 + ) files_jv in 402 + run_all_tests files); 403 + "quickParseTest", Jv.callback ~arity:1 (fun html -> 404 + quick_parse_test (Jv.to_string html)); 405 + "version", Jv.of_string "1.0.0"; 406 + |] in 407 + Jv.set Jv.global "html5rwTests" html5rw_tests
+25
lib/js/htmlrw_js_tests.mli
···
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Browser-compatible test runner for html5rw regression tests. 7 + 8 + This module provides functions to run html5lib conformance tests 9 + in the browser. Test data is provided as strings and results are 10 + returned as JavaScript-accessible objects. 11 + 12 + {2 JavaScript API} 13 + 14 + The following functions are exposed to JavaScript via the global 15 + [html5rwTests] object: 16 + 17 + - [html5rwTests.runTreeConstructionTest(filename, content)] - Run tree 18 + construction tests from a .dat file content 19 + - [html5rwTests.runEncodingTest(filename, content)] - Run encoding 20 + detection tests from a .dat file content 21 + - [html5rwTests.runAllTests(files)] - Run all tests from an array of 22 + file objects with {type, filename, content} 23 + - [html5rwTests.quickParseTest(html)] - Quick parse test for a single 24 + HTML string 25 + - [html5rwTests.version] - Version string *)
+10
lib/js/htmlrw_js_tests_main.ml
···
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Entry point for the browser test runner. 7 + 8 + The test runner module registers its JavaScript exports when loaded. *) 9 + 10 + (* Nothing needed here - the Htmlrw_js_tests module registers exports at load time *)
+6
lib/js/htmlrw_js_tests_main.mli
···
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Entry point for the browser test runner. *)
+668
test-regression.html
···
··· 1 + <!DOCTYPE html> 2 + <html lang="en"> 3 + <head> 4 + <meta charset="UTF-8"> 5 + <meta name="viewport" content="width=device-width, initial-scale=1.0"> 6 + <title>HTML5rw Regression Test Suite</title> 7 + <style> 8 + :root { 9 + --bg-primary: #1a1a2e; 10 + --bg-secondary: #16213e; 11 + --bg-tertiary: #0f3460; 12 + --text-primary: #eee; 13 + --text-secondary: #aaa; 14 + --text-muted: #666; 15 + --accent: #e94560; 16 + --accent-light: #ff6b8a; 17 + --success: #4ade80; 18 + --success-dim: rgba(74, 222, 128, 0.2); 19 + --failure: #f87171; 20 + --failure-dim: rgba(248, 113, 113, 0.2); 21 + --warning: #fbbf24; 22 + --info: #60a5fa; 23 + --border: #333; 24 + --code-bg: #0d1117; 25 + } 26 + 27 + * { box-sizing: border-box; margin: 0; padding: 0; } 28 + 29 + body { 30 + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; 31 + background: var(--bg-primary); 32 + color: var(--text-primary); 33 + line-height: 1.6; 34 + padding: 20px; 35 + } 36 + 37 + .container { max-width: 1400px; margin: 0 auto; } 38 + 39 + .hero { 40 + background: linear-gradient(135deg, var(--bg-secondary) 0%, var(--bg-tertiary) 100%); 41 + padding: 30px 40px; 42 + border-radius: 12px; 43 + margin-bottom: 30px; 44 + border: 1px solid var(--border); 45 + } 46 + 47 + .hero h1 { 48 + font-size: 2rem; 49 + margin-bottom: 10px; 50 + color: var(--accent); 51 + } 52 + 53 + .hero p { color: var(--text-secondary); margin-bottom: 15px; } 54 + 55 + .controls { 56 + display: flex; 57 + gap: 12px; 58 + flex-wrap: wrap; 59 + align-items: center; 60 + } 61 + 62 + button { 63 + padding: 12px 24px; 64 + border: none; 65 + border-radius: 8px; 66 + background: var(--accent); 67 + color: white; 68 + cursor: pointer; 69 + font-size: 14px; 70 + font-weight: 600; 71 + transition: all 0.2s; 72 + } 73 + 74 + button:hover { background: var(--accent-light); transform: translateY(-1px); } 75 + button:disabled { opacity: 0.5; cursor: not-allowed; transform: none; } 76 + button.secondary { background: var(--bg-tertiary); border: 1px solid var(--border); } 77 + button.secondary:hover { background: var(--bg-secondary); } 78 + 79 + select { 80 + padding: 12px 16px; 81 + border: 1px solid var(--border); 82 + border-radius: 8px; 83 + background: var(--bg-secondary); 84 + color: var(--text-primary); 85 + font-size: 14px; 86 + } 87 + 88 + .summary-grid { 89 + display: grid; 90 + grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); 91 + gap: 20px; 92 + margin-bottom: 30px; 93 + } 94 + 95 + .summary-card { 96 + background: var(--bg-secondary); 97 + border-radius: 12px; 98 + padding: 20px; 99 + border: 1px solid var(--border); 100 + text-align: center; 101 + } 102 + 103 + .summary-card h3 { 104 + font-size: 0.8rem; 105 + text-transform: uppercase; 106 + letter-spacing: 1px; 107 + color: var(--text-secondary); 108 + margin-bottom: 8px; 109 + } 110 + 111 + .summary-card .value { 112 + font-size: 2rem; 113 + font-weight: 700; 114 + } 115 + 116 + .summary-card .value.success { color: var(--success); } 117 + .summary-card .value.failure { color: var(--failure); } 118 + .summary-card .value.neutral { color: var(--text-primary); } 119 + 120 + .progress-container { 121 + background: var(--bg-secondary); 122 + border-radius: 12px; 123 + padding: 20px; 124 + margin-bottom: 30px; 125 + border: 1px solid var(--border); 126 + } 127 + 128 + .progress-bar { 129 + height: 24px; 130 + background: var(--failure-dim); 131 + border-radius: 12px; 132 + overflow: hidden; 133 + margin-top: 10px; 134 + } 135 + 136 + .progress-fill { 137 + height: 100%; 138 + background: var(--success); 139 + border-radius: 12px; 140 + transition: width 0.3s ease; 141 + display: flex; 142 + align-items: center; 143 + justify-content: center; 144 + font-size: 12px; 145 + font-weight: 600; 146 + } 147 + 148 + .status-text { 149 + font-size: 14px; 150 + color: var(--text-secondary); 151 + margin-bottom: 8px; 152 + } 153 + 154 + .results-section { 155 + background: var(--bg-secondary); 156 + border-radius: 12px; 157 + margin-bottom: 20px; 158 + border: 1px solid var(--border); 159 + overflow: hidden; 160 + } 161 + 162 + .results-header { 163 + padding: 16px 20px; 164 + background: var(--bg-tertiary); 165 + cursor: pointer; 166 + display: flex; 167 + justify-content: space-between; 168 + align-items: center; 169 + } 170 + 171 + .results-header:hover { background: #1a4a7a; } 172 + 173 + .results-header h2 { 174 + font-size: 1rem; 175 + display: flex; 176 + align-items: center; 177 + gap: 10px; 178 + } 179 + 180 + .results-header .toggle { color: var(--text-secondary); transition: transform 0.2s; } 181 + .results-header.collapsed .toggle { transform: rotate(-90deg); } 182 + 183 + .results-stats { 184 + display: flex; 185 + gap: 15px; 186 + font-size: 14px; 187 + } 188 + 189 + .results-stats .passed { color: var(--success); } 190 + .results-stats .failed { color: var(--failure); } 191 + 192 + .results-content { padding: 15px; } 193 + .results-content.hidden { display: none; } 194 + 195 + .test-item { 196 + margin: 6px 0; 197 + border: 1px solid var(--border); 198 + border-radius: 6px; 199 + overflow: hidden; 200 + } 201 + 202 + .test-header { 203 + padding: 10px 14px; 204 + cursor: pointer; 205 + display: flex; 206 + justify-content: space-between; 207 + align-items: center; 208 + background: var(--bg-primary); 209 + font-size: 13px; 210 + } 211 + 212 + .test-header:hover { background: rgba(255,255,255,0.03); } 213 + 214 + .test-header .status { 215 + width: 8px; 216 + height: 8px; 217 + border-radius: 50%; 218 + margin-right: 10px; 219 + flex-shrink: 0; 220 + } 221 + 222 + .test-header .status.passed { background: var(--success); } 223 + .test-header .status.failed { background: var(--failure); } 224 + 225 + .test-header .test-info { flex: 1; display: flex; align-items: center; min-width: 0; } 226 + .test-header .test-num { font-weight: 600; margin-right: 10px; color: var(--text-muted); } 227 + .test-header .test-desc { white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } 228 + .test-header .expand-icon { color: var(--text-muted); font-size: 0.75rem; } 229 + 230 + .test-details { 231 + padding: 15px; 232 + background: var(--code-bg); 233 + border-top: 1px solid var(--border); 234 + display: none; 235 + font-size: 13px; 236 + } 237 + 238 + .test-details.visible { display: block; } 239 + 240 + .detail-section { margin-bottom: 15px; } 241 + .detail-section:last-child { margin-bottom: 0; } 242 + 243 + .detail-section h4 { 244 + font-size: 11px; 245 + text-transform: uppercase; 246 + letter-spacing: 1px; 247 + color: var(--text-muted); 248 + margin-bottom: 8px; 249 + } 250 + 251 + .detail-section pre { 252 + background: var(--bg-secondary); 253 + padding: 12px; 254 + border-radius: 6px; 255 + overflow-x: auto; 256 + font-family: 'Monaco', 'Menlo', monospace; 257 + font-size: 12px; 258 + white-space: pre-wrap; 259 + word-break: break-word; 260 + max-height: 300px; 261 + overflow-y: auto; 262 + border: 1px solid var(--border); 263 + } 264 + 265 + .detail-row { 266 + display: grid; 267 + grid-template-columns: 1fr 1fr; 268 + gap: 15px; 269 + } 270 + 271 + .filter-controls { 272 + display: flex; 273 + gap: 10px; 274 + margin-bottom: 20px; 275 + flex-wrap: wrap; 276 + } 277 + 278 + .filter-controls input[type="search"] { 279 + padding: 10px 14px; 280 + border: 1px solid var(--border); 281 + border-radius: 8px; 282 + background: var(--bg-secondary); 283 + color: var(--text-primary); 284 + font-size: 14px; 285 + width: 250px; 286 + } 287 + 288 + .log-output { 289 + background: var(--code-bg); 290 + border: 1px solid var(--border); 291 + border-radius: 8px; 292 + padding: 15px; 293 + font-family: 'Monaco', 'Menlo', monospace; 294 + font-size: 12px; 295 + max-height: 200px; 296 + overflow-y: auto; 297 + white-space: pre-wrap; 298 + margin-bottom: 20px; 299 + } 300 + 301 + @media (max-width: 768px) { 302 + .detail-row { grid-template-columns: 1fr; } 303 + .summary-grid { grid-template-columns: 1fr 1fr; } 304 + } 305 + </style> 306 + </head> 307 + <body> 308 + <div class="container"> 309 + <div class="hero"> 310 + <h1>HTML5rw Regression Test Suite</h1> 311 + <p> 312 + Browser-based regression testing for the HTML5rw OCaml parser. 313 + Tests are loaded from the html5lib-tests conformance suite. 314 + </p> 315 + <div class="controls"> 316 + <button id="run-all" onclick="runAllTests()">Run All Tests</button> 317 + <button id="run-tree" class="secondary" onclick="runTreeTests()">Tree Construction Only</button> 318 + <button id="run-encoding" class="secondary" onclick="runEncodingTests()">Encoding Only</button> 319 + <select id="mode-select"> 320 + <option value="js">JavaScript (js_of_ocaml)</option> 321 + <option value="wasm">WebAssembly (wasm_of_ocaml)</option> 322 + </select> 323 + </div> 324 + </div> 325 + 326 + <div class="summary-grid" id="summary" style="display: none;"> 327 + <div class="summary-card"> 328 + <h3>Total Tests</h3> 329 + <div class="value neutral" id="total-count">0</div> 330 + </div> 331 + <div class="summary-card"> 332 + <h3>Passed</h3> 333 + <div class="value success" id="passed-count">0</div> 334 + </div> 335 + <div class="summary-card"> 336 + <h3>Failed</h3> 337 + <div class="value failure" id="failed-count">0</div> 338 + </div> 339 + <div class="summary-card"> 340 + <h3>Pass Rate</h3> 341 + <div class="value" id="pass-rate">0%</div> 342 + </div> 343 + </div> 344 + 345 + <div class="progress-container" id="progress-container"> 346 + <div class="status-text" id="status-text">Ready to run tests. Click a button above to start.</div> 347 + <div class="progress-bar"> 348 + <div class="progress-fill" id="progress-fill" style="width: 0%"></div> 349 + </div> 350 + </div> 351 + 352 + <div class="log-output" id="log-output">Waiting for tests to start...</div> 353 + 354 + <div class="filter-controls" id="filter-controls" style="display: none;"> 355 + <input type="search" id="search" placeholder="Search tests..."> 356 + <select id="filter"> 357 + <option value="all">All Tests</option> 358 + <option value="passed">Passed Only</option> 359 + <option value="failed">Failed Only</option> 360 + </select> 361 + <button class="secondary" onclick="expandAll()">Expand All</button> 362 + <button class="secondary" onclick="collapseAll()">Collapse All</button> 363 + </div> 364 + 365 + <div id="results-container"></div> 366 + </div> 367 + 368 + <script> 369 + // Test file lists 370 + const TREE_CONSTRUCTION_FILES = [ 371 + "adoption01.dat", "adoption02.dat", "blocks.dat", "comments01.dat", 372 + "doctype01.dat", "domjs-unsafe.dat", "entities01.dat", "entities02.dat", 373 + "foreign-fragment.dat", "html5test-com.dat", "inbody01.dat", "isindex.dat", 374 + "main-element.dat", "math.dat", "menuitem-element.dat", "namespace-sensitivity.dat", 375 + "noscript01.dat", "pending-spec-changes-plain-text-unsafe.dat", 376 + "pending-spec-changes.dat", "plain-text-unsafe.dat", "quirks01.dat", "ruby.dat", 377 + "scriptdata01.dat", "search-element.dat", "svg.dat", "tables01.dat", 378 + "template.dat", "tests_innerHTML_1.dat", "tests1.dat", "tests10.dat", 379 + "tests11.dat", "tests12.dat", "tests14.dat", "tests15.dat", "tests16.dat", 380 + "tests17.dat", "tests18.dat", "tests19.dat", "tests2.dat", "tests20.dat", 381 + "tests21.dat", "tests22.dat", "tests23.dat", "tests24.dat", "tests25.dat", 382 + "tests26.dat", "tests3.dat", "tests4.dat", "tests5.dat", "tests6.dat", 383 + "tests7.dat", "tests8.dat", "tests9.dat", "tricky01.dat", "webkit01.dat", 384 + "webkit02.dat" 385 + ]; 386 + 387 + const ENCODING_FILES = [ 388 + "test-yahoo-jp.dat", "tests1.dat", "tests2.dat" 389 + ]; 390 + 391 + let testRunner = null; 392 + let isRunning = false; 393 + 394 + function log(msg) { 395 + const output = document.getElementById('log-output'); 396 + output.textContent += msg + '\n'; 397 + output.scrollTop = output.scrollHeight; 398 + } 399 + 400 + function clearLog() { 401 + document.getElementById('log-output').textContent = ''; 402 + } 403 + 404 + function updateProgress(current, total, msg) { 405 + const pct = total > 0 ? (current / total * 100) : 0; 406 + document.getElementById('progress-fill').style.width = pct + '%'; 407 + document.getElementById('status-text').textContent = msg || `Running: ${current}/${total}`; 408 + } 409 + 410 + function updateSummary(passed, failed) { 411 + const total = passed + failed; 412 + const rate = total > 0 ? (passed / total * 100).toFixed(1) : 0; 413 + document.getElementById('total-count').textContent = total; 414 + document.getElementById('passed-count').textContent = passed; 415 + document.getElementById('failed-count').textContent = failed; 416 + document.getElementById('pass-rate').textContent = rate + '%'; 417 + document.getElementById('pass-rate').className = 'value ' + (rate >= 99 ? 'success' : rate >= 90 ? 'neutral' : 'failure'); 418 + document.getElementById('summary').style.display = 'grid'; 419 + } 420 + 421 + async function loadTestRunner() { 422 + const mode = document.getElementById('mode-select').value; 423 + const scriptName = mode === 'wasm' ? 'htmlrw-tests.wasm.js' : 'htmlrw-tests.js'; 424 + 425 + if (typeof html5rwTests !== 'undefined') { 426 + return true; 427 + } 428 + 429 + log(`Loading ${scriptName}...`); 430 + try { 431 + await new Promise((resolve, reject) => { 432 + const script = document.createElement('script'); 433 + script.src = `_build/default/lib/js/${scriptName}`; 434 + script.onload = resolve; 435 + script.onerror = () => reject(new Error(`Failed to load ${scriptName}`)); 436 + document.head.appendChild(script); 437 + }); 438 + 439 + // Wait for initialization 440 + await new Promise(resolve => setTimeout(resolve, 100)); 441 + 442 + if (typeof html5rwTests === 'undefined') { 443 + throw new Error('Test runner not initialized'); 444 + } 445 + 446 + log(`Test runner loaded (version ${html5rwTests.version})`); 447 + return true; 448 + } catch (e) { 449 + log(`ERROR: ${e.message}`); 450 + log('Make sure to run: opam exec -- dune build lib/js/htmlrw-tests.js'); 451 + return false; 452 + } 453 + } 454 + 455 + async function fetchTestFile(type, filename) { 456 + const basePath = type === 'tree-construction' 457 + ? 'html5lib-tests/tree-construction/' 458 + : 'html5lib-tests/encoding/'; 459 + const url = basePath + filename; 460 + const response = await fetch(url); 461 + if (!response.ok) { 462 + throw new Error(`Failed to fetch ${url}: ${response.status}`); 463 + } 464 + return await response.text(); 465 + } 466 + 467 + function renderFileResult(result) { 468 + const section = document.createElement('div'); 469 + section.className = 'results-section'; 470 + section.dataset.filename = result.filename; 471 + 472 + const collapsed = result.failedCount === 0 ? 'collapsed' : ''; 473 + const hidden = result.failedCount === 0 ? 'hidden' : ''; 474 + 475 + section.innerHTML = ` 476 + <div class="results-header ${collapsed}"> 477 + <h2><span class="toggle">▼</span> ${escapeHtml(result.filename)}</h2> 478 + <div class="results-stats"> 479 + <span class="passed">✓ ${result.passedCount}</span> 480 + <span class="failed">✗ ${result.failedCount}</span> 481 + </div> 482 + </div> 483 + <div class="results-content ${hidden}"> 484 + ${result.tests.map(renderTestResult).join('')} 485 + </div> 486 + `; 487 + 488 + // Add toggle handler 489 + section.querySelector('.results-header').addEventListener('click', function() { 490 + this.classList.toggle('collapsed'); 491 + this.nextElementSibling.classList.toggle('hidden'); 492 + }); 493 + 494 + // Add test detail handlers 495 + section.querySelectorAll('.test-header').forEach(header => { 496 + header.addEventListener('click', function(e) { 497 + e.stopPropagation(); 498 + const details = this.nextElementSibling; 499 + details.classList.toggle('visible'); 500 + const icon = this.querySelector('.expand-icon'); 501 + icon.textContent = details.classList.contains('visible') ? '▲' : '▼'; 502 + }); 503 + }); 504 + 505 + return section; 506 + } 507 + 508 + function renderTestResult(test) { 509 + const statusClass = test.success ? 'passed' : 'failed'; 510 + return ` 511 + <div class="test-item" data-passed="${test.success}"> 512 + <div class="test-header"> 513 + <div class="test-info"> 514 + <span class="status ${statusClass}"></span> 515 + <span class="test-num">#${test.testNum}</span> 516 + <span class="test-desc">${escapeHtml(test.description)}</span> 517 + </div> 518 + <span class="expand-icon">▼</span> 519 + </div> 520 + <div class="test-details"> 521 + <div class="detail-section"> 522 + <h4>Input</h4> 523 + <pre>${escapeHtml(test.input)}</pre> 524 + </div> 525 + <div class="detail-row"> 526 + <div class="detail-section"> 527 + <h4>Expected</h4> 528 + <pre>${escapeHtml(test.expected)}</pre> 529 + </div> 530 + <div class="detail-section"> 531 + <h4>Actual</h4> 532 + <pre>${escapeHtml(test.actual)}</pre> 533 + </div> 534 + </div> 535 + </div> 536 + </div> 537 + `; 538 + } 539 + 540 + function escapeHtml(str) { 541 + const div = document.createElement('div'); 542 + div.textContent = str; 543 + return div.innerHTML; 544 + } 545 + 546 + async function runTests(testType, files, basePath) { 547 + if (isRunning) return; 548 + isRunning = true; 549 + 550 + clearLog(); 551 + document.getElementById('results-container').innerHTML = ''; 552 + document.getElementById('filter-controls').style.display = 'none'; 553 + 554 + const buttons = document.querySelectorAll('button'); 555 + buttons.forEach(b => b.disabled = true); 556 + 557 + try { 558 + if (!await loadTestRunner()) { 559 + return; 560 + } 561 + 562 + log(`Starting ${testType} tests...`); 563 + let totalPassed = 0; 564 + let totalFailed = 0; 565 + const allResults = []; 566 + 567 + for (let i = 0; i < files.length; i++) { 568 + const filename = files[i]; 569 + updateProgress(i, files.length, `Loading ${filename}...`); 570 + 571 + try { 572 + const content = await fetchTestFile(basePath, filename); 573 + log(`Running ${filename}...`); 574 + 575 + let result; 576 + if (basePath === 'tree-construction') { 577 + result = html5rwTests.runTreeConstructionTest(filename, content); 578 + } else { 579 + result = html5rwTests.runEncodingTest(filename, content); 580 + } 581 + 582 + totalPassed += result.passedCount; 583 + totalFailed += result.failedCount; 584 + allResults.push(result); 585 + 586 + log(` ${filename}: ${result.passedCount} passed, ${result.failedCount} failed`); 587 + 588 + // Render result immediately 589 + const section = renderFileResult(result); 590 + document.getElementById('results-container').appendChild(section); 591 + 592 + } catch (e) { 593 + log(` ERROR loading ${filename}: ${e.message}`); 594 + } 595 + 596 + updateSummary(totalPassed, totalFailed); 597 + updateProgress(i + 1, files.length); 598 + } 599 + 600 + updateProgress(files.length, files.length, `Complete: ${totalPassed} passed, ${totalFailed} failed`); 601 + log(`\n=== SUMMARY ===`); 602 + log(`Total: ${totalPassed + totalFailed} tests`); 603 + log(`Passed: ${totalPassed}`); 604 + log(`Failed: ${totalFailed}`); 605 + log(`Pass rate: ${((totalPassed / (totalPassed + totalFailed)) * 100).toFixed(2)}%`); 606 + 607 + document.getElementById('filter-controls').style.display = 'flex'; 608 + setupFilters(); 609 + 610 + } finally { 611 + isRunning = false; 612 + buttons.forEach(b => b.disabled = false); 613 + } 614 + } 615 + 616 + function runAllTests() { 617 + // Run both tree and encoding tests 618 + runTests('all', TREE_CONSTRUCTION_FILES.concat(ENCODING_FILES.map(f => 'encoding/' + f)), 'tree-construction'); 619 + } 620 + 621 + async function runTreeTests() { 622 + await runTests('tree-construction', TREE_CONSTRUCTION_FILES, 'tree-construction'); 623 + } 624 + 625 + async function runEncodingTests() { 626 + await runTests('encoding', ENCODING_FILES, 'encoding'); 627 + } 628 + 629 + function setupFilters() { 630 + const search = document.getElementById('search'); 631 + const filter = document.getElementById('filter'); 632 + 633 + search.addEventListener('input', applyFilters); 634 + filter.addEventListener('change', applyFilters); 635 + } 636 + 637 + function applyFilters() { 638 + const query = document.getElementById('search').value.toLowerCase(); 639 + const filterValue = document.getElementById('filter').value; 640 + 641 + document.querySelectorAll('.test-item').forEach(item => { 642 + const text = item.textContent.toLowerCase(); 643 + const passed = item.dataset.passed === 'true'; 644 + let visible = true; 645 + 646 + if (query && !text.includes(query)) visible = false; 647 + if (filterValue === 'passed' && !passed) visible = false; 648 + if (filterValue === 'failed' && passed) visible = false; 649 + 650 + item.style.display = visible ? '' : 'none'; 651 + }); 652 + } 653 + 654 + function expandAll() { 655 + document.querySelectorAll('.results-header.collapsed').forEach(h => h.click()); 656 + } 657 + 658 + function collapseAll() { 659 + document.querySelectorAll('.results-header:not(.collapsed)').forEach(h => h.click()); 660 + } 661 + 662 + // Quick test on load 663 + window.addEventListener('load', function() { 664 + log('Ready. Select a test mode and click Run to begin.'); 665 + }); 666 + </script> 667 + </body> 668 + </html>