(* Simple benchmarks for HTML5 parsing and validation *)
let time_it name iterations f =
Gc.full_major ();
let start = Unix.gettimeofday () in
for _ = 1 to iterations do
ignore (f ())
done;
let elapsed = Unix.gettimeofday () -. start in
let per_iter = elapsed /. float_of_int iterations *. 1000.0 in
Printf.printf "%-40s %d iters in %.3fs (%.3f ms/iter)\n%!" name iterations elapsed per_iter
(* Generate HTML with nested elements *)
let generate_nested_html depth =
let buf = Buffer.create 4096 in
Buffer.add_string buf "
Test ";
for i = 1 to depth do
Buffer.add_string buf (Printf.sprintf "" i)
done;
Buffer.add_string buf "
Content
";
for _ = 1 to depth do
Buffer.add_string buf "
"
done;
Buffer.add_string buf "";
Buffer.contents buf
(* Generate HTML with many sibling elements *)
let generate_wide_html count =
let buf = Buffer.create (count * 100) in
Buffer.add_string buf "Test ";
for i = 1 to count do
Buffer.add_string buf (Printf.sprintf "Item %d " i i i)
done;
Buffer.add_string buf "
";
Buffer.contents buf
(* Generate HTML with table *)
let generate_table_html rows cols =
let buf = Buffer.create (rows * cols * 50) in
Buffer.add_string buf "Table ";
Buffer.add_string buf "";
for c = 1 to cols do
Buffer.add_string buf (Printf.sprintf "Col %d " c)
done;
Buffer.add_string buf " ";
for r = 1 to rows do
Buffer.add_string buf "";
for c = 1 to cols do
Buffer.add_string buf (Printf.sprintf "R%dC%d " r c)
done;
Buffer.add_string buf " "
done;
Buffer.add_string buf "
";
Buffer.contents buf
(* Generate HTML with interactive elements (tests nesting checker) *)
let generate_interactive_html count =
let buf = Buffer.create (count * 200) in
Buffer.add_string buf "Forms ";
for i = 1 to count do
Buffer.add_string buf (Printf.sprintf
"" i i i i)
done;
Buffer.add_string buf "";
Buffer.contents buf
(* Generate HTML with validation errors *)
let generate_invalid_html count =
let buf = Buffer.create (count * 100) in
Buffer.add_string buf "Invalid ";
for i = 1 to count do
(* Various validation errors *)
Buffer.add_string buf (Printf.sprintf "" i);
Buffer.add_string buf "
"; (* missing alt *)
Buffer.add_string buf "
Nested interactive ";
Buffer.add_string buf "
"
done;
Buffer.add_string buf "";
Buffer.contents buf
(* Benchmark parsing only *)
let bench_parse html iterations =
let bytes = Bytes.of_string html in
time_it "Parse only" iterations (fun () ->
Html5rw.parse_bytes bytes
)
(* Benchmark validation only (on pre-parsed DOM) *)
let bench_validate doc iterations =
time_it "Validate only" iterations (fun () ->
Htmlrw_check.check_parsed doc
)
(* Benchmark parse + validate *)
let bench_parse_and_validate html iterations =
time_it "Parse + Validate" iterations (fun () ->
Htmlrw_check.check_string html
)
(* Benchmark repeated message access (tests caching via check result) *)
let bench_message_access result iterations =
time_it "Message access (10x errors/warnings)" iterations (fun () ->
for _ = 1 to 10 do
ignore (Htmlrw_check.errors result);
ignore (Htmlrw_check.warnings result);
ignore (Htmlrw_check.messages result)
done
)
(* Micro-benchmarks for specific optimizations *)
(* Test List.mem vs Hashtbl.mem for table element lookups *)
let bench_list_mem iterations =
let elements = ["table"; "tbody"; "tfoot"; "thead"; "tr"] in
let test_names = ["table"; "div"; "tbody"; "span"; "tr"; "p"; "thead"; "a"] in
time_it "List.mem (5 elements, 8 lookups)" iterations (fun () ->
List.iter (fun name ->
ignore (List.mem name elements)
) test_names
)
let bench_hashtbl_mem iterations =
let tbl = Hashtbl.create 8 in
List.iter (fun e -> Hashtbl.add tbl e ()) ["table"; "tbody"; "tfoot"; "thead"; "tr"];
let test_names = ["table"; "div"; "tbody"; "span"; "tr"; "p"; "thead"; "a"] in
time_it "Hashtbl.mem (5 elements, 8 lookups)" iterations (fun () ->
List.iter (fun name ->
ignore (Hashtbl.mem tbl name)
) test_names
)
(* Test noncharacter validation - bitwise vs List.mem *)
let bench_nonchar_list iterations =
let nonchars = [0xFFFE; 0xFFFF; 0x1FFFE; 0x1FFFF; 0x2FFFE; 0x2FFFF;
0x3FFFE; 0x3FFFF; 0x4FFFE; 0x4FFFF; 0x5FFFE; 0x5FFFF;
0x6FFFE; 0x6FFFF; 0x7FFFE; 0x7FFFF; 0x8FFFE; 0x8FFFF;
0x9FFFE; 0x9FFFF; 0xAFFFE; 0xAFFFF; 0xBFFFE; 0xBFFFF;
0xCFFFE; 0xCFFFF; 0xDFFFE; 0xDFFFF; 0xEFFFE; 0xEFFFF;
0xFFFFE; 0xFFFFF; 0x10FFFE; 0x10FFFF] in
let test_codes = [0x41; 0xFFFE; 0x1000; 0x10FFFF; 0xFDD0; 0x3FFFE; 0x100; 0xFFFF] in
time_it "List.mem nonchar (34 elements, 8 checks)" iterations (fun () ->
List.iter (fun code ->
ignore (List.mem code nonchars)
) test_codes
)
let bench_nonchar_bitwise iterations =
let test_codes = [0x41; 0xFFFE; 0x1000; 0x10FFFF; 0xFDD0; 0x3FFFE; 0x100; 0xFFFF] in
time_it "Bitwise nonchar (8 checks)" iterations (fun () ->
List.iter (fun code ->
let low16 = code land 0xFFFF in
ignore (low16 = 0xFFFE || low16 = 0xFFFF)
) test_codes
)
(* Test message list reversal - once vs cached *)
let bench_list_rev iterations =
let msgs = List.init 100 (fun i -> Printf.sprintf "Message %d" i) in
time_it "List.rev (100 msgs, 10 accesses)" iterations (fun () ->
for _ = 1 to 10 do
ignore (List.rev msgs)
done
)
let bench_cached_rev iterations =
let msgs = List.init 100 (fun i -> Printf.sprintf "Message %d" i) in
let cached = ref None in
time_it "Cached rev (100 msgs, 10 accesses)" iterations (fun () ->
cached := None; (* Reset cache each iteration *)
for _ = 1 to 10 do
match !cached with
| Some c -> ignore c
| None ->
let rev = List.rev msgs in
cached := Some rev;
ignore rev
done
)
let () =
Printf.printf "\n=== HTML5 Parser/Validator Benchmarks ===\n\n";
(* Micro-benchmarks first *)
Printf.printf "--- Micro-benchmarks: Optimized Code Paths ---\n";
Printf.printf "\nTable element lookup (List.mem vs Hashtbl.mem):\n";
bench_list_mem 100000;
bench_hashtbl_mem 100000;
Printf.printf "\nNoncharacter validation (List.mem vs bitwise):\n";
bench_nonchar_list 100000;
bench_nonchar_bitwise 100000;
Printf.printf "\nMessage list reversal (repeated vs cached):\n";
bench_list_rev 10000;
bench_cached_rev 10000;
Printf.printf "\n";
(* Small documents *)
Printf.printf "--- Small Document (100 nested divs) ---\n";
let small_nested = generate_nested_html 100 in
Printf.printf "Document size: %d bytes\n" (String.length small_nested);
bench_parse small_nested 1000;
let doc = Html5rw.parse_bytes (Bytes.of_string small_nested) in
bench_validate doc 1000;
bench_parse_and_validate small_nested 1000;
Printf.printf "\n";
(* Wide documents *)
Printf.printf "--- Wide Document (1000 siblings) ---\n";
let wide = generate_wide_html 1000 in
Printf.printf "Document size: %d bytes\n" (String.length wide);
bench_parse wide 100;
let doc = Html5rw.parse_bytes (Bytes.of_string wide) in
bench_validate doc 100;
bench_parse_and_validate wide 100;
Printf.printf "\n";
(* Table documents *)
Printf.printf "--- Table Document (100x20) ---\n";
let table = generate_table_html 100 20 in
Printf.printf "Document size: %d bytes\n" (String.length table);
bench_parse table 100;
let doc = Html5rw.parse_bytes (Bytes.of_string table) in
bench_validate doc 100;
bench_parse_and_validate table 100;
Printf.printf "\n";
(* Interactive elements (nesting checker stress test) *)
Printf.printf "--- Interactive Elements (200 forms) ---\n";
let interactive = generate_interactive_html 200 in
Printf.printf "Document size: %d bytes\n" (String.length interactive);
bench_parse interactive 100;
let doc = Html5rw.parse_bytes (Bytes.of_string interactive) in
bench_validate doc 100;
bench_parse_and_validate interactive 100;
Printf.printf "\n";
(* Invalid HTML (validation error generation) *)
Printf.printf "--- Invalid HTML (100 error patterns) ---\n";
let invalid = generate_invalid_html 100 in
Printf.printf "Document size: %d bytes\n" (String.length invalid);
bench_parse invalid 100;
let doc = Html5rw.parse_bytes (Bytes.of_string invalid) in
bench_validate doc 100;
bench_parse_and_validate invalid 100;
Printf.printf "\n";
(* Message access benchmark - tests caching *)
Printf.printf "--- Message Access (invalid HTML result, 10 accesses) ---\n";
let result = Htmlrw_check.check_string invalid in
bench_message_access result 1000;
Printf.printf "\n";
(* Large document *)
Printf.printf "--- Large Document (500 nested divs) ---\n";
let large_nested = generate_nested_html 500 in
Printf.printf "Document size: %d bytes\n" (String.length large_nested);
bench_parse large_nested 100;
let doc = Html5rw.parse_bytes (Bytes.of_string large_nested) in
bench_validate doc 100;
bench_parse_and_validate large_nested 100;
Printf.printf "\n";
Printf.printf "=== Benchmarks Complete ===\n"