(* Simple benchmarks for HTML5 parsing and validation *) let time_it name iterations f = Gc.full_major (); let start = Unix.gettimeofday () in for _ = 1 to iterations do ignore (f ()) done; let elapsed = Unix.gettimeofday () -. start in let per_iter = elapsed /. float_of_int iterations *. 1000.0 in Printf.printf "%-40s %d iters in %.3fs (%.3f ms/iter)\n%!" name iterations elapsed per_iter (* Generate HTML with nested elements *) let generate_nested_html depth = let buf = Buffer.create 4096 in Buffer.add_string buf "Test"; for i = 1 to depth do Buffer.add_string buf (Printf.sprintf "
" i) done; Buffer.add_string buf "

Content

"; for _ = 1 to depth do Buffer.add_string buf "
" done; Buffer.add_string buf ""; Buffer.contents buf (* Generate HTML with many sibling elements *) let generate_wide_html count = let buf = Buffer.create (count * 100) in Buffer.add_string buf "Test
"; for i = 1 to count do Buffer.add_string buf (Printf.sprintf "Item %d" i i i) done; Buffer.add_string buf "
"; Buffer.contents buf (* Generate HTML with table *) let generate_table_html rows cols = let buf = Buffer.create (rows * cols * 50) in Buffer.add_string buf "Table"; Buffer.add_string buf ""; for c = 1 to cols do Buffer.add_string buf (Printf.sprintf "" c) done; Buffer.add_string buf ""; for r = 1 to rows do Buffer.add_string buf ""; for c = 1 to cols do Buffer.add_string buf (Printf.sprintf "" r c) done; Buffer.add_string buf "" done; Buffer.add_string buf "
Col %d
R%dC%d
"; Buffer.contents buf (* Generate HTML with interactive elements (tests nesting checker) *) let generate_interactive_html count = let buf = Buffer.create (count * 200) in Buffer.add_string buf "Forms"; for i = 1 to count do Buffer.add_string buf (Printf.sprintf "
" i i i i) done; Buffer.add_string buf ""; Buffer.contents buf (* Generate HTML with validation errors *) let generate_invalid_html count = let buf = Buffer.create (count * 100) in Buffer.add_string buf "Invalid"; for i = 1 to count do (* Various validation errors *) Buffer.add_string buf (Printf.sprintf "
" i); Buffer.add_string buf ""; (* missing alt *) Buffer.add_string buf ""; Buffer.add_string buf "
" done; Buffer.add_string buf ""; Buffer.contents buf (* Benchmark parsing only *) let bench_parse html iterations = let bytes = Bytes.of_string html in time_it "Parse only" iterations (fun () -> Html5rw.parse_bytes bytes ) (* Benchmark validation only (on pre-parsed DOM) *) let bench_validate doc iterations = time_it "Validate only" iterations (fun () -> Htmlrw_check.check_parsed doc ) (* Benchmark parse + validate *) let bench_parse_and_validate html iterations = time_it "Parse + Validate" iterations (fun () -> Htmlrw_check.check_string html ) (* Benchmark repeated message access (tests caching via check result) *) let bench_message_access result iterations = time_it "Message access (10x errors/warnings)" iterations (fun () -> for _ = 1 to 10 do ignore (Htmlrw_check.errors result); ignore (Htmlrw_check.warnings result); ignore (Htmlrw_check.messages result) done ) (* Micro-benchmarks for specific optimizations *) (* Test List.mem vs Hashtbl.mem for table element lookups *) let bench_list_mem iterations = let elements = ["table"; "tbody"; "tfoot"; "thead"; "tr"] in let test_names = ["table"; "div"; "tbody"; "span"; "tr"; "p"; "thead"; "a"] in time_it "List.mem (5 elements, 8 lookups)" iterations (fun () -> List.iter (fun name -> ignore (List.mem name elements) ) test_names ) let bench_hashtbl_mem iterations = let tbl = Hashtbl.create 8 in List.iter (fun e -> Hashtbl.add tbl e ()) ["table"; "tbody"; "tfoot"; "thead"; "tr"]; let test_names = ["table"; "div"; "tbody"; "span"; "tr"; "p"; "thead"; "a"] in time_it "Hashtbl.mem (5 elements, 8 lookups)" iterations (fun () -> List.iter (fun name -> ignore (Hashtbl.mem tbl name) ) test_names ) (* Test noncharacter validation - bitwise vs List.mem *) let bench_nonchar_list iterations = let nonchars = [0xFFFE; 0xFFFF; 0x1FFFE; 0x1FFFF; 0x2FFFE; 0x2FFFF; 0x3FFFE; 0x3FFFF; 0x4FFFE; 0x4FFFF; 0x5FFFE; 0x5FFFF; 0x6FFFE; 0x6FFFF; 0x7FFFE; 0x7FFFF; 0x8FFFE; 0x8FFFF; 0x9FFFE; 0x9FFFF; 0xAFFFE; 0xAFFFF; 0xBFFFE; 0xBFFFF; 0xCFFFE; 0xCFFFF; 0xDFFFE; 0xDFFFF; 0xEFFFE; 0xEFFFF; 0xFFFFE; 0xFFFFF; 0x10FFFE; 0x10FFFF] in let test_codes = [0x41; 0xFFFE; 0x1000; 0x10FFFF; 0xFDD0; 0x3FFFE; 0x100; 0xFFFF] in time_it "List.mem nonchar (34 elements, 8 checks)" iterations (fun () -> List.iter (fun code -> ignore (List.mem code nonchars) ) test_codes ) let bench_nonchar_bitwise iterations = let test_codes = [0x41; 0xFFFE; 0x1000; 0x10FFFF; 0xFDD0; 0x3FFFE; 0x100; 0xFFFF] in time_it "Bitwise nonchar (8 checks)" iterations (fun () -> List.iter (fun code -> let low16 = code land 0xFFFF in ignore (low16 = 0xFFFE || low16 = 0xFFFF) ) test_codes ) (* Test message list reversal - once vs cached *) let bench_list_rev iterations = let msgs = List.init 100 (fun i -> Printf.sprintf "Message %d" i) in time_it "List.rev (100 msgs, 10 accesses)" iterations (fun () -> for _ = 1 to 10 do ignore (List.rev msgs) done ) let bench_cached_rev iterations = let msgs = List.init 100 (fun i -> Printf.sprintf "Message %d" i) in let cached = ref None in time_it "Cached rev (100 msgs, 10 accesses)" iterations (fun () -> cached := None; (* Reset cache each iteration *) for _ = 1 to 10 do match !cached with | Some c -> ignore c | None -> let rev = List.rev msgs in cached := Some rev; ignore rev done ) let () = Printf.printf "\n=== HTML5 Parser/Validator Benchmarks ===\n\n"; (* Micro-benchmarks first *) Printf.printf "--- Micro-benchmarks: Optimized Code Paths ---\n"; Printf.printf "\nTable element lookup (List.mem vs Hashtbl.mem):\n"; bench_list_mem 100000; bench_hashtbl_mem 100000; Printf.printf "\nNoncharacter validation (List.mem vs bitwise):\n"; bench_nonchar_list 100000; bench_nonchar_bitwise 100000; Printf.printf "\nMessage list reversal (repeated vs cached):\n"; bench_list_rev 10000; bench_cached_rev 10000; Printf.printf "\n"; (* Small documents *) Printf.printf "--- Small Document (100 nested divs) ---\n"; let small_nested = generate_nested_html 100 in Printf.printf "Document size: %d bytes\n" (String.length small_nested); bench_parse small_nested 1000; let doc = Html5rw.parse_bytes (Bytes.of_string small_nested) in bench_validate doc 1000; bench_parse_and_validate small_nested 1000; Printf.printf "\n"; (* Wide documents *) Printf.printf "--- Wide Document (1000 siblings) ---\n"; let wide = generate_wide_html 1000 in Printf.printf "Document size: %d bytes\n" (String.length wide); bench_parse wide 100; let doc = Html5rw.parse_bytes (Bytes.of_string wide) in bench_validate doc 100; bench_parse_and_validate wide 100; Printf.printf "\n"; (* Table documents *) Printf.printf "--- Table Document (100x20) ---\n"; let table = generate_table_html 100 20 in Printf.printf "Document size: %d bytes\n" (String.length table); bench_parse table 100; let doc = Html5rw.parse_bytes (Bytes.of_string table) in bench_validate doc 100; bench_parse_and_validate table 100; Printf.printf "\n"; (* Interactive elements (nesting checker stress test) *) Printf.printf "--- Interactive Elements (200 forms) ---\n"; let interactive = generate_interactive_html 200 in Printf.printf "Document size: %d bytes\n" (String.length interactive); bench_parse interactive 100; let doc = Html5rw.parse_bytes (Bytes.of_string interactive) in bench_validate doc 100; bench_parse_and_validate interactive 100; Printf.printf "\n"; (* Invalid HTML (validation error generation) *) Printf.printf "--- Invalid HTML (100 error patterns) ---\n"; let invalid = generate_invalid_html 100 in Printf.printf "Document size: %d bytes\n" (String.length invalid); bench_parse invalid 100; let doc = Html5rw.parse_bytes (Bytes.of_string invalid) in bench_validate doc 100; bench_parse_and_validate invalid 100; Printf.printf "\n"; (* Message access benchmark - tests caching *) Printf.printf "--- Message Access (invalid HTML result, 10 accesses) ---\n"; let result = Htmlrw_check.check_string invalid in bench_message_access result 1000; Printf.printf "\n"; (* Large document *) Printf.printf "--- Large Document (500 nested divs) ---\n"; let large_nested = generate_nested_html 500 in Printf.printf "Document size: %d bytes\n" (String.length large_nested); bench_parse large_nested 100; let doc = Html5rw.parse_bytes (Bytes.of_string large_nested) in bench_validate doc 100; bench_parse_and_validate large_nested 100; Printf.printf "\n"; Printf.printf "=== Benchmarks Complete ===\n"