open Bytesrw

(* Example: parse malformed HTML, list the parse errors that were collected,
   then inspect the tree the parser recovered. *)

(* Intentionally broken markup used as the parser input. The literal is kept
   on one line exactly as written; changing its layout would change the
   program input (and presumably the reported error positions). *)
let malformed_html = {| Unclosed title <meta charset="utf-8"> </head> <body> <div> <p>Unclosed paragraph <p>Another paragraph (implicitly closes the previous one) <span><div>Misnested tags</span></div> </div> <table> <tr><td>Cell 1<td>Cell 2</td> </table> <!-- Unclosed comment </body> </html> |}

(* Entry point: runs the whole demonstration and writes a report to stdout. *)
let () =
  print_string "=== Parsing Malformed HTML ===\n\n";

  (* Parse with error collection switched on. *)
  let doc =
    malformed_html
    |> Bytes.Reader.of_string
    |> Html5rw.parse ~collect_errors:true
  in

  (* Report how many errors were collected, then one line per error with its
     line, column and error code rendered as a string. *)
  let parse_errors = Html5rw.errors doc in
  let print_error e =
    Printf.printf " Line %d, Col %d: %s\n"
      (Html5rw.error_line e)
      (Html5rw.error_column e)
      (Html5rw.Parse_error_code.to_string (Html5rw.error_code e))
  in
  Printf.printf "Parse errors: %d\n\n" (List.length parse_errors);
  List.iter print_error parse_errors;

  (* Despite the errors the parser still yields a tree; print it serialised
     with pretty-printing and a two-space indent. *)
  Printf.printf "\n=== Recovered DOM Tree ===\n";
  Html5rw.to_string ~pretty:true ~indent_size:2 doc |> Printf.printf "%s\n";

  (* Query the recovered tree and report how many matches each selector has. *)
  Printf.printf "\n=== Query Results ===\n";
  let count selector = List.length (Html5rw.query doc selector) in
  Printf.printf "Found %d paragraphs\n" (count "p");
  Printf.printf "Found %d table cells\n" (count "td")