OCaml HTML5 parser/serialiser based on Python's JustHTML
(* Debug script for a single validator fixture.

   It reads the XHTML test file, parses it with [Html5rw.parse] and prints an
   indented outline of the resulting DOM, then runs [Html5_checker.check] on
   the same input and prints the reported errors and warnings next to the
   message the fixture is expected to trigger.  The final PASS/FAIL line only
   reflects whether at least one error was reported; it does not compare the
   message text. *)
let () =
  let test_file =
    "validator/tests/xhtml/elements/menu/menu-containing-hr-novalid.xhtml"
  in
  (* Read the whole fixture into memory.  The channel is opened in binary mode
     so that [in_channel_length] matches the number of bytes that can actually
     be read, and [Fun.protect] guarantees the channel is closed even when
     reading raises. *)
  let html =
    let ic = open_in_bin test_file in
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () -> really_input_string ic (in_channel_length ic))
  in
  let reader = Bytesrw.Bytes.Reader.of_string html in
  let doc = Html5rw.parse ~collect_errors:true reader in
  let root = Html5rw.root doc in
  print_endline "=== DOM Structure ===";
  (* Print [node] and its descendants, one node per line, prefixing each line
     with [indent].  Whitespace-only text nodes and comments are skipped, and
     the children of a doctype node are not visited. *)
  let rec print_node indent (node : Html5rw.Dom.node) =
    let open Html5rw.Dom in
    let print_children () =
      List.iter (print_node (indent ^ " ")) node.children
    in
    match node.name with
    | "#text" ->
      let text = String.trim node.data in
      if text <> "" then Printf.printf "%sTEXT: %s\n" indent text
    | "#document" | "#document-fragment" ->
      Printf.printf "%s%s\n" indent node.name;
      print_children ()
    | "!doctype" -> Printf.printf "%s<!DOCTYPE>\n" indent
    | "#comment" -> ()
    | _ ->
      Printf.printf "%s<%s>\n" indent node.name;
      print_children ()
  in
  print_node "" root;
  print_endline "\n=== Now checking ===";
  (* A fresh reader is built over the same string for the checker rather than
     reusing the one already handed to the parser. *)
  let reader2 = Bytesrw.Bytes.Reader.of_string html in
  let result =
    Html5_checker.check ~collect_parse_errors:true ~system_id:test_file reader2
  in
  let errors = Html5_checker.errors result in
  let warnings = Html5_checker.warnings result in
  (* Print the text of each checker message on its own line. *)
  let print_messages messages =
    List.iter
      (fun e -> print_endline e.Html5_checker.Message.message)
      messages
  in
  print_endline "=== Errors ===";
  print_messages errors;
  print_endline "=== Warnings ===";
  print_messages warnings;
  print_endline "\n=== Expected ===";
  print_endline "Element \xe2\x80\x9chr\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cmenu\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)";
  (* The fixture is a "novalid" case, so any reported error counts as a pass. *)
  match errors with
  | [] -> print_endline "\nFAIL (no errors)"
  | _ :: _ -> print_endline "\nPASS (has errors)"