OCaml HTML5 parser/serialiser based on Python's JustHTML
(* Debug driver for one validator fixture.

   Reads the fixture file, parses it with [Html5rw] and dumps the resulting
   DOM tree, then runs [Html5_checker] over the same bytes and prints every
   error and warning message next to the message the fixture is expected to
   produce.

   NOTE(review): the final verdict is "PASS" as soon as the checker reports
   at least one error; it does not compare against the expected message. *)
let () =
  let test_file =
    "validator/tests/xhtml/elements/menu/menu-containing-hr-novalid.xhtml"
  in
  (* Read the whole fixture into memory.  The channel is opened in binary
     mode so that the length reported by [in_channel_length] matches the
     number of characters [really_input_string] can actually read (text
     mode may translate newlines on some platforms).  [Fun.protect]
     guarantees the channel is closed even when reading raises. *)
  let html =
    let ic = open_in_bin test_file in
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () -> really_input_string ic (in_channel_length ic))
  in
  let doc =
    Html5rw.parse ~collect_errors:true (Bytesrw.Bytes.Reader.of_string html)
  in
  let root = Html5rw.root doc in
  print_endline "=== DOM Structure ===";
  (* Recursively print [node] and its descendants, one node per line, with
     [indent] as the line prefix.  Comments are skipped, whitespace-only
     text nodes are skipped, and doctype nodes are printed without
     descending into them. *)
  let rec print_node indent (node : Html5rw.Dom.node) =
    let open Html5rw.Dom in
    let print_children () =
      let deeper = indent ^ " " in
      List.iter (print_node deeper) node.children
    in
    match node.name with
    | "#comment" -> ()
    | "!doctype" -> Printf.printf "%s<!DOCTYPE>\n" indent
    | "#text" ->
      let text = String.trim node.data in
      if String.length text > 0 then Printf.printf "%sTEXT: %s\n" indent text
    | "#document" | "#document-fragment" ->
      Printf.printf "%s%s\n" indent node.name;
      print_children ()
    | _ ->
      Printf.printf "%s<%s>\n" indent node.name;
      print_children ()
  in
  print_node "" root;
  print_endline "\n=== Now checking ===";
  (* A fresh reader is built over the same string for the checker run. *)
  let result =
    Html5_checker.check ~collect_parse_errors:true ~system_id:test_file
      (Bytesrw.Bytes.Reader.of_string html)
  in
  let errors = Html5_checker.errors result in
  let warnings = Html5_checker.warnings result in
  (* Print the text of each checker message on its own line. *)
  let print_messages messages =
    List.iter
      (fun m -> print_endline m.Html5_checker.Message.message)
      messages
  in
  print_endline "=== Errors ===";
  print_messages errors;
  print_endline "=== Warnings ===";
  print_messages warnings;
  print_endline "\n=== Expected ===";
  print_endline "Element \xe2\x80\x9chr\xe2\x80\x9d not allowed as child of element \xe2\x80\x9cmenu\xe2\x80\x9d in this context. (Suppressing further errors from this subtree.)";
  (* The fixture is a "novalid" case, so any reported error counts as a
     pass here. *)
  match errors with
  | _ :: _ -> print_endline "\nPASS (has errors)"
  | [] -> print_endline "\nFAIL (no errors)"