OCaml HTML5 parser/serialiser based on Python's JustHTML
(* Debug driver for a single validator fixture.

   Reads the fixture file, parses it with [Html5rw], prints the element tree
   together with each element's namespace, then runs [Html5_checker] on the
   same bytes and prints every reported message followed by a hard-coded
   expected message for manual comparison. *)
let () =
  let test_file =
    "validator/tests/xhtml/elements/progress/002-isvalid.xhtml"
  in
  (* Binary mode keeps [in_channel_length] consistent with the number of bytes
     [really_input_string] reads; [Fun.protect] guarantees the channel is
     closed even when reading raises. *)
  let html =
    let ic = open_in_bin test_file in
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () -> really_input_string ic (in_channel_length ic))
  in
  let reader = Bytesrw.Bytes.Reader.of_string html in
  let doc = Html5rw.parse ~collect_errors:true reader in
  let root = Html5rw.root doc in
  print_endline "=== DOM Structure (with namespaces) ===";
  (* [print_node indent node] prints [node] and its descendants, one element
     per line, prefixing each line with [indent]. Text, doctype and comment
     nodes are skipped; document nodes are printed by name only. *)
  let rec print_node indent (node : Html5rw.Dom.node) =
    let open Html5rw.Dom in
    match node.name with
    | "#text" -> ()
    | "#document" | "#document-fragment" ->
      Printf.printf "%s%s\n" indent node.name;
      List.iter (print_node (indent ^ " ")) node.children
    | "!doctype" -> ()
    | "#comment" -> ()
    | _ ->
      let ns = match node.namespace with Some ns -> ns | None -> "none" in
      Printf.printf "%s<%s ns=%s>\n" indent node.name ns;
      (* Only attributes literally named "foo" are shown. *)
      List.iter
        (fun (k, v) ->
          if k = "foo" then Printf.printf "%s @%s=%s\n" indent k v)
        node.attrs;
      List.iter (print_node (indent ^ " ")) node.children
  in
  print_node "" root;
  print_endline "\n=== Checking... ===";
  (* A reader is built again from the same string for the checker. *)
  let reader2 = Bytesrw.Bytes.Reader.of_string html in
  let result =
    Html5_checker.check ~collect_parse_errors:true ~system_id:test_file reader2
  in
  let errors = Html5_checker.errors result in
  print_endline "=== Errors ===";
  List.iter (fun e -> print_endline e.Html5_checker.Message.message) errors;
  print_endline "\n=== Expected ===";
  print_endline "Element \xe2\x80\x9crect\xe2\x80\x9d is missing required attribute \xe2\x80\x9cheight\xe2\x80\x9d."