OCaml HTML5 parser/serialiser based on Python's JustHTML
(* Debug driver for a single validator fixture: parses the file, dumps its
   element tree with namespaces, runs the checker over the same bytes, and
   prints the reported errors and warnings next to the expected warning. *)
let () =
  let test_file =
    "validator/tests/html/attributes/lang/missing-lang-attribute-haswarn.html"
  in
  (* Read the whole fixture into memory. The channel is opened in binary mode
     so that [in_channel_length] matches the number of bytes that can actually
     be read, and [Fun.protect] guarantees the channel is closed even when
     reading raises. *)
  let html =
    let ic = open_in_bin test_file in
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () -> really_input_string ic (in_channel_length ic))
  in
  let reader = Bytesrw.Bytes.Reader.of_string html in
  let doc = Html5rw.parse ~collect_errors:true reader in
  let root = Html5rw.root doc in
  print_endline "=== DOM Structure (with namespaces) ===";
  (* [print_node indent node] prints [node] and its descendants, prefixing each
     line with [indent] and extending the prefix by one space per level.
     Text, doctype and comment nodes are skipped; document and fragment nodes
     print only their name; every other node is printed as an element with its
     namespace ("none" when absent). Only attributes named "foo" are shown. *)
  let rec print_node indent (node : Html5rw.Dom.node) =
    let open Html5rw.Dom in
    let print_children () =
      List.iter (print_node (indent ^ " ")) node.children
    in
    match node.name with
    | "#text" | "!doctype" | "#comment" -> ()
    | "#document" | "#document-fragment" ->
      Printf.printf "%s%s\n" indent node.name;
      print_children ()
    | _ ->
      let ns = Option.value node.namespace ~default:"none" in
      Printf.printf "%s<%s ns=%s>\n" indent node.name ns;
      List.iter
        (fun (k, v) ->
          if String.equal k "foo" then Printf.printf "%s @%s=%s\n" indent k v)
        node.attrs;
      print_children ()
  in
  print_node "" root;
  print_endline "\n=== Checking... ===";
  (* The checker gets its own reader over the same string.
     NOTE(review): presumably because [reader] was consumed by the parse above
     -- confirm against the Bytesrw reader semantics. *)
  let reader2 = Bytesrw.Bytes.Reader.of_string html in
  let result =
    Html5_checker.check ~collect_parse_errors:true ~system_id:test_file reader2
  in
  let errors = Html5_checker.errors result in
  let warnings = Html5_checker.warnings result in
  (* Prints the text of each checker message on its own line. *)
  let print_messages messages =
    List.iter
      (fun e -> print_endline e.Html5_checker.Message.message)
      messages
  in
  print_endline "=== Errors ===";
  print_messages errors;
  print_endline "\n=== Warnings ===";
  print_messages warnings;
  print_endline "\n=== Expected ===";
  (* The \xe2\x80\x9c / \xe2\x80\x9d escapes are the UTF-8 encodings of the
     curly double quotes U+201C and U+201D. *)
  print_endline "Consider adding a \xe2\x80\x9clang\xe2\x80\x9d attribute to the \xe2\x80\x9chtml\xe2\x80\x9d start tag to declare the language of this document."