OCaml HTML5 parser/serialiser based on Python's JustHTML
(* Debug driver for a single validator fixture.

   Reads the fixture file, parses it with [Html5rw.parse], prints the element
   tree together with each element's namespace, then runs
   [Html5_checker.check] on the same input and prints the reported errors and
   warnings, followed by the warning text the fixture is expected to produce
   so the two can be compared by eye.

   Raises [Sys_error] if the fixture cannot be opened. *)
let () =
  let test_file =
    "validator/tests/html/attributes/lang/missing-lang-attribute-haswarn.html"
  in
  (* Read the whole fixture into memory. Binary mode keeps
     [in_channel_length] consistent with the number of bytes
     [really_input_string] can actually deliver (text mode may translate
     newlines on some platforms), and [Fun.protect] guarantees the channel is
     closed even when reading raises. *)
  let html =
    let ic = open_in_bin test_file in
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () -> really_input_string ic (in_channel_length ic))
  in
  let reader = Bytesrw.Bytes.Reader.of_string html in
  let doc = Html5rw.parse ~collect_errors:true reader in
  let root = Html5rw.root doc in
  print_endline "=== DOM Structure (with namespaces) ===";
  (* Recursively print element nodes. Text, doctype and comment nodes are
     skipped; document and fragment nodes print their name only and then
     recurse into their children. *)
  let rec print_node indent (node : Html5rw.Dom.node) =
    let open Html5rw.Dom in
    match node.name with
    | "#text" -> ()
    | "#document" | "#document-fragment" ->
      Printf.printf "%s%s\n" indent node.name;
      List.iter (print_node (indent ^ " ")) node.children
    | "!doctype" -> ()
    | "#comment" -> ()
    | _ ->
      let ns =
        match node.namespace with
        | Some ns -> ns
        | None -> "none"
      in
      Printf.printf "%s<%s ns=%s>\n" indent node.name ns;
      (* NOTE(review): only attributes named "foo" are printed; confirm this
         filter is intentional and not a leftover placeholder. *)
      List.iter
        (fun (k, v) ->
          if k = "foo" then Printf.printf "%s @%s=%s\n" indent k v)
        node.attrs;
      List.iter (print_node (indent ^ " ")) node.children
  in
  print_node "" root;
  print_endline "\n=== Checking... ===";
  (* A fresh reader is created for the checker rather than reusing the one
     already handed to the parser. *)
  let reader2 = Bytesrw.Bytes.Reader.of_string html in
  let result =
    Html5_checker.check ~collect_parse_errors:true ~system_id:test_file reader2
  in
  let errors = Html5_checker.errors result in
  let warnings = Html5_checker.warnings result in
  (* Errors and warnings are printed the same way: one message text per
     line. *)
  let print_message e = print_endline e.Html5_checker.Message.message in
  print_endline "=== Errors ===";
  List.iter print_message errors;
  print_endline "\n=== Warnings ===";
  List.iter print_message warnings;
  print_endline "\n=== Expected ===";
  print_endline "Consider adding a \xe2\x80\x9clang\xe2\x80\x9d attribute to the \xe2\x80\x9chtml\xe2\x80\x9d start tag to declare the language of this document."