OCaml HTML5 parser/serialiser based on Python's JustHTML

more

Changed files
+11 -4
lib
+1
lib/html5_checker/checker_registry.ml
··· 44 44 Hashtbl.replace reg "svg" Svg_checker.checker; 45 45 Hashtbl.replace reg "xhtml-content" Xhtml_content_checker.checker; 46 46 Hashtbl.replace reg "lang-detecting" Lang_detecting_checker.checker; 47 + Hashtbl.replace reg "unknown-element" Unknown_element_checker.checker; 47 48 (* Hashtbl.replace reg "heading" Heading_checker.checker; *) 48 49 (* Hashtbl.replace reg "content" Content_checker.checker; *) 49 50 reg
+4
lib/html5_checker/parse_error_bridge.ml
··· 71 71 ("End tag \xe2\x80\x9cbr\xe2\x80\x9d.", "end-tag-br") 72 72 else if s = "expected-closing-tag-but-got-eof" then 73 73 ("End of file seen and there were open elements.", "eof-in-open-element") 74 + else if String.length s > 28 && String.sub s 0 28 = "bad-start-tag-in-head-noscri" then 75 + let colon_pos = String.index s ':' in 76 + let element = String.sub s (colon_pos + 1) (String.length s - colon_pos - 1) in 77 + (Printf.sprintf "Bad start tag in \xe2\x80\x9c%s\xe2\x80\x9d in \xe2\x80\x9cnoscript\xe2\x80\x9d in \xe2\x80\x9chead\xe2\x80\x9d." element, "bad-start-tag-in-head-noscript") 74 78 else if String.length s > 19 && String.sub s 0 19 = "unexpected-end-tag:" then 75 79 let element = String.sub s 19 (String.length s - 19) in 76 80 (Printf.sprintf "Stray end tag \xe2\x80\x9c%s\xe2\x80\x9d." element, "stray-end-tag")
+4 -2
lib/html5_checker/specialized/aria_checker.ml
··· 580 580 Message_collector.add_typed collector Error_code.Li_bad_role_in_tablist 581 581 | None -> 582 582 (* Check if in list context (ul/ol/menu without explicit role, or role=list) *) 583 + (* Nu validator produces this error for ANY explicit role on li in list context, 584 + even role="listitem" - because having an explicit role is itself the problem. 585 + The message says "other than listitem" but the rule is: don't use explicit roles. *) 583 586 if is_in_list_context state then 584 - if first_role <> "listitem" then 585 - Message_collector.add_typed collector Error_code.Li_bad_role_in_list) 587 + Message_collector.add_typed collector Error_code.Li_bad_role_in_list) 586 588 end 587 589 end; 588 590
+2 -2
lib/html5rw/parser/parser_tree_builder.ml
··· 934 934 | Token.Tag { kind = Token.Start; name; _ } 935 935 when List.mem name ["head"; "noscript"] -> 936 936 parse_error t "unexpected-start-tag" 937 - | Token.Tag { kind = Token.Start; _ } -> 938 - parse_error t "unexpected-start-tag"; 937 + | Token.Tag { kind = Token.Start; name; _ } -> 938 + parse_error t ("bad-start-tag-in-head-noscript:" ^ name); 939 939 pop_current t; (* Pop noscript *) 940 940 t.mode <- Parser_insertion_mode.In_head; 941 941 process_token t token