OCaml HTML5 parser/serialiser based on Python's JustHTML

more

+1 -1
bin/html5check/dune
··· 1 1 (executable 2 2 (name html5check) 3 3 (public_name html5check) 4 - (libraries html5_checker html5rw bytesrw cmdliner)) 4 + (libraries htmlrw_check html5rw bytesrw cmdliner))
+11 -11
bin/html5check/html5check.ml
··· 34 34 (** Format output based on the requested format *) 35 35 let format_output format result = 36 36 match format with 37 - | `Text -> Html5_checker.format_text result 38 - | `Json -> Html5_checker.format_json result 39 - | `Gnu -> Html5_checker.format_gnu result 37 + | `Text -> Htmlrw_check.to_text result 38 + | `Json -> Htmlrw_check.to_json result 39 + | `Gnu -> Htmlrw_check.to_gnu result 40 40 41 41 (** Run the validation *) 42 42 let run format errors_only exit_zero quiet verbose file = ··· 46 46 Exit_code.io_error 47 47 | Ok (reader, ic, system_id) -> 48 48 (* Run validation *) 49 - let result = Html5_checker.check ~system_id reader in 49 + let result = Htmlrw_check.check ~system_id reader in 50 50 51 51 (* Close input if it's not stdin *) 52 52 if file <> "-" then close_in ic; 53 53 54 54 (* Get messages based on filtering *) 55 55 let messages = 56 - if errors_only then Html5_checker.errors result 57 - else Html5_checker.messages result 56 + if errors_only then Htmlrw_check.errors result 57 + else Htmlrw_check.messages result 58 58 in 59 59 60 60 (* Output based on mode *) 61 61 if quiet then begin 62 62 (* Only show counts *) 63 - let error_count = List.length (Html5_checker.errors result) in 64 - let warning_count = List.length (Html5_checker.warnings result) in 63 + let error_count = List.length (Htmlrw_check.errors result) in 64 + let warning_count = List.length (Htmlrw_check.warnings result) in 65 65 if errors_only then 66 66 Printf.printf "%d error%s\n" error_count (if error_count = 1 then "" else "s") 67 67 else ··· 75 75 76 76 (* Show summary if verbose *) 77 77 if verbose && messages <> [] then begin 78 - let error_count = List.length (Html5_checker.errors result) in 79 - let warning_count = List.length (Html5_checker.warnings result) in 78 + let error_count = List.length (Htmlrw_check.errors result) in 79 + let warning_count = List.length (Htmlrw_check.warnings result) in 80 80 Printf.eprintf "\nSummary: %d error%s, %d warning%s\n" 81 81 error_count (if error_count = 1 then "" else "s") 82 82 warning_count (if warning_count = 1 then "" else "s") ··· 84 84 end; 85 85 86 86 (* Determine exit code *) 87 - if exit_zero || not (Html5_checker.has_errors result) then 87 + if exit_zero || not (Htmlrw_check.has_errors result) then 88 88 Exit_code.ok 89 89 else 90 90 Exit_code.validation_errors
+5 -5
test/analyze_failures.ml
··· 36 36 close_in ic; 37 37 38 38 let reader = Bytesrw.Bytes.Reader.of_string content in 39 - let result = Html5_checker.check ~collect_parse_errors:true reader in 40 - let errors = Html5_checker.errors result in 41 - let warnings = Html5_checker.warnings result in 39 + let result = Htmlrw_check.check ~collect_parse_errors:true reader in 40 + let errors = Htmlrw_check.errors result in 41 + let warnings = Htmlrw_check.warnings result in 42 42 43 43 let should_print = match mode with 44 44 | "isvalid" -> outcome = Valid && (errors <> [] || warnings <> []) && !count < 60 ··· 49 49 if mode = "isvalid" then begin 50 50 if errors <> [] then begin 51 51 Printf.printf "ERRORS:\n"; 52 - List.iter (fun e -> Printf.printf " %s\n" e.Html5_checker.Message.message) errors 52 + List.iter (fun e -> Printf.printf " %s\n" e.Htmlrw_check.text) errors 53 53 end; 54 54 if warnings <> [] then begin 55 55 Printf.printf "WARNINGS:\n"; 56 - List.iter (fun w -> Printf.printf " %s\n" w.Html5_checker.Message.message) warnings 56 + List.iter (fun w -> Printf.printf " %s\n" w.Htmlrw_check.text) warnings 57 57 end 58 58 end; 59 59 print_endline content;
+5 -5
test/debug_check.ml
··· 27 27 print_node "" root; 28 28 print_endline "\n=== Checking... ==="; 29 29 let reader2 = Bytesrw.Bytes.Reader.of_string html in 30 - let result = Html5_checker.check ~collect_parse_errors:true ~system_id:test_file reader2 in 31 - let errors = Html5_checker.errors result in 32 - let warnings = Html5_checker.warnings result in 30 + let result = Htmlrw_check.check ~collect_parse_errors:true ~system_id:test_file reader2 in 31 + let errors = Htmlrw_check.errors result in 32 + let warnings = Htmlrw_check.warnings result in 33 33 print_endline "=== Errors ==="; 34 - List.iter (fun e -> print_endline e.Html5_checker.Message.message) errors; 34 + List.iter (fun e -> print_endline e.Htmlrw_check.text) errors; 35 35 print_endline "\n=== Warnings ==="; 36 - List.iter (fun e -> print_endline e.Html5_checker.Message.message) warnings; 36 + List.iter (fun e -> print_endline e.Htmlrw_check.text) warnings; 37 37 print_endline "\n=== Expected ==="; 38 38 print_endline "Consider adding a \xe2\x80\x9clang\xe2\x80\x9d attribute to the \xe2\x80\x9chtml\xe2\x80\x9d start tag to declare the language of this document."
+8 -8
test/debug_validator.ml
··· 15 15 Printf.printf "Input (%d bytes):\n%s\n\n" (String.length content) content; 16 16 17 17 let reader = Bytesrw.Bytes.Reader.of_string content in 18 - let result = Html5_checker.check ~collect_parse_errors:true ~system_id:path reader in 18 + let result = Htmlrw_check.check ~collect_parse_errors:true ~system_id:path reader in 19 19 20 - let errors = Html5_checker.errors result in 21 - let warnings = Html5_checker.warnings result in 20 + let errors = Htmlrw_check.errors result in 21 + let warnings = Htmlrw_check.warnings result in 22 22 23 23 Printf.printf "=== Results ===\n"; 24 24 Printf.printf "Errors: %d\n" (List.length errors); 25 25 List.iter (fun msg -> 26 - Printf.printf " [ERROR] %s\n" msg.Html5_checker.Message.message; 27 - (match msg.Html5_checker.Message.location with 26 + Printf.printf " [ERROR] %s\n" msg.Htmlrw_check.text; 27 + (match msg.Htmlrw_check.location with 28 28 | Some loc -> Printf.printf " at line %d, col %d\n" loc.line loc.column 29 29 | None -> ()) 30 30 ) errors; 31 31 32 32 Printf.printf "Warnings: %d\n" (List.length warnings); 33 33 List.iter (fun msg -> 34 - Printf.printf " [WARN] %s\n" msg.Html5_checker.Message.message; 35 - (match msg.Html5_checker.Message.location with 34 + Printf.printf " [WARN] %s\n" msg.Htmlrw_check.text; 35 + (match msg.Htmlrw_check.location with 36 36 | Some loc -> Printf.printf " at line %d, col %d\n" loc.line loc.column 37 37 | None -> ()) 38 38 ) warnings; 39 39 40 40 Printf.printf "\n=== Formatted Output ===\n"; 41 - Printf.printf "%s\n" (Html5_checker.format_text result) 41 + Printf.printf "%s\n" (Htmlrw_check.to_text result)
+8 -8
test/dune
··· 58 58 (executable 59 59 (name test_nesting_checker) 60 60 (modules test_nesting_checker) 61 - (libraries html5rw.checker)) 61 + (libraries html5rw.check)) 62 62 63 63 (executable 64 64 (name test_html5_checker) 65 65 (modules test_html5_checker) 66 - (libraries bytesrw html5rw html5rw.checker str)) 66 + (libraries bytesrw html5rw html5rw.check str)) 67 67 68 68 (rule 69 69 (alias runtest) ··· 78 78 (library 79 79 (name expected_message) 80 80 (modules expected_message) 81 - (libraries html5rw.checker str jsont jsont.bytesrw)) 81 + (libraries html5rw.check str jsont jsont.bytesrw)) 82 82 83 83 (executable 84 84 (name test_validator) 85 85 (modules test_validator) 86 - (libraries bytesrw html5rw html5rw.checker str jsont jsont.bytesrw test_report validator_messages expected_message)) 86 + (libraries bytesrw html5rw html5rw.check str jsont jsont.bytesrw test_report validator_messages expected_message)) 87 87 88 88 (executable 89 89 (name debug_validator) 90 90 (modules debug_validator) 91 - (libraries bytesrw html5rw html5rw.checker)) 91 + (libraries bytesrw html5rw html5rw.check)) 92 92 93 93 (executable 94 94 (name analyze_failures) 95 95 (modules analyze_failures) 96 - (libraries bytesrw html5rw html5rw.checker)) 96 + (libraries bytesrw html5rw html5rw.check)) 97 97 98 98 (executable 99 99 (name debug_check) 100 100 (modules debug_check) 101 - (libraries html5rw.checker bytesrw)) 101 + (libraries html5rw.check bytesrw)) 102 102 103 103 (executable 104 104 (name test_roundtrip) 105 105 (modules test_roundtrip) 106 - (libraries bytesrw html5rw html5rw.checker astring test_report)) 106 + (libraries bytesrw html5rw html5rw.check astring test_report))
+26 -26
test/expected_message.ml
··· 2 2 3 3 type t = { 4 4 message: string; 5 - error_code: Html5_checker.Error_code.t option; 5 + error_code: Htmlrw_check.Error_code.t option; 6 6 line: int option; 7 7 column: int option; 8 8 element: string option; ··· 80 80 if Str.string_match re msg 0 then 81 81 let child = Str.matched_group 1 msg in 82 82 let parent = Str.matched_group 2 msg in 83 - Some ((`Element (`Not_allowed_as_child (`Child child, `Parent parent)) : Html5_checker.Error_code.t), 83 + Some ((`Element (`Not_allowed_as_child (`Child child, `Parent parent)) : Htmlrw_check.Error_code.t), 84 84 Some child, None) 85 85 else None 86 86 ··· 90 90 if Str.string_match re msg 0 then 91 91 let attr = Str.matched_group 1 msg in 92 92 let element = Str.matched_group 2 msg in 93 - Some ((`Attr (`Not_allowed (`Attr attr, `Elem element)) : Html5_checker.Error_code.t), 93 + Some ((`Attr (`Not_allowed (`Attr attr, `Elem element)) : Htmlrw_check.Error_code.t), 94 94 Some element, Some attr) 95 95 else None 96 96 ··· 99 99 let re = Str.regexp {|Attribute "\([^"]+\)" not allowed here|} in 100 100 if Str.string_match re msg 0 then 101 101 let attr = Str.matched_group 1 msg in 102 - Some ((`Attr (`Not_allowed_here (`Attr attr)) : Html5_checker.Error_code.t), 102 + Some ((`Attr (`Not_allowed_here (`Attr attr)) : Htmlrw_check.Error_code.t), 103 103 None, Some attr) 104 104 else None 105 105 ··· 109 109 if Str.string_match re msg 0 then 110 110 let element = Str.matched_group 1 msg in 111 111 let attr = Str.matched_group 2 msg in 112 - Some ((`Attr (`Missing (`Elem element, `Attr attr)) : Html5_checker.Error_code.t), 112 + Some ((`Attr (`Missing (`Elem element, `Attr attr)) : Htmlrw_check.Error_code.t), 113 113 Some element, Some attr) 114 114 else None 115 115 ··· 119 119 if Str.string_match re msg 0 then 120 120 let parent = Str.matched_group 1 msg in 121 121 let child = Str.matched_group 2 msg in 122 - Some ((`Element (`Missing_child (`Parent parent, `Child child)) : Html5_checker.Error_code.t), 122 + Some ((`Element (`Missing_child (`Parent parent, `Child child)) : Htmlrw_check.Error_code.t), 123 123 Some parent, None) 124 124 else None 125 125 ··· 128 128 let re = Str.regexp {|Duplicate ID "\([^"]+\)"|} in 129 129 if Str.string_match re msg 0 then 130 130 let id = Str.matched_group 1 msg in 131 - Some ((`Attr (`Duplicate_id (`Id id)) : Html5_checker.Error_code.t), 131 + Some ((`Attr (`Duplicate_id (`Id id)) : Htmlrw_check.Error_code.t), 132 132 None, None) 133 133 else None 134 134 ··· 137 137 let re = Str.regexp {|The "\([^"]+\)" element is obsolete|} in 138 138 if Str.string_match re msg 0 then 139 139 let element = Str.matched_group 1 msg in 140 - Some ((`Element (`Obsolete (`Elem element, `Suggestion "")) : Html5_checker.Error_code.t), 140 + Some ((`Element (`Obsolete (`Elem element, `Suggestion "")) : Htmlrw_check.Error_code.t), 141 141 Some element, None) 142 142 else None 143 143 ··· 147 147 if Str.string_match re msg 0 then 148 148 let attr = Str.matched_group 1 msg in 149 149 let element = Str.matched_group 2 msg in 150 - Some ((`Element (`Obsolete_attr (`Elem element, `Attr attr, `Suggestion None)) : Html5_checker.Error_code.t), 150 + Some ((`Element (`Obsolete_attr (`Elem element, `Attr attr, `Suggestion None)) : Htmlrw_check.Error_code.t), 151 151 Some element, Some attr) 152 152 else None 153 153 ··· 156 156 let re = Str.regexp {|Stray end tag "\([^"]+\)"|} in 157 157 if Str.string_match re msg 0 then 158 158 let tag = Str.matched_group 1 msg in 159 - Some ((`Tag (`Stray_end (`Tag tag)) : Html5_checker.Error_code.t), 159 + Some ((`Tag (`Stray_end (`Tag tag)) : Htmlrw_check.Error_code.t), 160 160 Some tag, None) 161 161 else None 162 162 ··· 165 165 let re = Str.regexp {|Stray start tag "\([^"]+\)"|} in 166 166 if Str.string_match re msg 0 then 167 167 let tag = Str.matched_group 1 msg in 168 - Some ((`Tag (`Stray_start (`Tag tag)) : Html5_checker.Error_code.t), 168 + Some ((`Tag (`Stray_start (`Tag tag)) : Htmlrw_check.Error_code.t), 169 169 Some tag, None) 170 170 else None 171 171 ··· 175 175 if Str.string_match re msg 0 then 176 176 let role = Str.matched_group 1 msg in 177 177 let reason = Str.matched_group 2 msg in 178 - Some ((`Aria (`Unnecessary_role (`Role role, `Elem "", `Reason reason)) : Html5_checker.Error_code.t), 178 + Some ((`Aria (`Unnecessary_role (`Role role, `Elem "", `Reason reason)) : Htmlrw_check.Error_code.t), 179 179 None, None) 180 180 else None 181 181 ··· 185 185 if Str.string_match re msg 0 then 186 186 let role = Str.matched_group 1 msg in 187 187 let element = Str.matched_group 2 msg in 188 - Some ((`Aria (`Bad_role (`Elem element, `Role role)) : Html5_checker.Error_code.t), 188 + Some ((`Aria (`Bad_role (`Elem element, `Role role)) : Htmlrw_check.Error_code.t), 189 189 Some element, Some "role") 190 190 else None 191 191 ··· 196 196 let attr = Str.matched_group 1 msg in 197 197 let element = Str.matched_group 2 msg in 198 198 let condition = Str.matched_group 3 msg in 199 - Some ((`Aria (`Must_not_specify (`Attr attr, `Elem element, `Condition condition)) : Html5_checker.Error_code.t), 199 + Some ((`Aria (`Must_not_specify (`Attr attr, `Elem element, `Condition condition)) : Htmlrw_check.Error_code.t), 200 200 Some element, Some attr) 201 201 else None 202 202 ··· 207 207 let attr = Str.matched_group 1 msg in 208 208 let element = Str.matched_group 2 msg in 209 209 let condition = Str.matched_group 3 msg in 210 - Some ((`Aria (`Must_not_use (`Attr attr, `Elem element, `Condition condition)) : Html5_checker.Error_code.t), 210 + Some ((`Aria (`Must_not_use (`Attr attr, `Elem element, `Condition condition)) : Htmlrw_check.Error_code.t), 211 211 Some element, Some attr) 212 212 else None 213 213 ··· 225 225 String.trim (String.sub msg (colon_pos + 1) (String.length msg - colon_pos - 1)) 226 226 with Not_found -> "" 227 227 in 228 - Some ((`Attr (`Bad_value (`Elem element, `Attr attr, `Value value, `Reason reason)) : Html5_checker.Error_code.t), 228 + Some ((`Attr (`Bad_value (`Elem element, `Attr attr, `Value value, `Reason reason)) : Htmlrw_check.Error_code.t), 229 229 Some element, Some attr) 230 230 else None 231 231 ··· 234 234 let re = Str.regexp {|End tag "\([^"]+\)" implied, but there were open elements|} in 235 235 if Str.string_match re msg 0 then 236 236 let tag = Str.matched_group 1 msg in 237 - Some ((`Tag (`End_implied_open (`Tag tag)) : Html5_checker.Error_code.t), 237 + Some ((`Tag (`End_implied_open (`Tag tag)) : Htmlrw_check.Error_code.t), 238 238 Some tag, None) 239 239 else None 240 240 ··· 243 243 let re = Str.regexp {|No "\([^"]+\)" element in scope but a "\([^"]+\)" end tag seen|} in 244 244 if Str.string_match re msg 0 then 245 245 let tag = Str.matched_group 1 msg in 246 - Some ((`Tag (`Not_in_scope (`Tag tag)) : Html5_checker.Error_code.t), 246 + Some ((`Tag (`Not_in_scope (`Tag tag)) : Htmlrw_check.Error_code.t), 247 247 Some tag, None) 248 248 else None 249 249 ··· 252 252 let re = Str.regexp {|Start tag "\([^"]+\)" seen in "table"|} in 253 253 if Str.string_match re msg 0 then 254 254 let tag = Str.matched_group 1 msg in 255 - Some ((`Tag (`Start_in_table (`Tag tag)) : Html5_checker.Error_code.t), 255 + Some ((`Tag (`Start_in_table (`Tag tag)) : Htmlrw_check.Error_code.t), 256 256 Some tag, None) 257 257 else None 258 258 ··· 335 335 336 336 let matches ~strictness ~expected ~actual = 337 337 let expected_norm = normalize_quotes expected.message in 338 - let actual_norm = normalize_quotes actual.Html5_checker.Message.message in 338 + let actual_norm = normalize_quotes actual.Htmlrw_check.text in 339 339 340 340 (* Check severity match *) 341 341 let severity_matches = 342 - match (expected.severity, actual.Html5_checker.Message.severity) with 342 + match (expected.severity, actual.Htmlrw_check.severity) with 343 343 | (None, _) -> true 344 - | (Some `Error, Html5_checker.Message.Error) -> true 345 - | (Some `Warning, Html5_checker.Message.Warning) -> true 346 - | (Some `Info, Html5_checker.Message.Info) -> true 344 + | (Some `Error, Htmlrw_check.Error) -> true 345 + | (Some `Warning, Htmlrw_check.Warning) -> true 346 + | (Some `Info, Htmlrw_check.Info) -> true 347 347 | _ -> false 348 348 in 349 349 350 350 (* Check location match *) 351 351 let location_matches = 352 - match (expected.line, expected.column, actual.Html5_checker.Message.location) with 352 + match (expected.line, expected.column, actual.Htmlrw_check.location) with 353 353 | (None, None, _) -> true 354 354 | (Some el, Some ec, Some loc) -> loc.line = el && loc.column = ec 355 355 | (Some el, None, Some loc) -> loc.line = el ··· 358 358 359 359 (* Check error code match *) 360 360 let code_matches = 361 - match (expected.error_code, actual.Html5_checker.Message.error_code) with 361 + match (expected.error_code, actual.Htmlrw_check.error_code) with 362 362 | (None, _) -> true (* No expected code to match *) 363 363 | (Some ec, Some ac) -> error_codes_match ec ac 364 364 | (Some _, None) -> false (* Expected typed but got untyped *)
+2 -2
test/expected_message.mli
··· 6 6 (** Structured expected message *) 7 7 type t = { 8 8 message: string; (** Full message text *) 9 - error_code: Html5_checker.Error_code.t option; (** Parsed typed code *) 9 + error_code: Htmlrw_check.Error_code.t option; (** Parsed typed code *) 10 10 line: int option; (** Expected line number *) 11 11 column: int option; (** Expected column number *) 12 12 element: string option; (** Element context *) ··· 60 60 61 61 (** Check if actual message matches expected. 62 62 Returns the quality of match achieved. *) 63 - val matches : strictness:strictness -> expected:t -> actual:Html5_checker.Message.t -> match_quality 63 + val matches : strictness:strictness -> expected:t -> actual:Htmlrw_check.message -> match_quality 64 64 65 65 (** Check if match quality is acceptable given strictness *) 66 66 val is_acceptable : strictness:strictness -> match_quality -> bool
+47 -47
test/test_html5_checker.ml
··· 5 5 6 6 (** Helper to check if a message contains a substring *) 7 7 let message_contains msg substring = 8 - String.lowercase_ascii msg.Html5_checker.Message.message 8 + String.lowercase_ascii msg.Htmlrw_check.text 9 9 |> fun s -> String.length s >= String.length substring && 10 10 try 11 11 ignore (Str.search_forward (Str.regexp_case_fold (Str.quote substring)) s 0); ··· 21 21 <body><p>Hello world</p></body> 22 22 </html>|} in 23 23 let reader = reader_of_string html in 24 - let result = Html5_checker.check reader in 25 - let errors = Html5_checker.errors result in 24 + let result = Htmlrw_check.check reader in 25 + let errors = Htmlrw_check.errors result in 26 26 Printf.printf " Found %d error(s)\n" (List.length errors); 27 27 if List.length errors > 0 then begin 28 28 List.iter (fun msg -> 29 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 29 + Printf.printf " - %s\n" msg.Htmlrw_check.text 30 30 ) errors; 31 31 end else 32 32 Printf.printf " OK: No errors as expected\n" ··· 36 36 Printf.printf "\nTest 2: Missing DOCTYPE\n"; 37 37 let html = "<html><body>Hello</body></html>" in 38 38 let reader = reader_of_string html in 39 - let result = Html5_checker.check reader in 40 - let errors = Html5_checker.errors result in 39 + let result = Htmlrw_check.check reader in 40 + let errors = Htmlrw_check.errors result in 41 41 Printf.printf " Found %d error(s)\n" (List.length errors); 42 42 if List.length errors = 0 then 43 43 Printf.printf " Warning: Expected parse errors for missing DOCTYPE\n" 44 44 else begin 45 45 List.iter (fun msg -> 46 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 46 + Printf.printf " - %s\n" msg.Htmlrw_check.text 47 47 ) errors; 48 48 end 49 49 ··· 52 52 Printf.printf "\nTest 3: Obsolete <center> element\n"; 53 53 let html = "<!DOCTYPE html><html><body><center>Centered</center></body></html>" in 54 54 let reader = reader_of_string html in 55 - let result = Html5_checker.check reader in 56 - let all_msgs = Html5_checker.messages result in 55 + let result = Htmlrw_check.check reader in 56 + let all_msgs = Htmlrw_check.messages result in 57 57 Printf.printf " Found %d message(s)\n" (List.length all_msgs); 58 58 let obsolete_msgs = List.filter (fun m -> 59 59 message_contains m "obsolete" || message_contains m "center" ··· 61 61 if List.length obsolete_msgs > 0 then begin 62 62 Printf.printf " Found obsolete-related messages:\n"; 63 63 List.iter (fun msg -> 64 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 64 + Printf.printf " - %s\n" msg.Htmlrw_check.text 65 65 ) obsolete_msgs; 66 66 end else 67 67 Printf.printf " Note: No obsolete element warnings found (checker may not be enabled)\n" ··· 74 74 <div id="foo">Second</div> 75 75 </body></html>|} in 76 76 let reader = reader_of_string html in 77 - let result = Html5_checker.check reader in 78 - let all_msgs = Html5_checker.messages result in 77 + let result = Htmlrw_check.check reader in 78 + let all_msgs = Htmlrw_check.messages result in 79 79 Printf.printf " Found %d message(s)\n" (List.length all_msgs); 80 80 let id_msgs = List.filter (fun m -> 81 81 message_contains m "duplicate" || message_contains m "id" ··· 83 83 if List.length id_msgs > 0 then begin 84 84 Printf.printf " Found ID-related messages:\n"; 85 85 List.iter (fun msg -> 86 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 86 + Printf.printf " - %s\n" msg.Htmlrw_check.text 87 87 ) id_msgs; 88 88 end else 89 89 Printf.printf " Note: No duplicate ID errors found (checker may not be enabled)\n" ··· 96 96 <h3>Skipped h2</h3> 97 97 </body></html>|} in 98 98 let reader = reader_of_string html in 99 - let result = Html5_checker.check reader in 100 - let all_msgs = Html5_checker.messages result in 99 + let result = Htmlrw_check.check reader in 100 + let all_msgs = Htmlrw_check.messages result in 101 101 Printf.printf " Found %d message(s)\n" (List.length all_msgs); 102 102 let heading_msgs = List.filter (fun m -> 103 103 message_contains m "heading" || message_contains m "skip" ··· 105 105 if List.length heading_msgs > 0 then begin 106 106 Printf.printf " Found heading-related messages:\n"; 107 107 List.iter (fun msg -> 108 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 108 + Printf.printf " - %s\n" msg.Htmlrw_check.text 109 109 ) heading_msgs; 110 110 end else 111 111 Printf.printf " Note: No heading structure warnings found (checker may not be enabled)\n" ··· 117 117 <img src="test.jpg"> 118 118 </body></html>|} in 119 119 let reader = reader_of_string html in 120 - let result = Html5_checker.check reader in 121 - let all_msgs = Html5_checker.messages result in 120 + let result = Htmlrw_check.check reader in 121 + let all_msgs = Htmlrw_check.messages result in 122 122 Printf.printf " Found %d message(s)\n" (List.length all_msgs); 123 123 let img_msgs = List.filter (fun m -> 124 124 message_contains m "alt" || (message_contains m "img" && message_contains m "attribute") ··· 126 126 if List.length img_msgs > 0 then begin 127 127 Printf.printf " Found img/alt-related messages:\n"; 128 128 List.iter (fun msg -> 129 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 129 + Printf.printf " - %s\n" msg.Htmlrw_check.text 130 130 ) img_msgs; 131 131 end else 132 132 Printf.printf " Note: No missing alt attribute errors found (checker may not be enabled)\n" ··· 138 138 <a href="#">Link <a href="#">Nested</a></a> 139 139 </body></html>|} in 140 140 let reader = reader_of_string html in 141 - let result = Html5_checker.check reader in 142 - let all_msgs = Html5_checker.messages result in 141 + let result = Htmlrw_check.check reader in 142 + let all_msgs = Htmlrw_check.messages result in 143 143 Printf.printf " Found %d message(s)\n" (List.length all_msgs); 144 144 let nesting_msgs = List.filter (fun m -> 145 145 message_contains m "nesting" || message_contains m "nested" || message_contains m "ancestor" ··· 147 147 if List.length nesting_msgs > 0 then begin 148 148 Printf.printf " Found nesting-related messages:\n"; 149 149 List.iter (fun msg -> 150 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 150 + Printf.printf " - %s\n" msg.Htmlrw_check.text 151 151 ) nesting_msgs; 152 152 end else 153 153 Printf.printf " Note: No nesting errors found (checker may not be enabled)\n" ··· 159 159 <form><form></form></form> 160 160 </body></html>|} in 161 161 let reader = reader_of_string html in 162 - let result = Html5_checker.check reader in 163 - let all_msgs = Html5_checker.messages result in 162 + let result = Htmlrw_check.check reader in 163 + let all_msgs = Htmlrw_check.messages result in 164 164 Printf.printf " Found %d message(s)\n" (List.length all_msgs); 165 165 let form_msgs = List.filter (fun m -> 166 166 message_contains m "form" ··· 168 168 if List.length form_msgs > 0 then begin 169 169 Printf.printf " Found form-related messages:\n"; 170 170 List.iter (fun msg -> 171 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 171 + Printf.printf " - %s\n" msg.Htmlrw_check.text 172 172 ) form_msgs; 173 173 end else 174 174 Printf.printf " Note: No form nesting errors found (checker may not be enabled)\n" ··· 178 178 Printf.printf "\nTest 9: Output format testing\n"; 179 179 let html = {|<!DOCTYPE html><html><body><p>Test</p></body></html>|} in 180 180 let reader = reader_of_string html in 181 - let result = Html5_checker.check reader in 181 + let result = Htmlrw_check.check reader in 182 182 183 183 Printf.printf " Testing text format:\n"; 184 - let text_output = Html5_checker.format_text result in 184 + let text_output = Htmlrw_check.to_text result in 185 185 Printf.printf " Length: %d chars\n" (String.length text_output); 186 186 187 187 Printf.printf " Testing JSON format:\n"; 188 - let json_output = Html5_checker.format_json result in 188 + let json_output = Htmlrw_check.to_json result in 189 189 Printf.printf " Length: %d chars\n" (String.length json_output); 190 190 191 191 Printf.printf " Testing GNU format:\n"; 192 - let gnu_output = Html5_checker.format_gnu result in 192 + let gnu_output = Htmlrw_check.to_gnu result in 193 193 Printf.printf " Length: %d chars\n" (String.length gnu_output) 194 194 195 195 (** Test has_errors function *) ··· 198 198 199 199 (* Valid document should have no errors *) 200 200 let valid_html = "<!DOCTYPE html><html><body><p>Valid</p></body></html>" in 201 - let result1 = Html5_checker.check (reader_of_string valid_html) in 202 - Printf.printf " Valid document has_errors: %b\n" (Html5_checker.has_errors result1); 201 + let result1 = Htmlrw_check.check (reader_of_string valid_html) in 202 + Printf.printf " Valid document has_errors: %b\n" (Htmlrw_check.has_errors result1); 203 203 204 204 (* Document with likely parse errors *) 205 205 let invalid_html = "<html><body><p>Unclosed" in 206 - let result2 = Html5_checker.check (reader_of_string invalid_html) in 207 - Printf.printf " Invalid document has_errors: %b\n" (Html5_checker.has_errors result2) 206 + let result2 = Htmlrw_check.check (reader_of_string invalid_html) in 207 + Printf.printf " Invalid document has_errors: %b\n" (Htmlrw_check.has_errors result2) 208 208 209 209 (** Test check_dom with pre-parsed document *) 210 210 let test_check_dom () = ··· 212 212 let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in 213 213 let reader = reader_of_string html in 214 214 let parsed = Html5rw.parse reader in 215 - let result = Html5_checker.check_dom parsed in 216 - let all_msgs = Html5_checker.messages result in 215 + let result = Htmlrw_check.check_parsed parsed in 216 + let all_msgs = Htmlrw_check.messages result in 217 217 Printf.printf " check_dom found %d message(s)\n" (List.length all_msgs); 218 218 Printf.printf " OK: check_dom completed successfully\n" 219 219 ··· 222 222 Printf.printf "\nTest 12: system_id parameter\n"; 223 223 let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in 224 224 let reader = reader_of_string html in 225 - let result = Html5_checker.check ~system_id:"test.html" reader in 226 - match Html5_checker.system_id result with 225 + let result = Htmlrw_check.check ~system_id:"test.html" reader in 226 + match Htmlrw_check.system_id result with 227 227 | Some id -> Printf.printf " system_id: %s\n" id 228 228 | None -> Printf.printf " Warning: system_id not set\n" 229 229 ··· 232 232 Printf.printf "\nTest 13: collect_parse_errors flag\n"; 233 233 let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in 234 234 235 - let result_with = Html5_checker.check ~collect_parse_errors:true (reader_of_string html) in 236 - let msgs_with = Html5_checker.messages result_with in 235 + let result_with = Htmlrw_check.check ~collect_parse_errors:true (reader_of_string html) in 236 + let msgs_with = Htmlrw_check.messages result_with in 237 237 Printf.printf " With parse errors: %d message(s)\n" (List.length msgs_with); 238 238 239 - let result_without = Html5_checker.check ~collect_parse_errors:false (reader_of_string html) in 240 - let msgs_without = Html5_checker.messages result_without in 239 + let result_without = Htmlrw_check.check ~collect_parse_errors:false (reader_of_string html) in 240 + let msgs_without = Htmlrw_check.messages result_without in 241 241 Printf.printf " Without parse errors: %d message(s)\n" (List.length msgs_without) 242 242 243 243 (** Test document accessor *) ··· 245 245 Printf.printf "\nTest 14: document accessor\n"; 246 246 let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in 247 247 let reader = reader_of_string html in 248 - let result = Html5_checker.check reader in 249 - let _doc = Html5_checker.document result in 248 + let result = Htmlrw_check.check reader in 249 + let _doc = Htmlrw_check.document result in 250 250 Printf.printf " OK: document accessor works\n" 251 251 252 252 (** Test message severity filtering *) ··· 254 254 Printf.printf "\nTest 15: Message severity filtering\n"; 255 255 let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in 256 256 let reader = reader_of_string html in 257 - let result = Html5_checker.check reader in 257 + let result = Htmlrw_check.check reader in 258 258 259 - let all_msgs = Html5_checker.messages result in 260 - let errors = Html5_checker.errors result in 261 - let warnings = Html5_checker.warnings result in 259 + let all_msgs = Htmlrw_check.messages result in 260 + let errors = Htmlrw_check.errors result in 261 + let warnings = Htmlrw_check.warnings result in 262 262 263 263 Printf.printf " Total messages: %d\n" (List.length all_msgs); 264 264 Printf.printf " Errors: %d\n" (List.length errors);
+34 -92
test/test_nesting_checker.ml
··· 1 - (** Simple test for nesting_checker functionality *) 2 - 3 - let () = 4 - (* Create a message collector *) 5 - let collector = Html5_checker.Message_collector.create () in 1 + (** Test for nesting checker functionality via public API *) 6 2 7 - (* Get the nesting checker *) 8 - let module C = (val Html5_checker__Nesting_checker.checker : Html5_checker__Checker.S) in 9 - let state = C.create () in 3 + let check_html html = 4 + let reader = Bytesrw.Bytes.Reader.of_string html in 5 + Htmlrw_check.check reader 10 6 7 + let () = 11 8 (* Test 1: <a> cannot contain another <a> *) 12 9 Printf.printf "Test 1: Checking <a href> inside <a href>\n"; 13 - C.start_element state ~name:"a" ~namespace:None ~attrs:[("href", "#")] collector; 14 - C.start_element state ~name:"a" ~namespace:None ~attrs:[("href", "#")] collector; 15 - 16 - let errors1 = Html5_checker.Message_collector.errors collector in 10 + let result1 = check_html "<a href='#'><a href='#'>nested</a></a>" in 11 + let errors1 = Htmlrw_check.errors result1 in 17 12 Printf.printf " Found %d error(s)\n" (List.length errors1); 18 13 List.iter (fun msg -> 19 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 14 + Printf.printf " - %s\n" msg.Htmlrw_check.text 20 15 ) errors1; 21 16 22 - C.end_element state ~name:"a" ~namespace:None collector; 23 - C.end_element state ~name:"a" ~namespace:None collector; 24 - Html5_checker.Message_collector.clear collector; 25 - 26 17 (* Test 2: <button> inside <a> *) 27 18 Printf.printf "\nTest 2: Checking <button> inside <a href>\n"; 28 - C.start_element state ~name:"a" ~namespace:None ~attrs:[("href", "#")] collector; 29 - C.start_element state ~name:"button" ~namespace:None ~attrs:[] collector; 30 - 31 - let errors2 = Html5_checker.Message_collector.errors collector in 19 + let result2 = check_html "<a href='#'><button>click</button></a>" in 20 + let errors2 = Htmlrw_check.errors result2 in 32 21 Printf.printf " Found %d error(s)\n" (List.length errors2); 33 22 List.iter (fun msg -> 34 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 23 + Printf.printf " - %s\n" msg.Htmlrw_check.text 35 24 ) errors2; 36 25 37 - C.end_element state ~name:"button" ~namespace:None collector; 38 - C.end_element state ~name:"a" ~namespace:None collector; 39 - Html5_checker.Message_collector.clear collector; 40 - 41 26 (* Test 3: form inside form *) 42 27 Printf.printf "\nTest 3: Checking <form> inside <form>\n"; 43 - C.start_element state ~name:"form" ~namespace:None ~attrs:[] collector; 44 - C.start_element state ~name:"form" ~namespace:None ~attrs:[] collector; 45 - 46 - let errors3 = Html5_checker.Message_collector.errors collector in 28 + let result3 = check_html "<form><form>nested</form></form>" in 29 + let errors3 = Htmlrw_check.errors result3 in 47 30 Printf.printf " Found %d error(s)\n" (List.length errors3); 48 31 List.iter (fun msg -> 49 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 32 + Printf.printf " - %s\n" msg.Htmlrw_check.text 50 33 ) errors3; 51 34 52 - C.end_element state ~name:"form" ~namespace:None collector; 53 - C.end_element state ~name:"form" ~namespace:None collector; 54 - Html5_checker.Message_collector.clear collector; 55 - 56 - (* Test 4: header inside footer *) 35 + (* Test 4: header inside footer (should be allowed) *) 57 36 Printf.printf "\nTest 4: Checking <header> inside <footer>\n"; 58 - C.start_element state ~name:"footer" ~namespace:None ~attrs:[] collector; 59 - C.start_element state ~name:"header" ~namespace:None ~attrs:[] collector; 60 - 61 - let errors4 = Html5_checker.Message_collector.errors collector in 37 + let result4 = check_html "<footer><header>test</header></footer>" in 38 + let errors4 = Htmlrw_check.errors result4 in 62 39 Printf.printf " Found %d error(s)\n" (List.length errors4); 63 - List.iter (fun msg -> 64 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 65 - ) errors4; 66 - 67 - C.end_element state ~name:"header" ~namespace:None collector; 68 - C.end_element state ~name:"footer" ~namespace:None collector; 69 - Html5_checker.Message_collector.clear collector; 40 + if List.length errors4 > 0 then 41 + List.iter (fun msg -> 42 + Printf.printf " - %s\n" msg.Htmlrw_check.text 43 + ) errors4 44 + else 45 + Printf.printf " OK: No errors (header inside footer is valid)\n"; 70 46 71 - (* Test 5: input (not hidden) inside button *) 47 + (* Test 5: input inside button *) 72 48 Printf.printf "\nTest 5: Checking <input type=text> inside <button>\n"; 73 - C.start_element state ~name:"button" ~namespace:None ~attrs:[] collector; 74 - C.start_element state ~name:"input" ~namespace:None ~attrs:[("type", "text")] collector; 75 - 76 - let errors5 = Html5_checker.Message_collector.errors collector in 49 + let result5 = check_html "<button><input type='text'></button>" in 50 + let errors5 = Htmlrw_check.errors result5 in 77 51 Printf.printf " Found %d error(s)\n" (List.length errors5); 78 52 List.iter (fun msg -> 79 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 53 + Printf.printf " - %s\n" msg.Htmlrw_check.text 80 54 ) errors5; 81 55 82 - C.end_element state ~name:"input" ~namespace:None collector; 83 - C.end_element state ~name:"button" ~namespace:None collector; 84 - Html5_checker.Message_collector.clear collector; 85 - 86 56 (* Test 6: valid nesting - should not error *) 87 57 Printf.printf "\nTest 6: Checking valid nesting: <div> inside <div>\n"; 88 - C.start_element state ~name:"div" ~namespace:None ~attrs:[] collector; 89 - C.start_element state ~name:"div" ~namespace:None ~attrs:[] collector; 90 - 91 - let errors6 = Html5_checker.Message_collector.errors collector in 58 + let result6 = check_html "<div><div>nested</div></div>" in 59 + let errors6 = Htmlrw_check.errors result6 in 92 60 Printf.printf " Found %d error(s)\n" (List.length errors6); 93 61 if List.length errors6 = 0 then 94 - Printf.printf " OK: No errors as expected\n"; 95 - 96 - C.end_element state ~name:"div" ~namespace:None collector; 97 - C.end_element state ~name:"div" ~namespace:None collector; 98 - Html5_checker.Message_collector.clear collector; 99 - 100 - (* Test 7: area without map ancestor *) 101 - Printf.printf "\nTest 7: Checking <area> without <map> ancestor\n"; 102 - C.start_element state ~name:"area" ~namespace:None ~attrs:[] collector; 103 - 104 - let errors7 = Html5_checker.Message_collector.errors collector in 105 - Printf.printf " Found %d error(s)\n" (List.length errors7); 106 - List.iter (fun msg -> 107 - Printf.printf " - %s\n" msg.Html5_checker.Message.message 108 - ) errors7; 109 - 110 - C.end_element state ~name:"area" ~namespace:None collector; 111 - Html5_checker.Message_collector.clear collector; 112 - 113 - (* Test 8: area with map ancestor (valid) *) 114 - Printf.printf "\nTest 8: Checking <area> with <map> ancestor (valid)\n"; 115 - C.start_element state ~name:"map" ~namespace:None ~attrs:[] collector; 116 - C.start_element state ~name:"area" ~namespace:None ~attrs:[] collector; 117 - 118 - let errors8 = Html5_checker.Message_collector.errors collector in 119 - Printf.printf " Found %d error(s)\n" (List.length errors8); 120 - if List.length errors8 = 0 then 121 - Printf.printf " OK: No errors as expected\n"; 122 - 123 - C.end_element state ~name:"area" ~namespace:None collector; 124 - C.end_element state ~name:"map" ~namespace:None collector; 62 + Printf.printf " OK: No errors as expected\n" 63 + else 64 + List.iter (fun msg -> 65 + Printf.printf " - %s\n" msg.Htmlrw_check.text 66 + ) errors6; 125 67 126 68 Printf.printf "\nAll tests completed!\n"
+3 -3
test/test_nfc_debug.ml
··· 3 3 In_channel.input_all ic 4 4 ) in 5 5 let reader = Bytesrw.Bytes.Reader.of_string content in 6 - let result = Html5_checker.check ~system_id:"test.html" reader in 7 - let warnings = Html5_checker.warnings result in 6 + let result = Htmlrw_check.check ~system_id:"test.html" reader in 7 + let warnings = Htmlrw_check.warnings result in 8 8 Printf.printf "Total warnings: %d\n" (List.length warnings); 9 9 List.iter (fun msg -> 10 - Printf.printf "WARNING: %s\n" (Html5_checker.Message.message msg) 10 + Printf.printf "WARNING: %s\n" msg.Htmlrw_check.text 11 11 ) warnings
+6 -6
test/test_roundtrip.ml
··· 37 37 38 38 (* Count errors in validation result *) 39 39 let count_errors messages = 40 - List.length (List.filter (fun (m : Html5_checker.Message.t) -> 41 - m.severity = Html5_checker.Message.Error 40 + List.length (List.filter (fun (m : Htmlrw_check.message) -> 41 + m.severity = Htmlrw_check.Error 42 42 ) messages) 43 43 44 44 (* Serialize a document to HTML string *) ··· 65 65 let original_doc = Html5rw.root original_result in 66 66 67 67 (* Validate original *) 68 - let checker_result = Html5_checker.check_dom ~system_id:path original_result in 69 - let original_messages = Html5_checker.messages checker_result in 68 + let checker_result = Htmlrw_check.check_parsed ~system_id:path original_result in 69 + let original_messages = Htmlrw_check.messages checker_result in 70 70 let original_errors = count_errors original_messages in 71 71 let original_valid = original_errors = 0 in 72 72 ··· 77 77 let roundtrip_result = Html5rw.parse_bytes (Bytes.of_string serialized) in 78 78 79 79 (* Validate roundtripped document *) 80 - let roundtrip_checker = Html5_checker.check_dom ~system_id:path roundtrip_result in 81 - let roundtrip_messages = Html5_checker.messages roundtrip_checker in 80 + let roundtrip_checker = Htmlrw_check.check_parsed ~system_id:path roundtrip_result in 81 + let roundtrip_messages = Htmlrw_check.messages roundtrip_checker in 82 82 let roundtrip_errors = count_errors roundtrip_messages in 83 83 let roundtrip_valid = roundtrip_errors = 0 in 84 84
+7 -7
test/test_validator.ml
··· 105 105 close_in ic; 106 106 107 107 let reader = Bytesrw.Bytes.Reader.of_string content in 108 - let result = Html5_checker.check ~collect_parse_errors:true ~system_id:test.relative_path reader in 108 + let result = Htmlrw_check.check ~collect_parse_errors:true ~system_id:test.relative_path reader in 109 109 110 110 (* Keep full message objects for proper matching *) 111 - let error_msgs = Html5_checker.errors result in 112 - let warning_msgs = Html5_checker.warnings result in 113 - let info_msgs = Html5_checker.infos result in 111 + let error_msgs = Htmlrw_check.errors result in 112 + let warning_msgs = Htmlrw_check.warnings result in 113 + let info_msgs = Htmlrw_check.infos result in 114 114 115 115 (* Extract text for reporting *) 116 - let errors = List.map (fun m -> m.Html5_checker.Message.message) error_msgs in 117 - let warnings = List.map (fun m -> m.Html5_checker.Message.message) warning_msgs in 118 - let infos = List.map (fun m -> m.Html5_checker.Message.message) info_msgs in 116 + let errors = List.map (fun m -> m.Htmlrw_check.text) error_msgs in 117 + let warnings = List.map (fun m -> m.Htmlrw_check.text) warning_msgs in 118 + let infos = List.map (fun m -> m.Htmlrw_check.text) info_msgs in 119 119 let expected_msg = Validator_messages.get messages test.relative_path in 120 120 121 121 let (passed, match_quality, details) = match test.expected with