OCaml HTML5 parser/serialiser based on Python's JustHTML
(** Tests for the html5_checker library.

    These are smoke tests: each one runs the checker over a small document
    and prints what it found on standard output. Nothing in this file asserts
    or exits with a failure status; lines starting with [Note:] or [Warning:]
    flag results that deserve a manual look. *)

(** [reader_of_string s] creates a reader over the string [s]. *)
let reader_of_string s = Bytesrw.Bytes.Reader.of_string s

(** [check_string html] runs [Html5_checker.check] with default options on
    the document text [html]. *)
let check_string html = Html5_checker.check (reader_of_string html)

(** [message_contains msg substring] is [true] when the text of [msg]
    contains [substring], ignoring case.

    [substring] is matched literally: it is passed through [Str.quote], so
    regular-expression metacharacters in it have no special meaning. *)
let message_contains msg substring =
  let pattern = Str.regexp_case_fold (Str.quote substring) in
  match Str.search_forward pattern msg.Html5_checker.Message.message 0 with
  | (_ : int) -> true
  | exception Not_found -> false

(** [print_messages msgs] prints the text of each message in [msgs] as one
    bullet line, in list order. *)
let print_messages msgs =
  List.iter
    (fun msg -> Printf.printf " - %s\n" msg.Html5_checker.Message.message)
    msgs

(** [report_matching ~topic ~expected ~matches html] checks [html], prints
    the total number of messages, then prints the messages selected by
    [matches].

    @param topic short label used in the "Found ...-related messages" line.
    @param expected description of the diagnostics the test hoped to see,
      used in the [Note:] line printed when nothing matched.
    @param matches predicate selecting the messages relevant to the test. *)
let report_matching ~topic ~expected ~matches html =
  let all_msgs = Html5_checker.messages (check_string html) in
  Printf.printf " Found %d message(s)\n" (List.length all_msgs);
  match List.filter matches all_msgs with
  | [] ->
    Printf.printf " Note: No %s found (checker may not be enabled)\n" expected
  | relevant ->
    Printf.printf " Found %s-related messages:\n" topic;
    print_messages relevant

(** Test that valid HTML5 produces no errors. Any errors reported are
    printed; otherwise an OK line is printed. *)
let test_valid_html5 () =
  Printf.printf "Test 1: Valid HTML5 document\n";
  let html = {|<!DOCTYPE html>
<html lang="en">
<head><title>Test</title></head>
<body><p>Hello world</p></body>
</html>|} in
  let errors = Html5_checker.errors (check_string html) in
  Printf.printf " Found %d error(s)\n" (List.length errors);
  match errors with
  | [] -> Printf.printf " OK: No errors as expected\n"
  | _ :: _ -> print_messages errors

(** Test that missing DOCTYPE is detected. Prints a warning when the checker
    reports no error at all for a document without a DOCTYPE. *)
let test_missing_doctype () =
  Printf.printf "\nTest 2: Missing DOCTYPE\n";
  let errors =
    Html5_checker.errors (check_string "<html><body>Hello</body></html>")
  in
  Printf.printf " Found %d error(s)\n" (List.length errors);
  match errors with
  | [] ->
    Printf.printf " Warning: Expected parse errors for missing DOCTYPE\n"
  | _ :: _ -> print_messages errors

(** Test that obsolete elements are detected. *)
let test_obsolete_element () =
  Printf.printf "\nTest 3: Obsolete <center> element\n";
  report_matching
    ~topic:"obsolete"
    ~expected:"obsolete element warnings"
    ~matches:(fun m ->
      message_contains m "obsolete" || message_contains m "center")
    "<!DOCTYPE html><html><body><center>Centered</center></body></html>"

(** Test duplicate IDs.

    NOTE(review): the [id] filter is a plain substring match, so it also
    selects messages containing words such as [invalid] or [inside]; the
    messages printed here may not all be about IDs. *)
let test_duplicate_id () =
  Printf.printf "\nTest 4: Duplicate ID attributes\n";
  report_matching
    ~topic:"ID"
    ~expected:"duplicate ID errors"
    ~matches:(fun m ->
      message_contains m "duplicate" || message_contains m "id")
    {|<!DOCTYPE html><html><body>
    <div id="foo">First</div>
    <div id="foo">Second</div>
  </body></html>|}

(** Test heading structure: an [h1] followed directly by an [h3]. *)
let test_heading_skip () =
  Printf.printf "\nTest 5: Skipped heading level\n";
  report_matching
    ~topic:"heading"
    ~expected:"heading structure warnings"
    ~matches:(fun m ->
      message_contains m "heading" || message_contains m "skip")
    {|<!DOCTYPE html><html><body>
    <h1>Title</h1>
    <h3>Skipped h2</h3>
  </body></html>|}

(** Test img without alt. *)
let test_img_without_alt () =
  Printf.printf "\nTest 6: Image without alt attribute\n";
  report_matching
    ~topic:"img/alt"
    ~expected:"missing alt attribute errors"
    ~matches:(fun m ->
      message_contains m "alt"
      || (message_contains m "img" && message_contains m "attribute"))
    {|<!DOCTYPE html><html><body>
    <img src="test.jpg">
  </body></html>|}

(** Test invalid nesting: an [a] element inside another [a] element. *)
let test_invalid_nesting () =
  Printf.printf "\nTest 7: Invalid nesting - <a> inside <a>\n";
  report_matching
    ~topic:"nesting"
    ~expected:"nesting errors"
    ~matches:(fun m ->
      message_contains m "nesting"
      || message_contains m "nested"
      || message_contains m "ancestor")
    {|<!DOCTYPE html><html><body>
    <a href="#">Link <a href="#">Nested</a></a>
  </body></html>|}

(** Test form inside form. *)
let test_form_nesting () =
  Printf.printf "\nTest 8: Invalid nesting - <form> inside <form>\n";
  report_matching
    ~topic:"form"
    ~expected:"form nesting errors"
    ~matches:(fun m -> message_contains m "form")
    {|<!DOCTYPE html><html><body>
    <form><form></form></form>
  </body></html>|}

(** Test output formatting: renders one result in the text, JSON and GNU
    formats and prints the length of each rendering. *)
let test_output_formats () =
  Printf.printf "\nTest 9: Output format testing\n";
  let result =
    check_string {|<!DOCTYPE html><html><body><p>Test</p></body></html>|}
  in

  Printf.printf " Testing text format:\n";
  let text_output = Html5_checker.format_text result in
  Printf.printf " Length: %d chars\n" (String.length text_output);

  Printf.printf " Testing JSON format:\n";
  let json_output = Html5_checker.format_json result in
  Printf.printf " Length: %d chars\n" (String.length json_output);

  Printf.printf " Testing GNU format:\n";
  let gnu_output = Html5_checker.format_gnu result in
  Printf.printf " Length: %d chars\n" (String.length gnu_output)

(** Test has_errors function on one well-formed and one truncated document.
    The two booleans are printed, not checked. *)
let test_has_errors () =
  Printf.printf "\nTest 10: has_errors function\n";

  (* Valid document should have no errors *)
  let valid =
    check_string "<!DOCTYPE html><html><body><p>Valid</p></body></html>"
  in
  Printf.printf " Valid document has_errors: %b\n"
    (Html5_checker.has_errors valid);

  (* Document with likely parse errors *)
  let invalid = check_string "<html><body><p>Unclosed" in
  Printf.printf " Invalid document has_errors: %b\n"
    (Html5_checker.has_errors invalid)

(** Test check_dom with pre-parsed document: the document is parsed with
    [Html5rw.parse] first and the parse result is handed to the checker. *)
let test_check_dom () =
  Printf.printf "\nTest 11: check_dom with pre-parsed document\n";
  let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
  let parsed = Html5rw.parse (reader_of_string html) in
  let all_msgs = Html5_checker.messages (Html5_checker.check_dom parsed) in
  Printf.printf " check_dom found %d message(s)\n" (List.length all_msgs);
  Printf.printf " OK: check_dom completed successfully\n"

(** Test system_id parameter: prints the identifier read back from the
    result, or a warning when the result carries none. *)
let test_system_id () =
  Printf.printf "\nTest 12: system_id parameter\n";
  let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
  let result =
    Html5_checker.check ~system_id:"test.html" (reader_of_string html)
  in
  match Html5_checker.system_id result with
  | Some id -> Printf.printf " system_id: %s\n" id
  | None -> Printf.printf " Warning: system_id not set\n"

(** Test collect_parse_errors flag: checks the same document with the flag
    on and off and prints both message counts. *)
let test_collect_parse_errors_flag () =
  Printf.printf "\nTest 13: collect_parse_errors flag\n";
  let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in

  let msgs_with =
    Html5_checker.check ~collect_parse_errors:true (reader_of_string html)
    |> Html5_checker.messages
  in
  Printf.printf " With parse errors: %d message(s)\n" (List.length msgs_with);

  let msgs_without =
    Html5_checker.check ~collect_parse_errors:false (reader_of_string html)
    |> Html5_checker.messages
  in
  Printf.printf " Without parse errors: %d message(s)\n"
    (List.length msgs_without)

(** Test document accessor: only checks that the call returns. *)
let test_document_accessor () =
  Printf.printf "\nTest 14: document accessor\n";
  let result =
    check_string "<!DOCTYPE html><html><body><p>Test</p></body></html>"
  in
  let _doc = Html5_checker.document result in
  Printf.printf " OK: document accessor works\n"

(** Test message severity filtering: prints the number of messages, errors
    and warnings, and whether the three counts are consistent. *)
let test_severity_filtering () =
  Printf.printf "\nTest 15: Message severity filtering\n";
  let result =
    check_string "<!DOCTYPE html><html><body><p>Test</p></body></html>"
  in

  let total = List.length (Html5_checker.messages result) in
  let error_count = List.length (Html5_checker.errors result) in
  let warning_count = List.length (Html5_checker.warnings result) in

  Printf.printf " Total messages: %d\n" total;
  Printf.printf " Errors: %d\n" error_count;
  Printf.printf " Warnings: %d\n" warning_count;

  (* Verify that errors + warnings <= all messages *)
  if error_count + warning_count <= total then
    Printf.printf " OK: Message counts are consistent\n"
  else
    Printf.printf " Warning: Message counts inconsistent\n"

(** Run all tests, in order. *)
let () =
  Printf.printf "Running html5_checker tests...\n";
  Printf.printf "========================================\n\n";

  List.iter
    (fun run_test -> run_test ())
    [ test_valid_html5;
      test_missing_doctype;
      test_obsolete_element;
      test_duplicate_id;
      test_heading_skip;
      test_img_without_alt;
      test_invalid_nesting;
      test_form_nesting;
      test_output_formats;
      test_has_errors;
      test_check_dom;
      test_system_id;
      test_collect_parse_errors_flag;
      test_document_accessor;
      test_severity_filtering ];

  Printf.printf "\n========================================\n";
  Printf.printf "All tests completed!\n";
  Printf.printf "\nNote: Some checkers may not be enabled yet.\n";
  Printf.printf "Tests marked with 'Note:' indicate features that may be\n";
  Printf.printf "implemented in future versions.\n"