(* OCaml HTML5 parser/serialiser based on Python's JustHTML *)
1(** Tests for the html5_checker library *)
2
(** [reader_of_string input] builds a byte reader over the contents of the
    string [input] by delegating to [Bytesrw.Bytes.Reader.of_string]. *)
let reader_of_string input =
  let module Reader = Bytesrw.Bytes.Reader in
  Reader.of_string input
5
(** [message_contains msg substring] tells whether the [message] text of
    [msg] contains [substring].

    The message text is lowercased (ASCII) and [substring] is matched as a
    quoted, case-folding [Str] pattern, so the comparison ignores ASCII case.
    A message shorter than [substring] is rejected without searching. *)
let message_contains msg substring =
  let haystack = String.lowercase_ascii msg.Html5_checker.Message.message in
  if String.length haystack < String.length substring then false
  else
    let pattern = Str.regexp_case_fold (Str.quote substring) in
    match Str.search_forward pattern haystack 0 with
    | (_ : int) -> true
    | exception Not_found -> false
14
(** Test 1: run the checker over a small, well-formed HTML5 document and
    print how many errors it reported. Any errors are listed one per line;
    otherwise an "OK" line is printed. Nothing is asserted. *)
let test_valid_html5 () =
  print_string "Test 1: Valid HTML5 document\n";
  let document = {|<!DOCTYPE html>
<html lang="en">
<head><title>Test</title></head>
<body><p>Hello world</p></body>
</html>|} in
  let checked = Html5_checker.check (reader_of_string document) in
  let errors = Html5_checker.errors checked in
  Printf.printf " Found %d error(s)\n" (List.length errors);
  match errors with
  | [] -> print_string " OK: No errors as expected\n"
  | _ :: _ ->
    List.iter
      (fun { Html5_checker.Message.message; _ } ->
        Printf.printf " - %s\n" message)
      errors
33
(** Test 2: check a document that has no DOCTYPE and print the errors the
    checker reports. If no error is reported at all, a warning line is
    printed instead. Nothing is asserted. *)
let test_missing_doctype () =
  print_string "\nTest 2: Missing DOCTYPE\n";
  let checked =
    Html5_checker.check (reader_of_string "<html><body>Hello</body></html>")
  in
  let errors = Html5_checker.errors checked in
  let count = List.length errors in
  Printf.printf " Found %d error(s)\n" count;
  if count <> 0 then
    List.iter
      (fun m -> Printf.printf " - %s\n" m.Html5_checker.Message.message)
      errors
  else
    print_string " Warning: Expected parse errors for missing DOCTYPE\n"
49
(** Test 3: check a document that uses the [<center>] element and print the
    messages whose text mentions "obsolete" or "center". A note is printed
    when no such message is found. Nothing is asserted. *)
let test_obsolete_element () =
  print_string "\nTest 3: Obsolete <center> element\n";
  let markup =
    "<!DOCTYPE html><html><body><center>Centered</center></body></html>"
  in
  let messages =
    Html5_checker.messages (Html5_checker.check (reader_of_string markup))
  in
  Printf.printf " Found %d message(s)\n" (List.length messages);
  (* Keywords are tried in order, stopping at the first one that matches. *)
  let is_relevant m =
    List.exists (message_contains m) [ "obsolete"; "center" ]
  in
  match List.filter is_relevant messages with
  | [] ->
    print_string
      " Note: No obsolete element warnings found (checker may not be enabled)\n"
  | relevant ->
    print_string " Found obsolete-related messages:\n";
    List.iter
      (fun m -> Printf.printf " - %s\n" m.Html5_checker.Message.message)
      relevant
68
(** Test 4: check a document in which two [div] elements carry the same
    [id] value and print the messages that look related to duplicate IDs.

    A message is considered related when its text contains "duplicate"
    (via [message_contains]) or mentions [id] as a standalone word. A note
    is printed when no such message is found. Nothing is asserted. *)
let test_duplicate_id () =
  Printf.printf "\nTest 4: Duplicate ID attributes\n";
  let html = {|<!DOCTYPE html><html><body>
 <div id="foo">First</div>
 <div id="foo">Second</div>
 </body></html>|} in
  let reader = reader_of_string html in
  let result = Html5_checker.check reader in
  let all_msgs = Html5_checker.messages result in
  Printf.printf " Found %d message(s)\n" (List.length all_msgs);
  (* Look for "id" as a whole word only. A plain substring search for "id"
     also matches unrelated words such as "invalid", "width" or "inside",
     which would label arbitrary messages as ID-related. Every byte that is
     not an ASCII letter, digit or underscore is treated as a separator. *)
  let mentions_id m =
    m.Html5_checker.Message.message
    |> String.lowercase_ascii
    |> String.map (function
         | ('a' .. 'z' | '0' .. '9' | '_') as c -> c
         | _ -> ' ')
    |> String.split_on_char ' '
    |> List.mem "id"
  in
  let id_msgs =
    List.filter
      (fun m -> message_contains m "duplicate" || mentions_id m)
      all_msgs
  in
  match id_msgs with
  | [] ->
    Printf.printf
      " Note: No duplicate ID errors found (checker may not be enabled)\n"
  | _ :: _ ->
    Printf.printf " Found ID-related messages:\n";
    List.iter
      (fun msg -> Printf.printf " - %s\n" msg.Html5_checker.Message.message)
      id_msgs
90
(** Test 5: check a document whose headings jump from [h1] to [h3] and print
    the messages whose text mentions "heading" or "skip". A note is printed
    when no such message is found. Nothing is asserted. *)
let test_heading_skip () =
  print_string "\nTest 5: Skipped heading level\n";
  let markup = {|<!DOCTYPE html><html><body>
 <h1>Title</h1>
 <h3>Skipped h2</h3>
 </body></html>|} in
  let checked = Html5_checker.check (reader_of_string markup) in
  let messages = Html5_checker.messages checked in
  Printf.printf " Found %d message(s)\n" (List.length messages);
  let print_message { Html5_checker.Message.message; _ } =
    Printf.printf " - %s\n" message
  in
  let about_headings m =
    List.exists (fun keyword -> message_contains m keyword) [ "heading"; "skip" ]
  in
  match List.filter about_headings messages with
  | [] ->
    print_string
      " Note: No heading structure warnings found (checker may not be enabled)\n"
  | matches ->
    print_string " Found heading-related messages:\n";
    List.iter print_message matches
112
(** Test 6: check a document containing an [img] element without an [alt]
    attribute and print the messages that mention "alt", or that mention
    both "img" and "attribute". A note is printed when no such message is
    found. Nothing is asserted. *)
let test_img_without_alt () =
  print_string "\nTest 6: Image without alt attribute\n";
  let markup = {|<!DOCTYPE html><html><body>
 <img src="test.jpg">
 </body></html>|} in
  let messages =
    Html5_checker.messages (Html5_checker.check (reader_of_string markup))
  in
  Printf.printf " Found %d message(s)\n" (List.length messages);
  let about_alt m =
    if message_contains m "alt" then true
    else message_contains m "img" && message_contains m "attribute"
  in
  let matches = List.filter about_alt messages in
  if matches = [] then
    print_string
      " Note: No missing alt attribute errors found (checker may not be enabled)\n"
  else begin
    print_string " Found img/alt-related messages:\n";
    List.iter
      (fun m -> Printf.printf " - %s\n" m.Html5_checker.Message.message)
      matches
  end
133
(** Test 7: check a document with an [<a>] element nested inside another
    [<a>] and print the messages whose text mentions "nesting", "nested" or
    "ancestor". A note is printed when no such message is found. Nothing is
    asserted. *)
let test_invalid_nesting () =
  print_string "\nTest 7: Invalid nesting - <a> inside <a>\n";
  let markup = {|<!DOCTYPE html><html><body>
 <a href="#">Link <a href="#">Nested</a></a>
 </body></html>|} in
  let checked = Html5_checker.check (reader_of_string markup) in
  let messages = Html5_checker.messages checked in
  Printf.printf " Found %d message(s)\n" (List.length messages);
  (* Keywords are tried in order, stopping at the first one that matches. *)
  let keywords = [ "nesting"; "nested"; "ancestor" ] in
  let about_nesting m = List.exists (message_contains m) keywords in
  match List.filter about_nesting messages with
  | [] ->
    print_string " Note: No nesting errors found (checker may not be enabled)\n"
  | matches ->
    print_string " Found nesting-related messages:\n";
    List.iter
      (fun { Html5_checker.Message.message; _ } ->
        Printf.printf " - %s\n" message)
      matches
154
(** Test 8: check a document with a [<form>] nested inside another [<form>]
    and print the messages whose text mentions "form". A note is printed
    when no such message is found. Nothing is asserted. *)
let test_form_nesting () =
  print_string "\nTest 8: Invalid nesting - <form> inside <form>\n";
  let markup = {|<!DOCTYPE html><html><body>
 <form><form></form></form>
 </body></html>|} in
  let messages =
    Html5_checker.messages (Html5_checker.check (reader_of_string markup))
  in
  Printf.printf " Found %d message(s)\n" (List.length messages);
  let about_forms m = message_contains m "form" in
  let print_message m =
    Printf.printf " - %s\n" m.Html5_checker.Message.message
  in
  match List.filter about_forms messages with
  | [] ->
    print_string
      " Note: No form nesting errors found (checker may not be enabled)\n"
  | matches ->
    print_string " Found form-related messages:\n";
    List.iter print_message matches
175
(** Test 9: check a small document, render the result with the text, JSON
    and GNU formatters in that order, and print the length of each rendered
    string. Nothing is asserted. *)
let test_output_formats () =
  print_string "\nTest 9: Output format testing\n";
  let markup = {|<!DOCTYPE html><html><body><p>Test</p></body></html>|} in
  let result = Html5_checker.check (reader_of_string markup) in
  let print_length rendered =
    Printf.printf " Length: %d chars\n" (String.length rendered)
  in

  print_string " Testing text format:\n";
  print_length (Html5_checker.format_text result);

  print_string " Testing JSON format:\n";
  print_length (Html5_checker.format_json result);

  print_string " Testing GNU format:\n";
  print_length (Html5_checker.format_gnu result)
194
(** Test 10: print the value of [Html5_checker.has_errors] for a complete
    document with a DOCTYPE, then for a truncated document without one.
    Nothing is asserted. *)
let test_has_errors () =
  print_string "\nTest 10: has_errors function\n";
  let has_errors_in markup =
    Html5_checker.has_errors (Html5_checker.check (reader_of_string markup))
  in

  (* Complete document: expected to be reported as error-free. *)
  let valid_flag =
    has_errors_in "<!DOCTYPE html><html><body><p>Valid</p></body></html>"
  in
  Printf.printf " Valid document has_errors: %b\n" valid_flag;

  (* No DOCTYPE and unclosed elements: likely to produce parse errors. *)
  let invalid_flag = has_errors_in "<html><body><p>Unclosed" in
  Printf.printf " Invalid document has_errors: %b\n" invalid_flag
208
(** Test 11: parse a document with [Html5rw.parse], pass the parsed value to
    [Html5_checker.check_dom], and print the number of messages returned
    followed by a completion line. Nothing is asserted. *)
let test_check_dom () =
  print_string "\nTest 11: check_dom with pre-parsed document\n";
  let parsed =
    Html5rw.parse
      (reader_of_string "<!DOCTYPE html><html><body><p>Test</p></body></html>")
  in
  let checked = Html5_checker.check_dom parsed in
  let message_count = List.length (Html5_checker.messages checked) in
  Printf.printf " check_dom found %d message(s)\n" message_count;
  print_string " OK: check_dom completed successfully\n"
219
(** Test 12: run the checker with [~system_id:"test.html"] and print the
    identifier returned by [Html5_checker.system_id], or a warning line when
    it returns [None]. Nothing is asserted. *)
let test_system_id () =
  print_string "\nTest 12: system_id parameter\n";
  let checked =
    Html5_checker.check ~system_id:"test.html"
      (reader_of_string "<!DOCTYPE html><html><body><p>Test</p></body></html>")
  in
  let line =
    match Html5_checker.system_id checked with
    | None -> " Warning: system_id not set\n"
    | Some id -> Printf.sprintf " system_id: %s\n" id
  in
  print_string line
229
(** Test 13: check the same document twice, first with
    [~collect_parse_errors:true] and then with [~collect_parse_errors:false],
    printing the number of messages returned by each run. Nothing is
    asserted. *)
let test_collect_parse_errors_flag () =
  print_string "\nTest 13: collect_parse_errors flag\n";
  let markup = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
  (* A fresh reader is created for every run. *)
  let count_messages ~collect_parse_errors =
    let checked =
      Html5_checker.check ~collect_parse_errors (reader_of_string markup)
    in
    List.length (Html5_checker.messages checked)
  in

  let count_with = count_messages ~collect_parse_errors:true in
  Printf.printf " With parse errors: %d message(s)\n" count_with;

  let count_without = count_messages ~collect_parse_errors:false in
  Printf.printf " Without parse errors: %d message(s)\n" count_without
242
(** Test 14: call [Html5_checker.document] on a check result, discard the
    returned value, and print a confirmation line. Nothing is asserted. *)
let test_document_accessor () =
  print_string "\nTest 14: document accessor\n";
  let checked =
    Html5_checker.check
      (reader_of_string "<!DOCTYPE html><html><body><p>Test</p></body></html>")
  in
  (* Only the call itself is exercised; its result is not inspected. *)
  ignore (Html5_checker.document checked);
  print_string " OK: document accessor works\n"
251
(** Test 15: print the number of messages, errors and warnings reported for
    one document, then print whether the error and warning counts together
    stay within the total message count. Nothing is asserted. *)
let test_severity_filtering () =
  print_string "\nTest 15: Message severity filtering\n";
  let checked =
    Html5_checker.check
      (reader_of_string "<!DOCTYPE html><html><body><p>Test</p></body></html>")
  in

  let total = List.length (Html5_checker.messages checked) in
  let error_count = List.length (Html5_checker.errors checked) in
  let warning_count = List.length (Html5_checker.warnings checked) in

  Printf.printf " Total messages: %d\n" total;
  Printf.printf " Errors: %d\n" error_count;
  Printf.printf " Warnings: %d\n" warning_count;

  (* Errors and warnings together must not outnumber all messages. *)
  let verdict =
    if error_count + warning_count > total then
      " Warning: Message counts inconsistent\n"
    else " OK: Message counts are consistent\n"
  in
  print_string verdict
272
(** Entry point: print a banner, run every test in its numbered order, then
    print a closing banner and a note about checkers that may not be
    enabled. *)
let () =
  print_string "Running html5_checker tests...\n";
  print_string "========================================\n\n";

  List.iter
    (fun run_test -> run_test ())
    [
      test_valid_html5;
      test_missing_doctype;
      test_obsolete_element;
      test_duplicate_id;
      test_heading_skip;
      test_img_without_alt;
      test_invalid_nesting;
      test_form_nesting;
      test_output_formats;
      test_has_errors;
      test_check_dom;
      test_system_id;
      test_collect_parse_errors_flag;
      test_document_accessor;
      test_severity_filtering;
    ];

  List.iter print_string
    [
      "\n========================================\n";
      "All tests completed!\n";
      "\nNote: Some checkers may not be enabled yet.\n";
      "Tests marked with 'Note:' indicate features that may be\n";
      "implemented in future versions.\n";
    ]