-1
.gitignore
-1
.gitignore
+3
.gitmodules
+3
.gitmodules
+16
-15
test/dune
+16
-15
test/dune
···
55
(action
56
(run %{exe:test_serializer.exe} ../html5lib-tests/serializer)))
57
58
-
(executable
59
-
(name test_nesting_checker)
60
-
(modules test_nesting_checker)
61
-
(libraries html5rw.check))
62
-
63
-
(executable
64
-
(name test_html5_checker)
65
-
(modules test_html5_checker)
66
-
(libraries bytesrw html5rw html5rw.check str))
67
-
68
-
(rule
69
-
(alias runtest)
70
-
(action
71
-
(run %{exe:test_html5_checker.exe})))
72
-
73
(library
74
(name validator_messages)
75
(modules validator_messages)
···
85
(modules test_validator)
86
(libraries bytesrw html5rw html5rw.check str jsont jsont.bytesrw test_report validator_messages expected_message unix))
87
88
(executable
89
(name test_roundtrip)
90
(modules test_roundtrip)
91
(libraries bytesrw html5rw html5rw.check astring test_report))
···
55
(action
56
(run %{exe:test_serializer.exe} ../html5lib-tests/serializer)))
57
58
(library
59
(name validator_messages)
60
(modules validator_messages)
···
70
(modules test_validator)
71
(libraries bytesrw html5rw html5rw.check str jsont jsont.bytesrw test_report validator_messages expected_message unix))
72
73
+
(rule
74
+
(alias runtest)
75
+
(deps
76
+
(glob_files_rec ../validator/tests/**/*.html)
77
+
(glob_files_rec ../validator/tests/**/*.xhtml)
78
+
../validator/tests/messages.json)
79
+
(action
80
+
(run %{exe:test_validator.exe} ../validator/tests)))
81
+
82
(executable
83
(name test_roundtrip)
84
(modules test_roundtrip)
85
(libraries bytesrw html5rw html5rw.check astring test_report))
86
+
87
+
(rule
88
+
(alias runtest)
89
+
(deps
90
+
(glob_files_rec ../validator/tests/**/*.html))
91
+
(action
92
+
(run %{exe:test_roundtrip.exe} ../validator/tests)))
-298
test/test_html5_checker.ml
-298
test/test_html5_checker.ml
···
1
-
(** Tests for the html5_checker library *)
2
-
3
-
(** Helper to create a reader from a string *)
4
-
let reader_of_string s = Bytesrw.Bytes.Reader.of_string s
5
-
6
-
(** Helper to check if a message contains a substring *)
7
-
let message_contains msg substring =
8
-
String.lowercase_ascii msg.Htmlrw_check.text
9
-
|> fun s -> String.length s >= String.length substring &&
10
-
try
11
-
ignore (Str.search_forward (Str.regexp_case_fold (Str.quote substring)) s 0);
12
-
true
13
-
with Not_found -> false
14
-
15
-
(** Test that valid HTML5 produces no errors *)
16
-
let test_valid_html5 () =
17
-
Printf.printf "Test 1: Valid HTML5 document\n";
18
-
let html = {|<!DOCTYPE html>
19
-
<html lang="en">
20
-
<head><title>Test</title></head>
21
-
<body><p>Hello world</p></body>
22
-
</html>|} in
23
-
let reader = reader_of_string html in
24
-
let result = Htmlrw_check.check reader in
25
-
let errors = Htmlrw_check.errors result in
26
-
Printf.printf " Found %d error(s)\n" (List.length errors);
27
-
if List.length errors > 0 then begin
28
-
List.iter (fun msg ->
29
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
30
-
) errors;
31
-
end else
32
-
Printf.printf " OK: No errors as expected\n"
33
-
34
-
(** Test that missing DOCTYPE is detected *)
35
-
let test_missing_doctype () =
36
-
Printf.printf "\nTest 2: Missing DOCTYPE\n";
37
-
let html = "<html><body>Hello</body></html>" in
38
-
let reader = reader_of_string html in
39
-
let result = Htmlrw_check.check reader in
40
-
let errors = Htmlrw_check.errors result in
41
-
Printf.printf " Found %d error(s)\n" (List.length errors);
42
-
if List.length errors = 0 then
43
-
Printf.printf " Warning: Expected parse errors for missing DOCTYPE\n"
44
-
else begin
45
-
List.iter (fun msg ->
46
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
47
-
) errors;
48
-
end
49
-
50
-
(** Test that obsolete elements are detected *)
51
-
let test_obsolete_element () =
52
-
Printf.printf "\nTest 3: Obsolete <center> element\n";
53
-
let html = "<!DOCTYPE html><html><body><center>Centered</center></body></html>" in
54
-
let reader = reader_of_string html in
55
-
let result = Htmlrw_check.check reader in
56
-
let all_msgs = Htmlrw_check.messages result in
57
-
Printf.printf " Found %d message(s)\n" (List.length all_msgs);
58
-
let obsolete_msgs = List.filter (fun m ->
59
-
message_contains m "obsolete" || message_contains m "center"
60
-
) all_msgs in
61
-
if List.length obsolete_msgs > 0 then begin
62
-
Printf.printf " Found obsolete-related messages:\n";
63
-
List.iter (fun msg ->
64
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
65
-
) obsolete_msgs;
66
-
end else
67
-
Printf.printf " Note: No obsolete element warnings found (checker may not be enabled)\n"
68
-
69
-
(** Test duplicate IDs *)
70
-
let test_duplicate_id () =
71
-
Printf.printf "\nTest 4: Duplicate ID attributes\n";
72
-
let html = {|<!DOCTYPE html><html><body>
73
-
<div id="foo">First</div>
74
-
<div id="foo">Second</div>
75
-
</body></html>|} in
76
-
let reader = reader_of_string html in
77
-
let result = Htmlrw_check.check reader in
78
-
let all_msgs = Htmlrw_check.messages result in
79
-
Printf.printf " Found %d message(s)\n" (List.length all_msgs);
80
-
let id_msgs = List.filter (fun m ->
81
-
message_contains m "duplicate" || message_contains m "id"
82
-
) all_msgs in
83
-
if List.length id_msgs > 0 then begin
84
-
Printf.printf " Found ID-related messages:\n";
85
-
List.iter (fun msg ->
86
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
87
-
) id_msgs;
88
-
end else
89
-
Printf.printf " Note: No duplicate ID errors found (checker may not be enabled)\n"
90
-
91
-
(** Test heading structure *)
92
-
let test_heading_skip () =
93
-
Printf.printf "\nTest 5: Skipped heading level\n";
94
-
let html = {|<!DOCTYPE html><html><body>
95
-
<h1>Title</h1>
96
-
<h3>Skipped h2</h3>
97
-
</body></html>|} in
98
-
let reader = reader_of_string html in
99
-
let result = Htmlrw_check.check reader in
100
-
let all_msgs = Htmlrw_check.messages result in
101
-
Printf.printf " Found %d message(s)\n" (List.length all_msgs);
102
-
let heading_msgs = List.filter (fun m ->
103
-
message_contains m "heading" || message_contains m "skip"
104
-
) all_msgs in
105
-
if List.length heading_msgs > 0 then begin
106
-
Printf.printf " Found heading-related messages:\n";
107
-
List.iter (fun msg ->
108
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
109
-
) heading_msgs;
110
-
end else
111
-
Printf.printf " Note: No heading structure warnings found (checker may not be enabled)\n"
112
-
113
-
(** Test img without alt *)
114
-
let test_img_without_alt () =
115
-
Printf.printf "\nTest 6: Image without alt attribute\n";
116
-
let html = {|<!DOCTYPE html><html><body>
117
-
<img src="test.jpg">
118
-
</body></html>|} in
119
-
let reader = reader_of_string html in
120
-
let result = Htmlrw_check.check reader in
121
-
let all_msgs = Htmlrw_check.messages result in
122
-
Printf.printf " Found %d message(s)\n" (List.length all_msgs);
123
-
let img_msgs = List.filter (fun m ->
124
-
message_contains m "alt" || (message_contains m "img" && message_contains m "attribute")
125
-
) all_msgs in
126
-
if List.length img_msgs > 0 then begin
127
-
Printf.printf " Found img/alt-related messages:\n";
128
-
List.iter (fun msg ->
129
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
130
-
) img_msgs;
131
-
end else
132
-
Printf.printf " Note: No missing alt attribute errors found (checker may not be enabled)\n"
133
-
134
-
(** Test invalid nesting *)
135
-
let test_invalid_nesting () =
136
-
Printf.printf "\nTest 7: Invalid nesting - <a> inside <a>\n";
137
-
let html = {|<!DOCTYPE html><html><body>
138
-
<a href="#">Link <a href="#">Nested</a></a>
139
-
</body></html>|} in
140
-
let reader = reader_of_string html in
141
-
let result = Htmlrw_check.check reader in
142
-
let all_msgs = Htmlrw_check.messages result in
143
-
Printf.printf " Found %d message(s)\n" (List.length all_msgs);
144
-
let nesting_msgs = List.filter (fun m ->
145
-
message_contains m "nesting" || message_contains m "nested" || message_contains m "ancestor"
146
-
) all_msgs in
147
-
if List.length nesting_msgs > 0 then begin
148
-
Printf.printf " Found nesting-related messages:\n";
149
-
List.iter (fun msg ->
150
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
151
-
) nesting_msgs;
152
-
end else
153
-
Printf.printf " Note: No nesting errors found (checker may not be enabled)\n"
154
-
155
-
(** Test form inside form *)
156
-
let test_form_nesting () =
157
-
Printf.printf "\nTest 8: Invalid nesting - <form> inside <form>\n";
158
-
let html = {|<!DOCTYPE html><html><body>
159
-
<form><form></form></form>
160
-
</body></html>|} in
161
-
let reader = reader_of_string html in
162
-
let result = Htmlrw_check.check reader in
163
-
let all_msgs = Htmlrw_check.messages result in
164
-
Printf.printf " Found %d message(s)\n" (List.length all_msgs);
165
-
let form_msgs = List.filter (fun m ->
166
-
message_contains m "form"
167
-
) all_msgs in
168
-
if List.length form_msgs > 0 then begin
169
-
Printf.printf " Found form-related messages:\n";
170
-
List.iter (fun msg ->
171
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
172
-
) form_msgs;
173
-
end else
174
-
Printf.printf " Note: No form nesting errors found (checker may not be enabled)\n"
175
-
176
-
(** Test output formatting *)
177
-
let test_output_formats () =
178
-
Printf.printf "\nTest 9: Output format testing\n";
179
-
let html = {|<!DOCTYPE html><html><body><p>Test</p></body></html>|} in
180
-
let reader = reader_of_string html in
181
-
let result = Htmlrw_check.check reader in
182
-
183
-
Printf.printf " Testing text format:\n";
184
-
let text_output = Htmlrw_check.to_text result in
185
-
Printf.printf " Length: %d chars\n" (String.length text_output);
186
-
187
-
Printf.printf " Testing JSON format:\n";
188
-
let json_output = Htmlrw_check.to_json result in
189
-
Printf.printf " Length: %d chars\n" (String.length json_output);
190
-
191
-
Printf.printf " Testing GNU format:\n";
192
-
let gnu_output = Htmlrw_check.to_gnu result in
193
-
Printf.printf " Length: %d chars\n" (String.length gnu_output)
194
-
195
-
(** Test has_errors function *)
196
-
let test_has_errors () =
197
-
Printf.printf "\nTest 10: has_errors function\n";
198
-
199
-
(* Valid document should have no errors *)
200
-
let valid_html = "<!DOCTYPE html><html><body><p>Valid</p></body></html>" in
201
-
let result1 = Htmlrw_check.check (reader_of_string valid_html) in
202
-
Printf.printf " Valid document has_errors: %b\n" (Htmlrw_check.has_errors result1);
203
-
204
-
(* Document with likely parse errors *)
205
-
let invalid_html = "<html><body><p>Unclosed" in
206
-
let result2 = Htmlrw_check.check (reader_of_string invalid_html) in
207
-
Printf.printf " Invalid document has_errors: %b\n" (Htmlrw_check.has_errors result2)
208
-
209
-
(** Test check_dom with pre-parsed document *)
210
-
let test_check_dom () =
211
-
Printf.printf "\nTest 11: check_dom with pre-parsed document\n";
212
-
let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
213
-
let reader = reader_of_string html in
214
-
let parsed = Html5rw.parse reader in
215
-
let result = Htmlrw_check.check_parsed parsed in
216
-
let all_msgs = Htmlrw_check.messages result in
217
-
Printf.printf " check_dom found %d message(s)\n" (List.length all_msgs);
218
-
Printf.printf " OK: check_dom completed successfully\n"
219
-
220
-
(** Test system_id parameter *)
221
-
let test_system_id () =
222
-
Printf.printf "\nTest 12: system_id parameter\n";
223
-
let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
224
-
let reader = reader_of_string html in
225
-
let result = Htmlrw_check.check ~system_id:"test.html" reader in
226
-
match Htmlrw_check.system_id result with
227
-
| Some id -> Printf.printf " system_id: %s\n" id
228
-
| None -> Printf.printf " Warning: system_id not set\n"
229
-
230
-
(** Test collect_parse_errors flag *)
231
-
let test_collect_parse_errors_flag () =
232
-
Printf.printf "\nTest 13: collect_parse_errors flag\n";
233
-
let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
234
-
235
-
let result_with = Htmlrw_check.check ~collect_parse_errors:true (reader_of_string html) in
236
-
let msgs_with = Htmlrw_check.messages result_with in
237
-
Printf.printf " With parse errors: %d message(s)\n" (List.length msgs_with);
238
-
239
-
let result_without = Htmlrw_check.check ~collect_parse_errors:false (reader_of_string html) in
240
-
let msgs_without = Htmlrw_check.messages result_without in
241
-
Printf.printf " Without parse errors: %d message(s)\n" (List.length msgs_without)
242
-
243
-
(** Test document accessor *)
244
-
let test_document_accessor () =
245
-
Printf.printf "\nTest 14: document accessor\n";
246
-
let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
247
-
let reader = reader_of_string html in
248
-
let result = Htmlrw_check.check reader in
249
-
let _doc = Htmlrw_check.document result in
250
-
Printf.printf " OK: document accessor works\n"
251
-
252
-
(** Test message severity filtering *)
253
-
let test_severity_filtering () =
254
-
Printf.printf "\nTest 15: Message severity filtering\n";
255
-
let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in
256
-
let reader = reader_of_string html in
257
-
let result = Htmlrw_check.check reader in
258
-
259
-
let all_msgs = Htmlrw_check.messages result in
260
-
let errors = Htmlrw_check.errors result in
261
-
let warnings = Htmlrw_check.warnings result in
262
-
263
-
Printf.printf " Total messages: %d\n" (List.length all_msgs);
264
-
Printf.printf " Errors: %d\n" (List.length errors);
265
-
Printf.printf " Warnings: %d\n" (List.length warnings);
266
-
267
-
(* Verify that errors + warnings <= all messages *)
268
-
if List.length errors + List.length warnings <= List.length all_msgs then
269
-
Printf.printf " OK: Message counts are consistent\n"
270
-
else
271
-
Printf.printf " Warning: Message counts inconsistent\n"
272
-
273
-
(** Run all tests *)
274
-
let () =
275
-
Printf.printf "Running html5_checker tests...\n";
276
-
Printf.printf "========================================\n\n";
277
-
278
-
test_valid_html5 ();
279
-
test_missing_doctype ();
280
-
test_obsolete_element ();
281
-
test_duplicate_id ();
282
-
test_heading_skip ();
283
-
test_img_without_alt ();
284
-
test_invalid_nesting ();
285
-
test_form_nesting ();
286
-
test_output_formats ();
287
-
test_has_errors ();
288
-
test_check_dom ();
289
-
test_system_id ();
290
-
test_collect_parse_errors_flag ();
291
-
test_document_accessor ();
292
-
test_severity_filtering ();
293
-
294
-
Printf.printf "\n========================================\n";
295
-
Printf.printf "All tests completed!\n";
296
-
Printf.printf "\nNote: Some checkers may not be enabled yet.\n";
297
-
Printf.printf "Tests marked with 'Note:' indicate features that may be\n";
298
-
Printf.printf "implemented in future versions.\n"
···
-68
test/test_nesting_checker.ml
-68
test/test_nesting_checker.ml
···
1
-
(** Test for nesting checker functionality via public API *)
2
-
3
-
let check_html html =
4
-
let reader = Bytesrw.Bytes.Reader.of_string html in
5
-
Htmlrw_check.check reader
6
-
7
-
let () =
8
-
(* Test 1: <a> cannot contain another <a> *)
9
-
Printf.printf "Test 1: Checking <a href> inside <a href>\n";
10
-
let result1 = check_html "<a href='#'><a href='#'>nested</a></a>" in
11
-
let errors1 = Htmlrw_check.errors result1 in
12
-
Printf.printf " Found %d error(s)\n" (List.length errors1);
13
-
List.iter (fun msg ->
14
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
15
-
) errors1;
16
-
17
-
(* Test 2: <button> inside <a> *)
18
-
Printf.printf "\nTest 2: Checking <button> inside <a href>\n";
19
-
let result2 = check_html "<a href='#'><button>click</button></a>" in
20
-
let errors2 = Htmlrw_check.errors result2 in
21
-
Printf.printf " Found %d error(s)\n" (List.length errors2);
22
-
List.iter (fun msg ->
23
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
24
-
) errors2;
25
-
26
-
(* Test 3: form inside form *)
27
-
Printf.printf "\nTest 3: Checking <form> inside <form>\n";
28
-
let result3 = check_html "<form><form>nested</form></form>" in
29
-
let errors3 = Htmlrw_check.errors result3 in
30
-
Printf.printf " Found %d error(s)\n" (List.length errors3);
31
-
List.iter (fun msg ->
32
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
33
-
) errors3;
34
-
35
-
(* Test 4: header inside footer (should be allowed) *)
36
-
Printf.printf "\nTest 4: Checking <header> inside <footer>\n";
37
-
let result4 = check_html "<footer><header>test</header></footer>" in
38
-
let errors4 = Htmlrw_check.errors result4 in
39
-
Printf.printf " Found %d error(s)\n" (List.length errors4);
40
-
if List.length errors4 > 0 then
41
-
List.iter (fun msg ->
42
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
43
-
) errors4
44
-
else
45
-
Printf.printf " OK: No errors (header inside footer is valid)\n";
46
-
47
-
(* Test 5: input inside button *)
48
-
Printf.printf "\nTest 5: Checking <input type=text> inside <button>\n";
49
-
let result5 = check_html "<button><input type='text'></button>" in
50
-
let errors5 = Htmlrw_check.errors result5 in
51
-
Printf.printf " Found %d error(s)\n" (List.length errors5);
52
-
List.iter (fun msg ->
53
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
54
-
) errors5;
55
-
56
-
(* Test 6: valid nesting - should not error *)
57
-
Printf.printf "\nTest 6: Checking valid nesting: <div> inside <div>\n";
58
-
let result6 = check_html "<div><div>nested</div></div>" in
59
-
let errors6 = Htmlrw_check.errors result6 in
60
-
Printf.printf " Found %d error(s)\n" (List.length errors6);
61
-
if List.length errors6 = 0 then
62
-
Printf.printf " OK: No errors as expected\n"
63
-
else
64
-
List.iter (fun msg ->
65
-
Printf.printf " - %s\n" msg.Htmlrw_check.text
66
-
) errors6;
67
-
68
-
Printf.printf "\nAll tests completed!\n"
···