+5
test/dune
+5
test/dune
+195
test/test_roundtrip.ml
+195
test/test_roundtrip.ml
···
···
1
+
(* Roundtrip test: Parse -> Serialize -> Re-parse -> Validate
2
+
3
+
This test validates that the HTML5 serializer produces valid HTML5
4
+
by roundtripping the validator test suite files through:
5
+
1. Parse with HTML5 parser
6
+
2. Serialize DOM back to HTML
7
+
3. Re-parse the serialized HTML
8
+
4. Validate the result
9
+
10
+
For "isvalid" tests: the roundtripped document should still be valid
11
+
For "novalid/haswarn" tests: we just verify the roundtrip works without crashes
12
+
*)
13
+
14
+
module Report = Test_report
15
+
16
+
(* Test result type *)
17
+
type test_result = {
18
+
filename : string;
19
+
test_type : string; (* isvalid, novalid, haswarn *)
20
+
original_valid : bool; (* Did original pass validation? *)
21
+
roundtrip_valid : bool; (* Did roundtripped doc pass validation? *)
22
+
roundtrip_ok : bool; (* Did roundtrip work without errors? *)
23
+
original_errors : int;
24
+
roundtrip_errors : int;
25
+
parse_error : string option;
26
+
}
27
+
28
+
(* Get test type from filename *)
29
+
let get_test_type filename =
30
+
if Astring.String.is_suffix ~affix:"-isvalid.html" filename ||
31
+
Astring.String.is_suffix ~affix:"-isvalid.xhtml" filename then "isvalid"
32
+
else if Astring.String.is_suffix ~affix:"-novalid.html" filename ||
33
+
Astring.String.is_suffix ~affix:"-novalid.xhtml" filename then "novalid"
34
+
else if Astring.String.is_suffix ~affix:"-haswarn.html" filename ||
35
+
Astring.String.is_suffix ~affix:"-haswarn.xhtml" filename then "haswarn"
36
+
else "unknown"
37
+
38
+
(* Count errors in validation result *)
39
+
let count_errors messages =
40
+
List.length (List.filter (fun (m : Html5_checker.Message.t) ->
41
+
m.severity = Html5_checker.Message.Error
42
+
) messages)
43
+
44
+
(* Serialize a document to HTML string *)
45
+
let serialize_document doc =
46
+
Html5rw.Dom.to_html ~pretty:false doc
47
+
48
+
(* Run roundtrip test on a single file *)
49
+
let test_file path =
50
+
let filename = Filename.basename path in
51
+
let test_type = get_test_type filename in
52
+
53
+
try
54
+
(* Read file content *)
55
+
let content =
56
+
let ic = open_in path in
57
+
let n = in_channel_length ic in
58
+
let s = really_input_string ic n in
59
+
close_in ic;
60
+
s
61
+
in
62
+
63
+
(* Parse original *)
64
+
let original_result = Html5rw.parse_bytes (Bytes.of_string content) in
65
+
let original_doc = Html5rw.root original_result in
66
+
67
+
(* Validate original *)
68
+
let checker_result = Html5_checker.check_dom ~system_id:path original_result in
69
+
let original_messages = Html5_checker.messages checker_result in
70
+
let original_errors = count_errors original_messages in
71
+
let original_valid = original_errors = 0 in
72
+
73
+
(* Serialize to HTML *)
74
+
let serialized = serialize_document original_doc in
75
+
76
+
(* Re-parse serialized HTML *)
77
+
let roundtrip_result = Html5rw.parse_bytes (Bytes.of_string serialized) in
78
+
79
+
(* Validate roundtripped document *)
80
+
let roundtrip_checker = Html5_checker.check_dom ~system_id:path roundtrip_result in
81
+
let roundtrip_messages = Html5_checker.messages roundtrip_checker in
82
+
let roundtrip_errors = count_errors roundtrip_messages in
83
+
let roundtrip_valid = roundtrip_errors = 0 in
84
+
85
+
{
86
+
filename;
87
+
test_type;
88
+
original_valid;
89
+
roundtrip_valid;
90
+
roundtrip_ok = true;
91
+
original_errors;
92
+
roundtrip_errors;
93
+
parse_error = None;
94
+
}
95
+
with e ->
96
+
{
97
+
filename;
98
+
test_type;
99
+
original_valid = false;
100
+
roundtrip_valid = false;
101
+
roundtrip_ok = false;
102
+
original_errors = 0;
103
+
roundtrip_errors = 0;
104
+
parse_error = Some (Printexc.to_string e);
105
+
}
106
+
107
+
(* Recursively find all test files *)
108
+
let rec find_test_files dir =
109
+
let files = Sys.readdir dir |> Array.to_list in
110
+
List.concat_map (fun f ->
111
+
let path = Filename.concat dir f in
112
+
if Sys.is_directory path then
113
+
find_test_files path
114
+
else if Astring.String.is_suffix ~affix:"-isvalid.html" f ||
115
+
Astring.String.is_suffix ~affix:"-novalid.html" f ||
116
+
Astring.String.is_suffix ~affix:"-haswarn.html" f then
117
+
[path]
118
+
else
119
+
[]
120
+
) files
121
+
122
+
let () =
123
+
let test_dir = Sys.argv.(1) in
124
+
125
+
Printf.printf "Discovering test files...\n%!";
126
+
let test_files = find_test_files test_dir in
127
+
Printf.printf "Found %d test files\n%!" (List.length test_files);
128
+
129
+
Printf.printf "Running roundtrip tests...\n%!";
130
+
131
+
(* Run tests *)
132
+
let results = List.map test_file test_files in
133
+
134
+
(* Categorize results *)
135
+
let isvalid_tests = List.filter (fun r -> r.test_type = "isvalid") results in
136
+
let novalid_tests = List.filter (fun r -> r.test_type = "novalid") results in
137
+
let haswarn_tests = List.filter (fun r -> r.test_type = "haswarn") results in
138
+
139
+
(* For isvalid tests: check that roundtripped document is still valid *)
140
+
let isvalid_passed = List.filter (fun r ->
141
+
r.roundtrip_ok && r.roundtrip_valid
142
+
) isvalid_tests in
143
+
144
+
(* For novalid/haswarn tests: just check roundtrip works *)
145
+
let novalid_passed = List.filter (fun r -> r.roundtrip_ok) novalid_tests in
146
+
let haswarn_passed = List.filter (fun r -> r.roundtrip_ok) haswarn_tests in
147
+
148
+
(* Print failures for isvalid tests *)
149
+
let isvalid_failed = List.filter (fun r ->
150
+
not r.roundtrip_ok || not r.roundtrip_valid
151
+
) isvalid_tests in
152
+
153
+
if List.length isvalid_failed > 0 then begin
154
+
Printf.printf "\n=== Failing isvalid roundtrip tests (first 20) ===\n";
155
+
List.iteri (fun i r ->
156
+
if i < 20 then begin
157
+
match r.parse_error with
158
+
| Some err -> Printf.printf "%s: %s\n" r.filename err
159
+
| None ->
160
+
Printf.printf "%s: original_valid=%b, roundtrip_valid=%b (errors: %d -> %d)\n"
161
+
r.filename r.original_valid r.roundtrip_valid
162
+
r.original_errors r.roundtrip_errors
163
+
end
164
+
) isvalid_failed
165
+
end;
166
+
167
+
(* Print roundtrip failures for all tests *)
168
+
let roundtrip_failures = List.filter (fun r -> not r.roundtrip_ok) results in
169
+
if List.length roundtrip_failures > 0 then begin
170
+
Printf.printf "\n=== Roundtrip failures (first 20) ===\n";
171
+
List.iteri (fun i r ->
172
+
if i < 20 then
173
+
Printf.printf "%s: %s\n" r.filename
174
+
(Option.value ~default:"unknown error" r.parse_error)
175
+
) roundtrip_failures
176
+
end;
177
+
178
+
(* Summary *)
179
+
Printf.printf "\n=== Roundtrip Test Results ===\n";
180
+
Printf.printf "isvalid tests: %d/%d passed (roundtripped and still valid)\n"
181
+
(List.length isvalid_passed) (List.length isvalid_tests);
182
+
Printf.printf "novalid tests: %d/%d roundtripped successfully\n"
183
+
(List.length novalid_passed) (List.length novalid_tests);
184
+
Printf.printf "haswarn tests: %d/%d roundtripped successfully\n"
185
+
(List.length haswarn_passed) (List.length haswarn_tests);
186
+
187
+
let total_roundtrip_ok = List.length (List.filter (fun r -> r.roundtrip_ok) results) in
188
+
Printf.printf "\nTotal: %d/%d roundtripped without errors\n"
189
+
total_roundtrip_ok (List.length results);
190
+
Printf.printf "isvalid preservation: %d/%d still valid after roundtrip\n"
191
+
(List.length isvalid_passed) (List.length isvalid_tests);
192
+
193
+
(* Exit with error if isvalid tests fail validation after roundtrip *)
194
+
let exit_code = if List.length isvalid_failed > 0 then 1 else 0 in
195
+
exit exit_code