+17
-4
test/dune
+17
-4
test/dune
···
1
+
(library
2
+
(name test_report)
3
+
(modules test_report))
4
+
5
+
(executable
6
+
(name test_all)
7
+
(modules test_all)
8
+
(libraries bytesrw html5rw.parser html5rw.dom html5rw.tokenizer html5rw.encoding jsont jsont.bytesrw test_report))
9
+
1
10
(executable
2
11
(name test_html5lib)
3
-
(libraries bytesrw html5rw.parser html5rw.dom))
12
+
(modules test_html5lib)
13
+
(libraries bytesrw html5rw.parser html5rw.dom test_report))
4
14
5
15
(rule
6
16
(alias runtest)
···
11
21
12
22
(executable
13
23
(name test_tokenizer)
14
-
(libraries bytesrw html5rw.tokenizer jsont jsont.bytesrw))
24
+
(modules test_tokenizer)
25
+
(libraries bytesrw html5rw.tokenizer jsont jsont.bytesrw test_report))
15
26
16
27
(rule
17
28
(alias runtest)
···
22
33
23
34
(executable
24
35
(name test_encoding)
25
-
(libraries html5rw.encoding))
36
+
(modules test_encoding)
37
+
(libraries html5rw.encoding test_report))
26
38
27
39
(rule
28
40
(alias runtest)
···
33
45
34
46
(executable
35
47
(name test_serializer)
36
-
(libraries html5rw.dom jsont jsont.bytesrw))
48
+
(modules test_serializer)
49
+
(libraries html5rw.dom jsont jsont.bytesrw test_report))
37
50
38
51
(rule
39
52
(alias runtest)
+670
test/test_all.ml
+670
test/test_all.ml
···
1
+
(* Combined test runner for all html5lib-tests *)
2
+
(* Generates a single standalone HTML report *)
3
+
4
+
open Bytesrw
5
+
6
+
module Report = Test_report
7
+
8
+
(* ============================================================ *)
9
+
(* Tree Construction Tests *)
10
+
(* ============================================================ *)
11
+
12
+
module TreeConstruction = struct
13
+
module Parser = Html5rw_parser
14
+
module Dom = Html5rw_dom
15
+
16
+
type test_case = {
17
+
input : string;
18
+
expected_tree : string;
19
+
expected_errors : string list;
20
+
script_on : bool;
21
+
fragment_context : string option;
22
+
raw_lines : string;
23
+
}
24
+
25
+
let parse_test_case lines =
26
+
let raw_lines = String.concat "\n" lines in
27
+
let rec parse acc = function
28
+
| [] -> acc
29
+
| line :: rest when String.length line > 0 && line.[0] = '#' ->
30
+
let section = String.trim line in
31
+
let content, remaining = collect_section rest in
32
+
parse ((section, content) :: acc) remaining
33
+
| _ :: rest -> parse acc rest
34
+
and collect_section lines =
35
+
let rec loop acc = function
36
+
| [] -> (List.rev acc, [])
37
+
| line :: rest when String.length line > 0 && line.[0] = '#' ->
38
+
(List.rev acc, line :: rest)
39
+
| line :: rest -> loop (line :: acc) rest
40
+
in
41
+
loop [] lines
42
+
in
43
+
let sections = parse [] lines in
44
+
let get_section name =
45
+
match List.assoc_opt name sections with
46
+
| Some lines -> String.concat "\n" lines
47
+
| None -> ""
48
+
in
49
+
let data = get_section "#data" in
50
+
let document = get_section "#document" in
51
+
let errors_text = get_section "#errors" in
52
+
let errors =
53
+
String.split_on_char '\n' errors_text
54
+
|> List.filter (fun s -> String.trim s <> "")
55
+
in
56
+
let script_on = List.mem_assoc "#script-on" sections in
57
+
let fragment =
58
+
if List.mem_assoc "#document-fragment" sections then
59
+
Some (get_section "#document-fragment" |> String.trim)
60
+
else None
61
+
in
62
+
{ input = data; expected_tree = document; expected_errors = errors;
63
+
script_on; fragment_context = fragment; raw_lines }
64
+
65
+
let parse_dat_file content =
66
+
let lines = String.split_on_char '\n' content in
67
+
let rec split_tests current acc = function
68
+
| [] ->
69
+
if current = [] then List.rev acc
70
+
else List.rev (List.rev current :: acc)
71
+
| "" :: "#data" :: rest ->
72
+
let new_acc = if current = [] then acc else (List.rev current :: acc) in
73
+
split_tests ["#data"] new_acc rest
74
+
| line :: rest ->
75
+
split_tests (line :: current) acc rest
76
+
in
77
+
let test_groups = split_tests [] [] lines in
78
+
List.filter_map (fun lines ->
79
+
if List.exists (fun l -> l = "#data") lines then
80
+
Some (parse_test_case lines)
81
+
else None
82
+
) test_groups
83
+
84
+
let strip_tree_prefix s =
85
+
let lines = String.split_on_char '\n' s in
86
+
let stripped = List.filter_map (fun line ->
87
+
if String.length line >= 2 && String.sub line 0 2 = "| " then
88
+
Some (String.sub line 2 (String.length line - 2))
89
+
else if String.trim line = "" then None
90
+
else Some line
91
+
) lines in
92
+
String.concat "\n" stripped
93
+
94
+
let normalize_tree s =
95
+
let lines = String.split_on_char '\n' s in
96
+
let non_empty = List.filter (fun l -> String.trim l <> "") lines in
97
+
String.concat "\n" non_empty
98
+
99
+
let run_test test =
100
+
try
101
+
let result =
102
+
match test.fragment_context with
103
+
| Some ctx_str ->
104
+
let (namespace, tag_name) =
105
+
match String.split_on_char ' ' ctx_str with
106
+
| [ns; tag] when ns = "svg" -> (Some "svg", tag)
107
+
| [ns; tag] when ns = "math" -> (Some "mathml", tag)
108
+
| [tag] -> (None, tag)
109
+
| _ -> (None, ctx_str)
110
+
in
111
+
let context = Parser.make_fragment_context ~tag_name ~namespace () in
112
+
let reader = Bytes.Reader.of_string test.input in
113
+
Parser.parse ~collect_errors:true ~fragment_context:context reader
114
+
| None ->
115
+
let reader = Bytes.Reader.of_string test.input in
116
+
Parser.parse ~collect_errors:true reader
117
+
in
118
+
let actual_tree = Dom.to_test_format (Parser.root result) in
119
+
let expected = normalize_tree (strip_tree_prefix test.expected_tree) in
120
+
let actual = normalize_tree (strip_tree_prefix actual_tree) in
121
+
let error_count = List.length (Parser.errors result) in
122
+
let expected_error_count = List.length test.expected_errors in
123
+
(expected = actual, expected, actual, error_count, expected_error_count)
124
+
with e ->
125
+
let expected = normalize_tree (strip_tree_prefix test.expected_tree) in
126
+
(false, expected, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), 0, 0)
127
+
128
+
let run_file path =
129
+
let ic = open_in path in
130
+
let content = really_input_string ic (in_channel_length ic) in
131
+
close_in ic;
132
+
let tests = parse_dat_file content in
133
+
let filename = Filename.basename path in
134
+
let passed = ref 0 in
135
+
let failed = ref 0 in
136
+
let results = ref [] in
137
+
List.iteri (fun i test ->
138
+
if test.script_on then ()
139
+
else begin
140
+
let (success, expected, actual, actual_error_count, expected_error_count) = run_test test in
141
+
let description =
142
+
let input_preview = String.sub test.input 0 (min 60 (String.length test.input)) in
143
+
if test.fragment_context <> None then
144
+
Printf.sprintf "Fragment (%s): %s" (Option.get test.fragment_context) input_preview
145
+
else input_preview
146
+
in
147
+
let result : Report.test_result = {
148
+
test_num = i + 1; description; input = test.input; expected; actual; success;
149
+
details = [
150
+
("Fragment Context", Option.value test.fragment_context ~default:"(none)");
151
+
("Expected Errors", string_of_int expected_error_count);
152
+
("Actual Errors", string_of_int actual_error_count);
153
+
];
154
+
raw_test_data = Some test.raw_lines;
155
+
} in
156
+
results := result :: !results;
157
+
if success then incr passed else incr failed
158
+
end
159
+
) tests;
160
+
let file_result : Report.file_result = {
161
+
filename; test_type = "Tree Construction";
162
+
passed_count = !passed; failed_count = !failed;
163
+
tests = List.rev !results;
164
+
} in
165
+
(file_result, !passed, !failed)
166
+
167
+
let run_dir test_dir =
168
+
let files = Sys.readdir test_dir |> Array.to_list in
169
+
let dat_files = List.filter (fun f ->
170
+
Filename.check_suffix f ".dat" && not (String.contains f '/')
171
+
) files in
172
+
let total_passed = ref 0 in
173
+
let total_failed = ref 0 in
174
+
let file_results = ref [] in
175
+
List.iter (fun file ->
176
+
let path = Filename.concat test_dir file in
177
+
if Sys.is_directory path then () else begin
178
+
let (file_result, passed, failed) = run_file path in
179
+
total_passed := !total_passed + passed;
180
+
total_failed := !total_failed + failed;
181
+
file_results := file_result :: !file_results;
182
+
Printf.printf " %s: %d passed, %d failed\n" file passed failed
183
+
end
184
+
) (List.sort String.compare dat_files);
185
+
(List.rev !file_results, !total_passed, !total_failed)
186
+
end
187
+
188
+
(* ============================================================ *)
189
+
(* Tokenizer Tests *)
190
+
(* ============================================================ *)
191
+
192
+
module Tokenizer_tests = struct
193
+
module Tokenizer = Html5rw_tokenizer
194
+
195
+
module TokenCollector = struct
196
+
type t = { mutable tokens : Tokenizer.Token.t list }
197
+
let create () = { tokens = [] }
198
+
let process t token = t.tokens <- token :: t.tokens; `Continue
199
+
let adjusted_current_node_in_html_namespace _ = true
200
+
let get_tokens t = List.rev t.tokens
201
+
end
202
+
203
+
type test_case = {
204
+
description : string;
205
+
input : string;
206
+
output : Jsont.json list;
207
+
expected_error_count : int;
208
+
initial_states : string list;
209
+
last_start_tag : string option;
210
+
double_escaped : bool;
211
+
xml_mode : bool;
212
+
raw_json : string;
213
+
}
214
+
215
+
let unescape_double s =
216
+
let b = Buffer.create (String.length s) in
217
+
let i = ref 0 in
218
+
while !i < String.length s do
219
+
if !i + 1 < String.length s && s.[!i] = '\\' then begin
220
+
match s.[!i + 1] with
221
+
| 'u' when !i + 5 < String.length s ->
222
+
let hex = String.sub s (!i + 2) 4 in
223
+
(try
224
+
let code = int_of_string ("0x" ^ hex) in
225
+
if code < 128 then Buffer.add_char b (Char.chr code)
226
+
else begin
227
+
if code < 0x800 then begin
228
+
Buffer.add_char b (Char.chr (0xC0 lor (code lsr 6)));
229
+
Buffer.add_char b (Char.chr (0x80 lor (code land 0x3F)))
230
+
end else begin
231
+
Buffer.add_char b (Char.chr (0xE0 lor (code lsr 12)));
232
+
Buffer.add_char b (Char.chr (0x80 lor ((code lsr 6) land 0x3F)));
233
+
Buffer.add_char b (Char.chr (0x80 lor (code land 0x3F)))
234
+
end
235
+
end;
236
+
i := !i + 6
237
+
with _ -> Buffer.add_char b s.[!i]; incr i)
238
+
| 'n' -> Buffer.add_char b '\n'; i := !i + 2
239
+
| 'r' -> Buffer.add_char b '\r'; i := !i + 2
240
+
| 't' -> Buffer.add_char b '\t'; i := !i + 2
241
+
| '\\' -> Buffer.add_char b '\\'; i := !i + 2
242
+
| _ -> Buffer.add_char b s.[!i]; incr i
243
+
end else begin
244
+
Buffer.add_char b s.[!i]; incr i
245
+
end
246
+
done;
247
+
Buffer.contents b
248
+
249
+
let json_string = function Jsont.String (s, _) -> s | _ -> failwith "Expected string"
250
+
let json_bool = function Jsont.Bool (b, _) -> b | _ -> failwith "Expected bool"
251
+
let json_array = function Jsont.Array (arr, _) -> arr | _ -> failwith "Expected array"
252
+
let json_object = function Jsont.Object (obj, _) -> obj | _ -> failwith "Expected object"
253
+
254
+
let json_mem name obj =
255
+
match List.find_opt (fun ((n, _), _) -> n = name) obj with
256
+
| Some (_, v) -> Some v | None -> None
257
+
258
+
let json_mem_exn name obj =
259
+
match json_mem name obj with Some v -> v | None -> failwith ("Missing member: " ^ name)
260
+
261
+
let rec json_to_string = function
262
+
| Jsont.Null _ -> "null"
263
+
| Jsont.Bool (b, _) -> string_of_bool b
264
+
| Jsont.Number (n, _) -> Printf.sprintf "%g" n
265
+
| Jsont.String (s, _) -> Printf.sprintf "%S" s
266
+
| Jsont.Array (arr, _) -> "[" ^ String.concat ", " (List.map json_to_string arr) ^ "]"
267
+
| Jsont.Object (obj, _) ->
268
+
"{" ^ String.concat ", " (List.map (fun ((n, _), v) -> Printf.sprintf "%S: %s" n (json_to_string v)) obj) ^ "}"
269
+
270
+
let parse_test_case ~xml_mode json =
271
+
let raw_json = json_to_string json in
272
+
let obj = json_object json in
273
+
let description = json_string (json_mem_exn "description" obj) in
274
+
let input = json_string (json_mem_exn "input" obj) in
275
+
let output = json_array (json_mem_exn "output" obj) in
276
+
let expected_error_count = match json_mem "errors" obj with
277
+
| Some e -> List.length (json_array e) | None -> 0
278
+
in
279
+
let initial_states = match json_mem "initialStates" obj with
280
+
| Some s -> List.map json_string (json_array s) | None -> ["Data state"]
281
+
in
282
+
let last_start_tag = match json_mem "lastStartTag" obj with
283
+
| Some s -> Some (json_string s) | None -> None
284
+
in
285
+
let double_escaped = match json_mem "doubleEscaped" obj with
286
+
| Some b -> json_bool b | None -> false
287
+
in
288
+
{ description; input; output; expected_error_count; initial_states;
289
+
last_start_tag; double_escaped; xml_mode; raw_json }
290
+
291
+
let state_of_string = function
292
+
| "Data state" -> Tokenizer.State.Data
293
+
| "PLAINTEXT state" -> Tokenizer.State.Plaintext
294
+
| "RCDATA state" -> Tokenizer.State.Rcdata
295
+
| "RAWTEXT state" -> Tokenizer.State.Rawtext
296
+
| "Script data state" -> Tokenizer.State.Script_data
297
+
| "CDATA section state" -> Tokenizer.State.Cdata_section
298
+
| s -> failwith ("Unknown state: " ^ s)
299
+
300
+
let token_to_test_json (tok : Tokenizer.Token.t) : Jsont.json list =
301
+
let str s = Jsont.String (s, Jsont.Meta.none) in
302
+
let arr l = Jsont.Array (l, Jsont.Meta.none) in
303
+
match tok with
304
+
| Tokenizer.Token.Doctype { name; public_id; system_id; force_quirks } ->
305
+
let name_json = match name with Some n -> str n | None -> Jsont.Null ((), Jsont.Meta.none) in
306
+
let public_json = match public_id with Some p -> str p | None -> Jsont.Null ((), Jsont.Meta.none) in
307
+
let system_json = match system_id with Some s -> str s | None -> Jsont.Null ((), Jsont.Meta.none) in
308
+
let correctness = Jsont.Bool (not force_quirks, Jsont.Meta.none) in
309
+
[arr [str "DOCTYPE"; name_json; public_json; system_json; correctness]]
310
+
| Tokenizer.Token.Tag { kind = Start; name; attrs; self_closing } ->
311
+
let attrs_obj = Jsont.Object (
312
+
List.map (fun (n, v) -> ((n, Jsont.Meta.none), str v)) (List.rev attrs),
313
+
Jsont.Meta.none
314
+
) in
315
+
if self_closing then [arr [str "StartTag"; str name; attrs_obj; Jsont.Bool (true, Jsont.Meta.none)]]
316
+
else [arr [str "StartTag"; str name; attrs_obj]]
317
+
| Tokenizer.Token.Tag { kind = End; name; _ } -> [arr [str "EndTag"; str name]]
318
+
| Tokenizer.Token.Comment data -> [arr [str "Comment"; str data]]
319
+
| Tokenizer.Token.Character data -> [arr [str "Character"; str data]]
320
+
| Tokenizer.Token.EOF -> []
321
+
322
+
let rec json_equal a b =
323
+
match a, b with
324
+
| Jsont.Null _, Jsont.Null _ -> true
325
+
| Jsont.Bool (a, _), Jsont.Bool (b, _) -> a = b
326
+
| Jsont.Number (a, _), Jsont.Number (b, _) -> a = b
327
+
| Jsont.String (a, _), Jsont.String (b, _) -> a = b
328
+
| Jsont.Array (a, _), Jsont.Array (b, _) ->
329
+
List.length a = List.length b && List.for_all2 json_equal a b
330
+
| Jsont.Object (a, _), Jsont.Object (b, _) ->
331
+
let a_sorted = List.sort (fun ((n1, _), _) ((n2, _), _) -> String.compare n1 n2) a in
332
+
let b_sorted = List.sort (fun ((n1, _), _) ((n2, _), _) -> String.compare n1 n2) b in
333
+
List.length a_sorted = List.length b_sorted &&
334
+
List.for_all2 (fun ((n1, _), v1) ((n2, _), v2) -> n1 = n2 && json_equal v1 v2) a_sorted b_sorted
335
+
| _ -> false
336
+
337
+
let merge_character_tokens tokens =
338
+
let rec loop acc = function
339
+
| [] -> List.rev acc
340
+
| Tokenizer.Token.Character s1 :: Tokenizer.Token.Character s2 :: rest ->
341
+
loop acc (Tokenizer.Token.Character (s1 ^ s2) :: rest)
342
+
| tok :: rest -> loop (tok :: acc) rest
343
+
in loop [] tokens
344
+
345
+
let run_test test initial_state =
346
+
let input = if test.double_escaped then unescape_double test.input else test.input in
347
+
let collector = TokenCollector.create () in
348
+
let tokenizer = Tokenizer.create (module TokenCollector) collector ~collect_errors:true ~xml_mode:test.xml_mode () in
349
+
Tokenizer.set_state tokenizer initial_state;
350
+
(match test.last_start_tag with Some tag -> Tokenizer.set_last_start_tag tokenizer tag | None -> ());
351
+
let reader = Bytes.Reader.of_string input in
352
+
Tokenizer.run tokenizer (module TokenCollector) reader;
353
+
let tokens = merge_character_tokens (TokenCollector.get_tokens collector) in
354
+
let actual_tokens = List.concat_map token_to_test_json tokens in
355
+
let expected_output = if test.double_escaped then
356
+
let rec unescape_json = function
357
+
| Jsont.String (s, m) -> Jsont.String (unescape_double s, m)
358
+
| Jsont.Array (arr, m) -> Jsont.Array (List.map unescape_json arr, m)
359
+
| Jsont.Object (obj, m) -> Jsont.Object (List.map (fun (n, v) -> (n, unescape_json v)) obj, m)
360
+
| other -> other
361
+
in List.map unescape_json test.output
362
+
else test.output in
363
+
let rec merge_expected = function
364
+
| [] -> []
365
+
| [x] -> [x]
366
+
| Jsont.Array ([Jsont.String ("Character", _); Jsont.String (s1, m1)], am1) ::
367
+
Jsont.Array ([Jsont.String ("Character", _); Jsont.String (s2, _)], _) :: rest ->
368
+
merge_expected (Jsont.Array ([Jsont.String ("Character", Jsont.Meta.none); Jsont.String (s1 ^ s2, m1)], am1) :: rest)
369
+
| x :: rest -> x :: merge_expected rest
370
+
in
371
+
let expected = merge_expected expected_output in
372
+
let tokens_match =
373
+
List.length actual_tokens = List.length expected &&
374
+
List.for_all2 json_equal actual_tokens expected
375
+
in
376
+
let actual_error_count = List.length (Tokenizer.get_errors tokenizer) in
377
+
let errors_count_match = actual_error_count = test.expected_error_count in
378
+
(tokens_match && errors_count_match, actual_tokens, expected, actual_error_count, test.expected_error_count)
379
+
380
+
let run_file path =
381
+
let content =
382
+
let ic = open_in path in
383
+
let n = in_channel_length ic in
384
+
let s = really_input_string ic n in
385
+
close_in ic; s
386
+
in
387
+
let json = match Jsont_bytesrw.decode_string Jsont.json content with
388
+
| Ok j -> j
389
+
| Error e -> failwith (Printf.sprintf "JSON parse error in %s: %s" path e)
390
+
in
391
+
let obj = json_object json in
392
+
let regular_tests = match json_mem "tests" obj with
393
+
| Some t -> List.map (parse_test_case ~xml_mode:false) (json_array t) | None -> []
394
+
in
395
+
let xml_tests = match json_mem "xmlViolationTests" obj with
396
+
| Some t -> List.map (parse_test_case ~xml_mode:true) (json_array t) | None -> []
397
+
in
398
+
let all_tests = regular_tests @ xml_tests in
399
+
let filename = Filename.basename path in
400
+
let passed = ref 0 in
401
+
let failed = ref 0 in
402
+
let results = ref [] in
403
+
List.iteri (fun i test ->
404
+
List.iter (fun state_name ->
405
+
try
406
+
let state = state_of_string state_name in
407
+
let (success, actual, expected, actual_err_count, expected_err_count) = run_test test state in
408
+
let description = Printf.sprintf "[%s] %s" state_name test.description in
409
+
let result : Report.test_result = {
410
+
test_num = i + 1; description; input = test.input;
411
+
expected = String.concat "\n" (List.map json_to_string expected);
412
+
actual = String.concat "\n" (List.map json_to_string actual);
413
+
success;
414
+
details = [
415
+
("Initial State", state_name);
416
+
("Last Start Tag", Option.value test.last_start_tag ~default:"(none)");
417
+
("Double Escaped", string_of_bool test.double_escaped);
418
+
("XML Mode", string_of_bool test.xml_mode);
419
+
("Expected Errors", string_of_int expected_err_count);
420
+
("Actual Errors", string_of_int actual_err_count);
421
+
];
422
+
raw_test_data = Some test.raw_json;
423
+
} in
424
+
results := result :: !results;
425
+
if success then incr passed else incr failed
426
+
with e ->
427
+
incr failed;
428
+
let result : Report.test_result = {
429
+
test_num = i + 1;
430
+
description = Printf.sprintf "[%s] %s" state_name test.description;
431
+
input = test.input; expected = "";
432
+
actual = Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e);
433
+
success = false; details = [];
434
+
raw_test_data = Some test.raw_json;
435
+
} in
436
+
results := result :: !results
437
+
) test.initial_states
438
+
) all_tests;
439
+
let file_result : Report.file_result = {
440
+
filename; test_type = "Tokenizer";
441
+
passed_count = !passed; failed_count = !failed;
442
+
tests = List.rev !results;
443
+
} in
444
+
(file_result, !passed, !failed)
445
+
446
+
let run_dir test_dir =
447
+
let files = Sys.readdir test_dir |> Array.to_list in
448
+
let test_files = List.filter (fun f -> Filename.check_suffix f ".test") files in
449
+
let total_passed = ref 0 in
450
+
let total_failed = ref 0 in
451
+
let file_results = ref [] in
452
+
List.iter (fun file ->
453
+
let path = Filename.concat test_dir file in
454
+
let (file_result, passed, failed) = run_file path in
455
+
total_passed := !total_passed + passed;
456
+
total_failed := !total_failed + failed;
457
+
file_results := file_result :: !file_results;
458
+
Printf.printf " %s: %d passed, %d failed\n" file passed failed
459
+
) (List.sort String.compare test_files);
460
+
(List.rev !file_results, !total_passed, !total_failed)
461
+
end
462
+
463
+
(* ============================================================ *)
464
+
(* Encoding Tests *)
465
+
(* ============================================================ *)
466
+
467
+
module Encoding_tests = struct
468
+
module Encoding = Html5rw_encoding
469
+
470
+
type test_case = {
471
+
input : string;
472
+
expected_encoding : string;
473
+
raw_lines : string;
474
+
}
475
+
476
+
let normalize_encoding_name s = String.lowercase_ascii (String.trim s)
477
+
478
+
let encoding_to_test_name = function
479
+
| Encoding.Utf8 -> "utf-8"
480
+
| Encoding.Utf16le -> "utf-16le"
481
+
| Encoding.Utf16be -> "utf-16be"
482
+
| Encoding.Windows_1252 -> "windows-1252"
483
+
| Encoding.Iso_8859_2 -> "iso-8859-2"
484
+
| Encoding.Euc_jp -> "euc-jp"
485
+
486
+
let parse_test_case lines =
487
+
let raw_lines = String.concat "\n" lines in
488
+
let rec parse acc = function
489
+
| [] -> acc
490
+
| line :: rest when String.length line > 0 && line.[0] = '#' ->
491
+
let section = String.trim line in
492
+
let content, remaining = collect_section rest in
493
+
parse ((section, content) :: acc) remaining
494
+
| _ :: rest -> parse acc rest
495
+
and collect_section lines =
496
+
let rec loop acc = function
497
+
| [] -> (List.rev acc, [])
498
+
| line :: rest when String.length line > 0 && line.[0] = '#' ->
499
+
(List.rev acc, line :: rest)
500
+
| line :: rest -> loop (line :: acc) rest
501
+
in loop [] lines
502
+
in
503
+
let sections = parse [] lines in
504
+
let get_section name =
505
+
match List.assoc_opt name sections with
506
+
| Some lines -> String.concat "\n" lines | None -> ""
507
+
in
508
+
let data = get_section "#data" in
509
+
let encoding = get_section "#encoding" in
510
+
{ input = data; expected_encoding = String.trim encoding; raw_lines }
511
+
512
+
let parse_dat_file content =
513
+
let lines = String.split_on_char '\n' content in
514
+
let rec split_tests current acc = function
515
+
| [] -> if current = [] then List.rev acc else List.rev (List.rev current :: acc)
516
+
| "" :: "#data" :: rest ->
517
+
let new_acc = if current = [] then acc else (List.rev current :: acc) in
518
+
split_tests ["#data"] new_acc rest
519
+
| line :: rest -> split_tests (line :: current) acc rest
520
+
in
521
+
let test_groups = split_tests [] [] lines in
522
+
List.filter_map (fun lines ->
523
+
if List.exists (fun l -> l = "#data") lines then Some (parse_test_case lines)
524
+
else None
525
+
) test_groups
526
+
527
+
let run_test test =
528
+
try
529
+
let (_, detected_encoding) = Encoding.decode (Bytes.of_string test.input) () in
530
+
let detected_name = encoding_to_test_name detected_encoding in
531
+
let expected_name = normalize_encoding_name test.expected_encoding in
532
+
let match_encoding det exp =
533
+
det = exp ||
534
+
(det = "windows-1252" && (exp = "windows-1252" || exp = "cp1252" || exp = "iso-8859-1")) ||
535
+
(det = "iso-8859-2" && (exp = "iso-8859-2" || exp = "iso8859-2" || exp = "latin2")) ||
536
+
(det = "utf-8" && (exp = "utf-8" || exp = "utf8")) ||
537
+
(det = "euc-jp" && (exp = "euc-jp" || exp = "eucjp"))
538
+
in
539
+
(match_encoding detected_name expected_name, detected_name, expected_name)
540
+
with e ->
541
+
(false, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), test.expected_encoding)
542
+
543
+
let run_file path =
544
+
let ic = open_in path in
545
+
let content = really_input_string ic (in_channel_length ic) in
546
+
close_in ic;
547
+
let tests = parse_dat_file content in
548
+
let filename = Filename.basename path in
549
+
let passed = ref 0 in
550
+
let failed = ref 0 in
551
+
let results = ref [] in
552
+
List.iteri (fun i test ->
553
+
if String.trim test.expected_encoding = "" then ()
554
+
else begin
555
+
let (success, detected, expected) = run_test test in
556
+
let result : Report.test_result = {
557
+
test_num = i + 1;
558
+
description = Printf.sprintf "Detect %s encoding" expected;
559
+
input = String.escaped test.input;
560
+
expected; actual = detected; success;
561
+
details = [
562
+
("Input Length", string_of_int (String.length test.input));
563
+
("Has BOM", string_of_bool (String.length test.input >= 3 &&
564
+
(String.sub test.input 0 3 = "\xEF\xBB\xBF" ||
565
+
String.sub test.input 0 2 = "\xFF\xFE" ||
566
+
String.sub test.input 0 2 = "\xFE\xFF")));
567
+
];
568
+
raw_test_data = Some test.raw_lines;
569
+
} in
570
+
results := result :: !results;
571
+
if success then incr passed else incr failed
572
+
end
573
+
) tests;
574
+
let file_result : Report.file_result = {
575
+
filename; test_type = "Encoding Detection";
576
+
passed_count = !passed; failed_count = !failed;
577
+
tests = List.rev !results;
578
+
} in
579
+
(file_result, !passed, !failed)
580
+
581
+
let run_dir test_dir =
582
+
let files = Sys.readdir test_dir |> Array.to_list in
583
+
let dat_files = List.filter (fun f ->
584
+
Filename.check_suffix f ".dat" && not (String.contains f '/')
585
+
) files in
586
+
let total_passed = ref 0 in
587
+
let total_failed = ref 0 in
588
+
let file_results = ref [] in
589
+
List.iter (fun file ->
590
+
let path = Filename.concat test_dir file in
591
+
if Sys.is_directory path then () else begin
592
+
let (file_result, passed, failed) = run_file path in
593
+
total_passed := !total_passed + passed;
594
+
total_failed := !total_failed + failed;
595
+
file_results := file_result :: !file_results;
596
+
Printf.printf " %s: %d passed, %d failed\n" file passed failed
597
+
end
598
+
) (List.sort String.compare dat_files);
599
+
(List.rev !file_results, !total_passed, !total_failed)
600
+
end
601
+
602
+
(* ============================================================ *)
603
+
(* Main Entry Point *)
604
+
(* ============================================================ *)
605
+
606
+
let () =
607
+
let base_dir = if Array.length Sys.argv > 1 then Sys.argv.(1) else "html5lib-tests" in
608
+
609
+
let all_files = ref [] in
610
+
let total_passed = ref 0 in
611
+
let total_failed = ref 0 in
612
+
613
+
(* Run Tree Construction Tests *)
614
+
Printf.printf "\n=== Tree Construction Tests ===\n";
615
+
let tree_dir = Filename.concat base_dir "tree-construction" in
616
+
if Sys.file_exists tree_dir then begin
617
+
let (files, passed, failed) = TreeConstruction.run_dir tree_dir in
618
+
all_files := !all_files @ files;
619
+
total_passed := !total_passed + passed;
620
+
total_failed := !total_failed + failed;
621
+
Printf.printf " Subtotal: %d passed, %d failed\n" passed failed
622
+
end else
623
+
Printf.printf " (directory not found: %s)\n" tree_dir;
624
+
625
+
(* Run Tokenizer Tests *)
626
+
Printf.printf "\n=== Tokenizer Tests ===\n";
627
+
let tok_dir = Filename.concat base_dir "tokenizer" in
628
+
if Sys.file_exists tok_dir then begin
629
+
let (files, passed, failed) = Tokenizer_tests.run_dir tok_dir in
630
+
all_files := !all_files @ files;
631
+
total_passed := !total_passed + passed;
632
+
total_failed := !total_failed + failed;
633
+
Printf.printf " Subtotal: %d passed, %d failed\n" passed failed
634
+
end else
635
+
Printf.printf " (directory not found: %s)\n" tok_dir;
636
+
637
+
(* Run Encoding Tests *)
638
+
Printf.printf "\n=== Encoding Detection Tests ===\n";
639
+
let enc_dir = Filename.concat base_dir "encoding" in
640
+
if Sys.file_exists enc_dir then begin
641
+
let (files, passed, failed) = Encoding_tests.run_dir enc_dir in
642
+
all_files := !all_files @ files;
643
+
total_passed := !total_passed + passed;
644
+
total_failed := !total_failed + failed;
645
+
Printf.printf " Subtotal: %d passed, %d failed\n" passed failed
646
+
end else
647
+
Printf.printf " (directory not found: %s)\n" enc_dir;
648
+
649
+
(* Note: Serializer tests use the standalone test_serializer.exe for full implementation *)
650
+
651
+
Printf.printf "\n=== Overall Summary ===\n";
652
+
Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed;
653
+
654
+
(* Generate combined HTML report *)
655
+
let report : Report.report = {
656
+
title = "HTML5 Parser Test Suite";
657
+
test_type = "combined";
658
+
description = "This is a comprehensive test report for the html5rw OCaml HTML5 parser library, \
659
+
validating conformance against the official html5lib-tests test suite. \
660
+
Tests cover: (1) Tree Construction - validating the DOM tree building algorithm; \
661
+
(2) Tokenization - validating the HTML tokenizer state machine; \
662
+
(3) Encoding Detection - validating character encoding sniffing. \
663
+
Each test shows the input, expected output, actual output, and original test data.";
664
+
files = !all_files;
665
+
total_passed = !total_passed;
666
+
total_failed = !total_failed;
667
+
} in
668
+
Report.generate_report report "html5lib_test_report.html";
669
+
670
+
exit (if !total_failed > 0 then 1 else 0)
+50
-27
test/test_encoding.ml
+50
-27
test/test_encoding.ml
···
1
1
(* Test runner for html5lib-tests encoding tests *)
2
2
3
3
module Encoding = Html5rw_encoding
4
+
module Report = Test_report
4
5
5
6
type test_case = {
6
7
input : string;
7
8
expected_encoding : string;
9
+
raw_lines : string; (* Original test data from .dat file *)
8
10
}
9
11
10
12
(* Normalize encoding name for comparison *)
···
22
24
23
25
(* Parse a single test case from lines *)
24
26
let parse_test_case lines =
27
+
let raw_lines = String.concat "\n" lines in
25
28
let rec parse acc = function
26
29
| [] -> acc
27
30
| line :: rest when String.length line > 0 && line.[0] = '#' ->
···
49
52
let data = get_section "#data" in
50
53
let encoding = get_section "#encoding" in
51
54
52
-
{ input = data; expected_encoding = String.trim encoding }
55
+
{ input = data; expected_encoding = String.trim encoding; raw_lines }
53
56
54
57
(* Parse a .dat file into test cases *)
55
58
let parse_dat_file content =
···
104
107
105
108
let passed = ref 0 in
106
109
let failed = ref 0 in
107
-
let errors = ref [] in
110
+
let results = ref [] in
108
111
109
112
List.iteri (fun i test ->
110
113
if String.trim test.expected_encoding = "" then
···
112
115
()
113
116
else begin
114
117
let (success, detected, expected) = run_test test in
115
-
if success then
116
-
incr passed
117
-
else begin
118
-
incr failed;
119
-
if List.length !errors < 5 then
120
-
errors := (i + 1, test.input, detected, expected) :: !errors
121
-
end
118
+
let result : Report.test_result = {
119
+
test_num = i + 1;
120
+
description = Printf.sprintf "Detect %s encoding" expected;
121
+
input = String.escaped test.input; (* Show escaped version of full input *)
122
+
expected;
123
+
actual = detected;
124
+
success;
125
+
details = [
126
+
("Input Length", string_of_int (String.length test.input));
127
+
("Has BOM", string_of_bool (String.length test.input >= 3 &&
128
+
(String.sub test.input 0 3 = "\xEF\xBB\xBF" || (* UTF-8 BOM *)
129
+
String.sub test.input 0 2 = "\xFF\xFE" || (* UTF-16 LE BOM *)
130
+
String.sub test.input 0 2 = "\xFE\xFF"))); (* UTF-16 BE BOM *)
131
+
];
132
+
raw_test_data = Some test.raw_lines;
133
+
} in
134
+
results := result :: !results;
135
+
if success then incr passed else incr failed
122
136
end
123
137
) tests;
124
138
125
-
(!passed, !failed, List.rev !errors, filename)
139
+
let file_result : Report.file_result = {
140
+
filename;
141
+
test_type = "Encoding Detection";
142
+
passed_count = !passed;
143
+
failed_count = !failed;
144
+
tests = List.rev !results;
145
+
} in
146
+
(file_result, !passed, !failed)
126
147
127
148
let () =
128
149
let test_dir = Sys.argv.(1) in
···
134
155
135
156
let total_passed = ref 0 in
136
157
let total_failed = ref 0 in
137
-
let all_errors = ref [] in
158
+
let file_results = ref [] in
138
159
139
160
List.iter (fun file ->
140
161
let path = Filename.concat test_dir file in
141
162
if Sys.is_directory path then () else begin
142
-
let (passed, failed, errors, filename) = run_file path in
163
+
let (file_result, passed, failed) = run_file path in
143
164
total_passed := !total_passed + passed;
144
165
total_failed := !total_failed + failed;
145
-
if errors <> [] then
146
-
all_errors := (filename, errors) :: !all_errors;
147
-
Printf.printf "%s: %d passed, %d failed\n" filename passed failed
166
+
file_results := file_result :: !file_results;
167
+
Printf.printf "%s: %d passed, %d failed\n" file passed failed
148
168
end
149
169
) (List.sort String.compare dat_files);
150
170
151
171
Printf.printf "\n=== Summary ===\n";
152
172
Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed;
153
173
154
-
if !all_errors <> [] then begin
155
-
Printf.printf "\n=== First failures ===\n";
156
-
List.iter (fun (filename, errors) ->
157
-
List.iter (fun (test_num, input, detected, expected) ->
158
-
Printf.printf "\n--- %s test %d ---\n" filename test_num;
159
-
Printf.printf "Input (first 200 chars): %s\n"
160
-
(String.escaped (String.sub input 0 (min 200 (String.length input))));
161
-
Printf.printf "Expected encoding: %s\n" expected;
162
-
Printf.printf "Detected encoding: %s\n" detected
163
-
) errors
164
-
) (List.rev !all_errors)
165
-
end;
174
+
(* Generate HTML report *)
175
+
let report : Report.report = {
176
+
title = "HTML5 Encoding Detection Tests";
177
+
test_type = "encoding";
178
+
description = "These tests validate the character encoding detection algorithm as specified in the WHATWG \
179
+
Encoding Standard. The parser must determine the document's character encoding from byte order \
180
+
marks (BOM), meta charset declarations, or content sniffing. Tests cover UTF-8, UTF-16 \
181
+
(big/little endian), Windows-1252, ISO-8859-2, EUC-JP, and other encodings. The algorithm \
182
+
examines initial bytes for BOM signatures and scans the first 1024 bytes for meta elements \
183
+
declaring charset or http-equiv content-type.";
184
+
files = List.rev !file_results;
185
+
total_passed = !total_passed;
186
+
total_failed = !total_failed;
187
+
} in
188
+
Report.generate_report report "test_encoding_report.html";
166
189
167
190
exit (if !total_failed > 0 then 1 else 0)
+60
-26
test/test_html5lib.ml
+60
-26
test/test_html5lib.ml
···
4
4
5
5
module Parser = Html5rw_parser
6
6
module Dom = Html5rw_dom
7
+
module Report = Test_report
7
8
8
9
type test_case = {
9
10
input : string;
···
11
12
expected_errors : string list;
12
13
script_on : bool;
13
14
fragment_context : string option;
15
+
raw_lines : string; (* Original test data from .dat file *)
14
16
}
15
17
16
18
let _is_blank s = String.trim s = ""
17
19
18
20
(* Parse a single test case from lines *)
19
21
let parse_test_case lines =
22
+
let raw_lines = String.concat "\n" lines in
20
23
let rec parse acc = function
21
24
| [] -> acc
22
25
| line :: rest when String.length line > 0 && line.[0] = '#' ->
···
61
64
expected_errors = errors;
62
65
script_on;
63
66
fragment_context = fragment;
67
+
raw_lines;
64
68
}
65
69
66
70
(* Parse a .dat file into test cases *)
···
125
129
let actual_tree = Dom.to_test_format (Parser.root result) in
126
130
let expected = normalize_tree (strip_tree_prefix test.expected_tree) in
127
131
let actual = normalize_tree (strip_tree_prefix actual_tree) in
128
-
(expected = actual, expected, actual, List.length (Parser.errors result), List.length test.expected_errors)
132
+
let error_count = List.length (Parser.errors result) in
133
+
let expected_error_count = List.length test.expected_errors in
134
+
(expected = actual, expected, actual, error_count, expected_error_count)
129
135
with e ->
130
136
let expected = normalize_tree (strip_tree_prefix test.expected_tree) in
131
137
(false, expected, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), 0, 0)
···
140
146
141
147
let passed = ref 0 in
142
148
let failed = ref 0 in
143
-
let errors = ref [] in
149
+
let results = ref [] in
144
150
145
151
List.iteri (fun i test ->
146
152
(* Skip script-on tests since we don't support scripting *)
147
153
if test.script_on then
148
154
() (* Skip this test *)
149
155
else begin
150
-
let (success, expected, actual, _actual_error_count, _expected_error_count) = run_test test in
151
-
if success then
152
-
incr passed
153
-
else begin
154
-
incr failed;
155
-
errors := (i + 1, test.input, expected, actual) :: !errors
156
-
end
156
+
let (success, expected, actual, actual_error_count, expected_error_count) = run_test test in
157
+
let description =
158
+
let input_preview = String.sub test.input 0 (min 60 (String.length test.input)) in
159
+
if test.fragment_context <> None then
160
+
Printf.sprintf "Fragment (%s): %s" (Option.get test.fragment_context) input_preview
161
+
else
162
+
input_preview
163
+
in
164
+
let details = [
165
+
("Fragment Context", Option.value test.fragment_context ~default:"(none)");
166
+
("Expected Errors", string_of_int expected_error_count);
167
+
("Actual Errors", string_of_int actual_error_count);
168
+
] in
169
+
let result : Report.test_result = {
170
+
test_num = i + 1;
171
+
description;
172
+
input = test.input;
173
+
expected;
174
+
actual;
175
+
success;
176
+
details;
177
+
raw_test_data = Some test.raw_lines;
178
+
} in
179
+
results := result :: !results;
180
+
if success then incr passed else incr failed
157
181
end
158
182
) tests;
159
183
160
-
(!passed, !failed, List.rev !errors, filename)
184
+
let file_result : Report.file_result = {
185
+
filename;
186
+
test_type = "Tree Construction";
187
+
passed_count = !passed;
188
+
failed_count = !failed;
189
+
tests = List.rev !results;
190
+
} in
191
+
(file_result, !passed, !failed)
161
192
162
193
let () =
163
194
let test_dir = Sys.argv.(1) in
···
169
200
170
201
let total_passed = ref 0 in
171
202
let total_failed = ref 0 in
172
-
let all_errors = ref [] in
203
+
let file_results = ref [] in
173
204
174
205
List.iter (fun file ->
175
206
let path = Filename.concat test_dir file in
176
207
if Sys.is_directory path then () else begin
177
-
let (passed, failed, errors, filename) = run_file path in
208
+
let (file_result, passed, failed) = run_file path in
178
209
total_passed := !total_passed + passed;
179
210
total_failed := !total_failed + failed;
180
-
if errors <> [] then
181
-
all_errors := (filename, errors) :: !all_errors;
182
-
Printf.printf "%s: %d passed, %d failed\n" filename passed failed
211
+
file_results := file_result :: !file_results;
212
+
Printf.printf "%s: %d passed, %d failed\n" file passed failed
183
213
end
184
214
) (List.sort String.compare dat_files);
185
215
186
216
Printf.printf "\n=== Summary ===\n";
187
217
Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed;
188
218
189
-
if !all_errors <> [] then begin
190
-
Printf.printf "\n=== First failures ===\n";
191
-
List.iter (fun (filename, errors) ->
192
-
List.iter (fun (test_num, input, expected, actual) ->
193
-
Printf.printf "\n--- %s test %d ---\n" filename test_num;
194
-
Printf.printf "Input: %s\n" (String.escaped input);
195
-
Printf.printf "Expected:\n%s\n" expected;
196
-
Printf.printf "Actual:\n%s\n" actual
197
-
) (List.filteri (fun i _ -> i < 3) errors)
198
-
) (List.filteri (fun i _ -> i < 10) !all_errors)
199
-
end;
219
+
(* Generate HTML report *)
220
+
let report : Report.report = {
221
+
title = "HTML5 Tree Construction Tests";
222
+
test_type = "tree-construction";
223
+
description = "These tests validate the HTML5 tree construction algorithm as specified in the WHATWG HTML Standard. \
224
+
Each test provides HTML input and the expected DOM tree structure. The parser processes the HTML and \
225
+
builds a document tree, which is then serialized and compared against the expected output. \
226
+
Tests cover various edge cases including malformed HTML, implicit element creation, foster parenting, \
227
+
adoption agency algorithm, and foreign content (SVG/MathML). Fragment parsing tests verify parsing \
228
+
in the context of specific elements.";
229
+
files = List.rev !file_results;
230
+
total_passed = !total_passed;
231
+
total_failed = !total_failed;
232
+
} in
233
+
Report.generate_report report "test_html5lib_report.html";
200
234
201
235
exit (if !total_failed > 0 then 1 else 0)
+587
test/test_report.ml
+587
test/test_report.ml
···
1
+
(* HTML Test Report Generator *)
2
+
3
+
type test_result = {
4
+
test_num : int;
5
+
description : string;
6
+
input : string;
7
+
expected : string;
8
+
actual : string;
9
+
success : bool;
10
+
details : (string * string) list; (* Additional key-value pairs *)
11
+
raw_test_data : string option; (* Original test file content for context *)
12
+
}
13
+
14
+
type file_result = {
15
+
filename : string;
16
+
test_type : string;
17
+
passed_count : int;
18
+
failed_count : int;
19
+
tests : test_result list;
20
+
}
21
+
22
+
type report = {
23
+
title : string;
24
+
test_type : string;
25
+
description : string; (* Explanation of what this test suite validates *)
26
+
files : file_result list;
27
+
total_passed : int;
28
+
total_failed : int;
29
+
}
30
+
31
+
let html_escape s =
32
+
let buf = Buffer.create (String.length s * 2) in
33
+
String.iter (fun c ->
34
+
match c with
35
+
| '&' -> Buffer.add_string buf "&"
36
+
| '<' -> Buffer.add_string buf "<"
37
+
| '>' -> Buffer.add_string buf ">"
38
+
| '"' -> Buffer.add_string buf """
39
+
| '\'' -> Buffer.add_string buf "'"
40
+
| c -> Buffer.add_char buf c
41
+
) s;
42
+
Buffer.contents buf
43
+
44
+
(* No truncation - show full content for standalone reports *)
45
+
let truncate_string ?(max_len=10000) s =
46
+
if String.length s <= max_len then s
47
+
else String.sub s 0 max_len ^ "\n... (truncated at " ^ string_of_int max_len ^ " chars)"
48
+
49
+
let css = {|
50
+
:root {
51
+
--bg-primary: #1a1a2e;
52
+
--bg-secondary: #16213e;
53
+
--bg-tertiary: #0f3460;
54
+
--text-primary: #eee;
55
+
--text-secondary: #aaa;
56
+
--accent: #e94560;
57
+
--success: #4ade80;
58
+
--failure: #f87171;
59
+
--border: #333;
60
+
}
61
+
62
+
* { box-sizing: border-box; margin: 0; padding: 0; }
63
+
64
+
body {
65
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
66
+
background: var(--bg-primary);
67
+
color: var(--text-primary);
68
+
line-height: 1.6;
69
+
}
70
+
71
+
.container {
72
+
max-width: 1400px;
73
+
margin: 0 auto;
74
+
padding: 20px;
75
+
}
76
+
77
+
header {
78
+
background: var(--bg-secondary);
79
+
padding: 20px;
80
+
border-radius: 8px;
81
+
margin-bottom: 20px;
82
+
}
83
+
84
+
header h1 {
85
+
font-size: 1.5rem;
86
+
margin-bottom: 10px;
87
+
color: var(--accent);
88
+
}
89
+
90
+
.summary {
91
+
display: flex;
92
+
gap: 20px;
93
+
flex-wrap: wrap;
94
+
align-items: center;
95
+
}
96
+
97
+
.stat {
98
+
padding: 8px 16px;
99
+
border-radius: 6px;
100
+
font-weight: 600;
101
+
}
102
+
103
+
.stat.total { background: var(--bg-tertiary); }
104
+
.stat.passed { background: rgba(74, 222, 128, 0.2); color: var(--success); }
105
+
.stat.failed { background: rgba(248, 113, 113, 0.2); color: var(--failure); }
106
+
107
+
.controls {
108
+
display: flex;
109
+
gap: 10px;
110
+
margin-top: 10px;
111
+
flex-wrap: wrap;
112
+
}
113
+
114
+
input[type="search"], select {
115
+
padding: 8px 12px;
116
+
border: 1px solid var(--border);
117
+
border-radius: 6px;
118
+
background: var(--bg-primary);
119
+
color: var(--text-primary);
120
+
font-size: 14px;
121
+
}
122
+
123
+
input[type="search"] { width: 300px; }
124
+
125
+
button {
126
+
padding: 8px 16px;
127
+
border: none;
128
+
border-radius: 6px;
129
+
background: var(--accent);
130
+
color: white;
131
+
cursor: pointer;
132
+
font-size: 14px;
133
+
}
134
+
135
+
button:hover { opacity: 0.9; }
136
+
137
+
.sidebar {
138
+
position: fixed;
139
+
left: 0;
140
+
top: 0;
141
+
bottom: 0;
142
+
width: 280px;
143
+
background: var(--bg-secondary);
144
+
border-right: 1px solid var(--border);
145
+
overflow-y: auto;
146
+
padding: 10px;
147
+
padding-top: 20px;
148
+
}
149
+
150
+
.sidebar-item {
151
+
padding: 8px 12px;
152
+
border-radius: 6px;
153
+
cursor: pointer;
154
+
display: flex;
155
+
justify-content: space-between;
156
+
align-items: center;
157
+
margin-bottom: 4px;
158
+
font-size: 14px;
159
+
}
160
+
161
+
.sidebar-item:hover { background: var(--bg-tertiary); }
162
+
.sidebar-item.active { background: var(--accent); }
163
+
164
+
.sidebar-item .count {
165
+
font-size: 12px;
166
+
padding: 2px 8px;
167
+
border-radius: 10px;
168
+
background: var(--bg-primary);
169
+
}
170
+
171
+
.sidebar-item .count.all-passed { color: var(--success); }
172
+
.sidebar-item .count.has-failed { color: var(--failure); }
173
+
174
+
main {
175
+
margin-left: 300px;
176
+
padding: 20px;
177
+
padding-top: 30px;
178
+
}
179
+
180
+
.intro {
181
+
background: var(--bg-secondary);
182
+
padding: 20px;
183
+
border-radius: 8px;
184
+
margin-bottom: 20px;
185
+
}
186
+
187
+
.file-section {
188
+
margin-bottom: 30px;
189
+
background: var(--bg-secondary);
190
+
border-radius: 8px;
191
+
overflow: hidden;
192
+
}
193
+
194
+
.file-header {
195
+
padding: 15px 20px;
196
+
background: var(--bg-tertiary);
197
+
cursor: pointer;
198
+
display: flex;
199
+
justify-content: space-between;
200
+
align-items: center;
201
+
}
202
+
203
+
.file-header h2 {
204
+
font-size: 1.1rem;
205
+
display: flex;
206
+
align-items: center;
207
+
gap: 10px;
208
+
}
209
+
210
+
.file-header .toggle {
211
+
font-size: 1.2rem;
212
+
transition: transform 0.2s;
213
+
}
214
+
215
+
.file-header.collapsed .toggle { transform: rotate(-90deg); }
216
+
217
+
.file-stats {
218
+
display: flex;
219
+
gap: 15px;
220
+
font-size: 14px;
221
+
}
222
+
223
+
.file-stats .passed { color: var(--success); }
224
+
.file-stats .failed { color: var(--failure); }
225
+
226
+
.tests-container {
227
+
padding: 10px;
228
+
}
229
+
230
+
.tests-container.hidden { display: none; }
231
+
232
+
.test-item {
233
+
margin: 8px 0;
234
+
border: 1px solid var(--border);
235
+
border-radius: 6px;
236
+
overflow: hidden;
237
+
}
238
+
239
+
.test-header {
240
+
padding: 10px 15px;
241
+
cursor: pointer;
242
+
display: flex;
243
+
justify-content: space-between;
244
+
align-items: center;
245
+
background: var(--bg-primary);
246
+
}
247
+
248
+
.test-header:hover { background: var(--bg-tertiary); }
249
+
250
+
.test-header .status {
251
+
width: 10px;
252
+
height: 10px;
253
+
border-radius: 50%;
254
+
margin-right: 10px;
255
+
}
256
+
257
+
.test-header .status.passed { background: var(--success); }
258
+
.test-header .status.failed { background: var(--failure); }
259
+
260
+
.test-header .test-info {
261
+
flex: 1;
262
+
display: flex;
263
+
align-items: center;
264
+
}
265
+
266
+
.test-header .test-num {
267
+
font-weight: 600;
268
+
margin-right: 10px;
269
+
color: var(--text-secondary);
270
+
}
271
+
272
+
.test-header .test-desc {
273
+
font-size: 14px;
274
+
color: var(--text-primary);
275
+
white-space: nowrap;
276
+
overflow: hidden;
277
+
text-overflow: ellipsis;
278
+
max-width: 600px;
279
+
}
280
+
281
+
.test-details {
282
+
padding: 15px;
283
+
background: var(--bg-primary);
284
+
border-top: 1px solid var(--border);
285
+
display: none;
286
+
}
287
+
288
+
.test-details.visible { display: block; }
289
+
290
+
.detail-section {
291
+
margin-bottom: 15px;
292
+
}
293
+
294
+
.detail-section h4 {
295
+
font-size: 12px;
296
+
text-transform: uppercase;
297
+
color: var(--text-secondary);
298
+
margin-bottom: 8px;
299
+
letter-spacing: 0.5px;
300
+
}
301
+
302
+
.detail-section pre {
303
+
background: var(--bg-secondary);
304
+
padding: 12px;
305
+
border-radius: 6px;
306
+
overflow-x: auto;
307
+
font-family: 'Monaco', 'Menlo', monospace;
308
+
font-size: 13px;
309
+
white-space: pre-wrap;
310
+
word-break: break-all;
311
+
max-height: 300px;
312
+
overflow-y: auto;
313
+
}
314
+
315
+
.detail-row {
316
+
display: grid;
317
+
grid-template-columns: 1fr 1fr;
318
+
gap: 15px;
319
+
}
320
+
321
+
.detail-row.single { grid-template-columns: 1fr; }
322
+
323
+
.meta-info {
324
+
display: flex;
325
+
gap: 20px;
326
+
flex-wrap: wrap;
327
+
font-size: 13px;
328
+
color: var(--text-secondary);
329
+
margin-bottom: 15px;
330
+
}
331
+
332
+
.meta-info span {
333
+
background: var(--bg-secondary);
334
+
padding: 4px 10px;
335
+
border-radius: 4px;
336
+
}
337
+
338
+
.diff-indicator {
339
+
color: var(--failure);
340
+
font-weight: bold;
341
+
margin-left: 5px;
342
+
}
343
+
344
+
@media (max-width: 900px) {
345
+
.sidebar { display: none; }
346
+
main { margin-left: 0; }
347
+
.detail-row { grid-template-columns: 1fr; }
348
+
}
349
+
|}
350
+
351
+
let js = {|
352
+
document.addEventListener('DOMContentLoaded', function() {
353
+
// File section toggling
354
+
document.querySelectorAll('.file-header').forEach(header => {
355
+
header.addEventListener('click', function() {
356
+
this.classList.toggle('collapsed');
357
+
const container = this.nextElementSibling;
358
+
container.classList.toggle('hidden');
359
+
});
360
+
});
361
+
362
+
// Test details toggling
363
+
document.querySelectorAll('.test-header').forEach(header => {
364
+
header.addEventListener('click', function(e) {
365
+
e.stopPropagation();
366
+
const details = this.nextElementSibling;
367
+
details.classList.toggle('visible');
368
+
});
369
+
});
370
+
371
+
// Sidebar navigation
372
+
document.querySelectorAll('.sidebar-item').forEach(item => {
373
+
item.addEventListener('click', function() {
374
+
const fileId = this.dataset.file;
375
+
const section = document.getElementById(fileId);
376
+
if (section) {
377
+
section.scrollIntoView({ behavior: 'smooth' });
378
+
// Expand if collapsed
379
+
const header = section.querySelector('.file-header');
380
+
if (header.classList.contains('collapsed')) {
381
+
header.click();
382
+
}
383
+
}
384
+
// Update active state
385
+
document.querySelectorAll('.sidebar-item').forEach(i => i.classList.remove('active'));
386
+
this.classList.add('active');
387
+
});
388
+
});
389
+
390
+
// Search functionality
391
+
const searchInput = document.getElementById('search');
392
+
if (searchInput) {
393
+
searchInput.addEventListener('input', function() {
394
+
const query = this.value.toLowerCase();
395
+
document.querySelectorAll('.test-item').forEach(item => {
396
+
const text = item.textContent.toLowerCase();
397
+
item.style.display = text.includes(query) ? '' : 'none';
398
+
});
399
+
});
400
+
}
401
+
402
+
// Filter functionality
403
+
const filterSelect = document.getElementById('filter');
404
+
if (filterSelect) {
405
+
filterSelect.addEventListener('change', function() {
406
+
const filter = this.value;
407
+
document.querySelectorAll('.test-item').forEach(item => {
408
+
const passed = item.querySelector('.status.passed') !== null;
409
+
if (filter === 'all') {
410
+
item.style.display = '';
411
+
} else if (filter === 'passed') {
412
+
item.style.display = passed ? '' : 'none';
413
+
} else if (filter === 'failed') {
414
+
item.style.display = passed ? 'none' : '';
415
+
}
416
+
});
417
+
});
418
+
}
419
+
420
+
// Expand/Collapse all
421
+
document.getElementById('expand-all')?.addEventListener('click', function() {
422
+
document.querySelectorAll('.file-header.collapsed').forEach(h => h.click());
423
+
});
424
+
425
+
document.getElementById('collapse-all')?.addEventListener('click', function() {
426
+
document.querySelectorAll('.file-header:not(.collapsed)').forEach(h => h.click());
427
+
});
428
+
});
429
+
|}
430
+
431
+
let generate_test_html test =
432
+
let status_class = if test.success then "passed" else "failed" in
433
+
let desc_escaped = html_escape test.description in
434
+
let input_escaped = html_escape (truncate_string test.input) in
435
+
let expected_escaped = html_escape (truncate_string test.expected) in
436
+
let actual_escaped = html_escape (truncate_string test.actual) in
437
+
438
+
let details_html = String.concat "" (List.map (fun (key, value) ->
439
+
Printf.sprintf {|
440
+
<div class="detail-section">
441
+
<h4>%s</h4>
442
+
<pre>%s</pre>
443
+
</div>
444
+
|} (html_escape key) (html_escape value)
445
+
) test.details) in
446
+
447
+
let raw_data_html = match test.raw_test_data with
448
+
| Some data ->
449
+
Printf.sprintf {|
450
+
<div class="detail-section">
451
+
<h4>Original Test Data (from .dat/.test file)</h4>
452
+
<pre>%s</pre>
453
+
</div>
454
+
|} (html_escape (truncate_string data))
455
+
| None -> ""
456
+
in
457
+
458
+
let diff_indicator = if test.success then "" else {|<span class="diff-indicator">✗</span>|} in
459
+
460
+
Printf.sprintf {|
461
+
<div class="test-item" data-passed="%b">
462
+
<div class="test-header">
463
+
<div class="test-info">
464
+
<span class="status %s"></span>
465
+
<span class="test-num">#%d</span>
466
+
<span class="test-desc">%s</span>
467
+
</div>
468
+
<span>▼</span>
469
+
</div>
470
+
<div class="test-details">
471
+
%s
472
+
<div class="detail-section">
473
+
<h4>Input (HTML to parse)</h4>
474
+
<pre>%s</pre>
475
+
</div>
476
+
<div class="detail-row">
477
+
<div class="detail-section">
478
+
<h4>Expected Output%s</h4>
479
+
<pre>%s</pre>
480
+
</div>
481
+
<div class="detail-section">
482
+
<h4>Actual Output%s</h4>
483
+
<pre>%s</pre>
484
+
</div>
485
+
</div>
486
+
%s
487
+
</div>
488
+
</div>
489
+
|} test.success status_class test.test_num desc_escaped
490
+
raw_data_html input_escaped diff_indicator expected_escaped diff_indicator actual_escaped details_html
491
+
492
+
let generate_file_html file =
493
+
let file_id = String.map (fun c -> if c = '.' then '-' else c) file.filename in
494
+
let tests_html = String.concat "\n" (List.map generate_test_html file.tests) in
495
+
let collapsed = if file.failed_count = 0 then "collapsed" else "" in
496
+
let hidden = if file.failed_count = 0 then "hidden" else "" in
497
+
498
+
Printf.sprintf {|
499
+
<div class="file-section" id="file-%s">
500
+
<div class="file-header %s">
501
+
<h2>
502
+
<span class="toggle">▼</span>
503
+
%s
504
+
<span style="font-weight: normal; font-size: 0.9em; color: var(--text-secondary)">(%s)</span>
505
+
</h2>
506
+
<div class="file-stats">
507
+
<span class="passed">✓ %d passed</span>
508
+
<span class="failed">✗ %d failed</span>
509
+
</div>
510
+
</div>
511
+
<div class="tests-container %s">
512
+
%s
513
+
</div>
514
+
</div>
515
+
|} file_id collapsed file.filename file.test_type file.passed_count file.failed_count hidden tests_html
516
+
517
+
let generate_sidebar_html files =
518
+
String.concat "\n" (List.map (fun file ->
519
+
let file_id = String.map (fun c -> if c = '.' then '-' else c) file.filename in
520
+
let count_class = if file.failed_count = 0 then "all-passed" else "has-failed" in
521
+
Printf.sprintf {|
522
+
<div class="sidebar-item" data-file="file-%s">
523
+
<span>%s</span>
524
+
<span class="count %s">%d/%d</span>
525
+
</div>
526
+
|} file_id file.filename count_class file.passed_count (file.passed_count + file.failed_count)
527
+
) files)
528
+
529
+
let generate_report report output_path =
530
+
let files_html = String.concat "\n" (List.map generate_file_html report.files) in
531
+
let sidebar_html = generate_sidebar_html report.files in
532
+
533
+
let html = Printf.sprintf {|<!DOCTYPE html>
534
+
<html lang="en">
535
+
<head>
536
+
<meta charset="UTF-8">
537
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
538
+
<title>%s - Test Report</title>
539
+
<style>%s</style>
540
+
</head>
541
+
<body>
542
+
<div class="sidebar">
543
+
<h3 style="padding: 10px; color: var(--text-secondary); font-size: 12px; text-transform: uppercase;">Files</h3>
544
+
%s
545
+
</div>
546
+
547
+
<main>
548
+
<header>
549
+
<h1>%s</h1>
550
+
<p style="color: var(--text-secondary); margin: 10px 0; max-width: 900px;">%s</p>
551
+
<div class="summary">
552
+
<span class="stat total">%d tests</span>
553
+
<span class="stat passed">✓ %d passed</span>
554
+
<span class="stat failed">✗ %d failed</span>
555
+
<span class="stat total">%.1f%% pass rate</span>
556
+
</div>
557
+
<div class="controls">
558
+
<input type="search" id="search" placeholder="Search tests...">
559
+
<select id="filter">
560
+
<option value="all">All tests</option>
561
+
<option value="passed">Passed only</option>
562
+
<option value="failed">Failed only</option>
563
+
</select>
564
+
<button id="expand-all">Expand All</button>
565
+
<button id="collapse-all">Collapse All</button>
566
+
</div>
567
+
</header>
568
+
%s
569
+
</main>
570
+
571
+
<script>%s</script>
572
+
</body>
573
+
</html>
574
+
|} report.title css
575
+
sidebar_html
576
+
report.title (html_escape report.description)
577
+
(report.total_passed + report.total_failed)
578
+
report.total_passed
579
+
report.total_failed
580
+
(100.0 *. float_of_int report.total_passed /. float_of_int (max 1 (report.total_passed + report.total_failed)))
581
+
files_html js
582
+
in
583
+
584
+
let oc = open_out output_path in
585
+
output_string oc html;
586
+
close_out oc;
587
+
Printf.printf "HTML report written to: %s\n" output_path
+103
-26
test/test_serializer.ml
+103
-26
test/test_serializer.ml
···
1
1
(* Test runner for html5lib-tests serializer tests *)
2
2
3
3
module Dom = Html5rw_dom
4
+
module Report = Test_report
4
5
5
6
(* Extract values from JSON *)
6
7
let json_string = function
···
33
34
match json_mem name obj with
34
35
| Some v -> v
35
36
| None -> failwith ("Missing member: " ^ name)
37
+
38
+
let rec json_to_string = function
39
+
| Jsont.Null _ -> "null"
40
+
| Jsont.Bool (b, _) -> string_of_bool b
41
+
| Jsont.Number (n, _) -> Printf.sprintf "%g" n
42
+
| Jsont.String (s, _) -> Printf.sprintf "%S" s
43
+
| Jsont.Array (arr, _) ->
44
+
"[" ^ String.concat ", " (List.map json_to_string arr) ^ "]"
45
+
| Jsont.Object (obj, _) ->
46
+
"{" ^ String.concat ", " (List.map (fun ((n, _), v) -> Printf.sprintf "%S: %s" n (json_to_string v)) obj) ^ "}"
36
47
37
48
(* Serialization options *)
38
49
type serialize_options = {
···
101
112
input : Jsont.json list;
102
113
expected : string list;
103
114
options : serialize_options;
115
+
raw_json : string; (* Original JSON representation of this test *)
104
116
}
105
117
106
118
let parse_test_case json =
119
+
let raw_json = json_to_string json in
107
120
let obj = json_object json in
108
121
let description = json_string (json_mem_exn "description" obj) in
109
122
let input = json_array (json_mem_exn "input" obj) in
110
123
let expected = List.map json_string (json_array (json_mem_exn "expected" obj)) in
111
124
let options = parse_options (json_mem "options" obj) in
112
-
{ description; input; expected; options }
125
+
{ description; input; expected; options; raw_json }
113
126
114
127
(* Parse attrs that can be either array [{name, value}] or object {name: value} or empty {} *)
115
128
let parse_attrs attrs_json =
···
694
707
(false, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), test.expected)
695
708
696
709
(* Run all tests in a file *)
710
+
let format_options opts =
711
+
let parts = [] in
712
+
let parts = if opts.quote_char_explicit then
713
+
Printf.sprintf "quote_char='%c'" opts.quote_char :: parts else parts in
714
+
let parts = if not opts.minimize_boolean_attributes then
715
+
"minimize_bool=false" :: parts else parts in
716
+
let parts = if opts.use_trailing_solidus then
717
+
"trailing_solidus=true" :: parts else parts in
718
+
let parts = if opts.escape_lt_in_attrs then
719
+
"escape_lt=true" :: parts else parts in
720
+
let parts = if opts.escape_rcdata then
721
+
"escape_rcdata=true" :: parts else parts in
722
+
let parts = if opts.strip_whitespace then
723
+
"strip_ws=true" :: parts else parts in
724
+
let parts = if opts.inject_meta_charset then
725
+
"inject_charset=true" :: parts else parts in
726
+
let parts = if not opts.omit_optional_tags then
727
+
"omit_tags=false" :: parts else parts in
728
+
if parts = [] then "(defaults)" else String.concat ", " (List.rev parts)
729
+
697
730
let run_file path =
698
731
let content =
699
732
let ic = open_in path in
···
717
750
let filename = Filename.basename path in
718
751
let passed = ref 0 in
719
752
let failed = ref 0 in
720
-
let first_failures = ref [] in
753
+
let results = ref [] in
721
754
722
755
List.iteri (fun i test_json ->
723
756
try
724
757
let test = parse_test_case test_json in
725
758
let (success, actual, expected) = run_test test in
726
759
727
-
if success then
728
-
incr passed
729
-
else begin
730
-
incr failed;
731
-
if List.length !first_failures < 3 then
732
-
first_failures := (i + 1, test.description, actual, expected) :: !first_failures
733
-
end
760
+
let result : Report.test_result = {
761
+
test_num = i + 1;
762
+
description = test.description;
763
+
input = String.concat "\n" (List.map (fun tok ->
764
+
(* Simplified token representation *)
765
+
match tok with
766
+
| Jsont.Array (arr, _) ->
767
+
(match arr with
768
+
| Jsont.String (ty, _) :: rest ->
769
+
Printf.sprintf "%s: %s" ty (String.concat ", " (List.map (function
770
+
| Jsont.String (s, _) -> Printf.sprintf "%S" s
771
+
| Jsont.Object _ -> "{...}"
772
+
| Jsont.Null _ -> "null"
773
+
| _ -> "?"
774
+
) rest))
775
+
| _ -> "?")
776
+
| _ -> "?"
777
+
) test.input);
778
+
expected = String.concat " | " expected;
779
+
actual;
780
+
success;
781
+
details = [
782
+
("Options", format_options test.options);
783
+
("Expected Variants", string_of_int (List.length expected));
784
+
];
785
+
raw_test_data = Some test.raw_json;
786
+
} in
787
+
results := result :: !results;
788
+
789
+
if success then incr passed else incr failed
734
790
with e ->
735
791
incr failed;
792
+
let result : Report.test_result = {
793
+
test_num = i + 1;
794
+
description = Printf.sprintf "Test %d" (i + 1);
795
+
input = "";
796
+
expected = "";
797
+
actual = Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e);
798
+
success = false;
799
+
details = [];
800
+
raw_test_data = Some (json_to_string test_json);
801
+
} in
802
+
results := result :: !results;
736
803
Printf.eprintf "Exception parsing test %d: %s\n" (i + 1) (Printexc.to_string e)
737
804
) tests_json;
738
805
739
-
(!passed, !failed, List.rev !first_failures, filename)
806
+
let file_result : Report.file_result = {
807
+
filename;
808
+
test_type = "Serializer";
809
+
passed_count = !passed;
810
+
failed_count = !failed;
811
+
tests = List.rev !results;
812
+
} in
813
+
(file_result, !passed, !failed)
740
814
741
815
let () =
742
816
let test_dir = Sys.argv.(1) in
···
745
819
746
820
let total_passed = ref 0 in
747
821
let total_failed = ref 0 in
748
-
let all_failures = ref [] in
822
+
let file_results = ref [] in
749
823
750
824
List.iter (fun file ->
751
825
let path = Filename.concat test_dir file in
752
-
let (passed, failed, failures, filename) = run_file path in
826
+
let (file_result, passed, failed) = run_file path in
753
827
total_passed := !total_passed + passed;
754
828
total_failed := !total_failed + failed;
755
-
if failures <> [] then
756
-
all_failures := (filename, failures) :: !all_failures;
757
-
Printf.printf "%s: %d passed, %d failed\n" filename passed failed
829
+
file_results := file_result :: !file_results;
830
+
Printf.printf "%s: %d passed, %d failed\n" file passed failed
758
831
) (List.sort String.compare test_files);
759
832
760
833
Printf.printf "\n=== Summary ===\n";
761
834
Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed;
762
835
763
-
if !all_failures <> [] then begin
764
-
Printf.printf "\n=== First failures ===\n";
765
-
List.iter (fun (filename, failures) ->
766
-
List.iter (fun (test_num, desc, actual, expected) ->
767
-
Printf.printf "\n--- %s test %d ---\n" filename test_num;
768
-
Printf.printf "Description: %s\n" desc;
769
-
Printf.printf "Expected: %s\n" (String.concat " | " expected);
770
-
Printf.printf "Actual: %s\n" actual
771
-
) failures
772
-
) (List.rev !all_failures)
773
-
end;
836
+
(* Generate HTML report *)
837
+
let report : Report.report = {
838
+
title = "HTML5 Serializer Tests";
839
+
test_type = "serializer";
840
+
description = "These tests validate the HTML serialization algorithm for converting DOM trees back to HTML text. \
841
+
Each test provides a sequence of tokens (start tags, end tags, text, comments, doctypes) and one \
842
+
or more valid serialized outputs. Tests cover attribute quoting, boolean attribute minimization, \
843
+
self-closing tag syntax (trailing solidus), entity escaping, whitespace handling, meta charset \
844
+
injection, and optional tag omission rules as specified in the HTML Standard. Multiple expected \
845
+
outputs allow for valid variations in serialization style.";
846
+
files = List.rev !file_results;
847
+
total_passed = !total_passed;
848
+
total_failed = !total_failed;
849
+
} in
850
+
Report.generate_report report "test_serializer_report.html";
774
851
775
852
exit (if !total_failed > 0 then 1 else 0)
+75
-41
test/test_tokenizer.ml
+75
-41
test/test_tokenizer.ml
···
3
3
open Bytesrw
4
4
5
5
module Tokenizer = Html5rw_tokenizer
6
+
module Report = Test_report
6
7
7
8
(* Token collector sink - collects all tokens into a list *)
8
9
module TokenCollector = struct
···
31
32
last_start_tag : string option;
32
33
double_escaped : bool;
33
34
xml_mode : bool;
35
+
raw_json : string; (* Original JSON representation of this test *)
34
36
}
35
37
36
38
(* Unescape double-escaped strings from tests *)
···
99
101
| Some v -> v
100
102
| None -> failwith ("Missing member: " ^ name)
101
103
104
+
(* Format JSON for display *)
105
+
let rec json_to_string = function
106
+
| Jsont.Null _ -> "null"
107
+
| Jsont.Bool (b, _) -> string_of_bool b
108
+
| Jsont.Number (n, _) -> Printf.sprintf "%g" n
109
+
| Jsont.String (s, _) -> Printf.sprintf "%S" s
110
+
| Jsont.Array (arr, _) ->
111
+
"[" ^ String.concat ", " (List.map json_to_string arr) ^ "]"
112
+
| Jsont.Object (obj, _) ->
113
+
"{" ^ String.concat ", " (List.map (fun ((n, _), v) -> Printf.sprintf "%S: %s" n (json_to_string v)) obj) ^ "}"
114
+
102
115
(* Parse a single test case from JSON *)
103
116
let parse_test_case ~xml_mode json =
117
+
let raw_json = json_to_string json in
104
118
let obj = json_object json in
105
119
let description = json_string (json_mem_exn "description" obj) in
106
120
let input = json_string (json_mem_exn "input" obj) in
···
121
135
| Some b -> json_bool b
122
136
| None -> false
123
137
in
124
-
{ description; input; output; expected_error_count; initial_states; last_start_tag; double_escaped; xml_mode }
138
+
{ description; input; output; expected_error_count; initial_states; last_start_tag; double_escaped; xml_mode; raw_json }
125
139
126
140
(* Convert state name to State.t *)
127
141
let state_of_string = function
···
258
272
259
273
(tokens_match && errors_count_match, actual_tokens, expected, actual_error_count, test.expected_error_count)
260
274
261
-
(* Format JSON for display *)
262
-
let rec json_to_string = function
263
-
| Jsont.Null _ -> "null"
264
-
| Jsont.Bool (b, _) -> string_of_bool b
265
-
| Jsont.Number (n, _) -> Printf.sprintf "%g" n
266
-
| Jsont.String (s, _) -> Printf.sprintf "%S" s
267
-
| Jsont.Array (arr, _) ->
268
-
"[" ^ String.concat ", " (List.map json_to_string arr) ^ "]"
269
-
| Jsont.Object (obj, _) ->
270
-
"{" ^ String.concat ", " (List.map (fun ((n, _), v) -> Printf.sprintf "%S: %s" n (json_to_string v)) obj) ^ "}"
271
-
272
275
(* Run all tests in a file *)
273
276
let run_file path =
274
277
let content =
···
303
306
let filename = Filename.basename path in
304
307
let passed = ref 0 in
305
308
let failed = ref 0 in
306
-
let first_failures = ref [] in
309
+
let results = ref [] in
307
310
308
311
List.iteri (fun i test ->
309
-
(* test is already parsed *)
310
-
311
312
(* Run for each initial state *)
312
313
List.iter (fun state_name ->
313
314
try
314
315
let state = state_of_string state_name in
315
316
let (success, actual, expected, actual_err_count, expected_err_count) = run_test test state in
316
317
317
-
if success then
318
-
incr passed
319
-
else begin
320
-
incr failed;
321
-
if List.length !first_failures < 3 then
322
-
first_failures := (i + 1, test.description, state_name, actual, expected, actual_err_count, expected_err_count) :: !first_failures
323
-
end
318
+
let description = Printf.sprintf "[%s] %s" state_name test.description in
319
+
let result : Report.test_result = {
320
+
test_num = i + 1;
321
+
description;
322
+
input = test.input;
323
+
expected = String.concat "\n" (List.map json_to_string expected);
324
+
actual = String.concat "\n" (List.map json_to_string actual);
325
+
success;
326
+
details = [
327
+
("Initial State", state_name);
328
+
("Last Start Tag", Option.value test.last_start_tag ~default:"(none)");
329
+
("Double Escaped", string_of_bool test.double_escaped);
330
+
("XML Mode", string_of_bool test.xml_mode);
331
+
("Expected Errors", string_of_int expected_err_count);
332
+
("Actual Errors", string_of_int actual_err_count);
333
+
];
334
+
raw_test_data = Some test.raw_json;
335
+
} in
336
+
results := result :: !results;
337
+
338
+
if success then incr passed else incr failed
324
339
with e ->
325
340
incr failed;
326
-
if List.length !first_failures < 3 then
327
-
first_failures := (i + 1, test.description, state_name, [], [], 0, 0) :: !first_failures;
341
+
let result : Report.test_result = {
342
+
test_num = i + 1;
343
+
description = Printf.sprintf "[%s] %s" state_name test.description;
344
+
input = test.input;
345
+
expected = "";
346
+
actual = Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e);
347
+
success = false;
348
+
details = [];
349
+
raw_test_data = Some test.raw_json;
350
+
} in
351
+
results := result :: !results;
328
352
Printf.eprintf "Exception in test %d (%s): %s\n" (i + 1) test.description (Printexc.to_string e)
329
353
) test.initial_states
330
354
) all_tests;
331
355
332
-
(!passed, !failed, List.rev !first_failures, filename)
356
+
let file_result : Report.file_result = {
357
+
filename;
358
+
test_type = "Tokenizer";
359
+
passed_count = !passed;
360
+
failed_count = !failed;
361
+
tests = List.rev !results;
362
+
} in
363
+
(file_result, !passed, !failed)
333
364
334
365
let () =
335
366
let test_dir = Sys.argv.(1) in
···
338
369
339
370
let total_passed = ref 0 in
340
371
let total_failed = ref 0 in
341
-
let all_failures = ref [] in
372
+
let file_results = ref [] in
342
373
343
374
List.iter (fun file ->
344
375
let path = Filename.concat test_dir file in
345
-
let (passed, failed, failures, filename) = run_file path in
376
+
let (file_result, passed, failed) = run_file path in
346
377
total_passed := !total_passed + passed;
347
378
total_failed := !total_failed + failed;
348
-
if failures <> [] then
349
-
all_failures := (filename, failures) :: !all_failures;
350
-
Printf.printf "%s: %d passed, %d failed\n" filename passed failed
379
+
file_results := file_result :: !file_results;
380
+
Printf.printf "%s: %d passed, %d failed\n" file passed failed
351
381
) (List.sort String.compare test_files);
352
382
353
383
Printf.printf "\n=== Summary ===\n";
354
384
Printf.printf "Total: %d passed, %d failed\n" !total_passed !total_failed;
355
385
356
-
if !all_failures <> [] then begin
357
-
Printf.printf "\n=== First failures ===\n";
358
-
List.iter (fun (filename, failures) ->
359
-
List.iter (fun (test_num, desc, state, actual, expected, actual_err_count, expected_err_count) ->
360
-
Printf.printf "\n--- %s test %d (%s) in %s ---\n" filename test_num state desc;
361
-
Printf.printf "Expected tokens: [%s]\n" (String.concat "; " (List.map json_to_string expected));
362
-
Printf.printf "Actual tokens: [%s]\n" (String.concat "; " (List.map json_to_string actual));
363
-
Printf.printf "Expected %d errors, got %d\n" expected_err_count actual_err_count
364
-
) failures
365
-
) (List.rev !all_failures)
366
-
end;
386
+
(* Generate HTML report *)
387
+
let report : Report.report = {
388
+
title = "HTML5 Tokenizer Tests";
389
+
test_type = "tokenizer";
390
+
description = "These tests validate the HTML5 tokenization algorithm as specified in the WHATWG HTML Standard. \
391
+
The tokenizer converts HTML input into a stream of tokens (DOCTYPE, start tags, end tags, comments, \
392
+
character data, and EOF). Each test specifies input HTML, expected tokens in JSON array format, \
393
+
and the initial tokenizer state. Tests cover normal parsing, RCDATA/RAWTEXT/PLAINTEXT states, \
394
+
script data parsing, CDATA sections, and various error conditions. Some tests are double-escaped \
395
+
to represent special characters. XML violation tests check behavior differences from XML mode.";
396
+
files = List.rev !file_results;
397
+
total_passed = !total_passed;
398
+
total_failed = !total_failed;
399
+
} in
400
+
Report.generate_report report "test_tokenizer_report.html";
367
401
368
402
exit (if !total_failed > 0 then 1 else 0)