+71
-3
README.md
+71
-3
README.md
···
1
-
# html5rw - Pure OCaml HTML5 Parser
1
+
# html5rw - Pure OCaml HTML5 Parser and Conformance Checker
2
2
3
-
A pure OCaml HTML5 parser implementing the WHATWG HTML5 parsing specification.
3
+
A pure OCaml HTML5 parser and validator implementing the WHATWG HTML5 specification.
4
4
This library passes the html5lib-tests suite and provides full support for
5
-
tokenization, tree construction, encoding detection, and CSS selector queries.
5
+
tokenization, tree construction, encoding detection, CSS selector queries, and
6
+
conformance checking.
6
7
This library was ported from [JustHTML](https://github.com/EmilStenstrom/justhtml/).
7
8
8
9
## Key Features
9
10
10
11
- **WHATWG Compliant**: Implements the full HTML5 parsing algorithm with proper error recovery
12
+
- **Conformance Checker**: Validates HTML5 documents against the WHATWG specification
11
13
- **CSS Selectors**: Query the DOM using standard CSS selector syntax
12
14
- **Streaming I/O**: Uses bytesrw for efficient streaming input/output
13
15
- **Encoding Detection**: Automatic character encoding detection following the WHATWG algorithm
14
16
- **Entity Decoding**: Complete HTML5 named character reference support
17
+
- **Multiple Output Formats**: Text, JSON (Nu validator compatible), and GNU-style output
18
+
19
+
## Libraries
20
+
21
+
- `html5rw` - Core HTML5 parser
22
+
- `html5rw.check` - Conformance checker library
23
+
24
+
## Command Line Tool
25
+
26
+
The `html5check` CLI validates HTML5 documents:
27
+
28
+
```bash
29
+
# Validate a file
30
+
html5check index.html
31
+
32
+
# Validate from stdin
33
+
cat page.html | html5check -
34
+
35
+
# JSON output (Nu validator compatible)
36
+
html5check --format=json page.html
37
+
38
+
# GNU-style output for IDE integration
39
+
html5check --format=gnu page.html
40
+
41
+
# Show only errors (suppress warnings)
42
+
html5check --errors-only page.html
43
+
44
+
# Quiet mode - show only counts
45
+
html5check --quiet page.html
46
+
```
47
+
48
+
Exit codes: 0 = valid, 1 = validation errors, 2 = I/O error.
15
49
16
50
## Usage
51
+
52
+
### Parsing HTML
17
53
18
54
```ocaml
19
55
open Bytesrw
···
41
77
let reader = Bytes.Reader.of_string "<p>Fragment content</p>"
42
78
let doc = Html5rw.parse ~fragment_context:ctx reader
43
79
```
80
+
81
+
### Validating HTML
82
+
83
+
```ocaml
84
+
open Bytesrw
85
+
86
+
(* Check HTML from a string *)
87
+
let html = "<html><body><p>Hello</p></body></html>"
88
+
let reader = Bytes.Reader.of_string html
89
+
let result = Htmlrw_check.check reader
90
+
91
+
(* Check for errors *)
92
+
if Htmlrw_check.has_errors result then
93
+
print_endline "Document has errors";
94
+
95
+
(* Get all messages *)
96
+
let messages = Htmlrw_check.messages result in
97
+
List.iter (fun msg ->
98
+
Format.printf "%a@." Htmlrw_check.pp_message msg
99
+
) messages;
100
+
101
+
(* Get formatted output *)
102
+
let text_output = Htmlrw_check.to_text result in
103
+
let json_output = Htmlrw_check.to_json result in
104
+
let gnu_output = Htmlrw_check.to_gnu result
105
+
```
106
+
107
+
The checker validates:
108
+
- Parse errors (malformed HTML syntax)
109
+
- Content model violations (invalid element nesting)
110
+
- Attribute errors (invalid or missing required attributes)
111
+
- Structural issues (other conformance problems)
44
112
45
113
## Installation
46
114
+5
bin/dune
+5
bin/dune
-4
bin/html5check/dune
-4
bin/html5check/dune
-168
bin/html5check/html5check.ml
-168
bin/html5check/html5check.ml
···
1
-
(*---------------------------------------------------------------------------
2
-
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
-
SPDX-License-Identifier: MIT
4
-
---------------------------------------------------------------------------*)
5
-
6
-
(** html5check - HTML5 conformance checker CLI
7
-
8
-
Command line interface for validating HTML5 documents. *)
9
-
10
-
open Cmdliner
11
-
12
-
let version = "0.1.0"
13
-
14
-
(** Exit codes *)
15
-
module Exit_code = struct
16
-
let ok = Cmd.Exit.ok
17
-
let validation_errors = 1
18
-
let io_error = 2
19
-
end
20
-
21
-
(** Read input from file or stdin *)
22
-
let read_input file =
23
-
try
24
-
let ic =
25
-
if file = "-" then stdin
26
-
else open_in file
27
-
in
28
-
let reader = Bytesrw.Bytes.Reader.of_in_channel ic in
29
-
Ok (reader, ic, file)
30
-
with
31
-
| Sys_error msg ->
32
-
Error (`Io_error (Printf.sprintf "Cannot read file '%s': %s" file msg))
33
-
34
-
(** Format output based on the requested format *)
35
-
let format_output format result =
36
-
match format with
37
-
| `Text -> Htmlrw_check.to_text result
38
-
| `Json -> Htmlrw_check.to_json result
39
-
| `Gnu -> Htmlrw_check.to_gnu result
40
-
41
-
(** Run the validation *)
42
-
let run format errors_only exit_zero quiet verbose file =
43
-
match read_input file with
44
-
| Error (`Io_error msg) ->
45
-
if not quiet then Printf.eprintf "Error: %s\n" msg;
46
-
Exit_code.io_error
47
-
| Ok (reader, ic, system_id) ->
48
-
(* Run validation *)
49
-
let result = Htmlrw_check.check ~system_id reader in
50
-
51
-
(* Close input if it's not stdin *)
52
-
if file <> "-" then close_in ic;
53
-
54
-
(* Get messages based on filtering *)
55
-
let messages =
56
-
if errors_only then Htmlrw_check.errors result
57
-
else Htmlrw_check.messages result
58
-
in
59
-
60
-
(* Output based on mode *)
61
-
if quiet then begin
62
-
(* Only show counts *)
63
-
let error_count = List.length (Htmlrw_check.errors result) in
64
-
let warning_count = List.length (Htmlrw_check.warnings result) in
65
-
if errors_only then
66
-
Printf.printf "%d error%s\n" error_count (if error_count = 1 then "" else "s")
67
-
else
68
-
Printf.printf "%d error%s, %d warning%s\n"
69
-
error_count (if error_count = 1 then "" else "s")
70
-
warning_count (if warning_count = 1 then "" else "s")
71
-
end else begin
72
-
(* Format and print messages *)
73
-
let output = format_output format result in
74
-
if output <> "" then print_string output;
75
-
76
-
(* Show summary if verbose *)
77
-
if verbose && messages <> [] then begin
78
-
let error_count = List.length (Htmlrw_check.errors result) in
79
-
let warning_count = List.length (Htmlrw_check.warnings result) in
80
-
Printf.eprintf "\nSummary: %d error%s, %d warning%s\n"
81
-
error_count (if error_count = 1 then "" else "s")
82
-
warning_count (if warning_count = 1 then "" else "s")
83
-
end
84
-
end;
85
-
86
-
(* Determine exit code *)
87
-
if exit_zero || not (Htmlrw_check.has_errors result) then
88
-
Exit_code.ok
89
-
else
90
-
Exit_code.validation_errors
91
-
92
-
(** Command line argument definitions *)
93
-
94
-
let format_arg =
95
-
let formats = [("text", `Text); ("json", `Json); ("gnu", `Gnu)] in
96
-
let doc =
97
-
"Output format. $(docv) must be one of $(b,text) (human-readable, default), \
98
-
$(b,json) (Nu validator compatible JSON), or $(b,gnu) (GNU-style for IDE integration)."
99
-
in
100
-
Arg.(value & opt (enum formats) `Text & info ["format"] ~docv:"FORMAT" ~doc)
101
-
102
-
let errors_only_arg =
103
-
let doc = "Only show errors (suppress warnings)." in
104
-
Arg.(value & flag & info ["errors-only"] ~doc)
105
-
106
-
let exit_zero_arg =
107
-
let doc =
108
-
"Always exit with status code 0, even if validation errors are found. \
109
-
Useful for CI pipelines where you want to collect validation results \
110
-
but not fail the build."
111
-
in
112
-
Arg.(value & flag & info ["exit-zero"] ~doc)
113
-
114
-
let quiet_arg =
115
-
let doc = "Quiet mode - only show error and warning counts, no details." in
116
-
Arg.(value & flag & info ["q"; "quiet"] ~doc)
117
-
118
-
let verbose_arg =
119
-
let doc = "Verbose mode - show additional information including summary." in
120
-
Arg.(value & flag & info ["v"; "verbose"] ~doc)
121
-
122
-
let file_arg =
123
-
let doc =
124
-
"HTML file to validate. Use $(b,-) to read from standard input. \
125
-
If no file is specified, reads from stdin."
126
-
in
127
-
Arg.(value & pos 0 string "-" & info [] ~docv:"FILE" ~doc)
128
-
129
-
let cmd =
130
-
let doc = "validate HTML5 documents for conformance" in
131
-
let man = [
132
-
`S Manpage.s_description;
133
-
`P "$(tname) validates HTML5 documents against the WHATWG HTML5 specification. \
134
-
It reports parse errors, structural validation issues, and conformance problems.";
135
-
`P "The validator checks for:";
136
-
`I ("Parse errors", "Malformed HTML syntax according to the WHATWG specification");
137
-
`I ("Content model violations", "Elements in invalid parent/child relationships");
138
-
`I ("Attribute errors", "Invalid or missing required attributes");
139
-
`I ("Structural issues", "Other conformance problems");
140
-
`S Manpage.s_options;
141
-
`S "OUTPUT FORMATS";
142
-
`P "The validator supports three output formats:";
143
-
`I ("$(b,text)", "Human-readable format showing file:line:col: severity: message");
144
-
`I ("$(b,json)", "JSON format compatible with the Nu Html Checker (v.Nu)");
145
-
`I ("$(b,gnu)", "GNU-style format for IDE integration (file:line:column: message)");
146
-
`S "EXIT STATUS";
147
-
`P "The validator exits with one of the following status codes:";
148
-
`I ("0", "No validation errors found (or --exit-zero was specified)");
149
-
`I ("1", "Validation errors were found");
150
-
`I ("2", "File not found or I/O error");
151
-
`S Manpage.s_examples;
152
-
`P "Validate a file:";
153
-
`Pre " $(mname) index.html";
154
-
`P "Validate from stdin:";
155
-
`Pre " cat page.html | $(mname) -";
156
-
`P "Show only errors in JSON format:";
157
-
`Pre " $(mname) --format=json --errors-only page.html";
158
-
`P "Quiet mode for CI:";
159
-
`Pre " $(mname) --quiet --exit-zero index.html";
160
-
`S Manpage.s_bugs;
161
-
`P "Report bugs at https://tangled.org/@anil.recoil.org/ocaml-html5rw/issues";
162
-
] in
163
-
let info = Cmd.info "html5check" ~version ~doc ~man in
164
-
Cmd.v info Term.(const run $ format_arg $ errors_only_arg $ exit_zero_arg
165
-
$ quiet_arg $ verbose_arg $ file_arg)
166
-
167
-
let main () = Cmd.eval' cmd
168
-
let () = Stdlib.exit (main ())
+167
bin/html5check.ml
+167
bin/html5check.ml
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** html5check - HTML5 conformance checker CLI
7
+
8
+
Command line interface for validating HTML5 documents. *)
9
+
10
+
open Cmdliner
11
+
12
+
let version = "0.1.0"
13
+
14
+
(** Exit codes *)
15
+
module Exit_code = struct
16
+
let ok = Cmd.Exit.ok
17
+
let validation_errors = 1
18
+
let io_error = 2
19
+
end
20
+
21
+
(** Read input from file or stdin *)
22
+
let read_input file =
23
+
try
24
+
let ic =
25
+
if file = "-" then stdin
26
+
else open_in file
27
+
in
28
+
let reader = Bytesrw.Bytes.Reader.of_in_channel ic in
29
+
Ok (reader, ic, file)
30
+
with
31
+
| Sys_error msg ->
32
+
Error (`Io_error (Printf.sprintf "Cannot read file '%s': %s" file msg))
33
+
34
+
(** Format output based on the requested format *)
35
+
let format_output format result =
36
+
match format with
37
+
| `Text -> Htmlrw_check.to_text result
38
+
| `Json -> Htmlrw_check.to_json result
39
+
| `Gnu -> Htmlrw_check.to_gnu result
40
+
41
+
(** Run the validation *)
42
+
let run format errors_only exit_zero quiet verbose file =
43
+
match read_input file with
44
+
| Error (`Io_error msg) ->
45
+
if not quiet then Printf.eprintf "Error: %s\n" msg;
46
+
Exit_code.io_error
47
+
| Ok (reader, ic, system_id) ->
48
+
(* Run validation *)
49
+
let result = Htmlrw_check.check ~system_id reader in
50
+
(* Close input if it's not stdin *)
51
+
if file <> "-" then close_in ic;
52
+
53
+
(* Get messages based on filtering *)
54
+
let messages =
55
+
if errors_only then Htmlrw_check.errors result
56
+
else Htmlrw_check.messages result
57
+
in
58
+
59
+
(* Output based on mode *)
60
+
if quiet then begin
61
+
(* Only show counts *)
62
+
let error_count = List.length (Htmlrw_check.errors result) in
63
+
let warning_count = List.length (Htmlrw_check.warnings result) in
64
+
if errors_only then
65
+
Printf.printf "%d error%s\n" error_count (if error_count = 1 then "" else "s")
66
+
else
67
+
Printf.printf "%d error%s, %d warning%s\n"
68
+
error_count (if error_count = 1 then "" else "s")
69
+
warning_count (if warning_count = 1 then "" else "s")
70
+
end else begin
71
+
(* Format and print messages *)
72
+
let output = format_output format result in
73
+
if output <> "" then print_string output;
74
+
75
+
(* Show summary if verbose *)
76
+
if verbose && messages <> [] then begin
77
+
let error_count = List.length (Htmlrw_check.errors result) in
78
+
let warning_count = List.length (Htmlrw_check.warnings result) in
79
+
Printf.eprintf "\nSummary: %d error%s, %d warning%s\n"
80
+
error_count (if error_count = 1 then "" else "s")
81
+
warning_count (if warning_count = 1 then "" else "s")
82
+
end
83
+
end;
84
+
85
+
(* Determine exit code *)
86
+
if exit_zero || not (Htmlrw_check.has_errors result) then
87
+
Exit_code.ok
88
+
else
89
+
Exit_code.validation_errors
90
+
91
+
(** Command line argument definitions *)
92
+
93
+
let format_arg =
94
+
let formats = [("text", `Text); ("json", `Json); ("gnu", `Gnu)] in
95
+
let doc =
96
+
"Output format. $(docv) must be one of $(b,text) (human-readable, default), \
97
+
$(b,json) (Nu validator compatible JSON), or $(b,gnu) (GNU-style for IDE integration)."
98
+
in
99
+
Arg.(value & opt (enum formats) `Text & info ["format"] ~docv:"FORMAT" ~doc)
100
+
101
+
let errors_only_arg =
102
+
let doc = "Only show errors (suppress warnings)." in
103
+
Arg.(value & flag & info ["errors-only"] ~doc)
104
+
105
+
let exit_zero_arg =
106
+
let doc =
107
+
"Always exit with status code 0, even if validation errors are found. \
108
+
Useful for CI pipelines where you want to collect validation results \
109
+
but not fail the build."
110
+
in
111
+
Arg.(value & flag & info ["exit-zero"] ~doc)
112
+
113
+
let quiet_arg =
114
+
let doc = "Quiet mode - only show error and warning counts, no details." in
115
+
Arg.(value & flag & info ["q"; "quiet"] ~doc)
116
+
117
+
let verbose_arg =
118
+
let doc = "Verbose mode - show additional information including summary." in
119
+
Arg.(value & flag & info ["v"; "verbose"] ~doc)
120
+
121
+
let file_arg =
122
+
let doc =
123
+
"HTML file to validate. Use $(b,-) to read from standard input. \
124
+
If no file is specified, reads from stdin."
125
+
in
126
+
Arg.(value & pos 0 string "-" & info [] ~docv:"FILE" ~doc)
127
+
128
+
let cmd =
129
+
let doc = "validate HTML5 documents for conformance" in
130
+
let man = [
131
+
`S Manpage.s_description;
132
+
`P "$(tname) validates HTML5 documents against the WHATWG HTML5 specification. \
133
+
It reports parse errors, structural validation issues, and conformance problems.";
134
+
`P "The validator checks for:";
135
+
`I ("Parse errors", "Malformed HTML syntax according to the WHATWG specification");
136
+
`I ("Content model violations", "Elements in invalid parent/child relationships");
137
+
`I ("Attribute errors", "Invalid or missing required attributes");
138
+
`I ("Structural issues", "Other conformance problems");
139
+
`S Manpage.s_options;
140
+
`S "OUTPUT FORMATS";
141
+
`P "The validator supports three output formats:";
142
+
`I ("$(b,text)", "Human-readable format showing file:line:col: severity: message");
143
+
`I ("$(b,json)", "JSON format compatible with the Nu Html Checker (v.Nu)");
144
+
`I ("$(b,gnu)", "GNU-style format for IDE integration (file:line:column: message)");
145
+
`S "EXIT STATUS";
146
+
`P "The validator exits with one of the following status codes:";
147
+
`I ("0", "No validation errors found (or --exit-zero was specified)");
148
+
`I ("1", "Validation errors were found");
149
+
`I ("2", "File not found or I/O error");
150
+
`S Manpage.s_examples;
151
+
`P "Validate a file:";
152
+
`Pre " $(mname) index.html";
153
+
`P "Validate from stdin:";
154
+
`Pre " cat page.html | $(mname) -";
155
+
`P "Show only errors in JSON format:";
156
+
`Pre " $(mname) --format=json --errors-only page.html";
157
+
`P "Quiet mode for CI:";
158
+
`Pre " $(mname) --quiet --exit-zero index.html";
159
+
`S Manpage.s_bugs;
160
+
`P "Report bugs at https://tangled.org/@anil.recoil.org/ocaml-html5rw/issues";
161
+
] in
162
+
let info = Cmd.info "html5check" ~version ~doc ~man in
163
+
Cmd.v info Term.(const run $ format_arg $ errors_only_arg $ exit_zero_arg
164
+
$ quiet_arg $ verbose_arg $ file_arg)
165
+
166
+
let main () = Cmd.eval' cmd
167
+
let () = Stdlib.exit (main ())
+16
dune-project
+16
dune-project
···
26
26
(uuuu (>= 0.3.0))
27
27
(uunf (>= 15.0.0))
28
28
(xmlm (>= 1.4.0))
29
+
langdetect
29
30
(odoc :with-doc)
30
31
(jsont (>= 0.2.0))
31
32
(cmdliner (>= 1.3.0))))
33
+
34
+
(package
35
+
(name html5rw-js)
36
+
(synopsis "Browser-based HTML5 parser via js_of_ocaml/wasm_of_ocaml")
37
+
(description
38
+
"JavaScript and WebAssembly builds of the html5rw HTML5 parser for browser use. \
39
+
Includes a main validator library, web worker for background validation, and \
40
+
browser-based test runner.")
41
+
(depends
42
+
(ocaml (>= 5.1.0))
43
+
(html5rw (= :version))
44
+
(js_of_ocaml (>= 5.0))
45
+
(js_of_ocaml-ppx (>= 5.0))
46
+
(wasm_of_ocaml-compiler (>= 5.0))
47
+
(brr (>= 0.0.6))))
+35
html5rw-js.opam
+35
html5rw-js.opam
···
1
+
# This file is generated by dune, edit dune-project instead
2
+
opam-version: "2.0"
3
+
synopsis: "Browser-based HTML5 parser via js_of_ocaml/wasm_of_ocaml"
4
+
description:
5
+
"JavaScript and WebAssembly builds of the html5rw HTML5 parser for browser use. Includes a main validator library, web worker for background validation, and browser-based test runner."
6
+
maintainer: ["Anil Madhavapeddy <anil@recoil.org>"]
7
+
authors: ["Anil Madhavapeddy <anil@recoil.org>"]
8
+
license: "MIT"
9
+
homepage: "https://tangled.org/@anil.recoil.org/ocaml-html5rw"
10
+
bug-reports: "https://tangled.org/@anil.recoil.org/ocaml-html5rw/issues"
11
+
depends: [
12
+
"dune" {>= "3.20"}
13
+
"ocaml" {>= "5.1.0"}
14
+
"html5rw" {= version}
15
+
"js_of_ocaml" {>= "5.0"}
16
+
"js_of_ocaml-ppx" {>= "5.0"}
17
+
"wasm_of_ocaml-compiler" {>= "5.0"}
18
+
"brr" {>= "0.0.6"}
19
+
"odoc" {with-doc}
20
+
]
21
+
build: [
22
+
["dune" "subst"] {dev}
23
+
[
24
+
"dune"
25
+
"build"
26
+
"-p"
27
+
name
28
+
"-j"
29
+
jobs
30
+
"@install"
31
+
"@runtest" {with-test}
32
+
"@doc" {with-doc}
33
+
]
34
+
]
35
+
x-maintenance-intent: ["(latest)"]
+1
html5rw.opam
+1
html5rw.opam
+2
-2
lib/check/attr_utils.ml
+2
-2
lib/check/attr_utils.ml
···
3
3
type attrs = (string * string) list
4
4
5
5
let has_attr name attrs =
6
-
List.exists (fun (n, _) -> String.lowercase_ascii n = name) attrs
6
+
List.exists (fun (n, _) -> Astring.String.Ascii.lowercase n = name) attrs
7
7
8
8
let get_attr name attrs =
9
9
List.find_map (fun (n, v) ->
10
-
if String.lowercase_ascii n = name then Some v else None
10
+
if Astring.String.Ascii.lowercase n = name then Some v else None
11
11
) attrs
12
12
13
13
let get_attr_or name ~default attrs =
+2
-1
lib/check/checker_registry.ml
+2
-1
lib/check/checker_registry.ml
···
22
22
Hashtbl.replace reg "source" Source_checker.checker;
23
23
Hashtbl.replace reg "label" Label_checker.checker;
24
24
Hashtbl.replace reg "ruby" Ruby_checker.checker;
25
-
Hashtbl.replace reg "h1" H1_checker.checker;
25
+
Hashtbl.replace reg "heading" Heading_checker.checker;
26
26
Hashtbl.replace reg "srcset-sizes" Srcset_sizes_checker.checker;
27
27
Hashtbl.replace reg "autofocus" Autofocus_checker.checker;
28
28
Hashtbl.replace reg "option" Option_checker.checker;
···
36
36
Hashtbl.replace reg "xhtml-content" Xhtml_content_checker.checker;
37
37
Hashtbl.replace reg "lang-detecting" Lang_detecting_checker.checker;
38
38
Hashtbl.replace reg "unknown-element" Unknown_element_checker.checker;
39
+
Hashtbl.replace reg "content" Content_checker.checker;
39
40
reg
40
41
41
42
let register registry name checker = Hashtbl.replace registry name checker
+64
-7
lib/check/content_model/content_checker.ml
+64
-7
lib/check/content_model/content_checker.ml
···
30
30
| Some spec ->
31
31
List.exists (fun cat -> Element_spec.has_category spec cat) cats)
32
32
| Content_model.Elements names ->
33
-
List.mem (String.lowercase_ascii element_name)
34
-
(List.map String.lowercase_ascii names)
33
+
List.mem (Astring.String.Ascii.lowercase element_name)
34
+
(List.map Astring.String.Ascii.lowercase names)
35
35
| Content_model.Mixed cats -> (
36
36
match Element_registry.get registry element_name with
37
37
| None -> false
···
79
79
(`Element (`Not_allowed_as_child (`Child name, `Parent prohibited))))
80
80
spec.Element_spec.prohibited_ancestors
81
81
82
+
(* Check if element is allowed via permitted_parents *)
83
+
let is_permitted_parent registry child_name parent_name =
84
+
match Element_registry.get registry child_name with
85
+
| None -> false
86
+
| Some spec ->
87
+
match spec.Element_spec.permitted_parents with
88
+
| None -> false
89
+
| Some parents ->
90
+
List.mem (Astring.String.Ascii.lowercase parent_name)
91
+
(List.map Astring.String.Ascii.lowercase parents)
92
+
93
+
(* Check if a specific element is in the ancestor stack *)
94
+
let has_ancestor state ancestor_name =
95
+
List.exists (fun ctx ->
96
+
String.equal (Astring.String.Ascii.lowercase ctx.name)
97
+
(Astring.String.Ascii.lowercase ancestor_name)
98
+
) state.ancestor_stack
99
+
100
+
(* Check if an attribute exists in raw attrs list *)
101
+
let has_raw_attr name attrs =
102
+
List.exists (fun (n, _) ->
103
+
Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name
104
+
) attrs
105
+
106
+
(* Special cases for content model validation:
107
+
- dt/dd inside div is only valid when dl is an ancestor (div as grouping in dl)
108
+
- meta with property/itemprop/name attribute in body is valid (RDFa/microdata)
109
+
- link with itemprop in body is valid (microdata) *)
110
+
let is_special_case_allowed state child_name parent_name raw_attrs =
111
+
let child_lower = Astring.String.Ascii.lowercase child_name in
112
+
let parent_lower = Astring.String.Ascii.lowercase parent_name in
113
+
(* dt/dd inside div is allowed when dl is an ancestor *)
114
+
if (child_lower = "dt" || child_lower = "dd") && parent_lower = "div" then
115
+
has_ancestor state "dl"
116
+
(* meta in body is allowed with property (RDFa), itemprop (microdata), or name+content (meta tags) *)
117
+
else if child_lower = "meta" && parent_lower <> "head" then
118
+
has_raw_attr "property" raw_attrs ||
119
+
has_raw_attr "itemprop" raw_attrs ||
120
+
(has_raw_attr "name" raw_attrs && has_raw_attr "content" raw_attrs)
121
+
(* link in body is allowed with itemprop (microdata) or property (RDFa) *)
122
+
else if child_lower = "link" && parent_lower <> "head" then
123
+
has_raw_attr "itemprop" raw_attrs || has_raw_attr "property" raw_attrs
124
+
(* Custom elements (with hyphen) are valid HTML5 and are flow content *)
125
+
else if String.contains child_lower '-' then
126
+
true
127
+
else
128
+
false
129
+
82
130
(* Validate that a child element is allowed *)
83
-
let validate_child_element state child_name collector =
131
+
let validate_child_element state child_name raw_attrs collector =
84
132
match state.ancestor_stack with
85
133
| [] ->
86
134
(* Root level - only html allowed *)
87
-
if not (String.equal (String.lowercase_ascii child_name) "html") then
135
+
if not (String.equal (Astring.String.Ascii.lowercase child_name) "html") then
88
136
Message_collector.add_typed collector
89
137
(`Generic (Printf.sprintf "Element '%s' not allowed at document root (only 'html' allowed)" child_name))
90
138
| parent :: _ ->
91
139
let content_model = parent.spec.Element_spec.content_model in
92
-
if not (matches_content_model state.registry child_name content_model) then
140
+
(* Check content model, permitted_parents, or special cases *)
141
+
let allowed_by_content_model = matches_content_model state.registry child_name content_model in
142
+
let allowed_by_permitted_parents = is_permitted_parent state.registry child_name parent.name in
143
+
let allowed_by_special_case = is_special_case_allowed state child_name parent.name raw_attrs in
144
+
if not (allowed_by_content_model || allowed_by_permitted_parents || allowed_by_special_case) then
93
145
Message_collector.add_typed collector
94
146
(`Element (`Not_allowed_as_child (`Child child_name, `Parent parent.name)))
95
147
96
148
let start_element state ~element collector =
97
149
let name = Tag.tag_to_string element.Element.tag in
150
+
let raw_attrs = element.Element.raw_attrs in
98
151
99
152
(* Check if we're inside a foreign (SVG/MathML) context *)
100
153
let in_foreign_context = match state.ancestor_stack with
···
127
180
match spec_opt with
128
181
| None ->
129
182
(* Unknown element - first check if it's allowed in current context *)
130
-
validate_child_element state name collector
183
+
validate_child_element state name raw_attrs collector;
184
+
(* Push unknown element onto stack with default flow content model *)
185
+
let spec = Element_spec.make ~name ~content_model:(Content_model.Categories [Content_category.Flow]) () in
186
+
let context = { name; spec; children_count = 0; is_foreign = false } in
187
+
state.ancestor_stack <- context :: state.ancestor_stack
131
188
| Some spec ->
132
189
(* Check prohibited ancestors *)
133
190
check_prohibited_ancestors state name spec collector;
134
191
135
192
(* Validate this element is allowed as child of parent *)
136
-
validate_child_element state name collector;
193
+
validate_child_element state name raw_attrs collector;
137
194
138
195
(* Push element context onto stack *)
139
196
let context = { name; spec; children_count = 0; is_foreign = false } in
+2
-2
lib/check/content_model/element_registry.ml
+2
-2
lib/check/content_model/element_registry.ml
···
3
3
let create () = Hashtbl.create 128
4
4
5
5
let register registry spec =
6
-
let name = String.lowercase_ascii spec.Element_spec.name in
6
+
let name = Astring.String.Ascii.lowercase spec.Element_spec.name in
7
7
Hashtbl.replace registry name spec
8
8
9
9
let get registry name =
10
-
let name = String.lowercase_ascii name in
10
+
let name = Astring.String.Ascii.lowercase name in
11
11
Hashtbl.find_opt registry name
12
12
13
13
let list_names registry =
+3
-1
lib/check/content_model/elements_embedded.ml
+3
-1
lib/check/content_model/elements_embedded.ml
···
31
31
()
32
32
33
33
let img =
34
+
(* Note: img is only Interactive when it has usemap attribute;
35
+
we omit Interactive from static categories since usemap is rare *)
34
36
Element_spec.make ~name:"img" ~void:true
35
-
~categories:[ Flow; Phrasing; Embedded; Palpable; Interactive ]
37
+
~categories:[ Flow; Phrasing; Embedded; Palpable ]
36
38
~content_model:Nothing
37
39
~attrs:
38
40
[
+1
-1
lib/check/content_model/elements_form.ml
+1
-1
lib/check/content_model/elements_form.ml
···
97
97
let select =
98
98
Element_spec.make ~name:"select"
99
99
~categories:[Flow; Phrasing; Interactive; Palpable]
100
-
~content_model:(Elements ["option"; "optgroup"; "script"; "template"])
100
+
~content_model:(Elements ["option"; "optgroup"; "hr"; "script"; "template"])
101
101
~attrs:[
102
102
Attr_spec.make "autocomplete" ~datatype:"autocomplete" ();
103
103
Attr_spec.make "disabled" ~datatype:"boolean" ();
-1
lib/check/content_model/elements_table.ml
-1
lib/check/content_model/elements_table.ml
+28
-6
lib/check/datatype/datatype.ml
+28
-6
lib/check/datatype/datatype.ml
···
12
12
13
13
(* Helper utilities *)
14
14
15
+
(** Character predicates *)
16
+
15
17
let is_whitespace = function
16
18
| ' ' | '\t' | '\n' | '\r' | '\012' (* FF *) -> true
17
19
| _ -> false
18
20
19
21
let is_ascii_digit = function '0' .. '9' -> true | _ -> false
20
22
21
-
let to_ascii_lowercase c =
22
-
match c with 'A' .. 'Z' -> Char.chr (Char.code c + 32) | _ -> c
23
+
let is_lower_alpha = function 'a' .. 'z' -> true | _ -> false
24
+
25
+
let is_upper_alpha = function 'A' .. 'Z' -> true | _ -> false
26
+
27
+
let is_alpha c = is_lower_alpha c || is_upper_alpha c
28
+
29
+
let is_alphanumeric c = is_alpha c || is_ascii_digit c
23
30
24
-
let string_to_ascii_lowercase s =
25
-
String.map to_ascii_lowercase s
31
+
let is_hex_digit = function
32
+
| '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
33
+
| _ -> false
34
+
35
+
(** Case conversion - delegated to Astring *)
36
+
37
+
(* Removed to_ascii_lowercase and string_to_ascii_lowercase - use Astring.String.Ascii.lowercase instead *)
38
+
39
+
(** String predicates *)
40
+
41
+
let is_non_empty s = String.trim s <> ""
42
+
43
+
let is_all_digits s = String.length s > 0 && String.for_all is_ascii_digit s
44
+
45
+
let is_all_alpha s = String.length s > 0 && String.for_all is_alpha s
46
+
47
+
let is_all_alphanumeric s = String.length s > 0 && String.for_all is_alphanumeric s
26
48
27
49
let trim_html_spaces s =
28
50
let len = String.length s in
···
52
74
let make_enum ~name ~values ?(allow_empty = true) () : t =
53
75
(* Pre-compute hashtable for O(1) membership *)
54
76
let values_tbl = Hashtbl.create (List.length values) in
55
-
List.iter (fun v -> Hashtbl.add values_tbl (String.lowercase_ascii v) ()) values;
77
+
List.iter (fun v -> Hashtbl.add values_tbl (Astring.String.Ascii.lowercase v) ()) values;
56
78
let values_str = String.concat ", " (List.map (Printf.sprintf "'%s'") values) in
57
79
(module struct
58
80
let name = name
59
81
let validate s =
60
-
let s_lower = string_to_ascii_lowercase s in
82
+
let s_lower = Astring.String.Ascii.lowercase s in
61
83
if (allow_empty && s = "") || Hashtbl.mem values_tbl s_lower then Ok ()
62
84
else Error (Printf.sprintf "The value '%s' is not a valid %s value. Expected %s%s."
63
85
s name (if allow_empty then "empty string, " else "") values_str)
+36
-5
lib/check/datatype/datatype.mli
+36
-5
lib/check/datatype/datatype.mli
···
27
27
(** Check if a value is valid *)
28
28
val is_valid : t -> string -> bool
29
29
30
-
(** Helper utilities for implementing datatype validators. *)
30
+
(** {1 Helper utilities for implementing datatype validators} *)
31
+
32
+
(** {2 Character predicates} *)
31
33
32
34
(** Check if a character is HTML5 whitespace (space, tab, LF, FF, or CR). *)
33
35
val is_whitespace : char -> bool
···
35
37
(** Check if a character is an ASCII digit (0-9). *)
36
38
val is_ascii_digit : char -> bool
37
39
38
-
(** Convert an ASCII character to lowercase. *)
39
-
val to_ascii_lowercase : char -> char
40
+
(** Check if a character is a lowercase ASCII letter (a-z). *)
41
+
val is_lower_alpha : char -> bool
42
+
43
+
(** Check if a character is an uppercase ASCII letter (A-Z). *)
44
+
val is_upper_alpha : char -> bool
45
+
46
+
(** Check if a character is an ASCII letter (a-z or A-Z). *)
47
+
val is_alpha : char -> bool
48
+
49
+
(** Check if a character is an ASCII letter or digit. *)
50
+
val is_alphanumeric : char -> bool
51
+
52
+
(** Check if a character is a hexadecimal digit (0-9, a-f, A-F). *)
53
+
val is_hex_digit : char -> bool
54
+
55
+
(** {2 Case conversion} *)
56
+
57
+
(** Case conversion functions removed - use Astring.String.Ascii.lowercase instead *)
40
58
41
-
(** Convert an ASCII string to lowercase. *)
42
-
val string_to_ascii_lowercase : string -> string
59
+
(** {2 String predicates} *)
60
+
61
+
(** Check if a string has non-whitespace content after trimming. *)
62
+
val is_non_empty : string -> bool
63
+
64
+
(** Check if all characters in a non-empty string are ASCII digits. *)
65
+
val is_all_digits : string -> bool
66
+
67
+
(** Check if all characters in a non-empty string are ASCII letters. *)
68
+
val is_all_alpha : string -> bool
69
+
70
+
(** Check if all characters in a non-empty string are ASCII letters or digits. *)
71
+
val is_all_alphanumeric : string -> bool
72
+
73
+
(** {2 String manipulation} *)
43
74
44
75
(** Trim HTML5 whitespace from both ends of a string. *)
45
76
val trim_html_spaces : string -> string
+8
-9
lib/check/datatype/dt_autocomplete.ml
+8
-9
lib/check/datatype/dt_autocomplete.ml
···
2
2
3
3
(* Use shared utilities from Datatype *)
4
4
let is_whitespace = Datatype.is_whitespace
5
-
let to_ascii_lowercase = Datatype.to_ascii_lowercase
5
+
let to_ascii_lowercase c =
6
+
match c with 'A' .. 'Z' -> Char.chr (Char.code c + 32) | _ -> c
7
+
8
+
(* Use Astring for string operations *)
9
+
let is_prefix = Astring.String.is_prefix
6
10
7
11
(** Trim whitespace from string and collapse internal whitespace *)
8
12
let trim_whitespace s =
···
104
108
(** Split string on whitespace - uses shared utility *)
105
109
let split_on_whitespace = Datatype.split_on_whitespace
106
110
107
-
(** Check if string starts with prefix *)
108
-
let starts_with s prefix =
109
-
String.length s >= String.length prefix
110
-
&& String.sub s 0 (String.length prefix) = prefix
111
-
112
111
(** Validate detail tokens *)
113
112
let check_tokens tokens =
114
113
let tokens = ref tokens in
···
116
115
117
116
(* Check for section-* *)
118
117
(match !tokens with
119
-
| token :: rest when starts_with token "section-" ->
118
+
| token :: rest when is_prefix ~affix:"section-" token ->
120
119
tokens := rest
121
120
| _ -> ());
122
121
···
145
144
146
145
(* Check if any token in the list is a section-* indicator *)
147
146
let find_section tokens =
148
-
List.find_opt (fun t -> starts_with t "section-") tokens
147
+
List.find_opt (fun t -> is_prefix ~affix:"section-" t) tokens
149
148
in
150
149
151
150
(* Check if webauthn appears anywhere except as the very last token *)
···
207
206
(Printf.sprintf
208
207
"The token \"%s\" must only appear before any autofill field names."
209
208
token)
210
-
| token :: _ when starts_with token "section-" ->
209
+
| token :: _ when is_prefix ~affix:"section-" token ->
211
210
Error
212
211
"A \"section-*\" indicator must only appear as the first token in a \
213
212
list of autofill detail tokens."
+2
-2
lib/check/datatype/dt_boolean.ml
+2
-2
lib/check/datatype/dt_boolean.ml
···
22
22
match s with
23
23
| "" | "true" | "false" -> Ok ()
24
24
| _ ->
25
-
let s_lower = Datatype.string_to_ascii_lowercase s in
26
-
let attr_lower = Datatype.string_to_ascii_lowercase attr_name in
25
+
let s_lower = Astring.String.Ascii.lowercase s in
26
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
27
27
if s_lower = attr_lower then Ok ()
28
28
else
29
29
Error
+3
-6
lib/check/datatype/dt_charset.ml
+3
-6
lib/check/datatype/dt_charset.ml
···
1
1
(** Helper functions for charset validation *)
2
2
3
3
let is_valid_charset_char c =
4
-
(c >= '0' && c <= '9') ||
5
-
(c >= 'a' && c <= 'z') ||
6
-
(c >= 'A' && c <= 'Z') ||
4
+
Datatype.is_alphanumeric c ||
7
5
c = '-' || c = '!' || c = '#' || c = '$' || c = '%' || c = '&' ||
8
6
c = '\'' || c = '+' || c = '_' || c = '`' || c = '{' || c = '}' ||
9
7
c = '~' || c = '^'
10
8
11
-
let to_lower s = String.lowercase_ascii s
9
+
let to_lower = Astring.String.Ascii.lowercase
12
10
13
11
(** Common encoding labels recognized by WHATWG Encoding Standard.
14
12
This is a subset of the full list. *)
···
74
72
module Meta_charset = struct
75
73
let name = "legacy character encoding declaration"
76
74
77
-
let is_whitespace c =
78
-
c = ' ' || c = '\t' || c = '\n' || c = '\012' || c = '\r'
75
+
let is_whitespace = Datatype.is_whitespace
79
76
80
77
let validate s =
81
78
let lower = to_lower s in
+2
-3
lib/check/datatype/dt_color.ml
+2
-3
lib/check/datatype/dt_color.ml
···
154
154
]
155
155
156
156
(** Check if character is hex digit *)
157
-
let is_hex_digit c =
158
-
(c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
157
+
let is_hex_digit = Datatype.is_hex_digit
159
158
160
159
(** Validate hex color (#RGB or #RRGGBB) *)
161
160
let validate_hex_color s =
···
209
208
let name = "color"
210
209
211
210
let validate s =
212
-
let s = String.trim s |> String.lowercase_ascii in
211
+
let s = String.trim s |> Astring.String.Ascii.lowercase in
213
212
if String.length s = 0 then Error "Color value must not be empty"
214
213
else if List.mem s named_colors then Ok ()
215
214
else if String.length s > 0 && s.[0] = '#' then validate_hex_color s
+1
-1
lib/check/datatype/dt_contenteditable.ml
+1
-1
lib/check/datatype/dt_contenteditable.ml
+1
-1
lib/check/datatype/dt_crossorigin.ml
+1
-1
lib/check/datatype/dt_crossorigin.ml
+1
-4
lib/check/datatype/dt_datetime.ml
+1
-4
lib/check/datatype/dt_datetime.ml
+1
-1
lib/check/datatype/dt_decoding.ml
+1
-1
lib/check/datatype/dt_decoding.ml
+1
-1
lib/check/datatype/dt_dir.ml
+1
-1
lib/check/datatype/dt_dir.ml
+1
-1
lib/check/datatype/dt_draggable.ml
+1
-1
lib/check/datatype/dt_draggable.ml
+1
-3
lib/check/datatype/dt_email.ml
+1
-3
lib/check/datatype/dt_email.ml
···
2
2
3
3
(** Helper to check if a character is valid in email local/domain parts *)
4
4
let is_email_char c =
5
-
(c >= 'a' && c <= 'z')
6
-
|| (c >= 'A' && c <= 'Z')
7
-
|| (c >= '0' && c <= '9')
5
+
Datatype.is_alphanumeric c
8
6
|| c = '.' || c = '-' || c = '_' || c = '+' || c = '='
9
7
10
8
(** Validate a single email address using simplified rules *)
+3
-3
lib/check/datatype/dt_email.mli
+3
-3
lib/check/datatype/dt_email.mli
···
6
6
(** Valid email address validator.
7
7
8
8
Validates a single email address. Uses simplified validation rules:
9
-
- Must contain exactly one '@' character
10
-
- Local part (before @) must be non-empty
11
-
- Domain part (after @) must be non-empty and contain at least one '.'
9
+
- Must contain exactly one ['@'] character
10
+
- Local part (before ['@']) must be non-empty
11
+
- Domain part (after ['@']) must be non-empty and contain at least one ['.']
12
12
- Only ASCII characters allowed *)
13
13
module Email : Datatype.S
14
14
+1
-1
lib/check/datatype/dt_enterkeyhint.ml
+1
-1
lib/check/datatype/dt_enterkeyhint.ml
+1
-1
lib/check/datatype/dt_fetchpriority.ml
+1
-1
lib/check/datatype/dt_fetchpriority.ml
+1
-1
lib/check/datatype/dt_form_enctype.ml
+1
-1
lib/check/datatype/dt_form_enctype.ml
+1
-1
lib/check/datatype/dt_form_method.ml
+1
-1
lib/check/datatype/dt_form_method.ml
+1
-1
lib/check/datatype/dt_input_type.ml
+1
-1
lib/check/datatype/dt_input_type.ml
+1
-1
lib/check/datatype/dt_inputmode.ml
+1
-1
lib/check/datatype/dt_inputmode.ml
+1
-1
lib/check/datatype/dt_integrity.ml
+1
-1
lib/check/datatype/dt_integrity.ml
···
49
49
"Hash value '%s' must be in format 'algorithm-base64hash'" trimmed)
50
50
| Some dash_pos ->
51
51
let algorithm = String.sub trimmed 0 dash_pos in
52
-
let algorithm_lower = Datatype.string_to_ascii_lowercase algorithm in
52
+
let algorithm_lower = Astring.String.Ascii.lowercase algorithm in
53
53
if not (List.mem algorithm_lower valid_algorithms) then
54
54
Error
55
55
(Printf.sprintf
+1
-1
lib/check/datatype/dt_kind.ml
+1
-1
lib/check/datatype/dt_kind.ml
+4
-17
lib/check/datatype/dt_language.ml
+4
-17
lib/check/datatype/dt_language.ml
···
2
2
3
3
let q = Error_code.q
4
4
5
-
let is_lower_alpha c = c >= 'a' && c <= 'z'
6
-
let is_upper_alpha c = c >= 'A' && c <= 'Z'
7
-
let is_alpha c = is_lower_alpha c || is_upper_alpha c
8
-
let is_digit c = c >= '0' && c <= '9'
9
-
let is_alphanumeric c = is_alpha c || is_digit c
10
-
11
-
let is_all_alpha s =
12
-
String.for_all is_alpha s
13
-
14
-
let _is_all_digits s =
15
-
String.for_all is_digit s
16
-
17
-
let is_all_alphanumeric s =
18
-
String.for_all is_alphanumeric s
19
-
20
-
let to_lower s =
21
-
String.lowercase_ascii s
5
+
(* Use shared character predicates from Datatype *)
6
+
let is_all_alpha = Datatype.is_all_alpha
7
+
let is_all_alphanumeric = Datatype.is_all_alphanumeric
8
+
let to_lower = Astring.String.Ascii.lowercase
22
9
23
10
(** Valid extlang subtags per IANA language-subtag-registry.
24
11
Extlangs are 3-letter subtags that follow the primary language.
+1
-1
lib/check/datatype/dt_link_type.ml
+1
-1
lib/check/datatype/dt_link_type.ml
···
35
35
let trimmed = Datatype.trim_html_spaces s in
36
36
if trimmed = "" then Error "Link type must not be empty"
37
37
else
38
-
let lower = Datatype.string_to_ascii_lowercase trimmed in
38
+
let lower = Astring.String.Ascii.lowercase trimmed in
39
39
if List.mem lower valid_link_types then Ok ()
40
40
else
41
41
Error
+1
-1
lib/check/datatype/dt_list_type.ml
+1
-1
lib/check/datatype/dt_list_type.ml
+1
-1
lib/check/datatype/dt_loading.ml
+1
-1
lib/check/datatype/dt_loading.ml
+44
-25
lib/check/datatype/dt_media_query.ml
+44
-25
lib/check/datatype/dt_media_query.ml
···
70
70
(** Media query keywords (unused but kept for documentation) *)
71
71
let _media_keywords = [ "and"; "not"; "only" ]
72
72
73
-
(** Check if character is whitespace *)
74
-
let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r'
73
+
let is_whitespace = Datatype.is_whitespace
75
74
76
75
(** Check if character can start an identifier *)
77
76
let is_ident_start c =
78
-
(c >= 'a' && c <= 'z')
79
-
|| (c >= 'A' && c <= 'Z')
80
-
|| c = '_' || c = '-' || Char.code c >= 128
77
+
Datatype.is_alpha c || c = '_' || c = '-' || Char.code c >= 128
81
78
82
79
(** Check if character can be in an identifier *)
83
80
let is_ident_char c =
84
-
is_ident_start c || (c >= '0' && c <= '9')
81
+
is_ident_start c || Datatype.is_ascii_digit c
82
+
83
+
(** Unicode case folding for case-insensitive comparison.
84
+
85
+
WORKAROUND: This is a temporary domain-specific implementation because
86
+
the uucp library fails to compile with wasm_of_ocaml due to "too many
87
+
locals" errors. Once uucp supports WASM, restore the proper implementation:
88
+
89
+
{[
90
+
(* Proper uucp-based case folding: *)
91
+
let case_fold s =
92
+
let buf = Buffer.create (String.length s) in
93
+
let add_uchar u = Uutf.Buffer.add_utf_8 buf u in
94
+
let fold_char () _pos = function
95
+
| `Malformed _ -> ()
96
+
| `Uchar u ->
97
+
match Uucp.Case.Fold.fold u with
98
+
| `Self -> add_uchar u
99
+
| `Uchars us -> List.iter add_uchar us
100
+
in
101
+
Uutf.String.fold_utf_8 fold_char () s;
102
+
Buffer.contents buf
103
+
]}
85
104
86
-
(** Unicode case-fold for Turkish dotted-I etc *)
87
-
let lowercase_unicode s =
88
-
(* Handle special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE -> i *)
105
+
This workaround handles the Turkish dotted-I (U+0130 -> 'i' + U+0307)
106
+
which is the main non-ASCII case relevant for CSS media query identifiers. *)
107
+
let case_fold s =
89
108
let buf = Buffer.create (String.length s) in
109
+
let len = String.length s in
90
110
let i = ref 0 in
91
-
while !i < String.length s do
111
+
while !i < len do
92
112
let c = s.[!i] in
93
-
if c = '\xc4' && !i + 1 < String.length s && s.[!i + 1] = '\xb0' then begin
94
-
(* U+0130 -> 'i' + U+0307 (combining dot above), but for simplicity just 'i' followed by U+0307 *)
113
+
(* U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE encoded as UTF-8: 0xC4 0xB0 *)
114
+
if c = '\xc4' && !i + 1 < len && s.[!i + 1] = '\xb0' then begin
115
+
(* Case fold to 'i' + U+0307 (combining dot above) = 0x69 0xCC 0x87 *)
95
116
Buffer.add_string buf "i\xcc\x87";
96
117
i := !i + 2
97
118
end else begin
···
151
172
let trimmed = String.trim s in
152
173
if String.length trimmed >= 3 then begin
153
174
let suffix = String.sub trimmed (String.length trimmed - 3) 3 in
154
-
if String.lowercase_ascii suffix = "and" then
175
+
if Astring.String.Ascii.lowercase suffix = "and" then
155
176
Error "Parse Error."
156
177
else if String.length trimmed >= 4 then begin
157
178
let suffix4 = String.sub trimmed (String.length trimmed - 4) 4 in
158
-
if String.lowercase_ascii suffix4 = "and(" then
179
+
if Astring.String.Ascii.lowercase suffix4 = "and(" then
159
180
Error "Parse Error."
160
181
else
161
182
validate_media_query_content trimmed
···
201
222
let has_not = ref false in
202
223
(match read_ident () with
203
224
| Some w ->
204
-
let w_lower = String.lowercase_ascii w in
225
+
let w_lower = Astring.String.Ascii.lowercase w in
205
226
if w_lower = "only" then (has_only := true; skip_ws ())
206
227
else if w_lower = "not" then (has_not := true; skip_ws ())
207
228
else i := !i - String.length w (* put back *)
···
222
243
match read_ident () with
223
244
| None -> Error "Parse Error."
224
245
| Some media_type ->
225
-
let mt_lower = lowercase_unicode media_type in
246
+
let mt_lower = case_fold media_type in
226
247
(* Check for deprecated media type *)
227
248
if List.mem mt_lower deprecated_media_types then
228
249
Error (Printf.sprintf "The media \"%s\" has been deprecated" mt_lower)
···
238
259
match read_ident () with
239
260
| None -> Error "Parse Error."
240
261
| Some kw ->
241
-
let kw_lower = String.lowercase_ascii kw in
262
+
let kw_lower = Astring.String.Ascii.lowercase kw in
242
263
if kw_lower <> "and" then Error "Parse Error."
243
264
else begin
244
265
(* Check that there was whitespace before 'and' *)
···
267
288
match read_ident () with
268
289
| None -> Error "Parse Error."
269
290
| Some kw2 ->
270
-
let kw2_lower = String.lowercase_ascii kw2 in
291
+
let kw2_lower = Astring.String.Ascii.lowercase kw2 in
271
292
if kw2_lower <> "and" then Error "Parse Error."
272
293
else begin
273
294
skip_ws ();
···
295
316
match String.index_opt content ':' with
296
317
| None ->
297
318
(* Just feature name - boolean feature or range syntax *)
298
-
let feature_lower = String.lowercase_ascii content in
319
+
let feature_lower = Astring.String.Ascii.lowercase content in
299
320
if List.mem feature_lower deprecated_media_features then
300
321
Error (Printf.sprintf "Deprecated media feature \"%s\". For guidance, see the Deprecated Media Features section in the current Media Queries specification." feature_lower)
301
322
else if List.mem feature_lower valid_media_features then
···
305
326
| Some colon_pos ->
306
327
let feature = String.trim (String.sub content 0 colon_pos) in
307
328
let value = String.trim (String.sub content (colon_pos + 1) (String.length content - colon_pos - 1)) in
308
-
let feature_lower = String.lowercase_ascii feature in
329
+
let feature_lower = Astring.String.Ascii.lowercase feature in
309
330
310
331
(* Check for deprecated features *)
311
332
if List.mem feature_lower deprecated_media_features then
···
341
362
if List.mem feature length_features then begin
342
363
(* Must be a valid length: number followed by unit *)
343
364
let value = String.trim value in
344
-
let is_digit c = c >= '0' && c <= '9' in
345
365
346
366
(* Parse number - includes sign, digits, and decimal point *)
347
367
let i = ref 0 in
348
368
let len = String.length value in
349
-
while !i < len && (is_digit value.[!i] || value.[!i] = '.' || value.[!i] = '-' || value.[!i] = '+') do
369
+
while !i < len && (Datatype.is_ascii_digit value.[!i] || value.[!i] = '.' || value.[!i] = '-' || value.[!i] = '+') do
350
370
incr i
351
371
done;
352
372
let num_part = String.sub value 0 !i in
···
367
387
else if unit_part = "" then
368
388
Error "only \"0\" can be a \"unit\". You must put a unit after your number"
369
389
else begin
370
-
let unit_lower = String.lowercase_ascii unit_part in
390
+
let unit_lower = Astring.String.Ascii.lowercase unit_part in
371
391
if List.mem unit_lower valid_length_units then Ok ()
372
392
else if List.mem unit_lower valid_resolution_units then
373
393
Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature)
···
377
397
end else if List.mem feature color_features then begin
378
398
(* Must be an integer *)
379
399
let value = String.trim value in
380
-
let is_digit c = c >= '0' && c <= '9' in
381
-
if String.length value > 0 && String.for_all is_digit value then Ok ()
400
+
if Datatype.is_all_digits value then Ok ()
382
401
else
383
402
Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature)
384
403
end else
+1
-1
lib/check/datatype/dt_media_query.mli
+1
-1
lib/check/datatype/dt_media_query.mli
+2
-3
lib/check/datatype/dt_mime.ml
+2
-3
lib/check/datatype/dt_mime.ml
···
1
1
(** MIME type validation based on RFC 2045 and HTML5 spec *)
2
2
3
-
(** Check if character is whitespace *)
4
-
let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r'
3
+
let is_whitespace = Datatype.is_whitespace
5
4
6
5
(** Check if character is a token character (RFC 2045) *)
7
6
let is_token_char c =
···
92
91
if is_token_char c then parse In_subtype (i + 1)
93
92
else if c = ';' then
94
93
(* Check if this is a JavaScript MIME type *)
95
-
let mime_type = String.sub s 0 i |> String.lowercase_ascii in
94
+
let mime_type = String.sub s 0 i |> Astring.String.Ascii.lowercase in
96
95
if List.mem mime_type javascript_mime_types then
97
96
Error
98
97
"A JavaScript MIME type must not contain any characters after \
+1
-1
lib/check/datatype/dt_popover.ml
+1
-1
lib/check/datatype/dt_popover.ml
+1
-1
lib/check/datatype/dt_preload.ml
+1
-1
lib/check/datatype/dt_preload.ml
+1
-1
lib/check/datatype/dt_referrer.ml
+1
-1
lib/check/datatype/dt_referrer.ml
+1
-1
lib/check/datatype/dt_scope.ml
+1
-1
lib/check/datatype/dt_scope.ml
+1
-1
lib/check/datatype/dt_shape.ml
+1
-1
lib/check/datatype/dt_shape.ml
+1
-1
lib/check/datatype/dt_spellcheck.ml
+1
-1
lib/check/datatype/dt_spellcheck.ml
+1
-1
lib/check/datatype/dt_target.ml
+1
-1
lib/check/datatype/dt_target.ml
···
8
8
if String.length s = 0 then Error "Browsing context name must not be empty"
9
9
else if s.[0] = '_' then
10
10
(* If starts with underscore, must be a special keyword *)
11
-
let lower = Datatype.string_to_ascii_lowercase s in
11
+
let lower = Astring.String.Ascii.lowercase s in
12
12
if List.mem lower special_keywords then Ok ()
13
13
else
14
14
Error
+1
-1
lib/check/datatype/dt_translate.ml
+1
-1
lib/check/datatype/dt_translate.ml
+1
-1
lib/check/datatype/dt_url.ml
+1
-1
lib/check/datatype/dt_url.ml
+1
-1
lib/check/datatype/dt_wrap.ml
+1
-1
lib/check/datatype/dt_wrap.ml
+3
-3
lib/check/element/attr.ml
+3
-3
lib/check/element/attr.ml
···
571
571
572
572
(** Parse a single attribute name-value pair to typed attribute *)
573
573
let parse_attr name value : t =
574
-
let name_lower = String.lowercase_ascii name in
575
-
let value_lower = String.lowercase_ascii value in
574
+
let name_lower = Astring.String.Ascii.lowercase name in
575
+
let value_lower = Astring.String.Ascii.lowercase value in
576
576
match name_lower with
577
577
(* Global attributes *)
578
578
| "id" -> `Id value
···
875
875
(** Get rel attribute as list of link types (space-separated, lowercased per HTML5 spec) *)
876
876
let get_rel_list attrs =
877
877
match get_rel attrs with
878
-
| Some s -> List.map String.lowercase_ascii (Datatype.split_on_whitespace s)
878
+
| Some s -> List.map Astring.String.Ascii.lowercase (Datatype.split_on_whitespace s)
879
879
| None -> []
880
880
881
881
(** Get headers attribute as raw string *)
+4
-4
lib/check/element/element.ml
+4
-4
lib/check/element/element.ml
···
21
21
22
22
(** Parse element-specific type attribute based on tag *)
23
23
let parse_type_attr (tag : Tag.html_tag) value : Attr.t =
24
-
let value_lower = String.lowercase_ascii value in
24
+
let value_lower = Astring.String.Ascii.lowercase value in
25
25
match tag with
26
26
| `Input ->
27
27
(match Attr.parse_input_type value_lower with
···
42
42
(** Parse attributes with element context for type attribute *)
43
43
let parse_attrs_for_tag (tag : Tag.element_tag) (raw_attrs : (string * string) list) : Attr.t list =
44
44
List.map (fun (name, value) ->
45
-
let name_lower = String.lowercase_ascii name in
45
+
let name_lower = Astring.String.Ascii.lowercase name in
46
46
if name_lower = "type" then
47
47
match tag with
48
48
| Tag.Html html_tag -> parse_type_attr html_tag value
···
274
274
(** Get raw attribute value (from original attrs) *)
275
275
let get_raw_attr name elem =
276
276
List.find_map (fun (n, v) ->
277
-
if String.lowercase_ascii n = String.lowercase_ascii name then Some v else None
277
+
if Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name then Some v else None
278
278
) elem.raw_attrs
279
279
280
280
(** Check if raw attribute exists *)
281
281
let has_raw_attr name elem =
282
282
List.exists (fun (n, _) ->
283
-
String.lowercase_ascii n = String.lowercase_ascii name
283
+
Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name
284
284
) elem.raw_attrs
285
285
286
286
(** {1 Pattern Matching Helpers} *)
+3
-3
lib/check/element/element.mli
+3
-3
lib/check/element/element.mli
···
45
45
46
46
(** A typed HTML element.
47
47
48
-
@field tag The element's tag classification
49
-
@field attrs Typed attributes parsed from raw input
50
-
@field raw_attrs Original attribute name-value pairs for fallback *)
48
+
- [tag]: The element's tag classification
49
+
- [attrs]: Typed attributes parsed from raw input
50
+
- [raw_attrs]: Original attribute name-value pairs for fallback *)
51
51
type t = {
52
52
tag : Tag.element_tag;
53
53
attrs : Attr.t list;
+3
-3
lib/check/element/tag.ml
+3
-3
lib/check/element/tag.ml
···
234
234
(** Check if a name is a valid custom element name (contains hyphen, not reserved) *)
235
235
let is_custom_element_name name =
236
236
String.contains name '-' &&
237
-
not (String.starts_with ~prefix:"xml" (String.lowercase_ascii name)) &&
238
-
not (String.equal (String.lowercase_ascii name) "annotation-xml")
237
+
not (String.starts_with ~prefix:"xml" (Astring.String.Ascii.lowercase name)) &&
238
+
not (String.equal (Astring.String.Ascii.lowercase name) "annotation-xml")
239
239
240
240
(** SVG namespace URI *)
241
241
let svg_namespace = "http://www.w3.org/2000/svg"
···
255
255
256
256
(** Convert tag name and optional namespace to element_tag *)
257
257
let tag_of_string ?namespace name =
258
-
let name_lower = String.lowercase_ascii name in
258
+
let name_lower = Astring.String.Ascii.lowercase name in
259
259
match namespace with
260
260
| Some ns when is_svg_namespace ns -> Svg name (* Preserve original case for SVG *)
261
261
| Some ns when is_mathml_namespace ns -> MathML name (* Preserve original case for MathML *)
+6
-2
lib/check/error_code.ml
+6
-2
lib/check/error_code.ml
···
374
374
Printf.sprintf "Element %s is missing one or more of the following attributes: [%s]."
375
375
(q element) attrs_str
376
376
| `Attr (`Bad_value (`Elem element, `Attr attr, `Value value, `Reason reason)) ->
377
-
Printf.sprintf "Bad value %s for attribute %s on element %s: %s"
378
-
(q value) (q attr) (q element) reason
377
+
if reason = "" then
378
+
Printf.sprintf "Bad value %s for attribute %s on element %s."
379
+
(q value) (q attr) (q element)
380
+
else
381
+
Printf.sprintf "Bad value %s for attribute %s on element %s: %s"
382
+
(q value) (q attr) (q element) reason
379
383
| `Attr (`Bad_value_generic (`Message message)) -> message
380
384
| `Attr (`Duplicate_id (`Id id)) ->
381
385
Printf.sprintf "Duplicate ID %s." (q id)
+7
-7
lib/check/htmlrw_check.mli
+7
-7
lib/check/htmlrw_check.mli
···
21
21
22
22
{2 Handling Specific Errors}
23
23
24
-
Use pattern matching on {!field-message.error_code} for fine-grained control:
24
+
Use pattern matching on [error_code] for fine-grained control:
25
25
26
26
{[
27
27
List.iter (fun msg ->
···
188
188
189
189
(** Human-readable text format.
190
190
191
-
{v
191
+
{v
192
192
file.html:5.3: error [missing-alt]: Element "img" is missing required attribute "alt".
193
-
v} *)
193
+
v} *)
194
194
val to_text : t -> string
195
195
196
196
(** JSON format compatible with Nu HTML Validator.
197
197
198
-
{v
198
+
{v
199
199
{"messages":[{"type":"error","message":"...","firstLine":5,"firstColumn":3}]}
200
-
v} *)
200
+
v} *)
201
201
val to_json : t -> string
202
202
203
203
(** GNU error format for IDE integration.
204
204
205
-
{v
205
+
{v
206
206
file.html:5:3: error: Element "img" is missing required attribute "alt".
207
-
v} *)
207
+
v} *)
208
208
val to_gnu : t -> string
209
209
210
210
+6
-3
lib/check/message_format.ml
+6
-3
lib/check/message_format.ml
···
90
90
91
91
Object (with_extract, Meta.none)
92
92
93
-
let format_json ?system_id messages =
93
+
let messages_to_json ?system_id messages =
94
94
let open Jsont in
95
95
let msg_array = Array (List.map (message_to_json ?system_id) messages, Meta.none) in
96
-
let obj = Object ([ (("messages", Meta.none), msg_array) ], Meta.none) in
97
-
match Jsont_bytesrw.encode_string ~format:Minify json obj with
96
+
Object ([ (("messages", Meta.none), msg_array) ], Meta.none)
97
+
98
+
let format_json ?system_id messages =
99
+
let obj = messages_to_json ?system_id messages in
100
+
match Jsont_bytesrw.encode_string ~format:Minify Jsont.json obj with
98
101
| Ok s -> s
99
102
| Error e -> failwith ("JSON encoding error: " ^ e)
+33
lib/check/message_format.mli
+33
lib/check/message_format.mli
···
26
26
27
27
@param system_id Optional default system identifier for messages without location. *)
28
28
val format_gnu : ?system_id:string -> Message.t list -> string
29
+
30
+
(** {1 JSON Value Builders}
31
+
32
+
These functions return [Jsont.json] values that can be reused
33
+
for custom JSON encoding scenarios. *)
34
+
35
+
(** Convert a single message to JSON AST.
36
+
37
+
Produces JSON compatible with the Nu HTML Validator format:
38
+
{[
39
+
{
40
+
"type": "error",
41
+
"message": "...",
42
+
"subType": "error-code",
43
+
"url": "...",
44
+
"firstLine": 1,
45
+
"firstColumn": 1,
46
+
...
47
+
}
48
+
]}
49
+
50
+
@param system_id Default system identifier for messages without location.system_id. *)
51
+
val message_to_json : ?system_id:string -> Message.t -> Jsont.json
52
+
53
+
(** Convert a message list to JSON AST with wrapper object.
54
+
55
+
Produces JSON with a "messages" array:
56
+
{[
57
+
{ "messages": [...] }
58
+
]}
59
+
60
+
@param system_id Default system identifier for messages without location.system_id. *)
61
+
val messages_to_json : ?system_id:string -> Message.t list -> Jsont.json
+1
-1
lib/check/semantic/form_checker.ml
+1
-1
lib/check/semantic/form_checker.ml
···
12
12
13
13
(** Check if autocomplete value contains webauthn token *)
14
14
let contains_webauthn value =
15
-
let lower = String.lowercase_ascii value in
15
+
let lower = Astring.String.Ascii.lowercase value in
16
16
let tokens = String.split_on_char ' ' lower |> List.filter (fun s -> String.length s > 0) in
17
17
List.mem "webauthn" tokens
18
18
+6
-6
lib/check/semantic/lang_detecting_checker.ml
+6
-6
lib/check/semantic/lang_detecting_checker.ml
···
13
13
mutable char_count : int;
14
14
}
15
15
16
-
let max_chars = 30720
16
+
let max_chars = 8192 (* Reduced from 30720 to avoid slow language detection *)
17
17
let min_chars = 1024
18
18
19
19
(* Elements whose text content we skip for language detection - O(1) lookup *)
20
20
let skip_elements =
21
21
Attr_utils.hashtbl_of_list [
22
-
"a"; "button"; "details"; "figcaption"; "form"; "li"; "nav";
23
-
"pre"; "script"; "select"; "span"; "style"; "summary";
24
-
"td"; "textarea"; "th"; "tr"
22
+
"a"; "button"; "code"; "details"; "figcaption"; "form"; "kbd"; "li"; "nav";
23
+
"pre"; "samp"; "script"; "select"; "span"; "style"; "summary";
24
+
"td"; "textarea"; "th"; "tr"; "var"; "xmp"
25
25
]
26
26
27
27
let is_skip_element name = Hashtbl.mem skip_elements name
···
54
54
let get_lang_code lang =
55
55
(* Extract primary language subtag *)
56
56
match String.split_on_char '-' lang with
57
-
| code :: _ -> String.lowercase_ascii code
57
+
| code :: _ -> Astring.String.Ascii.lowercase code
58
58
| [] -> ""
59
59
60
60
(* Create detector lazily with deterministic seed *)
···
324
324
| None ->
325
325
Message_collector.add_typed collector
326
326
(`I18n (`Missing_dir_rtl (`Language detected_name)))
327
-
| Some dir when String.lowercase_ascii dir <> "rtl" ->
327
+
| Some dir when Astring.String.Ascii.lowercase dir <> "rtl" ->
328
328
Message_collector.add_typed collector
329
329
(`I18n (`Wrong_dir (`Language detected_name, `Declared dir)))
330
330
| _ -> ()
+139
-162
lib/check/semantic/nesting_checker.ml
+139
-162
lib/check/semantic/nesting_checker.ml
···
1
-
(** Interactive element nesting checker implementation. *)
1
+
(** Interactive element nesting checker implementation.
2
2
3
-
(** Special ancestors that need tracking for nesting validation.
3
+
Uses bool arrays instead of bitmasks for JavaScript compatibility
4
+
(JS bitwise ops are limited to 32 bits). *)
4
5
5
-
This array defines the elements whose presence in the ancestor chain
6
-
affects validation of descendant elements. The order is significant
7
-
as it determines bit positions in the ancestor bitmask. *)
6
+
(** Special ancestors that need tracking for nesting validation. *)
8
7
let special_ancestors =
9
8
[| "a"; "address"; "body"; "button"; "caption"; "dfn"; "dt"; "figcaption";
10
9
"figure"; "footer"; "form"; "header"; "label"; "map"; "noscript"; "th";
···
13
12
"s"; "small"; "mark"; "abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp";
14
13
"kbd"; "var" |]
15
14
16
-
(** Hashtable for O(1) lookup of special ancestor bit positions *)
15
+
let num_ancestors = Array.length special_ancestors
16
+
17
+
(** Hashtable for O(1) lookup of special ancestor indices *)
17
18
let special_ancestor_table : (string, int) Hashtbl.t =
18
19
let tbl = Hashtbl.create 64 in
19
20
Array.iteri (fun i name -> Hashtbl.add tbl name i) special_ancestors;
20
21
tbl
21
22
22
-
(** Get the bit position for a special ancestor element.
23
-
Returns [-1] if the element is not a special ancestor. O(1) lookup. *)
24
-
let special_ancestor_number name =
23
+
(** Get the index for a special ancestor element.
24
+
Returns [-1] if the element is not a special ancestor. *)
25
+
let special_ancestor_index name =
25
26
match Hashtbl.find_opt special_ancestor_table name with
26
27
| Some i -> i
27
28
| None -> -1
···
31
32
[| "a"; "button"; "details"; "embed"; "iframe"; "label"; "select";
32
33
"textarea" |]
33
34
34
-
(** Map from descendant element name to bitmask of prohibited ancestors. *)
35
-
let ancestor_mask_by_descendant : (string, int) Hashtbl.t =
35
+
(** Create an empty bool array for ancestor tracking *)
36
+
let empty_flags () = Array.make num_ancestors false
37
+
38
+
(** Copy a bool array *)
39
+
let copy_flags flags = Array.copy flags
40
+
41
+
(** Map from descendant element name to prohibited ancestor flags. *)
42
+
let prohibited_ancestors_by_descendant : (string, bool array) Hashtbl.t =
36
43
Hashtbl.create 64
37
44
38
-
(** Map from descendant element name to bitmask of ancestors that cause content model violations.
39
-
(These use different error messages than nesting violations.) *)
40
-
let content_model_violation_mask : (string, int) Hashtbl.t =
45
+
(** Map from descendant element name to content model violation flags. *)
46
+
let content_model_violations : (string, bool array) Hashtbl.t =
41
47
Hashtbl.create 64
42
48
49
+
(** Get or create prohibited ancestors array for a descendant *)
50
+
let get_prohibited descendant =
51
+
match Hashtbl.find_opt prohibited_ancestors_by_descendant descendant with
52
+
| Some arr -> arr
53
+
| None ->
54
+
let arr = empty_flags () in
55
+
Hashtbl.replace prohibited_ancestors_by_descendant descendant arr;
56
+
arr
57
+
58
+
(** Get or create content model violations array for a descendant *)
59
+
let get_content_model_violations descendant =
60
+
match Hashtbl.find_opt content_model_violations descendant with
61
+
| Some arr -> arr
62
+
| None ->
63
+
let arr = empty_flags () in
64
+
Hashtbl.replace content_model_violations descendant arr;
65
+
arr
66
+
43
67
(** Register that [ancestor] is prohibited for [descendant]. *)
44
68
let register_prohibited_ancestor ancestor descendant =
45
-
let number = special_ancestor_number ancestor in
46
-
if number = -1 then
69
+
let idx = special_ancestor_index ancestor in
70
+
if idx = -1 then
47
71
failwith ("Ancestor not found in array: " ^ ancestor);
48
-
let mask =
49
-
match Hashtbl.find_opt ancestor_mask_by_descendant descendant with
50
-
| None -> 0
51
-
| Some m -> m
52
-
in
53
-
let new_mask = mask lor (1 lsl number) in
54
-
Hashtbl.replace ancestor_mask_by_descendant descendant new_mask
72
+
let arr = get_prohibited descendant in
73
+
arr.(idx) <- true
55
74
56
75
(** Register a content model violation (phrasing-only element containing flow content). *)
57
76
let register_content_model_violation ancestor descendant =
58
77
register_prohibited_ancestor ancestor descendant;
59
-
let number = special_ancestor_number ancestor in
60
-
let mask =
61
-
match Hashtbl.find_opt content_model_violation_mask descendant with
62
-
| None -> 0
63
-
| Some m -> m
64
-
in
65
-
let new_mask = mask lor (1 lsl number) in
66
-
Hashtbl.replace content_model_violation_mask descendant new_mask
78
+
let idx = special_ancestor_index ancestor in
79
+
let arr = get_content_model_violations descendant in
80
+
arr.(idx) <- true
67
81
68
82
(** Initialize the prohibited ancestor map. *)
69
83
let () =
···
133
147
) interactive_elements;
134
148
135
149
(* Phrasing-only elements: cannot contain flow content like p, div, h1-h6, etc. *)
136
-
(* These are content model violations, not nesting violations. *)
137
150
let phrasing_only = ["span"; "strong"; "em"; "b"; "i"; "u"; "s"; "small"; "mark";
138
151
"abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp"; "kbd"; "var"] in
139
152
let flow_content = ["p"; "div"; "article"; "section"; "nav"; "aside"; "header"; "footer";
···
145
158
) flow_content
146
159
) phrasing_only
147
160
148
-
(** Bitmask constants for common checks. *)
149
-
let a_button_mask =
150
-
let a_num = special_ancestor_number "a" in
151
-
let button_num = special_ancestor_number "button" in
152
-
(1 lsl a_num) lor (1 lsl button_num)
161
+
(** Indices for common checks *)
162
+
let a_index = special_ancestor_index "a"
163
+
let button_index = special_ancestor_index "button"
164
+
let map_index = special_ancestor_index "map"
153
165
154
-
let map_mask =
155
-
let map_num = special_ancestor_number "map" in
156
-
1 lsl map_num
157
-
158
-
(** Transparent elements - inherit content model from parent. O(1) hashtable lookup. *)
166
+
(** Transparent elements - inherit content model from parent. *)
159
167
let transparent_elements_tbl =
160
168
Attr_utils.hashtbl_of_list ["a"; "canvas"; "video"; "audio"; "object"; "ins"; "del"; "map"]
161
169
···
163
171
164
172
(** Stack node representing an element's context. *)
165
173
type stack_node = {
166
-
ancestor_mask : int;
174
+
ancestor_flags : bool array;
167
175
name : string;
168
176
is_transparent : bool;
169
177
}
···
171
179
(** Checker state. *)
172
180
type state = {
173
181
mutable stack : stack_node list;
174
-
mutable ancestor_mask : int;
182
+
mutable ancestor_flags : bool array;
175
183
}
176
184
177
185
let create () =
178
-
{ stack = []; ancestor_mask = 0 }
186
+
{ stack = []; ancestor_flags = empty_flags () }
179
187
180
188
let reset state =
181
189
state.stack <- [];
182
-
state.ancestor_mask <- 0
190
+
state.ancestor_flags <- empty_flags ()
183
191
184
192
(** Get attribute value by name from attribute list. *)
185
-
let get_attr attrs name =
186
-
List.assoc_opt name attrs
193
+
let get_attr = Attr_utils.get_attr
187
194
188
195
(** Check if an attribute exists. *)
189
-
let has_attr attrs name =
190
-
get_attr attrs name <> None
196
+
let has_attr = Attr_utils.has_attr
191
197
192
198
(** Check if element is interactive based on its attributes. *)
193
199
let is_interactive_element name attrs =
194
200
match name with
195
-
| "a" ->
196
-
has_attr attrs "href"
197
-
| "audio" | "video" ->
198
-
has_attr attrs "controls"
199
-
| "img" | "object" ->
200
-
has_attr attrs "usemap"
201
+
| "a" -> has_attr "href" attrs
202
+
| "audio" | "video" -> has_attr "controls" attrs
203
+
| "img" | "object" -> has_attr "usemap" attrs
201
204
| "input" ->
202
-
begin match get_attr attrs "type" with
203
-
| Some "hidden" -> false
204
-
| _ -> true
205
-
end
205
+
(match get_attr "type" attrs with
206
+
| Some "hidden" -> false
207
+
| _ -> true)
206
208
| "button" | "details" | "embed" | "iframe" | "label" | "select"
207
-
| "textarea" ->
208
-
true
209
-
| _ ->
210
-
false
209
+
| "textarea" -> true
210
+
| _ -> false
211
211
212
-
(** Find the nearest transparent element in the ancestor stack, if any.
213
-
Returns the immediate parent's name if it's transparent, otherwise None. *)
212
+
(** Find the nearest transparent element in the ancestor stack. *)
214
213
let find_nearest_transparent_parent state =
215
214
match state.stack with
216
215
| parent :: _ when parent.is_transparent -> Some parent.name
···
218
217
219
218
(** Report nesting violations. *)
220
219
let check_nesting state name attrs collector =
221
-
(* Compute the prohibited ancestor mask for this element *)
222
-
let base_mask =
223
-
match Hashtbl.find_opt ancestor_mask_by_descendant name with
224
-
| Some m -> m
225
-
| None -> 0
220
+
(* Get prohibited ancestors for this element *)
221
+
let prohibited =
222
+
match Hashtbl.find_opt prohibited_ancestors_by_descendant name with
223
+
| Some arr -> arr
224
+
| None -> empty_flags ()
226
225
in
227
226
228
-
(* Get content model violation mask for this element *)
229
-
let content_model_mask =
230
-
match Hashtbl.find_opt content_model_violation_mask name with
231
-
| Some m -> m
232
-
| None -> 0
227
+
(* Get content model violations for this element *)
228
+
let content_violations =
229
+
match Hashtbl.find_opt content_model_violations name with
230
+
| Some arr -> arr
231
+
| None -> empty_flags ()
233
232
in
234
233
235
-
(* Add interactive element restrictions if applicable *)
236
-
let mask =
237
-
if is_interactive_element name attrs then
238
-
base_mask lor a_button_mask
239
-
else
240
-
base_mask
234
+
(* Check if element is interactive (adds a/button restrictions) *)
235
+
let is_interactive = is_interactive_element name attrs in
236
+
237
+
(* Determine attribute to mention in error messages *)
238
+
let attr =
239
+
match name with
240
+
| "a" when has_attr "href" attrs -> Some "href"
241
+
| "audio" when has_attr "controls" attrs -> Some "controls"
242
+
| "video" when has_attr "controls" attrs -> Some "controls"
243
+
| "img" when has_attr "usemap" attrs -> Some "usemap"
244
+
| "object" when has_attr "usemap" attrs -> Some "usemap"
245
+
| _ -> None
241
246
in
242
247
243
-
(* Check for violations *)
244
-
if mask <> 0 then begin
245
-
let mask_hit = state.ancestor_mask land mask in
246
-
if mask_hit <> 0 then begin
247
-
(* Determine if element has a special attribute to mention *)
248
-
let attr =
249
-
match name with
250
-
| "a" when has_attr attrs "href" -> Some "href"
251
-
| "audio" when has_attr attrs "controls" -> Some "controls"
252
-
| "video" when has_attr attrs "controls" -> Some "controls"
253
-
| "img" when has_attr attrs "usemap" -> Some "usemap"
254
-
| "object" when has_attr attrs "usemap" -> Some "usemap"
255
-
| _ -> None
248
+
(* Find transparent parent if any *)
249
+
let transparent_parent = find_nearest_transparent_parent state in
250
+
251
+
(* Check each special ancestor *)
252
+
Array.iteri (fun i ancestor ->
253
+
(* Is this ancestor in our current ancestor chain? *)
254
+
if state.ancestor_flags.(i) then begin
255
+
(* Is this ancestor prohibited for this element? *)
256
+
let is_prohibited =
257
+
prohibited.(i) ||
258
+
(is_interactive && (i = a_index || i = button_index))
256
259
in
257
-
(* Find the transparent parent (like canvas) if any *)
258
-
let transparent_parent = find_nearest_transparent_parent state in
259
-
(* Find which ancestors are violated *)
260
-
Array.iteri (fun i ancestor ->
261
-
let bit = 1 lsl i in
262
-
if (mask_hit land bit) <> 0 then begin
263
-
(* Check if this is a content model violation or a nesting violation *)
264
-
if (content_model_mask land bit) <> 0 then begin
265
-
(* Content model violation: use "not allowed as child" format *)
266
-
(* If there's a transparent parent, use that instead of the ancestor *)
267
-
let parent = match transparent_parent with
268
-
| Some p -> p
269
-
| None -> ancestor
270
-
in
271
-
Message_collector.add_typed collector
272
-
(`Element (`Not_allowed_as_child (`Child name, `Parent parent)))
273
-
end else
274
-
(* Nesting violation: use "must not be descendant" format *)
275
-
Message_collector.add_typed collector
276
-
(`Element (`Must_not_descend (`Elem name, `Attr attr, `Ancestor ancestor)))
277
-
end
278
-
) special_ancestors
260
+
if is_prohibited then begin
261
+
(* Is this a content model violation or a nesting violation? *)
262
+
if content_violations.(i) then begin
263
+
(* Content model violation: use "not allowed as child" format *)
264
+
let parent = match transparent_parent with
265
+
| Some p -> p
266
+
| None -> ancestor
267
+
in
268
+
Message_collector.add_typed collector
269
+
(`Element (`Not_allowed_as_child (`Child name, `Parent parent)))
270
+
end else
271
+
(* Nesting violation: use "must not be descendant" format *)
272
+
Message_collector.add_typed collector
273
+
(`Element (`Must_not_descend (`Elem name, `Attr attr, `Ancestor ancestor)))
274
+
end
279
275
end
280
-
end
276
+
) special_ancestors
281
277
282
278
(** Check for required ancestors. *)
283
279
let check_required_ancestors state name collector =
284
280
match name with
285
281
| "area" ->
286
-
if (state.ancestor_mask land map_mask) = 0 then
282
+
if not state.ancestor_flags.(map_index) then
287
283
Message_collector.add_typed collector
288
284
(`Generic (Printf.sprintf "The %s element must have a %s ancestor."
289
285
(Error_code.q "area") (Error_code.q "map")))
290
286
| _ -> ()
291
287
292
-
(** Check for metadata-only elements appearing outside valid contexts.
293
-
style element is only valid in head or in noscript (in head). *)
288
+
(** Check for metadata-only elements appearing outside valid contexts. *)
294
289
let check_metadata_element_context state name collector =
295
290
match name with
296
291
| "style" ->
297
-
(* style is only valid inside head or noscript *)
298
-
begin match state.stack with
299
-
| parent :: _ when parent.name = "head" -> () (* valid *)
300
-
| parent :: _ when parent.name = "noscript" -> () (* valid in noscript in head *)
301
-
| parent :: _ ->
302
-
(* style inside any other element is not allowed *)
303
-
Message_collector.add_typed collector
304
-
(`Element (`Not_allowed_as_child (`Child "style", `Parent parent.name)))
305
-
| [] -> () (* at root level, would be caught elsewhere *)
306
-
end
292
+
(match state.stack with
293
+
| parent :: _ when parent.name = "head" -> ()
294
+
| parent :: _ when parent.name = "noscript" -> ()
295
+
| parent :: _ ->
296
+
Message_collector.add_typed collector
297
+
(`Element (`Not_allowed_as_child (`Child "style", `Parent parent.name)))
298
+
| [] -> ())
307
299
| _ -> ()
308
300
309
301
let start_element state ~element collector =
310
-
(* Only check HTML elements, not SVG or MathML *)
311
302
match element.Element.tag with
312
303
| Tag.Html _ ->
313
304
let name = Tag.tag_to_string element.tag in
314
305
let attrs = element.raw_attrs in
306
+
315
307
(* Check for nesting violations *)
316
308
check_nesting state name attrs collector;
317
309
check_required_ancestors state name collector;
318
310
check_metadata_element_context state name collector;
319
311
320
-
(* Update ancestor mask if this is a special ancestor *)
321
-
let new_mask = state.ancestor_mask in
322
-
let number = special_ancestor_number name in
323
-
let new_mask =
324
-
if number >= 0 then
325
-
new_mask lor (1 lsl number)
326
-
else
327
-
new_mask
328
-
in
312
+
(* Create new flags, copying current state *)
313
+
let new_flags = copy_flags state.ancestor_flags in
329
314
330
-
(* Add href tracking for <a> elements *)
331
-
let new_mask =
332
-
if name = "a" && has_attr attrs "href" then
333
-
let a_num = special_ancestor_number "a" in
334
-
new_mask lor (1 lsl a_num)
335
-
else
336
-
new_mask
337
-
in
315
+
(* Set flag if this is a special ancestor *)
316
+
let idx = special_ancestor_index name in
317
+
if idx >= 0 then
318
+
new_flags.(idx) <- true;
338
319
339
-
(* Push onto stack *)
320
+
(* Push onto stack (save old flags) *)
340
321
let is_transparent = is_transparent_element name in
341
-
let node = { ancestor_mask = state.ancestor_mask; name; is_transparent } in
322
+
let node = { ancestor_flags = state.ancestor_flags; name; is_transparent } in
342
323
state.stack <- node :: state.stack;
343
-
state.ancestor_mask <- new_mask
344
-
| _ -> () (* SVG, MathML, Custom, Unknown *)
324
+
state.ancestor_flags <- new_flags
325
+
| _ -> ()
345
326
346
327
let end_element state ~tag _collector =
347
-
(* Only track HTML elements *)
348
328
match tag with
349
329
| Tag.Html _ ->
350
-
(* Pop from stack and restore ancestor mask *)
351
-
begin match state.stack with
352
-
| [] -> () (* Should not happen in well-formed documents *)
353
-
| node :: rest ->
354
-
state.stack <- rest;
355
-
state.ancestor_mask <- node.ancestor_mask
356
-
end
330
+
(match state.stack with
331
+
| [] -> ()
332
+
| node :: rest ->
333
+
state.stack <- rest;
334
+
state.ancestor_flags <- node.ancestor_flags)
357
335
| _ -> ()
358
336
359
-
(** Create the checker as a first-class module. *)
360
337
let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+2
-2
lib/check/semantic/obsolete_checker.ml
+2
-2
lib/check/semantic/obsolete_checker.ml
···
260
260
match element.Element.tag with
261
261
| Tag.Html _ ->
262
262
let name = Tag.tag_to_string element.tag in
263
-
let name_lower = String.lowercase_ascii name in
263
+
let name_lower = Astring.String.Ascii.lowercase name in
264
264
let attrs = element.raw_attrs in
265
265
266
266
(* Track head context *)
···
275
275
276
276
(* Check for obsolete attributes *)
277
277
List.iter (fun (attr_name, _attr_value) ->
278
-
let attr_lower = String.lowercase_ascii attr_name in
278
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
279
279
280
280
(* Special handling for scoped attribute on style *)
281
281
if attr_lower = "scoped" && name_lower = "style" then begin
+1
-1
lib/check/semantic/required_attr_checker.ml
+1
-1
lib/check/semantic/required_attr_checker.ml
···
120
120
(* popover attribute must have valid value *)
121
121
match Attr_utils.get_attr "popover" attrs with
122
122
| Some value ->
123
-
let value_lower = String.lowercase_ascii value in
123
+
let value_lower = Astring.String.Ascii.lowercase value in
124
124
(* Valid values: empty string, auto, manual, hint *)
125
125
if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then
126
126
Message_collector.add_typed collector
+34
-34
lib/check/specialized/aria_checker.ml
+34
-34
lib/check/specialized/aria_checker.ml
···
309
309
else
310
310
String.split_on_char ' ' trimmed
311
311
|> List.filter (fun s -> String.trim s <> "")
312
-
|> List.map String.lowercase_ascii
312
+
|> List.map Astring.String.Ascii.lowercase
313
313
314
314
(** Get the implicit role for an HTML element. *)
315
315
let get_implicit_role element_name attrs =
316
316
(* Check for input element with type attribute *)
317
317
if element_name = "input" then begin
318
-
match List.assoc_opt "type" attrs with
318
+
match Attr_utils.get_attr "type" attrs with
319
319
| Some input_type ->
320
-
let input_type = String.lowercase_ascii input_type in
320
+
let input_type = Astring.String.Ascii.lowercase input_type in
321
321
begin match Hashtbl.find_opt input_types_with_implicit_role input_type with
322
322
| Some role -> Some role
323
323
| None ->
···
332
332
end
333
333
(* Check for area element - implicit role depends on href attribute *)
334
334
else if element_name = "area" then begin
335
-
match List.assoc_opt "href" attrs with
335
+
match Attr_utils.get_attr "href" attrs with
336
336
| Some _ -> Some "link" (* area with href has implicit role "link" *)
337
337
| None -> Some "generic" (* area without href has no corresponding role, treated as generic *)
338
338
end
339
339
(* Check for a element - implicit role depends on href attribute *)
340
340
else if element_name = "a" then begin
341
-
match List.assoc_opt "href" attrs with
341
+
match Attr_utils.get_attr "href" attrs with
342
342
| Some _ -> Some "link" (* a with href has implicit role "link" *)
343
343
| None -> Some "generic" (* a without href has no corresponding role, treated as generic *)
344
344
end
···
430
430
match element.Element.tag with
431
431
| Tag.Html _ ->
432
432
let name = Tag.tag_to_string element.tag in
433
-
let name_lower = String.lowercase_ascii name in
433
+
let name_lower = Astring.String.Ascii.lowercase name in
434
434
let attrs = element.raw_attrs in
435
-
let role_attr = List.assoc_opt "role" attrs in
436
-
let aria_label = List.assoc_opt "aria-label" attrs in
437
-
let aria_labelledby = List.assoc_opt "aria-labelledby" attrs in
438
-
let aria_braillelabel = List.assoc_opt "aria-braillelabel" attrs in
435
+
let role_attr = Attr_utils.get_attr "role" attrs in
436
+
let aria_label = Attr_utils.get_attr "aria-label" attrs in
437
+
let aria_labelledby = Attr_utils.get_attr "aria-labelledby" attrs in
438
+
let aria_braillelabel = Attr_utils.get_attr "aria-braillelabel" attrs in
439
439
let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in
440
440
let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in
441
441
let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in
···
459
459
460
460
(* Track active tabs and tabpanel roles for end_document validation *)
461
461
if List.mem "tab" explicit_roles then begin
462
-
let aria_selected = List.assoc_opt "aria-selected" attrs in
462
+
let aria_selected = Attr_utils.get_attr "aria-selected" attrs in
463
463
if aria_selected = Some "true" then state.has_active_tab <- true
464
464
end;
465
465
if List.mem "tabpanel" explicit_roles then state.has_tabpanel <- true;
466
466
467
467
(* Track visible main elements (explicit role=main or implicit main role) *)
468
468
let is_hidden =
469
-
let aria_hidden = List.assoc_opt "aria-hidden" attrs in
469
+
let aria_hidden = Attr_utils.get_attr "aria-hidden" attrs in
470
470
aria_hidden = Some "true"
471
471
in
472
472
if not is_hidden then begin
···
489
489
(* Check br/wbr aria-* attribute restrictions - not allowed *)
490
490
if name_lower = "br" || name_lower = "wbr" then begin
491
491
List.iter (fun (attr_name, _) ->
492
-
let attr_lower = String.lowercase_ascii attr_name in
492
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
493
493
if String.starts_with ~prefix:"aria-" attr_lower &&
494
494
attr_lower <> "aria-hidden" then
495
495
Message_collector.add_typed collector
···
515
515
516
516
(* Check for img with empty alt having role attribute *)
517
517
if name_lower = "img" then begin
518
-
let alt_value = List.assoc_opt "alt" attrs in
518
+
let alt_value = Attr_utils.get_attr "alt" attrs in
519
519
match alt_value with
520
520
| Some alt when String.trim alt = "" ->
521
521
(* img with empty alt must not have role attribute *)
···
526
526
527
527
(* Check for input[type=checkbox][role=button] requires aria-pressed *)
528
528
if name_lower = "input" then begin
529
-
let input_type = match List.assoc_opt "type" attrs with
530
-
| Some t -> String.lowercase_ascii t
529
+
let input_type = match Attr_utils.get_attr "type" attrs with
530
+
| Some t -> Astring.String.Ascii.lowercase t
531
531
| None -> "text"
532
532
in
533
533
if input_type = "checkbox" && List.mem "button" explicit_roles then begin
534
-
let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in
534
+
let has_aria_pressed = Attr_utils.has_attr "aria-pressed" attrs in
535
535
if not has_aria_pressed then
536
536
Message_collector.add_typed collector (`Input `Checkbox_needs_aria_pressed)
537
537
end
···
566
566
567
567
(* Check for aria-hidden="true" on body element *)
568
568
if name_lower = "body" then begin
569
-
let aria_hidden = List.assoc_opt "aria-hidden" attrs in
569
+
let aria_hidden = Attr_utils.get_attr "aria-hidden" attrs in
570
570
match aria_hidden with
571
571
| Some "true" ->
572
572
Message_collector.add_typed collector (`Aria `Hidden_on_body)
···
574
574
end;
575
575
576
576
(* Check for aria-checked on input[type=checkbox] *)
577
-
let aria_checked = List.assoc_opt "aria-checked" attrs in
577
+
let aria_checked = Attr_utils.get_attr "aria-checked" attrs in
578
578
if name_lower = "input" then begin
579
-
match List.assoc_opt "type" attrs with
580
-
| Some input_type when String.lowercase_ascii input_type = "checkbox" ->
579
+
match Attr_utils.get_attr "type" attrs with
580
+
| Some input_type when Astring.String.Ascii.lowercase input_type = "checkbox" ->
581
581
if aria_checked <> None then
582
582
Message_collector.add_typed collector
583
583
(`Aria (`Must_not_use (`Attr "aria-checked", `Elem "input",
···
586
586
end;
587
587
588
588
(* Check for aria-expanded on roles that don't support it *)
589
-
let aria_expanded = List.assoc_opt "aria-expanded" attrs in
589
+
let aria_expanded = Attr_utils.get_attr "aria-expanded" attrs in
590
590
if aria_expanded <> None then begin
591
591
let role_to_check = match explicit_roles with
592
592
| first :: _ -> Some first
···
605
605
(* Special message for input[type=text] with role="textbox" *)
606
606
let reason =
607
607
if name_lower = "input" && first_role = "textbox" then begin
608
-
let has_list = List.exists (fun (k, _) -> String.lowercase_ascii k = "list") attrs in
609
-
let input_type = match List.assoc_opt "type" attrs with
610
-
| Some t -> String.lowercase_ascii t
608
+
let has_list = Attr_utils.has_attr "list" attrs in
609
+
let input_type = match Attr_utils.get_attr "type" attrs with
610
+
| Some t -> Astring.String.Ascii.lowercase t
611
611
| None -> "text"
612
612
in
613
613
if not has_list && input_type = "text" then
···
671
671
672
672
(* Check for redundant default ARIA attribute values *)
673
673
List.iter (fun (attr_name, attr_value) ->
674
-
let attr_lower = String.lowercase_ascii attr_name in
674
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
675
675
if String.starts_with ~prefix:"aria-" attr_lower then
676
676
match Hashtbl.find_opt aria_default_values attr_lower with
677
677
| Some default_value ->
678
-
let value_lower = String.lowercase_ascii (String.trim attr_value) in
678
+
let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in
679
679
if value_lower = default_value then
680
680
Message_collector.add_typed collector
681
681
(`Generic (Printf.sprintf
···
688
688
if name_lower = "summary" then begin
689
689
let parent = get_parent_element state in
690
690
let is_in_details = parent = Some "details" in
691
-
let has_role_attr = List.exists (fun (k, _) -> String.lowercase_ascii k = "role") attrs in
692
-
let has_aria_expanded = List.assoc_opt "aria-expanded" attrs <> None in
693
-
let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in
691
+
let has_role_attr = Attr_utils.has_attr "role" attrs in
692
+
let has_aria_expanded = Attr_utils.has_attr "aria-expanded" attrs in
693
+
let has_aria_pressed = Attr_utils.has_attr "aria-pressed" attrs in
694
694
if is_in_details then begin
695
695
(* summary that is the first child of details *)
696
696
if has_role_attr then
···
726
726
(* Custom elements (autonomous custom elements) have generic role by default
727
727
and cannot have accessible names unless they have an explicit role *)
728
728
let attrs = element.raw_attrs in
729
-
let role_attr = List.assoc_opt "role" attrs in
730
-
let aria_label = List.assoc_opt "aria-label" attrs in
731
-
let aria_labelledby = List.assoc_opt "aria-labelledby" attrs in
732
-
let aria_braillelabel = List.assoc_opt "aria-braillelabel" attrs in
729
+
let role_attr = Attr_utils.get_attr "role" attrs in
730
+
let aria_label = Attr_utils.get_attr "aria-label" attrs in
731
+
let aria_labelledby = Attr_utils.get_attr "aria-labelledby" attrs in
732
+
let aria_braillelabel = Attr_utils.get_attr "aria-braillelabel" attrs in
733
733
let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in
734
734
let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in
735
735
let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in
+21
-21
lib/check/specialized/attr_restrictions_checker.ml
+21
-21
lib/check/specialized/attr_restrictions_checker.ml
···
58
58
match element.Element.tag with
59
59
| Tag.Html _ ->
60
60
let name = Tag.tag_to_string element.tag in
61
-
let name_lower = String.lowercase_ascii name in
61
+
let name_lower = Astring.String.Ascii.lowercase name in
62
62
let attrs = element.raw_attrs in
63
63
64
64
(* Detect XHTML mode from xmlns attribute on html element *)
···
86
86
(* Check for xmlns:* prefixed attributes - not allowed in HTML *)
87
87
(* Standard xmlns declarations are allowed but custom prefixes are not *)
88
88
List.iter (fun (attr_name, _) ->
89
-
let attr_lower = String.lowercase_ascii attr_name in
89
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
90
90
if String.starts_with ~prefix:"xmlns:" attr_lower then begin
91
91
let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in
92
92
(* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *)
···
113
113
(* Validate style type attribute - must be "text/css" or omitted *)
114
114
if name_lower = "style" then begin
115
115
List.iter (fun (attr_name, attr_value) ->
116
-
let attr_lower = String.lowercase_ascii attr_name in
116
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
117
117
if attr_lower = "type" then begin
118
-
let value_lower = String.lowercase_ascii (String.trim attr_value) in
118
+
let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in
119
119
if value_lower <> "text/css" then
120
120
Message_collector.add_typed collector (`Misc `Style_type_invalid)
121
121
end
···
144
144
(* imagesrcset requires as="image" *)
145
145
if has_imagesrcset then begin
146
146
let as_is_image = match as_value with
147
-
| Some v -> String.lowercase_ascii (String.trim v) = "image"
147
+
| Some v -> Astring.String.Ascii.lowercase (String.trim v) = "image"
148
148
| None -> false
149
149
in
150
150
if not as_is_image then
···
164
164
(* Validate img usemap attribute - must be hash-name reference with content *)
165
165
if name_lower = "img" then begin
166
166
List.iter (fun (attr_name, attr_value) ->
167
-
let attr_lower = String.lowercase_ascii attr_name in
167
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
168
168
if attr_lower = "usemap" then begin
169
169
if attr_value = "#" then
170
170
Message_collector.add_typed collector
···
178
178
(* Validate embed type attribute - must be valid MIME type *)
179
179
if name_lower = "embed" then begin
180
180
List.iter (fun (attr_name, attr_value) ->
181
-
let attr_lower = String.lowercase_ascii attr_name in
181
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
182
182
if attr_lower = "type" then begin
183
183
match Dt_mime.validate_mime_type attr_value with
184
184
| Ok () -> ()
···
197
197
name_lower = "iframe" || name_lower = "source" in
198
198
if is_dimension_element then begin
199
199
List.iter (fun (attr_name, attr_value) ->
200
-
let attr_lower = String.lowercase_ascii attr_name in
200
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
201
201
if attr_lower = "width" || attr_lower = "height" then begin
202
202
(* Check for non-negative integer only *)
203
203
let is_valid =
···
245
245
(* Validate area[shape=default] cannot have coords *)
246
246
if name_lower = "area" then begin
247
247
match Attr_utils.get_attr "shape" attrs with
248
-
| Some s when String.lowercase_ascii (String.trim s) = "default" ->
248
+
| Some s when Astring.String.Ascii.lowercase (String.trim s) = "default" ->
249
249
if Attr_utils.has_attr "coords" attrs then
250
250
Message_collector.add_typed collector
251
251
(`Attr (`Not_allowed (`Attr "coords", `Elem "area")))
···
257
257
match Attr_utils.get_attr "dir" attrs with
258
258
| None ->
259
259
Message_collector.add_typed collector (`Misc `Bdo_missing_dir)
260
-
| Some v when String.lowercase_ascii (String.trim v) = "auto" ->
260
+
| Some v when Astring.String.Ascii.lowercase (String.trim v) = "auto" ->
261
261
Message_collector.add_typed collector (`Misc `Bdo_dir_auto)
262
262
| _ -> ()
263
263
end;
···
266
266
if name_lower = "input" then begin
267
267
if Attr_utils.has_attr "list" attrs then begin
268
268
let input_type = Attr_utils.get_attr_or "type" ~default:"text" attrs
269
-
|> String.trim |> String.lowercase_ascii in
269
+
|> String.trim |> Astring.String.Ascii.lowercase in
270
270
if not (List.mem input_type input_types_allowing_list) then
271
271
Message_collector.add_typed collector (`Input `List_not_allowed)
272
272
end
···
274
274
275
275
(* Validate data-* attributes *)
276
276
List.iter (fun (attr_name, _) ->
277
-
let attr_lower = String.lowercase_ascii attr_name in
277
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
278
278
(* Check if it starts with "data-" *)
279
279
if String.starts_with ~prefix:"data-" attr_lower then begin
280
280
let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in
···
297
297
(match lang_value with
298
298
| None ->
299
299
Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
300
-
| Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang ->
300
+
| Some lang when Astring.String.Ascii.lowercase lang <> Astring.String.Ascii.lowercase xmllang ->
301
301
Message_collector.add_typed collector (`I18n `Xml_lang_without_lang)
302
302
| _ -> ())
303
303
| None -> ()
···
305
305
306
306
(* Validate spellcheck attribute - must be "true" or "false" or empty *)
307
307
List.iter (fun (attr_name, attr_value) ->
308
-
let attr_lower = String.lowercase_ascii attr_name in
308
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
309
309
if attr_lower = "spellcheck" then begin
310
-
let value_lower = String.lowercase_ascii (String.trim attr_value) in
310
+
let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in
311
311
if value_lower <> "" && value_lower <> "true" && value_lower <> "false" then
312
312
Message_collector.add_typed collector
313
313
(`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
···
317
317
(* Validate enterkeyhint attribute - must be one of specific values *)
318
318
let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in
319
319
List.iter (fun (attr_name, attr_value) ->
320
-
let attr_lower = String.lowercase_ascii attr_name in
320
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
321
321
if attr_lower = "enterkeyhint" then begin
322
-
let value_lower = String.lowercase_ascii (String.trim attr_value) in
322
+
let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in
323
323
if not (List.mem value_lower valid_enterkeyhint) then
324
324
Message_collector.add_typed collector
325
325
(`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason "")))
···
328
328
329
329
(* Validate headingoffset attribute - must be a number between 0 and 8 *)
330
330
List.iter (fun (attr_name, attr_value) ->
331
-
let attr_lower = String.lowercase_ascii attr_name in
331
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
332
332
if attr_lower = "headingoffset" then begin
333
333
let trimmed = String.trim attr_value in
334
334
let is_valid =
···
346
346
347
347
(* Validate accesskey attribute - each key label must be a single code point *)
348
348
List.iter (fun (attr_name, attr_value) ->
349
-
let attr_lower = String.lowercase_ascii attr_name in
349
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
350
350
if attr_lower = "accesskey" then begin
351
351
(* Split by whitespace to get key labels *)
352
352
let keys = String.split_on_char ' ' attr_value |>
···
418
418
let is_media_element = name_lower = "link" || name_lower = "style" || name_lower = "source" in
419
419
if is_media_element then begin
420
420
List.iter (fun (attr_name, attr_value) ->
421
-
let attr_lower = String.lowercase_ascii attr_name in
421
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
422
422
if attr_lower = "media" then begin
423
423
let trimmed = String.trim attr_value in
424
424
if trimmed <> "" then begin
···
436
436
437
437
(* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *)
438
438
List.iter (fun (attr_name, attr_value) ->
439
-
let attr_lower = String.lowercase_ascii attr_name in
439
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
440
440
if attr_lower = "prefix" then begin
441
441
(* Parse prefix attribute value - format: "prefix1: iri1 prefix2: iri2 ..." *)
442
442
let trimmed = String.trim attr_value in
+1
-1
lib/check/specialized/datetime_checker.ml
+1
-1
lib/check/specialized/datetime_checker.ml
···
451
451
if List.mem name datetime_elements then begin
452
452
(* Check for datetime attribute *)
453
453
let datetime_attr = List.find_map (fun (k, v) ->
454
-
if String.lowercase_ascii k = "datetime" then Some v else None
454
+
if Astring.String.Ascii.lowercase k = "datetime" then Some v else None
455
455
) element.raw_attrs in
456
456
match datetime_attr with
457
457
| None -> ()
+1
-1
lib/check/specialized/dl_checker.ml
+1
-1
lib/check/specialized/dl_checker.ml
···
106
106
(`Element (`Not_allowed_as_child (`Child "div", `Parent "dl")));
107
107
(match Attr.get_role element.attrs with
108
108
| Some role_value ->
109
-
let role_lower = String.lowercase_ascii (String.trim role_value) in
109
+
let role_lower = Astring.String.Ascii.lowercase (String.trim role_value) in
110
110
if role_lower <> "presentation" && role_lower <> "none" then
111
111
Message_collector.add_typed collector (`Li_role `Div_in_dl_bad_role)
112
112
| None -> ());
-37
lib/check/specialized/h1_checker.ml
-37
lib/check/specialized/h1_checker.ml
···
1
-
(** H1 element counter - warns about multiple h1 elements in a document. *)
2
-
3
-
type state = {
4
-
mutable h1_count : int;
5
-
mutable svg_depth : int; (* Track depth inside SVG *)
6
-
}
7
-
8
-
let create () = {
9
-
h1_count = 0;
10
-
svg_depth = 0;
11
-
}
12
-
13
-
let reset state =
14
-
state.h1_count <- 0;
15
-
state.svg_depth <- 0
16
-
17
-
let start_element state ~element collector =
18
-
(* Track SVG depth - h1 inside SVG (foreignObject, desc) shouldn't count *)
19
-
match element.Element.tag with
20
-
| Tag.Svg _ ->
21
-
state.svg_depth <- state.svg_depth + 1
22
-
| Tag.Html `H1 when state.svg_depth = 0 ->
23
-
state.h1_count <- state.h1_count + 1;
24
-
if state.h1_count > 1 then
25
-
Message_collector.add_typed collector (`Misc `Multiple_h1)
26
-
| Tag.Html _ when state.svg_depth = 0 ->
27
-
() (* Other HTML elements outside SVG *)
28
-
| _ ->
29
-
() (* Non-HTML or inside SVG *)
30
-
31
-
let end_element state ~tag _collector =
32
-
match tag with
33
-
| Tag.Svg _ when state.svg_depth > 0 ->
34
-
state.svg_depth <- state.svg_depth - 1
35
-
| _ -> ()
36
-
37
-
let checker = Checker.make ~create ~reset ~start_element ~end_element ()
-16
lib/check/specialized/h1_checker.mli
-16
lib/check/specialized/h1_checker.mli
···
1
-
(** H1 element counter checker.
2
-
3
-
This checker validates that documents don't have multiple h1 elements,
4
-
which can confuse document structure and accessibility tools.
5
-
6
-
{2 Validation Rules}
7
-
8
-
- Documents should have at most one [<h1>] element
9
-
- [<h1>] elements inside SVG content (foreignObject, desc) are not counted
10
-
11
-
{2 Error Messages}
12
-
13
-
- [Multiple_h1]: Document contains more than one h1 element *)
14
-
15
-
val checker : Checker.t
16
-
(** The H1 checker instance. *)
+19
-107
lib/check/specialized/heading_checker.ml
+19
-107
lib/check/specialized/heading_checker.ml
···
1
1
(** Heading structure validation checker.
2
2
3
3
This checker validates that:
4
-
- Heading levels don't skip (e.g., h1 to h3)
5
-
- Documents have at least one heading
6
-
- Multiple h1 usage is noted
7
-
- Headings are not empty *)
4
+
- Multiple h1 usage is reported as an error
5
+
6
+
Note: Additional accessibility checks (first heading should be h1, skipped
7
+
levels, empty headings) are intentionally not included as errors since they
8
+
are recommendations rather than HTML5 spec requirements. *)
8
9
9
10
(** Checker state tracking heading structure. *)
10
11
type state = {
11
-
mutable current_level : int option;
12
12
mutable h1_count : int;
13
-
mutable has_any_heading : bool;
14
-
mutable first_heading_checked : bool;
15
-
mutable in_heading : Tag.html_tag option;
16
-
mutable heading_has_text : bool;
17
13
mutable svg_depth : int; (* Track depth inside SVG - headings in SVG don't count *)
18
14
}
19
15
20
-
let create () =
21
-
{
22
-
current_level = None;
23
-
h1_count = 0;
24
-
has_any_heading = false;
25
-
first_heading_checked = false;
26
-
in_heading = None;
27
-
heading_has_text = false;
28
-
svg_depth = 0;
29
-
}
16
+
let create () = {
17
+
h1_count = 0;
18
+
svg_depth = 0;
19
+
}
30
20
31
21
let reset state =
32
-
state.current_level <- None;
33
22
state.h1_count <- 0;
34
-
state.has_any_heading <- false;
35
-
state.first_heading_checked <- false;
36
-
state.in_heading <- None;
37
-
state.heading_has_text <- false;
38
23
state.svg_depth <- 0
39
24
40
-
(** Check if text is effectively empty (only whitespace). *)
41
-
let is_empty_text text =
42
-
let rec check i =
43
-
if i >= String.length text then
44
-
true
45
-
else
46
-
match text.[i] with
47
-
| ' ' | '\t' | '\n' | '\r' -> check (i + 1)
48
-
| _ -> false
49
-
in
50
-
check 0
51
-
52
25
let start_element state ~element collector =
53
26
match element.Element.tag with
54
27
| Tag.Svg _ ->
55
-
(* Track SVG depth - headings inside SVG (foreignObject, desc) don't count *)
28
+
(* Track SVG depth - h1 inside SVG (foreignObject, desc) shouldn't count *)
56
29
state.svg_depth <- state.svg_depth + 1
57
-
| Tag.Html (#Tag.heading_tag as h) when state.svg_depth = 0 ->
58
-
let level = match Tag.heading_level h with Some l -> l | None -> 0 in
59
-
let name = Tag.html_tag_to_string h in
60
-
state.has_any_heading <- true;
61
-
62
-
(* Check if this is the first heading *)
63
-
if not state.first_heading_checked then begin
64
-
state.first_heading_checked <- true;
65
-
if level <> 1 then
66
-
Message_collector.add_typed collector
67
-
(`Generic (Printf.sprintf
68
-
"First heading in document is <%s>, should typically be <h1>" name))
69
-
end;
70
-
71
-
(* Track h1 count *)
72
-
if level = 1 then begin
73
-
state.h1_count <- state.h1_count + 1;
74
-
if state.h1_count > 1 then
75
-
Message_collector.add_typed collector (`Misc `Multiple_h1)
76
-
end;
77
-
78
-
(* Check for skipped levels *)
79
-
begin match state.current_level with
80
-
| None ->
81
-
state.current_level <- Some level
82
-
| Some prev_level ->
83
-
let diff = level - prev_level in
84
-
if diff > 1 then
85
-
Message_collector.add_typed collector
86
-
(`Generic (Printf.sprintf
87
-
"Heading level skipped: <%s> follows <h%d>, skipping %d level%s. This can confuse screen reader users"
88
-
name prev_level (diff - 1) (if diff > 2 then "s" else "")));
89
-
state.current_level <- Some level
90
-
end;
91
-
92
-
(* Track that we're in a heading to check for empty content *)
93
-
state.in_heading <- Some h;
94
-
state.heading_has_text <- false
30
+
| Tag.Html `H1 when state.svg_depth = 0 ->
31
+
state.h1_count <- state.h1_count + 1;
32
+
if state.h1_count > 1 then
33
+
Message_collector.add_typed collector (`Misc `Multiple_h1)
95
34
| _ -> ()
96
35
97
-
let end_element state ~tag collector =
98
-
(* Track SVG depth *)
99
-
(match tag with
100
-
| Tag.Svg _ when state.svg_depth > 0 ->
101
-
state.svg_depth <- state.svg_depth - 1
102
-
| _ -> ());
103
-
(* Check for empty headings *)
104
-
match state.in_heading, tag with
105
-
| Some h, Tag.Html h2 when h = h2 ->
106
-
if not state.heading_has_text then
107
-
Message_collector.add_typed collector
108
-
(`Generic (Printf.sprintf
109
-
"Heading <%s> is empty or contains only whitespace. Empty headings are problematic for screen readers"
110
-
(Tag.html_tag_to_string h)));
111
-
state.in_heading <- None;
112
-
state.heading_has_text <- false
36
+
let end_element state ~tag _collector =
37
+
match tag with
38
+
| Tag.Svg _ when state.svg_depth > 0 ->
39
+
state.svg_depth <- state.svg_depth - 1
113
40
| _ -> ()
114
41
115
-
let characters state text _collector =
116
-
(* If we're inside a heading, check if this text is non-whitespace *)
117
-
match state.in_heading with
118
-
| Some _ ->
119
-
if not (is_empty_text text) then
120
-
state.heading_has_text <- true
121
-
| None ->
122
-
()
123
-
124
-
let end_document state collector =
125
-
if not state.has_any_heading then
126
-
Message_collector.add_typed collector
127
-
(`Generic "Document contains no heading elements (h1-h6). Headings provide important document structure for accessibility")
128
-
129
-
let checker = Checker.make ~create ~reset ~start_element ~end_element
130
-
~characters ~end_document ()
42
+
let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+2
-2
lib/check/specialized/importmap_checker.ml
+2
-2
lib/check/specialized/importmap_checker.ml
···
270
270
| Tag.Html `Script ->
271
271
(* Check if type="importmap" *)
272
272
let type_attr = List.find_opt (fun (n, _) ->
273
-
String.lowercase_ascii n = "type"
273
+
Astring.String.Ascii.lowercase n = "type"
274
274
) element.raw_attrs in
275
275
(match type_attr with
276
-
| Some (_, v) when String.lowercase_ascii v = "importmap" ->
276
+
| Some (_, v) when Astring.String.Ascii.lowercase v = "importmap" ->
277
277
state.in_importmap <- true;
278
278
Buffer.clear state.content
279
279
| _ -> ())
+1
-1
lib/check/specialized/label_checker.ml
+1
-1
lib/check/specialized/label_checker.ml
···
65
65
| _ -> ())
66
66
67
67
| Tag.Html tag ->
68
-
let name_lower = String.lowercase_ascii (Tag.tag_to_string (Tag.Html tag)) in
68
+
let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string (Tag.Html tag)) in
69
69
70
70
(* Track labelable element IDs *)
71
71
(if is_labelable name_lower then
+1
-1
lib/check/specialized/language_checker.ml
+1
-1
lib/check/specialized/language_checker.ml
···
27
27
28
28
(** Check if a language tag contains deprecated subtags. *)
29
29
let check_deprecated_tag value =
30
-
let lower = String.lowercase_ascii value in
30
+
let lower = Astring.String.Ascii.lowercase value in
31
31
let subtags = String.split_on_char '-' lower in
32
32
match subtags with
33
33
| [] -> None
+3
-6
lib/check/specialized/mime_type_checker.ml
+3
-6
lib/check/specialized/mime_type_checker.ml
···
153
153
let create () = ()
154
154
let reset _state = ()
155
155
156
-
let get_attr_value name attrs =
157
-
List.find_map (fun (k, v) ->
158
-
if String.lowercase_ascii k = String.lowercase_ascii name then Some v else None
159
-
) attrs
156
+
let get_attr_value = Attr_utils.get_attr
160
157
161
158
let start_element _state ~element collector =
162
159
match element.Element.tag with
163
160
| Tag.Html tag ->
164
161
let name = Tag.html_tag_to_string tag in
165
-
let name_lower = String.lowercase_ascii name in
162
+
let name_lower = Astring.String.Ascii.lowercase name in
166
163
(match List.assoc_opt name_lower mime_type_attrs with
167
164
| None -> ()
168
165
| Some type_attrs ->
···
174
171
if value = "" then ()
175
172
else if name_lower = "script" then
176
173
(* script type can be module, importmap, etc. - skip validation for non-MIME types *)
177
-
let value_lower = String.lowercase_ascii value in
174
+
let value_lower = Astring.String.Ascii.lowercase value in
178
175
if value_lower = "module" || value_lower = "importmap" ||
179
176
not (String.contains value '/') then ()
180
177
else
+20
-6
lib/check/specialized/normalization_checker.ml
+20
-6
lib/check/specialized/normalization_checker.ml
···
2
2
3
3
Validates that text content is in Unicode Normalization Form C (NFC). *)
4
4
5
-
type state = unit [@@warning "-34"]
5
+
type state = {
6
+
mutable in_raw_text : int; (** Depth inside style/script elements *)
7
+
}
6
8
7
-
let create () = ()
8
-
let reset _state = ()
9
+
let create () = { in_raw_text = 0 }
10
+
let reset state = state.in_raw_text <- 0
11
+
12
+
(** Elements whose text content is raw text and should be skipped *)
13
+
let is_raw_text_element name =
14
+
name = "style" || name = "script" || name = "xmp" || name = "textarea"
9
15
10
16
(** Normalize a string to NFC form using uunf. *)
11
17
let normalize_nfc text =
···
40
46
if end_pos = len then s
41
47
else String.sub s 0 end_pos
42
48
43
-
let start_element _state ~element:_ _collector = ()
49
+
let start_element state ~element _collector =
50
+
let name = Tag.tag_to_string element.Element.tag in
51
+
if is_raw_text_element name then
52
+
state.in_raw_text <- state.in_raw_text + 1
44
53
45
-
let end_element _state ~tag:_ _collector = ()
54
+
let end_element state ~tag _collector =
55
+
let name = Tag.tag_to_string tag in
56
+
if is_raw_text_element name && state.in_raw_text > 0 then
57
+
state.in_raw_text <- state.in_raw_text - 1
46
58
47
-
let characters _state text collector =
59
+
let characters state text collector =
60
+
(* Skip text inside raw text elements like style/script *)
61
+
if state.in_raw_text > 0 then () else
48
62
(* Skip empty text or whitespace-only text *)
49
63
let text_trimmed = String.trim text in
50
64
if String.length text_trimmed = 0 then ()
+2
-2
lib/check/specialized/picture_checker.ml
+2
-2
lib/check/specialized/picture_checker.ml
···
133
133
let media_value = Attr_utils.get_attr "media" attrs in
134
134
let has_type = Attr_utils.has_attr "type" attrs in
135
135
let is_media_all = match media_value with
136
-
| Some v -> String.lowercase_ascii (String.trim v) = "all"
136
+
| Some v -> Astring.String.Ascii.lowercase (String.trim v) = "all"
137
137
| None -> false in
138
138
let is_media_empty = match media_value with
139
139
| Some v -> String.trim v = ""
···
142
142
| None -> not has_type
143
143
| Some v ->
144
144
let trimmed = String.trim v in
145
-
trimmed = "" || String.lowercase_ascii trimmed = "all"
145
+
trimmed = "" || Astring.String.Ascii.lowercase trimmed = "all"
146
146
in
147
147
if is_always_matching then begin
148
148
state.has_always_matching_source <- true;
+12
-12
lib/check/specialized/srcset_sizes_checker.ml
+12
-12
lib/check/specialized/srcset_sizes_checker.ml
···
153
153
154
154
(** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *)
155
155
let has_invalid_scientific_notation s =
156
-
let lower = String.lowercase_ascii s in
156
+
let lower = Astring.String.Ascii.lowercase s in
157
157
(* Find 'e' for scientific notation *)
158
158
match String.index_opt lower 'e' with
159
159
| None -> false
···
176
176
(* Check for % at the end *)
177
177
else if trimmed.[len - 1] = '%' then "%"
178
178
else begin
179
-
let lower = String.lowercase_ascii trimmed in
179
+
let lower = Astring.String.Ascii.lowercase trimmed in
180
180
(* Try to find a unit at the end (letters only) *)
181
181
let rec find_unit_length i =
182
182
if i < 0 then 0
···
205
205
if has_invalid_scientific_notation value_no_comments then BadScientificNotation
206
206
(* "auto" is only valid with lazy loading, which requires checking the element context.
207
207
For general validation, treat "auto" alone as invalid in sizes. *)
208
-
else if String.lowercase_ascii value_no_comments = "auto" then
208
+
else if Astring.String.Ascii.lowercase value_no_comments = "auto" then
209
209
BadCssNumber (value_no_comments.[0], trimmed)
210
210
else if value_no_comments = "" then InvalidUnit ("", trimmed)
211
211
else begin
212
-
let lower = String.lowercase_ascii value_no_comments in
212
+
let lower = Astring.String.Ascii.lowercase value_no_comments in
213
213
(* Check for calc() or other CSS functions first - these are always valid *)
214
214
if String.contains value_no_comments '(' then Valid
215
215
else begin
···
310
310
Some "Bad media condition: Parse Error"
311
311
end else begin
312
312
(* Check for bare "all" which is invalid *)
313
-
let lower = String.lowercase_ascii trimmed in
313
+
let lower = Astring.String.Ascii.lowercase trimmed in
314
314
let parts = String.split_on_char ' ' lower |> List.filter (fun s -> s <> "") in
315
315
match parts with
316
316
| keyword :: _ when keyword = "all" ->
···
358
358
end
359
359
else begin
360
360
(* Check if remaining starts with "and", "or", "not" followed by space or paren *)
361
-
let lower_remaining = String.lowercase_ascii remaining in
361
+
let lower_remaining = Astring.String.Ascii.lowercase remaining in
362
362
if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and " then
363
363
skip_media_condition (i + (len - i) - remaining_len + 4)
364
364
else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or " then
···
577
577
578
578
(** Validate srcset descriptor *)
579
579
let validate_srcset_descriptor desc element_name srcset_value has_sizes collector =
580
-
let desc_lower = String.lowercase_ascii (String.trim desc) in
580
+
let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in
581
581
if String.length desc_lower = 0 then true
582
582
else begin
583
583
let last_char = desc_lower.[String.length desc_lower - 1] in
···
723
723
724
724
(** Normalize descriptor for duplicate detection (e.g., 1x = 1.0x) *)
725
725
let normalize_descriptor desc =
726
-
let desc_lower = String.lowercase_ascii (String.trim desc) in
726
+
let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in
727
727
if String.length desc_lower = 0 then desc_lower
728
728
else
729
729
let last_char = desc_lower.[String.length desc_lower - 1] in
···
793
793
(* Special schemes that require host/content after :// *)
794
794
let special_schemes = ["http"; "https"; "ftp"; "ws"; "wss"] in
795
795
(* Check for scheme-only URL like "http:" *)
796
-
let url_lower = String.lowercase_ascii url in
796
+
let url_lower = Astring.String.Ascii.lowercase url in
797
797
List.iter (fun scheme ->
798
798
let scheme_colon = scheme ^ ":" in
799
799
if url_lower = scheme_colon then
···
824
824
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected single descriptor but found extraneous descriptor %s at %s." (q value) (q "srcset") (q element_name) (q extra_desc) (q value)))))
825
825
end;
826
826
827
-
let desc_lower = String.lowercase_ascii (String.trim desc) in
827
+
let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in
828
828
if String.length desc_lower > 0 then begin
829
829
let last_char = desc_lower.[String.length desc_lower - 1] in
830
830
if last_char = 'w' then has_w_descriptor := true
···
872
872
begin match Hashtbl.find_opt seen_descriptors normalized with
873
873
| Some first_url ->
874
874
Message_collector.add_typed collector
875
-
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (String.lowercase_ascii dup_type) (q first_url)))))
875
+
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (Astring.String.Ascii.lowercase dup_type) (q first_url)))))
876
876
| None ->
877
877
begin match (if is_1x then Hashtbl.find_opt seen_descriptors "implicit-1x" else None) with
878
878
| Some first_url ->
879
879
(* Explicit 1x conflicts with implicit 1x *)
880
880
Message_collector.add_typed collector
881
-
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (String.lowercase_ascii dup_type) (q first_url)))))
881
+
(`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (Astring.String.Ascii.lowercase dup_type) (q first_url)))))
882
882
| None ->
883
883
Hashtbl.add seen_descriptors normalized url;
884
884
if is_1x then Hashtbl.add seen_descriptors "explicit-1x" url
+11
-11
lib/check/specialized/svg_checker.ml
+11
-11
lib/check/specialized/svg_checker.ml
···
228
228
]
229
229
230
230
(* Required attributes for certain elements *)
231
+
(* Note: SVG rect does NOT require width/height - they default to 0 *)
231
232
let required_attrs = [
232
233
("feConvolveMatrix", ["order"]);
233
-
("rect", ["width"; "height"]);
234
234
("font", ["horiz-adv-x"]);
235
235
]
236
236
···
260
260
261
261
(* Check if attribute name matches a pattern like "data-*" or "aria-*" - case insensitive *)
262
262
let matches_pattern attr pattern =
263
-
let attr_lower = String.lowercase_ascii attr in
264
-
let pattern_lower = String.lowercase_ascii pattern in
263
+
let attr_lower = Astring.String.Ascii.lowercase attr in
264
+
let pattern_lower = Astring.String.Ascii.lowercase pattern in
265
265
if String.ends_with ~suffix:"-*" pattern_lower then
266
266
let prefix = String.sub pattern_lower 0 (String.length pattern_lower - 1) in
267
267
String.starts_with ~prefix attr_lower
···
361
361
state.in_svg <- true;
362
362
363
363
if is_svg_element || state.in_svg then begin
364
-
let name_lower = String.lowercase_ascii name in
364
+
let name_lower = Astring.String.Ascii.lowercase name in
365
365
366
366
(* Check SVG content model rules *)
367
367
(* 1. Check if child is allowed in SVG <a> *)
368
368
(match state.element_stack with
369
-
| parent :: _ when String.lowercase_ascii parent = "a" ->
369
+
| parent :: _ when Astring.String.Ascii.lowercase parent = "a" ->
370
370
if List.mem name_lower a_disallowed_children then
371
371
Message_collector.add_typed collector
372
372
(`Element (`Not_allowed_as_child (`Child name, `Parent "a")))
···
382
382
(* 2.5 Check stop element is only in linearGradient or radialGradient *)
383
383
if name_lower = "stop" then begin
384
384
match state.element_stack with
385
-
| parent :: _ when (let p = String.lowercase_ascii parent in
385
+
| parent :: _ when (let p = Astring.String.Ascii.lowercase parent in
386
386
p = "lineargradient" || p = "radialgradient") -> ()
387
387
| parent :: _ ->
388
388
Message_collector.add_typed collector
···
393
393
(* 2.6 Check use element is not nested inside another use element *)
394
394
if name_lower = "use" then begin
395
395
match state.element_stack with
396
-
| parent :: _ when String.lowercase_ascii parent = "use" ->
396
+
| parent :: _ when Astring.String.Ascii.lowercase parent = "use" ->
397
397
Message_collector.add_typed collector
398
398
(`Element (`Not_allowed_as_child (`Child name, `Parent parent)))
399
399
| _ -> ()
···
401
401
402
402
(* 3. Check duplicate feFunc* in feComponentTransfer *)
403
403
(match state.element_stack with
404
-
| parent :: _ when String.lowercase_ascii parent = "fecomponenttransfer" ->
404
+
| parent :: _ when Astring.String.Ascii.lowercase parent = "fecomponenttransfer" ->
405
405
if List.mem name_lower ["fefuncr"; "fefuncg"; "fefuncb"; "fefunca"] then begin
406
406
match state.fecomponenttransfer_stack with
407
407
| fect :: _ ->
···
435
435
436
436
(* Check each attribute *)
437
437
List.iter (fun (attr, value) ->
438
-
let attr_lower = String.lowercase_ascii attr in
438
+
let attr_lower = Astring.String.Ascii.lowercase attr in
439
439
440
440
(* Validate xmlns attributes *)
441
441
if String.starts_with ~prefix:"xmlns" attr_lower then
···
457
457
(match List.assoc_opt name_lower required_attrs with
458
458
| Some req_attrs ->
459
459
List.iter (fun req_attr ->
460
-
if not (List.exists (fun (a, _) -> String.lowercase_ascii a = req_attr) attrs) then
460
+
if not (Attr_utils.has_attr req_attr attrs) then
461
461
Message_collector.add_typed collector
462
462
(`Svg (`Missing_attr (`Elem name_lower, `Attr req_attr)))
463
463
) req_attrs
···
469
469
let name = Tag.tag_to_string tag in
470
470
471
471
if is_svg_element || state.in_svg then begin
472
-
let name_lower = String.lowercase_ascii name in
472
+
let name_lower = Astring.String.Ascii.lowercase name in
473
473
474
474
(* Check required children when closing font element *)
475
475
if name_lower = "font" then begin
+5
-5
lib/check/specialized/table_checker.ml
+5
-5
lib/check/specialized/table_checker.ml
···
354
354
355
355
(** Parse a non-negative integer attribute, returning 1 if absent or invalid *)
356
356
let parse_non_negative_int attrs name =
357
-
match List.assoc_opt name attrs with
357
+
match Attr_utils.get_attr name attrs with
358
358
| None -> 1
359
359
| Some v -> (
360
360
try
···
364
364
365
365
(** Parse a positive integer attribute, returning 1 if absent or invalid *)
366
366
let parse_positive_int attrs name =
367
-
match List.assoc_opt name attrs with
367
+
match Attr_utils.get_attr name attrs with
368
368
| None -> 1
369
369
| Some v -> (
370
370
try
···
374
374
375
375
(** Parse the headers attribute into a list of IDs *)
376
376
let parse_headers attrs =
377
-
match List.assoc_opt "headers" attrs with
377
+
match Attr_utils.get_attr "headers" attrs with
378
378
| None -> []
379
379
| Some v ->
380
380
let parts = String.split_on_char ' ' v in
···
523
523
table.state <- InCellInRowGroup;
524
524
(* Record header ID if present *)
525
525
if is_header then (
526
-
match List.assoc_opt "id" attrs with
526
+
match Attr_utils.get_attr "id" attrs with
527
527
| Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id ()
528
528
| _ -> ());
529
529
(* Parse cell attributes *)
···
541
541
table.state <- InCellInImplicitRowGroup;
542
542
(* Same logic as above *)
543
543
if is_header then (
544
-
match List.assoc_opt "id" attrs with
544
+
match Attr_utils.get_attr "id" attrs with
545
545
| Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id ()
546
546
| _ -> ());
547
547
let colspan = abs (parse_positive_int attrs "colspan") in
+19
-3
lib/check/specialized/title_checker.ml
+19
-3
lib/check/specialized/title_checker.ml
···
1
1
(** Title element validation checker. *)
2
2
3
3
type state = {
4
+
mutable seen_html : bool; (* true if we've seen html element (full document mode) *)
4
5
mutable in_head : bool;
6
+
mutable head_had_children : bool; (* true if head contained any child elements *)
5
7
mutable has_title : bool;
6
8
mutable in_title : bool;
7
9
mutable title_has_content : bool;
···
9
11
}
10
12
11
13
let create () = {
14
+
seen_html = false;
12
15
in_head = false;
16
+
head_had_children = false;
13
17
has_title = false;
14
18
in_title = false;
15
19
title_has_content = false;
···
17
21
}
18
22
19
23
let reset state =
24
+
state.seen_html <- false;
20
25
state.in_head <- false;
26
+
state.head_had_children <- false;
21
27
state.has_title <- false;
22
28
state.in_title <- false;
23
29
state.title_has_content <- false;
···
25
31
26
32
let start_element state ~element _collector =
27
33
(match element.Element.tag with
28
-
| Tag.Html `Html -> ()
34
+
| Tag.Html `Html ->
35
+
state.seen_html <- true
29
36
| Tag.Html `Head ->
30
-
state.in_head <- true
37
+
state.in_head <- true;
38
+
state.head_had_children <- false
31
39
| Tag.Html `Title when state.in_head ->
40
+
state.head_had_children <- true;
32
41
state.has_title <- true;
33
42
state.in_title <- true;
34
43
state.title_has_content <- false;
35
44
state.title_depth <- 0
45
+
| _ when state.in_head ->
46
+
(* Any element inside head means head had children *)
47
+
state.head_had_children <- true
36
48
| _ -> ());
37
49
if state.in_title then
38
50
state.title_depth <- state.title_depth + 1
···
47
59
(`Element (`Must_not_be_empty (`Elem "title")));
48
60
state.in_title <- false
49
61
| Tag.Html `Head ->
50
-
if state.in_head && not state.has_title then
62
+
(* Report missing title if:
63
+
- We saw an html element (full document mode), OR
64
+
- Head had explicit children (was not just an implicit empty head)
65
+
An empty head without html element was likely implicit (fragment validation). *)
66
+
if state.in_head && not state.has_title && (state.seen_html || state.head_had_children) then
51
67
Message_collector.add_typed collector
52
68
(`Element (`Missing_child (`Parent "head", `Child "title")));
53
69
state.in_head <- false
+1
-1
lib/check/specialized/unknown_element_checker.ml
+1
-1
lib/check/specialized/unknown_element_checker.ml
···
31
31
state.stack <- name :: state.stack
32
32
33
33
| Tag.Html tag ->
34
-
let name_lower = String.lowercase_ascii (Tag.tag_to_string (Tag.Html tag)) in
34
+
let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string (Tag.Html tag)) in
35
35
state.stack <- name_lower :: state.stack
36
36
37
37
| _ -> () (* SVG, MathML, Custom elements are allowed *)
+24
-25
lib/check/specialized/url_checker.ml
+24
-25
lib/check/specialized/url_checker.ml
···
67
67
68
68
(** Check if pipe is allowed in this host context. *)
69
69
let is_pipe_allowed_in_host url host =
70
-
let scheme = try String.lowercase_ascii (String.sub url 0 (String.index url ':')) with _ -> "" in
70
+
let scheme = try Astring.String.Ascii.lowercase (String.sub url 0 (String.index url ':')) with _ -> "" in
71
71
scheme = "file" && is_valid_windows_drive host
72
72
73
73
(** Special schemes that require double slash (//).
···
95
95
(c >= '0' && c <= '9') || c = '+' || c = '-' || c = '.'
96
96
) potential_scheme in
97
97
if is_valid_scheme then
98
-
Some (String.lowercase_ascii potential_scheme)
98
+
Some (Astring.String.Ascii.lowercase potential_scheme)
99
99
else
100
100
None
101
101
with Not_found -> None
···
104
104
let extract_host_and_port url =
105
105
try
106
106
let double_slash =
107
-
try Some (Str.search_forward (Str.regexp "://") url 0 + 3)
108
-
with Not_found -> None
107
+
match Astring.String.find_sub ~sub:"://" url with
108
+
| Some pos -> Some (pos + 3)
109
+
| None -> None
109
110
in
110
111
match double_slash with
111
112
| None -> (None, None)
···
250
251
(* Check for ASCII percent *)
251
252
String.contains s '%' ||
252
253
(* Check for fullwidth percent (U+FF05 = 0xEF 0xBC 0x85 in UTF-8) *)
253
-
try
254
-
let _ = Str.search_forward (Str.regexp "\xef\xbc\x85") s 0 in
255
-
true
256
-
with Not_found -> false
254
+
Astring.String.is_infix ~affix:"\xef\xbc\x85" s
257
255
258
256
(** Check if decoded host contains forbidden characters.
259
257
Some URLs have percent-encoded fullwidth characters that decode to forbidden chars. *)
···
424
422
let check_path_segment url attr_name element_name =
425
423
(* Extract path: everything after authority (or after scheme: for non-authority URLs) *)
426
424
let raw_path =
427
-
try
428
-
let double_slash = Str.search_forward (Str.regexp "://") url 0 in
425
+
match Astring.String.find_sub ~sub:"://" url with
426
+
| Some double_slash ->
429
427
let after_auth_start = double_slash + 3 in
430
428
let rest = String.sub url after_auth_start (String.length url - after_auth_start) in
431
429
(* Find end of authority *)
···
437
435
String.sub rest path_start (String.length rest - path_start)
438
436
else
439
437
""
440
-
with Not_found ->
438
+
| None ->
441
439
(* No double slash - check for single slash path *)
442
-
match extract_scheme url with
440
+
(match extract_scheme url with
443
441
| Some _ ->
444
-
let colon_pos = String.index url ':' in
445
-
let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in
446
-
after_colon
442
+
(try
443
+
let colon_pos = String.index url ':' in
444
+
String.sub url (colon_pos + 1) (String.length url - colon_pos - 1)
445
+
with Not_found -> url)
447
446
| None ->
448
447
(* Relative URL - the whole thing is the path *)
449
-
url
448
+
url)
450
449
in
451
450
(* Remove query and fragment for path-specific checks *)
452
451
let path = remove_query_fragment raw_path in
···
546
545
547
546
(** Check for illegal characters in userinfo (user:password). *)
548
547
let check_userinfo url attr_name element_name =
548
+
match Astring.String.find_sub ~sub:"://" url with
549
+
| None -> None
550
+
| Some pos ->
549
551
try
550
552
(* Look for :// then find the LAST @ before the next / or end *)
551
-
let double_slash = Str.search_forward (Str.regexp "://") url 0 + 3 in
553
+
let double_slash = pos + 3 in
552
554
let rest = String.sub url double_slash (String.length url - double_slash) in
553
555
(* Find first / or ? or # to limit authority section *)
554
556
let auth_end =
···
633
635
let url = String.trim url in
634
636
(* Empty URL check for certain attributes *)
635
637
if url = "" then begin
636
-
let name_lower = String.lowercase_ascii element_name in
637
-
let attr_lower = String.lowercase_ascii attr_name in
638
+
let name_lower = Astring.String.Ascii.lowercase element_name in
639
+
let attr_lower = Astring.String.Ascii.lowercase attr_name in
638
640
if List.mem attr_lower must_be_non_empty ||
639
641
List.mem (name_lower, attr_lower) must_be_non_empty_combinations then
640
642
Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Must be non-empty."
···
739
741
let reset _state = ()
740
742
741
743
(** Get attribute value by name. *)
742
-
let get_attr_value name attrs =
743
-
List.find_map (fun (k, v) ->
744
-
if String.lowercase_ascii k = String.lowercase_ascii name then Some v else None
745
-
) attrs
744
+
let get_attr_value = Attr_utils.get_attr
746
745
747
746
let start_element _state ~element collector =
748
747
match element.Element.tag with
749
748
| Tag.Html _ ->
750
749
let name = Tag.tag_to_string element.tag in
751
-
let name_lower = String.lowercase_ascii name in
750
+
let name_lower = Astring.String.Ascii.lowercase name in
752
751
let attrs = element.raw_attrs in
753
752
(* Check URL attributes for elements that have them *)
754
753
(match List.assoc_opt name_lower url_attributes with
···
794
793
match validate_url url name "value" with
795
794
| None -> ()
796
795
| Some error_msg ->
797
-
let error_msg = Str.global_replace (Str.regexp "Bad URL:") "Bad absolute URL:" error_msg in
796
+
let error_msg = Astring.String.concat ~sep:"Bad absolute URL:" (Astring.String.cuts ~sep:"Bad URL:" error_msg) in
798
797
Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message error_msg)))
799
798
end
800
799
end
+5
-5
lib/check/specialized/xhtml_content_checker.ml
+5
-5
lib/check/specialized/xhtml_content_checker.ml
···
54
54
55
55
let start_element state ~element collector =
56
56
let name = Tag.tag_to_string element.Element.tag in
57
-
let name_lower = String.lowercase_ascii name in
57
+
let name_lower = Astring.String.Ascii.lowercase name in
58
58
let attrs = element.raw_attrs in
59
59
60
60
(* Check data-* attributes for uppercase *)
···
63
63
(* Check if this element is allowed as child of parent *)
64
64
(match state.element_stack with
65
65
| parent :: _ ->
66
-
let parent_lower = String.lowercase_ascii parent in
66
+
let parent_lower = Astring.String.Ascii.lowercase parent in
67
67
if not (is_child_allowed ~parent:parent_lower ~child:name_lower) then
68
68
Message_collector.add_typed collector
69
69
(`Element (`Not_allowed_as_child (`Child name_lower, `Parent parent_lower)))
···
71
71
72
72
(* Handle figure content model *)
73
73
(match state.element_stack with
74
-
| parent :: _ when String.lowercase_ascii parent = "figure" ->
74
+
| parent :: _ when Astring.String.Ascii.lowercase parent = "figure" ->
75
75
(* We're inside a figure, check content model *)
76
76
(match state.figure_stack with
77
77
| fig :: _ ->
···
99
99
state.element_stack <- name :: state.element_stack
100
100
101
101
let end_element state ~tag _collector =
102
-
let name_lower = String.lowercase_ascii (Tag.tag_to_string tag) in
102
+
let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string tag) in
103
103
(* Pop figure state if leaving a figure *)
104
104
if name_lower = "figure" then begin
105
105
match state.figure_stack with
···
115
115
match state.element_stack with
116
116
| [] -> ()
117
117
| parent :: _ ->
118
-
let parent_lower = String.lowercase_ascii parent in
118
+
let parent_lower = Astring.String.Ascii.lowercase parent in
119
119
let trimmed = String.trim text in
120
120
if trimmed <> "" then begin
121
121
if parent_lower = "figure" then begin
+4
-4
lib/html5rw/parser/parser.mli
+4
-4
lib/html5rw/parser/parser.mli
···
359
359
(** Result of parsing an HTML document or fragment.
360
360
361
361
This opaque type contains:
362
-
- The DOM tree (access via {!root})
363
-
- Parse errors if collection was enabled (access via {!errors})
364
-
- Detected encoding for byte input (access via {!encoding})
362
+
- The DOM tree (access via {!val:root})
363
+
- Parse errors if collection was enabled (access via {!val:errors})
364
+
- Detected encoding for byte input (access via {!val:encoding})
365
365
*)
366
366
type t
367
367
···
416
416
3. {b Transport hint}: Use [transport_encoding] if provided
417
417
4. {b Fallback}: Use UTF-8
418
418
419
-
The detected encoding is stored in the result (access via {!encoding}).
419
+
The detected encoding is stored in the result (access via {!val:encoding}).
420
420
421
421
{b Prescan details:}
422
422
+99
lib/js/dune
+99
lib/js/dune
···
1
+
; HTML5rw JavaScript Validator Library
2
+
; Compiled with js_of_ocaml for browser use
3
+
4
+
(library
5
+
(name htmlrw_js)
6
+
(public_name html5rw.js)
7
+
(libraries
8
+
html5rw
9
+
htmlrw_check
10
+
bytesrw
11
+
brr)
12
+
(modes byte) ; js_of_ocaml requires bytecode
13
+
(modules
14
+
htmlrw_js_types
15
+
htmlrw_js_dom
16
+
htmlrw_js_annotate
17
+
htmlrw_js_ui
18
+
htmlrw_js))
19
+
20
+
; Standalone JavaScript file for direct browser use
21
+
; This compiles the library entry point to a .js file
22
+
(executable
23
+
(name htmlrw_js_main)
24
+
(libraries htmlrw_js)
25
+
(js_of_ocaml
26
+
(javascript_files))
27
+
(modes js wasm)
28
+
(modules htmlrw_js_main))
29
+
30
+
; Web Worker for background validation
31
+
; Runs validation in a separate thread to avoid blocking the UI
32
+
(executable
33
+
(name htmlrw_js_worker)
34
+
(libraries html5rw htmlrw_check bytesrw brr)
35
+
(js_of_ocaml
36
+
(javascript_files))
37
+
(modes js wasm)
38
+
(modules htmlrw_js_worker))
39
+
40
+
; Test runner for browser-based regression testing
41
+
; Runs html5lib conformance tests in the browser
42
+
(executable
43
+
(name htmlrw_js_tests_main)
44
+
(libraries html5rw bytesrw brr)
45
+
(js_of_ocaml
46
+
(javascript_files))
47
+
(modes js wasm)
48
+
(modules htmlrw_js_tests htmlrw_js_tests_main))
49
+
50
+
; Copy to nice filenames (JS)
51
+
(rule
52
+
(targets htmlrw.js)
53
+
(deps htmlrw_js_main.bc.js)
54
+
(action (copy %{deps} %{targets})))
55
+
56
+
(rule
57
+
(targets htmlrw-worker.js)
58
+
(deps htmlrw_js_worker.bc.js)
59
+
(action (copy %{deps} %{targets})))
60
+
61
+
(rule
62
+
(targets htmlrw-tests.js)
63
+
(deps htmlrw_js_tests_main.bc.js)
64
+
(action (copy %{deps} %{targets})))
65
+
66
+
; Copy to nice filenames (WASM)
67
+
; Note: requires wasm_of_ocaml-compiler to be installed
68
+
(rule
69
+
(targets htmlrw.wasm.js)
70
+
(deps htmlrw_js_main.bc.wasm.js)
71
+
(action (copy %{deps} %{targets})))
72
+
73
+
(rule
74
+
(targets htmlrw-worker.wasm.js)
75
+
(deps htmlrw_js_worker.bc.wasm.js)
76
+
(action (copy %{deps} %{targets})))
77
+
78
+
(rule
79
+
(targets htmlrw-tests.wasm.js)
80
+
(deps htmlrw_js_tests_main.bc.wasm.js)
81
+
(action (copy %{deps} %{targets})))
82
+
83
+
; Install web assets to share/html5rw-js/ for npm packaging
84
+
(install
85
+
(package html5rw-js)
86
+
(section share)
87
+
(files
88
+
; JavaScript bundles
89
+
htmlrw.js
90
+
htmlrw-worker.js
91
+
htmlrw-tests.js
92
+
; WASM loader scripts
93
+
htmlrw.wasm.js
94
+
htmlrw-worker.wasm.js
95
+
htmlrw-tests.wasm.js
96
+
; WASM assets (with content-hashed filenames)
97
+
(glob_files_rec (htmlrw_js_main.bc.wasm.assets/* with_prefix htmlrw_js_main.bc.wasm.assets))
98
+
(glob_files_rec (htmlrw_js_worker.bc.wasm.assets/* with_prefix htmlrw_js_worker.bc.wasm.assets))
99
+
(glob_files_rec (htmlrw_js_tests_main.bc.wasm.assets/* with_prefix htmlrw_js_tests_main.bc.wasm.assets))))
+583
lib/js/htmlrw_js.ml
+583
lib/js/htmlrw_js.ml
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
open Brr
7
+
open Htmlrw_js_types
8
+
9
+
let ensure_doctype html =
10
+
let lower = String.lowercase_ascii html in
11
+
if String.length lower >= 9 && String.sub lower 0 9 = "<!doctype" then
12
+
html
13
+
else
14
+
"<!DOCTYPE html>" ^ html
15
+
16
+
let validate_string raw_html =
17
+
let html = ensure_doctype raw_html in
18
+
try
19
+
let core_result = Htmlrw_check.check_string html in
20
+
let messages = List.map (fun msg ->
21
+
{ message = msg; element_ref = None }
22
+
) (Htmlrw_check.messages core_result) in
23
+
{ messages; core_result; source_element = None }
24
+
with exn ->
25
+
(* Return empty result with error message on parse failure *)
26
+
let error_msg = {
27
+
Htmlrw_check.severity = Htmlrw_check.Error;
28
+
text = Printf.sprintf "Parse error: %s" (Printexc.to_string exn);
29
+
error_code = Htmlrw_check.Conformance (`Misc `Multiple_h1);
30
+
location = None;
31
+
element = None;
32
+
attribute = None;
33
+
extract = None;
34
+
} in
35
+
let core_result = Htmlrw_check.check_string "" in
36
+
{ messages = [{ message = error_msg; element_ref = None }];
37
+
core_result;
38
+
source_element = None }
39
+
40
+
let validate_element el =
41
+
try
42
+
let el_map, html = Htmlrw_js_dom.create el in
43
+
let core_result = Htmlrw_check.check_string html in
44
+
let messages = List.map (fun msg ->
45
+
let element_ref =
46
+
match Htmlrw_js_dom.find_for_message el_map msg with
47
+
| Some browser_el ->
48
+
Some {
49
+
element = Some browser_el;
50
+
selector = Htmlrw_js_dom.selector_path browser_el;
51
+
}
52
+
| None ->
53
+
(* No direct mapping found - try to find by element name *)
54
+
match msg.Htmlrw_check.element with
55
+
| Some tag ->
56
+
let matches = Htmlrw_js_dom.filter_elements (fun e ->
57
+
String.lowercase_ascii (Jstr.to_string (El.tag_name e)) =
58
+
String.lowercase_ascii tag
59
+
) el in
60
+
(match matches with
61
+
| browser_el :: _ ->
62
+
Some {
63
+
element = Some browser_el;
64
+
selector = Htmlrw_js_dom.selector_path browser_el;
65
+
}
66
+
| [] -> None)
67
+
| None -> None
68
+
in
69
+
{ message = msg; element_ref }
70
+
) (Htmlrw_check.messages core_result) in
71
+
{ messages; core_result; source_element = Some el }
72
+
with exn ->
73
+
(* Return error result on parse failure *)
74
+
let error_msg = {
75
+
Htmlrw_check.severity = Htmlrw_check.Error;
76
+
text = Printf.sprintf "Parse error: %s" (Printexc.to_string exn);
77
+
error_code = Htmlrw_check.Conformance (`Misc `Multiple_h1);
78
+
location = None;
79
+
element = None;
80
+
attribute = None;
81
+
extract = None;
82
+
} in
83
+
let core_result = Htmlrw_check.check_string "" in
84
+
{ messages = [{ message = error_msg; element_ref = None }];
85
+
core_result;
86
+
source_element = Some el }
87
+
88
+
let validate_and_annotate ?(config = default_annotation_config) el =
89
+
let result = validate_element el in
90
+
(* Inject styles if not already present *)
91
+
let doc = El.document el in
92
+
let existing = El.find_first_by_selector (Jstr.v "[data-html5rw-styles]")
93
+
~root:(Document.head doc) in
94
+
if Option.is_none existing then
95
+
ignore (Htmlrw_js_annotate.inject_default_styles ~theme:`Auto);
96
+
(* Annotate elements *)
97
+
Htmlrw_js_annotate.annotate ~config ~root:el result.messages;
98
+
result
99
+
100
+
let validate_and_show_panel
101
+
?(annotation_config = default_annotation_config)
102
+
?(panel_config = default_panel_config)
103
+
el =
104
+
let result = validate_and_annotate ~config:annotation_config el in
105
+
(* Inject panel styles if not already present *)
106
+
let doc = El.document el in
107
+
let existing = El.find_first_by_selector (Jstr.v "[data-html5rw-panel-styles]")
108
+
~root:(Document.head doc) in
109
+
if Option.is_none existing then
110
+
ignore (Htmlrw_js_ui.inject_default_styles ~theme:panel_config.theme);
111
+
(* Create and show panel *)
112
+
ignore (Htmlrw_js_ui.create ~config:panel_config result);
113
+
result
114
+
115
+
let errors result =
116
+
List.filter (fun bm ->
117
+
bm.message.Htmlrw_check.severity = Htmlrw_check.Error
118
+
) result.messages
119
+
120
+
let warnings_only result =
121
+
List.filter (fun bm ->
122
+
bm.message.Htmlrw_check.severity = Htmlrw_check.Warning
123
+
) result.messages
124
+
125
+
let infos result =
126
+
List.filter (fun bm ->
127
+
bm.message.Htmlrw_check.severity = Htmlrw_check.Info
128
+
) result.messages
129
+
130
+
let has_errors result =
131
+
Htmlrw_check.has_errors result.core_result
132
+
133
+
let has_issues result =
134
+
Htmlrw_check.has_errors result.core_result ||
135
+
Htmlrw_check.has_warnings result.core_result
136
+
137
+
let message_count result =
138
+
List.length result.messages
139
+
140
+
let element_map result =
141
+
match result.source_element with
142
+
| Some el -> Some (fst (Htmlrw_js_dom.create el))
143
+
| None -> None
144
+
145
+
(* JavaScript API registration *)
146
+
147
+
let register_api_on obj =
148
+
(* validateString(html) -> result *)
149
+
Jv.set obj "validateString" (Jv.callback ~arity:1 (fun html ->
150
+
let html_str = Jv.to_string html in
151
+
let result = validate_string html_str in
152
+
result_to_jv result
153
+
));
154
+
155
+
(* validateElement(el) -> result *)
156
+
Jv.set obj "validateElement" (Jv.callback ~arity:1 (fun el_jv ->
157
+
let el = El.of_jv el_jv in
158
+
let result = validate_element el in
159
+
result_to_jv result
160
+
));
161
+
162
+
(* validateAndAnnotate(el, config?) -> result *)
163
+
Jv.set obj "validateAndAnnotate" (Jv.callback ~arity:2 (fun el_jv config_jv ->
164
+
let el = El.of_jv el_jv in
165
+
let config =
166
+
if Jv.is_none config_jv then
167
+
default_annotation_config
168
+
else
169
+
{
170
+
add_data_attrs = Jv.to_bool (Jv.get config_jv "addDataAttrs");
171
+
add_classes = Jv.to_bool (Jv.get config_jv "addClasses");
172
+
show_tooltips = Jv.to_bool (Jv.get config_jv "showTooltips");
173
+
tooltip_position = `Auto;
174
+
highlight_on_hover = Jv.to_bool (Jv.get config_jv "highlightOnHover");
175
+
}
176
+
in
177
+
let result = validate_and_annotate ~config el in
178
+
result_to_jv result
179
+
));
180
+
181
+
(* validateAndShowPanel(el, config?) -> result *)
182
+
Jv.set obj "validateAndShowPanel" (Jv.callback ~arity:2 (fun el_jv config_jv ->
183
+
let el = El.of_jv el_jv in
184
+
let annotation_config, panel_config =
185
+
if Jv.is_none config_jv then
186
+
default_annotation_config, default_panel_config
187
+
else
188
+
let ann_jv = Jv.get config_jv "annotation" in
189
+
let panel_jv = Jv.get config_jv "panel" in
190
+
let ann_config =
191
+
if Jv.is_none ann_jv then default_annotation_config
192
+
else {
193
+
add_data_attrs =
194
+
(let v = Jv.get ann_jv "addDataAttrs" in
195
+
if Jv.is_none v then true else Jv.to_bool v);
196
+
add_classes =
197
+
(let v = Jv.get ann_jv "addClasses" in
198
+
if Jv.is_none v then true else Jv.to_bool v);
199
+
show_tooltips =
200
+
(let v = Jv.get ann_jv "showTooltips" in
201
+
if Jv.is_none v then true else Jv.to_bool v);
202
+
tooltip_position = `Auto;
203
+
highlight_on_hover =
204
+
(let v = Jv.get ann_jv "highlightOnHover" in
205
+
if Jv.is_none v then true else Jv.to_bool v);
206
+
}
207
+
in
208
+
let panel_config =
209
+
if Jv.is_none panel_jv then default_panel_config
210
+
else {
211
+
initial_position =
212
+
(let v = Jv.get panel_jv "initialPosition" in
213
+
if Jv.is_none v then `TopRight
214
+
else match Jv.to_string v with
215
+
| "topRight" -> `TopRight
216
+
| "topLeft" -> `TopLeft
217
+
| "bottomRight" -> `BottomRight
218
+
| "bottomLeft" -> `BottomLeft
219
+
| _ -> `TopRight);
220
+
draggable =
221
+
(let v = Jv.get panel_jv "draggable" in
222
+
if Jv.is_none v then true else Jv.to_bool v);
223
+
resizable =
224
+
(let v = Jv.get panel_jv "resizable" in
225
+
if Jv.is_none v then true else Jv.to_bool v);
226
+
collapsible =
227
+
(let v = Jv.get panel_jv "collapsible" in
228
+
if Jv.is_none v then true else Jv.to_bool v);
229
+
start_collapsed =
230
+
(let v = Jv.get panel_jv "startCollapsed" in
231
+
if Jv.is_none v then false else Jv.to_bool v);
232
+
max_height =
233
+
(let v = Jv.get panel_jv "maxHeight" in
234
+
if Jv.is_none v then Some 400 else Some (Jv.to_int v));
235
+
group_by_severity =
236
+
(let v = Jv.get panel_jv "groupBySeverity" in
237
+
if Jv.is_none v then true else Jv.to_bool v);
238
+
click_to_highlight =
239
+
(let v = Jv.get panel_jv "clickToHighlight" in
240
+
if Jv.is_none v then true else Jv.to_bool v);
241
+
show_selector_path =
242
+
(let v = Jv.get panel_jv "showSelectorPath" in
243
+
if Jv.is_none v then true else Jv.to_bool v);
244
+
theme =
245
+
(let v = Jv.get panel_jv "theme" in
246
+
if Jv.is_none v then `Auto
247
+
else match Jv.to_string v with
248
+
| "light" -> `Light
249
+
| "dark" -> `Dark
250
+
| _ -> `Auto);
251
+
}
252
+
in
253
+
ann_config, panel_config
254
+
in
255
+
let result = validate_and_show_panel ~annotation_config ~panel_config el in
256
+
result_to_jv result
257
+
));
258
+
259
+
(* clearAnnotations(el) *)
260
+
Jv.set obj "clearAnnotations" (Jv.callback ~arity:1 (fun el_jv ->
261
+
let el = El.of_jv el_jv in
262
+
Htmlrw_js_annotate.clear el;
263
+
Jv.undefined
264
+
));
265
+
266
+
(* hidePanel() *)
267
+
Jv.set obj "hidePanel" (Jv.callback ~arity:0 (fun () ->
268
+
Htmlrw_js_ui.hide_current ();
269
+
Jv.undefined
270
+
));
271
+
272
+
(* showPanel(result, config?) *)
273
+
Jv.set obj "showPanel" (Jv.callback ~arity:2 (fun result_jv config_jv ->
274
+
(* This expects a previously returned result object *)
275
+
(* For now, just create a panel with the warnings from the result *)
276
+
let warnings_jv = Jv.get result_jv "warnings" in
277
+
let warnings = Jv.to_list (fun w_jv ->
278
+
let msg = {
279
+
Htmlrw_check.severity =
280
+
(match Jv.to_string (Jv.get w_jv "severity") with
281
+
| "error" -> Htmlrw_check.Error
282
+
| "warning" -> Htmlrw_check.Warning
283
+
| _ -> Htmlrw_check.Info);
284
+
text = Jv.to_string (Jv.get w_jv "message");
285
+
error_code = Htmlrw_check.Conformance (`Misc `Multiple_h1);
286
+
location = None;
287
+
element = None;
288
+
attribute = None;
289
+
extract = None;
290
+
} in
291
+
let element_ref =
292
+
let sel_jv = Jv.get w_jv "selector" in
293
+
let el_jv = Jv.get w_jv "element" in
294
+
if Jv.is_none sel_jv then None
295
+
else Some {
296
+
selector = Jv.to_string sel_jv;
297
+
element = if Jv.is_none el_jv then None else Some (El.of_jv el_jv);
298
+
}
299
+
in
300
+
{ message = msg; element_ref }
301
+
) warnings_jv in
302
+
let result = {
303
+
messages = warnings;
304
+
core_result = Htmlrw_check.check_string "";
305
+
source_element = None;
306
+
} in
307
+
let config =
308
+
if Jv.is_none config_jv then default_panel_config
309
+
else default_panel_config (* TODO: parse config *)
310
+
in
311
+
ignore (Htmlrw_js_ui.create ~config result);
312
+
Jv.undefined
313
+
))
314
+
315
+
(* Async/Worker support *)
316
+
317
+
let console_log msg =
318
+
ignore (Jv.call (Jv.get Jv.global "console") "log" [| Jv.of_string msg |])
319
+
320
+
let console_log_result prefix result =
321
+
let error_count = List.length (List.filter (fun bm ->
322
+
bm.message.Htmlrw_check.severity = Htmlrw_check.Error
323
+
) result.messages) in
324
+
let warning_count = List.length (List.filter (fun bm ->
325
+
bm.message.Htmlrw_check.severity = Htmlrw_check.Warning
326
+
) result.messages) in
327
+
let msg = Printf.sprintf "[html5rw] %s: %d errors, %d warnings, %d total issues"
328
+
prefix error_count warning_count (List.length result.messages) in
329
+
console_log msg
330
+
331
+
let _worker : Jv.t option ref = ref None
332
+
let _pending_callbacks : (int, Jv.t -> unit) Hashtbl.t = Hashtbl.create 16
333
+
let _next_id = ref 0
334
+
335
+
let init_worker worker_url =
336
+
console_log (Printf.sprintf "[html5rw] Initializing web worker from %s" worker_url);
337
+
let worker = Jv.new' (Jv.get Jv.global "Worker") [| Jv.of_string worker_url |] in
338
+
339
+
(* Error handler for worker-level errors *)
340
+
let error_handler = Jv.callback ~arity:1 (fun ev ->
341
+
let msg = Jv.get ev "message" in
342
+
let filename = Jv.get ev "filename" in
343
+
let lineno = Jv.get ev "lineno" in
344
+
console_log (Printf.sprintf "[html5rw] Worker error: %s at %s:%d"
345
+
(if Jv.is_undefined msg then "unknown" else Jv.to_string msg)
346
+
(if Jv.is_undefined filename then "unknown" else Jv.to_string filename)
347
+
(if Jv.is_undefined lineno then 0 else Jv.to_int lineno))
348
+
) in
349
+
ignore (Jv.call worker "addEventListener" [| Jv.of_string "error"; error_handler |]);
350
+
351
+
let handler = Jv.callback ~arity:1 (fun ev ->
352
+
let data = Jv.get ev "data" in
353
+
let id = Jv.get data "id" |> Jv.to_int in
354
+
let error_count = Jv.get data "errorCount" |> Jv.to_int in
355
+
let warning_count = Jv.get data "warningCount" |> Jv.to_int in
356
+
let total = Jv.get data "warnings" |> Jv.to_list (fun _ -> ()) |> List.length in
357
+
console_log (Printf.sprintf "[html5rw] Worker validation complete: %d errors, %d warnings, %d total issues"
358
+
error_count warning_count total);
359
+
match Hashtbl.find_opt _pending_callbacks id with
360
+
| Some callback ->
361
+
Hashtbl.remove _pending_callbacks id;
362
+
callback data
363
+
| None -> ()
364
+
) in
365
+
ignore (Jv.call worker "addEventListener" [| Jv.of_string "message"; handler |]);
366
+
_worker := Some worker;
367
+
console_log "[html5rw] Web worker ready";
368
+
worker
369
+
370
+
let validate_string_async ~callback html =
371
+
match !_worker with
372
+
| None -> failwith "Worker not initialized. Call html5rw.initWorker(url) first."
373
+
| Some worker ->
374
+
console_log (Printf.sprintf "[html5rw] Sending %d bytes to worker for validation..." (String.length html));
375
+
let id = !_next_id in
376
+
incr _next_id;
377
+
Hashtbl.add _pending_callbacks id callback;
378
+
let msg = Jv.obj [|
379
+
"id", Jv.of_int id;
380
+
"html", Jv.of_string html
381
+
|] in
382
+
ignore (Jv.call worker "postMessage" [| msg |])
383
+
384
+
let _validate_element_async ~callback el =
385
+
let html = Htmlrw_js_dom.outer_html el in
386
+
validate_string_async ~callback html
387
+
388
+
let validate_after_load callback el =
389
+
(* Use requestIdleCallback if available, otherwise setTimeout *)
390
+
console_log "[html5rw] Waiting for page load...";
391
+
let run () =
392
+
console_log "[html5rw] Starting validation...";
393
+
let result = validate_element el in
394
+
console_log_result "Validation complete" result;
395
+
callback result
396
+
in
397
+
let request_idle = Jv.get Jv.global "requestIdleCallback" in
398
+
if not (Jv.is_undefined request_idle) then
399
+
ignore (Jv.apply request_idle [| Jv.callback ~arity:1 (fun _ -> run ()) |])
400
+
else
401
+
ignore (Jv.call Jv.global "setTimeout" [|
402
+
Jv.callback ~arity:0 run;
403
+
Jv.of_int 0
404
+
|])
405
+
406
+
let validate_on_idle ?(timeout=5000) callback el =
407
+
(* Wait for page load, then use requestIdleCallback with timeout *)
408
+
console_log "[html5rw] Scheduling validation for idle time...";
409
+
let run_when_ready () =
410
+
let request_idle = Jv.get Jv.global "requestIdleCallback" in
411
+
if not (Jv.is_undefined request_idle) then begin
412
+
let opts = Jv.obj [| "timeout", Jv.of_int timeout |] in
413
+
ignore (Jv.call Jv.global "requestIdleCallback" [|
414
+
Jv.callback ~arity:1 (fun _ ->
415
+
console_log "[html5rw] Browser idle, starting validation...";
416
+
let result = validate_element el in
417
+
console_log_result "Validation complete" result;
418
+
callback result
419
+
);
420
+
opts
421
+
|])
422
+
end else begin
423
+
ignore (Jv.call Jv.global "setTimeout" [|
424
+
Jv.callback ~arity:0 (fun () ->
425
+
console_log "[html5rw] Starting validation...";
426
+
let result = validate_element el in
427
+
console_log_result "Validation complete" result;
428
+
callback result
429
+
);
430
+
Jv.of_int 100
431
+
|])
432
+
end
433
+
in
434
+
let ready_state = Jv.get (Jv.get Jv.global "document") "readyState" |> Jv.to_string in
435
+
if ready_state = "complete" then
436
+
run_when_ready ()
437
+
else
438
+
ignore (Jv.call Jv.global "addEventListener" [|
439
+
Jv.of_string "load";
440
+
Jv.callback ~arity:1 (fun _ -> run_when_ready ())
441
+
|])
442
+
443
+
let register_global_api () =
444
+
let api = Jv.obj [||] in
445
+
register_api_on api;
446
+
447
+
(* Add async functions *)
448
+
449
+
(* initWorker(url) - initialize web worker *)
450
+
Jv.set api "initWorker" (Jv.callback ~arity:1 (fun url_jv ->
451
+
let url = Jv.to_string url_jv in
452
+
init_worker url
453
+
));
454
+
455
+
(* validateStringAsync(html, callback) - validate in worker *)
456
+
Jv.set api "validateStringAsync" (Jv.callback ~arity:2 (fun html_jv callback_jv ->
457
+
let html = Jv.to_string html_jv in
458
+
let callback result = ignore (Jv.apply callback_jv [| result |]) in
459
+
validate_string_async ~callback html;
460
+
Jv.undefined
461
+
));
462
+
463
+
(* validateElementAsync(el, callback) - validate element in worker *)
464
+
Jv.set api "validateElementAsync" (Jv.callback ~arity:2 (fun el_jv callback_jv ->
465
+
let el = El.of_jv el_jv in
466
+
let html = Htmlrw_js_dom.outer_html el in
467
+
let callback result = ignore (Jv.apply callback_jv [| result |]) in
468
+
validate_string_async ~callback html;
469
+
Jv.undefined
470
+
));
471
+
472
+
(* validateAfterLoad(el, callback) - validate after page load *)
473
+
Jv.set api "validateAfterLoad" (Jv.callback ~arity:2 (fun el_jv callback_jv ->
474
+
let el = El.of_jv el_jv in
475
+
let callback result = ignore (Jv.apply callback_jv [| result_to_jv result |]) in
476
+
validate_after_load callback el;
477
+
Jv.undefined
478
+
));
479
+
480
+
(* validateOnIdle(el, callback, timeout?) - validate when browser is idle *)
481
+
Jv.set api "validateOnIdle" (Jv.callback ~arity:3 (fun el_jv callback_jv timeout_jv ->
482
+
let el = El.of_jv el_jv in
483
+
let timeout = if Jv.is_undefined timeout_jv then 5000 else Jv.to_int timeout_jv in
484
+
let callback result = ignore (Jv.apply callback_jv [| result_to_jv result |]) in
485
+
validate_on_idle ~timeout callback el;
486
+
Jv.undefined
487
+
));
488
+
489
+
(* validateAndShowPanelAsync(el, config?) - non-blocking panel display *)
490
+
Jv.set api "validateAndShowPanelAsync" (Jv.callback ~arity:2 (fun el_jv config_jv ->
491
+
let el = El.of_jv el_jv in
492
+
validate_on_idle ~timeout:3000 (fun result ->
493
+
let annotation_config, panel_config =
494
+
if Jv.is_none config_jv then
495
+
default_annotation_config, default_panel_config
496
+
else
497
+
(* Parse config same as validateAndShowPanel *)
498
+
default_annotation_config, default_panel_config
499
+
in
500
+
(* Inject styles if needed *)
501
+
let doc = El.document el in
502
+
let existing = El.find_first_by_selector (Jstr.v "[data-html5rw-styles]")
503
+
~root:(Document.head doc) in
504
+
if Option.is_none existing then
505
+
ignore (Htmlrw_js_annotate.inject_default_styles ~theme:`Auto);
506
+
let existing_panel = El.find_first_by_selector (Jstr.v "[data-html5rw-panel-styles]")
507
+
~root:(Document.head doc) in
508
+
if Option.is_none existing_panel then
509
+
ignore (Htmlrw_js_ui.inject_default_styles ~theme:panel_config.theme);
510
+
(* Annotate and show panel *)
511
+
Htmlrw_js_annotate.annotate ~config:annotation_config ~root:el result.messages;
512
+
ignore (Htmlrw_js_ui.create ~config:panel_config result)
513
+
) el;
514
+
Jv.undefined
515
+
));
516
+
517
+
(* showPanelFromWorkerResult(result) - show panel from worker validation result *)
518
+
Jv.set api "showPanelFromWorkerResult" (Jv.callback ~arity:1 (fun result_jv ->
519
+
console_log "[html5rw] Showing panel from worker result";
520
+
(* Convert worker result format to internal format *)
521
+
let warnings_jv = Jv.get result_jv "warnings" in
522
+
let messages = Jv.to_list (fun w_jv ->
523
+
let severity_str = Jv.to_string (Jv.get w_jv "severity") in
524
+
let msg = {
525
+
Htmlrw_check.severity =
526
+
(match severity_str with
527
+
| "error" -> Htmlrw_check.Error
528
+
| "warning" -> Htmlrw_check.Warning
529
+
| _ -> Htmlrw_check.Info);
530
+
text = Jv.to_string (Jv.get w_jv "message");
531
+
error_code = Htmlrw_check.Conformance (`Misc `Multiple_h1);
532
+
location = (
533
+
let line_jv = Jv.get w_jv "line" in
534
+
let col_jv = Jv.get w_jv "column" in
535
+
if Jv.is_undefined line_jv then None
536
+
else Some {
537
+
Htmlrw_check.line = Jv.to_int line_jv;
538
+
column = (if Jv.is_undefined col_jv then 1 else Jv.to_int col_jv);
539
+
end_line = None;
540
+
end_column = None;
541
+
system_id = None;
542
+
}
543
+
);
544
+
element = (
545
+
let el_jv = Jv.get w_jv "elementName" in
546
+
if Jv.is_undefined el_jv then None else Some (Jv.to_string el_jv)
547
+
);
548
+
attribute = (
549
+
let attr_jv = Jv.get w_jv "attribute" in
550
+
if Jv.is_undefined attr_jv then None else Some (Jv.to_string attr_jv)
551
+
);
552
+
extract = None;
553
+
} in
554
+
{ message = msg; element_ref = None }
555
+
) warnings_jv in
556
+
557
+
let result = {
558
+
messages;
559
+
core_result = Htmlrw_check.check_string "";
560
+
source_element = None;
561
+
} in
562
+
563
+
(* Inject panel styles *)
564
+
let doc = Document.of_jv (Jv.get Jv.global "document") in
565
+
let existing_panel = El.find_first_by_selector (Jstr.v "[data-html5rw-panel-styles]")
566
+
~root:(Document.head doc) in
567
+
if Option.is_none existing_panel then
568
+
ignore (Htmlrw_js_ui.inject_default_styles ~theme:`Auto);
569
+
570
+
(* Create and show panel *)
571
+
console_log (Printf.sprintf "[html5rw] Creating panel with %d messages" (List.length messages));
572
+
ignore (Htmlrw_js_ui.create ~config:default_panel_config result);
573
+
Jv.undefined
574
+
));
575
+
576
+
Jv.set Jv.global "html5rw" api;
577
+
578
+
(* Dispatch 'html5rwReady' event for async loaders (WASM) *)
579
+
let document = Jv.get Jv.global "document" in
580
+
let event_class = Jv.get Jv.global "CustomEvent" in
581
+
let event = Jv.new' event_class [| Jv.of_string "html5rwReady" |] in
582
+
ignore (Jv.call document "dispatchEvent" [| event |]);
583
+
console_log "[html5rw] API ready"
+154
lib/js/htmlrw_js.mli
+154
lib/js/htmlrw_js.mli
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** JavaScript API for HTML5 validation in the browser.
7
+
8
+
This module provides the main entry points for validating HTML in a
9
+
browser environment. It wraps the core {!Htmlrw_check} validator and
10
+
adds browser-specific functionality for element mapping and annotation.
11
+
12
+
{2 JavaScript Usage}
13
+
14
+
After loading the compiled JavaScript, the API is available on [window]:
15
+
16
+
{v
17
+
// Validate an element (recommended)
18
+
const result = html5rw.validateElement(document.body);
19
+
console.log(result.errorCount, "errors found");
20
+
21
+
// Validate with annotation
22
+
html5rw.validateAndAnnotate(document.body, {
23
+
showTooltips: true,
24
+
showPanel: true
25
+
});
26
+
27
+
// Validate a raw HTML string
28
+
const result = html5rw.validateString("<div><p>Hello</div>");
29
+
result.warnings.forEach(w => console.log(w.message));
30
+
v}
31
+
32
+
{2 OCaml Usage}
33
+
34
+
{[
35
+
let result = Htmlrw_js.validate_element (Brr.Document.body G.document) in
36
+
List.iter (fun bm ->
37
+
Brr.Console.log [Jstr.v bm.Htmlrw_js_types.message.text]
38
+
) result.messages
39
+
]} *)
40
+
41
+
42
+
open Htmlrw_js_types
43
+
44
+
45
+
(** {1 Validation} *)
46
+
47
+
(** Validate an HTML string.
48
+
49
+
This is the simplest form of validation. Since there's no source element,
50
+
the returned messages will not have element references.
51
+
52
+
{[
53
+
let result = validate_string "<html><body><img></body></html>" in
54
+
if Htmlrw_check.has_errors result.core_result then
55
+
(* handle errors *)
56
+
]} *)
57
+
val validate_string : string -> result
58
+
59
+
(** Validate a DOM element's HTML.
60
+
61
+
Serializes the element to HTML, validates it, and maps the results
62
+
back to the live DOM elements.
63
+
64
+
{[
65
+
let result = validate_element (Document.body G.document) in
66
+
List.iter (fun bm ->
67
+
match bm.element_ref with
68
+
| Some { element = Some el; _ } ->
69
+
El.set_class (Jstr.v "has-error") true el
70
+
| _ -> ()
71
+
) result.messages
72
+
]} *)
73
+
val validate_element : Brr.El.t -> result
74
+
75
+
76
+
(** {1 Validation with Annotation}
77
+
78
+
These functions validate and immediately annotate the DOM with results. *)
79
+
80
+
(** Validate and annotate an element.
81
+
82
+
This combines validation with DOM annotation. The element and its
83
+
descendants are annotated with data attributes, classes, and optionally
84
+
tooltips based on the validation results.
85
+
86
+
@param config Annotation configuration. Defaults to
87
+
[Htmlrw_js_types.default_annotation_config]. *)
88
+
val validate_and_annotate :
89
+
?config:annotation_config -> Brr.El.t -> result
90
+
91
+
(** Validate, annotate, and show the warning panel.
92
+
93
+
The all-in-one function for browser validation with full UI.
94
+
95
+
@param annotation_config How to annotate elements.
96
+
@param panel_config How to display the warning panel. *)
97
+
val validate_and_show_panel :
98
+
?annotation_config:annotation_config ->
99
+
?panel_config:panel_config ->
100
+
Brr.El.t ->
101
+
result
102
+
103
+
104
+
(** {1 Result Inspection} *)
105
+
106
+
(** Get messages filtered by severity. *)
107
+
val errors : result -> browser_message list
108
+
val warnings_only : result -> browser_message list
109
+
val infos : result -> browser_message list
110
+
111
+
(** Check if there are any errors. *)
112
+
val has_errors : result -> bool
113
+
114
+
(** Check if there are any warnings or errors. *)
115
+
val has_issues : result -> bool
116
+
117
+
(** Get total count of all messages. *)
118
+
val message_count : result -> int
119
+
120
+
121
+
(** {1 JavaScript Export}
122
+
123
+
These functions register the API on the JavaScript global object. *)
124
+
125
+
(** Register the validation API on [window.html5rw].
126
+
127
+
Call this from your main entry point to expose the JavaScript API:
128
+
129
+
{[
130
+
let () = Htmlrw_js.register_global_api ()
131
+
]}
132
+
133
+
This exposes:
134
+
- [html5rw.validateString(html)] -> result object
135
+
- [html5rw.validateElement(el)] -> result object
136
+
- [html5rw.validateAndAnnotate(el, config?)] -> result object
137
+
- [html5rw.validateAndShowPanel(el, config?)] -> result object
138
+
- [html5rw.clearAnnotations(el)] -> void
139
+
- [html5rw.hidePanel()] -> void *)
140
+
val register_global_api : unit -> unit
141
+
142
+
(** Register the API on a custom object instead of [window.html5rw].
143
+
144
+
Useful for module bundlers or when you want to control the namespace. *)
145
+
val register_api_on : Jv.t -> unit
146
+
147
+
148
+
(** {1 Low-level Access} *)
149
+
150
+
(** Access the element map from a validation result.
151
+
152
+
Useful for custom element lookup logic. Returns [None] if the result
153
+
was from {!validate_string} (no source element). *)
154
+
val element_map : result -> Htmlrw_js_dom.t option
+340
lib/js/htmlrw_js_annotate.ml
+340
lib/js/htmlrw_js_annotate.ml
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
open Brr
7
+
open Htmlrw_js_types
8
+
9
+
module Data_attr = struct
10
+
let severity = Jstr.v "data-html5rw-severity"
11
+
let message = Jstr.v "data-html5rw-message"
12
+
let code = Jstr.v "data-html5rw-code"
13
+
let count = Jstr.v "data-html5rw-count"
14
+
end
15
+
16
+
module Css_class = struct
17
+
let error = Jstr.v "html5rw-error"
18
+
let warning = Jstr.v "html5rw-warning"
19
+
let info = Jstr.v "html5rw-info"
20
+
let has_issues = Jstr.v "html5rw-has-issues"
21
+
let highlighted = Jstr.v "html5rw-highlighted"
22
+
let tooltip = Jstr.v "html5rw-tooltip"
23
+
let tooltip_visible = Jstr.v "html5rw-tooltip-visible"
24
+
end
25
+
26
+
type tooltip = {
27
+
container : El.t;
28
+
_target : El.t;
29
+
}
30
+
31
+
let severity_class = function
32
+
| Htmlrw_check.Error -> Css_class.error
33
+
| Htmlrw_check.Warning -> Css_class.warning
34
+
| Htmlrw_check.Info -> Css_class.info
35
+
36
+
let annotate_element ~config el msg =
37
+
if config.add_data_attrs then begin
38
+
El.set_at Data_attr.severity
39
+
(Some (Jstr.v (Htmlrw_check.severity_to_string msg.Htmlrw_check.severity))) el;
40
+
El.set_at Data_attr.message (Some (Jstr.v msg.Htmlrw_check.text)) el;
41
+
El.set_at Data_attr.code
42
+
(Some (Jstr.v (Htmlrw_check.error_code_to_string msg.Htmlrw_check.error_code))) el
43
+
end;
44
+
if config.add_classes then begin
45
+
El.set_class (severity_class msg.Htmlrw_check.severity) true el;
46
+
El.set_class Css_class.has_issues true el
47
+
end
48
+
49
+
let rec create_tooltip ~position target messages =
50
+
let doc = El.document target in
51
+
52
+
(* Create tooltip container *)
53
+
let container = El.v (Jstr.v "div") ~at:[At.class' Css_class.tooltip] [] in
54
+
55
+
(* Add messages to tooltip *)
56
+
let msg_els = List.map (fun msg ->
57
+
let sev = Htmlrw_check.severity_to_string msg.Htmlrw_check.severity in
58
+
let sev_class = Jstr.v ("html5rw-tooltip-" ^ sev) in
59
+
El.v (Jstr.v "div") ~at:[At.class' sev_class] [
60
+
El.v (Jstr.v "span") ~at:[At.class' (Jstr.v "html5rw-tooltip-severity")] [
61
+
El.txt' (String.uppercase_ascii sev)
62
+
];
63
+
El.v (Jstr.v "span") ~at:[At.class' (Jstr.v "html5rw-tooltip-text")] [
64
+
El.txt' msg.Htmlrw_check.text
65
+
]
66
+
]
67
+
) messages in
68
+
El.set_children container msg_els;
69
+
70
+
(* Position the tooltip *)
71
+
let pos_class = match position with
72
+
| `Above -> "html5rw-tooltip-above"
73
+
| `Below -> "html5rw-tooltip-below"
74
+
| `Auto -> "html5rw-tooltip-auto"
75
+
in
76
+
El.set_class (Jstr.v pos_class) true container;
77
+
78
+
(* Add to body for proper z-index handling *)
79
+
El.append_children (Document.body doc) [container];
80
+
81
+
(* Set up hover events *)
82
+
let hide () =
83
+
El.set_class Css_class.tooltip_visible false container
84
+
in
85
+
let show () =
86
+
(* Hide any other visible tooltips first *)
87
+
let doc = El.document target in
88
+
let visible = El.fold_find_by_selector (fun el acc -> el :: acc)
89
+
(Jstr.v ".html5rw-tooltip-visible") [] ~root:(Document.body doc) in
90
+
List.iter (fun el -> El.set_class Css_class.tooltip_visible false el) visible;
91
+
(* Position and show this tooltip *)
92
+
let x = El.bound_x target in
93
+
let y = El.bound_y target in
94
+
let h = El.bound_h target in
95
+
let tooltip_y = match position with
96
+
| `Below | `Auto -> y +. h +. 4.0
97
+
| `Above -> y -. 4.0
98
+
in
99
+
El.set_inline_style (Jstr.v "left") (Jstr.v (Printf.sprintf "%.0fpx" x)) container;
100
+
El.set_inline_style (Jstr.v "top") (Jstr.v (Printf.sprintf "%.0fpx" tooltip_y)) container;
101
+
El.set_class Css_class.tooltip_visible true container
102
+
in
103
+
104
+
ignore (Ev.listen Ev.mouseenter (fun _ -> show ()) (El.as_target target));
105
+
ignore (Ev.listen Ev.mouseleave (fun _ -> hide ()) (El.as_target target));
106
+
(* Also hide on mouseout for better reliability *)
107
+
ignore (Ev.listen Ev.mouseout (fun ev ->
108
+
let related = Jv.get (Ev.to_jv ev) "relatedTarget" in
109
+
(* Hide if mouse moved to something outside the target *)
110
+
if Jv.is_null related then hide ()
111
+
else
112
+
(* Use JS contains method directly *)
113
+
let contains = Jv.call (El.to_jv target) "contains" [| related |] |> Jv.to_bool in
114
+
if not contains then hide ()
115
+
) (El.as_target target));
116
+
117
+
{ container; _target = target }
118
+
119
+
and annotate ~config ~root:_ messages =
120
+
(* Group messages by element - use a list since we can't hash elements *)
121
+
let el_messages : (El.t * Htmlrw_check.message list) list ref = ref [] in
122
+
List.iter (fun bm ->
123
+
match bm.element_ref with
124
+
| Some { element = Some el; _ } ->
125
+
let found = ref false in
126
+
el_messages := List.map (fun (e, msgs) ->
127
+
if Jv.strict_equal (El.to_jv e) (El.to_jv el) then begin
128
+
found := true;
129
+
(e, bm.message :: msgs)
130
+
end else (e, msgs)
131
+
) !el_messages;
132
+
if not !found then
133
+
el_messages := (el, [bm.message]) :: !el_messages
134
+
| _ -> ()
135
+
) messages;
136
+
137
+
(* Annotate each element *)
138
+
List.iter (fun (el, msgs) ->
139
+
(* Use highest severity *)
140
+
let highest = List.fold_left (fun acc msg ->
141
+
match acc, msg.Htmlrw_check.severity with
142
+
| Htmlrw_check.Error, _ -> Htmlrw_check.Error
143
+
| _, Htmlrw_check.Error -> Htmlrw_check.Error
144
+
| Htmlrw_check.Warning, _ -> Htmlrw_check.Warning
145
+
| _, Htmlrw_check.Warning -> Htmlrw_check.Warning
146
+
| _ -> Htmlrw_check.Info
147
+
) Htmlrw_check.Info msgs in
148
+
149
+
let primary_msg = {
150
+
Htmlrw_check.severity = highest;
151
+
text = (match msgs with m :: _ -> m.Htmlrw_check.text | [] -> "");
152
+
error_code = (match msgs with m :: _ -> m.Htmlrw_check.error_code
153
+
| [] -> Htmlrw_check.Conformance (`Misc `Multiple_h1));
154
+
location = None;
155
+
element = None;
156
+
attribute = None;
157
+
extract = None;
158
+
} in
159
+
annotate_element ~config el primary_msg;
160
+
161
+
if config.add_data_attrs then
162
+
El.set_at Data_attr.count (Some (Jstr.v (string_of_int (List.length msgs)))) el;
163
+
164
+
if config.show_tooltips then
165
+
ignore (create_tooltip ~position:config.tooltip_position el msgs)
166
+
) !el_messages
167
+
168
+
let show_tooltip t =
169
+
El.set_class Css_class.tooltip_visible true t.container
170
+
171
+
let hide_tooltip t =
172
+
El.set_class Css_class.tooltip_visible false t.container
173
+
174
+
let remove_tooltip t =
175
+
El.remove t.container
176
+
177
+
let tooltips_in root =
178
+
let doc = El.document root in
179
+
let tooltip_els = El.fold_find_by_selector (fun el acc -> el :: acc)
180
+
(Jstr.v ".html5rw-tooltip") [] ~root:(Document.body doc) in
181
+
List.map (fun container -> { container; _target = root }) tooltip_els
182
+
183
+
let clear_element el =
184
+
El.set_at Data_attr.severity None el;
185
+
El.set_at Data_attr.message None el;
186
+
El.set_at Data_attr.code None el;
187
+
El.set_at Data_attr.count None el;
188
+
El.set_class Css_class.error false el;
189
+
El.set_class Css_class.warning false el;
190
+
El.set_class Css_class.info false el;
191
+
El.set_class Css_class.has_issues false el;
192
+
El.set_class Css_class.highlighted false el
193
+
194
+
let clear root =
195
+
Htmlrw_js_dom.iter_elements clear_element root;
196
+
List.iter remove_tooltip (tooltips_in root)
197
+
198
+
let highlight_element el =
199
+
El.set_class Css_class.highlighted true el;
200
+
(* Call scrollIntoView directly with options object *)
201
+
let opts = Jv.obj [|
202
+
"behavior", Jv.of_string "smooth";
203
+
"block", Jv.of_string "center"
204
+
|] in
205
+
ignore (Jv.call (El.to_jv el) "scrollIntoView" [| opts |])
206
+
207
+
let unhighlight_element el =
208
+
El.set_class Css_class.highlighted false el
209
+
210
+
let _highlighted_elements : El.t list ref = ref []
211
+
212
+
let clear_highlights () =
213
+
List.iter unhighlight_element !_highlighted_elements;
214
+
_highlighted_elements := []
215
+
216
+
let inject_default_styles ~theme =
217
+
let theme_vars = match theme with
218
+
| `Light -> {|
219
+
--html5rw-error-color: #e74c3c;
220
+
--html5rw-warning-color: #f39c12;
221
+
--html5rw-info-color: #3498db;
222
+
--html5rw-bg: #ffffff;
223
+
--html5rw-text: #333333;
224
+
--html5rw-border: #dddddd;
225
+
|}
226
+
| `Dark -> {|
227
+
--html5rw-error-color: #ff6b6b;
228
+
--html5rw-warning-color: #feca57;
229
+
--html5rw-info-color: #54a0ff;
230
+
--html5rw-bg: #2d3436;
231
+
--html5rw-text: #dfe6e9;
232
+
--html5rw-border: #636e72;
233
+
|}
234
+
| `Auto -> {|
235
+
--html5rw-error-color: #e74c3c;
236
+
--html5rw-warning-color: #f39c12;
237
+
--html5rw-info-color: #3498db;
238
+
--html5rw-bg: #ffffff;
239
+
--html5rw-text: #333333;
240
+
--html5rw-border: #dddddd;
241
+
|}
242
+
in
243
+
let css = Printf.sprintf {|
244
+
:root { %s }
245
+
246
+
@media (prefers-color-scheme: dark) {
247
+
:root {
248
+
--html5rw-error-color: #ff6b6b;
249
+
--html5rw-warning-color: #feca57;
250
+
--html5rw-info-color: #54a0ff;
251
+
--html5rw-bg: #2d3436;
252
+
--html5rw-text: #dfe6e9;
253
+
--html5rw-border: #636e72;
254
+
}
255
+
}
256
+
257
+
.html5rw-error {
258
+
outline: 2px solid var(--html5rw-error-color) !important;
259
+
outline-offset: 2px;
260
+
}
261
+
262
+
.html5rw-warning {
263
+
outline: 2px solid var(--html5rw-warning-color) !important;
264
+
outline-offset: 2px;
265
+
}
266
+
267
+
.html5rw-info {
268
+
outline: 2px solid var(--html5rw-info-color) !important;
269
+
outline-offset: 2px;
270
+
}
271
+
272
+
.html5rw-highlighted {
273
+
background-color: rgba(52, 152, 219, 0.3) !important;
274
+
animation: html5rw-pulse 1s ease-in-out;
275
+
}
276
+
277
+
@keyframes html5rw-pulse {
278
+
0%%, 100%% { background-color: rgba(52, 152, 219, 0.3); }
279
+
50%% { background-color: rgba(52, 152, 219, 0.5); }
280
+
}
281
+
282
+
.html5rw-tooltip {
283
+
position: fixed;
284
+
z-index: 100000;
285
+
background: var(--html5rw-bg);
286
+
border: 1px solid var(--html5rw-border);
287
+
border-radius: 6px;
288
+
padding: 8px 12px;
289
+
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
290
+
max-width: 400px;
291
+
font-family: system-ui, -apple-system, sans-serif;
292
+
font-size: 13px;
293
+
color: var(--html5rw-text);
294
+
opacity: 0;
295
+
visibility: hidden;
296
+
transition: opacity 0.2s, visibility 0.2s;
297
+
pointer-events: none;
298
+
}
299
+
300
+
.html5rw-tooltip-visible {
301
+
opacity: 1;
302
+
visibility: visible;
303
+
}
304
+
305
+
.html5rw-tooltip-error .html5rw-tooltip-severity {
306
+
color: var(--html5rw-error-color);
307
+
font-weight: 600;
308
+
margin-right: 8px;
309
+
}
310
+
311
+
.html5rw-tooltip-warning .html5rw-tooltip-severity {
312
+
color: var(--html5rw-warning-color);
313
+
font-weight: 600;
314
+
margin-right: 8px;
315
+
}
316
+
317
+
.html5rw-tooltip-info .html5rw-tooltip-severity {
318
+
color: var(--html5rw-info-color);
319
+
font-weight: 600;
320
+
margin-right: 8px;
321
+
}
322
+
323
+
.html5rw-tooltip > div {
324
+
margin-bottom: 4px;
325
+
}
326
+
327
+
.html5rw-tooltip > div:last-child {
328
+
margin-bottom: 0;
329
+
}
330
+
|} theme_vars in
331
+
332
+
let doc = G.document in
333
+
let style_el = El.v (Jstr.v "style") [] in
334
+
El.set_children style_el [El.txt' css];
335
+
El.set_at (Jstr.v "data-html5rw-styles") (Some (Jstr.v "true")) style_el;
336
+
El.append_children (Document.head doc) [style_el];
337
+
style_el
338
+
339
+
let remove_injected_styles style_el =
340
+
El.remove style_el
+166
lib/js/htmlrw_js_annotate.mli
+166
lib/js/htmlrw_js_annotate.mli
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** DOM annotation for validation warnings.
7
+
8
+
This module applies validation results to the live DOM by adding
9
+
data attributes, CSS classes, and tooltip overlays to elements
10
+
that have warnings. *)
11
+
12
+
open Htmlrw_js_types
13
+
14
+
15
+
(** {1 Annotation} *)
16
+
17
+
(** Annotate elements in a subtree based on validation results.
18
+
19
+
For each message with an element reference, this function:
20
+
1. Adds data attributes ([data-html5rw-severity], etc.) if configured
21
+
2. Adds CSS classes ([html5rw-error], etc.) if configured
22
+
3. Creates tooltip elements if configured
23
+
24
+
@param config Annotation configuration.
25
+
@param root The root element to annotate within.
26
+
@param messages The validation messages with element references. *)
27
+
val annotate :
28
+
config:annotation_config ->
29
+
root:Brr.El.t ->
30
+
browser_message list ->
31
+
unit
32
+
33
+
(** Annotate a single element with a message.
34
+
35
+
Lower-level function for custom annotation logic. *)
36
+
val annotate_element :
37
+
config:annotation_config ->
38
+
Brr.El.t ->
39
+
Htmlrw_check.message ->
40
+
unit
41
+
42
+
43
+
(** {1 Clearing Annotations} *)
44
+
45
+
(** Remove all annotations from a subtree.
46
+
47
+
This removes:
48
+
- All [data-html5rw-*] attributes
49
+
- All [html5rw-*] CSS classes
50
+
- All tooltip elements created by this module *)
51
+
val clear : Brr.El.t -> unit
52
+
53
+
(** Remove annotations from a single element (not descendants). *)
54
+
val clear_element : Brr.El.t -> unit
55
+
56
+
57
+
(** {1 Tooltips} *)
58
+
59
+
(** Tooltip state for an element. *)
60
+
type tooltip
61
+
62
+
(** Create a tooltip for an element.
63
+
64
+
The tooltip is not immediately visible; it appears on hover
65
+
if CSS is set up correctly, or can be shown programmatically.
66
+
67
+
@param position Where to position the tooltip.
68
+
@param el The element to attach the tooltip to.
69
+
@param messages All messages for this element (may be multiple). *)
70
+
val create_tooltip :
71
+
position:[ `Above | `Below | `Auto ] ->
72
+
Brr.El.t ->
73
+
Htmlrw_check.message list ->
74
+
tooltip
75
+
76
+
(** Show a tooltip immediately. *)
77
+
val show_tooltip : tooltip -> unit
78
+
79
+
(** Hide a tooltip. *)
80
+
val hide_tooltip : tooltip -> unit
81
+
82
+
(** Remove a tooltip from the DOM. *)
83
+
val remove_tooltip : tooltip -> unit
84
+
85
+
(** Get all tooltips created in a subtree. *)
86
+
val tooltips_in : Brr.El.t -> tooltip list
87
+
88
+
89
+
(** {1 Highlighting} *)
90
+
91
+
(** Highlight an element (for click-to-navigate in the panel).
92
+
93
+
Adds a temporary visual highlight and scrolls the element into view. *)
94
+
val highlight_element : Brr.El.t -> unit
95
+
96
+
(** Remove highlight from an element. *)
97
+
val unhighlight_element : Brr.El.t -> unit
98
+
99
+
(** Remove all highlights. *)
100
+
val clear_highlights : unit -> unit
101
+
102
+
103
+
(** {1 Data Attributes}
104
+
105
+
Constants for the data attributes used by annotation. *)
106
+
107
+
module Data_attr : sig
108
+
(** [data-html5rw-severity] - "error", "warning", or "info" *)
109
+
val severity : Jstr.t
110
+
111
+
(** [data-html5rw-message] - The warning message text *)
112
+
val message : Jstr.t
113
+
114
+
(** [data-html5rw-code] - The error code *)
115
+
val code : Jstr.t
116
+
117
+
(** [data-html5rw-count] - Number of warnings on this element *)
118
+
val count : Jstr.t
119
+
end
120
+
121
+
122
+
(** {1 CSS Classes}
123
+
124
+
Constants for the CSS classes used by annotation. *)
125
+
126
+
module Css_class : sig
127
+
(** [html5rw-error] - Element has at least one error *)
128
+
val error : Jstr.t
129
+
130
+
(** [html5rw-warning] - Element has warnings but no errors *)
131
+
val warning : Jstr.t
132
+
133
+
(** [html5rw-info] - Element has only info messages *)
134
+
val info : Jstr.t
135
+
136
+
(** [html5rw-has-issues] - Element has any validation messages *)
137
+
val has_issues : Jstr.t
138
+
139
+
(** [html5rw-highlighted] - Element is currently highlighted *)
140
+
val highlighted : Jstr.t
141
+
142
+
(** [html5rw-tooltip] - The tooltip container element *)
143
+
val tooltip : Jstr.t
144
+
145
+
(** [html5rw-tooltip-visible] - Tooltip is currently visible *)
146
+
val tooltip_visible : Jstr.t
147
+
end
148
+
149
+
150
+
(** {1 CSS Injection}
151
+
152
+
Optionally inject default styles for annotations. *)
153
+
154
+
(** Inject default CSS styles for annotations and tooltips.
155
+
156
+
Adds a [<style>] element to the document head with styles for:
157
+
- Annotation classes (outlines, backgrounds)
158
+
- Tooltip positioning and appearance
159
+
- Highlight animation
160
+
161
+
@param theme Light or dark theme. [`Auto] uses [prefers-color-scheme].
162
+
@return The injected style element (can be removed later). *)
163
+
val inject_default_styles : theme:[ `Light | `Dark | `Auto ] -> Brr.El.t
164
+
165
+
(** Remove the injected style element. *)
166
+
val remove_injected_styles : Brr.El.t -> unit
+220
lib/js/htmlrw_js_dom.ml
+220
lib/js/htmlrw_js_dom.ml
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
open Brr
7
+
8
+
(* Helper to compare elements using JavaScript strict equality *)
9
+
let el_equal a b =
10
+
Jv.strict_equal (El.to_jv a) (El.to_jv b)
11
+
12
+
(* A location-keyed map for finding elements by line/column *)
13
+
module LocMap = Map.Make(struct
14
+
type t = int * int
15
+
let compare = compare
16
+
end)
17
+
18
+
type t = {
19
+
root : El.t;
20
+
html_source : string;
21
+
loc_to_el : El.t LocMap.t;
22
+
(* Mapping from (line, column) to browser elements *)
23
+
}
24
+
25
+
let outer_html el =
26
+
Jstr.to_string (Jv.get (El.to_jv el) "outerHTML" |> Jv.to_jstr)
27
+
28
+
let inner_html el =
29
+
Jstr.to_string (Jv.get (El.to_jv el) "innerHTML" |> Jv.to_jstr)
30
+
31
+
let iter_elements f root =
32
+
let rec walk el =
33
+
f el;
34
+
List.iter walk (El.children ~only_els:true el)
35
+
in
36
+
walk root
37
+
38
+
let fold_elements f acc root =
39
+
let rec walk acc el =
40
+
let acc = f acc el in
41
+
List.fold_left walk acc (El.children ~only_els:true el)
42
+
in
43
+
walk acc root
44
+
45
+
let filter_elements pred root =
46
+
fold_elements (fun acc el ->
47
+
if pred el then el :: acc else acc
48
+
) [] root |> List.rev
49
+
50
+
(* Build element map by walking browser DOM and parsed DOM in parallel *)
51
+
let create root =
52
+
let raw_html = outer_html root in
53
+
(* Prepend DOCTYPE if not present - outerHTML doesn't include it *)
54
+
let html =
55
+
let lower = String.lowercase_ascii raw_html in
56
+
if String.length lower >= 9 && String.sub lower 0 9 = "<!doctype" then
57
+
raw_html
58
+
else
59
+
"<!DOCTYPE html>" ^ raw_html
60
+
in
61
+
62
+
(* Parse the HTML to get a tree with locations *)
63
+
let reader = Bytesrw.Bytes.Reader.of_string html in
64
+
let parsed = Html5rw.parse ~collect_errors:false reader in
65
+
66
+
(* Walk both trees in parallel to build the mapping.
67
+
Browser elements are in document order, and so are Html5rw nodes. *)
68
+
let browser_elements = fold_elements (fun acc el -> el :: acc) [] root |> List.rev in
69
+
70
+
(* Extract elements from Html5rw DOM in document order *)
71
+
let rec extract_html5rw_elements acc node =
72
+
if Html5rw.is_element node then
73
+
let children = node.Html5rw.Dom.children in
74
+
let acc = node :: acc in
75
+
List.fold_left extract_html5rw_elements acc children
76
+
else
77
+
let children = node.Html5rw.Dom.children in
78
+
List.fold_left extract_html5rw_elements acc children
79
+
in
80
+
let html5rw_elements = extract_html5rw_elements [] (Html5rw.root parsed) |> List.rev in
81
+
82
+
(* Build the location map by matching elements *)
83
+
let loc_to_el =
84
+
(* Find the starting point in parsed elements that matches the root tag *)
85
+
let root_tag = String.lowercase_ascii (Jstr.to_string (El.tag_name root)) in
86
+
let rec find_start = function
87
+
| [] -> []
88
+
| h_el :: rest ->
89
+
if String.lowercase_ascii h_el.Html5rw.Dom.name = root_tag then
90
+
h_el :: rest
91
+
else
92
+
find_start rest
93
+
in
94
+
let html5rw_elements_aligned = find_start html5rw_elements in
95
+
96
+
let rec match_elements loc_map browser_els html5rw_els =
97
+
match browser_els, html5rw_els with
98
+
| [], _ | _, [] -> loc_map
99
+
| b_el :: b_rest, h_el :: h_rest ->
100
+
let b_tag = String.lowercase_ascii (Jstr.to_string (El.tag_name b_el)) in
101
+
let h_tag = String.lowercase_ascii h_el.Html5rw.Dom.name in
102
+
if b_tag = h_tag then
103
+
(* Tags match - record the mapping if we have a location *)
104
+
let loc_map =
105
+
match h_el.Html5rw.Dom.location with
106
+
| Some loc -> LocMap.add (loc.line, loc.column) b_el loc_map
107
+
| None -> loc_map
108
+
in
109
+
match_elements loc_map b_rest h_rest
110
+
else
111
+
(* Tags don't match - try skipping the parsed element first *)
112
+
(* This handles cases where parser creates implicit elements *)
113
+
match_elements loc_map browser_els h_rest
114
+
in
115
+
match_elements LocMap.empty browser_elements html5rw_elements_aligned
116
+
in
117
+
118
+
{ root; html_source = html; loc_to_el }, html
119
+
120
+
let find_by_location t ~line ~column =
121
+
LocMap.find_opt (line, column) t.loc_to_el
122
+
123
+
let find_by_location_and_tag t ~line ~column ~tag =
124
+
match LocMap.find_opt (line, column) t.loc_to_el with
125
+
| Some el when String.lowercase_ascii (Jstr.to_string (El.tag_name el)) =
126
+
String.lowercase_ascii tag ->
127
+
Some el
128
+
| _ -> None
129
+
130
+
let find_for_message t msg =
131
+
(* Try to find element by location first *)
132
+
match msg.Htmlrw_check.location with
133
+
| Some loc ->
134
+
(match msg.Htmlrw_check.element with
135
+
| Some tag -> find_by_location_and_tag t ~line:loc.line ~column:loc.column ~tag
136
+
| None -> find_by_location t ~line:loc.line ~column:loc.column)
137
+
| None ->
138
+
(* No location - try to find by element name if we have one *)
139
+
match msg.Htmlrw_check.element with
140
+
| Some tag ->
141
+
(* Find first element with this tag *)
142
+
let matches = filter_elements (fun el ->
143
+
String.lowercase_ascii (Jstr.to_string (El.tag_name el)) =
144
+
String.lowercase_ascii tag
145
+
) t.root in
146
+
(match matches with
147
+
| el :: _ -> Some el
148
+
| [] -> None)
149
+
| None -> None
150
+
151
+
let html_source t = t.html_source
152
+
153
+
let root_element t = t.root
154
+
155
+
let selector_path ?root el =
156
+
let stop_at = match root with
157
+
| Some r -> Some r
158
+
| None -> None
159
+
in
160
+
let rec build_path el acc =
161
+
(* Stop if we've reached the root *)
162
+
let should_stop = match stop_at with
163
+
| Some r -> el_equal el r
164
+
| None -> String.lowercase_ascii (Jstr.to_string (El.tag_name el)) = "body"
165
+
in
166
+
if should_stop then
167
+
acc
168
+
else
169
+
let tag = String.lowercase_ascii (Jstr.to_string (El.tag_name el)) in
170
+
let segment =
171
+
match El.parent el with
172
+
| None -> tag
173
+
| Some parent ->
174
+
let siblings = El.children ~only_els:true parent in
175
+
let same_tag = List.filter (fun sib ->
176
+
String.lowercase_ascii (Jstr.to_string (El.tag_name sib)) = tag
177
+
) siblings in
178
+
if List.length same_tag <= 1 then
179
+
tag
180
+
else
181
+
let idx =
182
+
let rec find_idx i = function
183
+
| [] -> 1
184
+
| sib :: rest ->
185
+
if el_equal sib el then i
186
+
else find_idx (i + 1) rest
187
+
in
188
+
find_idx 1 same_tag
189
+
in
190
+
Printf.sprintf "%s:nth-of-type(%d)" tag idx
191
+
in
192
+
let new_acc = segment :: acc in
193
+
match El.parent el with
194
+
| None -> new_acc
195
+
| Some parent -> build_path parent new_acc
196
+
in
197
+
String.concat " > " (build_path el [])
198
+
199
+
let short_selector ?root el =
200
+
(* Try ID first *)
201
+
match El.at (Jstr.v "id") el with
202
+
| Some id when not (Jstr.is_empty id) ->
203
+
"#" ^ Jstr.to_string id
204
+
| _ ->
205
+
(* Try parent ID + short path *)
206
+
let rec find_id_ancestor el depth =
207
+
if depth > 3 then None
208
+
else match El.parent el with
209
+
| None -> None
210
+
| Some parent ->
211
+
match El.at (Jstr.v "id") parent with
212
+
| Some id when not (Jstr.is_empty id) -> Some (parent, id)
213
+
| _ -> find_id_ancestor parent (depth + 1)
214
+
in
215
+
match find_id_ancestor el 0 with
216
+
| Some (ancestor, id) ->
217
+
let path = selector_path ~root:ancestor el in
218
+
"#" ^ Jstr.to_string id ^ " > " ^ path
219
+
| None ->
220
+
selector_path ?root el
+111
lib/js/htmlrw_js_dom.mli
+111
lib/js/htmlrw_js_dom.mli
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** Browser DOM utilities for mapping validation results to live elements.
7
+
8
+
This module bridges the gap between HTML string validation (which produces
9
+
line/column locations) and live DOM manipulation (which needs element
10
+
references). It builds mappings between source positions and DOM elements
11
+
by walking both the serialized HTML and the DOM tree in parallel. *)
12
+
13
+
14
+
(** {1 Element Mapping}
15
+
16
+
When we validate [element.outerHTML], we get messages with line/column
17
+
positions. To annotate the original DOM, we need to map those positions
18
+
back to the live elements. *)
19
+
20
+
(** An element map associates source locations with DOM elements. *)
21
+
type t
22
+
23
+
(** Build an element map by walking a DOM element and its serialization.
24
+
25
+
This function:
26
+
1. Serializes the element to HTML via [outerHTML]
27
+
2. Parses that HTML with Html5rw to get the parse tree with locations
28
+
3. Walks both trees in parallel to build a bidirectional mapping
29
+
30
+
@param root The DOM element to map.
31
+
@return The element map and the HTML source string. *)
32
+
val create : Brr.El.t -> t * string
33
+
34
+
(** Find the DOM element corresponding to a source location.
35
+
36
+
@param line 1-indexed line number
37
+
@param column 1-indexed column number
38
+
@return The element at or containing that position, or [None]. *)
39
+
val find_by_location : t -> line:int -> column:int -> Brr.El.t option
40
+
41
+
(** Find the DOM element corresponding to an element name at a location.
42
+
43
+
More precise than {!find_by_location} when the validator provides
44
+
the element name along with the location.
45
+
46
+
@param line 1-indexed line number
47
+
@param column 1-indexed column number
48
+
@param tag Element tag name (lowercase)
49
+
@return The matching element, or [None]. *)
50
+
val find_by_location_and_tag :
51
+
t -> line:int -> column:int -> tag:string -> Brr.El.t option
52
+
53
+
(** Find the DOM element for a validation message.
54
+
55
+
Uses the message's location and element fields to find the best match.
56
+
This is the primary function used by the annotation system. *)
57
+
val find_for_message : t -> Htmlrw_check.message -> Brr.El.t option
58
+
59
+
(** The HTML source string that was used to build this map. *)
60
+
val html_source : t -> string
61
+
62
+
(** The root element this map was built from. *)
63
+
val root_element : t -> Brr.El.t
64
+
65
+
66
+
(** {1 CSS Selector Generation} *)
67
+
68
+
(** Build a CSS selector path that uniquely identifies an element.
69
+
70
+
The selector uses child combinators and [:nth-child] to be specific:
71
+
["body > div.main:nth-child(2) > p > img:nth-child(1)"]
72
+
73
+
@param root Optional root element; selector will be relative to this.
74
+
Defaults to [document.body].
75
+
@param el The element to build a selector for.
76
+
@return A CSS selector string. *)
77
+
val selector_path : ?root:Brr.El.t -> Brr.El.t -> string
78
+
79
+
(** Build a shorter selector using IDs and classes when available.
80
+
81
+
Tries to find the shortest unique selector:
82
+
1. If element has an ID: ["#myId"]
83
+
2. If parent has ID: ["#parentId > .myClass"]
84
+
3. Falls back to full path from {!selector_path}
85
+
86
+
@param root Optional root element.
87
+
@param el The element to build a selector for. *)
88
+
val short_selector : ?root:Brr.El.t -> Brr.El.t -> string
89
+
90
+
91
+
(** {1 DOM Iteration} *)
92
+
93
+
(** Iterate over all elements in document order (depth-first pre-order). *)
94
+
val iter_elements : (Brr.El.t -> unit) -> Brr.El.t -> unit
95
+
96
+
(** Fold over all elements in document order. *)
97
+
val fold_elements : ('a -> Brr.El.t -> 'a) -> 'a -> Brr.El.t -> 'a
98
+
99
+
(** Find all elements matching a predicate. *)
100
+
val filter_elements : (Brr.El.t -> bool) -> Brr.El.t -> Brr.El.t list
101
+
102
+
103
+
(** {1 Serialization} *)
104
+
105
+
(** Get the outer HTML of an element.
106
+
107
+
This is a wrapper around the browser's [outerHTML] property. *)
108
+
val outer_html : Brr.El.t -> string
109
+
110
+
(** Get the inner HTML of an element. *)
111
+
val inner_html : Brr.El.t -> string
+9
lib/js/htmlrw_js_main.ml
+9
lib/js/htmlrw_js_main.ml
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(* Entry point for the standalone JavaScript build.
7
+
This registers the API on window.html5rw when the script loads. *)
8
+
9
+
let () = Htmlrw_js.register_global_api ()
+56
lib/js/htmlrw_js_main.mli
+56
lib/js/htmlrw_js_main.mli
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** Entry point for the standalone JavaScript build.
7
+
8
+
This module is compiled to [htmlrw.js] and automatically registers
9
+
the validation API on [window.html5rw] when loaded.
10
+
11
+
{2 Browser Usage}
12
+
13
+
{v
14
+
<script src="htmlrw.js"></script>
15
+
<script>
16
+
// API is available immediately after loading
17
+
const result = html5rw.validateElement(document.body);
18
+
19
+
if (result.errorCount > 0) {
20
+
console.log("Found", result.errorCount, "errors");
21
+
22
+
// Show the warning panel
23
+
html5rw.showPanel(result);
24
+
}
25
+
</script>
26
+
v}
27
+
28
+
{2 Module Bundler Usage}
29
+
30
+
If using a bundler that supports CommonJS or ES modules, you can
31
+
import the module instead:
32
+
33
+
{v
34
+
import { validateElement, showPanel } from './htmlrw.js';
35
+
36
+
const result = validateElement(document.body);
37
+
if (result.hasErrors) {
38
+
showPanel(result);
39
+
}
40
+
v}
41
+
42
+
The module exports are set up to work with both import styles.
43
+
44
+
{2 API Reference}
45
+
46
+
See {!Htmlrw_js} for the full API documentation. The JavaScript API
47
+
mirrors the OCaml API with camelCase naming:
48
+
49
+
- [html5rw.validateString(html)] - Validate an HTML string
50
+
- [html5rw.validateElement(el)] - Validate a DOM element
51
+
- [html5rw.validateAndAnnotate(el, config?)] - Validate and annotate
52
+
- [html5rw.showPanel(result, config?)] - Show the warning panel
53
+
- [html5rw.hidePanel()] - Hide the warning panel
54
+
- [html5rw.clearAnnotations(el)] - Clear annotations from an element *)
55
+
56
+
(* This module has no values; its side effect is registering the API *)
+407
lib/js/htmlrw_js_tests.ml
+407
lib/js/htmlrw_js_tests.ml
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: ISC
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** Browser-compatible test runner for html5rw regression tests.
7
+
8
+
This module provides functions to run html5lib conformance tests
9
+
in the browser, receiving test data as strings and returning results
10
+
as JavaScript-accessible objects. *)
11
+
12
+
[@@@warning "-69-33"] (* Silence unused-field and unused-open warnings *)
13
+
14
+
open Brr
15
+
16
+
(* ============================================================ *)
17
+
(* Test Result Types *)
18
+
(* ============================================================ *)
19
+
20
+
type test_result = {
21
+
test_num : int;
22
+
description : string;
23
+
input : string;
24
+
expected : string;
25
+
actual : string;
26
+
success : bool;
27
+
}
28
+
29
+
type file_result = {
30
+
filename : string;
31
+
test_type : string;
32
+
passed_count : int;
33
+
failed_count : int;
34
+
tests : test_result list;
35
+
}
36
+
37
+
type suite_result = {
38
+
name : string;
39
+
total_passed : int;
40
+
total_failed : int;
41
+
files : file_result list;
42
+
}
43
+
44
+
(* ============================================================ *)
45
+
(* Tree Construction Tests *)
46
+
(* ============================================================ *)
47
+
48
+
module TreeConstruction = struct
49
+
type test_case = {
50
+
input : string;
51
+
expected_tree : string;
52
+
expected_errors : string list;
53
+
script_on : bool;
54
+
fragment_context : string option;
55
+
}
56
+
57
+
let parse_test_case lines =
58
+
let rec parse acc = function
59
+
| [] -> acc
60
+
| line :: rest when String.length line > 0 && line.[0] = '#' ->
61
+
let section = String.trim line in
62
+
let content, remaining = collect_section rest in
63
+
parse ((section, content) :: acc) remaining
64
+
| _ :: rest -> parse acc rest
65
+
and collect_section lines =
66
+
let rec loop acc = function
67
+
| [] -> (List.rev acc, [])
68
+
| line :: rest when String.length line > 0 && line.[0] = '#' ->
69
+
(List.rev acc, line :: rest)
70
+
| line :: rest -> loop (line :: acc) rest
71
+
in
72
+
loop [] lines
73
+
in
74
+
let sections = parse [] lines in
75
+
let get_section name =
76
+
match List.assoc_opt name sections with
77
+
| Some lines -> String.concat "\n" lines
78
+
| None -> ""
79
+
in
80
+
let data = get_section "#data" in
81
+
let document = get_section "#document" in
82
+
let errors_text = get_section "#errors" in
83
+
let errors =
84
+
String.split_on_char '\n' errors_text
85
+
|> List.filter (fun s -> String.trim s <> "")
86
+
in
87
+
let script_on = List.mem_assoc "#script-on" sections in
88
+
let fragment =
89
+
if List.mem_assoc "#document-fragment" sections then
90
+
Some (get_section "#document-fragment" |> String.trim)
91
+
else None
92
+
in
93
+
{ input = data; expected_tree = document; expected_errors = errors;
94
+
script_on; fragment_context = fragment }
95
+
96
+
let parse_dat_content content =
97
+
let lines = String.split_on_char '\n' content in
98
+
let rec split_tests current acc = function
99
+
| [] ->
100
+
if current = [] then List.rev acc
101
+
else List.rev (List.rev current :: acc)
102
+
| "" :: "#data" :: rest ->
103
+
let new_acc = if current = [] then acc else (List.rev current :: acc) in
104
+
split_tests ["#data"] new_acc rest
105
+
| line :: rest ->
106
+
split_tests (line :: current) acc rest
107
+
in
108
+
let test_groups = split_tests [] [] lines in
109
+
List.filter_map (fun lines ->
110
+
if List.exists (fun l -> l = "#data") lines then
111
+
Some (parse_test_case lines)
112
+
else None
113
+
) test_groups
114
+
115
+
let strip_tree_prefix s =
116
+
let lines = String.split_on_char '\n' s in
117
+
let stripped = List.filter_map (fun line ->
118
+
if String.length line >= 2 && String.sub line 0 2 = "| " then
119
+
Some (String.sub line 2 (String.length line - 2))
120
+
else if String.trim line = "" then None
121
+
else Some line
122
+
) lines in
123
+
String.concat "\n" stripped
124
+
125
+
let normalize_tree s =
126
+
let lines = String.split_on_char '\n' s in
127
+
let non_empty = List.filter (fun l -> String.trim l <> "") lines in
128
+
String.concat "\n" non_empty
129
+
130
+
let run_test test =
131
+
try
132
+
let result =
133
+
match test.fragment_context with
134
+
| Some ctx_str ->
135
+
let (namespace, tag_name) =
136
+
match String.split_on_char ' ' ctx_str with
137
+
| [ns; tag] when ns = "svg" -> (Some "svg", tag)
138
+
| [ns; tag] when ns = "math" -> (Some "mathml", tag)
139
+
| [tag] -> (None, tag)
140
+
| _ -> (None, ctx_str)
141
+
in
142
+
let context = Html5rw.Parser.make_fragment_context ~tag_name ~namespace () in
143
+
let reader = Bytesrw.Bytes.Reader.of_string test.input in
144
+
Html5rw.Parser.parse ~collect_errors:true ~fragment_context:context reader
145
+
| None ->
146
+
let reader = Bytesrw.Bytes.Reader.of_string test.input in
147
+
Html5rw.Parser.parse ~collect_errors:true reader
148
+
in
149
+
let actual_tree = Html5rw.Dom.to_test_format (Html5rw.Parser.root result) in
150
+
let expected = normalize_tree (strip_tree_prefix test.expected_tree) in
151
+
let actual = normalize_tree (strip_tree_prefix actual_tree) in
152
+
(expected = actual, expected, actual)
153
+
with e ->
154
+
let expected = normalize_tree (strip_tree_prefix test.expected_tree) in
155
+
(false, expected, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e))
156
+
157
+
let run_content ~filename content =
158
+
let tests = parse_dat_content content in
159
+
let passed = ref 0 in
160
+
let failed = ref 0 in
161
+
let results = ref [] in
162
+
List.iteri (fun i test ->
163
+
if test.script_on then ()
164
+
else begin
165
+
let (success, expected, actual) = run_test test in
166
+
let description =
167
+
let input_preview = String.sub test.input 0 (min 60 (String.length test.input)) in
168
+
if test.fragment_context <> None then
169
+
Printf.sprintf "Fragment (%s): %s" (Option.get test.fragment_context) input_preview
170
+
else input_preview
171
+
in
172
+
let result = {
173
+
test_num = i + 1; description; input = test.input; expected; actual; success;
174
+
} in
175
+
results := result :: !results;
176
+
if success then incr passed else incr failed
177
+
end
178
+
) tests;
179
+
{
180
+
filename; test_type = "Tree Construction";
181
+
passed_count = !passed; failed_count = !failed;
182
+
tests = List.rev !results;
183
+
}
184
+
end
185
+
186
+
(* ============================================================ *)
187
+
(* Encoding Tests *)
188
+
(* ============================================================ *)
189
+
190
+
module EncodingTests = struct
191
+
type test_case = {
192
+
input : string;
193
+
expected_encoding : string;
194
+
}
195
+
196
+
let normalize_encoding_name s = String.lowercase_ascii (String.trim s)
197
+
198
+
let encoding_to_test_name = function
199
+
| Html5rw.Encoding.Utf8 -> "utf-8"
200
+
| Html5rw.Encoding.Utf16le -> "utf-16le"
201
+
| Html5rw.Encoding.Utf16be -> "utf-16be"
202
+
| Html5rw.Encoding.Windows_1252 -> "windows-1252"
203
+
| Html5rw.Encoding.Iso_8859_2 -> "iso-8859-2"
204
+
| Html5rw.Encoding.Euc_jp -> "euc-jp"
205
+
206
+
let parse_test_case lines =
207
+
let rec parse acc = function
208
+
| [] -> acc
209
+
| line :: rest when String.length line > 0 && line.[0] = '#' ->
210
+
let section = String.trim line in
211
+
let content, remaining = collect_section rest in
212
+
parse ((section, content) :: acc) remaining
213
+
| _ :: rest -> parse acc rest
214
+
and collect_section lines =
215
+
let rec loop acc = function
216
+
| [] -> (List.rev acc, [])
217
+
| line :: rest when String.length line > 0 && line.[0] = '#' ->
218
+
(List.rev acc, line :: rest)
219
+
| line :: rest -> loop (line :: acc) rest
220
+
in loop [] lines
221
+
in
222
+
let sections = parse [] lines in
223
+
let get_section name =
224
+
match List.assoc_opt name sections with
225
+
| Some lines -> String.concat "\n" lines | None -> ""
226
+
in
227
+
let data = get_section "#data" in
228
+
let encoding = get_section "#encoding" in
229
+
{ input = data; expected_encoding = String.trim encoding }
230
+
231
+
let parse_dat_content content =
232
+
let lines = String.split_on_char '\n' content in
233
+
let rec split_tests current acc = function
234
+
| [] -> if current = [] then List.rev acc else List.rev (List.rev current :: acc)
235
+
| "" :: "#data" :: rest ->
236
+
let new_acc = if current = [] then acc else (List.rev current :: acc) in
237
+
split_tests ["#data"] new_acc rest
238
+
| line :: rest -> split_tests (line :: current) acc rest
239
+
in
240
+
let test_groups = split_tests [] [] lines in
241
+
List.filter_map (fun lines ->
242
+
if List.exists (fun l -> l = "#data") lines then Some (parse_test_case lines)
243
+
else None
244
+
) test_groups
245
+
246
+
let run_test test =
247
+
try
248
+
let (_, detected_encoding) = Html5rw.Encoding.decode (Bytes.of_string test.input) () in
249
+
let detected_name = encoding_to_test_name detected_encoding in
250
+
let expected_name = normalize_encoding_name test.expected_encoding in
251
+
let match_encoding det exp =
252
+
det = exp ||
253
+
(det = "windows-1252" && (exp = "windows-1252" || exp = "cp1252" || exp = "iso-8859-1")) ||
254
+
(det = "iso-8859-2" && (exp = "iso-8859-2" || exp = "iso8859-2" || exp = "latin2")) ||
255
+
(det = "utf-8" && (exp = "utf-8" || exp = "utf8")) ||
256
+
(det = "euc-jp" && (exp = "euc-jp" || exp = "eucjp"))
257
+
in
258
+
(match_encoding detected_name expected_name, detected_name, expected_name)
259
+
with e ->
260
+
(false, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), test.expected_encoding)
261
+
262
+
let run_content ~filename content =
263
+
let tests = parse_dat_content content in
264
+
let passed = ref 0 in
265
+
let failed = ref 0 in
266
+
let results = ref [] in
267
+
List.iteri (fun i test ->
268
+
if String.trim test.expected_encoding = "" then ()
269
+
else begin
270
+
let (success, detected, expected) = run_test test in
271
+
let result = {
272
+
test_num = i + 1;
273
+
description = Printf.sprintf "Detect %s encoding" expected;
274
+
input = String.escaped test.input;
275
+
expected; actual = detected; success;
276
+
} in
277
+
results := result :: !results;
278
+
if success then incr passed else incr failed
279
+
end
280
+
) tests;
281
+
{
282
+
filename; test_type = "Encoding Detection";
283
+
passed_count = !passed; failed_count = !failed;
284
+
tests = List.rev !results;
285
+
}
286
+
end
287
+
288
+
(* ============================================================ *)
289
+
(* JavaScript API *)
290
+
(* ============================================================ *)
291
+
292
+
let test_result_to_jv (r : test_result) =
293
+
Jv.obj [|
294
+
"testNum", Jv.of_int r.test_num;
295
+
"description", Jv.of_string r.description;
296
+
"input", Jv.of_string r.input;
297
+
"expected", Jv.of_string r.expected;
298
+
"actual", Jv.of_string r.actual;
299
+
"success", Jv.of_bool r.success;
300
+
|]
301
+
302
+
let file_result_to_jv (r : file_result) =
303
+
Jv.obj [|
304
+
"filename", Jv.of_string r.filename;
305
+
"testType", Jv.of_string r.test_type;
306
+
"passedCount", Jv.of_int r.passed_count;
307
+
"failedCount", Jv.of_int r.failed_count;
308
+
"tests", Jv.of_list test_result_to_jv r.tests;
309
+
|]
310
+
311
+
let suite_result_to_jv (r : suite_result) =
312
+
Jv.obj [|
313
+
"name", Jv.of_string r.name;
314
+
"totalPassed", Jv.of_int r.total_passed;
315
+
"totalFailed", Jv.of_int r.total_failed;
316
+
"files", Jv.of_list file_result_to_jv r.files;
317
+
|]
318
+
319
+
(** Run tree construction tests on a single file's content *)
320
+
let run_tree_construction_test filename content =
321
+
let result = TreeConstruction.run_content ~filename content in
322
+
file_result_to_jv result
323
+
324
+
(** Run encoding detection tests on a single file's content *)
325
+
let run_encoding_test filename content =
326
+
let result = EncodingTests.run_content ~filename content in
327
+
file_result_to_jv result
328
+
329
+
(** Run all tests from provided test data *)
330
+
let run_all_tests (test_files : (string * string * string) list) =
331
+
let tree_files = ref [] in
332
+
let encoding_files = ref [] in
333
+
let total_passed = ref 0 in
334
+
let total_failed = ref 0 in
335
+
336
+
List.iter (fun (test_type, filename, content) ->
337
+
let result = match test_type with
338
+
| "tree-construction" ->
339
+
let r = TreeConstruction.run_content ~filename content in
340
+
tree_files := r :: !tree_files;
341
+
r
342
+
| "encoding" ->
343
+
let r = EncodingTests.run_content ~filename content in
344
+
encoding_files := r :: !encoding_files;
345
+
r
346
+
| _ -> failwith ("Unknown test type: " ^ test_type)
347
+
in
348
+
total_passed := !total_passed + result.passed_count;
349
+
total_failed := !total_failed + result.failed_count
350
+
) test_files;
351
+
352
+
let all_files = List.rev !tree_files @ List.rev !encoding_files in
353
+
let suite = {
354
+
name = "HTML5lib Regression Tests";
355
+
total_passed = !total_passed;
356
+
total_failed = !total_failed;
357
+
files = all_files;
358
+
} in
359
+
suite_result_to_jv suite
360
+
361
+
(* ============================================================ *)
362
+
(* Simple Parser Test for Quick Validation *)
363
+
(* ============================================================ *)
364
+
365
+
let quick_parse_test html =
366
+
try
367
+
let reader = Bytesrw.Bytes.Reader.of_string html in
368
+
let result = Html5rw.Parser.parse ~collect_errors:true reader in
369
+
let root = Html5rw.Parser.root result in
370
+
let serialized = Html5rw.Dom.to_html root in
371
+
let errors = Html5rw.Parser.errors result in
372
+
let error_to_string e = Format.asprintf "%a" Html5rw.pp_parse_error e in
373
+
Jv.obj [|
374
+
"success", Jv.of_bool true;
375
+
"html", Jv.of_string serialized;
376
+
"errorCount", Jv.of_int (List.length errors);
377
+
"errors", Jv.of_list (fun e -> Jv.of_string (error_to_string e)) errors;
378
+
|]
379
+
with e ->
380
+
Jv.obj [|
381
+
"success", Jv.of_bool false;
382
+
"error", Jv.of_string (Printexc.to_string e);
383
+
|]
384
+
385
+
(* ============================================================ *)
386
+
(* Export to JavaScript *)
387
+
(* ============================================================ *)
388
+
389
+
let () =
390
+
let html5rw_tests = Jv.obj [|
391
+
"runTreeConstructionTest", Jv.callback ~arity:2 (fun filename content ->
392
+
run_tree_construction_test (Jv.to_string filename) (Jv.to_string content));
393
+
"runEncodingTest", Jv.callback ~arity:2 (fun filename content ->
394
+
run_encoding_test (Jv.to_string filename) (Jv.to_string content));
395
+
"runAllTests", Jv.callback ~arity:1 (fun files_jv ->
396
+
let files = Jv.to_list (fun item ->
397
+
let test_type = Jv.to_string (Jv.get item "type") in
398
+
let filename = Jv.to_string (Jv.get item "filename") in
399
+
let content = Jv.to_string (Jv.get item "content") in
400
+
(test_type, filename, content)
401
+
) files_jv in
402
+
run_all_tests files);
403
+
"quickParseTest", Jv.callback ~arity:1 (fun html ->
404
+
quick_parse_test (Jv.to_string html));
405
+
"version", Jv.of_string "1.0.0";
406
+
|] in
407
+
Jv.set Jv.global "html5rwTests" html5rw_tests
+25
lib/js/htmlrw_js_tests.mli
+25
lib/js/htmlrw_js_tests.mli
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: ISC
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** Browser-compatible test runner for html5rw regression tests.
7
+
8
+
This module provides functions to run html5lib conformance tests
9
+
in the browser. Test data is provided as strings and results are
10
+
returned as JavaScript-accessible objects.
11
+
12
+
{2 JavaScript API}
13
+
14
+
The following functions are exposed to JavaScript via the global
15
+
[html5rwTests] object:
16
+
17
+
- [html5rwTests.runTreeConstructionTest(filename, content)] - Run tree
18
+
construction tests from a .dat file content
19
+
- [html5rwTests.runEncodingTest(filename, content)] - Run encoding
20
+
detection tests from a .dat file content
21
+
- [html5rwTests.runAllTests(files)] - Run all tests from an array of
22
+
file objects with {type, filename, content}
23
+
- [html5rwTests.quickParseTest(html)] - Quick parse test for a single
24
+
HTML string
25
+
- [html5rwTests.version] - Version string *)
+10
lib/js/htmlrw_js_tests_main.ml
+10
lib/js/htmlrw_js_tests_main.ml
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: ISC
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** Entry point for the browser test runner.
7
+
8
+
The test runner module registers its JavaScript exports when loaded. *)
9
+
10
+
(* Nothing needed here - the Htmlrw_js_tests module registers exports at load time *)
+6
lib/js/htmlrw_js_tests_main.mli
+6
lib/js/htmlrw_js_tests_main.mli
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: ISC
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** Entry point for the browser test runner. *)
+172
lib/js/htmlrw_js_types.ml
+172
lib/js/htmlrw_js_types.ml
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
open Brr
7
+
8
+
(* Helper to compare elements using JavaScript strict equality *)
9
+
let el_equal a b =
10
+
Jv.strict_equal (El.to_jv a) (El.to_jv b)
11
+
12
+
type element_ref = {
13
+
element : El.t option;
14
+
selector : string;
15
+
}
16
+
17
+
type browser_message = {
18
+
message : Htmlrw_check.message;
19
+
element_ref : element_ref option;
20
+
}
21
+
22
+
type result = {
23
+
messages : browser_message list;
24
+
core_result : Htmlrw_check.t;
25
+
source_element : El.t option;
26
+
}
27
+
28
+
type annotation_config = {
29
+
add_data_attrs : bool;
30
+
add_classes : bool;
31
+
show_tooltips : bool;
32
+
tooltip_position : [ `Above | `Below | `Auto ];
33
+
highlight_on_hover : bool;
34
+
}
35
+
36
+
let default_annotation_config = {
37
+
add_data_attrs = true;
38
+
add_classes = true;
39
+
show_tooltips = true;
40
+
tooltip_position = `Auto;
41
+
highlight_on_hover = true;
42
+
}
43
+
44
+
type panel_config = {
45
+
initial_position : [ `TopRight | `TopLeft | `BottomRight | `BottomLeft | `Custom of int * int ];
46
+
draggable : bool;
47
+
resizable : bool;
48
+
collapsible : bool;
49
+
start_collapsed : bool;
50
+
max_height : int option;
51
+
group_by_severity : bool;
52
+
click_to_highlight : bool;
53
+
show_selector_path : bool;
54
+
theme : [ `Light | `Dark | `Auto ];
55
+
}
56
+
57
+
let default_panel_config = {
58
+
initial_position = `TopRight;
59
+
draggable = true;
60
+
resizable = true;
61
+
collapsible = true;
62
+
start_collapsed = false;
63
+
max_height = Some 400;
64
+
group_by_severity = true;
65
+
click_to_highlight = true;
66
+
show_selector_path = true;
67
+
theme = `Auto;
68
+
}
69
+
70
+
let selector_of_element el =
71
+
let rec build_path el acc =
72
+
let tag = Jstr.to_string (El.tag_name el) in
73
+
let id = El.at (Jstr.v "id") el in
74
+
let segment =
75
+
match id with
76
+
| Some id_val when not (Jstr.is_empty id_val) ->
77
+
(* If element has an ID, use it directly *)
78
+
"#" ^ Jstr.to_string id_val
79
+
| _ ->
80
+
(* Otherwise use tag name with nth-child if needed *)
81
+
match El.parent el with
82
+
| None -> tag
83
+
| Some parent ->
84
+
let siblings = El.children ~only_els:true parent in
85
+
let same_tag = List.filter (fun sib ->
86
+
String.lowercase_ascii (Jstr.to_string (El.tag_name sib)) =
87
+
String.lowercase_ascii tag
88
+
) siblings in
89
+
if List.length same_tag <= 1 then
90
+
tag
91
+
else
92
+
let idx =
93
+
let rec find_idx i = function
94
+
| [] -> 1
95
+
| sib :: rest ->
96
+
if el_equal sib el then i
97
+
else find_idx (i + 1) rest
98
+
in
99
+
find_idx 1 same_tag
100
+
in
101
+
Printf.sprintf "%s:nth-of-type(%d)" tag idx
102
+
in
103
+
let new_acc = segment :: acc in
104
+
(* Stop if we hit an ID (absolute reference) or no parent *)
105
+
if String.length segment > 0 && segment.[0] = '#' then
106
+
new_acc
107
+
else
108
+
match El.parent el with
109
+
| None -> new_acc
110
+
| Some parent ->
111
+
if String.lowercase_ascii (Jstr.to_string (El.tag_name parent)) = "html" then
112
+
new_acc
113
+
else
114
+
build_path parent new_acc
115
+
in
116
+
String.concat " > " (build_path el [])
117
+
118
+
let browser_message_to_jv bm =
119
+
let msg = bm.message in
120
+
let obj = Jv.obj [||] in
121
+
Jv.set obj "severity" (Jv.of_string (Htmlrw_check.severity_to_string msg.severity));
122
+
Jv.set obj "message" (Jv.of_string msg.text);
123
+
Jv.set obj "errorCode" (Jv.of_string (Htmlrw_check.error_code_to_string msg.error_code));
124
+
(match msg.element with
125
+
| Some el -> Jv.set obj "elementName" (Jv.of_string el)
126
+
| None -> ());
127
+
(match msg.attribute with
128
+
| Some attr -> Jv.set obj "attribute" (Jv.of_string attr)
129
+
| None -> ());
130
+
(match msg.location with
131
+
| Some loc ->
132
+
Jv.set obj "line" (Jv.of_int loc.line);
133
+
Jv.set obj "column" (Jv.of_int loc.column)
134
+
| None -> ());
135
+
(match bm.element_ref with
136
+
| Some ref ->
137
+
Jv.set obj "selector" (Jv.of_string ref.selector);
138
+
(match ref.element with
139
+
| Some el -> Jv.set obj "element" (El.to_jv el)
140
+
| None -> ())
141
+
| None -> ());
142
+
obj
143
+
144
+
let result_to_jv result =
145
+
let warnings_arr =
146
+
Jv.of_list browser_message_to_jv result.messages
147
+
in
148
+
let error_count =
149
+
List.length (List.filter (fun bm ->
150
+
bm.message.severity = Htmlrw_check.Error
151
+
) result.messages)
152
+
in
153
+
let warning_count =
154
+
List.length (List.filter (fun bm ->
155
+
bm.message.severity = Htmlrw_check.Warning
156
+
) result.messages)
157
+
in
158
+
let info_count =
159
+
List.length (List.filter (fun bm ->
160
+
bm.message.severity = Htmlrw_check.Info
161
+
) result.messages)
162
+
in
163
+
let obj = Jv.obj [||] in
164
+
Jv.set obj "warnings" warnings_arr;
165
+
Jv.set obj "errorCount" (Jv.of_int error_count);
166
+
Jv.set obj "warningCount" (Jv.of_int warning_count);
167
+
Jv.set obj "infoCount" (Jv.of_int info_count);
168
+
Jv.set obj "hasErrors" (Jv.of_bool (error_count > 0));
169
+
(match result.source_element with
170
+
| Some el -> Jv.set obj "sourceElement" (El.to_jv el)
171
+
| None -> ());
172
+
obj
+125
lib/js/htmlrw_js_types.mli
+125
lib/js/htmlrw_js_types.mli
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** Browser-specific types for HTML5rw JavaScript validation.
7
+
8
+
Core validation types ({!Htmlrw_check.severity}, {!Htmlrw_check.message}, etc.)
9
+
are reused from the main library. This module adds only the browser-specific
10
+
types needed for DOM element references, annotation, and UI. *)
11
+
12
+
13
+
(** {1 Element References}
14
+
15
+
Since we validate HTML strings but want to annotate live DOM elements,
16
+
we need to map validation messages back to browser elements. *)
17
+
18
+
(** A reference to a DOM element, providing both programmatic access
19
+
and a serializable CSS selector. *)
20
+
type element_ref = {
21
+
element : Brr.El.t option;
22
+
(** The live DOM element, if still attached to the document.
23
+
May be [None] if validation was performed on a raw HTML string
24
+
without a source element. *)
25
+
26
+
selector : string;
27
+
(** A CSS selector path that uniquely identifies this element.
28
+
Format: ["body > div.container > p:nth-child(3) > img"]
29
+
Useful for logging and re-finding elements. *)
30
+
}
31
+
32
+
(** A validation message paired with its DOM element reference. *)
33
+
type browser_message = {
34
+
message : Htmlrw_check.message;
35
+
(** The core validation message with severity, text, error code, etc. *)
36
+
37
+
element_ref : element_ref option;
38
+
(** Reference to the problematic DOM element, if identifiable.
39
+
[None] for document-level issues like missing DOCTYPE. *)
40
+
}
41
+
42
+
(** Browser validation result. *)
43
+
type result = {
44
+
messages : browser_message list;
45
+
(** All validation messages with element references. *)
46
+
47
+
core_result : Htmlrw_check.t;
48
+
(** The underlying validation result from the core library.
49
+
Use for access to {!Htmlrw_check.errors}, {!Htmlrw_check.has_errors}, etc. *)
50
+
51
+
source_element : Brr.El.t option;
52
+
(** The root element that was validated, if validation started from an element. *)
53
+
}
54
+
55
+
56
+
(** {1 Annotation Configuration} *)
57
+
58
+
(** Configuration for how warnings are displayed on annotated elements. *)
59
+
type annotation_config = {
60
+
add_data_attrs : bool;
61
+
(** Add [data-html5rw-*] attributes to elements:
62
+
- [data-html5rw-severity]: ["error"], ["warning"], or ["info"]
63
+
- [data-html5rw-message]: The warning message text
64
+
- [data-html5rw-code]: The error code *)
65
+
66
+
add_classes : bool;
67
+
(** Add CSS classes: [html5rw-error], [html5rw-warning], [html5rw-info],
68
+
and [html5rw-has-issues] on any element with warnings. *)
69
+
70
+
show_tooltips : bool;
71
+
(** Create tooltip overlays that appear on hover. *)
72
+
73
+
tooltip_position : [ `Above | `Below | `Auto ];
74
+
(** Tooltip position. [`Auto] chooses based on viewport. *)
75
+
76
+
highlight_on_hover : bool;
77
+
(** Highlight elements when hovering over warnings in the panel. *)
78
+
}
79
+
80
+
(** Default: all annotation features enabled, tooltips auto-positioned. *)
81
+
val default_annotation_config : annotation_config
82
+
83
+
84
+
(** {1 Panel Configuration} *)
85
+
86
+
(** Configuration for the floating warning panel. *)
87
+
type panel_config = {
88
+
initial_position : [ `TopRight | `TopLeft | `BottomRight | `BottomLeft | `Custom of int * int ];
89
+
(** Where the panel appears initially. *)
90
+
91
+
draggable : bool;
92
+
resizable : bool;
93
+
collapsible : bool;
94
+
start_collapsed : bool;
95
+
96
+
max_height : int option;
97
+
(** Maximum height in pixels before scrolling. *)
98
+
99
+
group_by_severity : bool;
100
+
(** Group warnings: errors first, then warnings, then info. *)
101
+
102
+
click_to_highlight : bool;
103
+
(** Clicking a warning scrolls to and highlights the element. *)
104
+
105
+
show_selector_path : bool;
106
+
(** Show the CSS selector path in each warning row. *)
107
+
108
+
theme : [ `Light | `Dark | `Auto ];
109
+
(** Color scheme. [`Auto] follows [prefers-color-scheme]. *)
110
+
}
111
+
112
+
(** Default panel configuration. *)
113
+
val default_panel_config : panel_config
114
+
115
+
116
+
(** {1 Conversions} *)
117
+
118
+
(** Build a CSS selector path for an element. *)
119
+
val selector_of_element : Brr.El.t -> string
120
+
121
+
(** Convert a browser message to a JavaScript object. *)
122
+
val browser_message_to_jv : browser_message -> Jv.t
123
+
124
+
(** Convert a result to a JavaScript object. *)
125
+
val result_to_jv : result -> Jv.t
+459
lib/js/htmlrw_js_ui.ml
+459
lib/js/htmlrw_js_ui.ml
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
open Brr
7
+
open Htmlrw_js_types
8
+
9
+
let console_log msg =
10
+
ignore (Jv.call (Jv.get Jv.global "console") "log" [| Jv.of_string ("[html5rw-ui] " ^ msg) |])
11
+
12
+
module Css_class = struct
13
+
let panel = Jstr.v "html5rw-panel"
14
+
let panel_header = Jstr.v "html5rw-panel-header"
15
+
let panel_content = Jstr.v "html5rw-panel-content"
16
+
let panel_collapsed = Jstr.v "html5rw-panel-collapsed"
17
+
let panel_dragging = Jstr.v "html5rw-panel-dragging"
18
+
let warning_list = Jstr.v "html5rw-warning-list"
19
+
let warning_row = Jstr.v "html5rw-warning-row"
20
+
let warning_row_error = Jstr.v "html5rw-warning-row-error"
21
+
let warning_row_warning = Jstr.v "html5rw-warning-row-warning"
22
+
let warning_row_info = Jstr.v "html5rw-warning-row-info"
23
+
let severity_badge = Jstr.v "html5rw-severity-badge"
24
+
let message_text = Jstr.v "html5rw-message-text"
25
+
let selector_path = Jstr.v "html5rw-selector-path"
26
+
let collapse_btn = Jstr.v "html5rw-collapse-btn"
27
+
let close_btn = Jstr.v "html5rw-close-btn"
28
+
let summary_badge = Jstr.v "html5rw-summary-badge"
29
+
let error_count = Jstr.v "html5rw-error-count"
30
+
let warning_count = Jstr.v "html5rw-warning-count"
31
+
let theme_light = Jstr.v "html5rw-theme-light"
32
+
let theme_dark = Jstr.v "html5rw-theme-dark"
33
+
end
34
+
35
+
type t = {
36
+
root : El.t;
37
+
header : El.t;
38
+
content : El.t;
39
+
badge : El.t;
40
+
config : panel_config;
41
+
mutable result : result;
42
+
mutable collapsed : bool;
43
+
mutable highlighted : El.t option;
44
+
mutable on_warning_click : (browser_message -> unit) option;
45
+
mutable on_collapse_toggle : (bool -> unit) option;
46
+
mutable on_close : (unit -> unit) option;
47
+
mutable on_move : (int * int -> unit) option;
48
+
}
49
+
50
+
let _current_panel : t option ref = ref None
51
+
52
+
let current () = !_current_panel
53
+
let root_element t = t.root
54
+
let header_element t = t.header
55
+
let content_element t = t.content
56
+
let badge_element t = t.badge
57
+
58
+
let is_visible t =
59
+
let display = El.computed_style (Jstr.v "display") t.root in
60
+
not (Jstr.equal display (Jstr.v "none"))
61
+
62
+
let is_collapsed t = t.collapsed
63
+
64
+
let position t =
65
+
let x = int_of_float (El.bound_x t.root) in
66
+
let y = int_of_float (El.bound_y t.root) in
67
+
(x, y)
68
+
69
+
let set_position t x y =
70
+
El.set_inline_style (Jstr.v "left") (Jstr.v (Printf.sprintf "%dpx" x)) t.root;
71
+
El.set_inline_style (Jstr.v "top") (Jstr.v (Printf.sprintf "%dpx" y)) t.root;
72
+
El.set_inline_style (Jstr.v "right") (Jstr.v "auto") t.root
73
+
74
+
let highlighted_element t = t.highlighted
75
+
76
+
let clear_highlight t =
77
+
console_log (Printf.sprintf "clear_highlight: highlighted is %s"
78
+
(if t.highlighted = None then "None" else "Some"));
79
+
match t.highlighted with
80
+
| Some el ->
81
+
console_log "clear_highlight: unhighlighting element";
82
+
Htmlrw_js_annotate.unhighlight_element el;
83
+
t.highlighted <- None;
84
+
console_log "clear_highlight: done"
85
+
| None ->
86
+
console_log "clear_highlight: nothing to clear"
87
+
88
+
let navigate_to_element t bm =
89
+
clear_highlight t;
90
+
match bm.element_ref with
91
+
| Some { element = Some el; _ } ->
92
+
Htmlrw_js_annotate.highlight_element el;
93
+
t.highlighted <- Some el
94
+
| _ -> ()
95
+
96
+
let severity_row_class = function
97
+
| Htmlrw_check.Error -> Css_class.warning_row_error
98
+
| Htmlrw_check.Warning -> Css_class.warning_row_warning
99
+
| Htmlrw_check.Info -> Css_class.warning_row_info
100
+
101
+
let create_warning_row ~config t bm =
102
+
let msg = bm.message in
103
+
let sev = Htmlrw_check.severity_to_string msg.Htmlrw_check.severity in
104
+
105
+
let badge = El.v (Jstr.v "span") ~at:[At.class' Css_class.severity_badge] [
106
+
El.txt' (String.uppercase_ascii sev)
107
+
] in
108
+
109
+
let text = El.v (Jstr.v "span") ~at:[At.class' Css_class.message_text] [
110
+
El.txt' msg.Htmlrw_check.text
111
+
] in
112
+
113
+
let children = [badge; text] in
114
+
let children =
115
+
if config.show_selector_path then
116
+
match bm.element_ref with
117
+
| Some ref ->
118
+
let path = El.v (Jstr.v "span") ~at:[At.class' Css_class.selector_path] [
119
+
El.txt' ref.selector
120
+
] in
121
+
children @ [path]
122
+
| None -> children
123
+
else
124
+
children
125
+
in
126
+
127
+
let row = El.v (Jstr.v "div") ~at:[
128
+
At.class' Css_class.warning_row;
129
+
At.class' (severity_row_class msg.Htmlrw_check.severity);
130
+
] children in
131
+
132
+
if config.click_to_highlight then begin
133
+
ignore (Ev.listen Ev.click (fun _ ->
134
+
navigate_to_element t bm;
135
+
match t.on_warning_click with
136
+
| Some f -> f bm
137
+
| None -> ()
138
+
) (El.as_target row))
139
+
end;
140
+
141
+
row
142
+
143
+
let build_content ~config t =
144
+
let messages =
145
+
if config.group_by_severity then
146
+
let errors, warnings, infos = List.fold_left (fun (e, w, i) bm ->
147
+
match bm.message.Htmlrw_check.severity with
148
+
| Htmlrw_check.Error -> (bm :: e, w, i)
149
+
| Htmlrw_check.Warning -> (e, bm :: w, i)
150
+
| Htmlrw_check.Info -> (e, w, bm :: i)
151
+
) ([], [], []) t.result.messages in
152
+
List.rev errors @ List.rev warnings @ List.rev infos
153
+
else
154
+
t.result.messages
155
+
in
156
+
157
+
let rows = List.map (create_warning_row ~config t) messages in
158
+
let list = El.v (Jstr.v "div") ~at:[At.class' Css_class.warning_list] rows in
159
+
160
+
(match config.max_height with
161
+
| Some h ->
162
+
El.set_inline_style (Jstr.v "maxHeight") (Jstr.v (Printf.sprintf "%dpx" h)) list;
163
+
El.set_inline_style (Jstr.v "overflowY") (Jstr.v "auto") list
164
+
| None -> ());
165
+
list
166
+
167
+
let update t result =
168
+
t.result <- result;
169
+
let list = build_content ~config:t.config t in
170
+
El.set_children t.content [list];
171
+
let error_count = List.length (List.filter (fun bm ->
172
+
bm.message.Htmlrw_check.severity = Htmlrw_check.Error
173
+
) result.messages) in
174
+
let warning_count = List.length (List.filter (fun bm ->
175
+
bm.message.Htmlrw_check.severity = Htmlrw_check.Warning
176
+
) result.messages) in
177
+
El.set_children t.badge [
178
+
El.txt' (Printf.sprintf "%d errors, %d warnings" error_count warning_count)
179
+
]
180
+
181
+
let collapse t =
182
+
t.collapsed <- true;
183
+
El.set_class Css_class.panel_collapsed true t.root;
184
+
match t.on_collapse_toggle with Some f -> f true | None -> ()
185
+
186
+
let expand t =
187
+
t.collapsed <- false;
188
+
El.set_class Css_class.panel_collapsed false t.root;
189
+
match t.on_collapse_toggle with Some f -> f false | None -> ()
190
+
191
+
let toggle_collapsed t =
192
+
if t.collapsed then expand t else collapse t
193
+
194
+
let show t =
195
+
El.set_inline_style (Jstr.v "display") (Jstr.v "block") t.root
196
+
197
+
let hide t =
198
+
El.set_inline_style (Jstr.v "display") (Jstr.v "none") t.root
199
+
200
+
let destroy t =
201
+
console_log "destroy: starting";
202
+
clear_highlight t;
203
+
console_log "destroy: cleared highlight";
204
+
(* Clear _current_panel before removing element to avoid comparison issues *)
205
+
(match !_current_panel with
206
+
| Some p when p.root == t.root -> _current_panel := None
207
+
| _ -> ());
208
+
console_log "destroy: cleared current_panel ref";
209
+
El.remove t.root;
210
+
console_log "destroy: removed root element, done"
211
+
212
+
let hide_current () =
213
+
console_log (Printf.sprintf "hide_current: current_panel is %s"
214
+
(if !_current_panel = None then "None" else "Some"));
215
+
match !_current_panel with
216
+
| Some t ->
217
+
console_log "hide_current: destroying existing panel";
218
+
destroy t
219
+
| None ->
220
+
console_log "hide_current: no panel to destroy"
221
+
222
+
let create ~config result =
223
+
console_log (Printf.sprintf "create: starting with %d messages" (List.length result.messages));
224
+
hide_current ();
225
+
console_log "create: hide_current done";
226
+
227
+
let _doc = G.document in
228
+
229
+
let title = El.v (Jstr.v "span") [El.txt' "HTML5 Validation"] in
230
+
231
+
let close_btn = El.v (Jstr.v "button") ~at:[At.class' Css_class.close_btn] [
232
+
El.txt' "x"
233
+
] in
234
+
235
+
let header = El.v (Jstr.v "div") ~at:[At.class' Css_class.panel_header] [
236
+
title; close_btn
237
+
] in
238
+
239
+
let error_count = List.length (List.filter (fun bm ->
240
+
bm.message.Htmlrw_check.severity = Htmlrw_check.Error
241
+
) result.messages) in
242
+
let warning_count = List.length (List.filter (fun bm ->
243
+
bm.message.Htmlrw_check.severity = Htmlrw_check.Warning
244
+
) result.messages) in
245
+
246
+
let badge = El.v (Jstr.v "div") ~at:[At.class' Css_class.summary_badge] [
247
+
El.txt' (Printf.sprintf "%d errors, %d warnings" error_count warning_count)
248
+
] in
249
+
250
+
let content = El.v (Jstr.v "div") ~at:[At.class' Css_class.panel_content] [] in
251
+
252
+
let theme_class = match config.theme with
253
+
| `Light -> Css_class.theme_light
254
+
| `Dark -> Css_class.theme_dark
255
+
| `Auto -> Css_class.theme_light
256
+
in
257
+
258
+
let root = El.v (Jstr.v "div") ~at:[
259
+
At.class' Css_class.panel;
260
+
At.class' theme_class;
261
+
] [header; badge; content] in
262
+
263
+
(match config.initial_position with
264
+
| `TopRight ->
265
+
El.set_inline_style (Jstr.v "top") (Jstr.v "20px") root;
266
+
El.set_inline_style (Jstr.v "right") (Jstr.v "20px") root
267
+
| `TopLeft ->
268
+
El.set_inline_style (Jstr.v "top") (Jstr.v "20px") root;
269
+
El.set_inline_style (Jstr.v "left") (Jstr.v "20px") root
270
+
| `BottomRight ->
271
+
El.set_inline_style (Jstr.v "bottom") (Jstr.v "20px") root;
272
+
El.set_inline_style (Jstr.v "right") (Jstr.v "20px") root
273
+
| `BottomLeft ->
274
+
El.set_inline_style (Jstr.v "bottom") (Jstr.v "20px") root;
275
+
El.set_inline_style (Jstr.v "left") (Jstr.v "20px") root
276
+
| `Custom (x, y) ->
277
+
El.set_inline_style (Jstr.v "left") (Jstr.v (Printf.sprintf "%dpx" x)) root;
278
+
El.set_inline_style (Jstr.v "top") (Jstr.v (Printf.sprintf "%dpx" y)) root);
279
+
280
+
let t = {
281
+
root; header; content; badge; config; result;
282
+
collapsed = config.start_collapsed;
283
+
highlighted = None;
284
+
on_warning_click = None;
285
+
on_collapse_toggle = None;
286
+
on_close = None;
287
+
on_move = None;
288
+
} in
289
+
290
+
update t result;
291
+
292
+
(* Stop mousedown from bubbling to header (prevents drag interference) *)
293
+
ignore (Ev.listen Ev.mousedown (fun ev ->
294
+
console_log "close_btn: mousedown, stopping propagation";
295
+
Ev.stop_propagation ev
296
+
) (El.as_target close_btn));
297
+
298
+
ignore (Ev.listen Ev.click (fun ev ->
299
+
console_log "close_btn: click handler starting";
300
+
Ev.stop_propagation ev;
301
+
console_log "close_btn: stopped propagation, calling destroy";
302
+
destroy t;
303
+
console_log "close_btn: destroy done, checking on_close callback";
304
+
(match t.on_close with Some f -> f () | None -> ());
305
+
console_log "close_btn: click handler done"
306
+
) (El.as_target close_btn));
307
+
308
+
if config.draggable then begin
309
+
let dragging = ref false in
310
+
let offset_x = ref 0.0 in
311
+
let offset_y = ref 0.0 in
312
+
313
+
ignore (Ev.listen Ev.mousedown (fun ev ->
314
+
let m = Ev.as_type ev in
315
+
dragging := true;
316
+
offset_x := Ev.Mouse.client_x m -. El.bound_x root;
317
+
offset_y := Ev.Mouse.client_y m -. El.bound_y root;
318
+
El.set_class Css_class.panel_dragging true root
319
+
) (El.as_target header));
320
+
321
+
ignore (Ev.listen Ev.mousemove (fun ev ->
322
+
if !dragging then begin
323
+
let m = Ev.as_type ev in
324
+
let x = int_of_float (Ev.Mouse.client_x m -. !offset_x) in
325
+
let y = int_of_float (Ev.Mouse.client_y m -. !offset_y) in
326
+
set_position t x y;
327
+
match t.on_move with Some f -> f (x, y) | None -> ()
328
+
end
329
+
) (Window.as_target G.window));
330
+
331
+
ignore (Ev.listen Ev.mouseup (fun _ ->
332
+
dragging := false;
333
+
El.set_class Css_class.panel_dragging false root
334
+
) (Window.as_target G.window))
335
+
end;
336
+
337
+
if config.start_collapsed then
338
+
El.set_class Css_class.panel_collapsed true root;
339
+
340
+
console_log "create: appending panel to document body";
341
+
El.append_children (Document.body G.document) [root];
342
+
343
+
_current_panel := Some t;
344
+
console_log "create: panel creation complete";
345
+
t
346
+
347
+
let on_warning_click t f = t.on_warning_click <- Some f
348
+
let on_collapse_toggle t f = t.on_collapse_toggle <- Some f
349
+
let on_close t f = t.on_close <- Some f
350
+
let on_move t f = t.on_move <- Some f
351
+
352
+
let inject_default_styles ~theme =
353
+
let theme_vars = match theme with
354
+
| `Light -> {|
355
+
--html5rw-panel-bg: #ffffff;
356
+
--html5rw-panel-text: #333333;
357
+
--html5rw-panel-border: #dddddd;
358
+
--html5rw-panel-header-bg: #f5f5f5;
359
+
|}
360
+
| `Dark -> {|
361
+
--html5rw-panel-bg: #2d3436;
362
+
--html5rw-panel-text: #dfe6e9;
363
+
--html5rw-panel-border: #636e72;
364
+
--html5rw-panel-header-bg: #1e272e;
365
+
|}
366
+
| `Auto -> {|
367
+
--html5rw-panel-bg: #ffffff;
368
+
--html5rw-panel-text: #333333;
369
+
--html5rw-panel-border: #dddddd;
370
+
--html5rw-panel-header-bg: #f5f5f5;
371
+
|}
372
+
in
373
+
374
+
let css = Printf.sprintf {|
375
+
:root { %s }
376
+
377
+
@media (prefers-color-scheme: dark) {
378
+
:root {
379
+
--html5rw-panel-bg: #2d3436;
380
+
--html5rw-panel-text: #dfe6e9;
381
+
--html5rw-panel-border: #636e72;
382
+
--html5rw-panel-header-bg: #1e272e;
383
+
}
384
+
}
385
+
386
+
.html5rw-panel {
387
+
position: fixed;
388
+
z-index: 99999;
389
+
width: 400px;
390
+
background: var(--html5rw-panel-bg);
391
+
border: 1px solid var(--html5rw-panel-border);
392
+
border-radius: 8px;
393
+
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.15);
394
+
font-family: system-ui, -apple-system, sans-serif;
395
+
font-size: 13px;
396
+
color: var(--html5rw-panel-text);
397
+
}
398
+
399
+
.html5rw-panel-header {
400
+
display: flex;
401
+
align-items: center;
402
+
padding: 12px 16px;
403
+
background: var(--html5rw-panel-header-bg);
404
+
border-bottom: 1px solid var(--html5rw-panel-border);
405
+
border-radius: 8px 8px 0 0;
406
+
cursor: move;
407
+
user-select: none;
408
+
}
409
+
410
+
.html5rw-panel-header span { flex: 1; font-weight: 600; }
411
+
412
+
.html5rw-panel-header button {
413
+
width: 24px; height: 24px; margin-left: 8px;
414
+
border: none; border-radius: 4px;
415
+
background: transparent; color: var(--html5rw-panel-text);
416
+
cursor: pointer; font-size: 14px;
417
+
display: flex; align-items: center; justify-content: center;
418
+
}
419
+
420
+
.html5rw-panel-header button:hover { background: rgba(0, 0, 0, 0.1); }
421
+
.html5rw-panel-content { padding: 0; }
422
+
.html5rw-panel-collapsed .html5rw-panel-content { display: none; }
423
+
.html5rw-panel-collapsed .html5rw-summary-badge { display: block; }
424
+
.html5rw-summary-badge { display: none; padding: 12px 16px; text-align: center; font-weight: 500; }
425
+
.html5rw-warning-list { max-height: 400px; overflow-y: auto; }
426
+
427
+
.html5rw-warning-row {
428
+
display: flex; flex-direction: column;
429
+
padding: 10px 16px;
430
+
border-bottom: 1px solid var(--html5rw-panel-border);
431
+
cursor: pointer; transition: background 0.15s;
432
+
}
433
+
434
+
.html5rw-warning-row:hover { background: rgba(0, 0, 0, 0.05); }
435
+
.html5rw-warning-row:last-child { border-bottom: none; }
436
+
437
+
.html5rw-severity-badge {
438
+
display: inline-block; padding: 2px 6px; border-radius: 3px;
439
+
font-size: 10px; font-weight: 600; text-transform: uppercase; margin-right: 8px;
440
+
}
441
+
442
+
.html5rw-warning-row-error .html5rw-severity-badge { background: #e74c3c; color: white; }
443
+
.html5rw-warning-row-warning .html5rw-severity-badge { background: #f39c12; color: white; }
444
+
.html5rw-warning-row-info .html5rw-severity-badge { background: #3498db; color: white; }
445
+
.html5rw-message-text { flex: 1; line-height: 1.4; }
446
+
447
+
.html5rw-selector-path {
448
+
display: block; margin-top: 4px; font-size: 11px; color: #888;
449
+
font-family: monospace; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;
450
+
}
451
+
452
+
.html5rw-panel-dragging { opacity: 0.9; }
453
+
|} theme_vars in
454
+
455
+
let doc = G.document in
456
+
let style_el = El.v (Jstr.v "style") [El.txt' css] in
457
+
El.set_at (Jstr.v "data-html5rw-panel-styles") (Some (Jstr.v "true")) style_el;
458
+
El.append_children (Document.head doc) [style_el];
459
+
style_el
+169
lib/js/htmlrw_js_ui.mli
+169
lib/js/htmlrw_js_ui.mli
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(** Floating warning panel UI.
7
+
8
+
This module creates and manages a draggable, floating panel that displays
9
+
validation warnings. The panel supports:
10
+
- Grouping by severity (errors first)
11
+
- Click-to-navigate to problematic elements
12
+
- Collapse/expand functionality
13
+
- Light/dark themes *)
14
+
15
+
open Htmlrw_js_types
16
+
17
+
18
+
(** {1 Panel Management} *)
19
+
20
+
(** The warning panel. *)
21
+
type t
22
+
23
+
(** Create and display a warning panel.
24
+
25
+
The panel is appended to [document.body] and positioned according
26
+
to the configuration.
27
+
28
+
@param config Panel configuration.
29
+
@param result Validation result to display. *)
30
+
val create : config:panel_config -> result -> t
31
+
32
+
(** Update the panel with new validation results.
33
+
34
+
Use this to re-validate and refresh the panel without destroying it. *)
35
+
val update : t -> result -> unit
36
+
37
+
(** Show the panel if hidden. *)
38
+
val show : t -> unit
39
+
40
+
(** Hide the panel (but keep it in the DOM). *)
41
+
val hide : t -> unit
42
+
43
+
(** Remove the panel from the DOM entirely. *)
44
+
val destroy : t -> unit
45
+
46
+
(** Check if the panel is currently visible. *)
47
+
val is_visible : t -> bool
48
+
49
+
(** Check if the panel is currently collapsed. *)
50
+
val is_collapsed : t -> bool
51
+
52
+
53
+
(** {1 Panel State} *)
54
+
55
+
(** Collapse the panel to just show the summary badge. *)
56
+
val collapse : t -> unit
57
+
58
+
(** Expand the panel to show the full warning list. *)
59
+
val expand : t -> unit
60
+
61
+
(** Toggle collapsed state. *)
62
+
val toggle_collapsed : t -> unit
63
+
64
+
(** Get the current position of the panel. *)
65
+
val position : t -> int * int
66
+
67
+
(** Move the panel to a new position. *)
68
+
val set_position : t -> int -> int -> unit
69
+
70
+
71
+
(** {1 Interaction} *)
72
+
73
+
(** Scroll to and highlight an element from a warning row.
74
+
75
+
This is called internally when clicking a warning, but can be
76
+
invoked programmatically. *)
77
+
val navigate_to_element : t -> browser_message -> unit
78
+
79
+
(** Get the currently highlighted element, if any. *)
80
+
val highlighted_element : t -> Brr.El.t option
81
+
82
+
(** Clear the current highlight. *)
83
+
val clear_highlight : t -> unit
84
+
85
+
86
+
(** {1 Event Callbacks}
87
+
88
+
Register callbacks for panel events. *)
89
+
90
+
(** Called when a warning row is clicked. *)
91
+
val on_warning_click : t -> (browser_message -> unit) -> unit
92
+
93
+
(** Called when the panel is collapsed or expanded. *)
94
+
val on_collapse_toggle : t -> (bool -> unit) -> unit
95
+
96
+
(** Called when the panel is closed. *)
97
+
val on_close : t -> (unit -> unit) -> unit
98
+
99
+
(** Called when the panel is dragged to a new position. *)
100
+
val on_move : t -> (int * int -> unit) -> unit
101
+
102
+
103
+
(** {1 Global Panel State}
104
+
105
+
For convenience, there's a single "current" panel that the
106
+
JavaScript API manages. *)
107
+
108
+
(** Get the current panel, if one exists. *)
109
+
val current : unit -> t option
110
+
111
+
(** Hide and destroy the current panel. *)
112
+
val hide_current : unit -> unit
113
+
114
+
115
+
(** {1 Panel Elements}
116
+
117
+
Access to the panel's DOM structure for custom styling. *)
118
+
119
+
(** The root panel element. *)
120
+
val root_element : t -> Brr.El.t
121
+
122
+
(** The header element (contains title and controls). *)
123
+
val header_element : t -> Brr.El.t
124
+
125
+
(** The content element (contains warning list). *)
126
+
val content_element : t -> Brr.El.t
127
+
128
+
(** The summary badge element (shown when collapsed). *)
129
+
val badge_element : t -> Brr.El.t
130
+
131
+
132
+
(** {1 CSS Classes}
133
+
134
+
Classes used by the panel for custom styling. *)
135
+
136
+
module Css_class : sig
137
+
val panel : Jstr.t
138
+
val panel_header : Jstr.t
139
+
val panel_content : Jstr.t
140
+
val panel_collapsed : Jstr.t
141
+
val panel_dragging : Jstr.t
142
+
val warning_list : Jstr.t
143
+
val warning_row : Jstr.t
144
+
val warning_row_error : Jstr.t
145
+
val warning_row_warning : Jstr.t
146
+
val warning_row_info : Jstr.t
147
+
val severity_badge : Jstr.t
148
+
val message_text : Jstr.t
149
+
val selector_path : Jstr.t
150
+
val collapse_btn : Jstr.t
151
+
val close_btn : Jstr.t
152
+
val summary_badge : Jstr.t
153
+
val error_count : Jstr.t
154
+
val warning_count : Jstr.t
155
+
val theme_light : Jstr.t
156
+
val theme_dark : Jstr.t
157
+
end
158
+
159
+
160
+
(** {1 CSS Injection} *)
161
+
162
+
(** Inject default CSS styles for the panel.
163
+
164
+
Styles include layout, colors, shadows, and animations.
165
+
The styles are scoped to the panel's CSS classes.
166
+
167
+
@param theme Color scheme to use.
168
+
@return The injected style element. *)
169
+
val inject_default_styles : theme:[ `Light | `Dark | `Auto ] -> Brr.El.t
+151
lib/js/htmlrw_js_worker.ml
+151
lib/js/htmlrw_js_worker.ml
···
1
+
(*---------------------------------------------------------------------------
2
+
Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3
+
SPDX-License-Identifier: MIT
4
+
---------------------------------------------------------------------------*)
5
+
6
+
(* Web Worker entry point for background HTML validation.
7
+
8
+
This runs in a separate thread and communicates via postMessage.
9
+
It only does string-based validation since workers can't access the DOM.
10
+
*)
11
+
12
+
[@@@warning "-33"] (* Suppress unused open - we only need Jv from Brr *)
13
+
open Brr
14
+
15
+
let console_log msg =
16
+
ignore (Jv.call (Jv.get Jv.global "console") "log" [| Jv.of_string msg |])
17
+
18
+
let console_error msg =
19
+
ignore (Jv.call (Jv.get Jv.global "console") "error" [| Jv.of_string msg |])
20
+
21
+
let ensure_doctype html =
22
+
let lower = String.lowercase_ascii html in
23
+
if String.length lower >= 9 && String.sub lower 0 9 = "<!doctype" then
24
+
html
25
+
else
26
+
"<!DOCTYPE html>" ^ html
27
+
28
+
(* Debug: dump tree structure to see what parser built *)
29
+
let dump_tree_structure html =
30
+
let doc = Html5rw.parse (Bytesrw.Bytes.Reader.of_string html) in
31
+
let root = Html5rw.root doc in
32
+
let buf = Buffer.create 1024 in
33
+
let rec dump indent node =
34
+
let prefix = String.make (indent * 2) ' ' in
35
+
let name = node.Html5rw.Dom.name in
36
+
if name = "#text" then begin
37
+
let text = String.trim node.Html5rw.Dom.data in
38
+
if String.length text > 0 then
39
+
Buffer.add_string buf (Printf.sprintf "%s#text: \"%s\"\n" prefix
40
+
(if String.length text > 30 then String.sub text 0 30 ^ "..." else text))
41
+
end else if name = "#comment" then
42
+
()
43
+
else begin
44
+
Buffer.add_string buf (Printf.sprintf "%s<%s>\n" prefix name);
45
+
if indent < 5 then (* only show first 5 levels *)
46
+
List.iter (dump (indent + 1)) node.Html5rw.Dom.children
47
+
end
48
+
in
49
+
dump 0 root;
50
+
Buffer.contents buf
51
+
52
+
let handle_message msg_data =
53
+
console_log "[html5rw worker] Message received";
54
+
let response = Jv.obj [||] in
55
+
try
56
+
let id = Jv.get msg_data "id" |> Jv.to_int in
57
+
let raw_html = Jv.get msg_data "html" |> Jv.to_string in
58
+
let html = ensure_doctype raw_html in
59
+
console_log (Printf.sprintf "[html5rw worker] Validating %d bytes (id=%d)" (String.length html) id);
60
+
(* Log first 500 chars of HTML for debugging *)
61
+
let preview = if String.length html > 500 then String.sub html 0 500 ^ "..." else html in
62
+
console_log (Printf.sprintf "[html5rw worker] HTML preview:\n%s" preview);
63
+
64
+
Jv.set response "id" (Jv.of_int id);
65
+
66
+
(try
67
+
(* Run validation *)
68
+
let core_result = Htmlrw_check.check_string html in
69
+
let messages = Htmlrw_check.messages core_result in
70
+
71
+
(* Convert messages to JS-friendly format *)
72
+
let warnings = Jv.of_list (fun msg ->
73
+
let obj = Jv.obj [||] in
74
+
Jv.set obj "severity" (Jv.of_string (Htmlrw_check.severity_to_string msg.Htmlrw_check.severity));
75
+
Jv.set obj "message" (Jv.of_string msg.Htmlrw_check.text);
76
+
Jv.set obj "errorCode" (Jv.of_string (Htmlrw_check.error_code_to_string msg.Htmlrw_check.error_code));
77
+
(match msg.Htmlrw_check.element with
78
+
| Some el -> Jv.set obj "elementName" (Jv.of_string el)
79
+
| None -> ());
80
+
(match msg.Htmlrw_check.attribute with
81
+
| Some attr -> Jv.set obj "attribute" (Jv.of_string attr)
82
+
| None -> ());
83
+
(match msg.Htmlrw_check.location with
84
+
| Some loc ->
85
+
Jv.set obj "line" (Jv.of_int loc.line);
86
+
Jv.set obj "column" (Jv.of_int loc.column)
87
+
| None -> ());
88
+
obj
89
+
) messages in
90
+
91
+
let error_count = List.length (List.filter (fun m ->
92
+
m.Htmlrw_check.severity = Htmlrw_check.Error) messages) in
93
+
let warning_count = List.length (List.filter (fun m ->
94
+
m.Htmlrw_check.severity = Htmlrw_check.Warning) messages) in
95
+
let info_count = List.length (List.filter (fun m ->
96
+
m.Htmlrw_check.severity = Htmlrw_check.Info) messages) in
97
+
98
+
Jv.set response "warnings" warnings;
99
+
Jv.set response "errorCount" (Jv.of_int error_count);
100
+
Jv.set response "warningCount" (Jv.of_int warning_count);
101
+
Jv.set response "infoCount" (Jv.of_int info_count);
102
+
Jv.set response "hasErrors" (Jv.of_bool (error_count > 0));
103
+
(* Add tree structure for debugging *)
104
+
let tree_dump = dump_tree_structure html in
105
+
Jv.set response "treeStructure" (Jv.of_string tree_dump);
106
+
Jv.set response "htmlPreview" (Jv.of_string preview);
107
+
console_log (Printf.sprintf "[html5rw worker] Tree structure:\n%s" tree_dump)
108
+
with exn ->
109
+
(* Return error on parse failure *)
110
+
let error_obj = Jv.obj [||] in
111
+
Jv.set error_obj "severity" (Jv.of_string "error");
112
+
Jv.set error_obj "message" (Jv.of_string (Printf.sprintf "Parse error: %s" (Printexc.to_string exn)));
113
+
Jv.set error_obj "errorCode" (Jv.of_string "parse-error");
114
+
Jv.set response "warnings" (Jv.of_list Fun.id [error_obj]);
115
+
Jv.set response "errorCount" (Jv.of_int 1);
116
+
Jv.set response "warningCount" (Jv.of_int 0);
117
+
Jv.set response "infoCount" (Jv.of_int 0);
118
+
Jv.set response "hasErrors" (Jv.of_bool true);
119
+
Jv.set response "parseError" (Jv.of_string (Printexc.to_string exn)));
120
+
121
+
console_log "[html5rw worker] Validation complete, posting response";
122
+
(* Post result back to main thread *)
123
+
let self = Jv.get Jv.global "self" in
124
+
ignore (Jv.call self "postMessage" [| response |])
125
+
with exn ->
126
+
(* Outer error handler - catches message parsing errors *)
127
+
console_error (Printf.sprintf "[html5rw worker] Fatal error: %s" (Printexc.to_string exn));
128
+
let error_obj = Jv.obj [||] in
129
+
Jv.set error_obj "severity" (Jv.of_string "error");
130
+
Jv.set error_obj "message" (Jv.of_string (Printf.sprintf "Worker error: %s" (Printexc.to_string exn)));
131
+
Jv.set error_obj "errorCode" (Jv.of_string "worker-error");
132
+
Jv.set response "id" (Jv.of_int (-1));
133
+
Jv.set response "warnings" (Jv.of_list Fun.id [error_obj]);
134
+
Jv.set response "errorCount" (Jv.of_int 1);
135
+
Jv.set response "warningCount" (Jv.of_int 0);
136
+
Jv.set response "infoCount" (Jv.of_int 0);
137
+
Jv.set response "hasErrors" (Jv.of_bool true);
138
+
Jv.set response "fatalError" (Jv.of_string (Printexc.to_string exn));
139
+
let self = Jv.get Jv.global "self" in
140
+
ignore (Jv.call self "postMessage" [| response |])
141
+
142
+
let () =
143
+
console_log "[html5rw worker] Worker script starting...";
144
+
(* Set up message handler *)
145
+
let self = Jv.get Jv.global "self" in
146
+
let handler = Jv.callback ~arity:1 (fun ev ->
147
+
let data = Jv.get ev "data" in
148
+
handle_message data
149
+
) in
150
+
ignore (Jv.call self "addEventListener" [| Jv.of_string "message"; handler |]);
151
+
console_log "[html5rw worker] Message handler registered, ready for messages"
+16
-1
test/dune
+16
-1
test/dune
···
75
75
(deps
76
76
(source_tree ../validator/tests))
77
77
(action
78
-
(run %{exe:test_validator.exe} ../validator/tests)))
78
+
(run %{exe:test_validator.exe} --both ../validator/tests)))
79
79
80
80
(executable
81
81
(name test_roundtrip)
···
88
88
(source_tree ../validator/tests))
89
89
(action
90
90
(run %{exe:test_roundtrip.exe} ../validator/tests)))
91
+
92
+
(executable
93
+
(name test_comprehensive)
94
+
(modules test_comprehensive)
95
+
(libraries bytesrw html5rw html5rw.check jsont jsont.bytesrw test_report validator_messages expected_message unix))
96
+
97
+
(rule
98
+
(alias runtest)
99
+
(deps
100
+
(glob_files ../html5lib-tests/tree-construction/*.dat)
101
+
(glob_files ../html5lib-tests/tokenizer/*.test)
102
+
(glob_files ../html5lib-tests/encoding/*.dat)
103
+
(source_tree ../validator/tests))
104
+
(action
105
+
(run %{exe:test_comprehensive.exe} ../html5lib-tests ../validator/tests comprehensive_test_report.html)))
+529
test/test_comprehensive.ml
+529
test/test_comprehensive.ml
···
1
+
(* Comprehensive test runner for all html5rw tests
2
+
3
+
Generates a single standalone HTML report combining:
4
+
- HTML5lib tree-construction tests
5
+
- HTML5lib tokenizer tests
6
+
- HTML5lib encoding tests
7
+
- HTML5lib serializer tests
8
+
- Nu HTML Validator tests (both lenient and strict modes)
9
+
- Roundtrip tests
10
+
*)
11
+
12
+
module Report = Test_report
13
+
14
+
(* ============================================================ *)
15
+
(* Test Suite Summary Types *)
16
+
(* ============================================================ *)
17
+
18
+
type suite_summary = {
19
+
name : string;
20
+
description : string; [@warning "-69"]
21
+
passed : int;
22
+
failed : int;
23
+
files : Report.file_result list;
24
+
extra_info : (string * string) list;
25
+
}
26
+
27
+
(* ============================================================ *)
28
+
(* HTML5lib Tests Runner *)
29
+
(* ============================================================ *)
30
+
31
+
module Html5lib_runner = struct
32
+
(* Delegate to test_all.ml implementation by running the tests inline *)
33
+
34
+
open Bytesrw
35
+
36
+
(* Tree Construction Tests *)
37
+
module TreeConstruction = struct
38
+
module Parser = Html5rw.Parser
39
+
module Dom = Html5rw.Dom
40
+
41
+
type test_case = {
42
+
input : string;
43
+
expected_tree : string;
44
+
expected_errors : string list;
45
+
script_on : bool;
46
+
fragment_context : string option;
47
+
raw_lines : string;
48
+
}
49
+
50
+
let parse_test_case lines =
51
+
let raw_lines = String.concat "\n" lines in
52
+
let rec parse acc = function
53
+
| [] -> acc
54
+
| line :: rest when String.length line > 0 && line.[0] = '#' ->
55
+
let section = String.trim line in
56
+
let content, remaining = collect_section rest in
57
+
parse ((section, content) :: acc) remaining
58
+
| _ :: rest -> parse acc rest
59
+
and collect_section lines =
60
+
let rec loop acc = function
61
+
| [] -> (List.rev acc, [])
62
+
| line :: rest when String.length line > 0 && line.[0] = '#' ->
63
+
(List.rev acc, line :: rest)
64
+
| line :: rest -> loop (line :: acc) rest
65
+
in
66
+
loop [] lines
67
+
in
68
+
let sections = parse [] lines in
69
+
let get_section name =
70
+
match List.assoc_opt name sections with
71
+
| Some lines -> String.concat "\n" lines
72
+
| None -> ""
73
+
in
74
+
let data = get_section "#data" in
75
+
let document = get_section "#document" in
76
+
let errors_text = get_section "#errors" in
77
+
let errors =
78
+
String.split_on_char '\n' errors_text
79
+
|> List.filter (fun s -> String.trim s <> "")
80
+
in
81
+
let script_on = List.mem_assoc "#script-on" sections in
82
+
let fragment =
83
+
if List.mem_assoc "#document-fragment" sections then
84
+
Some (get_section "#document-fragment" |> String.trim)
85
+
else None
86
+
in
87
+
{ input = data; expected_tree = document; expected_errors = errors;
88
+
script_on; fragment_context = fragment; raw_lines }
89
+
90
+
let parse_dat_file content =
91
+
let lines = String.split_on_char '\n' content in
92
+
let rec split_tests current acc = function
93
+
| [] ->
94
+
if current = [] then List.rev acc
95
+
else List.rev (List.rev current :: acc)
96
+
| "" :: "#data" :: rest ->
97
+
let new_acc = if current = [] then acc else (List.rev current :: acc) in
98
+
split_tests ["#data"] new_acc rest
99
+
| line :: rest ->
100
+
split_tests (line :: current) acc rest
101
+
in
102
+
let test_groups = split_tests [] [] lines in
103
+
List.filter_map (fun lines ->
104
+
if List.exists (fun l -> l = "#data") lines then
105
+
Some (parse_test_case lines)
106
+
else None
107
+
) test_groups
108
+
109
+
let strip_tree_prefix s =
110
+
let lines = String.split_on_char '\n' s in
111
+
let stripped = List.filter_map (fun line ->
112
+
if String.length line >= 2 && String.sub line 0 2 = "| " then
113
+
Some (String.sub line 2 (String.length line - 2))
114
+
else if String.trim line = "" then None
115
+
else Some line
116
+
) lines in
117
+
String.concat "\n" stripped
118
+
119
+
let normalize_tree s =
120
+
let lines = String.split_on_char '\n' s in
121
+
let non_empty = List.filter (fun l -> String.trim l <> "") lines in
122
+
String.concat "\n" non_empty
123
+
124
+
let run_test test =
125
+
try
126
+
let result =
127
+
match test.fragment_context with
128
+
| Some ctx_str ->
129
+
let (namespace, tag_name) =
130
+
match String.split_on_char ' ' ctx_str with
131
+
| [ns; tag] when ns = "svg" -> (Some "svg", tag)
132
+
| [ns; tag] when ns = "math" -> (Some "mathml", tag)
133
+
| [tag] -> (None, tag)
134
+
| _ -> (None, ctx_str)
135
+
in
136
+
let context = Html5rw.Parser.make_fragment_context ~tag_name ~namespace () in
137
+
let reader = Bytes.Reader.of_string test.input in
138
+
Html5rw.Parser.parse ~collect_errors:true ~fragment_context:context reader
139
+
| None ->
140
+
let reader = Bytes.Reader.of_string test.input in
141
+
Html5rw.Parser.parse ~collect_errors:true reader
142
+
in
143
+
let actual_tree = Html5rw.Dom.to_test_format (Html5rw.Parser.root result) in
144
+
let expected = normalize_tree (strip_tree_prefix test.expected_tree) in
145
+
let actual = normalize_tree (strip_tree_prefix actual_tree) in
146
+
let error_count = List.length (Html5rw.Parser.errors result) in
147
+
let expected_error_count = List.length test.expected_errors in
148
+
(expected = actual, expected, actual, error_count, expected_error_count)
149
+
with e ->
150
+
let expected = normalize_tree (strip_tree_prefix test.expected_tree) in
151
+
(false, expected, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), 0, 0)
152
+
153
+
let run_file path =
154
+
let ic = open_in path in
155
+
let content = really_input_string ic (in_channel_length ic) in
156
+
close_in ic;
157
+
let tests = parse_dat_file content in
158
+
let filename = Filename.basename path in
159
+
let passed = ref 0 in
160
+
let failed = ref 0 in
161
+
let results = ref [] in
162
+
List.iteri (fun i test ->
163
+
if test.script_on then ()
164
+
else begin
165
+
let (success, expected, actual, actual_error_count, expected_error_count) = run_test test in
166
+
let description =
167
+
let input_preview = String.sub test.input 0 (min 60 (String.length test.input)) in
168
+
if test.fragment_context <> None then
169
+
Printf.sprintf "Fragment (%s): %s" (Option.get test.fragment_context) input_preview
170
+
else input_preview
171
+
in
172
+
let result : Report.test_result = {
173
+
test_num = i + 1; description; input = test.input; expected; actual; success;
174
+
details = [
175
+
("Fragment Context", Option.value test.fragment_context ~default:"(none)");
176
+
("Expected Errors", string_of_int expected_error_count);
177
+
("Actual Errors", string_of_int actual_error_count);
178
+
];
179
+
raw_test_data = Some test.raw_lines;
180
+
} in
181
+
results := result :: !results;
182
+
if success then incr passed else incr failed
183
+
end
184
+
) tests;
185
+
let file_result : Report.file_result = {
186
+
filename = "HTML5lib / " ^ filename; test_type = "Tree Construction";
187
+
passed_count = !passed; failed_count = !failed;
188
+
tests = List.rev !results;
189
+
} in
190
+
(file_result, !passed, !failed)
191
+
192
+
let run_dir test_dir =
193
+
if not (Sys.file_exists test_dir) then ([], 0, 0)
194
+
else begin
195
+
let files = Sys.readdir test_dir |> Array.to_list in
196
+
let dat_files = List.filter (fun f ->
197
+
Filename.check_suffix f ".dat" && not (String.contains f '/')
198
+
) files in
199
+
let total_passed = ref 0 in
200
+
let total_failed = ref 0 in
201
+
let file_results = ref [] in
202
+
List.iter (fun file ->
203
+
let path = Filename.concat test_dir file in
204
+
if Sys.is_directory path then () else begin
205
+
let (file_result, passed, failed) = run_file path in
206
+
total_passed := !total_passed + passed;
207
+
total_failed := !total_failed + failed;
208
+
file_results := file_result :: !file_results
209
+
end
210
+
) (List.sort String.compare dat_files);
211
+
(List.rev !file_results, !total_passed, !total_failed)
212
+
end
213
+
end
214
+
215
+
let run base_dir =
216
+
let tree_dir = Filename.concat base_dir "tree-construction" in
217
+
Printf.printf " Running tree-construction tests...\n%!";
218
+
let (tree_files, tree_passed, tree_failed) = TreeConstruction.run_dir tree_dir in
219
+
Printf.printf " Tree construction: %d passed, %d failed\n%!" tree_passed tree_failed;
220
+
221
+
(* For now, just return tree construction results *)
222
+
(* Full implementation would include tokenizer, encoding, serializer *)
223
+
{
224
+
name = "HTML5lib Tests";
225
+
description = "Official html5lib test suite for HTML5 parsing conformance";
226
+
passed = tree_passed;
227
+
failed = tree_failed;
228
+
files = tree_files;
229
+
extra_info = [
230
+
("Tree Construction", Printf.sprintf "%d/%d" tree_passed (tree_passed + tree_failed));
231
+
];
232
+
}
233
+
end
234
+
235
+
(* ============================================================ *)
236
+
(* Validator Tests Runner *)
237
+
(* ============================================================ *)
238
+
239
+
module Validator_runner = struct
240
+
241
+
type expected_outcome = Valid | Invalid | HasWarning | Unknown
242
+
243
+
type test_file = {
244
+
path : string;
245
+
relative_path : string;
246
+
category : string;
247
+
expected : expected_outcome;
248
+
}
249
+
250
+
type test_result = {
251
+
file : test_file;
252
+
passed : bool;
253
+
actual_errors : string list;
254
+
actual_warnings : string list;
255
+
details : string;
256
+
match_quality : Expected_message.match_quality option; [@warning "-69"]
257
+
}
258
+
259
+
let parse_outcome filename =
260
+
if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-isvalid.html" then Valid
261
+
else if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-novalid.html" then Invalid
262
+
else if String.length filename > 13 && String.sub filename (String.length filename - 13) 13 = "-haswarn.html" then HasWarning
263
+
else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-isvalid.xhtml" then Valid
264
+
else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-novalid.xhtml" then Invalid
265
+
else if String.length filename > 14 && String.sub filename (String.length filename - 14) 14 = "-haswarn.xhtml" then HasWarning
266
+
else Unknown
267
+
268
+
let rec discover_tests_in_dir base_dir current_dir =
269
+
let full_path = Filename.concat base_dir current_dir in
270
+
if not (Sys.file_exists full_path) then []
271
+
else if Sys.is_directory full_path then begin
272
+
let entries = Sys.readdir full_path |> Array.to_list in
273
+
List.concat_map (fun entry ->
274
+
let sub_path = if current_dir = "" then entry else Filename.concat current_dir entry in
275
+
discover_tests_in_dir base_dir sub_path
276
+
) entries
277
+
end else if Filename.check_suffix current_dir ".html" || Filename.check_suffix current_dir ".xhtml" then begin
278
+
let outcome = parse_outcome (Filename.basename current_dir) in
279
+
if outcome = Unknown then []
280
+
else
281
+
let category = match String.split_on_char '/' current_dir with cat :: _ -> cat | [] -> "unknown" in
282
+
[{ path = full_path; relative_path = current_dir; category; expected = outcome }]
283
+
end else []
284
+
285
+
let run_test ~strictness messages test =
286
+
try
287
+
let ic = open_in test.path in
288
+
let content = really_input_string ic (in_channel_length ic) in
289
+
close_in ic;
290
+
let reader = Bytesrw.Bytes.Reader.of_string content in
291
+
let result = Htmlrw_check.check ~collect_parse_errors:true ~system_id:test.relative_path reader in
292
+
let error_msgs = Htmlrw_check.errors result in
293
+
let warning_msgs = Htmlrw_check.warnings result in
294
+
let info_msgs = Htmlrw_check.infos result in
295
+
let errors = List.map (fun m -> m.Htmlrw_check.text) error_msgs in
296
+
let warnings = List.map (fun m -> m.Htmlrw_check.text) warning_msgs in
297
+
let infos = List.map (fun m -> m.Htmlrw_check.text) info_msgs in
298
+
let expected_msg = Validator_messages.get messages test.relative_path in
299
+
300
+
let (passed, details, match_quality) = match test.expected with
301
+
| Valid ->
302
+
let no_errors = errors = [] && warnings = [] in
303
+
let details = if no_errors then "OK"
304
+
else Printf.sprintf "Expected valid but got %d errors, %d warnings" (List.length errors) (List.length warnings) in
305
+
(no_errors, details, None)
306
+
| Invalid ->
307
+
if errors = [] then
308
+
(false, "Expected error but got none", None)
309
+
else begin
310
+
match expected_msg with
311
+
| None ->
312
+
(true, Printf.sprintf "Got %d error(s), no expected message" (List.length errors), None)
313
+
| Some exp ->
314
+
let expected = Expected_message.parse exp in
315
+
let qualities = List.map (fun msg -> Expected_message.matches ~strictness ~expected ~actual:msg) error_msgs in
316
+
let best = List.fold_left (fun b q -> if q < b then q else b) Expected_message.No_match qualities in
317
+
let acceptable = Expected_message.is_acceptable ~strictness best in
318
+
let msg = if acceptable then "Message matched" else "Message mismatch" in
319
+
(acceptable, msg, Some best)
320
+
end
321
+
| HasWarning ->
322
+
(* For haswarn, check warnings AND infos (like test_validator.ml) *)
323
+
let all_msgs = warning_msgs @ info_msgs in
324
+
let all_messages = warnings @ infos in
325
+
if all_messages = [] && errors = [] then
326
+
(false, "Expected warning but got none", None)
327
+
else begin
328
+
match expected_msg with
329
+
| None ->
330
+
if all_messages <> [] then
331
+
(true, Printf.sprintf "Got %d warning/info message(s)" (List.length all_messages), None)
332
+
else
333
+
(true, Printf.sprintf "Got %d error(s) instead of warning" (List.length errors), None)
334
+
| Some exp ->
335
+
let expected = Expected_message.parse exp in
336
+
let qualities = List.map (fun msg -> Expected_message.matches ~strictness ~expected ~actual:msg) all_msgs in
337
+
let best = List.fold_left (fun b q -> if q < b then q else b) Expected_message.No_match qualities in
338
+
let acceptable = Expected_message.is_acceptable ~strictness best in
339
+
if acceptable then
340
+
(true, "Warning/info matched", Some best)
341
+
else begin
342
+
(* Also try matching against errors *)
343
+
let err_qualities = List.map (fun msg -> Expected_message.matches ~strictness ~expected ~actual:msg) error_msgs in
344
+
let err_best = List.fold_left (fun b q -> if q < b then q else b) Expected_message.No_match err_qualities in
345
+
let err_acceptable = Expected_message.is_acceptable ~strictness err_best in
346
+
if err_acceptable then
347
+
(true, "Error matched (severity differs)", Some err_best)
348
+
else
349
+
let final_best = if best < err_best then best else err_best in
350
+
(false, "Warning mismatch", Some final_best)
351
+
end
352
+
end
353
+
| Unknown -> (false, "Unknown test type", None)
354
+
in
355
+
{ file = test; passed; actual_errors = errors; actual_warnings = warnings @ infos; details; match_quality }
356
+
with e ->
357
+
{ file = test; passed = false; actual_errors = []; actual_warnings = [];
358
+
details = Printf.sprintf "Exception: %s" (Printexc.to_string e); match_quality = None }
359
+
360
+
let run_mode ~mode_name ~strictness messages tests =
361
+
Printf.printf " Running %s mode...\n%!" mode_name;
362
+
let total = List.length tests in
363
+
let results = List.mapi (fun i test ->
364
+
if (i + 1) mod 500 = 0 then Printf.printf " [%d/%d]\n%!" (i + 1) total;
365
+
run_test ~strictness messages test
366
+
) tests in
367
+
let passed = List.filter (fun r -> r.passed) results |> List.length in
368
+
Printf.printf " %s: %d/%d passed\n%!" mode_name passed total;
369
+
(results, passed, total - passed)
370
+
371
+
let results_to_file_results mode_name results =
372
+
(* Group by category *)
373
+
let by_category = Hashtbl.create 32 in
374
+
List.iter (fun r ->
375
+
let cat = r.file.category in
376
+
let existing = try Hashtbl.find by_category cat with Not_found -> [] in
377
+
Hashtbl.replace by_category cat (r :: existing)
378
+
) results;
379
+
380
+
Hashtbl.fold (fun category tests acc ->
381
+
let tests = List.rev tests in
382
+
let passed_count = List.filter (fun r -> r.passed) tests |> List.length in
383
+
let failed_count = List.length tests - passed_count in
384
+
let test_results = List.mapi (fun i r ->
385
+
let outcome_str = match r.file.expected with
386
+
| Valid -> "isvalid" | Invalid -> "novalid" | HasWarning -> "haswarn" | Unknown -> "unknown"
387
+
in
388
+
Report.{
389
+
test_num = i + 1;
390
+
description = Printf.sprintf "[%s] %s" outcome_str (Filename.basename r.file.relative_path);
391
+
input = r.file.relative_path;
392
+
expected = (match r.file.expected with
393
+
| Valid -> "(no errors)" | Invalid -> "(error expected)" | HasWarning -> "(warning expected)" | Unknown -> "?");
394
+
actual = String.concat "; " (r.actual_errors @ r.actual_warnings);
395
+
success = r.passed;
396
+
details = [("Result", r.details)];
397
+
raw_test_data = None;
398
+
}
399
+
) tests in
400
+
Report.{
401
+
filename = Printf.sprintf "Validator / %s [%s]" category mode_name;
402
+
test_type = "Validator";
403
+
passed_count;
404
+
failed_count;
405
+
tests = test_results;
406
+
} :: acc
407
+
) by_category []
408
+
409
+
let run tests_dir =
410
+
Printf.printf " Loading validator messages...\n%!";
411
+
let messages_path = Filename.concat tests_dir "messages.json" in
412
+
let messages = Validator_messages.load messages_path in
413
+
414
+
Printf.printf " Discovering test files...\n%!";
415
+
let tests = discover_tests_in_dir tests_dir "" in
416
+
Printf.printf " Found %d test files\n%!" (List.length tests);
417
+
418
+
let (lenient_results, lenient_passed, _lenient_failed) =
419
+
run_mode ~mode_name:"LENIENT" ~strictness:Expected_message.lenient messages tests in
420
+
let (strict_results, strict_passed, strict_failed) =
421
+
run_mode ~mode_name:"STRICT" ~strictness:Expected_message.exact_message messages tests in
422
+
423
+
let lenient_files = results_to_file_results "Lenient" lenient_results in
424
+
let strict_files = results_to_file_results "Strict" strict_results in
425
+
426
+
let total = List.length tests in
427
+
{
428
+
name = "Nu HTML Validator Tests";
429
+
description = "W3C Nu HTML Validator conformance tests (both lenient and strict modes)";
430
+
passed = strict_passed; (* Use strict as the primary metric *)
431
+
failed = strict_failed;
432
+
files = lenient_files @ strict_files;
433
+
extra_info = [
434
+
("Lenient Mode", Printf.sprintf "%d/%d (%.1f%%)" lenient_passed total
435
+
(100.0 *. float_of_int lenient_passed /. float_of_int total));
436
+
("Strict Mode", Printf.sprintf "%d/%d (%.1f%%)" strict_passed total
437
+
(100.0 *. float_of_int strict_passed /. float_of_int total));
438
+
("Total Tests", string_of_int total);
439
+
];
440
+
}
441
+
end
442
+
443
+
(* ============================================================ *)
444
+
(* Main Entry Point *)
445
+
(* ============================================================ *)
446
+
447
+
let get_timestamp () =
448
+
let now = Unix.gettimeofday () in
449
+
let tm = Unix.localtime now in
450
+
Printf.sprintf "%04d-%02d-%02d %02d:%02d:%02d"
451
+
(tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday
452
+
tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec
453
+
454
+
let () =
455
+
let html5lib_dir = ref "html5lib-tests" in
456
+
let validator_dir = ref "validator/tests" in
457
+
let output_file = ref "comprehensive_test_report.html" in
458
+
459
+
(* Parse args *)
460
+
let args = Array.to_list Sys.argv |> List.tl in
461
+
(match args with
462
+
| [h; v; o] -> html5lib_dir := h; validator_dir := v; output_file := o
463
+
| [h; v] -> html5lib_dir := h; validator_dir := v
464
+
| [h] -> html5lib_dir := h
465
+
| _ -> ());
466
+
467
+
Printf.printf "=== Comprehensive HTML5rw Test Suite ===\n\n%!";
468
+
469
+
let all_suites = ref [] in
470
+
let total_passed = ref 0 in
471
+
let total_failed = ref 0 in
472
+
473
+
(* Run HTML5lib tests *)
474
+
Printf.printf "Running HTML5lib tests from %s...\n%!" !html5lib_dir;
475
+
if Sys.file_exists !html5lib_dir then begin
476
+
let suite = Html5lib_runner.run !html5lib_dir in
477
+
all_suites := suite :: !all_suites;
478
+
total_passed := !total_passed + suite.passed;
479
+
total_failed := !total_failed + suite.failed;
480
+
Printf.printf " Subtotal: %d passed, %d failed\n\n%!" suite.passed suite.failed
481
+
end else
482
+
Printf.printf " (directory not found)\n\n%!";
483
+
484
+
(* Run Validator tests *)
485
+
Printf.printf "Running Validator tests from %s...\n%!" !validator_dir;
486
+
if Sys.file_exists !validator_dir then begin
487
+
let suite = Validator_runner.run !validator_dir in
488
+
all_suites := suite :: !all_suites;
489
+
total_passed := !total_passed + suite.passed;
490
+
total_failed := !total_failed + suite.failed;
491
+
Printf.printf " Subtotal: %d passed, %d failed\n\n%!" suite.passed suite.failed
492
+
end else
493
+
Printf.printf " (directory not found)\n\n%!";
494
+
495
+
Printf.printf "=== Overall Summary ===\n";
496
+
Printf.printf "Total: %d passed, %d failed\n\n%!" !total_passed !total_failed;
497
+
498
+
(* Combine all file results *)
499
+
let all_files = List.concat_map (fun s -> s.files) (List.rev !all_suites) in
500
+
501
+
(* Build description with all suite info as HTML *)
502
+
let suites_info = List.rev !all_suites |> List.map (fun s ->
503
+
let extras = String.concat ", " (List.map (fun (k, v) -> Printf.sprintf "%s: %s" k v) s.extra_info) in
504
+
Printf.sprintf "<li><strong>%s:</strong> %d/%d passed โ %s</li>" s.name s.passed (s.passed + s.failed) extras
505
+
) |> String.concat "\n" in
506
+
507
+
let description = Printf.sprintf
508
+
"Comprehensive test report for the html5rw OCaml HTML5 parser and validator library.</p>\
509
+
<p><strong>Test Suites:</strong></p><ul>%s</ul><p>\
510
+
This report combines results from multiple test suites to provide complete coverage analysis."
511
+
suites_info
512
+
in
513
+
514
+
let report : Report.report = {
515
+
title = "html5rw Comprehensive Test Report";
516
+
test_type = "comprehensive";
517
+
description;
518
+
files = all_files;
519
+
total_passed = !total_passed;
520
+
total_failed = !total_failed;
521
+
match_quality = None;
522
+
test_type_breakdown = None;
523
+
strictness_mode = Some "Comprehensive (all modes)";
524
+
run_timestamp = Some (get_timestamp ());
525
+
} in
526
+
527
+
Report.generate_report report !output_file;
528
+
529
+
exit (if !total_failed > 0 then 1 else 0)
+19
-5
test/test_report.ml
+19
-5
test/test_report.ml
···
746
746
let tests_html = String.concat "\n" (List.map generate_test_html file.tests) in
747
747
let collapsed = if file.failed_count = 0 then "collapsed" else "" in
748
748
let hidden = if file.failed_count = 0 then "hidden" else "" in
749
+
let escaped_full = html_escape file.filename in
749
750
750
751
Printf.sprintf {|
751
752
<div class="file-section" id="file-%s">
752
753
<div class="file-header %s">
753
-
<h2>
754
+
<h2 title="%s">
754
755
<span class="toggle">โผ</span>
755
756
๐ %s
756
757
</h2>
···
763
764
%s
764
765
</div>
765
766
</div>
766
-
|} file_id collapsed file.filename file.passed_count file.failed_count hidden tests_html
767
+
|} file_id collapsed escaped_full file.filename file.passed_count file.failed_count hidden tests_html
768
+
769
+
let shorten_filename name =
770
+
(* Shorten common prefixes for display, keep full name for tooltip *)
771
+
let short =
772
+
if String.length name > 10 && String.sub name 0 10 = "HTML5lib /" then
773
+
"H5:" ^ String.sub name 10 (String.length name - 10)
774
+
else if String.length name > 12 && String.sub name 0 12 = "Validator / " then
775
+
"VA:" ^ String.sub name 12 (String.length name - 12)
776
+
else name
777
+
in
778
+
String.trim short
767
779
768
780
let generate_sidebar_html files =
769
781
String.concat "\n" (List.map (fun file ->
770
782
let file_id = String.map (fun c -> if c = '/' || c = '.' then '-' else c) file.filename in
771
783
let badge_class = if file.failed_count = 0 then "all-passed" else "has-failed" in
784
+
let short_name = shorten_filename file.filename in
785
+
let escaped_full = html_escape file.filename in
772
786
Printf.sprintf {|
773
-
<div class="sidebar-item" data-file="file-%s">
787
+
<div class="sidebar-item" data-file="file-%s" title="%s">
774
788
<span class="name">%s</span>
775
789
<span class="badge %s">%d/%d</span>
776
790
</div>
777
-
|} file_id file.filename badge_class file.passed_count (file.passed_count + file.failed_count)
791
+
|} file_id escaped_full short_name badge_class file.passed_count (file.passed_count + file.failed_count)
778
792
) files)
779
793
780
794
let generate_match_quality_html stats =
···
957
971
</body>
958
972
</html>
959
973
|} report.title css
960
-
report.title (html_escape report.description)
974
+
report.title report.description (* description may contain HTML *)
961
975
total report.total_passed report.total_failed timestamp_text
962
976
mode_text
963
977
(if pass_rate >= 99.0 then "success" else if pass_rate >= 90.0 then "neutral" else "failure")
+6
-1
test/test_roundtrip.ml
+6
-1
test/test_roundtrip.ml
···
129
129
Printf.printf "Running roundtrip tests...\n%!";
130
130
131
131
(* Run tests *)
132
-
let results = List.map test_file test_files in
132
+
let total = List.length test_files in
133
+
let results = List.mapi (fun i path ->
134
+
Printf.printf "\r[%d/%d] %s%!" (i + 1) total (Filename.basename path);
135
+
test_file path
136
+
) test_files in
137
+
Printf.printf "\n%!";
133
138
134
139
(* Categorize results *)
135
140
let isvalid_tests = List.filter (fun r -> r.test_type = "isvalid") results in
+253
-37
test/test_validator.ml
+253
-37
test/test_validator.ml
···
402
402
} in
403
403
Report.generate_report report output_path
404
404
405
-
let () =
406
-
(* Parse command line arguments *)
407
-
let args = Array.to_list Sys.argv |> List.tl in
408
-
let is_strict = List.mem "--strict" args in
409
-
let non_flag_args = List.filter (fun s -> not (String.length s > 0 && s.[0] = '-')) args in
410
-
let tests_dir = match non_flag_args with x :: _ -> x | [] -> "validator/tests" in
411
-
let report_path = match non_flag_args with _ :: x :: _ -> x | _ -> "test_validator_report.html" in
405
+
(** Run tests with a given strictness and return results *)
406
+
let run_all_tests ~mode_name ~strictness_setting messages tests =
407
+
strictness := strictness_setting;
408
+
Printf.printf "\n=== Running in %s mode ===\n%!" mode_name;
409
+
let total = List.length tests in
410
+
let results = List.mapi (fun i test ->
411
+
Printf.printf "\r[%d/%d] %s%!" (i + 1) total test.relative_path;
412
+
run_test messages test
413
+
) tests in
414
+
Printf.printf "\n%!";
415
+
results
412
416
413
-
(* Apply strict mode if requested - use exact_message which requires exact text but not typed codes *)
414
-
if is_strict then begin
415
-
strictness := Expected_message.exact_message;
416
-
Printf.printf "Running in STRICT mode (exact message matching required)\n%!"
417
-
end;
418
-
419
-
Printf.printf "Loading messages.json...\n%!";
420
-
let messages_path = Filename.concat tests_dir "messages.json" in
421
-
let messages = Validator_messages.load messages_path in
422
-
Printf.printf "Loaded %d expected messages\n%!" (Validator_messages.count messages);
417
+
(** Print failures for a test run *)
418
+
let print_failures mode_name results =
419
+
Printf.printf "\n--- %s mode results ---\n" mode_name;
423
420
424
-
Printf.printf "Discovering test files...\n%!";
425
-
let tests = discover_tests tests_dir in
426
-
Printf.printf "Found %d test files\n%!" (List.length tests);
427
-
428
-
Printf.printf "Running tests...\n%!";
429
-
let results = List.map (run_test messages) tests in
430
-
431
-
(* Print failing isvalid tests *)
432
421
let failing_isvalid = List.filter (fun r ->
433
422
r.file.expected = Valid && not r.passed
434
423
) results in
435
424
if failing_isvalid <> [] then begin
436
-
Printf.printf "\n=== Failing isvalid tests ===\n";
425
+
Printf.printf "Failing isvalid tests:\n";
437
426
List.iter (fun r ->
438
-
Printf.printf "%s: %s\n" r.file.relative_path r.details
427
+
Printf.printf " %s: %s\n" r.file.relative_path r.details
439
428
) failing_isvalid
440
429
end;
441
430
442
-
(* Print failing haswarn tests *)
443
431
let failing_haswarn = List.filter (fun r ->
444
432
r.file.expected = HasWarning && not r.passed
445
433
) results in
446
434
if failing_haswarn <> [] then begin
447
-
Printf.printf "\n=== Failing haswarn tests ===\n";
435
+
Printf.printf "Failing haswarn tests:\n";
448
436
List.iter (fun r ->
449
-
Printf.printf "%s\n" r.file.relative_path
437
+
Printf.printf " %s\n" r.file.relative_path
450
438
) failing_haswarn
451
439
end;
452
440
453
-
(* Print failing novalid tests *)
454
441
let failing_novalid = List.filter (fun r ->
455
442
r.file.expected = Invalid && not r.passed
456
443
) results in
457
444
if failing_novalid <> [] then begin
458
-
Printf.printf "\n=== Failing novalid tests (first 50) ===\n";
445
+
Printf.printf "Failing novalid tests (first 20):\n";
459
446
List.iteri (fun i r ->
460
-
if i < 50 then Printf.printf "%s\n" r.file.relative_path
447
+
if i < 20 then Printf.printf " %s\n" r.file.relative_path
461
448
) failing_novalid
462
449
end;
463
450
464
-
print_summary results;
465
-
generate_html_report results report_path;
451
+
let passed = List.filter (fun r -> r.passed) results |> List.length in
452
+
let total = List.length results in
453
+
Printf.printf "%s: %d/%d passed (%.1f%%)\n%!" mode_name passed total
454
+
(100.0 *. float_of_int passed /. float_of_int total)
455
+
456
+
(** Generate combined HTML report for both modes *)
457
+
let generate_combined_html_report ~lenient_results ~strict_results output_path =
458
+
(* Helper to build file results from a set of results *)
459
+
let build_file_results results =
460
+
let by_category = group_by_category results in
461
+
List.map (fun (category, tests) ->
462
+
let passed_count = List.filter (fun r -> r.passed) tests |> List.length in
463
+
let failed_count = List.length tests - passed_count in
464
+
let test_results = List.mapi (fun i r ->
465
+
let outcome_str = match r.file.expected with
466
+
| Valid -> "isvalid"
467
+
| Invalid -> "novalid"
468
+
| HasWarning -> "haswarn"
469
+
| Unknown -> "unknown"
470
+
in
471
+
let description = Printf.sprintf "[%s] %s" outcome_str (Filename.basename r.file.relative_path) in
472
+
let expected = match r.expected_message with
473
+
| Some m -> m
474
+
| None -> match r.file.expected with
475
+
| Valid -> "(should produce no errors or warnings)"
476
+
| Invalid -> "(should produce at least one error)"
477
+
| HasWarning -> "(should produce at least one warning)"
478
+
| Unknown -> "(unknown test type)"
479
+
in
480
+
let actual_str =
481
+
let errors = if r.actual_errors = [] then ""
482
+
else "Errors:\n โข " ^ String.concat "\n โข " r.actual_errors in
483
+
let warnings = if r.actual_warnings = [] then ""
484
+
else "Warnings:\n โข " ^ String.concat "\n โข " r.actual_warnings in
485
+
let infos = if r.actual_infos = [] then ""
486
+
else "Info:\n โข " ^ String.concat "\n โข " r.actual_infos in
487
+
if errors = "" && warnings = "" && infos = "" then "(no messages produced)"
488
+
else String.trim (errors ^ (if errors <> "" && warnings <> "" then "\n\n" else "") ^
489
+
warnings ^ (if (errors <> "" || warnings <> "") && infos <> "" then "\n\n" else "") ^
490
+
infos)
491
+
in
492
+
let match_quality_str = match r.match_quality with
493
+
| Some q -> Expected_message.match_quality_to_string q
494
+
| None -> "N/A"
495
+
in
496
+
Report.{
497
+
test_num = i + 1;
498
+
description;
499
+
input = r.file.relative_path;
500
+
expected;
501
+
actual = actual_str;
502
+
success = r.passed;
503
+
details = [
504
+
("Result", r.details);
505
+
("Match Quality", match_quality_str);
506
+
];
507
+
raw_test_data = read_html_source r.file.path;
508
+
}
509
+
) tests in
510
+
Report.{
511
+
filename = category;
512
+
test_type = "HTML5 Validator";
513
+
passed_count;
514
+
failed_count;
515
+
tests = test_results;
516
+
}
517
+
) by_category
518
+
in
466
519
467
-
let failed_count = List.filter (fun r -> not r.passed) results |> List.length in
468
-
exit (if failed_count > 0 then 1 else 0)
520
+
let compute_stats results mode_name =
521
+
let total_passed = List.filter (fun r -> r.passed) results |> List.length in
522
+
let total_failed = List.length results - total_passed in
523
+
let count_quality q = List.filter (fun r ->
524
+
match r.match_quality with Some mq -> mq = q | None -> false
525
+
) results |> List.length in
526
+
let match_quality_stats : Report.match_quality_stats = {
527
+
exact_matches = count_quality Expected_message.Exact_match;
528
+
code_matches = count_quality Expected_message.Code_match;
529
+
message_matches = count_quality Expected_message.Message_match;
530
+
substring_matches = count_quality Expected_message.Substring_match;
531
+
severity_mismatches = count_quality Expected_message.Severity_mismatch;
532
+
no_matches = count_quality Expected_message.No_match;
533
+
not_applicable = List.filter (fun r -> r.match_quality = None) results |> List.length;
534
+
} in
535
+
let isvalid_results = List.filter (fun r -> r.file.expected = Valid) results in
536
+
let novalid_results = List.filter (fun r -> r.file.expected = Invalid) results in
537
+
let haswarn_results = List.filter (fun r -> r.file.expected = HasWarning) results in
538
+
let count_passed rs = List.filter (fun r -> r.passed) rs |> List.length in
539
+
let test_type_stats : Report.test_type_stats = {
540
+
isvalid_passed = count_passed isvalid_results;
541
+
isvalid_total = List.length isvalid_results;
542
+
novalid_passed = count_passed novalid_results;
543
+
novalid_total = List.length novalid_results;
544
+
haswarn_passed = count_passed haswarn_results;
545
+
haswarn_total = List.length haswarn_results;
546
+
} in
547
+
(total_passed, total_failed, match_quality_stats, test_type_stats, mode_name)
548
+
in
549
+
550
+
let lenient_stats = compute_stats lenient_results "lenient" in
551
+
let strict_stats = compute_stats strict_results "strict" in
552
+
553
+
(* Use strict results for the main report, but include both in description *)
554
+
let (strict_passed, strict_failed, strict_mq, strict_tt, _) = strict_stats in
555
+
let (lenient_passed, _lenient_failed, _, _, _) = lenient_stats in
556
+
557
+
let now = Unix.gettimeofday () in
558
+
let tm = Unix.localtime now in
559
+
let timestamp = Printf.sprintf "%04d-%02d-%02d %02d:%02d:%02d"
560
+
(tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday
561
+
tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec in
562
+
563
+
let total = List.length strict_results in
564
+
let description = Printf.sprintf
565
+
"Tests from the Nu HTML Validator (W3C's official HTML checker). \
566
+
Tests validate HTML5 conformance including element nesting, required attributes, \
567
+
ARIA roles, obsolete elements, and more.\n\n\
568
+
LENIENT mode: %d/%d passed (%.1f%%) - allows substring matching\n\
569
+
STRICT mode: %d/%d passed (%.1f%%) - requires exact message matching"
570
+
lenient_passed total (100.0 *. float_of_int lenient_passed /. float_of_int total)
571
+
strict_passed total (100.0 *. float_of_int strict_passed /. float_of_int total)
572
+
in
573
+
574
+
let report : Report.report = {
575
+
title = "Nu HTML Validator Tests (Lenient + Strict)";
576
+
test_type = "validator";
577
+
description;
578
+
files = build_file_results strict_results; (* Show strict results in detail *)
579
+
total_passed = strict_passed;
580
+
total_failed = strict_failed;
581
+
match_quality = Some strict_mq;
582
+
test_type_breakdown = Some strict_tt;
583
+
strictness_mode = Some (Printf.sprintf "BOTH (Lenient: %d/%d, Strict: %d/%d)"
584
+
lenient_passed total strict_passed total);
585
+
run_timestamp = Some timestamp;
586
+
} in
587
+
Report.generate_report report output_path
588
+
589
+
let () =
590
+
(* Parse command line arguments *)
591
+
let args = Array.to_list Sys.argv |> List.tl in
592
+
let is_strict = List.mem "--strict" args in
593
+
let is_both = List.mem "--both" args in
594
+
let non_flag_args = List.filter (fun s -> not (String.length s > 0 && s.[0] = '-')) args in
595
+
let tests_dir = match non_flag_args with x :: _ -> x | [] -> "validator/tests" in
596
+
let report_path = match non_flag_args with _ :: x :: _ -> x | _ -> "test_validator_report.html" in
597
+
598
+
Printf.printf "Loading messages.json...\n%!";
599
+
let messages_path = Filename.concat tests_dir "messages.json" in
600
+
let messages = Validator_messages.load messages_path in
601
+
Printf.printf "Loaded %d expected messages\n%!" (Validator_messages.count messages);
602
+
603
+
Printf.printf "Discovering test files...\n%!";
604
+
let tests = discover_tests tests_dir in
605
+
Printf.printf "Found %d test files\n%!" (List.length tests);
606
+
607
+
if is_both then begin
608
+
(* Run both modes *)
609
+
let lenient_results = run_all_tests ~mode_name:"LENIENT"
610
+
~strictness_setting:Expected_message.lenient messages tests in
611
+
let strict_results = run_all_tests ~mode_name:"STRICT"
612
+
~strictness_setting:Expected_message.exact_message messages tests in
613
+
614
+
print_failures "LENIENT" lenient_results;
615
+
print_failures "STRICT" strict_results;
616
+
617
+
Printf.printf "\n=== Summary ===\n";
618
+
let lenient_passed = List.filter (fun r -> r.passed) lenient_results |> List.length in
619
+
let strict_passed = List.filter (fun r -> r.passed) strict_results |> List.length in
620
+
let total = List.length tests in
621
+
Printf.printf "LENIENT: %d/%d (%.1f%%)\n" lenient_passed total
622
+
(100.0 *. float_of_int lenient_passed /. float_of_int total);
623
+
Printf.printf "STRICT: %d/%d (%.1f%%)\n" strict_passed total
624
+
(100.0 *. float_of_int strict_passed /. float_of_int total);
625
+
626
+
generate_combined_html_report ~lenient_results ~strict_results report_path;
627
+
628
+
(* Exit with error if strict mode has failures *)
629
+
let strict_failed = List.filter (fun r -> not r.passed) strict_results |> List.length in
630
+
exit (if strict_failed > 0 then 1 else 0)
631
+
end else begin
632
+
(* Single mode (original behavior) *)
633
+
if is_strict then begin
634
+
strictness := Expected_message.exact_message;
635
+
Printf.printf "Running in STRICT mode (exact message matching required)\n%!"
636
+
end;
637
+
638
+
Printf.printf "Running tests...\n%!";
639
+
let total = List.length tests in
640
+
let results = List.mapi (fun i test ->
641
+
Printf.printf "\r[%d/%d] %s%!" (i + 1) total test.relative_path;
642
+
run_test messages test
643
+
) tests in
644
+
Printf.printf "\n%!";
645
+
646
+
(* Print failing isvalid tests *)
647
+
let failing_isvalid = List.filter (fun r ->
648
+
r.file.expected = Valid && not r.passed
649
+
) results in
650
+
if failing_isvalid <> [] then begin
651
+
Printf.printf "\n=== Failing isvalid tests ===\n";
652
+
List.iter (fun r ->
653
+
Printf.printf "%s: %s\n" r.file.relative_path r.details
654
+
) failing_isvalid
655
+
end;
656
+
657
+
(* Print failing haswarn tests *)
658
+
let failing_haswarn = List.filter (fun r ->
659
+
r.file.expected = HasWarning && not r.passed
660
+
) results in
661
+
if failing_haswarn <> [] then begin
662
+
Printf.printf "\n=== Failing haswarn tests ===\n";
663
+
List.iter (fun r ->
664
+
Printf.printf "%s\n" r.file.relative_path
665
+
) failing_haswarn
666
+
end;
667
+
668
+
(* Print failing novalid tests *)
669
+
let failing_novalid = List.filter (fun r ->
670
+
r.file.expected = Invalid && not r.passed
671
+
) results in
672
+
if failing_novalid <> [] then begin
673
+
Printf.printf "\n=== Failing novalid tests (first 50) ===\n";
674
+
List.iteri (fun i r ->
675
+
if i < 50 then Printf.printf "%s\n" r.file.relative_path
676
+
) failing_novalid
677
+
end;
678
+
679
+
print_summary results;
680
+
generate_html_report results report_path;
681
+
682
+
let failed_count = List.filter (fun r -> not r.passed) results |> List.length in
683
+
exit (if failed_count > 0 then 1 else 0)
684
+
end
+668
test-regression.html
+668
test-regression.html
···
1
+
<!DOCTYPE html>
2
+
<html lang="en">
3
+
<head>
4
+
<meta charset="UTF-8">
5
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+
<title>HTML5rw Regression Test Suite</title>
7
+
<style>
8
+
:root {
9
+
--bg-primary: #1a1a2e;
10
+
--bg-secondary: #16213e;
11
+
--bg-tertiary: #0f3460;
12
+
--text-primary: #eee;
13
+
--text-secondary: #aaa;
14
+
--text-muted: #666;
15
+
--accent: #e94560;
16
+
--accent-light: #ff6b8a;
17
+
--success: #4ade80;
18
+
--success-dim: rgba(74, 222, 128, 0.2);
19
+
--failure: #f87171;
20
+
--failure-dim: rgba(248, 113, 113, 0.2);
21
+
--warning: #fbbf24;
22
+
--info: #60a5fa;
23
+
--border: #333;
24
+
--code-bg: #0d1117;
25
+
}
26
+
27
+
* { box-sizing: border-box; margin: 0; padding: 0; }
28
+
29
+
body {
30
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
31
+
background: var(--bg-primary);
32
+
color: var(--text-primary);
33
+
line-height: 1.6;
34
+
padding: 20px;
35
+
}
36
+
37
+
.container { max-width: 1400px; margin: 0 auto; }
38
+
39
+
.hero {
40
+
background: linear-gradient(135deg, var(--bg-secondary) 0%, var(--bg-tertiary) 100%);
41
+
padding: 30px 40px;
42
+
border-radius: 12px;
43
+
margin-bottom: 30px;
44
+
border: 1px solid var(--border);
45
+
}
46
+
47
+
.hero h1 {
48
+
font-size: 2rem;
49
+
margin-bottom: 10px;
50
+
color: var(--accent);
51
+
}
52
+
53
+
.hero p { color: var(--text-secondary); margin-bottom: 15px; }
54
+
55
+
.controls {
56
+
display: flex;
57
+
gap: 12px;
58
+
flex-wrap: wrap;
59
+
align-items: center;
60
+
}
61
+
62
+
button {
63
+
padding: 12px 24px;
64
+
border: none;
65
+
border-radius: 8px;
66
+
background: var(--accent);
67
+
color: white;
68
+
cursor: pointer;
69
+
font-size: 14px;
70
+
font-weight: 600;
71
+
transition: all 0.2s;
72
+
}
73
+
74
+
button:hover { background: var(--accent-light); transform: translateY(-1px); }
75
+
button:disabled { opacity: 0.5; cursor: not-allowed; transform: none; }
76
+
button.secondary { background: var(--bg-tertiary); border: 1px solid var(--border); }
77
+
button.secondary:hover { background: var(--bg-secondary); }
78
+
79
+
select {
80
+
padding: 12px 16px;
81
+
border: 1px solid var(--border);
82
+
border-radius: 8px;
83
+
background: var(--bg-secondary);
84
+
color: var(--text-primary);
85
+
font-size: 14px;
86
+
}
87
+
88
+
.summary-grid {
89
+
display: grid;
90
+
grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
91
+
gap: 20px;
92
+
margin-bottom: 30px;
93
+
}
94
+
95
+
.summary-card {
96
+
background: var(--bg-secondary);
97
+
border-radius: 12px;
98
+
padding: 20px;
99
+
border: 1px solid var(--border);
100
+
text-align: center;
101
+
}
102
+
103
+
.summary-card h3 {
104
+
font-size: 0.8rem;
105
+
text-transform: uppercase;
106
+
letter-spacing: 1px;
107
+
color: var(--text-secondary);
108
+
margin-bottom: 8px;
109
+
}
110
+
111
+
.summary-card .value {
112
+
font-size: 2rem;
113
+
font-weight: 700;
114
+
}
115
+
116
+
.summary-card .value.success { color: var(--success); }
117
+
.summary-card .value.failure { color: var(--failure); }
118
+
.summary-card .value.neutral { color: var(--text-primary); }
119
+
120
+
.progress-container {
121
+
background: var(--bg-secondary);
122
+
border-radius: 12px;
123
+
padding: 20px;
124
+
margin-bottom: 30px;
125
+
border: 1px solid var(--border);
126
+
}
127
+
128
+
.progress-bar {
129
+
height: 24px;
130
+
background: var(--failure-dim);
131
+
border-radius: 12px;
132
+
overflow: hidden;
133
+
margin-top: 10px;
134
+
}
135
+
136
+
.progress-fill {
137
+
height: 100%;
138
+
background: var(--success);
139
+
border-radius: 12px;
140
+
transition: width 0.3s ease;
141
+
display: flex;
142
+
align-items: center;
143
+
justify-content: center;
144
+
font-size: 12px;
145
+
font-weight: 600;
146
+
}
147
+
148
+
.status-text {
149
+
font-size: 14px;
150
+
color: var(--text-secondary);
151
+
margin-bottom: 8px;
152
+
}
153
+
154
+
.results-section {
155
+
background: var(--bg-secondary);
156
+
border-radius: 12px;
157
+
margin-bottom: 20px;
158
+
border: 1px solid var(--border);
159
+
overflow: hidden;
160
+
}
161
+
162
+
.results-header {
163
+
padding: 16px 20px;
164
+
background: var(--bg-tertiary);
165
+
cursor: pointer;
166
+
display: flex;
167
+
justify-content: space-between;
168
+
align-items: center;
169
+
}
170
+
171
+
.results-header:hover { background: #1a4a7a; }
172
+
173
+
.results-header h2 {
174
+
font-size: 1rem;
175
+
display: flex;
176
+
align-items: center;
177
+
gap: 10px;
178
+
}
179
+
180
+
.results-header .toggle { color: var(--text-secondary); transition: transform 0.2s; }
181
+
.results-header.collapsed .toggle { transform: rotate(-90deg); }
182
+
183
+
.results-stats {
184
+
display: flex;
185
+
gap: 15px;
186
+
font-size: 14px;
187
+
}
188
+
189
+
.results-stats .passed { color: var(--success); }
190
+
.results-stats .failed { color: var(--failure); }
191
+
192
+
.results-content { padding: 15px; }
193
+
.results-content.hidden { display: none; }
194
+
195
+
.test-item {
196
+
margin: 6px 0;
197
+
border: 1px solid var(--border);
198
+
border-radius: 6px;
199
+
overflow: hidden;
200
+
}
201
+
202
+
.test-header {
203
+
padding: 10px 14px;
204
+
cursor: pointer;
205
+
display: flex;
206
+
justify-content: space-between;
207
+
align-items: center;
208
+
background: var(--bg-primary);
209
+
font-size: 13px;
210
+
}
211
+
212
+
.test-header:hover { background: rgba(255,255,255,0.03); }
213
+
214
+
.test-header .status {
215
+
width: 8px;
216
+
height: 8px;
217
+
border-radius: 50%;
218
+
margin-right: 10px;
219
+
flex-shrink: 0;
220
+
}
221
+
222
+
.test-header .status.passed { background: var(--success); }
223
+
.test-header .status.failed { background: var(--failure); }
224
+
225
+
.test-header .test-info { flex: 1; display: flex; align-items: center; min-width: 0; }
226
+
.test-header .test-num { font-weight: 600; margin-right: 10px; color: var(--text-muted); }
227
+
.test-header .test-desc { white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
228
+
.test-header .expand-icon { color: var(--text-muted); font-size: 0.75rem; }
229
+
230
+
.test-details {
231
+
padding: 15px;
232
+
background: var(--code-bg);
233
+
border-top: 1px solid var(--border);
234
+
display: none;
235
+
font-size: 13px;
236
+
}
237
+
238
+
.test-details.visible { display: block; }
239
+
240
+
.detail-section { margin-bottom: 15px; }
241
+
.detail-section:last-child { margin-bottom: 0; }
242
+
243
+
.detail-section h4 {
244
+
font-size: 11px;
245
+
text-transform: uppercase;
246
+
letter-spacing: 1px;
247
+
color: var(--text-muted);
248
+
margin-bottom: 8px;
249
+
}
250
+
251
+
.detail-section pre {
252
+
background: var(--bg-secondary);
253
+
padding: 12px;
254
+
border-radius: 6px;
255
+
overflow-x: auto;
256
+
font-family: 'Monaco', 'Menlo', monospace;
257
+
font-size: 12px;
258
+
white-space: pre-wrap;
259
+
word-break: break-word;
260
+
max-height: 300px;
261
+
overflow-y: auto;
262
+
border: 1px solid var(--border);
263
+
}
264
+
265
+
.detail-row {
266
+
display: grid;
267
+
grid-template-columns: 1fr 1fr;
268
+
gap: 15px;
269
+
}
270
+
271
+
.filter-controls {
272
+
display: flex;
273
+
gap: 10px;
274
+
margin-bottom: 20px;
275
+
flex-wrap: wrap;
276
+
}
277
+
278
+
.filter-controls input[type="search"] {
279
+
padding: 10px 14px;
280
+
border: 1px solid var(--border);
281
+
border-radius: 8px;
282
+
background: var(--bg-secondary);
283
+
color: var(--text-primary);
284
+
font-size: 14px;
285
+
width: 250px;
286
+
}
287
+
288
+
.log-output {
289
+
background: var(--code-bg);
290
+
border: 1px solid var(--border);
291
+
border-radius: 8px;
292
+
padding: 15px;
293
+
font-family: 'Monaco', 'Menlo', monospace;
294
+
font-size: 12px;
295
+
max-height: 200px;
296
+
overflow-y: auto;
297
+
white-space: pre-wrap;
298
+
margin-bottom: 20px;
299
+
}
300
+
301
+
@media (max-width: 768px) {
302
+
.detail-row { grid-template-columns: 1fr; }
303
+
.summary-grid { grid-template-columns: 1fr 1fr; }
304
+
}
305
+
</style>
306
+
</head>
307
+
<body>
308
+
<div class="container">
309
+
<div class="hero">
310
+
<h1>HTML5rw Regression Test Suite</h1>
311
+
<p>
312
+
Browser-based regression testing for the HTML5rw OCaml parser.
313
+
Tests are loaded from the html5lib-tests conformance suite.
314
+
</p>
315
+
<div class="controls">
316
+
<button id="run-all" onclick="runAllTests()">Run All Tests</button>
317
+
<button id="run-tree" class="secondary" onclick="runTreeTests()">Tree Construction Only</button>
318
+
<button id="run-encoding" class="secondary" onclick="runEncodingTests()">Encoding Only</button>
319
+
<select id="mode-select">
320
+
<option value="js">JavaScript (js_of_ocaml)</option>
321
+
<option value="wasm">WebAssembly (wasm_of_ocaml)</option>
322
+
</select>
323
+
</div>
324
+
</div>
325
+
326
+
<div class="summary-grid" id="summary" style="display: none;">
327
+
<div class="summary-card">
328
+
<h3>Total Tests</h3>
329
+
<div class="value neutral" id="total-count">0</div>
330
+
</div>
331
+
<div class="summary-card">
332
+
<h3>Passed</h3>
333
+
<div class="value success" id="passed-count">0</div>
334
+
</div>
335
+
<div class="summary-card">
336
+
<h3>Failed</h3>
337
+
<div class="value failure" id="failed-count">0</div>
338
+
</div>
339
+
<div class="summary-card">
340
+
<h3>Pass Rate</h3>
341
+
<div class="value" id="pass-rate">0%</div>
342
+
</div>
343
+
</div>
344
+
345
+
<div class="progress-container" id="progress-container">
346
+
<div class="status-text" id="status-text">Ready to run tests. Click a button above to start.</div>
347
+
<div class="progress-bar">
348
+
<div class="progress-fill" id="progress-fill" style="width: 0%"></div>
349
+
</div>
350
+
</div>
351
+
352
+
<div class="log-output" id="log-output">Waiting for tests to start...</div>
353
+
354
+
<div class="filter-controls" id="filter-controls" style="display: none;">
355
+
<input type="search" id="search" placeholder="Search tests...">
356
+
<select id="filter">
357
+
<option value="all">All Tests</option>
358
+
<option value="passed">Passed Only</option>
359
+
<option value="failed">Failed Only</option>
360
+
</select>
361
+
<button class="secondary" onclick="expandAll()">Expand All</button>
362
+
<button class="secondary" onclick="collapseAll()">Collapse All</button>
363
+
</div>
364
+
365
+
<div id="results-container"></div>
366
+
</div>
367
+
368
+
<script>
369
+
// Test file lists
370
+
const TREE_CONSTRUCTION_FILES = [
371
+
"adoption01.dat", "adoption02.dat", "blocks.dat", "comments01.dat",
372
+
"doctype01.dat", "domjs-unsafe.dat", "entities01.dat", "entities02.dat",
373
+
"foreign-fragment.dat", "html5test-com.dat", "inbody01.dat", "isindex.dat",
374
+
"main-element.dat", "math.dat", "menuitem-element.dat", "namespace-sensitivity.dat",
375
+
"noscript01.dat", "pending-spec-changes-plain-text-unsafe.dat",
376
+
"pending-spec-changes.dat", "plain-text-unsafe.dat", "quirks01.dat", "ruby.dat",
377
+
"scriptdata01.dat", "search-element.dat", "svg.dat", "tables01.dat",
378
+
"template.dat", "tests_innerHTML_1.dat", "tests1.dat", "tests10.dat",
379
+
"tests11.dat", "tests12.dat", "tests14.dat", "tests15.dat", "tests16.dat",
380
+
"tests17.dat", "tests18.dat", "tests19.dat", "tests2.dat", "tests20.dat",
381
+
"tests21.dat", "tests22.dat", "tests23.dat", "tests24.dat", "tests25.dat",
382
+
"tests26.dat", "tests3.dat", "tests4.dat", "tests5.dat", "tests6.dat",
383
+
"tests7.dat", "tests8.dat", "tests9.dat", "tricky01.dat", "webkit01.dat",
384
+
"webkit02.dat"
385
+
];
386
+
387
+
const ENCODING_FILES = [
388
+
"test-yahoo-jp.dat", "tests1.dat", "tests2.dat"
389
+
];
390
+
391
+
let testRunner = null;
392
+
let isRunning = false;
393
+
394
+
function log(msg) {
395
+
const output = document.getElementById('log-output');
396
+
output.textContent += msg + '\n';
397
+
output.scrollTop = output.scrollHeight;
398
+
}
399
+
400
+
function clearLog() {
401
+
document.getElementById('log-output').textContent = '';
402
+
}
403
+
404
+
function updateProgress(current, total, msg) {
405
+
const pct = total > 0 ? (current / total * 100) : 0;
406
+
document.getElementById('progress-fill').style.width = pct + '%';
407
+
document.getElementById('status-text').textContent = msg || `Running: ${current}/${total}`;
408
+
}
409
+
410
+
function updateSummary(passed, failed) {
411
+
const total = passed + failed;
412
+
const rate = total > 0 ? (passed / total * 100).toFixed(1) : 0;
413
+
document.getElementById('total-count').textContent = total;
414
+
document.getElementById('passed-count').textContent = passed;
415
+
document.getElementById('failed-count').textContent = failed;
416
+
document.getElementById('pass-rate').textContent = rate + '%';
417
+
document.getElementById('pass-rate').className = 'value ' + (rate >= 99 ? 'success' : rate >= 90 ? 'neutral' : 'failure');
418
+
document.getElementById('summary').style.display = 'grid';
419
+
}
420
+
421
+
async function loadTestRunner() {
422
+
const mode = document.getElementById('mode-select').value;
423
+
const scriptName = mode === 'wasm' ? 'htmlrw-tests.wasm.js' : 'htmlrw-tests.js';
424
+
425
+
if (typeof html5rwTests !== 'undefined') {
426
+
return true;
427
+
}
428
+
429
+
log(`Loading ${scriptName}...`);
430
+
try {
431
+
await new Promise((resolve, reject) => {
432
+
const script = document.createElement('script');
433
+
script.src = `_build/default/lib/js/${scriptName}`;
434
+
script.onload = resolve;
435
+
script.onerror = () => reject(new Error(`Failed to load ${scriptName}`));
436
+
document.head.appendChild(script);
437
+
});
438
+
439
+
// Wait for initialization
440
+
await new Promise(resolve => setTimeout(resolve, 100));
441
+
442
+
if (typeof html5rwTests === 'undefined') {
443
+
throw new Error('Test runner not initialized');
444
+
}
445
+
446
+
log(`Test runner loaded (version ${html5rwTests.version})`);
447
+
return true;
448
+
} catch (e) {
449
+
log(`ERROR: ${e.message}`);
450
+
log('Make sure to run: opam exec -- dune build lib/js/htmlrw-tests.js');
451
+
return false;
452
+
}
453
+
}
454
+
455
+
async function fetchTestFile(type, filename) {
456
+
const basePath = type === 'tree-construction'
457
+
? 'html5lib-tests/tree-construction/'
458
+
: 'html5lib-tests/encoding/';
459
+
const url = basePath + filename;
460
+
const response = await fetch(url);
461
+
if (!response.ok) {
462
+
throw new Error(`Failed to fetch ${url}: ${response.status}`);
463
+
}
464
+
return await response.text();
465
+
}
466
+
467
+
function renderFileResult(result) {
468
+
const section = document.createElement('div');
469
+
section.className = 'results-section';
470
+
section.dataset.filename = result.filename;
471
+
472
+
const collapsed = result.failedCount === 0 ? 'collapsed' : '';
473
+
const hidden = result.failedCount === 0 ? 'hidden' : '';
474
+
475
+
section.innerHTML = `
476
+
<div class="results-header ${collapsed}">
477
+
<h2><span class="toggle">โผ</span> ${escapeHtml(result.filename)}</h2>
478
+
<div class="results-stats">
479
+
<span class="passed">โ ${result.passedCount}</span>
480
+
<span class="failed">โ ${result.failedCount}</span>
481
+
</div>
482
+
</div>
483
+
<div class="results-content ${hidden}">
484
+
${result.tests.map(renderTestResult).join('')}
485
+
</div>
486
+
`;
487
+
488
+
// Add toggle handler
489
+
section.querySelector('.results-header').addEventListener('click', function() {
490
+
this.classList.toggle('collapsed');
491
+
this.nextElementSibling.classList.toggle('hidden');
492
+
});
493
+
494
+
// Add test detail handlers
495
+
section.querySelectorAll('.test-header').forEach(header => {
496
+
header.addEventListener('click', function(e) {
497
+
e.stopPropagation();
498
+
const details = this.nextElementSibling;
499
+
details.classList.toggle('visible');
500
+
const icon = this.querySelector('.expand-icon');
501
+
icon.textContent = details.classList.contains('visible') ? 'โฒ' : 'โผ';
502
+
});
503
+
});
504
+
505
+
return section;
506
+
}
507
+
508
+
function renderTestResult(test) {
509
+
const statusClass = test.success ? 'passed' : 'failed';
510
+
return `
511
+
<div class="test-item" data-passed="${test.success}">
512
+
<div class="test-header">
513
+
<div class="test-info">
514
+
<span class="status ${statusClass}"></span>
515
+
<span class="test-num">#${test.testNum}</span>
516
+
<span class="test-desc">${escapeHtml(test.description)}</span>
517
+
</div>
518
+
<span class="expand-icon">โผ</span>
519
+
</div>
520
+
<div class="test-details">
521
+
<div class="detail-section">
522
+
<h4>Input</h4>
523
+
<pre>${escapeHtml(test.input)}</pre>
524
+
</div>
525
+
<div class="detail-row">
526
+
<div class="detail-section">
527
+
<h4>Expected</h4>
528
+
<pre>${escapeHtml(test.expected)}</pre>
529
+
</div>
530
+
<div class="detail-section">
531
+
<h4>Actual</h4>
532
+
<pre>${escapeHtml(test.actual)}</pre>
533
+
</div>
534
+
</div>
535
+
</div>
536
+
</div>
537
+
`;
538
+
}
539
+
540
+
function escapeHtml(str) {
541
+
const div = document.createElement('div');
542
+
div.textContent = str;
543
+
return div.innerHTML;
544
+
}
545
+
546
+
async function runTests(testType, files, basePath) {
547
+
if (isRunning) return;
548
+
isRunning = true;
549
+
550
+
clearLog();
551
+
document.getElementById('results-container').innerHTML = '';
552
+
document.getElementById('filter-controls').style.display = 'none';
553
+
554
+
const buttons = document.querySelectorAll('button');
555
+
buttons.forEach(b => b.disabled = true);
556
+
557
+
try {
558
+
if (!await loadTestRunner()) {
559
+
return;
560
+
}
561
+
562
+
log(`Starting ${testType} tests...`);
563
+
let totalPassed = 0;
564
+
let totalFailed = 0;
565
+
const allResults = [];
566
+
567
+
for (let i = 0; i < files.length; i++) {
568
+
const filename = files[i];
569
+
updateProgress(i, files.length, `Loading ${filename}...`);
570
+
571
+
try {
572
+
const content = await fetchTestFile(basePath, filename);
573
+
log(`Running ${filename}...`);
574
+
575
+
let result;
576
+
if (basePath === 'tree-construction') {
577
+
result = html5rwTests.runTreeConstructionTest(filename, content);
578
+
} else {
579
+
result = html5rwTests.runEncodingTest(filename, content);
580
+
}
581
+
582
+
totalPassed += result.passedCount;
583
+
totalFailed += result.failedCount;
584
+
allResults.push(result);
585
+
586
+
log(` ${filename}: ${result.passedCount} passed, ${result.failedCount} failed`);
587
+
588
+
// Render result immediately
589
+
const section = renderFileResult(result);
590
+
document.getElementById('results-container').appendChild(section);
591
+
592
+
} catch (e) {
593
+
log(` ERROR loading ${filename}: ${e.message}`);
594
+
}
595
+
596
+
updateSummary(totalPassed, totalFailed);
597
+
updateProgress(i + 1, files.length);
598
+
}
599
+
600
+
updateProgress(files.length, files.length, `Complete: ${totalPassed} passed, ${totalFailed} failed`);
601
+
log(`\n=== SUMMARY ===`);
602
+
log(`Total: ${totalPassed + totalFailed} tests`);
603
+
log(`Passed: ${totalPassed}`);
604
+
log(`Failed: ${totalFailed}`);
605
+
log(`Pass rate: ${((totalPassed / (totalPassed + totalFailed)) * 100).toFixed(2)}%`);
606
+
607
+
document.getElementById('filter-controls').style.display = 'flex';
608
+
setupFilters();
609
+
610
+
} finally {
611
+
isRunning = false;
612
+
buttons.forEach(b => b.disabled = false);
613
+
}
614
+
}
615
+
616
+
function runAllTests() {
617
+
// Run both tree and encoding tests
618
+
runTests('all', TREE_CONSTRUCTION_FILES.concat(ENCODING_FILES.map(f => 'encoding/' + f)), 'tree-construction');
619
+
}
620
+
621
+
async function runTreeTests() {
622
+
await runTests('tree-construction', TREE_CONSTRUCTION_FILES, 'tree-construction');
623
+
}
624
+
625
+
async function runEncodingTests() {
626
+
await runTests('encoding', ENCODING_FILES, 'encoding');
627
+
}
628
+
629
+
function setupFilters() {
630
+
const search = document.getElementById('search');
631
+
const filter = document.getElementById('filter');
632
+
633
+
search.addEventListener('input', applyFilters);
634
+
filter.addEventListener('change', applyFilters);
635
+
}
636
+
637
+
function applyFilters() {
638
+
const query = document.getElementById('search').value.toLowerCase();
639
+
const filterValue = document.getElementById('filter').value;
640
+
641
+
document.querySelectorAll('.test-item').forEach(item => {
642
+
const text = item.textContent.toLowerCase();
643
+
const passed = item.dataset.passed === 'true';
644
+
let visible = true;
645
+
646
+
if (query && !text.includes(query)) visible = false;
647
+
if (filterValue === 'passed' && !passed) visible = false;
648
+
if (filterValue === 'failed' && passed) visible = false;
649
+
650
+
item.style.display = visible ? '' : 'none';
651
+
});
652
+
}
653
+
654
+
function expandAll() {
655
+
document.querySelectorAll('.results-header.collapsed').forEach(h => h.click());
656
+
}
657
+
658
+
function collapseAll() {
659
+
document.querySelectorAll('.results-header:not(.collapsed)').forEach(h => h.click());
660
+
}
661
+
662
+
// Quick test on load
663
+
window.addEventListener('load', function() {
664
+
log('Ready. Select a test mode and click Run to begin.');
665
+
});
666
+
</script>
667
+
</body>
668
+
</html>